From e4a69dbfb23555931ac02fae36fa1e141cded585 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20BIDON?= Date: Fri, 20 Oct 2023 09:46:11 +0200 Subject: [PATCH 1/3] readability: replaced rune slice iteration by switch statement Signed-off-by: Frederic BIDON --- decode.go | 87 +++++++++++++++++++++++++++++++++++----------- docs/BENCHMARKS.md | 21 +++++++++-- ip.go | 5 +-- uri.go | 10 +++--- 4 files changed, 93 insertions(+), 30 deletions(-) diff --git a/decode.go b/decode.go index 8c0907f..0b104ef 100644 --- a/decode.go +++ b/decode.go @@ -7,7 +7,7 @@ import ( "unicode/utf8" ) -func validateUnreservedWithExtra(s string, acceptedRunes []rune) error { +func validateUnreservedWithExtra(s string, extraRunesFunc func(rune) bool) error { for i := 0; i < len(s); { r, size := utf8.DecodeRuneInString(s[i:]) if r == utf8.RuneError { @@ -42,22 +42,10 @@ func validateUnreservedWithExtra(s string, acceptedRunes []rune) error { // unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" // pchar = unreserved / pct-encoded / sub-delims / ":" / "@" if !unicode.IsLetter(r) && !unicode.IsDigit(r) && - // unreserved - r != '-' && r != '.' && r != '_' && r != '~' && - // sub-delims - r != '!' && r != '$' && r != '&' && r != '\'' && r != '(' && r != ')' && - r != '*' && r != '+' && r != ',' && r != ';' && r != '=' { - runeFound := false - for _, acceptedRune := range acceptedRunes { - if r == acceptedRune { - runeFound = true - break - } - } - - if !runeFound { - return fmt.Errorf("contains an invalid character: '%U' (%q) near %q", r, r, s[i:]) - } + !isUnreserved(r) && + !isSubDelims(r) && + (extraRunesFunc == nil || !extraRunesFunc(r)) { + return fmt.Errorf("contains an invalid character: '%U' (%q) near %q", r, r, s[i:]) } } @@ -86,7 +74,7 @@ func unescapePercentEncoding(s string) (rune, int, error) { return 0, 0, fmt.Errorf("expected a '%%' escape character, near: %q", s) } - if s[offset] != '%' { + if s[offset] != percentMark { return 0, 0, fmt.Errorf("expected a '%%' escape character, near: %q", s[offset:]) } offset++ @@ -104,7 +92,7 @@ func unescapePercentEncoding(s string) (rune, int, error) { return 0, 0, fmt.Errorf("expected a '%%' escape character, near: %q", s) } - if s[offset] != '%' { + if s[offset] != percentMark { return 0, 0, fmt.Errorf("expected a '%%' escape character, near: %q", s[offset:]) } offset++ @@ -121,7 +109,7 @@ func unescapePercentEncoding(s string) (rune, int, error) { return 0, 0, fmt.Errorf("expected a '%%' escape character, near: %q", s) } - if s[offset] != '%' { + if s[offset] != percentMark { return 0, 0, fmt.Errorf("expected a '%%' escape character, near: %q", s[offset:]) } offset++ @@ -190,3 +178,62 @@ func unhex(c byte) byte { } return 0 } + +func isUnreserved(r rune) bool { + // unreserved characters + switch r { + case '-', '.', '_', '~': + return true + default: + return false + } +} + +func isSubDelims(r rune) bool { + // sub-delims + switch r { + case '!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '=': + return true + default: + return false + } +} + +/* +func isGenDelims(r rune) bool { + // gen-delims + switch r{ + case ':', '/', '?', '#', '[', ']', '@': + return true + default: + return false + } +} +*/ + +func isPcharExtraRune(r rune) bool { + switch r { + case colonMark, atHost: + return true + default: + return false + } +} + +func isQueryOrFragmentExtraRune(r rune) bool { + switch r { + case colonMark, atHost, slashMark, questionMark: + return true + default: + return false + } +} + +func isUserInfoExtraRune(r rune) bool { + switch r { + case colonMark: + return true + default: + return false + } +} diff --git a/docs/BENCHMARKS.md b/docs/BENCHMARKS.md index 61be10b..ecaac27 100644 --- a/docs/BENCHMARKS.md +++ b/docs/BENCHMARKS.md @@ -191,7 +191,7 @@ Benchmark_Parse/with_URL_payload_with_IPs-16 93061443 374.6 ns/op Benchmark_String-16 180403320 199.9 ns/op 142 B/op 5 allocs/op -# After strict percent-encoding check on host +## After strict percent-encoding check on host goos: linux goarch: amd64 @@ -214,7 +214,7 @@ Benchmark_String Benchmark_String-16 178247580 203.6 ns/op 142 B/op 5 allocs/op PASS -# After rewrite with uriReader +## After rewrite with uriReader go test -bench . -benchtime 30s -run Bench goos: linux @@ -230,7 +230,7 @@ Benchmark_Parse/with_URL_payload_with_IPs-16 96785080 369.1 ns/op Benchmark_String-16 180658692 197.4 ns/op 142 B/op 5 allocs/op PASS -# After rewrite with RuneInString, no Reader +## After rewrite with RuneInString, no Reader go test -v -run Bench -benchtime 30s -bench Bench goos: linux @@ -254,3 +254,18 @@ Benchmark_String Benchmark_String-16 176733871 202.6 ns/op 142 B/op 5 allocs/op PASS +## replaced rune slice iteration by switch statement +goos: linux +goarch: amd64 +pkg: github.com/fredbi/uri +cpu: Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz +Benchmark_Parse/with_URI_simple_payload-4 1964378 610.4 ns/op 208 B/op 2 allocs/op +Benchmark_Parse/with_URL_simple_payload-4 2546800 462.1 ns/op 168 B/op 1 allocs/op +Benchmark_Parse/with_URI_mixed_payload-4 1938195 618.8 ns/op 208 B/op 2 allocs/op +Benchmark_Parse/with_URL_mixed_payload-4 2709578 435.6 ns/op 163 B/op 1 allocs/op +Benchmark_Parse/with_URI_payload_with_IPs-4 1875967 645.6 ns/op 197 B/op 1 allocs/op +Benchmark_Parse/with_URL_payload_with_IPs-4 2291342 520.7 ns/op 176 B/op 1 allocs/op +Benchmark_String-4 4502607 254.0 ns/op 142 B/op 5 allocs/op +PASS +ok github.com/fredbi/uri 11.949s + diff --git a/ip.go b/ip.go index 8f04ca5..c906dda 100644 --- a/ip.go +++ b/ip.go @@ -221,6 +221,7 @@ func validateIPvFuture(address string) error { return errors.New("invalid IP vFuture format: expect a non-empty address after the version tag") } - // TODO: wrong because IpvFuture is not escaped - return validateUnreservedWithExtra(address[offset:], userInfoExtraRunes) + // RFC3986 states that IpvFuture is not escaped, but IPv6 has already evolved to add an escape zoneID. + // We assume that IPvFuture supports escaping as well. + return validateUnreservedWithExtra(address[offset:], isUserInfoExtraRune) } diff --git a/uri.go b/uri.go index 45b9494..913d9be 100644 --- a/uri.go +++ b/uri.go @@ -387,7 +387,7 @@ func (u *uri) validateScheme(scheme string) error { // pchar = unreserved / pct-encoded / sub-delims / ":" / "@" // query = *( pchar / "/" / "?" ) func (u *uri) validateQuery(query string) error { - if err := validateUnreservedWithExtra(query, queryOrFragmentExtraRunes); err != nil { + if err := validateUnreservedWithExtra(query, isQueryOrFragmentExtraRune); err != nil { return errorsJoin(ErrInvalidQuery, err) } @@ -402,7 +402,7 @@ func (u *uri) validateQuery(query string) error { // // fragment = *( pchar / "/" / "?" ) func (u *uri) validateFragment(fragment string) error { - if err := validateUnreservedWithExtra(fragment, queryOrFragmentExtraRunes); err != nil { + if err := validateUnreservedWithExtra(fragment, isQueryOrFragmentExtraRune); err != nil { return errorsJoin(ErrInvalidFragment, err) } @@ -513,7 +513,7 @@ func (a authorityInfo) validatePath(path string) error { } if pos > previousPos { - if err := validateUnreservedWithExtra(path[previousPos:pos], pcharExtraRunes); err != nil { + if err := validateUnreservedWithExtra(path[previousPos:pos], isPcharExtraRune); err != nil { return errorsJoin( ErrInvalidPath, err, @@ -525,7 +525,7 @@ func (a authorityInfo) validatePath(path string) error { } if previousPos < len(path) { // don't care if the last char was a separator - if err := validateUnreservedWithExtra(path[previousPos:], pcharExtraRunes); err != nil { + if err := validateUnreservedWithExtra(path[previousPos:], isPcharExtraRune); err != nil { return errorsJoin( ErrInvalidPath, err, @@ -636,7 +636,7 @@ func (a authorityInfo) validatePort(port, host string) error { // // userinfo = *( unreserved / pct-encoded / sub-delims / ":" ) func (a authorityInfo) validateUserInfo(userinfo string) error { - if err := validateUnreservedWithExtra(userinfo, userInfoExtraRunes); err != nil { + if err := validateUnreservedWithExtra(userinfo, isUserInfoExtraRune); err != nil { return errorsJoin( ErrInvalidUserInfo, err, From 2de9090c5fe4600f8104d9c60d580a138bf28a8c Mon Sep 17 00:00:00 2001 From: Frederic BIDON Date: Wed, 25 Oct 2023 15:21:57 +0200 Subject: [PATCH 2/3] updated benchmarks Signed-off-by: Frederic BIDON --- docs/BENCHMARKS.md | 34 +++++++++++++++++++++++++--------- 1 file changed, 25 insertions(+), 9 deletions(-) diff --git a/docs/BENCHMARKS.md b/docs/BENCHMARKS.md index ecaac27..fb835c9 100644 --- a/docs/BENCHMARKS.md +++ b/docs/BENCHMARKS.md @@ -134,6 +134,9 @@ Benchmark_Parse/with_URL_payload_with_IPs Benchmark_Parse/with_URL_payload_with_IPs-16 96977450 376.3 ns/op 176 B/op 1 allocs/op ## After stricter IP parsing (naive) + +Naive implementation with too many gc allocs. + go test -v -bench . -benchtime 30s -run Bench goos: linux goarch: amd64 @@ -216,6 +219,8 @@ PASS ## After rewrite with uriReader +Abstraction comes at a cost. NO GO + go test -bench . -benchtime 30s -run Bench goos: linux goarch: amd64 @@ -255,17 +260,28 @@ Benchmark_String-16 176733871 202.6 ns/op PASS ## replaced rune slice iteration by switch statement + +Actually a slight degradation. NO GO + + go test -v -pgo=auto -run Bench -benchtime 30s -bench Bench goos: linux goarch: amd64 pkg: github.com/fredbi/uri -cpu: Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz -Benchmark_Parse/with_URI_simple_payload-4 1964378 610.4 ns/op 208 B/op 2 allocs/op -Benchmark_Parse/with_URL_simple_payload-4 2546800 462.1 ns/op 168 B/op 1 allocs/op -Benchmark_Parse/with_URI_mixed_payload-4 1938195 618.8 ns/op 208 B/op 2 allocs/op -Benchmark_Parse/with_URL_mixed_payload-4 2709578 435.6 ns/op 163 B/op 1 allocs/op -Benchmark_Parse/with_URI_payload_with_IPs-4 1875967 645.6 ns/op 197 B/op 1 allocs/op -Benchmark_Parse/with_URL_payload_with_IPs-4 2291342 520.7 ns/op 176 B/op 1 allocs/op -Benchmark_String-4 4502607 254.0 ns/op 142 B/op 5 allocs/op +cpu: AMD Ryzen 7 5800X 8-Core Processor +Benchmark_Parse +Benchmark_Parse/with_URI_simple_payload +Benchmark_Parse/with_URI_simple_payload-16 92742778 391.3 ns/op 160 B/op 1 allocs/op +Benchmark_Parse/with_URL_simple_payload +Benchmark_Parse/with_URL_simple_payload-16 100000000 321.1 ns/op 168 B/op 1 allocs/op +Benchmark_Parse/with_URI_mixed_payload +Benchmark_Parse/with_URI_mixed_payload-16 93061579 393.8 ns/op 160 B/op 1 allocs/op +Benchmark_Parse/with_URL_mixed_payload +Benchmark_Parse/with_URL_mixed_payload-16 100000000 301.8 ns/op 163 B/op 1 allocs/op +Benchmark_Parse/with_URI_payload_with_IPs +Benchmark_Parse/with_URI_payload_with_IPs-16 81460168 424.6 ns/op 160 B/op 1 allocs/op +Benchmark_Parse/with_URL_payload_with_IPs +Benchmark_Parse/with_URL_payload_with_IPs-16 94139295 365.8 ns/op 176 B/op 1 allocs/op +Benchmark_String +Benchmark_String-16 178303498 201.8 ns/op 142 B/op 5 allocs/op PASS -ok github.com/fredbi/uri 11.949s From a5c48489ad9de8995641b5a17f8b99787f7aa6fd Mon Sep 17 00:00:00 2001 From: Frederic BIDON Date: Wed, 25 Oct 2023 18:50:09 +0200 Subject: [PATCH 3/3] removed unused globals Signed-off-by: Frederic BIDON --- uri.go | 7 ------- 1 file changed, 7 deletions(-) diff --git a/uri.go b/uri.go index 913d9be..5943ace 100644 --- a/uri.go +++ b/uri.go @@ -88,13 +88,6 @@ const ( maxDomainLength = 255 ) -var ( - // predefined sets of accecpted runes beyond the "unreserved" character set - pcharExtraRunes = []rune{colonMark, atHost} // pchar = unreserved | ':' | '@' - queryOrFragmentExtraRunes = append(pcharExtraRunes, slashMark, questionMark) - userInfoExtraRunes = append(pcharExtraRunes, colonMark) -) - // IsURI tells if a URI is valid according to RFC3986/RFC397. func IsURI(raw string) bool { _, err := Parse(raw)