diff --git a/decode.go b/decode.go index 8c0907f..0b104ef 100644 --- a/decode.go +++ b/decode.go @@ -7,7 +7,7 @@ import ( "unicode/utf8" ) -func validateUnreservedWithExtra(s string, acceptedRunes []rune) error { +func validateUnreservedWithExtra(s string, extraRunesFunc func(rune) bool) error { for i := 0; i < len(s); { r, size := utf8.DecodeRuneInString(s[i:]) if r == utf8.RuneError { @@ -42,22 +42,10 @@ func validateUnreservedWithExtra(s string, acceptedRunes []rune) error { // unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" // pchar = unreserved / pct-encoded / sub-delims / ":" / "@" if !unicode.IsLetter(r) && !unicode.IsDigit(r) && - // unreserved - r != '-' && r != '.' && r != '_' && r != '~' && - // sub-delims - r != '!' && r != '$' && r != '&' && r != '\'' && r != '(' && r != ')' && - r != '*' && r != '+' && r != ',' && r != ';' && r != '=' { - runeFound := false - for _, acceptedRune := range acceptedRunes { - if r == acceptedRune { - runeFound = true - break - } - } - - if !runeFound { - return fmt.Errorf("contains an invalid character: '%U' (%q) near %q", r, r, s[i:]) - } + !isUnreserved(r) && + !isSubDelims(r) && + (extraRunesFunc == nil || !extraRunesFunc(r)) { + return fmt.Errorf("contains an invalid character: '%U' (%q) near %q", r, r, s[i:]) } } @@ -86,7 +74,7 @@ func unescapePercentEncoding(s string) (rune, int, error) { return 0, 0, fmt.Errorf("expected a '%%' escape character, near: %q", s) } - if s[offset] != '%' { + if s[offset] != percentMark { return 0, 0, fmt.Errorf("expected a '%%' escape character, near: %q", s[offset:]) } offset++ @@ -104,7 +92,7 @@ func unescapePercentEncoding(s string) (rune, int, error) { return 0, 0, fmt.Errorf("expected a '%%' escape character, near: %q", s) } - if s[offset] != '%' { + if s[offset] != percentMark { return 0, 0, fmt.Errorf("expected a '%%' escape character, near: %q", s[offset:]) } offset++ @@ -121,7 +109,7 @@ func unescapePercentEncoding(s string) (rune, int, error) { return 0, 0, fmt.Errorf("expected a '%%' escape character, near: %q", s) } - if s[offset] != '%' { + if s[offset] != percentMark { return 0, 0, fmt.Errorf("expected a '%%' escape character, near: %q", s[offset:]) } offset++ @@ -190,3 +178,62 @@ func unhex(c byte) byte { } return 0 } + +func isUnreserved(r rune) bool { + // unreserved characters + switch r { + case '-', '.', '_', '~': + return true + default: + return false + } +} + +func isSubDelims(r rune) bool { + // sub-delims + switch r { + case '!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '=': + return true + default: + return false + } +} + +/* +func isGenDelims(r rune) bool { + // gen-delims + switch r{ + case ':', '/', '?', '#', '[', ']', '@': + return true + default: + return false + } +} +*/ + +func isPcharExtraRune(r rune) bool { + switch r { + case colonMark, atHost: + return true + default: + return false + } +} + +func isQueryOrFragmentExtraRune(r rune) bool { + switch r { + case colonMark, atHost, slashMark, questionMark: + return true + default: + return false + } +} + +func isUserInfoExtraRune(r rune) bool { + switch r { + case colonMark: + return true + default: + return false + } +} diff --git a/docs/BENCHMARKS.md b/docs/BENCHMARKS.md index 61be10b..fb835c9 100644 --- a/docs/BENCHMARKS.md +++ b/docs/BENCHMARKS.md @@ -134,6 +134,9 @@ Benchmark_Parse/with_URL_payload_with_IPs Benchmark_Parse/with_URL_payload_with_IPs-16 96977450 376.3 ns/op 176 B/op 1 allocs/op ## After stricter IP parsing (naive) + +Naive implementation with too many gc allocs. + go test -v -bench . -benchtime 30s -run Bench goos: linux goarch: amd64 @@ -191,7 +194,7 @@ Benchmark_Parse/with_URL_payload_with_IPs-16 93061443 374.6 ns/op Benchmark_String-16 180403320 199.9 ns/op 142 B/op 5 allocs/op -# After strict percent-encoding check on host +## After strict percent-encoding check on host goos: linux goarch: amd64 @@ -214,7 +217,9 @@ Benchmark_String Benchmark_String-16 178247580 203.6 ns/op 142 B/op 5 allocs/op PASS -# After rewrite with uriReader +## After rewrite with uriReader + +Abstraction comes at a cost. NO GO go test -bench . -benchtime 30s -run Bench goos: linux @@ -230,7 +235,7 @@ Benchmark_Parse/with_URL_payload_with_IPs-16 96785080 369.1 ns/op Benchmark_String-16 180658692 197.4 ns/op 142 B/op 5 allocs/op PASS -# After rewrite with RuneInString, no Reader +## After rewrite with RuneInString, no Reader go test -v -run Bench -benchtime 30s -bench Bench goos: linux @@ -254,3 +259,29 @@ Benchmark_String Benchmark_String-16 176733871 202.6 ns/op 142 B/op 5 allocs/op PASS +## replaced rune slice iteration by switch statement + +Actually a slight degradation. NO GO + + go test -v -pgo=auto -run Bench -benchtime 30s -bench Bench +goos: linux +goarch: amd64 +pkg: github.com/fredbi/uri +cpu: AMD Ryzen 7 5800X 8-Core Processor +Benchmark_Parse +Benchmark_Parse/with_URI_simple_payload +Benchmark_Parse/with_URI_simple_payload-16 92742778 391.3 ns/op 160 B/op 1 allocs/op +Benchmark_Parse/with_URL_simple_payload +Benchmark_Parse/with_URL_simple_payload-16 100000000 321.1 ns/op 168 B/op 1 allocs/op +Benchmark_Parse/with_URI_mixed_payload +Benchmark_Parse/with_URI_mixed_payload-16 93061579 393.8 ns/op 160 B/op 1 allocs/op +Benchmark_Parse/with_URL_mixed_payload +Benchmark_Parse/with_URL_mixed_payload-16 100000000 301.8 ns/op 163 B/op 1 allocs/op +Benchmark_Parse/with_URI_payload_with_IPs +Benchmark_Parse/with_URI_payload_with_IPs-16 81460168 424.6 ns/op 160 B/op 1 allocs/op +Benchmark_Parse/with_URL_payload_with_IPs +Benchmark_Parse/with_URL_payload_with_IPs-16 94139295 365.8 ns/op 176 B/op 1 allocs/op +Benchmark_String +Benchmark_String-16 178303498 201.8 ns/op 142 B/op 5 allocs/op +PASS + diff --git a/ip.go b/ip.go index 8f04ca5..c906dda 100644 --- a/ip.go +++ b/ip.go @@ -221,6 +221,7 @@ func validateIPvFuture(address string) error { return errors.New("invalid IP vFuture format: expect a non-empty address after the version tag") } - // TODO: wrong because IpvFuture is not escaped - return validateUnreservedWithExtra(address[offset:], userInfoExtraRunes) + // RFC3986 states that IpvFuture is not escaped, but IPv6 has already evolved to add an escape zoneID. + // We assume that IPvFuture supports escaping as well. + return validateUnreservedWithExtra(address[offset:], isUserInfoExtraRune) } diff --git a/uri.go b/uri.go index 45b9494..5943ace 100644 --- a/uri.go +++ b/uri.go @@ -88,13 +88,6 @@ const ( maxDomainLength = 255 ) -var ( - // predefined sets of accecpted runes beyond the "unreserved" character set - pcharExtraRunes = []rune{colonMark, atHost} // pchar = unreserved | ':' | '@' - queryOrFragmentExtraRunes = append(pcharExtraRunes, slashMark, questionMark) - userInfoExtraRunes = append(pcharExtraRunes, colonMark) -) - // IsURI tells if a URI is valid according to RFC3986/RFC397. func IsURI(raw string) bool { _, err := Parse(raw) @@ -387,7 +380,7 @@ func (u *uri) validateScheme(scheme string) error { // pchar = unreserved / pct-encoded / sub-delims / ":" / "@" // query = *( pchar / "/" / "?" ) func (u *uri) validateQuery(query string) error { - if err := validateUnreservedWithExtra(query, queryOrFragmentExtraRunes); err != nil { + if err := validateUnreservedWithExtra(query, isQueryOrFragmentExtraRune); err != nil { return errorsJoin(ErrInvalidQuery, err) } @@ -402,7 +395,7 @@ func (u *uri) validateQuery(query string) error { // // fragment = *( pchar / "/" / "?" ) func (u *uri) validateFragment(fragment string) error { - if err := validateUnreservedWithExtra(fragment, queryOrFragmentExtraRunes); err != nil { + if err := validateUnreservedWithExtra(fragment, isQueryOrFragmentExtraRune); err != nil { return errorsJoin(ErrInvalidFragment, err) } @@ -513,7 +506,7 @@ func (a authorityInfo) validatePath(path string) error { } if pos > previousPos { - if err := validateUnreservedWithExtra(path[previousPos:pos], pcharExtraRunes); err != nil { + if err := validateUnreservedWithExtra(path[previousPos:pos], isPcharExtraRune); err != nil { return errorsJoin( ErrInvalidPath, err, @@ -525,7 +518,7 @@ func (a authorityInfo) validatePath(path string) error { } if previousPos < len(path) { // don't care if the last char was a separator - if err := validateUnreservedWithExtra(path[previousPos:], pcharExtraRunes); err != nil { + if err := validateUnreservedWithExtra(path[previousPos:], isPcharExtraRune); err != nil { return errorsJoin( ErrInvalidPath, err, @@ -636,7 +629,7 @@ func (a authorityInfo) validatePort(port, host string) error { // // userinfo = *( unreserved / pct-encoded / sub-delims / ":" ) func (a authorityInfo) validateUserInfo(userinfo string) error { - if err := validateUnreservedWithExtra(userinfo, userInfoExtraRunes); err != nil { + if err := validateUnreservedWithExtra(userinfo, isUserInfoExtraRune); err != nil { return errorsJoin( ErrInvalidUserInfo, err,