diff --git a/decode.go b/decode.go
index 8c0907f..b643a15 100644
--- a/decode.go
+++ b/decode.go
@@ -5,9 +5,11 @@ import (
 	"strings"
 	"unicode"
 	"unicode/utf8"
+
+	"github.com/bits-and-blooms/bitset"
 )
 
-func validateUnreservedWithExtra(s string, acceptedRunes []rune) error {
+func validateUnreservedWithExtra(s string, runeSet charSet) error {
 	for i := 0; i < len(s); {
 		r, size := utf8.DecodeRuneInString(s[i:])
 		if r == utf8.RuneError {
@@ -35,29 +37,8 @@ func validateUnreservedWithExtra(s string, acceptedRunes []rune) error {
 			continue
 		}
 
-		// RFC grammar definitions:
-		// sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
-		// / "*" / "+" / "," / ";" / "="
-		// gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@"
-		// unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
-		// pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
-		if !unicode.IsLetter(r) && !unicode.IsDigit(r) &&
-			// unreserved
-			r != '-' && r != '.' && r != '_' && r != '~' &&
-			// sub-delims
-			r != '!' && r != '$' && r != '&' && r != '\'' && r != '(' && r != ')' &&
-			r != '*' && r != '+' && r != ',' && r != ';' && r != '=' {
-			runeFound := false
-			for _, acceptedRune := range acceptedRunes {
-				if r == acceptedRune {
-					runeFound = true
-					break
-				}
-			}
-
-			if !runeFound {
-				return fmt.Errorf("contains an invalid character: '%U' (%q) near %q", r, r, s[i:])
-			}
+		if !runeSet.IsInSet(r) {
+			return fmt.Errorf("contains an invalid character: '%U' (%q) near %q", r, r, s[i:])
 		}
 	}
 
@@ -86,7 +67,7 @@ func unescapePercentEncoding(s string) (rune, int, error) {
 		return 0, 0, fmt.Errorf("expected a '%%' escape character, near: %q", s)
 	}
 
-	if s[offset] != '%' {
+	if s[offset] != percentMark {
 		return 0, 0, fmt.Errorf("expected a '%%' escape character, near: %q", s[offset:])
 	}
 	offset++
@@ -104,7 +85,7 @@ func unescapePercentEncoding(s string) (rune, int, error) {
 		return 0, 0, fmt.Errorf("expected a '%%' escape character, near: %q", s)
 	}
 
-	if s[offset] != '%' {
+	if s[offset] != percentMark {
 		return 0, 0, fmt.Errorf("expected a '%%' escape character, near: %q", s[offset:])
 	}
 	offset++
@@ -121,7 +102,7 @@ func unescapePercentEncoding(s string) (rune, int, error) {
 		return 0, 0, fmt.Errorf("expected a '%%' escape character, near: %q", s)
 	}
 
-	if s[offset] != '%' {
+	if s[offset] != percentMark {
 		return 0, 0, fmt.Errorf("expected a '%%' escape character, near: %q", s[offset:])
 	}
 	offset++
@@ -179,6 +160,88 @@ func isNumerical(input string) bool {
 	return strings.IndexFunc(input, isNotDigit[rune]) == -1
 }
 
+// accepted lists the ASCII punctuation shared by all character sets:
+// RFC 3986 "unreserved" marks on the first row, "sub-delims" on the second.
+var accepted = []byte{
+	'-', '.', '_', '~',
+	'!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '=',
+}
+
+// charSet decides whether a rune is acceptable in a given URI component.
+//
+// ASCII runes are looked up in the embedded bit set; runes at or above
+// utf8.RuneSelf are delegated to runeFunc.
+type charSet struct {
+	*bitset.BitSet
+	runeFunc func(rune) bool
+}
+
+// IsInSet reports whether r belongs to the set.
+func (c charSet) IsInSet(r rune) bool {
+	if r < utf8.RuneSelf {
+		return c.Test(uint(r))
+	}
+
+	return c.runeFunc(r)
+}
+
+// Clone returns an independent copy of the set: the bit set is duplicated, so
+// that adding characters to the copy leaves the receiver untouched.
+//
+// runeFunc is carried over, so that a clone is usable as is: a nil runeFunc
+// would panic in IsInSet on the first non-ASCII rune.
+func (c charSet) Clone() charSet {
+	return charSet{
+		BitSet:   c.BitSet.Clone(),
+		runeFunc: c.runeFunc,
+	}
+}
+
+var (
+	unreservedAndSubDelimsCharSet charSet
+	pcharCharSet                  charSet
+	userInfoCharSet               charSet
+	queryOrFragmentCharSet        charSet
+)
+
+// init builds the character sets used to validate the different URI components.
+func init() {
+	unreservedAndSubDelimsCharSet = charSet{
+		// sized for the whole ASCII range: the highest member is '~' (0x7e)
+		BitSet:   bitset.New(utf8.RuneSelf),
+		runeFunc: isUnreservedOrSubDelimsRune,
+	}
+
+	for _, r := range accepted {
+		unreservedAndSubDelimsCharSet.Set(uint(r))
+	}
+	for r := '0'; r <= '9'; r++ {
+		unreservedAndSubDelimsCharSet.Set(uint(r))
+	}
+	for r := 'A'; r <= 'Z'; r++ {
+		unreservedAndSubDelimsCharSet.Set(uint(r))
+	}
+	for r := 'a'; r <= 'z'; r++ {
+		unreservedAndSubDelimsCharSet.Set(uint(r))
+	}
+
+	pcharCharSet = unreservedAndSubDelimsCharSet.Clone()
+	pcharCharSet.Set(uint(':'))
+	pcharCharSet.Set(uint('@'))
+	pcharCharSet.runeFunc = isPcharRune
+
+	userInfoCharSet = unreservedAndSubDelimsCharSet.Clone()
+	userInfoCharSet.Set(uint(':'))
+	userInfoCharSet.runeFunc = isUserInfoRune
+
+	queryOrFragmentCharSet = unreservedAndSubDelimsCharSet.Clone()
+	queryOrFragmentCharSet.Set(uint(colonMark))
+	queryOrFragmentCharSet.Set(uint(atHost))
+	queryOrFragmentCharSet.Set(uint(slashMark))
+	queryOrFragmentCharSet.Set(uint(questionMark))
+	queryOrFragmentCharSet.runeFunc = isQueryOrFragmentRune
+}
+
 func unhex(c byte) byte {
 	switch {
 	case '0' <= c && c <= '9':
@@ -190,3 +253,78 @@ func unhex(c byte) byte {
 	}
 	return 0
 }
+
+// isUnreservedOrSubDelimsRune reports whether r is a unicode letter or digit,
+// or one of the RFC 3986 "unreserved" or "sub-delims" marks.
+func isUnreservedOrSubDelimsRune(r rune) bool {
+	return unicode.IsLetter(r) || unicode.IsDigit(r) ||
+		isUnreserved(r) ||
+		isSubDelims(r)
+}
+
+// isUnreserved reports whether r is one of the "unreserved" marks: - . _ ~
+func isUnreserved(r rune) bool {
+	// unreserved characters
+	switch r {
+	case '-', '.', '_', '~':
+		return true
+	default:
+		return false
+	}
+}
+
+// isSubDelims reports whether r is one of the RFC 3986 "sub-delims" characters.
+func isSubDelims(r rune) bool {
+	// sub-delims
+	switch r {
+	case '!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '=':
+		return true
+	default:
+		return false
+	}
+}
+
+/*
+func isGenDelims(r rune) bool {
+	// gen-delims
+	switch r{
+	case ':', '/', '?', '#', '[', ']', '@':
+		return true
+	default:
+		return false
+	}
+}
+*/
+
+// isPcharRune reports whether r is colonMark, atHost, or a rune accepted by
+// isUnreservedOrSubDelimsRune.
+func isPcharRune(r rune) bool {
+	switch r {
+	case colonMark, atHost:
+		return true
+	default:
+		return isUnreservedOrSubDelimsRune(r)
+	}
+}
+
+// isQueryOrFragmentRune reports whether r is colonMark, atHost, slashMark,
+// questionMark, or a rune accepted by isUnreservedOrSubDelimsRune.
+func isQueryOrFragmentRune(r rune) bool {
+	switch r {
+	case colonMark, atHost, slashMark, questionMark:
+		return true
+	default:
+		return isUnreservedOrSubDelimsRune(r)
+	}
+}
+
+// isUserInfoRune reports whether r is colonMark or a rune accepted by
+// isUnreservedOrSubDelimsRune.
+func isUserInfoRune(r rune) bool {
+	switch r {
+	case colonMark:
+		return true
+	default:
+		return isUnreservedOrSubDelimsRune(r)
+	}
+}
diff --git a/decode_test.go b/decode_test.go
index 6f6b669..4aca6e7 100644
--- a/decode_test.go
+++ b/decode_test.go
@@ -98,6 +98,6 @@ func TestUnhex(t *testing.T) {
 func TestValidateUnreservedWithExtra(t *testing.T) {
 	// edge case: invalid rune in string
 	require.Error(t,
-		validateUnreservedWithExtra(string([]rune{utf8.RuneError}), nil),
+		validateUnreservedWithExtra(string([]rune{utf8.RuneError}), unreservedAndSubDelimsCharSet),
 	)
 }
diff --git a/docs/BENCHMARKS.md b/docs/BENCHMARKS.md
index 3bd4cb9..43a66ef 100644
--- a/docs/BENCHMARKS.md
+++ b/docs/BENCHMARKS.md
@@ -134,6 +134,9 @@ Benchmark_Parse/with_URL_payload_with_IPs
Benchmark_Parse/with_URL_payload_with_IPs-16 96977450 376.3 ns/op 176 B/op 1 allocs/op ## After stricter IP parsing (naive) + +Naive implementation with too many gc allocs. + go test -v -bench . -benchtime 30s -run Bench goos: linux goarch: amd64 @@ -191,7 +194,7 @@ Benchmark_Parse/with_URL_payload_with_IPs-16 93061443 374.6 ns/op Benchmark_String-16 180403320 199.9 ns/op 142 B/op 5 allocs/op -# After strict percent-encoding check on host +## After strict percent-encoding check on host goos: linux goarch: amd64 @@ -214,7 +217,9 @@ Benchmark_String Benchmark_String-16 178247580 203.6 ns/op 142 B/op 5 allocs/op PASS -# After rewrite with uriReader +## After rewrite with uriReader + +Abstraction comes at a cost. NO GO go test -bench . -benchtime 30s -run Bench goos: linux @@ -230,7 +235,7 @@ Benchmark_Parse/with_URL_payload_with_IPs-16 96785080 369.1 ns/op Benchmark_String-16 180658692 197.4 ns/op 142 B/op 5 allocs/op PASS -# After rewrite with RuneInString, no Reader +## After rewrite with RuneInString, no Reader go test -v -run Bench -benchtime 30s -bench Bench goos: linux @@ -254,7 +259,7 @@ Benchmark_String Benchmark_String-16 176733871 202.6 ns/op 142 B/op 5 allocs/op PASS -# After optim allocs String() +## After optim allocs String() go test -v -run Bench -benchtime 30s -bench String goos: linux @@ -265,3 +270,29 @@ Benchmark_String Benchmark_String-16 457095075 79.87 ns/op 48 B/op 1 allocs/op PASS +## replaced rune slice iteration by switch statement + +Actually a slight degradation. 
NO GO + + go test -v -pgo=auto -run Bench -benchtime 30s -bench Bench +goos: linux +goarch: amd64 +pkg: github.com/fredbi/uri +cpu: AMD Ryzen 7 5800X 8-Core Processor +Benchmark_Parse +Benchmark_Parse/with_URI_simple_payload +Benchmark_Parse/with_URI_simple_payload-16 92742778 391.3 ns/op 160 B/op 1 allocs/op +Benchmark_Parse/with_URL_simple_payload +Benchmark_Parse/with_URL_simple_payload-16 100000000 321.1 ns/op 168 B/op 1 allocs/op +Benchmark_Parse/with_URI_mixed_payload +Benchmark_Parse/with_URI_mixed_payload-16 93061579 393.8 ns/op 160 B/op 1 allocs/op +Benchmark_Parse/with_URL_mixed_payload +Benchmark_Parse/with_URL_mixed_payload-16 100000000 301.8 ns/op 163 B/op 1 allocs/op +Benchmark_Parse/with_URI_payload_with_IPs +Benchmark_Parse/with_URI_payload_with_IPs-16 81460168 424.6 ns/op 160 B/op 1 allocs/op +Benchmark_Parse/with_URL_payload_with_IPs +Benchmark_Parse/with_URL_payload_with_IPs-16 94139295 365.8 ns/op 176 B/op 1 allocs/op +Benchmark_String +Benchmark_String-16 178303498 201.8 ns/op 142 B/op 5 allocs/op +PASS + diff --git a/docs/TODO.md b/docs/TODO.md index fb9ac58..622e26a 100644 --- a/docs/TODO.md +++ b/docs/TODO.md @@ -7,6 +7,7 @@ * [x] more nitpicks - check if the length checks on DNS host name are in bytes or in runes => bytes * [x] DefaultPort(), IsDefaultPort() * [] IRI ucs charset compliance (att: perf challenge) +* [] FilePath() * [] normalizer * [] V2 zero alloc, no interface, fluent builder with inner error checking * [] doc: complete the librarian/archivist work on specs, etc + FAQ to clarify the somewhat arcane world of this set of RFCs. 
diff --git a/go.mod b/go.mod index d1442b2..0b74d2e 100644 --- a/go.mod +++ b/go.mod @@ -3,6 +3,7 @@ module github.com/fredbi/uri go 1.19 require ( + github.com/bits-and-blooms/bitset v1.10.0 github.com/pkg/profile v1.7.0 github.com/stretchr/testify v1.8.4 ) diff --git a/go.sum b/go.sum index 5a05862..8947e3a 100644 --- a/go.sum +++ b/go.sum @@ -1,3 +1,5 @@ +github.com/bits-and-blooms/bitset v1.10.0 h1:ePXTeiPEazB5+opbv5fr8umg2R/1NlzgDsyepwsSr88= +github.com/bits-and-blooms/bitset v1.10.0/go.mod h1:7hO7Gc7Pp1vODcmWvKMRA9BNmbv6a/7QIWpPxHddWR8= github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI= github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI= github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU= diff --git a/ip.go b/ip.go index 8f04ca5..5f62b4b 100644 --- a/ip.go +++ b/ip.go @@ -165,7 +165,7 @@ func validateIPv6(host string) error { ) } - if err := validateUnreservedWithExtra(zoneID, nil); err != nil { + if err := validateUnreservedWithExtra(zoneID, unreservedAndSubDelimsCharSet); err != nil { return errorsJoin( ErrInvalidHostAddress, fmt.Errorf("invalid IPv6 zoneID %q: %w", zoneID, err), @@ -221,6 +221,7 @@ func validateIPvFuture(address string) error { return errors.New("invalid IP vFuture format: expect a non-empty address after the version tag") } - // TODO: wrong because IpvFuture is not escaped - return validateUnreservedWithExtra(address[offset:], userInfoExtraRunes) + // RFC3986 states that IpvFuture is not escaped, but IPv6 has already evolved to add an escape zoneID. + // We assume that IPvFuture supports escaping as well. 
+ return validateUnreservedWithExtra(address[offset:], userInfoCharSet) } diff --git a/profile_test.go b/profile_test.go index 07a7bb3..d4f45dd 100644 --- a/profile_test.go +++ b/profile_test.go @@ -6,13 +6,12 @@ import ( "testing" "github.com/pkg/profile" - "github.com/stretchr/testify/require" ) func TestParseWithProfile(t *testing.T) { const ( profDir = "prof" - n = 1000 + n = 100000 ) t.Run("collect CPU profile", func(t *testing.T) { @@ -52,8 +51,9 @@ func runProfile(t *testing.T, n int) { } u, err := Parse(testCase.uriRaw) - require.NoErrorf(t, err, "unexpected error for %q", testCase.uriRaw) - require.NotEmpty(t, u) + if u == nil || err != nil { + t.Fatalf("unexpected error for %q", testCase.uriRaw) + } } } } diff --git a/profiling/fixtures/fixtures.go b/profiling/fixtures/fixtures.go new file mode 100644 index 0000000..bb4ca99 --- /dev/null +++ b/profiling/fixtures/fixtures.go @@ -0,0 +1,1022 @@ +package fixtures + +import ( + "fmt" + "strings" + "unicode/utf8" + + "github.com/fredbi/uri" +) + +type ( + URITest struct { + URIRaw string + Err error + Comment string + IsReference bool + IsNotURI bool + } + + testGenerator func() []URITest +) + +var AllGenerators = []testGenerator{ + rawParsePassTests, + rawParseFailTests, + rawParseReferenceTests, + rawParseStructureTests, + rawParseSchemeTests, + rawParseUserInfoTests, + rawParsePathTests, + rawParseHostTests, + rawParseIPHostTests, + rawParsePortTests, + rawParseQueryTests, + rawParseFragmentTests, +} + +func rawParseReferenceTests() []URITest { + return []URITest{ + { + Comment: "valid missing scheme for an URI reference", + URIRaw: "//foo.bar/?baz=qux#quux", + IsReference: true, + }, + { + Comment: "valid URI reference (not a valid URI)", + URIRaw: "//host.domain.com/a/b", + IsReference: true, + }, + { + Comment: "valid URI reference with port (not a valid URI)", + URIRaw: "//host.domain.com:8080/a/b", + IsReference: true, + }, + { + Comment: "absolute reference with port", + URIRaw: 
"//host.domain.com:8080/a/b", + IsReference: true, + }, + { + Comment: "absolute reference with query params", + URIRaw: "//host.domain.com:8080?query=x/a/b", + IsReference: true, + }, + { + Comment: "absolute reference with query params (reversed)", + URIRaw: "//host.domain.com:8080/a/b?query=x", + IsReference: true, + }, + { + Comment: "invalid URI which is a valid reference", + URIRaw: "//not.a.user@not.a.host/just/a/path", + IsReference: true, + }, + { + Comment: "not an URI but a valid reference", + URIRaw: "/", + IsReference: true, + }, + { + Comment: "URL is an URI reference", + URIRaw: "//not.a.user@not.a.host/just/a/path", + IsReference: true, + }, + { + Comment: "URL is an URI reference, with escaped host", + URIRaw: "//not.a.user@%66%6f%6f.com/just/a/path/also", + IsReference: true, + }, + { + Comment: "non letter is an URI reference", + URIRaw: "*", + IsReference: true, + }, + { + Comment: "file name is an URI reference", + URIRaw: "foo.html", + IsReference: true, + }, + { + Comment: "directory is an URI reference", + URIRaw: "../dir/", + IsReference: true, + }, + { + Comment: "empty string is an URI reference", + URIRaw: "", + IsReference: true, + }, + } +} + +func rawParseStructureTests() []URITest { + return []URITest{ + { + Comment: "// without // prefix, this is parsed as a path", + URIRaw: "mailto:user@domain.com", + }, + { + Comment: "with // prefix, this parsed as a user + host", + URIRaw: "mailto://user@domain.com", + }, + { + Comment: "pathological input (1)", + URIRaw: "?//x", + Err: uri.ErrInvalidURI, + }, + { + Comment: "pathological input (2)", + URIRaw: "#//x", + Err: uri.ErrInvalidURI, + }, + { + Comment: "pathological input (3)", + URIRaw: "://x", + Err: uri.ErrInvalidURI, + }, + { + Comment: "pathological input (4)", + URIRaw: ".?:", + Err: uri.ErrInvalidURI, + }, + { + Comment: "pathological input (5)", + URIRaw: ".#:", + Err: uri.ErrInvalidURI, + }, + { + Comment: "pathological input (6)", + URIRaw: "?", + Err: uri.ErrInvalidURI, + 
}, + { + Comment: "pathological input (7)", + URIRaw: "#", + Err: uri.ErrInvalidURI, + }, + { + Comment: "pathological input (8)", + URIRaw: "?#", + Err: uri.ErrInvalidURI, + }, + { + Comment: "invalid empty URI", + URIRaw: "", + Err: uri.ErrNoSchemeFound, + }, + { + Comment: "invalid with blank", + URIRaw: " ", + Err: uri.ErrNoSchemeFound, + }, + { + Comment: "no separator", + URIRaw: "foo", + Err: uri.ErrNoSchemeFound, + }, + { + Comment: "no ':' separator", + URIRaw: "foo@bar", + Err: uri.ErrNoSchemeFound, + }, + } +} + +func rawParseSchemeTests() []URITest { + return []URITest{ + { + Comment: "urn scheme", + URIRaw: "urn://example-bin.org/path", + }, + { + Comment: "only scheme (DNS host), valid!", + URIRaw: "http:", + }, + { + Comment: "only scheme (registered name empty), valid!", + URIRaw: "foo:", + }, + { + Comment: "scheme without prefix (urn)", + URIRaw: "urn:oasis:names:specification:docbook:dtd:xml:4.1.2", + }, + { + Comment: "scheme without prefix (urn-like)", + URIRaw: "news:comp.infosystems.www.servers.unix", + }, + { + Comment: "+ and - in scheme (e.g. 
tel resource)", + URIRaw: "tel:+1-816-555-1212", + }, + { + Comment: "should assert scheme", + URIRaw: "urn:oasis:names:specification:docbook:dtd:xml:4.1.2", + }, + { + Comment: "legit separator in scheme", + URIRaw: "http+unix://%2Fvar%2Frun%2Fsocket/path?key=value", + }, + { + Comment: "with scheme only", + URIRaw: "https:", + }, + { + Comment: "empty scheme", + URIRaw: "://bob/", + Err: uri.ErrInvalidURI, + }, + { + Comment: "invalid scheme (should start with a letter) (2)", + URIRaw: "?invalidscheme://www.example.com", + Err: uri.ErrInvalidURI, + }, + { + Comment: "invalid scheme (invalid character) (2)", + URIRaw: "ht?tps:", + Err: uri.ErrInvalidURI, + }, + { + Comment: "relative URIs with a colon (':') in their first segment are not considered well-formed", + URIRaw: "2013.05.29_14:33:41", + Err: uri.ErrInvalidScheme, + }, + { + Comment: "invalid scheme (should start with a letter) (1)", + URIRaw: "1http://bob", + Err: uri.ErrInvalidScheme, + }, + { + Comment: "invalid scheme (too short)", + URIRaw: "x://bob", + Err: uri.ErrInvalidScheme, + }, + { + Comment: "invalid scheme (invalid character) (1)", + URIRaw: "x{}y://bob", + Err: uri.ErrInvalidScheme, + }, + { + Comment: "invalid scheme (invalid character) (3)", + URIRaw: "inv;alidscheme://www.example.com", + Err: uri.ErrInvalidScheme, + }, + { + Comment: "absolute URI that represents an implicit file URI.", + URIRaw: "c:\\directory\filename", + Err: uri.ErrInvalidScheme, + }, + { + Comment: "represents a hierarchical absolute URI and does not contain '://'", + URIRaw: "www.contoso.com/path/file", + Err: uri.ErrNoSchemeFound, + }, + } +} + +func rawParsePathTests() []URITest { + return []URITest{ + { + Comment: "legitimate use of several starting /'s in path'", + URIRaw: "file://hostname//etc/hosts", + }, + { + Comment: "authority is not empty: valid path with double '/' (see issue#3)", + URIRaw: "http://host:8080//foo.html", + }, + { + Comment: "path", + URIRaw: "https://example-bin.org/path?", + }, + { + 
Comment: "empty path, query and fragment", + URIRaw: "mailto://u:p@host.domain.com?#", + }, + { + Comment: "empty path", + URIRaw: "http://foo.com", + }, + { + Comment: "path only, no query, no fragmeny", + URIRaw: "http://foo.com/path", + }, + { + Comment: "path with escaped spaces", + URIRaw: "http://example.w3.org/path%20with%20spaces.html", + }, + { + Comment: "path is just an escape space", + URIRaw: "http://example.w3.org/%20", + }, + { + Comment: "dots in path", + URIRaw: "ftp://ftp.is.co.za/../../../rfc/rfc1808.txt", + }, + { + Comment: "= in path", + URIRaw: "ldap://[2001:db8::7]/c=GB?objectClass?one", + }, + { + Comment: "path with drive letter (e.g. windows) (1)", + // this one is dubious: Microsoft (.Net) recognizes the C:/... string as a path and + // states this as incorrect uri -- all other validators state a host "c" and state this uri as a valid one + URIRaw: "file://c:/directory/filename", + }, + { + Comment: "path with drive letter (e.g. windows) (2)", + // The unambiguous correct URI notation is file:///c:/directory/filename + URIRaw: "file:///c:/directory/filename", + }, + { + Comment: `if a URI does not contain an authority component, + then the path cannot begin with two slash characters ("//").`, + URIRaw: "https:////a?query=value#fragment", + Err: uri.ErrInvalidPath, + }, + { + Comment: "contains unescaped backslashes even if they will be treated as forward slashes", + URIRaw: "http:\\host/path/file", + Err: uri.ErrInvalidPath, + }, + { + Comment: "invalid path (invalid characters)", + URIRaw: "http://www.example.org/hello/{}yzx;=1.1/world.txt/?id=5&part=three#there-you-go", + Err: uri.ErrInvalidPath, + }, + { + Comment: "should detect a path starting with several /'s", + URIRaw: "file:////etc/hosts", + Err: uri.ErrInvalidPath, + }, + { + Comment: "empty host => double '/' invalid in this context", + URIRaw: "http:////foo.html", + Err: uri.ErrInvalidPath, + }, + { + Comment: "trailing empty fragment, invalid path", + URIRaw: 
"http://example.w3.org/%legit#", + Err: uri.ErrInvalidPath, + }, + { + Comment: "partial escape (1)", + URIRaw: "http://example.w3.org/%a", + Err: uri.ErrInvalidPath, + }, + { + Comment: "partial escape (2)", + URIRaw: "http://example.w3.org/%a/foo", + Err: uri.ErrInvalidPath, + }, + { + Comment: "partial escape (3)", + URIRaw: "http://example.w3.org/%illegal", + Err: uri.ErrInvalidPath, + }, + } +} + +func rawParseHostTests() []URITest { + return []URITest{ + { + Comment: "authorized dash '-' in host", + URIRaw: "https://example-bin.org/path", + }, + { + Comment: "host with many segments", + URIRaw: "ftp://ftp.is.co.za/rfc/rfc1808.txt", + }, + { + Comment: "percent encoded host is valid, with encoded character not being valid", + URIRaw: "urn://user:passwd@ex%7Cample.com:8080/a?query=value#fragment", + }, + { + Comment: "valid percent-encoded host (dash character is allowed in registered name)", + URIRaw: "urn://user:passwd@ex%2Dample.com:8080/a?query=value#fragment", + }, + { + Comment: "check percent encoding with DNS hostname, dash allowed in DNS name", + URIRaw: "https://user:passwd@ex%2Dample.com:8080/a?query=value#fragment", + }, + { + Comment: "should error on empty host", + URIRaw: "https://user:passwd@:8080/a?query=value#fragment", + Err: uri.ErrMissingHost, + }, + { + Comment: "unicode host", + URIRaw: "http://www.詹姆斯.org/", + }, + { + Comment: "illegal characters", + URIRaw: "http://", + Err: uri.ErrInvalidDNSName, + }, + { + Comment: "should detect an invalid host (DNS rule) (1)", + URIRaw: "https://user:passwd@286;0.0.1:8080/a?query=value#fragment", + Err: uri.ErrInvalidHost, + }, + { + Comment: "should detect an invalid host (DNS rule) (2)", + URIRaw: "https://user:passwd@256.256.256.256:8080/a?query=value#fragment", + Err: uri.ErrInvalidHost, + }, + { + Comment: "registered name containing unallowed character", + URIRaw: "bob://x|y/", + Err: uri.ErrInvalidHost, + }, + { + Comment: "invalid host (contains blank space)", + URIRaw: "http://www.exa 
mple.org", + Err: uri.ErrInvalidHost, + }, + { + Comment: "DNS hostname is too long", + URIRaw: fmt.Sprintf("https://%s/", strings.Repeat("x", 256)), + Err: uri.ErrInvalidDNSName, + }, + { + Comment: "DNS segment in hostname is empty", + URIRaw: "https://seg..com/", + Err: uri.ErrInvalidDNSName, + }, + { + Comment: "DNS last segment in hostname is empty", + URIRaw: "https://seg.empty.com./", + Err: uri.ErrInvalidDNSName, + }, + { + Comment: "DNS segment ends with unallowed character", + URIRaw: "https://x-.y.com/", + Err: uri.ErrInvalidDNSName, + }, + { + Comment: "DNS segment in hostname too long", + URIRaw: fmt.Sprintf("https://%s.%s.com/", strings.Repeat("x", 63), strings.Repeat("y", 64)), + Err: uri.ErrInvalidDNSName, + }, + { + Comment: "DNS with all segments empty", + URIRaw: "https://........./", + Err: uri.ErrInvalidDNSName, + }, + { + Comment: "DNS segment ends with incomplete escape sequence", + URIRaw: "https://x.y.com%/", + Err: uri.ErrInvalidDNSName, + }, + { + Comment: "DNS segment contains an invalid rune", + URIRaw: fmt.Sprintf("https://x.y.com%s/", string([]rune{utf8.RuneError})), + Err: uri.ErrInvalidDNSName, + }, + } +} + +func rawParseIPHostTests() []URITest { + return []URITest{ + { + Comment: "IPv6 host", + URIRaw: "mailto://user@[fe80::1]", + }, + { + Comment: "IPv6 host with zone", + URIRaw: "https://user:passwd@[FF02:30:0:0:0:0:0:5%25en1]:8080/a?query=value#fragment", + }, + { + Comment: "ipv4 host", + URIRaw: "https://user:passwd@127.0.0.1:8080/a?query=value#fragment", + }, + { + Comment: "IPv4 host", + URIRaw: "http://192.168.0.1/", + }, + { + Comment: "IPv4 host with port", + URIRaw: "http://192.168.0.1:8080/", + }, + { + Comment: "IPv6 host", + URIRaw: "http://[fe80::1]/", + }, + { + Comment: "IPv6 host with port", + URIRaw: "http://[fe80::1]:8080/", + }, + { + Comment: "IPv6 host with zone", + URIRaw: "https://user:passwd@[21DA:00D3:0000:2F3B:02AA:00FF:FE28:9C5A%25lo]:8080/a?query=value#fragment", + }, + // Tests exercising RFC 6874 
compliance: + { + Comment: "IPv6 host with (escaped) zone identifier", + URIRaw: "http://[fe80::1%25en0]/", + }, + { + Comment: "IPv6 host with zone identifier and port", + URIRaw: "http://[fe80::1%25en0]:8080/", + }, + { + Comment: "IPv6 host with percent-encoded+unreserved zone identifier", + URIRaw: "http://[fe80::1%25%65%6e%301-._~]/", + }, + { + Comment: "IPv6 host with percent-encoded+unreserved zone identifier", + URIRaw: "http://[fe80::1%25%65%6e%301-._~]:8080/", + }, + { + Comment: "IPv6 host with invalid percent-encoding in zone identifier", + URIRaw: "http://[fe80::1%25%C3~]:8080/", + Err: uri.ErrInvalidHostAddress, + }, + { + Comment: "IPv6 host with invalid percent-encoding in zone identifier", + URIRaw: "http://[fe80::1%25%F3~]:8080/", + Err: uri.ErrInvalidHostAddress, + }, + { + Comment: "IP v4 host (escaped) %31 is percent-encoded for '1'", + URIRaw: "http://192.168.0.%31/", + Err: uri.ErrInvalidHost, + }, + { + Comment: "IPv4 address with percent-encoding is not allowed", + URIRaw: "http://192.168.0.%31:8080/", + Err: uri.ErrInvalidHost, + }, + { + Comment: "invalid IPv4 with port (2)", + URIRaw: "https://user:passwd@127.256.0.1:8080/a?query=value#fragment", + Err: uri.ErrInvalidHost, + }, + { + Comment: "invalid IPv4 with port (3)", + URIRaw: "https://user:passwd@127.0127.0.1:8080/a?query=value#fragment", + Err: uri.ErrInvalidHost, + }, + { + Comment: "valid IPv4 with port (1)", + URIRaw: "https://user:passwd@127.0.0.1:8080/a?query=value#fragment", + }, + { + Comment: "invalid IPv4: part>255", + URIRaw: "https://user:passwd@256.256.256.256:8080/a?query=value#fragment", + Err: uri.ErrInvalidHost, + }, + { + Comment: "IPv6 percent-encoding is limited to ZoneID specification, mus be %25", + URIRaw: "http://[fe80::%31]/", + Err: uri.ErrInvalidHostAddress, + }, + { + Comment: "IPv6 percent-encoding is limited to ZoneID specification, mus be %25 (2))", + URIRaw: "http://[fe80::%31]:8080/", + Err: uri.ErrInvalidHostAddress, + }, + { + Comment: "IPv6 
percent-encoding is limited to ZoneID specification, mus be %25 (2))", + URIRaw: "http://[fe80::%31%25en0]/", + Err: uri.ErrInvalidHostAddress, + }, + { + Comment: "IPv6 percent-encoding is limited to ZoneID specification, mus be %25 (2))", + URIRaw: "http://[%310:fe80::%25en0]/", + Err: uri.ErrInvalidHostAddress, + }, + { + URIRaw: "https://user:passwd@[FF02:30:0:0:0:0:0:5%25en0]:8080/a?query=value#fragment", + }, + { + URIRaw: "https://user:passwd@[FF02:30:0:0:0:0:0:5%25lo]:8080/a?query=value#fragment", + }, + { + Comment: "IPv6 with wrong percent encoding", + URIRaw: "http://[fe80::%%31]:8080/", + Err: uri.ErrInvalidHostAddress, + }, + { + Comment: "IPv6 with wrong percent encoding", + URIRaw: "http://[fe80::%26lo]:8080/", + Err: uri.ErrInvalidHostAddress, + }, + // These two cases are valid as textual representations as + // described in RFC 4007, but are not valid as address + // literals with IPv6 zone identifiers in URIs as described in + // RFC 6874. + { + Comment: "invalid IPv6 (double empty ::)", + URIRaw: "https://user:passwd@[FF02::3::5]:8080/a?query=value#fragment", + Err: uri.ErrInvalidHostAddress, + }, + { + Comment: "invalid IPv6 host with empty (escaped) zone", + URIRaw: "https://user:passwd@[21DA:00D3:0000:2F3B:02AA:00FF:FE28:9C5A%25]:8080/a?query=value#fragment", + Err: uri.ErrInvalidHostAddress, + }, + { + Comment: "invalid IPv6 with unescaped zone (bad percent-encoding)", + URIRaw: "https://user:passwd@[FADF:01%en0]:8080/a?query=value#fragment", + Err: uri.ErrInvalidHostAddress, + }, + { + Comment: "should not parse IPv6 host with empty zone (bad percent encoding)", + URIRaw: "https://user:passwd@[21DA:00D3:0000:2F3B:02AA:00FF:FE28:9C5A%]:8080/a?query=value#fragment", + Err: uri.ErrInvalidHostAddress, + }, + { + Comment: "empty IPv6", + URIRaw: "scheme://user:passwd@[]/valid", + Err: uri.ErrInvalidURI, + }, + { + Comment: "zero IPv6", + URIRaw: "scheme://user:passwd@[::]/valid", + }, + { + Comment: "invalid IPv6 (lack closing bracket) (1)", + 
URIRaw: "http://[fe80::1/", + Err: uri.ErrInvalidURI, + }, + { + Comment: "invalid IPv6 (lack closing bracket) (2)", + URIRaw: "https://user:passwd@[FF02:30:0:0:0:0:0:5%25en0:8080/a?query=value#fragment", + Err: uri.ErrInvalidURI, + }, + { + Comment: "invalid IPv6 (lack closing bracket) (3)", + URIRaw: "https://user:passwd@[FADF:01%en0:8080/a?query=value#fragment", + Err: uri.ErrInvalidHostAddress, + }, + { + Comment: "missing closing bracket for IPv6 litteral (1)", + URIRaw: "https://user:passwd@[FF02::3::5:8080", + Err: uri.ErrInvalidHostAddress, + }, + { + Comment: "missing closing bracket for IPv6 litteral (2)", + URIRaw: "https://user:passwd@[FF02::3::5:8080/?#", + Err: uri.ErrInvalidHostAddress, + }, + { + Comment: "missing closing bracket for IPv6 litteral (3)", + URIRaw: "https://user:passwd@[FF02::3::5:8080#", + Err: uri.ErrInvalidHostAddress, + }, + { + Comment: "missing closing bracket for IPv6 litteral (4)", + URIRaw: "https://user:passwd@[FF02::3::5:8080#abc", + Err: uri.ErrInvalidHostAddress, + }, + { + Comment: "IPv6 empty zone", + URIRaw: "https://user:passwd@[FF02:30:0:0:0:0:0:5%25]:8080/a?query=value#fragment", + Err: uri.ErrInvalidHostAddress, + }, + { + Comment: "IPv6 unescaped zone with reserved characters", + URIRaw: "https://user:passwd@[FF02:30:0:0:0:0:0:5%25:lo]:8080/a?query=value#fragment", + Err: uri.ErrInvalidHostAddress, + }, + { + Comment: "IPv6 addresses not between square brackets are invalid hosts (1)", + URIRaw: "https://0%3A0%3A0%3A0%3A0%3A0%3A0%3A1/a", + Err: uri.ErrInvalidHost, + }, + { + Comment: "IPv6 addresses not between square brackets are invalid hosts (2)", + URIRaw: "https://FF02:30:0:0:0:0:0:5%25/a", + Err: uri.ErrInvalidPort, // ':30' parses as a port + }, + { + Comment: "IP addresses between square brackets should not be ipv4 addresses", + URIRaw: "https://[192.169.224.1]/a", + Err: uri.ErrInvalidHostAddress, + }, + // Just for fun: IPvFuture... 
+ { + Comment: "IPvFuture address", + URIRaw: "http://[v6.fe80::a_en1]", + }, + { + Comment: "IPvFuture address", + URIRaw: "http://[vFFF.fe80::a_en1]", + }, + { + Comment: "IPvFuture address (invalid version)", + URIRaw: "http://[vZ.fe80::a_en1]", + Err: uri.ErrInvalidHostAddress, + }, + { + Comment: "IPvFuture address (invalid version)", + URIRaw: "http://[v]", + Err: uri.ErrInvalidHostAddress, + }, + { + Comment: "IPvFuture address (empty address)", + URIRaw: "http://[vB.]", + Err: uri.ErrInvalidHostAddress, + }, + { + Comment: "IPvFuture address (invalid characters)", + URIRaw: "http://[vAF.{}]", + Err: uri.ErrInvalidHostAddress, + }, + { + Comment: "IPvFuture address (invalid rune) (1)", + URIRaw: fmt.Sprintf("http://[v6.fe80::a_en1%s]", string([]rune{utf8.RuneError})), + Err: uri.ErrInvalidHostAddress, + }, + { + Comment: "IPvFuture address (invalid rune) (2)", + URIRaw: fmt.Sprintf("http://[v6%s.fe80::a_en1]", string([]rune{utf8.RuneError})), + Err: uri.ErrInvalidHostAddress, + }, + } +} + +func rawParsePortTests() []URITest { + return []URITest{ + { + Comment: "multiple ports", + URIRaw: "https://user:passwd@[21DA:00D3:0000:2F3B:02AA:00FF:FE28:9C5A]:8080:8090/a?query=value#fragment", + Err: uri.ErrInvalidPort, + }, + { + Comment: "should detect an invalid port", + URIRaw: "https://user:passwd@[21DA:00D3:0000:2F3B:02AA:00FF:FE28:9C5A]:8080:8090/a?query=value#fragment", + Err: uri.ErrInvalidPort, + }, + { + Comment: "path must begin with / or it collides with port", + URIRaw: "https://host:8080a?query=value#fragment", + Err: uri.ErrInvalidPort, + }, + } +} + +func rawParseQueryTests() []URITest { + return []URITest{ + { + Comment: "valid empty query after '?'", + URIRaw: "https://example-bin.org/path?", + }, + { + Comment: "valid query (separator character)", + URIRaw: "http://www.example.org/hello/world.txt/?id=5@part=three#there-you-go", + }, + { + Comment: "query contains invalid characters", + URIRaw: "http://httpbin.org/get?utf8=\xe2\x98\x83", + Err: 
uri.ErrInvalidQuery, + }, + { + Comment: "invalid query (invalid character) (1)", + URIRaw: "http://www.example.org/hello/world.txt/?id=5&pa{}rt=three#there-you-go", + Err: uri.ErrInvalidQuery, + }, + { + Comment: "invalid query (invalid character) (2)", + URIRaw: "http://www.example.org/hello/world.txt/?id=5&p|art=three#there-you-go", + Err: uri.ErrInvalidQuery, + }, + { + Comment: "invalid query (invalid character) (3)", + URIRaw: "http://httpbin.org/get?utf8=\xe2\x98\x83", + Err: uri.ErrInvalidQuery, + }, + { + Comment: "query is not correctly escaped.", + URIRaw: "http://www.contoso.com/path???/file name", + Err: uri.ErrInvalidQuery, + }, + { + Comment: "check percent encoding with query, incomplete escape sequence", + URIRaw: "https://user:passwd@ex%C3ample.com:8080/a?query=value%#fragment", + Err: uri.ErrInvalidQuery, + }, + } +} + +func rawParseFragmentTests() []URITest { + return []URITest{ + { + Comment: "empty fragment", + URIRaw: "mailto://u:p@host.domain.com#", + }, + { + Comment: "empty query and fragment", + URIRaw: "mailto://u:p@host.domain.com?#", + }, + { + Comment: "invalid char in fragment", + URIRaw: "http://www.example.org/hello/world.txt/?id=5&part=three#there-you-go{}", + Err: uri.ErrInvalidFragment, + }, + { + Comment: "invalid fragment", + URIRaw: "http://example.w3.org/legit#ill[egal", + Err: uri.ErrInvalidFragment, + }, + { + Comment: "check percent encoding with fragment, incomplete escape sequence", + URIRaw: "https://user:passwd@ex%C3ample.com:8080/a?query=value#fragment%", + Err: uri.ErrInvalidFragment, + }, + } +} + +func rawParsePassTests() []URITest { + // TODO: regroup themes, verify redundant testing + return []URITest{ + { + URIRaw: "foo://example.com:8042/over/there?name=ferret#nose", + }, + { + URIRaw: "http://httpbin.org/get?utf8=%e2%98%83", + }, + { + URIRaw: "mailto://user@domain.com", + }, + { + URIRaw: "ssh://user@git.openstack.org:29418/openstack/keystone.git", + }, + { + URIRaw: "https://willo.io/#yolo", + }, + { + 
Comment: "simple host and path", + URIRaw: "http://localhost/", + }, + { + Comment: "(redundant)", + URIRaw: "http://www.richardsonnen.com/", + }, + // from https://github.com/python-hyper/rfc3986/blob/master/tests/test_validators.py + { + Comment: "complete authority", + URIRaw: "ssh://ssh@git.openstack.org:22/sigmavirus24", + }, + { + Comment: "(redundant)", + URIRaw: "https://git.openstack.org:443/sigmavirus24", + }, + { + Comment: "query + fragment", + URIRaw: "ssh://git.openstack.org:22/sigmavirus24?foo=bar#fragment", + }, + { + Comment: "(redundant)", + URIRaw: "git://github.com", + }, + { + Comment: "complete", + URIRaw: "https://user:passwd@http-bin.org:8080/a?query=value#fragment", + }, + // from github.com/scalatra/rl: URI parser in scala + { + Comment: "port", + URIRaw: "http://www.example.org:8080", + }, + { + Comment: "(redundant)", + URIRaw: "http://www.example.org/", + }, + { + Comment: "UTF-8 host", + URIRaw: "http://www.詹姆斯.org/", + }, + { + Comment: "path", + URIRaw: "http://www.example.org/hello/world.txt", + }, + { + Comment: "query", + URIRaw: "http://www.example.org/hello/world.txt/?id=5&part=three", + }, + { + Comment: "query+fragment", + URIRaw: "http://www.example.org/hello/world.txt/?id=5&part=three#there-you-go", + }, + { + Comment: "fragment only", + URIRaw: "http://www.example.org/hello/world.txt/#here-we-are", + }, + { + Comment: "trailing empty fragment: legit", + URIRaw: "http://example.w3.org/legit#", + }, + { + Comment: "should detect a path starting with a /", + URIRaw: "file:///etc/hosts", + }, + { + Comment: `if a URI contains an authority component, + then the path component must either be empty or begin with a slash ("/") character`, + URIRaw: "https://host:8080?query=value#fragment", + }, + { + Comment: "path must begin with / (2)", + URIRaw: "https://host:8080/a?query=value#fragment", + }, + { + Comment: "double //, legit with escape", + URIRaw: "http+unix://%2Fvar%2Frun%2Fsocket/path?key=value", + }, + { + Comment: "double 
leading slash, legit context", + URIRaw: "http://host:8080//foo.html", + }, + { + URIRaw: "http://www.example.org/hello/world.txt/?id=5&part=three#there-you-go", + }, + { + URIRaw: "http://www.example.org/hélloô/mötor/world.txt/?id=5&part=three#there-you-go", + }, + { + URIRaw: "http://www.example.org/hello/yzx;=1.1/world.txt/?id=5&part=three#there-you-go", + }, + { + URIRaw: "file://c:/directory/filename", + }, + { + URIRaw: "ldap://[2001:db8::7]/c=GB?objectClass?one", + }, + { + URIRaw: "ldap://[2001:db8::7]:8080/c=GB?objectClass?one", + }, + { + URIRaw: "http+unix:/%2Fvar%2Frun%2Fsocket/path?key=value", + }, + { + URIRaw: "https://user:passwd@[21DA:00D3:0000:2F3B:02AA:00FF:FE28:9C5A]:8080/a?query=value#fragment", + }, + { + Comment: "should assert path and fragment", + URIRaw: "https://example-bin.org/path#frag?withQuestionMark", + }, + { + Comment: "should assert path and fragment (2)", + URIRaw: "mailto://u:p@host.domain.com?#ahahah", + }, + { + Comment: "should assert path and query", + URIRaw: "ldap://[2001:db8::7]/c=GB?objectClass?one", + }, + { + Comment: "should assert path and query", + URIRaw: "http://www.example.org/hello/world.txt/?id=5&part=three", + }, + { + Comment: "should assert path and query", + URIRaw: "http://www.example.org/hello/world.txt/?id=5&part=three?another#abc?efg", + }, + { + Comment: "should assert path and query", + URIRaw: "https://user:passwd@[21DA:00D3:0000:2F3B:02AA:00FF:FE28:9C5A%25en0]:8080/a?query=value#fragment", + }, + { + Comment: "should parse user/password, IPv6 percent-encoded host with zone", + URIRaw: "https://user:passwd@[::1%25lo]:8080/a?query=value#fragment", + }, + // This is an invalid UTF8 sequence that SHOULD error, at least in the context of + // Ref: https://url.spec.whatwg.org/#percent-encoded-bytes + { + Comment: "check percent encoding with DNS hostname, invalid escape sequence in host segment", + URIRaw: "https://user:passwd@ex%C3ample.com:8080/a?query=value#fragment", + Err: uri.ErrInvalidDNSName, + }, 
+		{
+			Comment: "check percent encoding with registered hostname, invalid escape sequence in host segment",
+			URIRaw:  "tel://user:passwd@ex%C3ample.com:8080/a?query=value#fragment",
+			Err:     uri.ErrInvalidHost,
+		},
+		{
+			Comment: "check percent encoding with registered hostname, incomplete escape sequence in host segment",
+			URIRaw:  "https://user:passwd@ex%C3ample.com%:8080/a?query=value#fragment",
+			Err:     uri.ErrInvalidDNSName,
+		},
+	}
+}
+
+// rawParseUserInfoTests returns fixtures with an invalid userinfo part.
+//
+// Both cases set Err to uri.ErrInvalidUserInfo.
+func rawParseUserInfoTests() []URITest {
+	return []URITest{
+		{
+			Comment: "userinfo contains invalid character '{'",
+			URIRaw:  "mailto://{}:{}@host.domain.com",
+			Err:     uri.ErrInvalidUserInfo,
+		},
+		{
+			Comment: "invalid user",
+			URIRaw:  "https://user{}:passwd@[FF02:30:0:0:0:0:0:5%25en0]:8080/a?query=value#fragment",
+			Err:     uri.ErrInvalidUserInfo,
+		},
+	}
+}
+
+// rawParseFailTests is a placeholder for failure fixtures that fit none of
+// the other generators. It returns nil.
+func rawParseFailTests() []URITest {
+	// other failures not already caught by the other test cases
+	// (currently empty)
+	return nil
+}
diff --git a/profiling/go.mod b/profiling/go.mod
new file mode 100644
index 0000000..7de8d2b
--- /dev/null
+++ b/profiling/go.mod
@@ -0,0 +1,16 @@
+module github.com/fredbi/uri/profiling
+
+go 1.21.3
+
+// replace github.com/fredbi/uri => /home/fred/src/github.com/fredbi/uri
+replace github.com/fredbi/uri => github.com/fredbi/uri v1.1.1-0.20231026093253-d1ef1b61c7b4
+
+require (
+	github.com/fredbi/uri v1.1.0
+	github.com/pkg/profile v1.7.0
+)
+
+require (
+	github.com/felixge/fgprof v0.9.3 // indirect
+	github.com/google/pprof v0.0.0-20211214055906-6f57359322fd // indirect
+)
diff --git a/profiling/go.sum b/profiling/go.sum
new file mode 100644
index 0000000..10fc16c
--- /dev/null
+++ b/profiling/go.sum
@@ -0,0 +1,28 @@
+github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI=
+github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI=
+github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU=
+github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/felixge/fgprof v0.9.3 h1:VvyZxILNuCiUCSXtPtYmmtGvb65nqXh2QFWc0Wpf2/g= +github.com/felixge/fgprof v0.9.3/go.mod h1:RdbpDgzqYVh/T9fPELJyV7EYJuHB55UTEULNun8eiPw= +github.com/fredbi/uri v1.1.1-0.20231026093253-d1ef1b61c7b4 h1:BJYUjBHrwzdHvemfFxgZ57YrGnaDtALmcqBC3PhDk54= +github.com/fredbi/uri v1.1.1-0.20231026093253-d1ef1b61c7b4/go.mod h1:aYTUoAXBOq7BLfVJ8GnKmfcuURosB1xyHDIfWeC/iW4= +github.com/google/pprof v0.0.0-20211214055906-6f57359322fd h1:1FjCyPC+syAzJ5/2S8fqdZK1R22vvA0J7JZKcuOIQ7Y= +github.com/google/pprof v0.0.0-20211214055906-6f57359322fd/go.mod h1:KgnwoLYCZ8IQu3XUZ8Nc/bM9CCZFOyjUNOSygVozoDg= +github.com/ianlancetaylor/demangle v0.0.0-20210905161508-09a460cdf81d/go.mod h1:aYm2/VgdVmcIU8iMfdMvDMsRAQjcfZSKFby6HOFvi/w= +github.com/pkg/profile v1.7.0 h1:hnbDkaNWPCLMO9wGLdBFTIZvzDrDfBM2072E1S9gJkA= +github.com/pkg/profile v1.7.0/go.mod h1:8Uer0jas47ZQMJ7VD+OHknK4YDY07LPUC6dEvqDjvNo= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= +github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= +github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= +github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= +golang.org/x/sys v0.0.0-20211007075335-d3039528d8ac/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 
+gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
+gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
+gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
+gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
diff --git a/profiling/main.go b/profiling/main.go
new file mode 100644
index 0000000..af12a13
--- /dev/null
+++ b/profiling/main.go
@@ -0,0 +1,79 @@
+package main
+
+import (
+	"log"
+
+	"github.com/fredbi/uri"
+	"github.com/fredbi/uri/profiling/fixtures"
+	"github.com/pkg/profile"
+)
+
+const (
+	// profDir is the directory handed to profile.ProfilePath for every profile.
+	profDir = "prof"
+)
+
+// main runs the CPU profile, then the memory profile, over the same workload.
+func main() {
+	const (
+		// n is the number of passes over the fixtures made by each profile.
+		n = 100000
+	)
+
+	profileCPU(n)
+	profileMemory(n)
+}
+
+// profileCPU runs the workload n times with CPU profiling enabled.
+//
+// The profile is stopped by the deferred Stop when the function returns.
+func profileCPU(n int) {
+	defer profile.Start(
+		profile.CPUProfile,
+		profile.ProfilePath(profDir),
+		profile.NoShutdownHook,
+	).Stop()
+
+	// current: Parse calls total CPU: 100ms -> 70ms
+	// validateHost: 30ms -> 10ms
+	// validatePath: 20ms -> 20ms (same, less gc work) -> 10ms
+	// validatePort: 10ms
+	runProfile(n)
+}
+
+// profileMemory runs the workload n times with memory profiling enabled.
+//
+// The profile is stopped by the deferred Stop when the function returns.
+func profileMemory(n int) {
+	defer profile.Start(
+		profile.MemProfile,
+		profile.ProfilePath(profDir),
+		profile.NoShutdownHook,
+	).Stop()
+
+	// current: object allocs: 653 746 -> 533 849 -> 505 606
+	runProfile(n)
+}
+
+// runProfile parses, n times over, every fixture that is neither a URI
+// reference nor expected to fail, and aborts the program on the first
+// fixture that does not parse.
+func runProfile(n int) {
+	for i := 0; i < n; i++ {
+		for _, generator := range fixtures.AllGenerators {
+			for _, testCase := range generator() {
+				if testCase.IsReference || testCase.Err != nil {
+					// skip URI references and invalid cases
+					continue
+				}
+
+				u, err := uri.Parse(testCase.URIRaw)
+				if u == nil || err != nil {
+					// log.Fatalf exits the process: deferred calls in the callers
+					// (the profile Stop) do not run, so the running profile is never stopped.
+					log.Fatalf("unexpected error for %q: %v", testCase.URIRaw, err)
+				}
+			}
+		}
+	}
+}
diff --git a/profiling/prof-base/cpu.pprof b/profiling/prof-base/cpu.pprof
new file mode 100644
index 0000000..28abfa7
Binary files /dev/null and b/profiling/prof-base/cpu.pprof differ
diff --git a/profiling/prof-base/mem.pprof b/profiling/prof-base/mem.pprof new
file mode 100644 index 0000000..036d5e9 Binary files /dev/null and b/profiling/prof-base/mem.pprof differ diff --git a/profiling/prof/cpu.pprof b/profiling/prof/cpu.pprof new file mode 100644 index 0000000..246ec67 Binary files /dev/null and b/profiling/prof/cpu.pprof differ diff --git a/profiling/prof/mem.pprof b/profiling/prof/mem.pprof new file mode 100644 index 0000000..f632a89 Binary files /dev/null and b/profiling/prof/mem.pprof differ diff --git a/profiling/profiling b/profiling/profiling new file mode 100755 index 0000000..781b91e Binary files /dev/null and b/profiling/profiling differ diff --git a/uri.go b/uri.go index 2ff34e3..104a43e 100644 --- a/uri.go +++ b/uri.go @@ -94,13 +94,6 @@ const ( maxDomainLength = 255 ) -var ( - // predefined sets of accecpted runes beyond the "unreserved" character set - pcharExtraRunes = []rune{colonMark, atHost} // pchar = unreserved | ':' | '@' - queryOrFragmentExtraRunes = append(pcharExtraRunes, slashMark, questionMark) - userInfoExtraRunes = append(pcharExtraRunes, colonMark) -) - // IsURI tells if a URI is valid according to RFC3986/RFC397. func IsURI(raw string) bool { _, err := Parse(raw) @@ -393,7 +386,7 @@ func (u *uri) validateScheme(scheme string) error { // pchar = unreserved / pct-encoded / sub-delims / ":" / "@" // query = *( pchar / "/" / "?" ) func (u *uri) validateQuery(query string) error { - if err := validateUnreservedWithExtra(query, queryOrFragmentExtraRunes); err != nil { + if err := validateUnreservedWithExtra(query, queryOrFragmentCharSet); err != nil { return errorsJoin(ErrInvalidQuery, err) } @@ -408,7 +401,7 @@ func (u *uri) validateQuery(query string) error { // // fragment = *( pchar / "/" / "?" 
) func (u *uri) validateFragment(fragment string) error { - if err := validateUnreservedWithExtra(fragment, queryOrFragmentExtraRunes); err != nil { + if err := validateUnreservedWithExtra(fragment, queryOrFragmentCharSet); err != nil { return errorsJoin(ErrInvalidFragment, err) } @@ -528,7 +521,7 @@ func (a authorityInfo) validatePath(path string) error { } if pos > previousPos { - if err := validateUnreservedWithExtra(path[previousPos:pos], pcharExtraRunes); err != nil { + if err := validateUnreservedWithExtra(path[previousPos:pos], pcharCharSet); err != nil { return errorsJoin( ErrInvalidPath, err, @@ -540,7 +533,7 @@ func (a authorityInfo) validatePath(path string) error { } if previousPos < len(path) { // don't care if the last char was a separator - if err := validateUnreservedWithExtra(path[previousPos:], pcharExtraRunes); err != nil { + if err := validateUnreservedWithExtra(path[previousPos:], pcharCharSet); err != nil { return errorsJoin( ErrInvalidPath, err, @@ -615,7 +608,7 @@ func validateHostForScheme(host string, schemes ...string) error { func validateRegisteredHostForScheme(host string) error { // RFC 3986 registered name - if err := validateUnreservedWithExtra(host, nil); err != nil { + if err := validateUnreservedWithExtra(host, unreservedAndSubDelimsCharSet); err != nil { return errorsJoin( ErrInvalidRegisteredName, err, @@ -661,7 +654,7 @@ func (a authorityInfo) validatePort(port, host string) error { // // userinfo = *( unreserved / pct-encoded / sub-delims / ":" ) func (a authorityInfo) validateUserInfo(userinfo string) error { - if err := validateUnreservedWithExtra(userinfo, userInfoExtraRunes); err != nil { + if err := validateUnreservedWithExtra(userinfo, userInfoCharSet); err != nil { return errorsJoin( ErrInvalidUserInfo, err,