diff --git a/README.md b/README.md index f93a541..49ae9e8 100644 --- a/README.md +++ b/README.md @@ -21,8 +21,31 @@ which provides a workable but loose implementation of the RFC for URLs. ## What's new? -### V1.2 announcement +### v2.0.0 + +**Breaking changes** + +Most users should not be affected by these breaking changes. + +* `URI` and `Authority` become concrete types. Interfaces are discarded. +* `Parse()` and `ParseReference()` now return a `URI` value, no longer a pointer. +* The `Validate() error` methods have been removed: validation is carried out when parsing only. +* The `Builder` interface and `URI.Builder()` function have been removed. + `URI` exposes fluent builder methods instead. +* `UsesDNSHostValidation()` has been removed and replaced by a private default function. + Override is possible via `Option`. Similar custom behavior may be achieved for `DefaultPort()`. + +**Features** +* `Parse(string, ...Option)` and `ParseReference(string, ...Option)` now support options to tune the + `URI` validation. + +**Performance** + +* perf: massive improvement due to giving up pointers (parsing now is a zero-allocation operation). + This boosts `Parse()` to be even faster than the standard library `net/url.Parse()`. + +### V1.2 announcement To do before I cut a v1.2.0: * [] handle empty fragment, empty query. Ex: `https://host?` is not equivalent to `http://host`. @@ -123,11 +146,14 @@ V2 is getting closer to completion. It comes with: ### Building -The exposed type `URI` can be transformed into a fluent `Builder` to set the parts of an URI. +The exposed type `URI` can be used as a fluent builder to set the parts of an URI. ```go aURI, _ := Parse("mailto://user@domain.com") - newURI := auri.Builder().SetUserInfo(test.name).SetHost("newdomain.com").SetScheme("http").SetPort("443") + newURI := aURI.WithUserInfo(test.name). + WithHost("newdomain.com"). + WithScheme("http"). 
+ WithPort("443") ``` ### Canonicalization diff --git a/benchmark_test.go b/benchmark_test.go index 9c91bf1..06cb3f5 100644 --- a/benchmark_test.go +++ b/benchmark_test.go @@ -88,31 +88,31 @@ func benchParseURLStdLib(payload []string) func(*testing.B) { func Benchmark_String(b *testing.B) { var ip ipType - tests := []*uri{ + tests := []URI{ { - "foo", "//example.com:8042/over/there", "name=ferret", "nose", - authorityInfo{"//", "", "example.com", "8042", "/over/there", ip, nil}, nil, + "foo", "//example.com:8042/over/there", "name=ferret", "nose", + Authority{nil, "//", "", "example.com", "8042", "/over/there", ip}, }, { - "http", "//httpbin.org/get", "utf8=\xe2\x98\x83", "", - authorityInfo{"//", "", "httpbin.org", "", "/get", ip, nil}, nil, + "http", "//httpbin.org/get", "utf8=\xe2\x98\x83", "", + Authority{nil, "//", "", "httpbin.org", "", "/get", ip}, }, { - "mailto", "user@domain.com", "", "", - authorityInfo{"//", "user", "domain.com", "", "", ip, nil}, nil, + "mailto", "user@domain.com", "", "", + Authority{nil, "//", "user", "domain.com", "", "", ip}, }, { - "ssh", "//user@git.openstack.org:29418/openstack/keystone.git", "", "", - authorityInfo{"//", "user", "git.openstack.org", "29418", "/openstack/keystone.git", ip, nil}, nil, + "ssh", "//user@git.openstack.org:29418/openstack/keystone.git", "", "", + Authority{nil, "//", "user", "git.openstack.org", "29418", "/openstack/keystone.git", ip}, }, { - "https", "//willo.io/", "", "yolo", - authorityInfo{"//", "", "willo.io", "", "/", ip, nil}, nil, + "https", "//willo.io/", "", "yolo", + Authority{nil, "//", "", "willo.io", "", "/", ip}, }, } diff --git a/builder.go b/builder.go index 8378ba5..39ce8ad 100644 --- a/builder.go +++ b/builder.go @@ -1,59 +1,140 @@ package uri -// Builder builds URIs. 
-type Builder interface { - URI() URI - SetScheme(scheme string) Builder - SetUserInfo(userinfo string) Builder - SetHost(host string) Builder - SetPort(port string) Builder - SetPath(path string) Builder - SetQuery(query string) Builder - SetFragment(fragment string) Builder - - // Returns the URI this Builder represents. - String() string -} - -func (u *uri) SetScheme(scheme string) Builder { +// Builder methods + +func (u URI) WithScheme(scheme string, opts ...Option) URI { + if u.Err() != nil { + return u + } + + opts = append(opts, withValidationFlags(flagValidateScheme|flagValidateHost)) + o, redeem := applyURIOptions(opts) + defer func() { redeem(o) }() + u.scheme = scheme + u.authority.ipType, u.err = u.validate(o) + return u } -func (u *uri) SetUserInfo(userinfo string) Builder { - u.ensureAuthorityExists() +func (u URI) WithAuthority(authority Authority, opts ...Option) URI { + if u.Err() != nil { + return u + } + + opts = append(opts, withValidationFlags(flagValidateHost|flagValidatePort|flagValidateUserInfo|flagValidatePath)) + o, redeem := applyURIOptions(opts) + defer func() { redeem(o) }() + + u.authority = authority + u.authority.ipType, u.err = u.validate(o) + u.authority.err = u.err + + return u +} + +func (u URI) WithUserInfo(userinfo string, opts ...Option) URI { + if u.Err() != nil { + return u + } + + opts = append(opts, withValidationFlags(flagValidateUserInfo)) + o, redeem := applyURIOptions(opts) + defer func() { redeem(o) }() + + u.authority = u.authority.withEnsuredAuthority() u.authority.userinfo = userinfo + u.authority.ipType, u.err = u.validate(o) + u.authority.err = u.err + return u } -func (u *uri) SetHost(host string) Builder { - u.ensureAuthorityExists() +func (u URI) WithHost(host string, opts ...Option) URI { + if u.Err() != nil { + return u + } + + opts = append(opts, withValidationFlags(flagValidateHost|flagValidatePort)) + o, redeem := applyURIOptions(opts) + defer func() { redeem(o) }() + + u.authority = 
u.authority.withEnsuredAuthority() u.authority.host = host + u.authority.ipType, u.err = u.validate(o) + u.authority.err = u.err + return u } -func (u *uri) SetPort(port string) Builder { - u.ensureAuthorityExists() +func (u URI) WithPort(port string, opts ...Option) URI { // TODO: port as int? + if u.Err() != nil { + return u + } + + opts = append(opts, withValidationFlags(flagValidatePort)) + o, redeem := applyURIOptions(opts) + defer func() { redeem(o) }() + + u.authority = u.authority.withEnsuredAuthority() u.authority.port = port + u.authority.ipType, u.err = u.validate(o) + u.authority.err = u.err + return u } -func (u *uri) SetPath(path string) Builder { - u.ensureAuthorityExists() +func (u URI) WithPath(path string, opts ...Option) URI { + if u.Err() != nil { + return u + } + + opts = append(opts, withValidationFlags(flagValidatePath)) + o, redeem := applyURIOptions(opts) + defer func() { redeem(o) }() + + u.authority = u.authority.withEnsuredAuthority() u.authority.path = path + u.authority.ipType, u.err = u.validate(o) + u.authority.err = u.err + return u } -func (u *uri) SetQuery(query string) Builder { +func (u URI) WithQuery(query string, opts ...Option) URI { + if u.Err() != nil { + return u + } + + opts = append(opts, withValidationFlags(flagValidateQuery)) + o, redeem := applyURIOptions(opts) + defer func() { redeem(o) }() + u.query = query + u.authority.ipType, u.err = u.validate(o) + return u } -func (u *uri) SetFragment(fragment string) Builder { +func (u URI) WithFragment(fragment string, opts ...Option) URI { + if u.Err() != nil { + return u + } + + opts = append(opts, withValidationFlags(flagValidateFragment)) + o, redeem := applyURIOptions(opts) + defer func() { redeem(o) }() + u.fragment = fragment + u.authority.ipType, u.err = u.validate(o) + return u } -func (u *uri) Builder() Builder { - return u +func (a Authority) withEnsuredAuthority() Authority { + if a.userinfo != "" || a.host != "" || a.port != "" { + a.prefix = authorityPrefix + } 
+ + return a } diff --git a/builder_test.go b/builder_test.go index 1c56ede..50b3e4a 100644 --- a/builder_test.go +++ b/builder_test.go @@ -40,11 +40,9 @@ func Test_Builder(t *testing.T) { "failed to parse uri: %v", err, ) - nuri := auri.Builder().SetUserInfo(test.name).SetHost("newdomain.com").SetScheme("http").SetPort("443") - zuri, ok := nuri.(URI) - require.True(t, ok) - assert.Equal(t, "//"+test.name+"@newdomain.com:443", zuri.Authority().String()) - assert.Equal(t, "443", nuri.URI().Authority().Port()) + nuri := auri.WithUserInfo(test.name).WithHost("newdomain.com").WithScheme("http").WithPort("443") + assert.Equal(t, "//"+test.name+"@newdomain.com:443", nuri.Authority().String()) + assert.Equal(t, "443", nuri.Authority().Port()) val := nuri.String() assert.Equalf(t, val, test.uriChanged, @@ -52,29 +50,29 @@ func Test_Builder(t *testing.T) { "test: %#v", test.uriChanged, "values don't match: %v != %v (actual: %#v, expected: %#v)", val, test.uriChanged, ) - assert.Equal(t, "http", nuri.URI().Scheme()) + assert.Equal(t, "http", nuri.Scheme()) - _ = nuri.SetPath("/abcd") - assert.Equal(t, "/abcd", nuri.URI().Authority().Path()) + nuri = nuri.WithPath("/abcd") + assert.Equal(t, "/abcd", nuri.Authority().Path()) - _ = nuri.SetQuery("a=b&x=5").SetFragment("chapter") - assert.Equal(t, url.Values{"a": []string{"b"}, "x": []string{"5"}}, nuri.URI().Query()) - assert.Equal(t, "chapter", nuri.URI().Fragment()) - assert.Equal(t, test.uriChanged+"/abcd?a=b&x=5#chapter", nuri.URI().String()) + nuri = nuri.WithQuery("a=b&x=5").WithFragment("chapter") + assert.Equal(t, url.Values{"a": []string{"b"}, "x": []string{"5"}}, nuri.Query()) + assert.Equal(t, "chapter", nuri.Fragment()) + assert.Equal(t, test.uriChanged+"/abcd?a=b&x=5#chapter", nuri.String()) assert.Equal(t, test.uriChanged+"/abcd?a=b&x=5#chapter", nuri.String()) }) } }) t.Run("when building from scratch", func(t *testing.T) { - u, _ := Parse("http:") - b := u.Builder() + u, err := Parse("http:") + 
require.NoError(t, err) require.Empty(t, u.Authority()) assert.Equal(t, "", u.Authority().UserInfo()) - b = b.SetUserInfo("user:pwd").SetHost("newdomain").SetPort("444") - assert.Equal(t, "http://user:pwd@newdomain:444", b.String()) + v := u.WithUserInfo("user:pwd").WithHost("newdomain").WithPort("444") + assert.Equal(t, "http://user:pwd@newdomain:444", v.String()) }) t.Run("when overriding with an invalid value", func(t *testing.T) { @@ -82,9 +80,9 @@ func Test_Builder(t *testing.T) { u, err := Parse(uriRaw) require.NoError(t, err) - b := u.Builder() - b.SetPort("X8080") + + u = u.WithPort("X8080") auth := u.Authority() - require.Error(t, auth.Validate()) + require.Error(t, auth.Err()) }) } diff --git a/default_ports.go b/default_ports.go index 29d72d8..7b54318 100644 --- a/default_ports.go +++ b/default_ports.go @@ -9,7 +9,7 @@ import ( // the defaut port defined for this scheme (if any). // // For example, an URI like http://host:8080 would return false, since 80 is the default http port. -func (u uri) IsDefaultPort() bool { +func (u URI) IsDefaultPort() bool { if len(u.authority.port) == 0 { return true } @@ -23,7 +23,7 @@ func (u uri) IsDefaultPort() bool { // or zero if no such default is known. // // For example, for scheme "https", the default port is 443. -func (u uri) DefaultPort() int { +func (u URI) DefaultPort() int { return int(defaultPortForScheme(strings.ToLower(u.scheme))) } diff --git a/dns.go b/dns.go index 7c678e9..92d787c 100644 --- a/dns.go +++ b/dns.go @@ -14,6 +14,7 @@ import ( // in case you need specific schemes to validate the host as a DNS name. // // See: https://www.iana.org/assignments/uri-schemes/uri-schemes.xhtml +// TODO: now pass it as an option. 
make private var UsesDNSHostValidation = func(scheme string) bool { switch scheme { // prioritize early exit on most commonly used schemes diff --git a/dns_test.go b/dns_test.go index d5dc4d3..f60a29a 100644 --- a/dns_test.go +++ b/dns_test.go @@ -18,6 +18,7 @@ func TestDNSvsHost(t *testing.T) { } func TestValidateHostForScheme(t *testing.T) { + o := defaultOptions() for _, host := range []string{ "a.b.c", "a", @@ -32,7 +33,7 @@ func TestValidateHostForScheme(t *testing.T) { "a.b.c.d%30", "a.b.c.%55", } { - require.NoErrorf(t, validateHostForScheme(host, "http"), + require.NoErrorf(t, validateHostForScheme(host, "http", o), "expected host %q to validate", host, ) @@ -62,7 +63,7 @@ func TestValidateHostForScheme(t *testing.T) { "%", "%X", } { - require.Errorf(t, validateHostForScheme(host, "http"), + require.Errorf(t, validateHostForScheme(host, "http", o), "expected host %q NOT to validate", host, ) diff --git a/docs/BENCHMARKS.md b/docs/BENCHMARKS.md index 3bd4cb9..44b66a4 100644 --- a/docs/BENCHMARKS.md +++ b/docs/BENCHMARKS.md @@ -265,3 +265,61 @@ Benchmark_String Benchmark_String-16 457095075 79.87 ns/op 48 B/op 1 allocs/op PASS +## After removing all allocs with v2 + +Yay! 
+ +go test -v -run Bench -benchtime 30s -bench Bench +goos: linux +goarch: amd64 +pkg: github.com/fredbi/uri +cpu: AMD Ryzen 7 5800X 8-Core Processor +Benchmark_Parse +Benchmark_Parse/with_URI_simple_payload + +Benchmark_Parse/with_URI_simple_payload-16 138432174 259.9 ns/op 0 B/op 0 allocs/op +Benchmark_Parse/with_URL_simple_payload +Benchmark_Parse/with_URL_simple_payload-16 100000000 320.4 ns/op 168 B/op 1 allocs/op +Benchmark_Parse/with_URI_mixed_payload +Benchmark_Parse/with_URI_mixed_payload-16 137858967 261.1 ns/op 0 B/op 0 allocs/op +Benchmark_Parse/with_URL_mixed_payload +Benchmark_Parse/with_URL_mixed_payload-16 100000000 304.4 ns/op 163 B/op 1 allocs/op +Benchmark_Parse/with_URI_payload_with_IPs +Benchmark_Parse/with_URI_payload_with_IPs-16 128555860 284.5 ns/op 0 B/op 0 allocs/op +Benchmark_Parse/with_URL_payload_with_IPs +Benchmark_Parse/with_URL_payload_with_IPs-16 97581129 368.1 ns/op 176 B/op 1 allocs/op +Benchmark_String +Benchmark_String-16 444468163 80.65 ns/op 48 B/op 1 allocs/op +Benchmark_DNSSchemes +Benchmark_DNSSchemes/with_switch +Benchmark_DNSSchemes/with_switch-16 1000000000 3.044 ns/op 0 B/op 0 allocs/op +PASS + +# V2 with options setup + +go test -v -run Bench -benchtime 30s -bench Bench +goos: linux +goarch: amd64 +pkg: github.com/fredbi/uri +cpu: AMD Ryzen 7 5800X 8-Core Processor +Benchmark_Parse +Benchmark_Parse/with_URI_simple_payload +Benchmark_Parse/with_URI_simple_payload-16 137914446 257.1 ns/op 0 B/op 0 allocs/op +Benchmark_Parse/with_URL_simple_payload +Benchmark_Parse/with_URL_simple_payload-16 100000000 319.7 ns/op 168 B/op 1 allocs/op +Benchmark_Parse/with_URI_mixed_payload +Benchmark_Parse/with_URI_mixed_payload-16 138111548 262.6 ns/op 0 B/op 0 allocs/op +Benchmark_Parse/with_URL_mixed_payload +Benchmark_Parse/with_URL_mixed_payload-16 100000000 302.5 ns/op 163 B/op 1 allocs/op +Benchmark_Parse/with_URI_payload_with_IPs +Benchmark_Parse/with_URI_payload_with_IPs-16 128662461 282.0 ns/op 0 B/op 0 allocs/op 
+Benchmark_Parse/with_URL_payload_with_IPs +Benchmark_Parse/with_URL_payload_with_IPs-16 99240152 365.3 ns/op 176 B/op 1 allocs/op +Benchmark_String +Benchmark_String-16 446582586 80.53 ns/op 48 B/op 1 allocs/op +Benchmark_DNSSchemes +Benchmark_DNSSchemes/with_switch +Benchmark_DNSSchemes/with_switch-16 1000000000 3.030 ns/op 0 B/op 0 allocs/op +PASS + + diff --git a/docs/TODO.md b/docs/TODO.md index 9447320..8db4d81 100644 --- a/docs/TODO.md +++ b/docs/TODO.md @@ -13,8 +13,9 @@ v1.2 * [x] fix scheme tolerance to be on ASCII only * [x] document the choice of a strict % escaping policy regarding char encoding (UTF8 mandatory) * [] handle empty fragment, empty query +* [] fix scheme tolerance to be on ASCII only * [] IRI ucs charset compliance (att: perf challenge) -- [] Support IRI iprivate in query +* [] Support IRI iprivate in query * [] normalizer v2.0.0 diff --git a/docs/V2.md b/docs/V2.md index ffabc81..dbef6de 100644 --- a/docs/V2.md +++ b/docs/V2.md @@ -2,38 +2,49 @@ ## Should feel more like `net/url.URL` - * Make `uri.URI` a concrete type + * [x] Make `uri.URI` a concrete type * In theory, this is a breaking change, but has most likely little impact on actual usage - - * The interface abstraction comes at a (small) performance cost, and there is no palatable benefit from it - * More methods from `URL` should be supported by `URI`, e.g. `UnmarshalText()`, `MarshalBinary()`, `Redacted()`, `IsAbs()`... + * [x] Checked that this wouldn't break fyne-io/fyne + + * [x] The interface abstraction comes at a (small) performance cost, and there is no palatable benefit from it + * More methods from `URL` should be supported by `URI`, e.g. : + * [x] `UnmarshalText()`, + * [x] `MarshalBinary()`, + * [x] `IsAbs()` + * `Redacted()`... see [todo.go](../todo.go) * Similarly, support more methods from `net/url.UserInfo` in the `Authority` type. * However: - * Let's keep the `Authority` part, as it better sticks to how the object is structured according to the RFC. 
- * Let's keep the fluent `Builder` component. I don't think that exposing fields like in `URL` is a good choice. + * [x] Let's keep the `Authority` part, as it better sticks to how the object is structured according to the RFC. + * [x] Let's keep the fluent `Builder` component. I don't think that exposing fields like in `URL` is a good choice. + * [x] Remove `Authority` and `Builder` interfaces. ## Canonicalization - * Extra feature: no breaking change + * Extra features only: no breaking change - * `URI.String()` currently just prints out the URL. We may leave it like this. - * A `Normalize()` method should canonicalize the URI (case, simplified path, etc), like the `purell` package does. - Notice that `purell` seems to be no longer maintained: pulling a dependency is probably not appropriate. + * [x] `URI.String()` currently just prints out the URL. We may leave it like this. + * `Normalize()` and `Normalized()` methods should canonicalize the URI (case, simplified path, etc), + like the `purell` package does. + Notice that `purell` seems to be no longer maintained: pulling a dependency is probably not appropriate. ## Strictness/compliance options * Should not be breaking, but enhanced strictness could break a few tests in consuming packages. - * We should add options like `type Option func(*options)` to provide more flexibility when parsing & validating + * [x] We should add options like `type Option func(*options)` to provide more flexibility when parsing & validating * In particular: * we should be able to abide strictly by the historical standard (no unicode, **puny code in host names should be validated**) (for punycode, let's take some inspiration from `https://github.com/jonasbn/punycode` and `https://pkg.go.dev/golang.org/x/net/idna#Profile`) * callers should be able to opt in for IRI vs strict (historical) URI - e.g. only ASCII - rather than the current mixed implementation (yet again, that was a pragmatic... 
still pondering if putting more nitpicking is appropriate). - * callers should be able to configure DNS schemes from options rather than overriding a package-level variable - * default options could be set at the package level to save on systematic option resolution at parsing time + * [x] callers should be able to configure DNS schemes from options rather than overriding a package-level variable + * [x] default options could be set at the package level to save on systematic option resolution at parsing time (alternatively, expose a `Parser` type to wrap options once for a series of subsequent calls to `Parse()`/`Validate()`) * support canonicalization options with flags such as those defined in `https://pkg.go.dev/golang.org/x/net/idna` + * add windows-friendly tolerance option for windows file paths (`Normalize()` would produce a RFC-compliant form) * Standard compliance improvements * Improve IRI support: current support for IRI is at best loose, albeit pragmatic (currently, a valid ALPHA token is a unicode letter codepoint, only ASCII digits are supported) +## Performance + * [x] Introduce `Make` methods to return a struct rather than a pointer: this saves an allocation + for most use cases in which it is not necessary to get a pointer receiver. diff --git a/errors.go b/errors.go index 86abf8a..065a818 100644 --- a/errors.go +++ b/errors.go @@ -88,11 +88,3 @@ func (e ipError) Error() string { return "" } } - -func (u uri) Err() error { - return u.err -} - -func (a authorityInfo) Err() error { - return a.err -} diff --git a/errors_test.go b/errors_test.go index 025d65e..2a6d493 100644 --- a/errors_test.go +++ b/errors_test.go @@ -1,55 +1,7 @@ package uri -import ( - "errors" - "testing" - - "github.com/stretchr/testify/require" -) +import "errors" // errSentinelTest is used in test cases for when we want to assert an error // but do not want to check specifically which error was returned. 
var errSentinelTest = Error(errors.New("test")) - -func TestIPError(t *testing.T) { - for _, e := range []error{ - errInvalidCharacter, - errValueGreater255, - errAtLeastOneDigit, - errLeadingZero, - errTooLong, - errTooShort, - } { - require.NotEmpty(t, e.Error()) - } - - const invalidValue uint8 = 255 - require.Empty(t, ipError(invalidValue).Error()) -} - -func TestErr(t *testing.T) { - t.Run("with valid URIs", func(t *testing.T) { - for _, toPin := range rawParsePassTests() { - test := toPin - u, err := Parse(test.uriRaw) - require.NoErrorf(t, err, "in testcase: %s (%q)", test.comment, test.uriRaw) - require.EqualValues(t, err, u.Err(), "in testcase: %s (%q)", test.comment, test.uriRaw) - - if !errors.Is(err, ErrInvalidQuery) && !errors.Is(err, ErrInvalidScheme) && !errors.Is(err, ErrInvalidURI) { - require.EqualValues(t, err, u.Authority().Err(), "in testcase: %s (%q)", test.comment, test.uriRaw) - } - } - }) - t.Run("with invalid URIs", func(t *testing.T) { - for _, toPin := range rawParseFailTests() { - test := toPin - u, err := Parse(test.uriRaw) - require.Errorf(t, err, "in testcase: %s (%q)", test.comment, test.uriRaw) - require.EqualValues(t, err, u.Err(), "in testcase: %s (%q)", test.comment, test.uriRaw) - - if !errors.Is(err, ErrInvalidQuery) && !errors.Is(err, ErrInvalidScheme) && !errors.Is(err, ErrInvalidURI) { - require.EqualValues(t, err, u.Authority().Err(), "in testcase: %s (%q)", test.comment, test.uriRaw) - } - } - }) -} diff --git a/fixtures_test.go b/fixtures_test.go index 3905725..2e29f85 100644 --- a/fixtures_test.go +++ b/fixtures_test.go @@ -123,12 +123,12 @@ func rawParseStructureTests() []uriTest { { comment: "// without // prefix, this is parsed as a path", uriRaw: "mailto:user@domain.com", - uri: &uri{ + uri: URI{ scheme: "mailto", hierPart: "user@domain.com", query: "", fragment: "", - authority: authorityInfo{ + authority: Authority{ path: "user@domain.com", }, }, @@ -136,12 +136,12 @@ func rawParseStructureTests() []uriTest { { 
comment: "with // prefix, this parsed as a user + host", uriRaw: "mailto://user@domain.com", - uri: &uri{ + uri: URI{ scheme: "mailto", hierPart: "//user@domain.com", query: "", fragment: "", - authority: authorityInfo{ + authority: Authority{ prefix: "//", userinfo: "user", host: "domain.com", @@ -322,6 +322,16 @@ func rawParseSchemeTests() []uriTest { uriRaw: "www.contoso.com/path/file", err: ErrNoSchemeFound, }, + { + comment: "invalid scheme (invalid unicode letter character) (3)", + uriRaw: "érié://www.example.com", + err: ErrInvalidScheme, + }, + { + comment: "invalid scheme (invalid unicode digit character) (3)", + uriRaw: "numeriⅩ://www.example.com", + err: ErrInvalidScheme, + }, } } @@ -572,12 +582,12 @@ func rawParseIPHostTests() []uriTest { { comment: "IPv6 host", uriRaw: "mailto://user@[fe80::1]", - uri: &uri{ + uri: URI{ scheme: "mailto", hierPart: "//user@[fe80::1]", query: "", fragment: "", - authority: authorityInfo{ + authority: Authority{ prefix: "//", userinfo: "user", host: "fe80::1", @@ -939,12 +949,12 @@ func rawParsePassTests() []uriTest { return []uriTest{ { uriRaw: "foo://example.com:8042/over/there?name=ferret#nose", - uri: &uri{ + uri: URI{ scheme: "foo", hierPart: "//example.com:8042/over/there", query: "name=ferret", fragment: "nose", - authority: authorityInfo{ + authority: Authority{ prefix: "//", userinfo: "", host: "example.com", @@ -956,12 +966,12 @@ func rawParsePassTests() []uriTest { }, { uriRaw: "http://httpbin.org/get?utf8=%e2%98%83", - uri: &uri{ + uri: URI{ scheme: "http", hierPart: "//httpbin.org/get", query: "utf8=%e2%98%83", fragment: "", - authority: authorityInfo{ + authority: Authority{ prefix: "//", userinfo: "", host: "httpbin.org", @@ -973,12 +983,12 @@ func rawParsePassTests() []uriTest { }, { uriRaw: "mailto://user@domain.com", - uri: &uri{ + uri: URI{ scheme: "mailto", hierPart: "//user@domain.com", query: "", fragment: "", - authority: authorityInfo{ + authority: Authority{ prefix: "//", userinfo: "user", host: 
"domain.com", @@ -990,12 +1000,12 @@ func rawParsePassTests() []uriTest { }, { uriRaw: "ssh://user@git.openstack.org:29418/openstack/keystone.git", - uri: &uri{ + uri: URI{ scheme: "ssh", hierPart: "//user@git.openstack.org:29418/openstack/keystone.git", query: "", fragment: "", - authority: authorityInfo{ + authority: Authority{ prefix: "//", userinfo: "user", host: "git.openstack.org", @@ -1006,12 +1016,12 @@ func rawParsePassTests() []uriTest { }, { uriRaw: "https://willo.io/#yolo", - uri: &uri{ + uri: URI{ scheme: "https", hierPart: "//willo.io/", query: "", fragment: "yolo", - authority: authorityInfo{ + authority: Authority{ prefix: "//", userinfo: "", host: "willo.io", @@ -1155,8 +1165,7 @@ func rawParsePassTests() []uriTest { uriRaw: "ldap://[2001:db8::7]/c=GB?objectClass?one", asserter: func(t testing.TB, u URI) { assert.Equal(t, "/c=GB", u.Authority().Path()) - nuri := u.(*uri) - assert.Equal(t, "objectClass?one", nuri.query) // TODO(fred): use Query() and url.Values + assert.Equal(t, "objectClass?one", u.query) // TODO(fred): use Query() and url.Values assert.Equal(t, "", u.Fragment()) }, }, @@ -1165,8 +1174,7 @@ func rawParsePassTests() []uriTest { uriRaw: "http://www.example.org/hello/world.txt/?id=5&part=three", asserter: func(t testing.TB, u URI) { assert.Equal(t, "/hello/world.txt/", u.Authority().Path()) - nuri := u.(*uri) - assert.Equal(t, "id=5&part=three", nuri.query) + assert.Equal(t, "id=5&part=three", u.query) assert.Equal(t, "", u.Fragment()) }, }, @@ -1175,8 +1183,7 @@ func rawParsePassTests() []uriTest { uriRaw: "http://www.example.org/hello/world.txt/?id=5&part=three?another#abc?efg", asserter: func(t testing.TB, u URI) { assert.Equal(t, "/hello/world.txt/", u.Authority().Path()) - nuri := u.(*uri) - assert.Equal(t, "id=5&part=three?another", nuri.query) + assert.Equal(t, "id=5&part=three?another", u.query) assert.Equal(t, "abc?efg", u.Fragment()) assert.Equal(t, url.Values{"id": []string{"5"}, "part": []string{"three?another"}}, 
u.Query()) }, diff --git a/go.mod b/go.mod index d1442b2..590b348 100644 --- a/go.mod +++ b/go.mod @@ -5,6 +5,7 @@ go 1.19 require ( github.com/pkg/profile v1.7.0 github.com/stretchr/testify v1.8.4 + golang.org/x/net v0.15.0 ) require ( @@ -12,5 +13,6 @@ require ( github.com/felixge/fgprof v0.9.3 // indirect github.com/google/pprof v0.0.0-20211214055906-6f57359322fd // indirect github.com/pmezard/go-difflib v1.0.0 // indirect + golang.org/x/text v0.13.0 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect ) diff --git a/go.sum b/go.sum index 5a05862..5f3b6d4 100644 --- a/go.sum +++ b/go.sum @@ -19,7 +19,11 @@ github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/ github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= +golang.org/x/net v0.15.0 h1:ugBLEUaxABaB5AJqW9enI0ACdci2RUd4eP51NTBvuJ8= +golang.org/x/net v0.15.0/go.mod h1:idbUs1IY1+zTqbi8yxTbhexhEEk5ur9LInksu6HrEpk= golang.org/x/sys v0.0.0-20211007075335-d3039528d8ac/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/text v0.13.0 h1:ablQoSUd0tRdKxZewP80B+BaqeKJuVhuRxj/dkrun3k= +golang.org/x/text v0.13.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/ip.go b/ip.go index 8f04ca5..8144db6 100644 --- a/ip.go +++ b/ip.go @@ -9,14 +9,20 @@ import ( "unicode/utf8" ) +type ipType struct { + isIPv4 bool + isIPv6 bool + isIPvFuture bool +} + // IsIP indicates if the URI host was specified using an IP address (v4 or v6). 
-func (a authorityInfo) IsIP() bool { +func (a Authority) IsIP() bool { // IPvFuture won't parse as a netip.Addr return a.isIPv4 || (a.isIPv6 && !a.isIPvFuture) } // IPAddr returns the parsed netip.Addr whenever IsIP is true (or the zero value whenever false). -func (a authorityInfo) IPAddr() netip.Addr { +func (a Authority) IPAddr() netip.Addr { if !a.IsIP() { return netip.Addr{} } diff --git a/options.go b/options.go new file mode 100644 index 0000000..e5eb204 --- /dev/null +++ b/options.go @@ -0,0 +1,240 @@ +package uri + +import ( + "sync" + + "golang.org/x/net/idna" +) + +type ( + // Option allows for fine-grained tuning of tolerances to standards + // when validating an URI + Option func(*options) + + //nolint: unused + options struct { + schemeIsDNSFunc func(string) bool + defaultPortFunc func(string) int + idnaFlags []idna.Option + withDNSHostValidation bool + withStrictASCII bool + withStrictIPv6 bool + withURIReference bool + withStrictURI bool + withStrictIRI bool + withWindowsFriendly bool + withRedactedPassword bool + + // select validations: this is used by builder methods to carry out + // partial validation. + validationFlags uint16 + } + + // optionsPool holds allocated options in a pool, + // to avoid undue gc pressure when using custom options + // intensively. + // Notice that default options (possibly customized once) + // do not allocate anything. 
+ optionsPool struct { + *sync.Pool + } +) + +const ( + flagValidateScheme uint16 = 1 << iota + flagValidateHost + flagValidatePort + flagValidateUserInfo + flagValidatePath + flagValidateQuery + flagValidateFragment +) + +var ( + packageLevelDefaults = options{ + schemeIsDNSFunc: UsesDNSHostValidation, + validationFlags: ^uint16(0), + } + + packageLevelReferenceDefaults = options{ + schemeIsDNSFunc: UsesDNSHostValidation, + withURIReference: true, + validationFlags: ^uint16(0), + } + + muxDefaults sync.Mutex + poolOfOptions = optionsPool{ + Pool: &sync.Pool{ + New: func() any { + return defaultOptions() + }, + }, + } +) + +// borrowURIOptions reuses a previously allocated option from the pool. +// +// Optional behavior is reset to the package-level defaults. +func borrowURIOptions() *options { + o := poolOfOptions.Get().(*options) + *o = packageLevelDefaults + + return o +} + +func borrowURIReferenceOptions() *options { + o := poolOfOptions.Get().(*options) + *o = packageLevelReferenceDefaults + + return o +} + +func redeemOptions(o *options) { + if o == &packageLevelDefaults || o == &packageLevelReferenceDefaults { + return + } + poolOfOptions.Put(o) +} + +// defaultOptions allocates a new struct to hold options +func defaultOptions() *options { + o := packageLevelDefaults // shallow-clone defaults + + return &o +} + +// applyURIOptions applies options on a struct borrowed from the pool, +// with defaults reset to support URIs (not URI references). +// +// **Don't mutate the returned options** +func applyURIOptions(opts []Option) (*options, func(*options)) { + if len(opts) == 0 { + // no overrides, no need to allocate a copy of the options + return &packageLevelDefaults, redeemOptions + } + + o := borrowURIOptions() + + for _, apply := range opts { + apply(o) + } + + return o, redeemOptions +} + +// applyURIReferenceOptions applies options on a struct borrowed from the pool, +// with defaults reset to support URI references. 
+// +// **Don't mutate the returned options** +func applyURIReferenceOptions(opts []Option) (*options, func(*options)) { + if len(opts) == 0 { + // no overrides, no need to allocate a copy of the options + return &packageLevelReferenceDefaults, redeemOptions + } + + o := borrowURIReferenceOptions() + + for _, apply := range opts { + apply(o) + } + + return o, redeemOptions +} + +// SetDefaultOptions allows to tweak package level defaults. +// +// You should only use this in initialization steps, as this manipulates +// a package global variable. +func SetDefaultOptions(opts ...Option) { + muxDefaults.Lock() + defer muxDefaults.Unlock() + + o := &packageLevelDefaults + p := &packageLevelReferenceDefaults + + for _, apply := range opts { + apply(o) + apply(p) + } +} + +func withValidationFlags(flags uint16) Option { + return func(o *options) { + o.validationFlags = flags + } +} + +// WithSchemeIsDNSFunc overrides the default DNS scheme identification function. +// +// The passed function is assumed to return true whenever a (lower cased) scheme +// should be considered to use Internet domain names. +func WithSchemeIsDNSFunc(fn func(string) bool) Option { + return func(o *options) { + o.schemeIsDNSFunc = fn + } +} + +// WithDefaultPortFunc overrides the default scheme to default port function. +// +// The passed function is assumed to return a port number for a (lower cased) scheme. +func WithDefaultPortFunc(fn func(string) int) Option { + return func(o *options) { + o.defaultPortFunc = fn + } +} + +// WithDNSSchemes adds extra schemes to the DNS host name validation. +func WithDNSSchemes(_ ...string) Option { + return func(o *options) { + // TODO + } +} + +// WithReference tells the validator whether to accept URI references. 
+func WithReference(enabled bool) Option { + return func(o *options) { + o.withURIReference = enabled + } +} + +// WithIDNAFlags sets golang.org/x/idna.Option's for domain name validation +func WithIDNAFlags(flags ...idna.Option) Option { + return func(o *options) { + o.idnaFlags = flags + } +} + +// WithStrictURI tells the validator to be strict regarding RFC3986 for URIs. +// +// This means that only ASCII characters are accepted (other unicode character MUST be escaped). +func WithStrictURI(enabled bool) Option { + return func(o *options) { + o.withStrictURI = enabled + } +} + +// WithStrictIRI tells the validator to be strict regarding RFC3987 for IRIs +// +// This means that all valid UCS characters are accepted without escaping. +func WithStrictIRI(enabled bool) Option { + return func(o *options) { + o.withStrictIRI = enabled + } +} + +// WithWindowsFriendly tells the validator to accept Windows file paths that +// are common, but formally invalid URI path (e.g. 'C:\folder\File.txt'). +// +// This deviation is only supported for scheme "file", so the following URI is tolerated +// and parsed as a legit URI: +// file://C:\folder\file.txt +// +// is internally transformed to the expected: +// file:///C:/folder/file.txt +// +// Notice that URI references do not support this option if the scheme is not specified. 
+func WithWindowsFriendly(enabled bool) Option { + return func(o *options) { + o.withWindowsFriendly = enabled + } +} diff --git a/options_test.go b/options_test.go new file mode 100644 index 0000000..b1a3380 --- /dev/null +++ b/options_test.go @@ -0,0 +1,27 @@ +package uri + +import ( + "testing" + + "github.com/stretchr/testify/require" +) + +func TestOptions(t *testing.T) { + t.Run("with default validation flags", func(t *testing.T) { + o, redeem := applyURIOptions([]Option{}) + defer func() { redeem(o) }() + + /* TODO remove + t.Logf("flagValidateScheme=%d", flagValidateScheme) + t.Logf("flagValidateHost=%d", flagValidateHost) + t.Logf("flagValidatePort=%d", flagValidatePort) + t.Logf("flagValidateUserInfo=%d", flagValidateUserInfo) + t.Logf("flagValidatePath=%d", flagValidatePath) + t.Logf("flagValidateQuery=%d", flagValidateQuery) + t.Logf("flagValidateFragment=%d", flagValidateFragment) + */ + + require.True(t, o.validationFlags&flagValidateScheme > 0) + require.True(t, o.validationFlags&flagValidateFragment > 0) + }) +} diff --git a/pre_go20_test.go b/pre_go20_test.go deleted file mode 100644 index f081481..0000000 --- a/pre_go20_test.go +++ /dev/null @@ -1,19 +0,0 @@ -// go: !go1.20 - -package uri - -import ( - "errors" - "fmt" - "testing" - - "github.com/stretchr/testify/assert" -) - -func TestErrUri(t *testing.T) { - e := errorsJoin(ErrInvalidURI, errSentinelTest, fmt.Errorf("cause")) - - assert.True(t, errors.Is(ErrInvalidURI, ErrInvalidURI)) - assert.True(t, errors.Is(e, ErrInvalidURI)) - assert.True(t, errors.Is(e, errSentinelTest)) -} diff --git a/todo.go b/todo.go new file mode 100644 index 0000000..58b8008 --- /dev/null +++ b/todo.go @@ -0,0 +1,66 @@ +package uri + +import ( + "path" +) + +func (u URI) WithJoinPath(elems ...string) URI { + if u.Err() != nil { + return u + } + + o, redeem := applyURIOptions([]Option{withValidationFlags(flagValidatePath)}) + defer func() { redeem(o) }() + + // Ref: + // x, zee := ur.Parse("ez") + // 
x.JoinPath(elem...) + u.authority = u.authority.withEnsuredAuthority() + full := append([]string{u.authority.path}, elems...) + u.authority.path = path.Join(full...) + u.authority.ipType, u.err = u.validate(o) + u.authority.err = u.err + return u +} + +func (u URI) WithUserPassword(username, password string) URI { //nolint: unparam,revive + return URI{} +} + +func (u URI) WithRedacted() URI { + return URI{} +} + +func (u URI) RequestURI() string { + return "" // TODO +} + +func (u URI) ResolveReference(ref URI) URI { //nolint: unparam,revive + return URI{} // TODO +} + +// Not builders +func (u URI) EscapedFragment() string { + // TODO + return u.fragment +} + +func (u URI) IsReference() bool { + return false // TODO +} + +func (a Authority) Redacted() string { // NOTE: net/url.URL mutates + return "" // TODO +} + +func (a Authority) Username() string { + return "" +} + +func (a Authority) User() string { + return "" +} + +func (a Authority) Password() (string, bool) { + return "", false +} diff --git a/uri.go b/uri.go index e1604d6..4d936b9 100644 --- a/uri.go +++ b/uri.go @@ -14,69 +14,11 @@ package uri import ( "fmt" - "net/netip" "net/url" "strconv" "strings" ) -// URI represents a general RFC3986 URI. -type URI interface { - // Scheme the URI conforms to. - Scheme() string - - // Authority information for the URI, including the "//" prefix. - Authority() Authority - - // Query returns a map of key/value pairs of all parameters - // in the query string of the URI. - Query() url.Values - - // Fragment returns the fragment (component preceded by '#') in the - // URI if there is one. - Fragment() string - - // Builder returns a Builder that can be used to modify the URI. - Builder() Builder - - // String representation of the URI - String() string - - // Validate the different components of the URI - Validate() error - - // Is the current port the default for this scheme? 
- IsDefaultPort() bool - // Default port for this scheme - DefaultPort() int - - Err() error -} - -// Authority information that a URI contains -// as specified by RFC3986. -// -// Username and password are given by UserInfo(). -type Authority interface { - UserInfo() string - Host() string - Port() string - Path() string - String() string - Validate(...string) error - - IsIP() bool - IPAddr() netip.Addr - - Err() error -} - -type ipType struct { - isIPv4 bool - isIPv6 bool - isIPvFuture bool -} - const ( // char and string literals. colonMark = ':' @@ -104,9 +46,38 @@ var ( userInfoExtraRunes = append(pcharExtraRunes, colonMark) ) +type ( + // URI represents a general RFC3986 URI. + URI struct { + // raw components + err error + scheme string + hierPart string + query string + fragment string + + // parsed components + authority Authority + } + + // Authority information that a URI contains + // as specified by RFC3986. + // + // Username and password are given by UserInfo(). + Authority struct { + err error + prefix string + userinfo string + host string + port string + path string + ipType // after host validation, the IP type is more precisely identified + } +) + // IsURI tells if a URI is valid according to RFC3986/RFC397. -func IsURI(raw string) bool { - _, err := Parse(raw) +func IsURI(raw string, opts ...Option) bool { + _, err := Parse(raw, opts...) return err == nil } @@ -114,25 +85,35 @@ func IsURI(raw string) bool { // // Reference: https://www.rfc-editor.org/rfc/rfc3986#section-4.1 and // https://www.rfc-editor.org/rfc/rfc3986#section-4.2 -func IsURIReference(raw string) bool { - _, err := ParseReference(raw) +func IsURIReference(raw string, opts ...Option) bool { + _, err := ParseReference(raw, opts...) return err == nil } // Parse attempts to parse a URI. +// // It returns an error if the URI is not RFC3986-compliant. 
-func Parse(raw string) (URI, error) { - return parse(raw, false) +func Parse(raw string, opts ...Option) (URI, error) { + o, redeem := applyURIOptions(opts) + defer func() { redeem(o) }() + + return parse(raw, o) } // ParseReference attempts to parse a URI relative reference. // // It returns an error if the URI is not RFC3986-compliant. -func ParseReference(raw string) (URI, error) { - return parse(raw, true) +// +// Notice that this call is syntactically equivalent to Parse(raw, WithReference(true)), +// but slightly more efficient. +func ParseReference(raw string, opts ...Option) (URI, error) { + o, redeem := applyURIReferenceOptions(opts) + defer func() { redeem(o) }() + + return parse(raw, o) } -func parse(raw string, withURIReference bool) (URI, error) { +func parse(raw string, o *options) (URI, error) { var ( scheme string curr int @@ -147,16 +128,17 @@ func parse(raw string, withURIReference bool) (URI, error) { // ":", "?", "#" err := errorsJoin( ErrInvalidURI, - fmt.Errorf("URI cannot start by a ':', '?' 
or '#' mark"), + fmt.Errorf("URI cannot start by a '%q', '%q' or '%q' mark", colonMark, questionMark, fragmentMark), ) - return nil, err + return URI{err: err}, err } if schemeEnd == 1 { - return nil, errorsJoin( + err := errorsJoin( ErrInvalidScheme, fmt.Errorf("scheme has a minimum length of 2 characters"), ) + return URI{err: err}, err } if hierPartEnd == 1 || queryEnd == 1 { @@ -165,7 +147,7 @@ func parse(raw string, withURIReference bool) (URI, error) { ErrInvalidURI, fmt.Errorf("invalid combination of start markers, near: %q", raw[:2]), ) - return nil, err + return URI{err: err}, err } if hierPartEnd > 0 && hierPartEnd < schemeEnd || queryEnd > 0 && queryEnd < schemeEnd { @@ -175,7 +157,7 @@ func parse(raw string, withURIReference bool) (URI, error) { ErrInvalidURI, fmt.Errorf("URI part markers %q,%q,%q are in an incorrect order, near: %q", colonMark, questionMark, fragmentMark, raw[mini:maxi]), ) - return nil, err + return URI{err: err}, err } if queryEnd > 0 && queryEnd < hierPartEnd { @@ -189,18 +171,21 @@ func parse(raw string, withURIReference bool) (URI, error) { scheme = raw[curr:schemeEnd] if schemeEnd+1 == len(raw) { // trailing ':' (e.g. http:) - u := &uri{ + u := URI{ scheme: scheme, } - return u, u.Validate() + u.authority.ipType, u.err = u.validate(o) + + return u, u.err } - case !withURIReference: + case !o.withURIReference: // scheme is required for URI - return nil, errorsJoin( + err := errorsJoin( ErrNoSchemeFound, fmt.Errorf("for URI (not URI reference), the scheme is required"), ) + return URI{err: err}, err case isRelative: // scheme is optional for URI references. 
// @@ -216,32 +201,36 @@ func parse(raw string, withURIReference bool) (URI, error) { hierPartEnd = len(raw) } - authority, err := parseAuthority(raw[curr:hierPartEnd]) + authority, err := parseAuthority(raw[curr:hierPartEnd], o) if err != nil { err = errorsJoin(ErrInvalidURI, err) - return nil, err + return URI{err: err}, err } - u := &uri{ + u := URI{ scheme: scheme, hierPart: raw[curr:hierPartEnd], authority: authority, } - return u, u.Validate() + u.authority.ipType, u.err = u.validate(o) + u.authority.err = u.err + + return u, u.err } var ( hierPart, query, fragment string - authority authorityInfo + authority Authority err error ) if hierPartEnd > 0 { hierPart = raw[curr:hierPartEnd] - authority, err = parseAuthority(hierPart) + authority, err = parseAuthority(hierPart, o) if err != nil { - return nil, errorsJoin(ErrInvalidURI, err) + err = errorsJoin(ErrInvalidURI, err) + return URI{err: err}, err } if hierPartEnd+1 < len(raw) { @@ -260,23 +249,23 @@ func parse(raw string, withURIReference bool) (URI, error) { if queryEnd == len(raw)-1 && hierPartEnd < 0 { // trailing #, no query "?" 
hierPart = raw[curr:queryEnd] - authority, err = parseAuthority(hierPart) + authority, err = parseAuthority(hierPart, o) if err != nil { - return nil, errorsJoin(ErrInvalidURI, err) + err = errorsJoin(ErrInvalidURI, err) + return URI{err: err}, err } - u := &uri{ + u := URI{ scheme: scheme, hierPart: hierPart, authority: authority, query: query, } - if err = u.Validate(); err != nil { - return nil, err - } + u.authority.ipType, u.err = u.validate(o) + u.authority.err = u.err // TODO: should propagate only if this is an authority error - return u, nil + return u, u.err } if queryEnd > 0 { @@ -284,9 +273,10 @@ func parse(raw string, withURIReference bool) (URI, error) { if hierPartEnd < 0 { // no query hierPart = raw[curr:queryEnd] - authority, err = parseAuthority(hierPart) + authority, err = parseAuthority(hierPart, o) if err != nil { - return nil, errorsJoin(ErrInvalidURI, err) + err = errorsJoin(ErrInvalidURI, err) + return URI{err: err}, err } } @@ -295,7 +285,7 @@ func parse(raw string, withURIReference bool) (URI, error) { } } - u := &uri{ + u := URI{ scheme: scheme, hierPart: hierPart, query: query, @@ -303,85 +293,95 @@ func parse(raw string, withURIReference bool) (URI, error) { authority: authority, } - return u, u.Validate() -} - -type uri struct { - // raw components - scheme string - hierPart string - query string - fragment string + u.authority.ipType, u.err = u.validate(o) + u.authority.err = u.err - // parsed components - authority authorityInfo - err error -} - -func (u *uri) URI() URI { - return u + return u, u.err } // Scheme for this URI. -func (u *uri) Scheme() string { +func (u URI) Scheme() string { return u.scheme } // Authority information for the URI, including the "//" prefix. -func (u *uri) Authority() Authority { - u.ensureAuthorityExists() - return &u.authority +func (u URI) Authority() Authority { + return u.authority.withEnsuredAuthority() } // Query returns a map of key/value pairs of all parameters // in the query string of the URI. 
// // This map contains the parsed query parameters like standard lib URL.Query(). -func (u *uri) Query() url.Values { +func (u URI) Query() url.Values { v, _ := url.ParseQuery(u.query) return v } -func (u *uri) Fragment() string { +// Fragment returns the fragment (component preceded by '#') in the +// URI if there is one. +func (u URI) Fragment() string { return u.fragment } -// Validate checks that all parts of a URI abide by allowed characters. -func (u *uri) Validate() error { - if u.scheme != "" { - if err := u.validateScheme(u.scheme); err != nil { - u.err = err - return err +// String representation of an URI. +// +// Reference: https://www.rfc-editor.org/rfc/rfc3986#section-6.2.2.1 and later +func (u URI) String() string { + buf := strings.Builder{} + buf.Grow(len(u.scheme) + 1 + len(u.query) + 1 + len(u.fragment) + 1 + u.authority.builderSize()) + + if len(u.scheme) > 0 { + buf.WriteString(u.scheme) + buf.WriteByte(colonMark) + } + + u.authority.buildString(&buf) + + if len(u.query) > 0 { + buf.WriteByte(questionMark) + buf.WriteString(u.query) + } + + if len(u.fragment) > 0 { + buf.WriteByte(fragmentMark) + buf.WriteString(u.fragment) + } + + return buf.String() +} + +// Err is the inner error state of the URI parsing. +func (u URI) Err() error { + return u.err +} + +// validate checks that all parts of a URI abide by allowed characters. 
+func (u URI) validate(o *options) (ipType, error) { + if u.scheme != "" && o.validationFlags&flagValidateScheme > 0 { + if err := u.validateScheme(u.scheme, o); err != nil { + return ipType{}, err } } - if u.query != "" { - if err := u.validateQuery(u.query); err != nil { - u.err = err - return err + if u.query != "" && o.validationFlags&flagValidateQuery > 0 { + if err := u.validateQuery(u.query, o); err != nil { + return ipType{}, err } } - if u.fragment != "" { - if err := u.validateFragment(u.fragment); err != nil { - u.err = err - u.err = err - return err + if u.fragment != "" && o.validationFlags&flagValidateFragment > 0 { + if err := u.validateFragment(u.fragment, o); err != nil { + return ipType{}, err } } if u.hierPart != "" { - ip, err := u.authority.validate(u.scheme) - if err != nil { - u.err = err - u.authority.err = err - return err - } - u.authority.ipType = ip + return u.authority.validateForScheme(u.scheme, o) } // empty hierpart case - return nil + return ipType{}, nil } // validateScheme verifies the correctness of the scheme part. @@ -390,7 +390,7 @@ func (u *uri) Validate() error { // scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) // // NOTE: the scheme is not supposed to contain any percent-encoded sequence. -func (u *uri) validateScheme(scheme string) error { +func (u URI) validateScheme(scheme string, _ *options) error { if len(scheme) < 2 { return ErrInvalidScheme } @@ -428,8 +428,8 @@ func (u *uri) validateScheme(scheme string) error { // Reference: https://www.rfc-editor.org/rfc/rfc3986#section-3.4 // // pchar = unreserved / pct-encoded / sub-delims / ":" / "@" -// query = *( pchar / "/" / "?" ) -func (u *uri) validateQuery(query string) error { +// query = *( pchar / "/" / "?" 
) +func (u URI) validateQuery(query string, _ *options) error { if err := validateUnreservedWithExtra(query, queryOrFragmentExtraRunes); err != nil { return errorsJoin(ErrInvalidQuery, err) } @@ -442,9 +442,8 @@ func (u *uri) validateQuery(query string) error { // Reference: https://www.rfc-editor.org/rfc/rfc3986#section-3.5 // // pchar = unreserved / pct-encoded / sub-delims / ":" / "@" -// -// fragment = *( pchar / "/" / "?" ) -func (u *uri) validateFragment(fragment string) error { +// fragment = *( pchar / "/" / "?" ) +func (u URI) validateFragment(fragment string, _ *options) error { if err := validateUnreservedWithExtra(fragment, queryOrFragmentExtraRunes); err != nil { return errorsJoin(ErrInvalidFragment, err) } @@ -452,21 +451,11 @@ func (u *uri) validateFragment(fragment string) error { return nil } -type authorityInfo struct { - prefix string - userinfo string - host string - port string - path string - ipType - err error -} - -func (a authorityInfo) UserInfo() string { return a.userinfo } -func (a authorityInfo) Host() string { return a.host } -func (a authorityInfo) Port() string { return a.port } -func (a authorityInfo) Path() string { return a.path } -func (a authorityInfo) String() string { +func (a Authority) UserInfo() string { return a.userinfo } +func (a Authority) Host() string { return a.host } +func (a Authority) Port() string { return a.port } +func (a Authority) Path() string { return a.path } +func (a Authority) String() string { buf := strings.Builder{} buf.Grow(a.builderSize()) a.buildString(&buf) @@ -474,11 +463,11 @@ func (a authorityInfo) String() string { return buf.String() } -func (a authorityInfo) builderSize() int { +func (a Authority) builderSize() int { return len(a.prefix) + len(a.userinfo) + 1 + len(a.host) + 2 + len(a.port) + 1 + len(a.path) } -func (a authorityInfo) buildString(buf *strings.Builder) { +func (a Authority) buildString(buf *strings.Builder) { buf.WriteString(a.prefix) buf.WriteString(a.userinfo) @@ -500,47 
+489,34 @@ func (a authorityInfo) buildString(buf *strings.Builder) { buf.WriteString(a.path) } -// Validate the Authority part. +// validate the Authority part. // // Reference: https://www.rfc-editor.org/rfc/rfc3986#section-3.2 -func (a *authorityInfo) Validate(schemes ...string) error { - ip, err := a.validate(schemes...) - - if err != nil { - a.err = err - - return err - } - a.ipType = ip - - return nil -} - -func (a authorityInfo) validate(schemes ...string) (ipType, error) { +func (a Authority) validateForScheme(scheme string, o *options) (ipType, error) { var ip ipType - if a.path != "" { - if err := a.validatePath(a.path); err != nil { + if a.path != "" && o.validationFlags&flagValidatePath > 0 { + if err := a.validatePath(a.path, o); err != nil { return ip, err } } - if a.host != "" { + if a.host != "" && o.validationFlags&flagValidateHost > 0 { var err error - ip, err = a.validateHost(a.host, a.isIPv6, schemes...) + ip, err = a.validateHost(a.host, a.isIPv6, scheme, o) if err != nil { return ip, err } } - if a.port != "" { - if err := a.validatePort(a.port, a.host); err != nil { + if a.port != "" && o.validationFlags&flagValidatePort > 0 { + if err := a.validatePort(a.port, a.host, o); err != nil { return ip, err } } - if a.userinfo != "" { - if err := a.validateUserInfo(a.userinfo); err != nil { + if a.userinfo != "" && o.validationFlags&flagValidateUserInfo > 0 { + if err := a.validateUserInfo(a.userinfo, o); err != nil { return ip, err } } @@ -551,7 +527,7 @@ func (a authorityInfo) validate(schemes ...string) (ipType, error) { // validatePath validates the path part. 
// // Reference: https://www.rfc-editor.org/rfc/rfc3986#section-3.3 -func (a authorityInfo) validatePath(path string) error { +func (a Authority) validatePath(path string, _ *options) error { if a.host == "" && a.port == "" && len(path) >= 2 && path[0] == slashMark && path[1] == slashMark { return errorsJoin( ErrInvalidPath, @@ -594,7 +570,7 @@ func (a authorityInfo) validatePath(path string) error { // validateHost validates the host part. // // Reference: https://www.rfc-editor.org/rfc/rfc3986#section-3.2.2 -func (a authorityInfo) validateHost(host string, isIPv6 bool, schemes ...string) (ipType, error) { +func (a Authority) validateHost(host string, isIPv6 bool, scheme string, o *options) (ipType, error) { // check for IP addresses // * IPv6 are required to be enclosed within '[]' (isIPv6=true), if an IPv6 zone is present, // there is a trailing escaped sequence, but the heading IPv6 literal must not be escaped. @@ -620,7 +596,7 @@ func (a authorityInfo) validateHost(host string, isIPv6 bool, schemes ...string) } // This is not an IP: check for host DNS or registered name - if err := validateHostForScheme(host, schemes...); err != nil { + if err := validateHostForScheme(host, scheme, o); err != nil { return ipType{}, errorsJoin( ErrInvalidHost, err, @@ -637,23 +613,17 @@ func (a authorityInfo) validateHost(host string, isIPv6 bool, schemes ...string) // // dns-name see: https://www.rfc-editor.org/rfc/rfc1034, https://www.rfc-editor.org/info/rfc5890 // reg-name = *( unreserved / pct-encoded / sub-delims ) -func validateHostForScheme(host string, schemes ...string) error { - for _, scheme := range schemes { - if UsesDNSHostValidation(scheme) { - if err := validateDNSHostForScheme(host); err != nil { - return err - } - } - - if err := validateRegisteredHostForScheme(host); err != nil { +func validateHostForScheme(host string, scheme string, o *options) error { + if UsesDNSHostValidation(scheme) { + if err := validateDNSHostForScheme(host); err != nil { return err } 
} - return nil + return validateRegisteredHostForScheme(host, o) } -func validateRegisteredHostForScheme(host string) error { +func validateRegisteredHostForScheme(host string, _ *options) error { // RFC 3986 registered name if err := validateUnreservedWithExtra(host, nil); err != nil { return errorsJoin( @@ -670,7 +640,7 @@ func validateRegisteredHostForScheme(host string) error { // Reference: https://www.rfc-editor.org/rfc/rfc3986#section-3.2.3 // // port = *DIGIT -func (a authorityInfo) validatePort(port, host string) error { +func (a Authority) validatePort(port, host string, _ *options) error { const maxPort uint64 = 65535 if !isNumerical(port) { @@ -700,7 +670,7 @@ func (a authorityInfo) validatePort(port, host string) error { // Reference: https://www.rfc-editor.org/rfc/rfc3986#section-3.2.1 // // userinfo = *( unreserved / pct-encoded / sub-delims / ":" ) -func (a authorityInfo) validateUserInfo(userinfo string) error { +func (a Authority) validateUserInfo(userinfo string, _ *options) error { if err := validateUnreservedWithExtra(userinfo, userInfoExtraRunes); err != nil { return errorsJoin( ErrInvalidUserInfo, @@ -711,7 +681,7 @@ func (a authorityInfo) validateUserInfo(userinfo string) error { return nil } -func parseAuthority(hier string) (authorityInfo, error) { +func parseAuthority(hier string, _ *options) (Authority, error) { // as per RFC 3986 Section 3.6 var ( prefix, userinfo, host, port, path string @@ -754,12 +724,12 @@ func parseAuthority(hier string) (authorityInfo, error) { rawHost = rawHost[closingbracket+1:] isIPv6 = true case closingbracket > bracket: - return authorityInfo{}, errorsJoin( + return Authority{}, errorsJoin( ErrInvalidHostAddress, fmt.Errorf("empty IPv6 address"), ) default: - return authorityInfo{}, errorsJoin( + return Authority{}, errorsJoin( ErrInvalidHostAddress, fmt.Errorf("mismatched square brackets"), ) @@ -780,49 +750,18 @@ func parseAuthority(hier string) (authorityInfo, error) { } } - return authorityInfo{ + return 
Authority{ prefix: prefix, userinfo: userinfo, host: host, port: port, path: path, - ipType: ipType{isIPv6: isIPv6}, + ipType: ipType{isIPv6: isIPv6}, // provisional flag }, nil } -func (u *uri) ensureAuthorityExists() { - if u.authority.userinfo != "" || - u.authority.host != "" || - u.authority.port != "" { - u.authority.prefix = authorityPrefix - } -} - -// String representation of an URI. -// -// Reference: https://www.rfc-editor.org/rfc/rfc3986#section-6.2.2.1 and later -func (u *uri) String() string { - buf := strings.Builder{} - buf.Grow(len(u.scheme) + 1 + len(u.query) + 1 + len(u.fragment) + 1 + u.authority.builderSize()) - - if len(u.scheme) > 0 { - buf.WriteString(u.scheme) - buf.WriteByte(colonMark) - } - - u.authority.buildString(&buf) - - if len(u.query) > 0 { - buf.WriteByte(questionMark) - buf.WriteString(u.query) - } - - if len(u.fragment) > 0 { - buf.WriteByte(fragmentMark) - buf.WriteString(u.fragment) - } - - return buf.String() +func (a Authority) Err() error { + return a.err } func miniMaxi(vals ...int) (int, int) { diff --git a/uri_extra.go b/uri_extra.go new file mode 100644 index 0000000..66ebf37 --- /dev/null +++ b/uri_extra.go @@ -0,0 +1,40 @@ +package uri + +// MarshalText yields an URI as UTF8-encoded bytes +func (u URI) MarshalText() ([]byte, error) { + return []byte(u.String()), nil +} + +// MarshalBinary is like MarshalText +func (u URI) MarshalBinary() ([]byte, error) { + return u.MarshalText() +} + +// UnmarshalText unmarshals an URI from UTF8-encoded bytes. +// +// If the original input is not UTF8, consider translating it first from +// the original character set, e.g. using github.com/paulrosania/go-charset. +// +// Notice that: +// * URI references are not accepted by default +// * only package-level default options are applicable +// +// Callers may set package-level defaults to alter the default behavior. 
+func (u *URI) UnmarshalText(b []byte) error { + o, redeem := applyURIOptions(nil) // default options + defer func() { redeem(o) }() + + v, err := parse(string(b), o) + if err != nil { + return err + } + + *u = v + + return nil +} + +// UnmarshalBinary is like UnmarshalText +func (u *URI) UnmarshalBinary(b []byte) error { + return u.UnmarshalText(b) +} diff --git a/uri_test.go b/uri_test.go index 9c2ac00..f5de08a 100644 --- a/uri_test.go +++ b/uri_test.go @@ -14,7 +14,7 @@ import ( type ( uriTest struct { uriRaw string - uri *uri + uri URI err error comment string isReference bool @@ -112,14 +112,16 @@ func TestString(t *testing.T) { } func TestValidateScheme(t *testing.T) { + o := defaultOptions() t.Run("scheme should not be shorter than 2 characters", func(t *testing.T) { - u := &uri{} - require.Error(t, u.validateScheme("x")) + u := URI{} + require.Error(t, u.validateScheme("x", o)) }) } func TestValidatePath(t *testing.T) { - u := authorityInfo{} + o := defaultOptions() + u := Authority{} for _, path := range []string{ "/a/b/c", "a", @@ -130,7 +132,7 @@ func TestValidatePath(t *testing.T) { "www/詹姆斯/org/", "a//b//", } { - require.NoErrorf(t, u.validatePath(path), + require.NoErrorf(t, u.validatePath(path, o), "expected path %q to validate", path, ) @@ -145,7 +147,7 @@ func TestValidatePath(t *testing.T) { "{", "www/詹{姆斯/org/", } { - require.Errorf(t, u.validatePath(path), + require.Errorf(t, u.validatePath(path, o), "expected path %q NOT to validate", path, ) @@ -154,6 +156,8 @@ func TestValidatePath(t *testing.T) { func testLoop(generator testGenerator) func(t *testing.T) { // table-driven tests for IsURI, IsURIReference, Parse and ParseReference. 
+ o := defaultOptions() + return func(t *testing.T) { for _, toPin := range generator() { test := toPin @@ -211,7 +215,8 @@ func testLoop(generator testGenerator) func(t *testing.T) { t.Run("assert IsURI", func(t *testing.T) { assertIsURI(t, test.uriRaw, test.err != nil, test.isReference) - if test.uri != nil { + var zero URI + if test.uri != zero { // we want to assert struct in-depth, otherwise no error is good enough assertURI(t, test.uriRaw, test.uri, actual) } @@ -238,7 +243,8 @@ func testLoop(generator testGenerator) func(t *testing.T) { }) t.Run("assert authority.Validate", func(t *testing.T) { - require.Nil(t, auth.Validate(actual.Scheme())) + _, err := auth.validateForScheme(actual.Scheme(), o) + require.NoError(t, err) }) }) } @@ -309,3 +315,12 @@ func assertIsURI(t *testing.T, raw string, expectError, isReference bool) { "expected %q to be a valid URI", raw, ) } + +func TestParseWithOptions(t *testing.T) { + require.True(t, + IsURI("//foo.bar/?baz=qux#quux", WithReference(true)), + ) + require.False(t, + IsURI("//foo.bar/?baz=qux#quux", WithReference(false)), + ) +}