Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
87 changes: 67 additions & 20 deletions decode.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ import (
"unicode/utf8"
)

func validateUnreservedWithExtra(s string, acceptedRunes []rune) error {
func validateUnreservedWithExtra(s string, extraRunesFunc func(rune) bool) error {
for i := 0; i < len(s); {
r, size := utf8.DecodeRuneInString(s[i:])
if r == utf8.RuneError {
Expand Down Expand Up @@ -42,22 +42,10 @@ func validateUnreservedWithExtra(s string, acceptedRunes []rune) error {
// unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
// pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
if !unicode.IsLetter(r) && !unicode.IsDigit(r) &&
// unreserved
r != '-' && r != '.' && r != '_' && r != '~' &&
// sub-delims
r != '!' && r != '$' && r != '&' && r != '\'' && r != '(' && r != ')' &&
r != '*' && r != '+' && r != ',' && r != ';' && r != '=' {
runeFound := false
for _, acceptedRune := range acceptedRunes {
if r == acceptedRune {
runeFound = true
break
}
}

if !runeFound {
return fmt.Errorf("contains an invalid character: '%U' (%q) near %q", r, r, s[i:])
}
!isUnreserved(r) &&
!isSubDelims(r) &&
(extraRunesFunc == nil || !extraRunesFunc(r)) {
return fmt.Errorf("contains an invalid character: '%U' (%q) near %q", r, r, s[i:])
}
}

Expand Down Expand Up @@ -86,7 +74,7 @@ func unescapePercentEncoding(s string) (rune, int, error) {
return 0, 0, fmt.Errorf("expected a '%%' escape character, near: %q", s)
}

if s[offset] != '%' {
if s[offset] != percentMark {
return 0, 0, fmt.Errorf("expected a '%%' escape character, near: %q", s[offset:])
}
offset++
Expand All @@ -104,7 +92,7 @@ func unescapePercentEncoding(s string) (rune, int, error) {
return 0, 0, fmt.Errorf("expected a '%%' escape character, near: %q", s)
}

if s[offset] != '%' {
if s[offset] != percentMark {
return 0, 0, fmt.Errorf("expected a '%%' escape character, near: %q", s[offset:])
}
offset++
Expand All @@ -121,7 +109,7 @@ func unescapePercentEncoding(s string) (rune, int, error) {
return 0, 0, fmt.Errorf("expected a '%%' escape character, near: %q", s)
}

if s[offset] != '%' {
if s[offset] != percentMark {
return 0, 0, fmt.Errorf("expected a '%%' escape character, near: %q", s[offset:])
}
offset++
Expand Down Expand Up @@ -190,3 +178,62 @@ func unhex(c byte) byte {
}
return 0
}

// isUnreserved reports whether r is one of the punctuation marks of the
// "unreserved" character set: '-', '.', '_' or '~'.
//
// Letters and digits are deliberately not recognized here: the caller
// (validateUnreservedWithExtra) checks them separately with
// unicode.IsLetter and unicode.IsDigit.
func isUnreserved(r rune) bool {
	return r == '-' || r == '.' || r == '_' || r == '~'
}

// isSubDelims reports whether r belongs to the "sub-delims" character set:
//
//	sub-delims = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "="
func isSubDelims(r rune) bool {
	return r == '!' || r == '$' || r == '&' || r == '\'' ||
		r == '(' || r == ')' || r == '*' || r == '+' ||
		r == ',' || r == ';' || r == '='
}

/*
func isGenDelims(r rune) bool {
// gen-delims
switch r{
case ':', '/', '?', '#', '[', ']', '@':
return true
default:
return false
}
}
*/

// isPcharExtraRune reports whether r is one of the extra runes a "pchar"
// accepts on top of the unreserved and sub-delims sets, i.e. colonMark or
// atHost:
//
//	pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
func isPcharExtraRune(r rune) bool {
	return r == colonMark || r == atHost
}

// isQueryOrFragmentExtraRune reports whether r is one of the extra runes
// accepted in a query or a fragment on top of the unreserved and sub-delims
// sets: the pchar extras (colonMark, atHost) plus slashMark and questionMark.
//
//	query    = *( pchar / "/" / "?" )
//	fragment = *( pchar / "/" / "?" )
func isQueryOrFragmentExtraRune(r rune) bool {
	if r == colonMark || r == atHost {
		// extras shared with pchar
		return true
	}

	return r == slashMark || r == questionMark
}

// isUserInfoExtraRune reports whether r is the only extra rune accepted in
// the userinfo part on top of the unreserved and sub-delims sets, i.e.
// colonMark:
//
//	userinfo = *( unreserved / pct-encoded / sub-delims / ":" )
func isUserInfoExtraRune(r rune) bool {
	return r == colonMark
}
37 changes: 34 additions & 3 deletions docs/BENCHMARKS.md
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,9 @@ Benchmark_Parse/with_URL_payload_with_IPs
Benchmark_Parse/with_URL_payload_with_IPs-16 96977450 376.3 ns/op 176 B/op 1 allocs/op

## After stricter IP parsing (naive)

Naive implementation with too many gc allocs.

go test -v -bench . -benchtime 30s -run Bench
goos: linux
goarch: amd64
Expand Down Expand Up @@ -191,7 +194,7 @@ Benchmark_Parse/with_URL_payload_with_IPs-16 93061443 374.6 ns/op
Benchmark_String-16 180403320 199.9 ns/op 142 B/op 5 allocs/op


# After strict percent-encoding check on host
## After strict percent-encoding check on host

goos: linux
goarch: amd64
Expand All @@ -214,7 +217,9 @@ Benchmark_String
Benchmark_String-16 178247580 203.6 ns/op 142 B/op 5 allocs/op
PASS

# After rewrite with uriReader
## After rewrite with uriReader

Abstraction comes at a cost. NO GO

go test -bench . -benchtime 30s -run Bench
goos: linux
Expand All @@ -230,7 +235,7 @@ Benchmark_Parse/with_URL_payload_with_IPs-16 96785080 369.1 ns/op
Benchmark_String-16 180658692 197.4 ns/op 142 B/op 5 allocs/op
PASS

# After rewrite with RuneInString, no Reader
## After rewrite with RuneInString, no Reader

go test -v -run Bench -benchtime 30s -bench Bench
goos: linux
Expand All @@ -254,3 +259,29 @@ Benchmark_String
Benchmark_String-16 176733871 202.6 ns/op 142 B/op 5 allocs/op
PASS

## After replacing rune slice iteration with a switch statement

Actually a slight degradation. NO GO

go test -v -pgo=auto -run Bench -benchtime 30s -bench Bench
goos: linux
goarch: amd64
pkg: github.com/fredbi/uri
cpu: AMD Ryzen 7 5800X 8-Core Processor
Benchmark_Parse
Benchmark_Parse/with_URI_simple_payload
Benchmark_Parse/with_URI_simple_payload-16 92742778 391.3 ns/op 160 B/op 1 allocs/op
Benchmark_Parse/with_URL_simple_payload
Benchmark_Parse/with_URL_simple_payload-16 100000000 321.1 ns/op 168 B/op 1 allocs/op
Benchmark_Parse/with_URI_mixed_payload
Benchmark_Parse/with_URI_mixed_payload-16 93061579 393.8 ns/op 160 B/op 1 allocs/op
Benchmark_Parse/with_URL_mixed_payload
Benchmark_Parse/with_URL_mixed_payload-16 100000000 301.8 ns/op 163 B/op 1 allocs/op
Benchmark_Parse/with_URI_payload_with_IPs
Benchmark_Parse/with_URI_payload_with_IPs-16 81460168 424.6 ns/op 160 B/op 1 allocs/op
Benchmark_Parse/with_URL_payload_with_IPs
Benchmark_Parse/with_URL_payload_with_IPs-16 94139295 365.8 ns/op 176 B/op 1 allocs/op
Benchmark_String
Benchmark_String-16 178303498 201.8 ns/op 142 B/op 5 allocs/op
PASS

5 changes: 3 additions & 2 deletions ip.go
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,7 @@ func validateIPvFuture(address string) error {
return errors.New("invalid IP vFuture format: expect a non-empty address after the version tag")
}

// TODO: wrong because IpvFuture is not escaped
return validateUnreservedWithExtra(address[offset:], userInfoExtraRunes)
// RFC3986 states that IPvFuture is not escaped, but IPv6 has already evolved to add an escaped zone ID.
// We assume that IPvFuture supports escaping as well.
return validateUnreservedWithExtra(address[offset:], isUserInfoExtraRune)
}
17 changes: 5 additions & 12 deletions uri.go
Original file line number Diff line number Diff line change
Expand Up @@ -88,13 +88,6 @@ const (
maxDomainLength = 255
)

var (
// predefined sets of accepted runes beyond the "unreserved" character set

// pcharExtraRunes holds the extra runes accepted by a pchar.
pcharExtraRunes = []rune{colonMark, atHost} // pchar = unreserved | ':' | '@'
// queryOrFragmentExtraRunes is the pchar set extended with slashMark and questionMark.
queryOrFragmentExtraRunes = append(pcharExtraRunes, slashMark, questionMark)
// userInfoExtraRunes is the pchar set with colonMark appended once more, so
// colonMark appears twice and atHost is still part of the set.
// NOTE(review): the userinfo grammar only lists ":" as an extra character — confirm that accepting atHost here is intended.
userInfoExtraRunes = append(pcharExtraRunes, colonMark)
)

// IsURI tells if a URI is valid according to RFC3986/RFC3987.
func IsURI(raw string) bool {
_, err := Parse(raw)
Expand Down Expand Up @@ -387,7 +380,7 @@ func (u *uri) validateScheme(scheme string) error {
// pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
// query = *( pchar / "/" / "?" )
func (u *uri) validateQuery(query string) error {
if err := validateUnreservedWithExtra(query, queryOrFragmentExtraRunes); err != nil {
if err := validateUnreservedWithExtra(query, isQueryOrFragmentExtraRune); err != nil {
return errorsJoin(ErrInvalidQuery, err)
}

Expand All @@ -402,7 +395,7 @@ func (u *uri) validateQuery(query string) error {
//
// fragment = *( pchar / "/" / "?" )
func (u *uri) validateFragment(fragment string) error {
if err := validateUnreservedWithExtra(fragment, queryOrFragmentExtraRunes); err != nil {
if err := validateUnreservedWithExtra(fragment, isQueryOrFragmentExtraRune); err != nil {
return errorsJoin(ErrInvalidFragment, err)
}

Expand Down Expand Up @@ -513,7 +506,7 @@ func (a authorityInfo) validatePath(path string) error {
}

if pos > previousPos {
if err := validateUnreservedWithExtra(path[previousPos:pos], pcharExtraRunes); err != nil {
if err := validateUnreservedWithExtra(path[previousPos:pos], isPcharExtraRune); err != nil {
return errorsJoin(
ErrInvalidPath,
err,
Expand All @@ -525,7 +518,7 @@ func (a authorityInfo) validatePath(path string) error {
}

if previousPos < len(path) { // don't care if the last char was a separator
if err := validateUnreservedWithExtra(path[previousPos:], pcharExtraRunes); err != nil {
if err := validateUnreservedWithExtra(path[previousPos:], isPcharExtraRune); err != nil {
return errorsJoin(
ErrInvalidPath,
err,
Expand Down Expand Up @@ -636,7 +629,7 @@ func (a authorityInfo) validatePort(port, host string) error {
//
// userinfo = *( unreserved / pct-encoded / sub-delims / ":" )
func (a authorityInfo) validateUserInfo(userinfo string) error {
if err := validateUnreservedWithExtra(userinfo, userInfoExtraRunes); err != nil {
if err := validateUnreservedWithExtra(userinfo, isUserInfoExtraRune); err != nil {
return errorsJoin(
ErrInvalidUserInfo,
err,
Expand Down