diff options
Diffstat (limited to 'llgo/third_party/gofrontend/libgo/go/net/url')
| -rw-r--r-- | llgo/third_party/gofrontend/libgo/go/net/url/url.go | 200 | ||||
| -rw-r--r-- | llgo/third_party/gofrontend/libgo/go/net/url/url_test.go | 286 |
2 files changed, 453 insertions, 33 deletions
diff --git a/llgo/third_party/gofrontend/libgo/go/net/url/url.go b/llgo/third_party/gofrontend/libgo/go/net/url/url.go index f167408faba..8ffad663d5c 100644 --- a/llgo/third_party/gofrontend/libgo/go/net/url/url.go +++ b/llgo/third_party/gofrontend/libgo/go/net/url/url.go @@ -9,6 +9,7 @@ package url import ( "bytes" "errors" + "fmt" "sort" "strconv" "strings" @@ -51,6 +52,7 @@ type encoding int const ( encodePath encoding = 1 + iota + encodeHost encodeUserPassword encodeQueryComponent encodeFragment @@ -64,12 +66,27 @@ func (e EscapeError) Error() string { // Return true if the specified character should be escaped when // appearing in a URL string, according to RFC 3986. +// +// Please be informed that for now shouldEscape does not check all +// reserved characters correctly. See golang.org/issue/5684. func shouldEscape(c byte, mode encoding) bool { // §2.3 Unreserved characters (alphanum) if 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z' || '0' <= c && c <= '9' { return false } + if mode == encodeHost { + // §3.2.2 Host allows + // sub-delims = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "=" + // as part of reg-name. + // We add : because we include :port as part of host. + // We add [ ] because we include [ipv6]:port as part of host + switch c { + case '!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '=', ':', '[', ']': + return false + } + } + switch c { case '-', '_', '.', '~': // §2.3 Unreserved characters (mark) return false @@ -127,7 +144,7 @@ func unescape(s string, mode encoding) (string, error) { if i+2 >= len(s) || !ishex(s[i+1]) || !ishex(s[i+2]) { s = s[i:] if len(s) > 3 { - s = s[0:3] + s = s[:3] } return "", EscapeError(s) } @@ -224,16 +241,24 @@ func escape(s string, mode encoding) string { // Note that the Path field is stored in decoded form: /%47%6f%2f becomes /Go/. // A consequence is that it is impossible to tell which slashes in the Path were // slashes in the raw URL and which were %2f. This distinction is rarely important, -// but when it is a client must use other routines to parse the raw URL or construct -// the parsed URL. For example, an HTTP server can consult req.RequestURI, and -// an HTTP client can use URL{Host: "example.com", Opaque: "//example.com/Go%2f"} -// instead of URL{Host: "example.com", Path: "/Go/"}. +// but when it is, code must not use Path directly. +// +// Go 1.5 introduced the RawPath field to hold the encoded form of Path. +// The Parse function sets both Path and RawPath in the URL it returns, +// and URL's String method uses RawPath if it is a valid encoding of Path, +// by calling the EncodedPath method. +// +// In earlier versions of Go, the more indirect workarounds were that an +// HTTP server could consult req.RequestURI and an HTTP client could +// construct a URL struct directly and set the Opaque field instead of Path. +// These still work as well. type URL struct { Scheme string Opaque string // encoded opaque data User *Userinfo // username and password information Host string // host or host:port Path string + RawPath string // encoded path hint (Go 1.5 and later only; see EscapedPath method) RawQuery string // encoded query values, without '?' Fragment string // fragment for references, without '#' } @@ -305,7 +330,7 @@ func getscheme(rawurl string) (scheme, path string, err error) { if i == 0 { return "", "", errors.New("missing protocol scheme") } - return rawurl[0:i], rawurl[i+1:], nil + return rawurl[:i], rawurl[i+1:], nil default: // we have encountered an invalid character, // so there is no valid scheme @@ -324,9 +349,9 @@ func split(s string, c string, cutc bool) (string, string) { return s, "" } if cutc { - return s[0:i], s[i+len(c):] + return s[:i], s[i+len(c):] } - return s[0:i], s[i:] + return s[:i], s[i:] } // Parse parses rawurl into a URL structure. @@ -401,14 +426,17 @@ func parse(rawurl string, viaRequest bool) (url *URL, err error) { if err != nil { goto Error } - if strings.Contains(url.Host, "%") { - err = errors.New("hexadecimal escape in host") - goto Error - } } if url.Path, err = unescape(rest, encodePath); err != nil { goto Error } + // RawPath is a hint as to the encoding of Path to use + // in url.EncodedPath. If that method already gets the + // right answer without RawPath, leave it empty. + // This will help make sure that people don't rely on it in general. + if url.EscapedPath() != rest && validEncodedPath(rest) { + url.RawPath = rest + } return url, nil Error: @@ -418,36 +446,157 @@ Error: func parseAuthority(authority string) (user *Userinfo, host string, err error) { i := strings.LastIndex(authority, "@") if i < 0 { - host = authority - return + host, err = parseHost(authority) + } else { + host, err = parseHost(authority[i+1:]) } - userinfo, host := authority[:i], authority[i+1:] + if err != nil { + return nil, "", err + } + if i < 0 { + return nil, host, nil + } + userinfo := authority[:i] if strings.Index(userinfo, ":") < 0 { if userinfo, err = unescape(userinfo, encodeUserPassword); err != nil { - return + return nil, "", err } user = User(userinfo) } else { username, password := split(userinfo, ":", true) if username, err = unescape(username, encodeUserPassword); err != nil { - return + return nil, "", err } if password, err = unescape(password, encodeUserPassword); err != nil { - return + return nil, "", err } user = UserPassword(username, password) } - return + return user, host, nil +} + +// parseHost parses host as an authority without user +// information. That is, as host[:port]. +func parseHost(host string) (string, error) { + litOrName := host + if strings.HasPrefix(host, "[") { + // Parse an IP-Literal in RFC 3986 and RFC 6874. + // E.g., "[fe80::1], "[fe80::1%25en0]" + // + // RFC 4007 defines "%" as a delimiter character in + // the textual representation of IPv6 addresses. + // Per RFC 6874, in URIs that "%" is encoded as "%25". + i := strings.LastIndex(host, "]") + if i < 0 { + return "", errors.New("missing ']' in host") + } + colonPort := host[i+1:] + if !validOptionalPort(colonPort) { + return "", fmt.Errorf("invalid port %q after host", colonPort) + } + // Parse a host subcomponent without a ZoneID in RFC + // 6874 because the ZoneID is allowed to use the + // percent encoded form. + j := strings.Index(host[:i], "%25") + if j < 0 { + litOrName = host[1:i] + } else { + litOrName = host[1:j] + } + } + + // A URI containing an IP-Literal without a ZoneID or + // IPv4address in RFC 3986 and RFC 6847 must not be + // percent-encoded. + // + // A URI containing a DNS registered name in RFC 3986 is + // allowed to be percent-encoded, though we don't use it for + // now to avoid messing up with the gap between allowed + // characters in URI and allowed characters in DNS. + // See golang.org/issue/7991. + if strings.Contains(litOrName, "%") { + return "", errors.New("percent-encoded characters in host") + } + var err error + if host, err = unescape(host, encodeHost); err != nil { + return "", err + } + return host, nil +} + +// EscapedPath returns the escaped form of u.Path. +// In general there are multiple possible escaped forms of any path. +// EscapedPath returns u.RawPath when it is a valid escaping of u.Path. +// Otherwise EscapedPath ignores u.RawPath and computes an escaped +// form on its own. +// The String and RequestURI methods use EscapedPath to construct +// their results. +// In general, code should call EscapedPath instead of +// reading u.RawPath directly. +func (u *URL) EscapedPath() string { + if u.RawPath != "" && validEncodedPath(u.RawPath) { + p, err := unescape(u.RawPath, encodePath) + if err == nil && p == u.Path { + return u.RawPath + } + } + if u.Path == "*" { + return "*" // don't escape (Issue 11202) + } + return escape(u.Path, encodePath) +} + +// validEncodedPath reports whether s is a valid encoded path. +// It must not contain any bytes that require escaping during path encoding. +func validEncodedPath(s string) bool { + for i := 0; i < len(s); i++ { + // RFC 3986, Appendix A. + // pchar = unreserved / pct-encoded / sub-delims / ":" / "@". + // shouldEscape is not quite compliant with the RFC, + // so we check the sub-delims ourselves and let + // shouldEscape handle the others. + switch s[i] { + case '!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '=', ':', '@': + // ok + case '[', ']': + // ok - not specified in RFC 3986 but left alone by modern browsers + case '%': + // ok - percent encoded, will decode + default: + if shouldEscape(s[i], encodePath) { + return false + } + } + } + return true +} + +// validOptionalPort reports whether port is either an empty string +// or matches /^:\d+$/ +func validOptionalPort(port string) bool { + if port == "" { + return true + } + if port[0] != ':' || len(port) == 1 { + return false + } + for _, b := range port[1:] { + if b < '0' || b > '9' { + return false + } + } + return true } // String reassembles the URL into a valid URL string. // The general form of the result is one of: // -// scheme:opaque +// scheme:opaque?query#fragment // scheme://userinfo@host/path?query#fragment // // If u.Opaque is non-empty, String uses the first form; // otherwise it uses the second form. +// To obtain the path, String uses u.EncodedPath(). // // In the second form, the following rules apply: // - if u.Scheme is empty, scheme: is omitted. @@ -475,13 +624,14 @@ func (u *URL) String() string { buf.WriteByte('@') } if h := u.Host; h != "" { - buf.WriteString(h) + buf.WriteString(escape(h, encodeHost)) } } - if u.Path != "" && u.Path[0] != '/' && u.Host != "" { + path := u.EscapedPath() + if path != "" && path[0] != '/' && u.Host != "" { buf.WriteByte('/') } - buf.WriteString(escape(u.Path, encodePath)) + buf.WriteString(path) } if u.RawQuery != "" { buf.WriteByte('?') @@ -639,7 +789,7 @@ func resolvePath(base, ref string) string { return "/" + strings.TrimLeft(strings.Join(dst, "/"), "/") } -// IsAbs returns true if the URL is absolute. +// IsAbs reports whether the URL is absolute. func (u *URL) IsAbs() bool { return u.Scheme != "" } @@ -703,7 +853,7 @@ func (u *URL) Query() Values { func (u *URL) RequestURI() string { result := u.Opaque if result == "" { - result = escape(u.Path, encodePath) + result = u.EscapedPath() if result == "" { result = "/" } diff --git a/llgo/third_party/gofrontend/libgo/go/net/url/url_test.go b/llgo/third_party/gofrontend/libgo/go/net/url/url_test.go index d8b19d805d0..ff6e9e4541a 100644 --- a/llgo/third_party/gofrontend/libgo/go/net/url/url_test.go +++ b/llgo/third_party/gofrontend/libgo/go/net/url/url_test.go @@ -13,7 +13,7 @@ import ( type URLTest struct { in string - out *URL + out *URL // expected parse; RawPath="" means same as Path roundtrip string // expected result of reserializing the URL; empty means same as "in". } @@ -41,11 +41,12 @@ var urltests = []URLTest{ { "http://www.google.com/file%20one%26two", &URL{ - Scheme: "http", - Host: "www.google.com", - Path: "/file one&two", + Scheme: "http", + Host: "www.google.com", + Path: "/file one&two", + RawPath: "/file%20one%26two", }, - "http://www.google.com/file%20one&two", + "", }, // user { @@ -289,6 +290,140 @@ var urltests = []URLTest{ }, "", }, + // host subcomponent; IPv4 address in RFC 3986 + { + "http://192.168.0.1/", + &URL{ + Scheme: "http", + Host: "192.168.0.1", + Path: "/", + }, + "", + }, + // host and port subcomponents; IPv4 address in RFC 3986 + { + "http://192.168.0.1:8080/", + &URL{ + Scheme: "http", + Host: "192.168.0.1:8080", + Path: "/", + }, + "", + }, + // host subcomponent; IPv6 address in RFC 3986 + { + "http://[fe80::1]/", + &URL{ + Scheme: "http", + Host: "[fe80::1]", + Path: "/", + }, + "", + }, + // host and port subcomponents; IPv6 address in RFC 3986 + { + "http://[fe80::1]:8080/", + &URL{ + Scheme: "http", + Host: "[fe80::1]:8080", + Path: "/", + }, + "", + }, + // host subcomponent; IPv6 address with zone identifier in RFC 6847 + { + "http://[fe80::1%25en0]/", // alphanum zone identifier + &URL{ + Scheme: "http", + Host: "[fe80::1%en0]", + Path: "/", + }, + "", + }, + // host and port subcomponents; IPv6 address with zone identifier in RFC 6847 + { + "http://[fe80::1%25en0]:8080/", // alphanum zone identifier + &URL{ + Scheme: "http", + Host: "[fe80::1%en0]:8080", + Path: "/", + }, + "", + }, + // host subcomponent; IPv6 address with zone identifier in RFC 6847 + { + "http://[fe80::1%25%65%6e%301-._~]/", // percent-encoded+unreserved zone identifier + &URL{ + Scheme: "http", + Host: "[fe80::1%en01-._~]", + Path: "/", + }, + "http://[fe80::1%25en01-._~]/", + }, + // host and port subcomponents; IPv6 address with zone identifier in RFC 6847 + { + "http://[fe80::1%25%65%6e%301-._~]:8080/", // percent-encoded+unreserved zone identifier + &URL{ + Scheme: "http", + Host: "[fe80::1%en01-._~]:8080", + Path: "/", + }, + "http://[fe80::1%25en01-._~]:8080/", + }, + // alternate escapings of path survive round trip + { + "http://rest.rsc.io/foo%2fbar/baz%2Fquux?alt=media", + &URL{ + Scheme: "http", + Host: "rest.rsc.io", + Path: "/foo/bar/baz/quux", + RawPath: "/foo%2fbar/baz%2Fquux", + RawQuery: "alt=media", + }, + "", + }, + // issue 12036 + { + "mysql://a,b,c/bar", + &URL{ + Scheme: "mysql", + Host: "a,b,c", + Path: "/bar", + }, + "", + }, + // worst case host, still round trips + { + "scheme://!$&'()*+,;=hello!:port/path", + &URL{ + Scheme: "scheme", + Host: "!$&'()*+,;=hello!:port", + Path: "/path", + }, + "", + }, + // worst case path, still round trips + { + "http://host/!$&'()*+,;=:@[hello]", + &URL{ + Scheme: "http", + Host: "host", + Path: "/!$&'()*+,;=:@[hello]", + RawPath: "/!$&'()*+,;=:@[hello]", + }, + "", + }, + // golang.org/issue/5684 + { + "http://example.com/oid/[order_id]", + &URL{ + Scheme: "http", + Host: "example.com", + Path: "/oid/[order_id]", + RawPath: "/oid/[order_id]", + }, + "", + }, } // more useful string for debugging than fmt's struct printer @@ -300,8 +435,8 @@ func ufmt(u *URL) string { pass = p } } - return fmt.Sprintf("opaque=%q, scheme=%q, user=%#v, pass=%#v, host=%q, path=%q, rawq=%q, frag=%q", - u.Opaque, u.Scheme, user, pass, u.Host, u.Path, u.RawQuery, u.Fragment) + return fmt.Sprintf("opaque=%q, scheme=%q, user=%#v, pass=%#v, host=%q, path=%q, rawpath=%q, rawq=%q, frag=%q", + u.Opaque, u.Scheme, user, pass, u.Host, u.Path, u.RawPath, u.RawQuery, u.Fragment) } func DoTest(t *testing.T, parse func(string) (*URL, error), name string, tests []URLTest) { @@ -358,9 +493,33 @@ var parseRequestURLTests = []struct { {"/", true}, {pathThatLooksSchemeRelative, true}, {"//not.a.user@%66%6f%6f.com/just/a/path/also", true}, + {"*", true}, + {"http://192.168.0.1/", true}, + {"http://192.168.0.1:8080/", true}, + {"http://[fe80::1]/", true}, + {"http://[fe80::1]:8080/", true}, + + // Tests exercising RFC 6874 compliance: + {"http://[fe80::1%25en0]/", true}, // with alphanum zone identifier + {"http://[fe80::1%25en0]:8080/", true}, // with alphanum zone identifier + {"http://[fe80::1%25%65%6e%301-._~]/", true}, // with percent-encoded+unreserved zone identifier + {"http://[fe80::1%25%65%6e%301-._~]:8080/", true}, // with percent-encoded+unreserved zone identifier + {"foo.html", false}, {"../dir/", false}, - {"*", true}, + {"http://192.168.0.%31/", false}, + {"http://192.168.0.%31:8080/", false}, + {"http://[fe80::%31]/", false}, + {"http://[fe80::%31]:8080/", false}, + {"http://[fe80::%31%25en0]/", false}, + {"http://[fe80::%31%25en0]:8080/", false}, + + // These two cases are valid as textual representations as + // described in RFC 4007, but are not valid as address + // literals with IPv6 zone identifiers in URIs as described in + // RFC 6874. + {"http://[fe80::1%en0]/", false}, + {"http://[fe80::1%en0]:8080/", false}, } func TestParseRequestURI(t *testing.T) { @@ -869,6 +1028,25 @@ var requritests = []RequestURITest{ }, "http://other.example.com/%2F/%2F/", }, + // better fix for issue 4860 + { + &URL{ + Scheme: "http", + Host: "example.com", + Path: "/////", + RawPath: "/%2F/%2F/", + }, + "/%2F/%2F/", + }, + { + &URL{ + Scheme: "http", + Host: "example.com", + Path: "/////", + RawPath: "/WRONG/", // ignored because doesn't match Path + }, + "/////", + }, { &URL{ Scheme: "http", @@ -880,6 +1058,26 @@ var requritests = []RequestURITest{ }, { &URL{ + Scheme: "http", + Host: "example.com", + Path: "/a b", + RawPath: "/a b", // ignored because invalid + RawQuery: "q=go+language", + }, + "/a%20b?q=go+language", + }, + { + &URL{ + Scheme: "http", + Host: "example.com", + Path: "/a?b", + RawPath: "/a?b", // ignored because invalid + RawQuery: "q=go+language", + }, + "/a%3Fb?q=go+language", + }, + { + &URL{ Scheme: "myschema", Opaque: "opaque", }, @@ -914,6 +1112,54 @@ func TestParseFailure(t *testing.T) { } } +func TestParseAuthority(t *testing.T) { + tests := []struct { + in string + wantErr bool + }{ + {"http://[::1]", false}, + {"http://[::1]:80", false}, + {"http://[::1]:namedport", true}, // rfc3986 3.2.3 + {"http://[::1]/", false}, + {"http://[::1]a", true}, + {"http://[::1]%23", true}, + {"http://[::1%25en0]", false}, // valid zone id + {"http://[::1]:", true}, // colon, but no port + {"http://[::1]:%38%30", true}, // no hex in port + {"http://[::1%25%10]", false}, // TODO: reject the %10 after the valid zone %25 separator? + {"http://[%10::1]", true}, // no %xx escapes in IP address + {"http://[::1]/%48", false}, // %xx in path is fine + {"http://%41:8080/", true}, // TODO: arguably we should accept reg-name with %xx + {"mysql://x@y(z:123)/foo", false}, // golang.org/issue/12023 + {"mysql://x@y(1.2.3.4:123)/foo", false}, + {"mysql://x@y([2001:db8::1]:123)/foo", false}, + {"http://[]%20%48%54%54%50%2f%31%2e%31%0a%4d%79%48%65%61%64%65%72%3a%20%31%32%33%0a%0a/", true}, // golang.org/issue/11208 + } + for _, tt := range tests { + u, err := Parse(tt.in) + if tt.wantErr { + if err == nil { + t.Errorf("Parse(%q) = %#v; want an error", tt.in, u) + } + continue + } + if err != nil { + t.Logf("Parse(%q) = %v; want no error", tt.in, err) + } + } +} + +// Issue 11202 +func TestStarRequest(t *testing.T) { + u, err := Parse("*") + if err != nil { + t.Fatal(err) + } + if got, want := u.RequestURI(), "*"; got != want { + t.Errorf("RequestURI = %q; want %q", got, want) + } +} + type shouldEscapeTest struct { in byte mode encoding @@ -926,6 +1172,7 @@ var shouldEscapeTests = []shouldEscapeTest{ {'a', encodeUserPassword, false}, {'a', encodeQueryComponent, false}, {'a', encodeFragment, false}, + {'a', encodeHost, false}, {'z', encodePath, false}, {'A', encodePath, false}, {'Z', encodePath, false}, @@ -950,6 +1197,29 @@ var shouldEscapeTests = []shouldEscapeTest{ {',', encodeUserPassword, false}, {';', encodeUserPassword, false}, {'=', encodeUserPassword, false}, + + // Host (IP address, IPv6 address, registered name, port suffix; §3.2.2) + {'!', encodeHost, false}, + {'$', encodeHost, false}, + {'&', encodeHost, false}, + {'\'', encodeHost, false}, + {'(', encodeHost, false}, + {')', encodeHost, false}, + {'*', encodeHost, false}, + {'+', encodeHost, false}, + {',', encodeHost, false}, + {';', encodeHost, false}, + {'=', encodeHost, false}, + {':', encodeHost, false}, + {'[', encodeHost, false}, + {']', encodeHost, false}, + {'0', encodeHost, false}, + {'9', encodeHost, false}, + {'A', encodeHost, false}, + {'z', encodeHost, false}, + {'_', encodeHost, false}, + {'-', encodeHost, false}, + {'.', encodeHost, false}, } func TestShouldEscape(t *testing.T) { |

