diff --git a/http-easy-lib/http-easy.rkt b/http-easy-lib/http-easy.rkt index 2e50c44..43c8533 100644 --- a/http-easy-lib/http-easy.rkt +++ b/http-easy-lib/http-easy.rkt @@ -12,6 +12,7 @@ "http-easy/private/response.rkt" "http-easy/private/session.rkt" "http-easy/private/timeout.rkt" + "http-easy/private/url.rkt" "http-easy/private/user-agent.rkt") (provide @@ -35,6 +36,10 @@ (all-from-out "http-easy/private/session.rkt") (all-from-out "http-easy/private/user-agent.rkt") + (struct-out url/literal) + string->url/literal + url/literal->string + (contract-out [current-session (parameter/c session?)])) diff --git a/http-easy-lib/http-easy/private/proxy.rkt b/http-easy-lib/http-easy/private/proxy.rkt index f0025dc..ae2224e 100644 --- a/http-easy-lib/http-easy/private/proxy.rkt +++ b/http-easy-lib/http-easy/private/proxy.rkt @@ -16,8 +16,8 @@ (contract-out [make-proxy (-> (-> url? boolean?) (-> http-conn? url? (or/c #f ssl-client-context?) void?) proxy?)] - [make-http-proxy (->* (urlish/c) ((-> url? boolean?)) proxy?)] - [make-https-proxy (->* (urlish/c) ((-> url? boolean?)) proxy?)])) + [make-http-proxy (->* [urlish/c] [(-> url? boolean?)] proxy?)] + [make-https-proxy (->* [urlish/c] [(-> url? boolean?)] proxy?)])) (struct proxy (matches? connect!) #:transparent) diff --git a/http-easy-lib/http-easy/private/session.rkt b/http-easy-lib/http-easy/private/session.rkt index 055f293..276e51d 100644 --- a/http-easy-lib/http-easy/private/session.rkt +++ b/http-easy-lib/http-easy/private/session.rkt @@ -210,7 +210,7 @@ [exn:fail? (λ (e) (channel-put resp-ch e))]) (define-values (resp-status resp-headers resp-output) (http-conn-sendrecv! - conn (url-path&query u params) + conn (url-request-uri u params) #:close? close? #:method (method->bytes method) #:headers (headers->list headers) @@ -274,21 +274,31 @@ (go (->url urlish))) +;; https://www.rfc-editor.org/rfc/rfc2616#section-14.30 (define (ensure-absolute-url orig location) (define location-url - (parameterize ([current-alist-separator-mode 'amp]) - (string->url location))) + (string->url/literal location)) (cond - [(url-host location-url) location-url] - [else (combine-url/relative orig location)])) + [(url-host location-url) + location-url] + [(not (url-path-absolute? location-url)) + (error 'ensure-absolute-url "Location destination is relative")] + [else + (match-define (url scheme user host port _ _ _ _) orig) + (match-define (url _ _ _ _ _ path query fragment) location-url) + (url/literal scheme user host port #t path query fragment)])) (define (same-origin? a b) - (and (equal? (url-scheme a) - (url-scheme b)) - (equal? (url-host a) - (url-host b)) - (equal? (url-port a) - (url-port b)))) + (and + (equal? + (url-scheme a) + (url-scheme b)) + (equal? + (url-host a) + (url-host b)) + (equal? + (url-port a) + (url-port b)))) (define (redirect? resp) (and (memv (response-status-code resp) '(301 302 303 307)) diff --git a/http-easy-lib/http-easy/private/url.rkt b/http-easy-lib/http-easy/private/url.rkt index 8ca1dd8..f7052da 100644 --- a/http-easy-lib/http-easy/private/url.rkt +++ b/http-easy-lib/http-easy/private/url.rkt @@ -3,15 +3,132 @@ (require net/uri-codec net/url racket/contract/base - racket/format + racket/match + racket/serialize racket/string) (provide + (struct-out url/literal) + string->url/literal + url/literal->string + is-percent-encoded? urlish/c ->url url-scheme* url-port* - url-path&query) + url-request-uri) + +;; A url/literal is like a url from net/url, but the user, path, query +;; and fragment are not decoded upon conversion from string and any +;; components that are already percent-encoded within those fields are +;; skipped upon conversion back to string. A component is considered to +;; already be percent-encoded if all of its % characters are followed by +;; two hexadecimal characters. +;; +;; xref https://github.com/rmculpepper/racket-http123/issues/6 +(serializable-struct url/literal url ()) + +(define (string->url/literal s) + (match-define (list _ scheme user ipv6host host port path query fragment) + (regexp-match url-regexp s)) + (let* ([scheme (and scheme (string-downcase scheme))] + [host (or (and ipv6host (string-downcase ipv6host)) + (and host (string-downcase host)))] + [port (and port (string->number port))] + [abs? (or (equal? "file" scheme) + (regexp-match? #rx"^/" path) + (and (or host user port) #t))] + [path (let ([components (regexp-split #rx"/" path)]) + (for/list ([component (in-list (if (equal? (car components) "") + (cdr components) + components))]) + (match-define (cons path-component params) + (regexp-split #rx";" component)) + (path/param + (case path-component + [(".") 'same] + [("..") 'up] + [else path-component]) + params)))] + [query (if query + (for/list ([component (in-list (regexp-split #rx"&" query))]) + (match (regexp-split #rx"=" component) + [(list name value) + (cons (string->symbol name) value)] + [(list name) + (cons (string->symbol name) #f)])) + null)]) + (url/literal scheme user host port abs? path query fragment))) + +(define (url/literal->string u) + (define out (open-output-string)) + (match-define (url scheme user host port abs? path query fragment) u) + (when scheme + (write-string scheme out) + (write-char #\: out)) + (cond + [(or user host port) + (write-string "//" out) + (when user + (write-string (maybe-percent-encode user uri-userinfo-encode) out) + (write-char #\@ out)) + (when host + (cond + [(ipv6-host? host) + (write-char #\[ out) + (write-string host out) + (write-char #\] out)] + [else + (write-string host out)])) + (when port + (write-char #\: out) + (display port out))] + [(equal? scheme "file") + (write-string "//" out)] + [else + (void)]) + (unless (null? path) + (when abs? (write-char #\/ out)) + (let loop ([path-components path]) + (match-define (path/param path-component params) + (car path-components)) + (write-string + (maybe-percent-encode + (case path-component + [(same) "."] + [(up) ".."] + [else path-component]) + uri-path-segment-encode) + out) + (for ([param (in-list params)]) + (write-char #\; out) + (write-string (maybe-percent-encode param uri-path-segment-encode) out)) + (unless (null? (cdr path-components)) + (write-char #\/ out) + (loop (cdr path-components))))) + (unless (null? query) + (write-char #\? out) + (for ([pair (in-list query)]) + (match-define (cons (app symbol->string name) value) pair) + (write-string (maybe-percent-encode name form-urlencoded-encode) out) + (when value + (write-char #\= out) + (write-string (maybe-percent-encode value form-urlencoded-encode) out)))) + (when fragment + (write-char #\# out) + (write-string (maybe-percent-encode fragment) out)) + (get-output-string out)) + +(define (maybe-percent-encode s [encode uri-encode]) + (if (is-percent-encoded? s) s (encode s))) + +(define (is-percent-encoded? s) + (define num-%-matches (length (regexp-match* #rx"%" s))) + (and (> num-%-matches 0) + (= num-%-matches (length (regexp-match* #px"%[a-fA-F0-9]{2}" s))))) + +(define (ipv6-host? s) + (regexp-match? #rx"^[0-9a-fA-F:]*:[0-9a-fA-F:]*$" s)) (define urlish/c (or/c bytes? string? url?)) @@ -28,15 +145,14 @@ (define the-url (parameterize ([current-alist-separator-mode 'amp]) (string->url s))) - (struct-copy url the-url - [scheme (string-trim #:repeat? #t (url-scheme the-url))] - [host (string-trim #:repeat? #t (url-host the-url))])] - + (struct-copy + url the-url + [scheme (string-trim #:repeat? #t (url-scheme the-url))] + [host (string-trim #:repeat? #t (url-host the-url))])] [(string-prefix? s "://") - (string->url* (~a "http" s))] - + (string->url* (string-append "http" s))] [else - (string->url* (~a "http://" s))])) + (string->url* (string-append "http://" s))])) (module+ internal (provide string->url*)) @@ -53,11 +169,14 @@ [("https") 443] [else 80]))) -(define (url-path&query u [params null]) - (define path (url-path u)) - (define all-params (append (url-query u) params)) +(define (url-request-uri u [params null]) (define abs-path - (if (null? path) + (if (null? (url-path u)) (list (path/param "" null)) - path)) - (url->string (url #f #f #f #f #t abs-path all-params #f))) + (url-path u))) + (define all-params + (append (url-query u) params)) + ((if (url/literal? u) + url/literal->string + url->string) + (url #f #f #f #f #t abs-path all-params #f))) diff --git a/http-easy-lib/info.rkt b/http-easy-lib/info.rkt index 9852100..5ef28f4 100644 --- a/http-easy-lib/info.rkt +++ b/http-easy-lib/info.rkt @@ -1,7 +1,7 @@ #lang info (define license 'BSD-3-Clause) -(define version "0.6.2") +(define version "0.7") (define collection "net") (define deps '(["base" #:version "8.1.0.4"] diff --git a/http-easy-test/net/http-easy/http-easy.rkt b/http-easy-test/net/http-easy/http-easy.rkt index b306abd..000841f 100644 --- a/http-easy-test/net/http-easy/http-easy.rkt +++ b/http-easy-test/net/http-easy/http-easy.rkt @@ -376,7 +376,32 @@ (check-equal? (response-body (get (format "http://127.0.0.1:~a" port))) - #"hello"))))) + #"hello")))) + + ;; xref: https://chrt.fm/track/E341G/dts.podtrac.com/redirect.mp3/prfx.byspotify.com/e/rss.art19.com/episodes/f441d319-c90a-4632-bed5-5bd3e596018e.mp3?rss_browser=BAhJIg9Qb2RjYXRjaGVyBjoGRVQ%3D--8c940e38b58f38097352f6f4709902a1b7f12844 + (test-case "redirect to location with encoded + in path" + (call-with-tcp-server + (lambda (lines out) + (match (car lines) + ["GET / HTTP/1.1" + (fprintf out "HTTP/1.1 302 Found\r\n") + (fprintf out "Location: /a%2Bb.mp3\r\n") + (fprintf out "\r\n")] + ["GET /a%2Bb.mp3 HTTP/1.1" + (fprintf out "HTTP/1.1 200 OK\r\n") + (fprintf out "Content-Length: 2\r\n") + (fprintf out "\r\n") + (fprintf out "ok")] + ["GET /a+b.mp3 HTTP/1.1" + (fprintf out "HTTP/1.1 400 Bad Request\r\n") + (fprintf out "Content-Length: 3\r\n") + (fprintf out "\r\n") + (fprintf out "err")])) + (lambda (port) + (parameterize ([current-session (make-session)]) + (check-equal? (response-body (get (format "http://127.0.0.1:~a" port))) #"ok") + ;; https://github.com/Bogdanp/racket-http-easy/issues/25 + (check-equal? (response-body (get (string->url/literal (format "http://127.0.0.1:~a/a%2Bb.mp3" port)))) #"ok")))))) (test-suite "custom port" diff --git a/http-easy-test/net/http-easy/private/url.rkt b/http-easy-test/net/http-easy/private/url.rkt index 1a3754b..df2ac43 100644 --- a/http-easy-test/net/http-easy/private/url.rkt +++ b/http-easy-test/net/http-easy/private/url.rkt @@ -13,7 +13,7 @@ "url" (test-suite - "url-path&query" + "url-request-uri" (test-case "extracts various kinds of paths" (define tests @@ -29,11 +29,11 @@ ("/a;b/c" () "/a;b/c") ("/å/b/c" () "/%C3%A5/b/c"))) - (for* ([pair (in-list tests)] - [s (in-value (car pair))] - [p (in-value (cadr pair))] - [e (in-value (caddr pair))]) - (check-equal? (url-path&query (string->url* s) p) e)))) + (for* ([tuple (in-list tests)] + [s (in-value (car tuple))] + [p (in-value (cadr tuple))] + [e (in-value (caddr tuple))]) + (check-equal? (url-request-uri (string->url* s) p) e)))) (test-suite "string->url*" @@ -54,7 +54,57 @@ (for* ([pair (in-list tests)] [s (in-value (car pair))] [e (in-value (cdr pair))]) - (check-equal? (url->string (string->url* s)) e s)))))) + (check-equal? (url->string (string->url* s)) e s)))) + + (test-suite + "url/literal" + + (test-case "roundtrips" + (define tests + '(("http://example.com" . "http://example.com") + ("http://bogdan@example.com:5100" . "http://bogdan@example.com:5100") + ("http://example.com/a/b/c" . "http://example.com/a/b/c") + ("http://example.com/a%2Bb.mp3" . "http://example.com/a%2Bb.mp3") + ("http://example.com/a%2Bb.mp3?c=d+e" . "http://example.com/a%2Bb.mp3?c=d%2Be") + ("a/b/c" . "a/b/c") + ("/a/b/c" . "/a/b/c") + ("/a;b;c" . "/a;b;c"))) + + (for* ([pair (in-list tests)] + [s (in-value (car pair))] + [e (in-value (cdr pair))]) + (check-equal? (url/literal->string (string->url/literal s)) e s))) + + (test-case "oracle" + (define tests + '("http://example.com" + "http://example.com/" + "http://example.com/a/b/c?d=e" + "http://example.com/a/b/c?d=e f" + "http://example.com/a;b" + "http://example.com/a/b c;d" + "http://example.com/a/b c;d e" + "http://bogdan@example.com" + "http://bogdan:secret pass@example.com" + "http://bogdan:secret pass@example.com#fragment" + "http://bogdan:secret pass@example.com#fragment a" + "a/b/c" + "/a/b/c")) + + (for ([test (in-list tests)]) + (check-equal? + (url/literal->string (string->url/literal test)) + (url->string (string->url test)))))) + + (test-suite + "is-percent-encoded?" + + (check-false (is-percent-encoded? "")) + (check-false (is-percent-encoded? "%")) + (check-false (is-percent-encoded? "abc")) + (check-false (is-percent-encoded? "a=b")) + (check-true (is-percent-encoded? "a%2Bb")) + (check-false (is-percent-encoded? "a%2Bb%"))))) (module+ test (require rackunit/text-ui) diff --git a/http-easy/http-easy.scrbl b/http-easy/http-easy.scrbl index 22e202a..9bf395f 100644 --- a/http-easy/http-easy.scrbl +++ b/http-easy/http-easy.scrbl @@ -87,9 +87,9 @@ response returns its underlying connection to the pool: @(define sr (secref "guide:streaming")) If you forget to manually close a response, its underlying connection -will get returned to the pool when the response gets -garbage-collected. Unless you explicitly use @sr, you don't have to -worry about this much. +will get returned to the pool when the response gets garbage-collected. +Unless you explicitly use @|sr|, you don't have to worry about this +much. @subsection[#:tag "guide:streaming"]{Streaming Responses} @@ -356,41 +356,42 @@ scheme and url-encode the path to the socket as the host. [#:user-agent user-agent (or/c bytes? string?) (current-user-agent)]) response?]{ Requests @racket[uri] using @racket[s]'s connection pool and - associated settings (SSL context, proxy, cookie jar, etc.). + associated settings (SSL context, proxy, cookie jar, etc.). The + @racket[uri] argument may be a @tech{literal URL}. - Response values returned by this function must be closed before - their underlying connection is returned to the pool. If the - @racket[close?] argument is @racket[#t], this is done - automatically. Ditto if the responses are garbage-collected. + Response values returned by this function must be closed before their + underlying connection is returned to the pool. If the @racket[close?] + argument is @racket[#t], this is done automatically. Ditto if the + responses are garbage-collected. If the @racket[close?] argument is @racket[#t], then the response's output port is drained and the connection is closed. If the @racket[stream?] argument is @racket[#f] (the default), then the response's output port is drained and the resulting byte string - is stored on the response value. The drained data is accessible - using the @racket[response-body] function. If the argument is - @racket[#t], then the response body is streamed and the data is - accessible via the @racket[response-output] function. This argument - has no effect when @racket[close?] is @racket[#t]. + is stored on the response value. The drained data is accessible using + the @racket[response-body] function. If the argument is @racket[#t], + then the response body is streamed and the data is accessible via the + @racket[response-output] function. This argument has no effect when + @racket[close?] is @racket[#t]. The @racket[method] argument specifies the HTTP request method to use. Query parameters may be specified directly on the @racket[uri] - argument or via the @racket[params] argument. If query parameters - are specified via both arguments, then the list of @racket[params] - is appended to those already in the @racket[uri]. + argument or via the @racket[params] argument. If query parameters are + specified via both arguments, then the list of @racket[params] is + appended to those already in the @racket[uri]. The @racket[auth] argument allows authentication headers and query - params to be added to the request. When following redirects, the + params to be added to the request. When following redirects, the auth procedure is applied to subsequent requests only if the target - URL has the @tech{same origin} as the original request. Two URLs - are considered to have the @deftech{same origin} if their scheme, - hostname and port are the same. + URL has the @tech{same origin} as the original request. Two URLs are + considered to have the @deftech{same origin} if their scheme, hostname + and port are the same. - The @racket[data] argument can be used to send arbitrary request - data to the remote end. A number of @tech{payload procedures} - are available for producing data in standard formats: + The @racket[data] argument can be used to send arbitrary request data + to the remote end. A number of @tech{payload procedures} are available + for producing data in standard formats: @interaction[ #:eval he-eval @@ -407,21 +408,21 @@ scheme and url-encode the path to the socket as the host. @racket[json-payload] as the @racket[data] argument. The @racket[data], @racket[form] and @racket[json] arguments are - mutually-exclusive. Supplying more than one at a time causes a + mutually-exclusive. Supplying more than one at a time causes a contract error to be raised. The @racket[timeouts] argument controls how long various aspects of - the request cycle will be waited on. When a timeout is exceeded, an - @racket[exn:fail:http-easy:timeout?] error is raised. When - redirects are followed, the timeouts are per request. + the request cycle will be waited on. When a timeout is exceeded, an + @racket[exn:fail:http-easy:timeout?] error is raised. When redirects + are followed, the timeouts are per request. - The @racket[max-attempts] argument controls how many times - connection errors are retried. This meant to handle connection - resets and the like and isn't a general retry mechanism. + The @racket[max-attempts] argument controls how many times connection + errors are retried. This meant to handle connection resets and the + like and isn't a general retry mechanism. - The @racket[max-redirects] argument controls how many redirects are - followed by the request. Redirect cycles are not detected. To - disable redirect following, set this argument to @racket[0]. The + The @racket[max-redirects] argument controls how many redirects + are followed by the request. Redirect cycles are not detected. To + disable redirect following, set this argument to @racket[0]. The @tt{Authorization} header is stripped from redirect requests if the target URL does not have the @tech{same origin} as the original request. @@ -430,6 +431,26 @@ scheme and url-encode the path to the socket as the host. scheme to allow requests to UNIX domain sockets.}] } +@deftogether[( + @defproc[(url/literal? [v any/c]) boolean?] + @defproc[(string->url/literal [s string?]) url/literal?] + @defproc[(url/literal->string [u url/literal?]) string?] +)]{ + A predicate and conversion procedures for a variant of @racket[url?] + that does not decode user, path, query and fragment components + upon conversion from string. When converting to a string, only + the components of the aforementioned fields that are not already + percent-encoded are encoded. A component is considered to be percent + encoded if all of its percent characters are followed by two + hexadecimal characters. + + @deftech{Literal URLs} are used automatically when handling redirects + to avoid issues that may pop up when decoding an re-encoding URLs from + standards-non-compliant servers. + + @history[#:added "0.7"] +} + @subsection{Responses}