Skip to content

session: improve handling of redirect locations wrt percent encoding #29

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Oct 31, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions http-easy-lib/http-easy.rkt
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
"http-easy/private/response.rkt"
"http-easy/private/session.rkt"
"http-easy/private/timeout.rkt"
"http-easy/private/url.rkt"
"http-easy/private/user-agent.rkt")

(provide
Expand All @@ -35,6 +36,10 @@
(all-from-out "http-easy/private/session.rkt")
(all-from-out "http-easy/private/user-agent.rkt")

(struct-out url/literal)
string->url/literal
url/literal->string

(contract-out
[current-session (parameter/c session?)]))

Expand Down
4 changes: 2 additions & 2 deletions http-easy-lib/http-easy/private/proxy.rkt
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@

(contract-out
[make-proxy (-> (-> url? boolean?) (-> http-conn? url? (or/c #f ssl-client-context?) void?) proxy?)]
[make-http-proxy (->* (urlish/c) ((-> url? boolean?)) proxy?)]
[make-https-proxy (->* (urlish/c) ((-> url? boolean?)) proxy?)]))
[make-http-proxy (->* [urlish/c] [(-> url? boolean?)] proxy?)]
[make-https-proxy (->* [urlish/c] [(-> url? boolean?)] proxy?)]))

(struct proxy (matches? connect!)
#:transparent)
Expand Down
32 changes: 21 additions & 11 deletions http-easy-lib/http-easy/private/session.rkt
Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,7 @@
[exn:fail? (λ (e) (channel-put resp-ch e))])
(define-values (resp-status resp-headers resp-output)
(http-conn-sendrecv!
conn (url-path&query u params)
conn (url-request-uri u params)
#:close? close?
#:method (method->bytes method)
#:headers (headers->list headers)
Expand Down Expand Up @@ -274,21 +274,31 @@

(go (->url urlish)))

;; https://www.rfc-editor.org/rfc/rfc2616#section-14.30
(define (ensure-absolute-url orig location)
(define location-url
(parameterize ([current-alist-separator-mode 'amp])
(string->url location)))
(string->url/literal location))
(cond
[(url-host location-url) location-url]
[else (combine-url/relative orig location)]))
[(url-host location-url)
location-url]
[(not (url-path-absolute? location-url))
(error 'ensure-absolute-url "Location destination is relative")]
[else
(match-define (url scheme user host port _ _ _ _) orig)
(match-define (url _ _ _ _ _ path query fragment) location-url)
(url/literal scheme user host port #t path query fragment)]))

(define (same-origin? a b)
(and (equal? (url-scheme a)
(url-scheme b))
(equal? (url-host a)
(url-host b))
(equal? (url-port a)
(url-port b))))
(and
(equal?
(url-scheme a)
(url-scheme b))
(equal?
(url-host a)
(url-host b))
(equal?
(url-port a)
(url-port b))))

(define (redirect? resp)
(and (memv (response-status-code resp) '(301 302 303 307))
Expand Down
149 changes: 134 additions & 15 deletions http-easy-lib/http-easy/private/url.rkt
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,132 @@
(require net/uri-codec
net/url
racket/contract/base
racket/format
racket/match
racket/serialize
racket/string)

(provide
(struct-out url/literal)
string->url/literal
url/literal->string
is-percent-encoded?
urlish/c
->url
url-scheme*
url-port*
url-path&query)
url-request-uri)

;; A url/literal is like a url from net/url, but the user, path, query
;; and fragment are not decoded upon conversion from string and any
;; components that are already percent-encoded within those fields are
;; skipped upon conversion back to string. A component is considered to
;; already be percent-encoded if all of its % characters are followed by
;; two hexadecimal characters.
;;
;; xref https://github.com/rmculpepper/racket-http123/issues/6
(serializable-struct url/literal url ())

(define (string->url/literal s)
(match-define (list _ scheme user ipv6host host port path query fragment)
(regexp-match url-regexp s))
(let* ([scheme (and scheme (string-downcase scheme))]
[host (or (and ipv6host (string-downcase ipv6host))
(and host (string-downcase host)))]
[port (and port (string->number port))]
[abs? (or (equal? "file" scheme)
(regexp-match? #rx"^/" path)
(and (or host user port) #t))]
[path (let ([components (regexp-split #rx"/" path)])
(for/list ([component (in-list (if (equal? (car components) "")
(cdr components)
components))])
(match-define (cons path-component params)
(regexp-split #rx";" component))
(path/param
(case path-component
[(".") 'same]
[("..") 'up]
[else path-component])
params)))]
[query (if query
(for/list ([component (in-list (regexp-split #rx"&" query))])
(match (regexp-split #rx"=" component)
[(list name value)
(cons (string->symbol name) value)]
[(list name)
(cons (string->symbol name) #f)]))
null)])
(url/literal scheme user host port abs? path query fragment)))

(define (url/literal->string u)
(define out (open-output-string))
(match-define (url scheme user host port abs? path query fragment) u)
(when scheme
(write-string scheme out)
(write-char #\: out))
(cond
[(or user host port)
(write-string "//" out)
(when user
(write-string (maybe-percent-encode user uri-userinfo-encode) out)
(write-char #\@ out))
(when host
(cond
[(ipv6-host? host)
(write-char #\[ out)
(write-string host out)
(write-char #\] out)]
[else
(write-string host out)]))
(when port
(write-char #\: out)
(display port out))]
[(equal? scheme "file")
(write-string "//" out)]
[else
(void)])
(unless (null? path)
(when abs? (write-char #\/ out))
(let loop ([path-components path])
(match-define (path/param path-component params)
(car path-components))
(write-string
(maybe-percent-encode
(case path-component
[(same) "."]
[(up) ".."]
[else path-component])
uri-path-segment-encode)
out)
(for ([param (in-list params)])
(write-char #\; out)
(write-string (maybe-percent-encode param uri-path-segment-encode) out))
(unless (null? (cdr path-components))
(write-char #\/ out)
(loop (cdr path-components)))))
(unless (null? query)
(write-char #\? out)
(for ([pair (in-list query)])
(match-define (cons (app symbol->string name) value) pair)
(write-string (maybe-percent-encode name form-urlencoded-encode) out)
(when value
(write-char #\= out)
(write-string (maybe-percent-encode value form-urlencoded-encode) out))))
(when fragment
(write-char #\# out)
(write-string (maybe-percent-encode fragment) out))
(get-output-string out))

(define (maybe-percent-encode s [encode uri-encode])
(if (is-percent-encoded? s) s (encode s)))

(define (is-percent-encoded? s)
(define num-%-matches (length (regexp-match* #rx"%" s)))
(and (> num-%-matches 0)
(= num-%-matches (length (regexp-match* #px"%[a-fA-F0-9]{2}" s)))))

(define (ipv6-host? s)
(regexp-match? #rx"^[0-9a-fA-F:]*:[0-9a-fA-F:]*$" s))

(define urlish/c
(or/c bytes? string? url?))
Expand All @@ -28,15 +145,14 @@
(define the-url
(parameterize ([current-alist-separator-mode 'amp])
(string->url s)))
(struct-copy url the-url
[scheme (string-trim #:repeat? #t (url-scheme the-url))]
[host (string-trim #:repeat? #t (url-host the-url))])]

(struct-copy
url the-url
[scheme (string-trim #:repeat? #t (url-scheme the-url))]
[host (string-trim #:repeat? #t (url-host the-url))])]
[(string-prefix? s "://")
(string->url* (~a "http" s))]

(string->url* (string-append "http" s))]
[else
(string->url* (~a "http://" s))]))
(string->url* (string-append "http://" s))]))

(module+ internal
(provide string->url*))
Expand All @@ -53,11 +169,14 @@
[("https") 443]
[else 80])))

(define (url-path&query u [params null])
(define path (url-path u))
(define all-params (append (url-query u) params))
(define (url-request-uri u [params null])
(define abs-path
(if (null? path)
(if (null? (url-path u))
(list (path/param "" null))
path))
(url->string (url #f #f #f #f #t abs-path all-params #f)))
(url-path u)))
(define all-params
(append (url-query u) params))
((if (url/literal? u)
url/literal->string
url->string)
(url #f #f #f #f #t abs-path all-params #f)))
2 changes: 1 addition & 1 deletion http-easy-lib/info.rkt
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#lang info

(define license 'BSD-3-Clause)
(define version "0.6.2")
(define version "0.7")
(define collection "net")
(define deps
'(["base" #:version "8.1.0.4"]
Expand Down
27 changes: 26 additions & 1 deletion http-easy-test/net/http-easy/http-easy.rkt
Original file line number Diff line number Diff line change
Expand Up @@ -376,7 +376,32 @@
(check-equal?
(response-body
(get (format "http://127.0.0.1:~a" port)))
#"hello")))))
#"hello"))))

;; xref: https://chrt.fm/track/E341G/dts.podtrac.com/redirect.mp3/prfx.byspotify.com/e/rss.art19.com/episodes/f441d319-c90a-4632-bed5-5bd3e596018e.mp3?rss_browser=BAhJIg9Qb2RjYXRjaGVyBjoGRVQ%3D--8c940e38b58f38097352f6f4709902a1b7f12844
(test-case "redirect to location with encoded + in path"
(call-with-tcp-server
(lambda (lines out)
(match (car lines)
["GET / HTTP/1.1"
(fprintf out "HTTP/1.1 302 Found\r\n")
(fprintf out "Location: /a%2Bb.mp3\r\n")
(fprintf out "\r\n")]
["GET /a%2Bb.mp3 HTTP/1.1"
(fprintf out "HTTP/1.1 200 OK\r\n")
(fprintf out "Content-Length: 2\r\n")
(fprintf out "\r\n")
(fprintf out "ok")]
["GET /a+b.mp3 HTTP/1.1"
(fprintf out "HTTP/1.1 400 Bad Request\r\n")
(fprintf out "Content-Length: 3\r\n")
(fprintf out "\r\n")
(fprintf out "err")]))
(lambda (port)
(parameterize ([current-session (make-session)])
(check-equal? (response-body (get (format "http://127.0.0.1:~a" port))) #"ok")
;; https://github.com/Bogdanp/racket-http-easy/issues/25
(check-equal? (response-body (get (string->url/literal (format "http://127.0.0.1:~a/a%2Bb.mp3" port)))) #"ok"))))))

(test-suite
"custom port"
Expand Down
64 changes: 57 additions & 7 deletions http-easy-test/net/http-easy/private/url.rkt
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
"url"

(test-suite
"url-path&query"
"url-request-uri"

(test-case "extracts various kinds of paths"
(define tests
Expand All @@ -29,11 +29,11 @@
("/a;b/c" () "/a;b/c")
("/å/b/c" () "/%C3%A5/b/c")))

(for* ([pair (in-list tests)]
[s (in-value (car pair))]
[p (in-value (cadr pair))]
[e (in-value (caddr pair))])
(check-equal? (url-path&query (string->url* s) p) e))))
(for* ([tuple (in-list tests)]
[s (in-value (car tuple))]
[p (in-value (cadr tuple))]
[e (in-value (caddr tuple))])
(check-equal? (url-request-uri (string->url* s) p) e))))

(test-suite
"string->url*"
Expand All @@ -54,7 +54,57 @@
(for* ([pair (in-list tests)]
[s (in-value (car pair))]
[e (in-value (cdr pair))])
(check-equal? (url->string (string->url* s)) e s))))))
(check-equal? (url->string (string->url* s)) e s))))

(test-suite
"url/literal"

(test-case "roundtrips"
(define tests
'(("http://example.com" . "http://example.com")
("http://[email protected]:5100" . "http://[email protected]:5100")
("http://example.com/a/b/c" . "http://example.com/a/b/c")
("http://example.com/a%2Bb.mp3" . "http://example.com/a%2Bb.mp3")
("http://example.com/a%2Bb.mp3?c=d+e" . "http://example.com/a%2Bb.mp3?c=d%2Be")
("a/b/c" . "a/b/c")
("/a/b/c" . "/a/b/c")
("/a;b;c" . "/a;b;c")))

(for* ([pair (in-list tests)]
[s (in-value (car pair))]
[e (in-value (cdr pair))])
(check-equal? (url/literal->string (string->url/literal s)) e s)))

(test-case "oracle"
(define tests
'("http://example.com"
"http://example.com/"
"http://example.com/a/b/c?d=e"
"http://example.com/a/b/c?d=e f"
"http://example.com/a;b"
"http://example.com/a/b c;d"
"http://example.com/a/b c;d e"
"http://[email protected]"
"http://bogdan:secret [email protected]"
"http://bogdan:secret [email protected]#fragment"
"http://bogdan:secret [email protected]#fragment a"
"a/b/c"
"/a/b/c"))

(for ([test (in-list tests)])
(check-equal?
(url/literal->string (string->url/literal test))
(url->string (string->url test))))))

(test-suite
"is-percent-encoded?"

(check-false (is-percent-encoded? ""))
(check-false (is-percent-encoded? "%"))
(check-false (is-percent-encoded? "abc"))
(check-false (is-percent-encoded? "a=b"))
(check-true (is-percent-encoded? "a%2Bb"))
(check-false (is-percent-encoded? "a%2Bb%")))))

(module+ test
(require rackunit/text-ui)
Expand Down
Loading
Loading