diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index cdddadb83..2c52a543d 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -59,7 +59,11 @@ jobs: - uses: dtolnay/rust-toolchain@stable with: targets: wasm32-unknown-unknown + - name: Install wasm-pack + run: curl https://rustwasm.github.io/wasm-pack/installer/init.sh -sSf | sh - run: cargo build --target wasm32-unknown-unknown + - run: cd url && wasm-pack test --headless --chrome + - run: cd url && wasm-pack test --headless --firefox Lint: runs-on: ubuntu-latest diff --git a/url/Cargo.toml b/url/Cargo.toml index a50ab31f1..46e85f966 100644 --- a/url/Cargo.toml +++ b/url/Cargo.toml @@ -21,12 +21,21 @@ serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" bencher = "0.1" +[target.'cfg(all(target_arch = "wasm32", target_os = "unknown"))'.dev-dependencies] +wasm-bindgen-test = "0.3" +web-sys = { version = "0.3.65", features = ["Navigator", "Window"] } + [dependencies] form_urlencoded = { version = "1.2.1", path = "../form_urlencoded" } -idna = { version = "0.5.0", path = "../idna" } percent-encoding = { version = "2.3.1", path = "../percent_encoding" } serde = { version = "1.0", optional = true, features = ["derive"] } +[target.'cfg(not(all(target_arch = "wasm32", target_os = "unknown")))'.dependencies] +idna = { version = "0.5.0", path = "../idna" } + +[target.'cfg(all(target_arch = "wasm32", target_os = "unknown"))'.dependencies] +web-sys = { version = "0.3.65", features = ["Url"] } + [features] default = [] # Enable to use the #[debugger_visualizer] attribute. This feature requires Rust >= 1.71. diff --git a/url/src/host.rs b/url/src/host.rs index 9931c2f87..1bfc84d52 100644 --- a/url/src/host.rs +++ b/url/src/host.rs @@ -89,29 +89,7 @@ impl Host { return Err(ParseError::EmptyHost); } - let is_invalid_domain_char = |c| { - matches!( - c, - '\0'..='\u{001F}' - | ' ' - | '#' - | '%' - | '/' - | ':' - | '<' - | '>' - | '?' - | '@' - | '[' - | '\\' - | ']' - | '^' - | '\u{007F}' - | '|' - ) - }; - - if domain.find(is_invalid_domain_char).is_some() { + if domain.find(Self::is_invalid_domain_char).is_some() { Err(ParseError::InvalidDomainCharacter) } else if ends_in_a_number(&domain) { let address = parse_ipv4addr(&domain)?; @@ -161,10 +139,65 @@ impl Host { } } - /// convert domain with idna + #[cfg(not(all(target_arch = "wasm32", target_os = "unknown")))] + /// Convert IDN domain to ASCII form with [idna] fn domain_to_ascii(domain: &str) -> Result { idna::domain_to_ascii(domain).map_err(Into::into) } + + #[cfg(all(target_arch = "wasm32", target_os = "unknown"))] + const SENTINEL_HOSTNAME: &'static str = "url-host-web-sys-sentinel"; + #[cfg(all(target_arch = "wasm32", target_os = "unknown"))] + const SENTINEL_URL: &'static str = "http://url-host-web-sys-sentinel"; + + #[cfg(all(target_arch = "wasm32", target_os = "unknown"))] + /// Convert IDN domain to ASCII form with [web_sys::Url] + fn domain_to_ascii(domain: &str) -> Result { + debug_assert!(Self::SENTINEL_URL.ends_with(Self::SENTINEL_HOSTNAME)); + // Url throws an error on empty hostnames + if domain.is_empty() { + return Ok(domain.to_string()); + } + // Url returns strange results for invalid domain chars + if domain.contains(Self::is_invalid_domain_char) { + return Err(ParseError::InvalidDomainCharacter); + } + + // Create a new Url with a sentinel value. + let u = web_sys::Url::new(Self::SENTINEL_URL).map_err(|_| ParseError::IdnaError)?; + debug_assert_eq!(u.hostname(), Self::SENTINEL_HOSTNAME); + // Whenever set_hostname fails, it doesn't update the Url. + u.set_hostname(domain); + let h = u.hostname(); + if h.eq_ignore_ascii_case(Self::SENTINEL_HOSTNAME) || h.is_empty() { + // It's probably invalid + Err(ParseError::IdnaError) + } else { + Ok(h) + } + } + + fn is_invalid_domain_char(c: char) -> bool { + matches!( + c, + '\0'..='\u{001F}' + | ' ' + | '#' + | '%' + | '/' + | ':' + | '<' + | '>' + | '?' + | '@' + | '[' + | '\\' + | ']' + | '^' + | '\u{007F}' + | '|' + ) + } } impl> fmt::Display for Host { diff --git a/url/src/lib.rs b/url/src/lib.rs index 78980e85f..1f827243a 100644 --- a/url/src/lib.rs +++ b/url/src/lib.rs @@ -154,9 +154,12 @@ use std::borrow::Borrow; use std::cmp; use std::fmt::{self, Write}; use std::hash; +#[cfg(any(unix, windows, target_os = "redox", target_os = "wasi"))] use std::io; use std::mem; -use std::net::{IpAddr, SocketAddr, ToSocketAddrs}; +use std::net::IpAddr; +#[cfg(any(unix, windows, target_os = "redox", target_os = "wasi"))] +use std::net::{SocketAddr, ToSocketAddrs}; use std::ops::{Range, RangeFrom, RangeTo}; use std::path::{Path, PathBuf}; use std::str; @@ -1252,6 +1255,7 @@ impl Url { /// }) /// } /// ``` + #[cfg(any(unix, windows, target_os = "redox", target_os = "wasi"))] pub fn socket_addrs( &self, default_port_number: impl Fn() -> Option, diff --git a/url/src/origin.rs b/url/src/origin.rs index 81193f510..0311c6c33 100644 --- a/url/src/origin.rs +++ b/url/src/origin.rs @@ -85,7 +85,15 @@ impl Origin { } } - /// + #[cfg(not(all(target_arch = "wasm32", target_os = "unknown")))] + #[deprecated] + /// [The Unicode serialization of an origin][0]. + /// + /// This [has been removed from the standard][1] because it was never widely + /// adopted, and was difficult to use. + /// + /// [0]: https://html.spec.whatwg.org/multipage/#unicode-serialisation-of-an-origin + /// [1]: https://github.com/whatwg/html/pull/2689 pub fn unicode_serialization(&self) -> String { match *self { Origin::Opaque(_) => "null".to_owned(), diff --git a/url/src/parser.rs b/url/src/parser.rs index 7d94d1d71..036c9f7d0 100644 --- a/url/src/parser.rs +++ b/url/src/parser.rs @@ -87,6 +87,7 @@ simple_enum_error! { Overflow => "URLs more than 4 GB are not supported", } +#[cfg(not(all(target_arch = "wasm32", target_os = "unknown")))] impl From<::idna::Errors> for ParseError { fn from(_: ::idna::Errors) -> ParseError { ParseError::IdnaError diff --git a/url/src/quirks.rs b/url/src/quirks.rs index 3a99e22cf..eac7c15d3 100644 --- a/url/src/quirks.rs +++ b/url/src/quirks.rs @@ -57,7 +57,13 @@ pub fn internal_components(url: &Url) -> InternalComponents { } } -/// https://url.spec.whatwg.org/#dom-url-domaintoascii +/// Converts a domain name to its ASCII (punycode) form. +/// +/// This feature was never implemented by browsers, and +/// [has been removed from the URL spec][0]. +/// +/// [0]: https://github.com/whatwg/url/issues/63 +#[deprecated] pub fn domain_to_ascii(domain: &str) -> String { match Host::parse(domain) { Ok(Host::Domain(domain)) => domain, @@ -65,7 +71,16 @@ pub fn domain_to_ascii(domain: &str) -> String { } } -/// https://url.spec.whatwg.org/#dom-url-domaintounicode +#[cfg(not(all(target_arch = "wasm32", target_os = "unknown")))] +#[deprecated] +/// Converts a domain name to its Unicode form. +/// +/// This feature was never implemented by browsers, and +/// [has been removed from the URL spec][0]. +/// +/// This feature is not available on `wasm32-unknown-unknown` targets. +/// +/// [0]: https://github.com/whatwg/url/issues/63 pub fn domain_to_unicode(domain: &str) -> String { match Host::parse(domain) { Ok(Host::Domain(ref domain)) => { diff --git a/url/tests/expected_failures_chromium.txt b/url/tests/expected_failures_chromium.txt new file mode 100644 index 000000000..39da5ce90 --- /dev/null +++ b/url/tests/expected_failures_chromium.txt @@ -0,0 +1,14 @@ + against + against + + + set host to + set hostname to + + + + + + + + diff --git a/url/tests/expected_failures_firefox.txt b/url/tests/expected_failures_firefox.txt new file mode 100644 index 000000000..98ed87696 --- /dev/null +++ b/url/tests/expected_failures_firefox.txt @@ -0,0 +1,2 @@ + + diff --git a/url/tests/expected_failures_safari.txt b/url/tests/expected_failures_safari.txt new file mode 100644 index 000000000..d6ed932cc --- /dev/null +++ b/url/tests/expected_failures_safari.txt @@ -0,0 +1,2 @@ + against + against diff --git a/url/tests/unit.rs b/url/tests/unit.rs index f8069dc89..77a726954 100644 --- a/url/tests/unit.rs +++ b/url/tests/unit.rs @@ -11,9 +11,16 @@ use std::borrow::Cow; use std::cell::{Cell, RefCell}; use std::net::{Ipv4Addr, Ipv6Addr}; +#[cfg(any(unix, windows, target_os = "redox", target_os = "wasi"))] use std::path::{Path, PathBuf}; use url::{form_urlencoded, Host, Origin, Url}; +// https://rustwasm.github.io/wasm-bindgen/wasm-bindgen-test/usage.html +#[cfg(all(target_arch = "wasm32", target_os = "unknown"))] +use wasm_bindgen_test::{wasm_bindgen_test as test, wasm_bindgen_test_configure}; +#[cfg(all(target_arch = "wasm32", target_os = "unknown"))] +wasm_bindgen_test_configure!(run_in_browser); + #[test] fn size() { use std::mem::size_of; @@ -117,6 +124,7 @@ fn test_set_empty_query() { assert_eq!(base.as_str(), "moz://example.com/path"); } +#[cfg(any(unix, windows, target_os = "redox", target_os = "wasi"))] macro_rules! assert_from_file_path { ($path: expr) => { assert_from_file_path!($path, $path) @@ -130,6 +138,7 @@ macro_rules! assert_from_file_path { } #[test] +#[cfg(any(unix, windows))] fn new_file_paths() { if cfg!(unix) { assert_eq!(Url::from_file_path(Path::new("relative")), Err(())); @@ -162,28 +171,28 @@ fn new_path_bad_utf8() { } #[test] +#[cfg(windows)] fn new_path_windows_fun() { - if cfg!(windows) { - assert_from_file_path!(r"C:\foo\bar", "/C:/foo/bar"); - assert_from_file_path!("C:\\foo\\ba\0r", "/C:/foo/ba%00r"); + assert_from_file_path!(r"C:\foo\bar", "/C:/foo/bar"); + assert_from_file_path!("C:\\foo\\ba\0r", "/C:/foo/ba%00r"); - // Invalid UTF-8 - assert!(Url::parse("file:///C:/foo/ba%80r") - .unwrap() - .to_file_path() - .is_err()); + // Invalid UTF-8 + assert!(Url::parse("file:///C:/foo/ba%80r") + .unwrap() + .to_file_path() + .is_err()); - // test windows canonicalized path - let path = PathBuf::from(r"\\?\C:\foo\bar"); - assert!(Url::from_file_path(path).is_ok()); + // test windows canonicalized path + let path = PathBuf::from(r"\\?\C:\foo\bar"); + assert!(Url::from_file_path(path).is_ok()); - // Percent-encoded drive letter - let url = Url::parse("file:///C%3A/foo/bar").unwrap(); - assert_eq!(url.to_file_path(), Ok(PathBuf::from(r"C:\foo\bar"))); - } + // Percent-encoded drive letter + let url = Url::parse("file:///C%3A/foo/bar").unwrap(); + assert_eq!(url.to_file_path(), Ok(PathBuf::from(r"C:\foo\bar"))); } #[test] +#[cfg(any(unix, windows))] fn new_directory_paths() { if cfg!(unix) { assert_eq!(Url::from_directory_path(Path::new("relative")), Err(())); @@ -439,6 +448,7 @@ fn issue_61() { } #[test] +#[cfg(any(unix, target_os = "redox", target_os = "wasi"))] #[cfg(not(windows))] /// https://github.com/servo/rust-url/issues/197 fn issue_197() { @@ -587,6 +597,8 @@ fn test_origin_opaque() { } #[test] +#[cfg(not(all(target_arch = "wasm32", target_os = "unknown")))] +#[allow(deprecated)] fn test_origin_unicode_serialization() { let data = [ ("http://😅.com", "http://😅.com"), @@ -623,6 +635,7 @@ fn test_origin_unicode_serialization() { } #[test] +#[cfg(any(unix, windows, target_os = "redox", target_os = "wasi"))] fn test_socket_addrs() { use std::net::ToSocketAddrs; @@ -759,8 +772,11 @@ fn test_set_href() { } #[test] +#[allow(deprecated)] fn test_domain_encoding_quirks() { - use url::quirks::{domain_to_ascii, domain_to_unicode}; + use url::quirks::domain_to_ascii; + #[cfg(not(all(target_arch = "wasm32", target_os = "unknown")))] + use url::quirks::domain_to_unicode; let data = [ ("http://example.com", "", ""), @@ -771,6 +787,7 @@ fn test_domain_encoding_quirks() { for url in &data { assert_eq!(domain_to_ascii(url.0), url.1); + #[cfg(not(all(target_arch = "wasm32", target_os = "unknown")))] assert_eq!(domain_to_unicode(url.0), url.2); } } @@ -804,11 +821,8 @@ fn test_expose_internals() { } #[test] +#[cfg(windows)] fn test_windows_unc_path() { - if !cfg!(windows) { - return; - } - let url = Url::from_file_path(Path::new(r"\\host\share\path\file.txt")).unwrap(); assert_eq!(url.as_str(), "file://host/share/path/file.txt"); @@ -928,6 +942,7 @@ fn test_url_from_file_path() { } /// https://github.com/servo/rust-url/issues/505 +#[cfg(any(unix, target_os = "redox", target_os = "wasi"))] #[cfg(not(windows))] #[test] fn test_url_from_file_path() { diff --git a/url/tests/wpt.rs b/url/tests/wpt.rs index 701044d67..ff527a38f 100644 --- a/url/tests/wpt.rs +++ b/url/tests/wpt.rs @@ -8,13 +8,62 @@ //! Data-driven tests imported from web-platform-tests +use serde_json::Value; use std::collections::HashMap; use std::fmt::Write; -use std::panic; - -use serde_json::Value; +#[cfg(all(target_arch = "wasm32", target_os = "unknown"))] +use std::sync::Mutex; use url::Url; +// https://rustwasm.github.io/wasm-bindgen/wasm-bindgen-test/usage.html +#[cfg(all(target_arch = "wasm32", target_os = "unknown"))] +use wasm_bindgen_test::{console_log, wasm_bindgen_test, wasm_bindgen_test_configure}; +#[cfg(all(target_arch = "wasm32", target_os = "unknown"))] +wasm_bindgen_test_configure!(run_in_browser); + +// wpt has its own test driver, but we shoe-horn this into wasm_bindgen_test +// which will discard stdout and stderr. So, we make println! go to +// console.log(), so we see failures that do not result in panics. + +#[cfg(all(target_arch = "wasm32", target_os = "unknown"))] +static PRINT_BUF: Mutex> = Mutex::new(None); + +#[cfg(all(target_arch = "wasm32", target_os = "unknown"))] +macro_rules! print { + ($($arg:tt)*) => { + let v = format!($($arg)*); + { + let mut buf = PRINT_BUF.lock().unwrap(); + if let Some(buf) = buf.as_mut() { + buf.push_str(&v); + } else { + *buf = Some(v); + } + } + }; +} + +#[cfg(all(target_arch = "wasm32", target_os = "unknown"))] +macro_rules! println { + () => { + let buf = PRINT_BUF.lock().unwrap().take(); + match buf { + Some(buf) => console_log!("{buf}"), + None => console_log!(""), + } + }; + ($($arg:tt)*) => { + let buf = PRINT_BUF.lock().unwrap().take(); + match buf { + Some(buf) => { + let v = format!($($arg)*); + console_log!("{buf}{v}"); + }, + None => console_log!($($arg)*), + } + } +} + #[derive(Debug, serde::Deserialize)] struct UrlTest { input: String, @@ -71,6 +120,7 @@ struct SetterTestExpected { hash: Option, } +#[cfg_attr(all(target_arch = "wasm32", target_os = "unknown"), wasm_bindgen_test)] fn main() { let mut filter = None; let mut args = std::env::args().skip(1); @@ -89,6 +139,25 @@ fn main() { let mut expected_failures = include_str!("expected_failures.txt") .lines() .collect::>(); + #[cfg(all(target_arch = "wasm32", target_os = "unknown"))] + { + // Every browser has its quirks. + let user_agent = web_sys::window() + .unwrap() + .navigator() + .user_agent() + .unwrap() + .to_ascii_lowercase(); + if user_agent.contains("chrom") { + expected_failures.extend(include_str!("expected_failures_chromium.txt").lines()); + } + if user_agent.contains("gecko/20") { + expected_failures.extend(include_str!("expected_failures_firefox.txt").lines()); + } + if user_agent.contains("safari") && !user_agent.contains("chrom") { + expected_failures.extend(include_str!("expected_failures_safari.txt").lines()); + } + } let mut errors = vec![]; @@ -228,16 +297,16 @@ fn run_url_test( ) -> Result<(), String> { let base = match base { Some(base) => { - let base = panic::catch_unwind(|| Url::parse(&base)) - .map_err(|_| "panicked while parsing base".to_string())? - .map_err(|e| format!("errored while parsing base: {}", e))?; + let base = + Url::parse(&base).map_err(|e| format!("errored while parsing base: {}", e))?; Some(base) } None => None, }; - let res = panic::catch_unwind(move || Url::options().base_url(base.as_ref()).parse(&input)) - .map_err(|_| "panicked while parsing input".to_string())? + let res = Url::options() + .base_url(base.as_ref()) + .parse(&input) .map_err(|e| format!("errored while parsing input: {}", e)); match result { @@ -340,38 +409,34 @@ fn run_setter_test( expected, }: SetterTest, ) -> Result<(), String> { - let mut url = panic::catch_unwind(|| Url::parse(&href)) - .map_err(|_| "panicked while parsing href".to_string())? - .map_err(|e| format!("errored while parsing href: {}", e))?; - - let url = panic::catch_unwind(move || { - match kind { - "protocol" => { - url::quirks::set_protocol(&mut url, &new_value).ok(); - } - "username" => { - url::quirks::set_username(&mut url, &new_value).ok(); - } - "password" => { - url::quirks::set_password(&mut url, &new_value).ok(); - } - "host" => { - url::quirks::set_host(&mut url, &new_value).ok(); - } - "hostname" => { - url::quirks::set_hostname(&mut url, &new_value).ok(); - } - "port" => { - url::quirks::set_port(&mut url, &new_value).ok(); - } - "pathname" => url::quirks::set_pathname(&mut url, &new_value), - "search" => url::quirks::set_search(&mut url, &new_value), - "hash" => url::quirks::set_hash(&mut url, &new_value), - _ => panic!("unknown setter kind: {:?}", kind), - }; - url - }) - .map_err(|_| "panicked while setting value".to_string())?; + let mut url = Url::parse(&href).map_err(|e| format!("errored while parsing href: {}", e))?; + + match kind { + "protocol" => { + url::quirks::set_protocol(&mut url, &new_value).ok(); + } + "username" => { + url::quirks::set_username(&mut url, &new_value).ok(); + } + "password" => { + url::quirks::set_password(&mut url, &new_value).ok(); + } + "host" => { + url::quirks::set_host(&mut url, &new_value).ok(); + } + "hostname" => { + url::quirks::set_hostname(&mut url, &new_value).ok(); + } + "port" => { + url::quirks::set_port(&mut url, &new_value).ok(); + } + "pathname" => url::quirks::set_pathname(&mut url, &new_value), + "search" => url::quirks::set_search(&mut url, &new_value), + "hash" => url::quirks::set_hash(&mut url, &new_value), + _ => { + return Err(format!("unknown setter kind: {:?}", kind)); + } + } if let Some(expected_href) = expected.href { let href = url::quirks::href(&url);