diff --git a/url/src/lib.rs b/url/src/lib.rs index 6e6e7497..f0409b90 100644 --- a/url/src/lib.rs +++ b/url/src/lib.rs @@ -1756,6 +1756,39 @@ impl Url { let old_after_path_pos = to_u32(self.serialization.len()).unwrap(); let cannot_be_a_base = self.cannot_be_a_base(); let scheme_type = SchemeType::from(self.scheme()); + let mut path_empty = false; + + // Check ':' and then see if the next character is '/' + let mut has_host = if let Some(index) = self.serialization.find(":") { + if self.serialization.len() > index + 1 + && self.serialization.as_bytes().get(index + 1) == Some(&b'/') + { + let rest = &self.serialization[(index + ":/".len())..]; + let host_part = rest.split('/').next().unwrap_or(""); + path_empty = rest.is_empty(); + !host_part.is_empty() && !host_part.contains('@') + } else { + false + } + } else { + false + }; + + // Ensure the path length is greater than 1 to account + // for cases where "/." is already appended from serialization + // If we set path, then we already checked the other two conditions: + // https://url.spec.whatwg.org/#url-serializing + // 1. The host is null + // 2. the first segment of the URL's path is an empty string + if self.path().len() + path.len() > 1 { + if let Some(index) = self.serialization.find(":") { + let removal_start = index + ":".len(); + if self.serialization[removal_start..].starts_with("/.") { + self.path_start = removal_start as u32; + } + } + } + self.serialization.truncate(self.path_start as usize); self.mutate(|parser| { if cannot_be_a_base { @@ -1765,7 +1798,6 @@ impl Url { } parser.parse_cannot_be_a_base_path(parser::Input::new_no_trim(path)); } else { - let mut has_host = true; // FIXME parser.parse_path_start( scheme_type, &mut has_host, @@ -1773,6 +1805,26 @@ impl Url { ); } }); + + // For cases where normalization is applied across both the serialization and the path. + // Append "/." immediately after the scheme (up to ":") + // This is done if three conditions are met. + // https://url.spec.whatwg.org/#url-serializing + // 1. The host is null + // 2. The url's path length is greater than 1 + // 3. the first segment of the URL's path is an empty string + if !has_host && path.len() > 1 && path_empty { + if let Some(index) = self.serialization.find(":") { + if self.serialization.len() > index + 2 + && self.serialization.as_bytes().get(index + 1) == Some(&b'/') + && self.serialization.as_bytes().get(index + 2) == Some(&b'/') + { + self.serialization.insert_str(index + ":".len(), "/."); + self.path_start += "/.".len() as u32; + } + } + } + self.restore_after_path(old_after_path_pos, &after_path); } @@ -2076,7 +2128,7 @@ impl Url { } else { self.host_end }; - let suffix = self.slice(old_suffix_pos..).to_owned(); + let mut suffix = self.slice(old_suffix_pos..).to_owned(); self.serialization.truncate(self.host_start as usize); if !self.has_authority() { debug_assert!(self.slice(self.scheme_end..self.host_start) == ":"); @@ -2096,7 +2148,16 @@ impl Url { write!(&mut self.serialization, ":{}", port).unwrap(); } } - let new_suffix_pos = to_u32(self.serialization.len()).unwrap(); + let mut new_suffix_pos = to_u32(self.serialization.len()).unwrap(); + + // Remove starting "/." for empty path segment followed by the host + if suffix.starts_with("/.//") { + let adjustment: usize = "/.".len(); + suffix.drain(..adjustment); + // pathname should be "//p" not "p" given that the first segment was empty + new_suffix_pos -= adjustment as u32; + } + self.serialization.push_str(&suffix); let adjust = |index: &mut u32| { diff --git a/url/src/path_segments.rs b/url/src/path_segments.rs index e6363c5c..5da8af0d 100644 --- a/url/src/path_segments.rs +++ b/url/src/path_segments.rs @@ -239,7 +239,7 @@ impl PathSegmentsMut<'_> { I::Item: AsRef, { let scheme_type = SchemeType::from(self.url.scheme()); - let path_start = self.url.path_start as usize; + let mut path_start = self.url.path_start as usize; self.url.mutate(|parser| { parser.context = parser::Context::PathSegmentSetter; for segment in segments { @@ -253,7 +253,44 @@ impl PathSegmentsMut<'_> { { parser.serialization.push('/'); } - let mut has_host = true; // FIXME account for this? + + let mut path_empty = false; + + // Check ':' and then see if the next character is '/' + let mut has_host = if let Some(index) = parser.serialization.find(":") { + if parser.serialization.len() > index + 1 + && parser.serialization.as_bytes().get(index + 1) == Some(&b'/') + { + let rest = &parser.serialization[(index + ":/".len())..]; + let host_part = rest.split('/').next().unwrap_or(""); + path_empty = rest.is_empty(); + !host_part.is_empty() && !host_part.contains('@') + } else { + false + } + } else { + false + }; + + // For cases where normalization is applied across both the serialization and the path. + // Append "/." immediately after the scheme (up to ":") + // This is done if three conditions are met. + // https://url.spec.whatwg.org/#url-serializing + // 1. The host is null + // 2. The url's path length is greater than 1 + // 3. the first segment of the URL's path is an empty string + if !has_host && segment.len() > 1 && path_empty { + if let Some(index) = parser.serialization.find(":") { + if parser.serialization.len() == index + 2 + && parser.serialization.as_bytes().get(index + 1) == Some(&b'/') + { + // Append an extra '/' to ensure that "/./path" becomes "/.//path" + parser.serialization.insert_str(index + ":".len(), "/./"); + path_start += "/.".len(); + } + } + } + parser.parse_path( scheme_type, &mut has_host, @@ -262,6 +299,7 @@ impl PathSegmentsMut<'_> { ); } }); + self.url.path_start = path_start as u32; self } } diff --git a/url/tests/expected_failures.txt b/url/tests/expected_failures.txt index 899e7f70..9bf60b34 100644 --- a/url/tests/expected_failures.txt +++ b/url/tests/expected_failures.txt @@ -36,14 +36,8 @@ set hostname to set hostname to - set hostname to - set hostname to <> set pathname to <> set href to set pathname to <\\\\> set pathname to set pathname to - set pathname to - set pathname to - set pathname to - set pathname to

diff --git a/url/tests/unit.rs b/url/tests/unit.rs index 6a9430bd..85b7f1f0 100644 --- a/url/tests/unit.rs +++ b/url/tests/unit.rs @@ -1387,3 +1387,127 @@ fn serde_error_message() { r#"relative URL without a base: "§invalid#+#*Ä" at line 1 column 25"# ); } + +#[test] +fn test_fuzzing_uri_failures() { + use url::quirks; + let mut url = Url::parse("data:/.dummy.path").unwrap(); + assert!(!url.cannot_be_a_base()); + + url.set_path(".dummy.path"); + assert_eq!(url.as_str(), "data:/.dummy.path"); + assert_eq!(url.path(), "/.dummy.path"); + url.check_invariants().unwrap(); + + url.path_segments_mut() + .expect("should have path segments") + .push(".another.dummy.path"); + assert_eq!(url.as_str(), "data:/.dummy.path/.another.dummy.path"); + assert_eq!(url.path(), "/.dummy.path/.another.dummy.path"); + url.check_invariants().unwrap(); + + url = Url::parse("web+demo:/").unwrap(); + assert!(!url.cannot_be_a_base()); + + url.set_path("//.dummy.path"); + assert_eq!(url.path(), "//.dummy.path"); + + let segments: Vec<_> = url + .path_segments() + .expect("should have path segments") + .collect(); + assert_eq!(segments, vec!["", ".dummy.path"]); + assert_eq!(url.as_str(), "web+demo:/.//.dummy.path"); + + quirks::set_hostname(&mut url, ".dummy.host").unwrap(); + assert_eq!(url.as_str(), "web+demo://.dummy.host//.dummy.path"); + url.check_invariants().unwrap(); + + quirks::set_hostname(&mut url, "").unwrap(); + assert_eq!(url.as_str(), "web+demo:////.dummy.path"); + url.check_invariants().unwrap(); +} + +#[test] +fn test_can_be_a_base_with_set_path() { + use url::quirks; + let mut url = Url::parse("web+demo:/").unwrap(); + assert!(!url.cannot_be_a_base()); + + url.set_path("//not-a-host"); + assert_eq!(url.path(), "//not-a-host"); + + let segments: Vec<_> = url + .path_segments() + .expect("should have path segments") + .collect(); + + assert_eq!(segments, vec!["", "not-a-host"]); + + url.set_query(Some("query")); + url.set_fragment(Some("frag")); + + assert_eq!(url.as_str(), "web+demo:/.//not-a-host?query#frag"); + quirks::set_hostname(&mut url, "test").unwrap(); + assert_eq!(url.as_str(), "web+demo://test//not-a-host?query#frag"); + url.check_invariants().unwrap(); + quirks::set_hostname(&mut url, "").unwrap(); + assert_eq!(url.as_str(), "web+demo:////not-a-host?query#frag"); + url.check_invariants().unwrap(); +} + +#[test] +fn test_can_be_a_base_with_path_segments_mut() { + let mut url = Url::parse("web+demo:/").unwrap(); + assert!(!url.cannot_be_a_base()); + + url.path_segments_mut() + .expect("should have path segments") + .push("") + .push("not-a-host"); + + url.set_query(Some("query")); + url.set_fragment(Some("frag")); + + assert_eq!(url.as_str(), "web+demo:/.//not-a-host?query#frag"); + assert_eq!(url.path(), "//not-a-host"); + url.check_invariants().unwrap(); + + let segments: Vec<_> = url + .path_segments() + .expect("should have path segments") + .collect(); + assert_eq!(segments, vec!["", "not-a-host"]); +} + +#[test] +fn test_valid_indices_after_set_path() { + // Testing everything + let mut url = Url::parse("moz:/").unwrap(); + assert!(!url.cannot_be_a_base()); + + url.set_path("/.//p"); + url.set_host(Some("host")).unwrap(); + url.set_query(Some("query")); + url.set_fragment(Some("frag")); + assert_eq!(url.as_str(), "moz://host//p?query#frag"); + assert_eq!(url.host(), Some(Host::Domain("host"))); + assert_eq!(url.path(), "//p"); + assert_eq!(url.query(), Some("query")); + assert_eq!(url.fragment(), Some("frag")); + url.check_invariants().unwrap(); + + url = Url::parse("moz:/.//").unwrap(); + assert!(!url.cannot_be_a_base()); + + url.set_path("p"); + url.set_host(Some("host")).unwrap(); + url.set_query(Some("query")); + url.set_fragment(Some("frag")); + assert_eq!(url.as_str(), "moz://host/p?query#frag"); + assert_eq!(url.host(), Some(Host::Domain("host"))); + assert_eq!(url.path(), "/p"); + assert_eq!(url.query(), Some("query")); + assert_eq!(url.fragment(), Some("frag")); + url.check_invariants().unwrap(); +}