From 3c17c67c1fca7a53eb3b7e8eeeabed31cef01839 Mon Sep 17 00:00:00 2001 From: Riccardo Mazzarini Date: Thu, 13 Nov 2025 13:25:12 +0100 Subject: [PATCH 1/4] Implement `AbsPath::normalize()` --- core/src/abs_path.rs | 265 ++++++++++++++++++++++++++++++++++++++++--- core/src/lib.rs | 7 +- tests/abs_path.rs | 88 +++++++++++++- 3 files changed, 340 insertions(+), 20 deletions(-) diff --git a/core/src/abs_path.rs b/core/src/abs_path.rs index c422d9c..5c1c63a 100644 --- a/core/src/abs_path.rs +++ b/core/src/abs_path.rs @@ -1,10 +1,13 @@ use alloc::borrow::{Cow, ToOwned}; use core::error::Error; use core::fmt; -use core::ops::Deref; +use core::ops::{Deref, Range}; + +use compact_str::CompactString; use crate::{ AbsPathBuf, + InvalidNodeNameError, MAIN_SEPARATOR_CHAR, MAIN_SEPARATOR_STR, NodeName, @@ -21,6 +24,56 @@ pub struct Components<'path> { inner: &'path str, } +/// TODO: docs. +#[cfg(feature = "std")] +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +pub enum AbsPathFromPathError { + /// The path is not absolute. + NotAbsolute, + + /// The path is not valid UTF-8. + NotUtf8, +} + +/// TODO: docs. +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +pub struct AbsPathNotAbsoluteError; + +/// The type of error that can occur when [`normalizing`](AbsPath::normalize) a +/// path. +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +pub enum NormalizeError { + /// The path contains `..` components that would navigate above the root. + EscapesRoot, + + /// The path contains an invalid character at the given byte offset. + InvalidCharacter { byte_offset: usize, ch: char }, + + /// The path is not absolute. + NotAbsolute, +} + +struct NormalizeState<'a> { + /// The offset in the original string up to which components have been + /// processed. If it's less than the length of the original string, then + /// it's guaranteed to be right after a path separator. + cursor: usize, + + /// The normalized path being built. This is always a valid absolute path. 
+ normalized_path: NormalizedPath, + + /// The original string being normalized. + original_str: &'a str, +} + +enum NormalizedPath { + Alloc(CompactString), + /// A byte range in the [original string](NormalizeState::original_str) + /// representing the path. Slicing with this range is guaranteed to return + /// a valid absolute path. + Slice(Range), +} + impl AbsPath { /// Returns the path as a string slice. #[inline] @@ -107,6 +160,14 @@ impl AbsPath { self.components().next_back_const() } + /// TODO: docs. + #[inline] + pub fn normalize(str: &str) -> Result, NormalizeError> { + let mut state = NormalizeState::new(str)?; + while !state.process_component()? {} + Ok(state.finish()) + } + /// TODO: docs. #[inline] pub const fn parent(&self) -> Option<&Self> { @@ -229,6 +290,165 @@ impl<'path> Components<'path> { } } +impl<'a> NormalizeState<'a> { + #[inline] + fn finish(self) -> Cow<'a, AbsPath> { + debug_assert!(self.cursor == self.original_str.len()); + match self.normalized_path { + NormalizedPath::Alloc(str) => Cow::Owned(AbsPathBuf::new(str)), + NormalizedPath::Slice(range) => { + // SAFETY: the given range always slices a valid absolute path. + let str = &self.original_str[range]; + Cow::Borrowed(unsafe { AbsPath::from_str_unchecked(str) }) + }, + } + } + + #[inline] + fn new(original_str: &'a str) -> Result { + if original_str.starts_with(MAIN_SEPARATOR_STR) { + let cursor = MAIN_SEPARATOR_STR.len(); + Ok(Self { + cursor, + normalized_path: NormalizedPath::Slice(0..cursor), + original_str, + }) + } else { + Err(NormalizeError::NotAbsolute) + } + } + + #[inline] + fn process_component(&mut self) -> Result { + let (component_len, is_last_component) = + match self.original_str.as_bytes()[self.cursor..] 
+ .iter() + .position(|&b| b == MAIN_SEPARATOR_CHAR as u8) + { + Some(pos) => (pos, false), + None => (self.original_str.len() - self.cursor, true), + }; + + Self::push_component( + &mut self.normalized_path, + self.original_str, + self.cursor..self.cursor + component_len, + )?; + + Ok(if is_last_component { + self.cursor = self.original_str.len(); + true + } else { + self.cursor += component_len + MAIN_SEPARATOR_STR.len(); + self.cursor == self.original_str.len() + }) + } + + #[inline] + fn push_component( + normalized_path: &mut NormalizedPath, + original_str: &'a str, + component_range: Range, + ) -> Result<(), NormalizeError> { + debug_assert!(component_range.end <= original_str.len(),); + debug_assert!( + original_str[..component_range.start] + .ends_with(MAIN_SEPARATOR_CHAR) + ); + + let component = &original_str[component_range.clone()]; + + let Err(err) = NodeName::from_str(component) else { + match normalized_path { + NormalizedPath::Alloc(str) => { + if str != MAIN_SEPARATOR_STR { + str.push_str(MAIN_SEPARATOR_STR); + } + str.push_str(component); + }, + NormalizedPath::Slice(current_range) => { + if current_range.len() == MAIN_SEPARATOR_STR.len() { + *current_range = component_range; + current_range.start -= MAIN_SEPARATOR_STR.len(); + return Ok(()); + } + + // If the component is an extension of the current string + // slice, we can avoid allocating. 
+ if current_range.end + MAIN_SEPARATOR_STR.len() + == component_range.start + { + current_range.end = component_range.end; + return Ok(()); + } + + let mut new_path = CompactString::with_capacity( + current_range.len() + + MAIN_SEPARATOR_STR.len() + + component_range.len(), + ); + + new_path.push_str(&original_str[current_range.clone()]); + new_path.push_str(MAIN_SEPARATOR_STR); + new_path.push_str(component); + + *normalized_path = NormalizedPath::Alloc(new_path); + }, + } + return Ok(()); + }; + + match err { + InvalidNodeNameError::Empty | InvalidNodeNameError::SingleDot => { + return Ok(()); + }, + InvalidNodeNameError::ContainsInvalidCharacter(ch) => { + return Err(NormalizeError::InvalidCharacter { + byte_offset: component_range.start, + ch, + }); + }, + InvalidNodeNameError::DoubleDot => {}, + } + + let current_path = match normalized_path { + NormalizedPath::Alloc(str) => &**str, + NormalizedPath::Slice(range) => &original_str[range.clone()], + }; + + let offset_of_last_separator = + r#const::bytes_offset_of_last_occurrence( + current_path.as_bytes(), + MAIN_SEPARATOR_CHAR as u8, + ) + .ok_or(NormalizeError::EscapesRoot)?; + + let new_len = if offset_of_last_separator == 0 { + if current_path.len() == MAIN_SEPARATOR_STR.len() { + return Err(NormalizeError::EscapesRoot); + } else { + MAIN_SEPARATOR_STR.len() + } + } else { + offset_of_last_separator + }; + + match normalized_path { + NormalizedPath::Alloc(str) => { + // SAFETY: the new length is less than the old length. + unsafe { + str.set_len(new_len); + } + }, + NormalizedPath::Slice(range) => { + range.end = range.start + new_len; + }, + } + + Ok(()) + } +} + impl ToOwned for AbsPath { type Owned = AbsPathBuf; @@ -361,21 +581,6 @@ impl fmt::Debug for Components<'_> { } } -/// TODO: docs. -#[cfg(feature = "std")] -#[derive(Copy, Clone, Debug, PartialEq, Eq)] -pub enum AbsPathFromPathError { - /// The path is not absolute. - NotAbsolute, - - /// The path is not valid UTF-8. 
- NotUtf8, -} - -/// TODO: docs. -#[derive(Copy, Clone, Debug, PartialEq, Eq)] -pub struct AbsPathNotAbsoluteError; - #[cfg(feature = "std")] impl fmt::Display for AbsPathFromPathError { #[inline] @@ -387,6 +592,9 @@ impl fmt::Display for AbsPathFromPathError { } } +#[cfg(feature = "std")] +impl Error for AbsPathFromPathError {} + impl fmt::Display for AbsPathNotAbsoluteError { #[inline] fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { @@ -394,10 +602,31 @@ impl fmt::Display for AbsPathNotAbsoluteError { } } -#[cfg(feature = "std")] -impl Error for AbsPathFromPathError {} impl Error for AbsPathNotAbsoluteError {} +impl fmt::Display for NormalizeError { + #[inline] + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + Self::EscapesRoot => { + f.write_str("path escapes the root via `..` components") + }, + Self::InvalidCharacter { byte_offset, ch } => { + write!( + f, + "path contains invalid character '{ch}' at byte range \ + {}..{}", + byte_offset, + byte_offset + ch.len_utf8(), + ) + }, + Self::NotAbsolute => AbsPathNotAbsoluteError.fmt(f), + } + } +} + +impl Error for NormalizeError {} + #[cfg(feature = "serde")] mod serde_impls { use serde::de::{Deserialize, Deserializer, Error}; diff --git a/core/src/lib.rs b/core/src/lib.rs index a2a268c..9d2b774 100644 --- a/core/src/lib.rs +++ b/core/src/lib.rs @@ -12,7 +12,12 @@ mod node_name_buf; #[cfg(feature = "std")] pub use abs_path::AbsPathFromPathError; -pub use abs_path::{AbsPath, AbsPathNotAbsoluteError, Components}; +pub use abs_path::{ + AbsPath, + AbsPathNotAbsoluteError, + Components, + NormalizeError, +}; pub use abs_path_buf::AbsPathBuf; pub use node_name::{InvalidNodeNameError, NodeName}; pub use node_name_buf::NodeNameBuf; diff --git a/tests/abs_path.rs b/tests/abs_path.rs index 30e2f82..91bf467 100644 --- a/tests/abs_path.rs +++ b/tests/abs_path.rs @@ -1,4 +1,6 @@ -use abs_path::{AbsPath, AbsPathBuf, NodeName, path}; +use std::borrow::Cow; + +use abs_path::{AbsPath, AbsPathBuf, 
NodeName, NormalizeError, path}; #[test] fn components_empty() { @@ -70,6 +72,90 @@ fn from_iter_1() { assert_eq!(path, "/foo/bar/baz.txt"); } +#[test] +#[cfg_attr(target_os = "windows", ignore)] +fn normalize_1() { + let p = "/foo/.."; + assert_eq!(AbsPath::normalize(p), Ok(Cow::Borrowed(path!("/")))); +} + +#[test] +#[cfg_attr(target_os = "windows", ignore)] +fn normalize_2() { + let p = "/foo/."; + assert_eq!(AbsPath::normalize(p), Ok(Cow::Borrowed(path!("/foo")))); +} + +#[test] +#[cfg_attr(target_os = "windows", ignore)] +fn normalize_3() { + let p = "/foo//bar"; + assert_eq!(AbsPath::normalize(p).as_deref(), Ok(path!("/foo/bar"))); +} + +#[test] +#[cfg_attr(target_os = "windows", ignore)] +fn normalize_4() { + let p = "/foo/bar/../baz/"; + assert_eq!(AbsPath::normalize(p).as_deref(), Ok(path!("/foo/baz"))); +} + +#[test] +#[cfg_attr(target_os = "windows", ignore)] +fn normalize_5() { + let p = "/."; + assert_eq!(AbsPath::normalize(p).as_deref(), Ok(path!("/"))); +} + +#[test] +#[cfg_attr(target_os = "windows", ignore)] +fn normalize_6() { + let p = "/foo/../bar/.//baz"; + assert_eq!(AbsPath::normalize(p).as_deref(), Ok(path!("/bar/baz"))); +} + +#[test] +#[cfg_attr(target_os = "windows", ignore)] +fn normalize_7() { + let p = "/../foo"; + assert_eq!(AbsPath::normalize(p), Err(NormalizeError::EscapesRoot)); +} + +#[test] +#[cfg_attr(target_os = "windows", ignore)] +fn normalize_8() { + let p = "/foo//"; + assert_eq!(AbsPath::normalize(p), Ok(Cow::Borrowed(path!("/foo")))); +} + +#[test] +#[cfg_attr(target_os = "windows", ignore)] +fn normalize_9() { + let p = "/."; + assert_eq!(AbsPath::normalize(p), Ok(Cow::Borrowed(path!("/")))); +} + +#[test] +#[cfg_attr(target_os = "windows", ignore)] +fn normalize_10() { + let p = "//foo/bar"; + assert_eq!(AbsPath::normalize(p), Ok(Cow::Borrowed(path!("/foo/bar")))); +} + +#[test] +#[cfg_attr(target_os = "windows", ignore)] +fn normalize_11() { + let p = "/./foo/bar"; + assert_eq!(AbsPath::normalize(p), 
Ok(Cow::Borrowed(path!("/foo/bar")))); +} + +#[test] +#[cfg_attr(target_os = "windows", ignore)] +fn normalize_12() { + let p = "/foo/../bar/baz"; + assert_eq!(AbsPath::normalize(p), Ok(Cow::Borrowed(path!("/bar/baz")))); +} + #[test] #[cfg_attr(target_os = "windows", ignore)] fn starts_with() { From 78844fb7ed51839c8d9a02c7d912bfd7d3ae37d7 Mon Sep 17 00:00:00 2001 From: Riccardo Mazzarini Date: Thu, 13 Nov 2025 13:27:28 +0100 Subject: [PATCH 2/4] Remove duplicate test --- tests/abs_path.rs | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/tests/abs_path.rs b/tests/abs_path.rs index 91bf467..4d6909d 100644 --- a/tests/abs_path.rs +++ b/tests/abs_path.rs @@ -131,27 +131,20 @@ fn normalize_8() { #[test] #[cfg_attr(target_os = "windows", ignore)] fn normalize_9() { - let p = "/."; - assert_eq!(AbsPath::normalize(p), Ok(Cow::Borrowed(path!("/")))); -} - -#[test] -#[cfg_attr(target_os = "windows", ignore)] -fn normalize_10() { let p = "//foo/bar"; assert_eq!(AbsPath::normalize(p), Ok(Cow::Borrowed(path!("/foo/bar")))); } #[test] #[cfg_attr(target_os = "windows", ignore)] -fn normalize_11() { +fn normalize_10() { let p = "/./foo/bar"; assert_eq!(AbsPath::normalize(p), Ok(Cow::Borrowed(path!("/foo/bar")))); } #[test] #[cfg_attr(target_os = "windows", ignore)] -fn normalize_12() { +fn normalize_11() { let p = "/foo/../bar/baz"; assert_eq!(AbsPath::normalize(p), Ok(Cow::Borrowed(path!("/bar/baz")))); } From 70b6108fc05de3d643a77600c768ac58befbff5c Mon Sep 17 00:00:00 2001 From: Riccardo Mazzarini Date: Thu, 13 Nov 2025 13:29:16 +0100 Subject: [PATCH 3/4] Tweak --- core/src/abs_path.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/abs_path.rs b/core/src/abs_path.rs index 5c1c63a..0f31f58 100644 --- a/core/src/abs_path.rs +++ b/core/src/abs_path.rs @@ -297,8 +297,8 @@ impl<'a> NormalizeState<'a> { match self.normalized_path { NormalizedPath::Alloc(str) => Cow::Owned(AbsPathBuf::new(str)), NormalizedPath::Slice(range) => 
{ - // SAFETY: the given range always slices a valid absolute path. let str = &self.original_str[range]; + // SAFETY: the given range always slices a valid absolute path. Cow::Borrowed(unsafe { AbsPath::from_str_unchecked(str) }) }, } From dac2879acbcb72e2ec41999da594528d2d3ce214 Mon Sep 17 00:00:00 2001 From: Riccardo Mazzarini Date: Thu, 13 Nov 2025 13:30:48 +0100 Subject: [PATCH 4/4] Tweak --- tests/abs_path.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/abs_path.rs b/tests/abs_path.rs index 4d6909d..b16c310 100644 --- a/tests/abs_path.rs +++ b/tests/abs_path.rs @@ -104,7 +104,7 @@ fn normalize_4() { #[cfg_attr(target_os = "windows", ignore)] fn normalize_5() { let p = "/."; - assert_eq!(AbsPath::normalize(p).as_deref(), Ok(path!("/"))); + assert_eq!(AbsPath::normalize(p), Ok(Cow::Borrowed(path!("/")))); } #[test]