From 86335eb6b277c99cfd8c9b1a58df19795b88b8bc Mon Sep 17 00:00:00 2001 From: datdenkikniet Date: Fri, 6 Mar 2026 12:42:15 +0100 Subject: [PATCH 1/2] fix(tree): deduplicate entry-finding logic The logic for finding entries is duplicated 3 times. With an (unhealthy) smothering of generics, the common functionality can be extracted into a single function that can be reused in all places. --- gix-object/src/tree/mod.rs | 2 + gix-object/src/tree/ref_iter.rs | 75 ++++++++++++++++++++++---------- gix/src/object/tree/mod.rs | 76 ++++++++++----------------------- 3 files changed, 77 insertions(+), 76 deletions(-) diff --git a/gix-object/src/tree/mod.rs b/gix-object/src/tree/mod.rs index 3bde58146c..1626063af0 100644 --- a/gix-object/src/tree/mod.rs +++ b/gix-object/src/tree/mod.rs @@ -12,6 +12,8 @@ mod ref_iter; /// pub mod write; +pub use ref_iter::lookup_entry; + /// The state needed to apply edits instantly to in-memory trees. /// /// It's made so that each tree is looked at in the object database at most once, and held in memory for diff --git a/gix-object/src/tree/ref_iter.rs b/gix-object/src/tree/ref_iter.rs index 7e60d6ac83..8598a7710d 100644 --- a/gix-object/src/tree/ref_iter.rs +++ b/gix-object/src/tree/ref_iter.rs @@ -1,8 +1,59 @@ use bstr::BStr; +use gix_hash::ObjectId; use winnow::{error::ParserError, prelude::*}; use crate::{tree, tree::EntryRef, TreeRef, TreeRefIter}; +/// Follow a sequence of `path` components starting from the tree instance in `buf`, +/// and look them up in `odb` one by one using `buffer` until the last component is looked up and +/// its tree entry is returned. +/// +/// # Performance Notes +/// +/// Searching tree entries is currently done in sequence, which allows the search to be allocation free. It would be possible +/// to reuse a vector and use a binary search instead, which might be able to improve performance over all. +/// However, a benchmark should be created first to have some data and see which trade-off to choose here. +pub fn lookup_entry( + tree: &mut Vec, + path: I, + mut load_next_tree: C, + map_entry_to_output: M, +) -> Result, E> +where + I: IntoIterator, + P: PartialEq, + C: FnMut(ObjectId, &'_ mut Vec) -> Result>, E>, + M: FnOnce(crate::tree::EntryRef<'_>) -> R, +{ + let mut path = path.into_iter().peekable(); + + while let Some(component) = path.next() { + match TreeRefIter::from_bytes(tree) + .filter_map(Result::ok) + .find(|entry| component.eq(entry.filename)) + { + Some(entry) => { + if path.peek().is_none() { + return Ok(Some(map_entry_to_output(entry))); + } else { + let oid = entry.oid.to_owned(); + + let Some(obj) = load_next_tree(oid, tree)? else { + return Ok(None); + }; + + if !obj.kind.is_tree() { + return Ok(None); + } + } + } + None => return Ok(None), + } + } + + Ok(None) +} + impl<'a> TreeRefIter<'a> { /// Instantiate an iterator from the given tree data. pub fn from_bytes(data: &'a [u8]) -> TreeRefIter<'a> { @@ -28,30 +79,8 @@ impl<'a> TreeRefIter<'a> { P: PartialEq, { buffer.clear(); - - let mut path = path.into_iter().peekable(); buffer.extend_from_slice(self.data); - while let Some(component) = path.next() { - match TreeRefIter::from_bytes(buffer) - .filter_map(Result::ok) - .find(|entry| component.eq(entry.filename)) - { - Some(entry) => { - if path.peek().is_none() { - return Ok(Some(entry.into())); - } else { - let next_id = entry.oid.to_owned(); - let obj = odb.try_find(&next_id, buffer)?; - let Some(obj) = obj else { return Ok(None) }; - if !obj.kind.is_tree() { - return Ok(None); - } - } - } - None => return Ok(None), - } - } - Ok(None) + lookup_entry(buffer, path, |oid, buf| odb.try_find(&oid, buf), |entry| entry.into()) } /// Like [`Self::lookup_entry()`], but takes any [`AsRef`](`std::path::Path`) directly via `relative_path`, diff --git a/gix/src/object/tree/mod.rs b/gix/src/object/tree/mod.rs index aa8a0aa253..d862011727 100644 --- a/gix/src/object/tree/mod.rs +++ b/gix/src/object/tree/mod.rs @@ -1,6 +1,6 @@ use gix_hash::ObjectId; pub use gix_object::tree::{EntryKind, EntryMode}; -use gix_object::{bstr::BStr, FindExt, TreeRefIter}; +use gix_object::{bstr::BStr, tree::lookup_entry, FindExt, TreeRefIter}; use crate::{object::find, Id, ObjectDetached, Repository, Tree}; @@ -63,34 +63,15 @@ impl<'repo> Tree<'repo> { I: IntoIterator, P: PartialEq, { - let mut buf = self.repo.empty_reusable_buffer(); - buf.clear(); - - let mut path = path.into_iter().peekable(); + let buf = &mut self.repo.empty_reusable_buffer(); buf.extend_from_slice(&self.data); - while let Some(component) = path.next() { - match TreeRefIter::from_bytes(&buf) - .filter_map(Result::ok) - .find(|entry| component.eq(entry.filename)) - { - Some(entry) => { - if path.peek().is_none() { - return Ok(Some(Entry { - inner: entry.into(), - repo: self.repo, - })); - } else { - let next_id = entry.oid.to_owned(); - let obj = self.repo.objects.find(&next_id, &mut buf)?; - if !obj.kind.is_tree() { - return Ok(None); - } - } - } - None => return Ok(None), - } - } - Ok(None) + + let map_entry = |entry: gix_object::tree::EntryRef<'_>| Entry { + inner: entry.into(), + repo: self.repo, + }; + + lookup_entry(buf, path, |oid, buf| self.repo.find(&oid, buf).map(Some), map_entry) } /// Follow a sequence of `path` components starting from this instance, and look them up one by one until the last component @@ -109,31 +90,20 @@ impl<'repo> Tree<'repo> { I: IntoIterator, P: PartialEq, { - let mut path = path.into_iter().peekable(); - while let Some(component) = path.next() { - match TreeRefIter::from_bytes(&self.data) - .filter_map(Result::ok) - .find(|entry| component.eq(entry.filename)) - { - Some(entry) => { - if path.peek().is_none() { - return Ok(Some(Entry { - inner: entry.into(), - repo: self.repo, - })); - } else { - let next_id = entry.oid.to_owned(); - let obj = self.repo.objects.find(&next_id, &mut self.data)?; - self.id = next_id; - if !obj.kind.is_tree() { - return Ok(None); - } - } - } - None => return Ok(None), - } - } - Ok(None) + let map_entry = |entry: gix_object::tree::EntryRef<'_>| Entry { + inner: entry.into(), + repo: self.repo, + }; + + lookup_entry( + &mut self.data, + path, + |oid, buf| { + self.id = oid; + self.repo.find(&oid, buf).map(Some) + }, + map_entry, + ) } /// Like [`Self::lookup_entry()`], but takes a `Path` directly via `relative_path`, a path relative to this tree. From 1531458c6edc063ae9eb14a355eaa0cdfa0fd5a7 Mon Sep 17 00:00:00 2001 From: datdenkikniet Date: Fri, 6 Mar 2026 20:28:35 +0100 Subject: [PATCH 2/2] fix(tree): deduplicate path finding logic, take 2 Instead of doing some messed up callback stuff, we can also use std::ops::ControlFlow. It is slightly more verbose, but definitely easier to read. --- gix-object/src/tree/mod.rs | 2 +- gix-object/src/tree/ref_iter.rs | 86 ++++++++++++++++----------------- gix/src/object/tree/mod.rs | 56 +++++++++++++-------- 3 files changed, 77 insertions(+), 67 deletions(-) diff --git a/gix-object/src/tree/mod.rs b/gix-object/src/tree/mod.rs index 1626063af0..a6b59e436a 100644 --- a/gix-object/src/tree/mod.rs +++ b/gix-object/src/tree/mod.rs @@ -12,7 +12,7 @@ mod ref_iter; /// pub mod write; -pub use ref_iter::lookup_entry; +pub use ref_iter::iter_next; /// The state needed to apply edits instantly to in-memory trees. /// diff --git a/gix-object/src/tree/ref_iter.rs b/gix-object/src/tree/ref_iter.rs index 8598a7710d..56da362d92 100644 --- a/gix-object/src/tree/ref_iter.rs +++ b/gix-object/src/tree/ref_iter.rs @@ -1,57 +1,39 @@ +use std::ops::ControlFlow; + use bstr::BStr; -use gix_hash::ObjectId; use winnow::{error::ParserError, prelude::*}; use crate::{tree, tree::EntryRef, TreeRef, TreeRefIter}; -/// Follow a sequence of `path` components starting from the tree instance in `buf`, -/// and look them up in `odb` one by one using `buffer` until the last component is looked up and -/// its tree entry is returned. -/// -/// # Performance Notes -/// -/// Searching tree entries is currently done in sequence, which allows the search to be allocation free. It would be possible -/// to reuse a vector and use a binary search instead, which might be able to improve performance over all. -/// However, a benchmark should be created first to have some data and see which trade-off to choose here. -pub fn lookup_entry( - tree: &mut Vec, - path: I, - mut load_next_tree: C, - map_entry_to_output: M, -) -> Result, E> +/// Yes +pub fn iter_next<'a, I, P>( + components: &mut core::iter::Peekable, + tree: crate::Data<'a>, +) -> core::ops::ControlFlow>, gix_hash::ObjectId> where - I: IntoIterator, + I: Iterator, P: PartialEq, - C: FnMut(ObjectId, &'_ mut Vec) -> Result>, E>, - M: FnOnce(crate::tree::EntryRef<'_>) -> R, { - let mut path = path.into_iter().peekable(); - - while let Some(component) = path.next() { - match TreeRefIter::from_bytes(tree) - .filter_map(Result::ok) - .find(|entry| component.eq(entry.filename)) - { - Some(entry) => { - if path.peek().is_none() { - return Ok(Some(map_entry_to_output(entry))); - } else { - let oid = entry.oid.to_owned(); - - let Some(obj) = load_next_tree(oid, tree)? else { - return Ok(None); - }; - - if !obj.kind.is_tree() { - return Ok(None); - } - } - } - None => return Ok(None), - } + if !tree.kind.is_tree() { + return ControlFlow::Break(None); } - Ok(None) + let Some(component) = components.next() else { + return ControlFlow::Break(None); + }; + + let Some(entry) = TreeRefIter::from_bytes(tree.data) + .filter_map(Result::ok) + .find(|entry| component.eq(entry.filename)) + else { + return ControlFlow::Break(None); + }; + + if components.peek().is_none() { + ControlFlow::Break(Some(entry)) + } else { + ControlFlow::Continue(entry.oid.to_owned()) + } } impl<'a> TreeRefIter<'a> { @@ -80,7 +62,21 @@ impl<'a> TreeRefIter<'a> { { buffer.clear(); buffer.extend_from_slice(self.data); - lookup_entry(buffer, path, |oid, buf| odb.try_find(&oid, buf), |entry| entry.into()) + + let mut iter = path.into_iter().peekable(); + let mut data = crate::Data::new(crate::Kind::Tree, buffer); + + loop { + data = match iter_next(&mut iter, data) { + ControlFlow::Continue(oid) => { + let Some(next_tree) = odb.try_find(&oid, buffer)? else { + break Ok(None); + }; + next_tree + } + ControlFlow::Break(v) => break Ok(v.map(Into::into)), + } + } } /// Like [`Self::lookup_entry()`], but takes any [`AsRef`](`std::path::Path`) directly via `relative_path`, diff --git a/gix/src/object/tree/mod.rs b/gix/src/object/tree/mod.rs index d862011727..49151bb7a2 100644 --- a/gix/src/object/tree/mod.rs +++ b/gix/src/object/tree/mod.rs @@ -1,6 +1,8 @@ +use std::ops::ControlFlow; + use gix_hash::ObjectId; pub use gix_object::tree::{EntryKind, EntryMode}; -use gix_object::{bstr::BStr, tree::lookup_entry, FindExt, TreeRefIter}; +use gix_object::{bstr::BStr, tree::iter_next, FindExt, TreeRefIter}; use crate::{object::find, Id, ObjectDetached, Repository, Tree}; @@ -66,12 +68,22 @@ impl<'repo> Tree<'repo> { let buf = &mut self.repo.empty_reusable_buffer(); buf.extend_from_slice(&self.data); - let map_entry = |entry: gix_object::tree::EntryRef<'_>| Entry { - inner: entry.into(), - repo: self.repo, - }; - - lookup_entry(buf, path, |oid, buf| self.repo.find(&oid, buf).map(Some), map_entry) + let mut iter = path.into_iter().peekable(); + let mut data = gix_object::Data::new(gix_object::Kind::Tree, buf); + + loop { + data = match iter_next(&mut iter, data) { + ControlFlow::Continue(oid) => self.repo.find(&oid, buf)?, + ControlFlow::Break(entry) => { + let mapped = entry.map(|e| Entry { + inner: e.into(), + repo: self.repo, + }); + + break Ok(mapped); + } + } + } } /// Follow a sequence of `path` components starting from this instance, and look them up one by one until the last component @@ -90,20 +102,22 @@ impl<'repo> Tree<'repo> { I: IntoIterator, P: PartialEq, { - let map_entry = |entry: gix_object::tree::EntryRef<'_>| Entry { - inner: entry.into(), - repo: self.repo, - }; - - lookup_entry( - &mut self.data, - path, - |oid, buf| { - self.id = oid; - self.repo.find(&oid, buf).map(Some) - }, - map_entry, - ) + let mut iter = path.into_iter().peekable(); + let mut data = gix_object::Data::new(gix_object::Kind::Tree, &self.data); + + loop { + data = match iter_next(&mut iter, data) { + ControlFlow::Continue(oid) => { + self.id = oid; + self.repo.find(&oid, &mut self.data)? + } + ControlFlow::Break(entry) => { + let repo = self.repo; + let mapped = entry.map(|e| Entry { inner: e.into(), repo }); + break Ok(mapped); + } + } + } } /// Like [`Self::lookup_entry()`], but takes a `Path` directly via `relative_path`, a path relative to this tree.