Skip to content

Commit 18b42ca

Browse files
fix!(gix-object): Store bytes in EntryMode so Tree roundtrips
Before this change, EntryMode wrapped a `u16` which didn't store enough information to match the git representation as a `&[u8]` of len 5 or 6. In particular, the mode that represents a `Tree` could be represented as `"40000"` or `"040000"`, and the difference would get lost once it was represented as `0o40000`. Change the backing type for `EntryMode` from a `u16` to a `[u8; 6]`. This way, we can represent `b"40000"` as `b"40000 "` which differs from `b"040000"` Add a regression test that ensures `EntryMode` must roundtrip. Fixes [issue 1887](GitoxideLabs#1887) BREAKING CHANGE: * Sha1 for certain git-objects will change (to match Git's) * The API had to be updated to decouple callers from the internal data representation
1 parent 4660f7a commit 18b42ca

File tree

7 files changed

+225
-146
lines changed

7 files changed

+225
-146
lines changed

gix-object/src/object/convert.rs

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ impl From<tree::EntryRef<'_>> for tree::Entry {
7171
fn from(other: tree::EntryRef<'_>) -> tree::Entry {
7272
let tree::EntryRef { mode, filename, oid } = other;
7373
tree::Entry {
74-
mode,
74+
mode: mode.to_owned(),
7575
filename: filename.to_owned(),
7676
oid: oid.into(),
7777
}
@@ -89,6 +89,25 @@ impl<'a> From<&'a tree::Entry> for tree::EntryRef<'a> {
8989
}
9090
}
9191

92+
impl<'a> TryFrom<&'a [u8]> for tree::EntryMode {
93+
type Error = &'a [u8];
94+
95+
fn try_from(mode: &'a [u8]) -> Result<Self, Self::Error> {
96+
let len = if let Some(delim) = mode.iter().position(|b| *b == b' ') {
97+
delim
98+
} else {
99+
mode.len().min(6)
100+
};
101+
let mut git_representation = [b' '; 6];
102+
git_representation[..len].copy_from_slice(&mode[..len]);
103+
let value = tree::parse_git_mode(&git_representation).ok_or(mode)?;
104+
Ok(Self {
105+
value,
106+
git_representation,
107+
})
108+
}
109+
}
110+
92111
impl From<ObjectRef<'_>> for Object {
93112
fn from(v: ObjectRef<'_>) -> Self {
94113
match v {

gix-object/src/tree/mod.rs

Lines changed: 171 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
use crate::{
2-
bstr::{BStr, BString},
2+
bstr::{BStr, BString, ByteSlice},
33
tree, Tree, TreeRef,
44
};
55
use std::cell::RefCell;
@@ -36,19 +36,100 @@ pub struct Editor<'a> {
3636
tree_buf: Vec<u8>,
3737
}
3838

39+
/// Parse a valid git mode into a u16
40+
/// A valid git mode can be represented by a set of 5-6 octal digits. The leftmost octal digit can
41+
/// be at most one. These conditions guarantee that it can be represented as a `u16`, although that
42+
/// representation is lossy compared to the byte slice it came from as `"040000"` and `"40000"` will
43+
/// both be represented as `0o40000`.
44+
/// Input:
45+
/// We accept input that contains exactly a valid git mode or a valid git mode followed by a
46+
/// space, then anything (in case we are just pointing in the memory from the Git Tree
47+
/// representation)
48+
/// Return value:
49+
/// The value (`u16`) given a valid input or `None` otherwise
50+
pub const fn parse_git_mode(i: &[u8]) -> Option<u16> {
51+
let mut mode = 0;
52+
let mut idx = 0;
53+
// const fn, this is why we can't have nice things (like `.iter().any()`)
54+
while idx < i.len() {
55+
let b = i[idx];
56+
// Delimiter, return what we got
57+
if b == b' ' {
58+
return Some(mode);
59+
}
60+
// Not a pure octal input
61+
if b < b'0' || b > b'7' {
62+
return None;
63+
}
64+
// More than 6 octal digits we must have hit the delimiter or the input was malformed
65+
if idx > 6 {
66+
return None;
67+
}
68+
mode = (mode << 3) + (b - b'0') as u16;
69+
idx += 1;
70+
}
71+
Some(mode)
72+
}
73+
74+
/// Just a place-holder until the `slice_split_once` feature stabilizes
75+
fn split_once(slice: &[u8], value: u8) -> Option<(&'_ [u8], &'_ [u8])> {
76+
let mut iterator = slice.splitn(2, |b| *b == value);
77+
let first = iterator.next();
78+
let second = iterator.next();
79+
first.and_then(|first| second.map(|second| (first, second)))
80+
}
81+
82+
/// From the slice we get from a Git Tree representation, extract and parse the "mode" part
83+
/// Return:
84+
/// * `Some((mode as u16, (mode as slice, rest as slice)))` if the input slice is valid
85+
/// * `None` otherwise
86+
#[allow(clippy::type_complexity)]
87+
fn extract_git_mode(i: &[u8]) -> Option<(u16, (&[u8], &[u8]))> {
88+
if let Some((mode_slice, rest_slice)) = split_once(i, b' ') {
89+
if rest_slice.is_empty() {
90+
return None;
91+
}
92+
93+
parse_git_mode(mode_slice).map(|mode_num| (mode_num, (mode_slice, rest_slice)))
94+
} else {
95+
None
96+
}
97+
}
98+
99+
impl TryFrom<u32> for tree::EntryMode {
100+
type Error = u32;
101+
102+
fn try_from(mode: u32) -> Result<Self, Self::Error> {
103+
Ok(match mode {
104+
0o40000 | 0o120000 | 0o160000 => (mode as u16).into(),
105+
blob_mode if blob_mode & 0o100000 == 0o100000 => (mode as u16).into(),
106+
_ => return Err(mode),
107+
})
108+
}
109+
}
110+
39111
/// The mode of items storable in a tree, similar to the file mode on a unix file system.
40112
///
41-
/// Used in [`mutable::Entry`][crate::tree::Entry] and [`EntryRef`].
113+
/// Used in [`mutable::Entry`][crate::tree::Entry].
42114
///
43115
/// Note that even though it can be created from any `u16`, it should be preferable to
44116
/// create it by converting [`EntryKind`] into `EntryMode`.
45117
#[derive(Clone, Copy, PartialEq, Eq, Ord, PartialOrd, Hash)]
46118
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
47-
pub struct EntryMode(pub u16);
119+
pub struct EntryMode {
120+
pub(crate) value: u16,
121+
pub(crate) git_representation: [u8; 6],
122+
}
48123

49124
impl std::fmt::Debug for EntryMode {
50125
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
51-
write!(f, "EntryMode({:#o})", self.0)
126+
write!(f, "EntryMode(0o{})", self.as_bstr())
127+
}
128+
}
129+
130+
impl std::fmt::Octal for EntryMode {
131+
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
132+
write!(f, "{}", self.as_bstr())
52133
}
53134
}
54135

@@ -72,9 +153,41 @@ pub enum EntryKind {
72153
Commit = 0o160000,
73154
}
74155

75-
impl From<EntryKind> for EntryMode {
76-
fn from(value: EntryKind) -> Self {
77-
EntryMode(value as u16)
156+
// Mask away the bottom 12 bits and the top 2 bits
157+
const IFMT: u16 = 0o170000;
158+
const fn parse_entry_kind_from_value(mode: u16) -> EntryKind {
159+
let etype = mode & IFMT;
160+
if etype == 0o100000 {
161+
if mode & 0o000100 == 0o000100 {
162+
EntryKind::BlobExecutable
163+
} else {
164+
EntryKind::Blob
165+
}
166+
} else if etype == EntryKind::Link as u16 {
167+
EntryKind::Link
168+
} else if etype == EntryKind::Tree as u16 {
169+
EntryKind::Tree
170+
} else {
171+
EntryKind::Commit
172+
}
173+
}
174+
175+
impl From<u16> for EntryKind {
176+
fn from(mode: u16) -> Self {
177+
parse_entry_kind_from_value(mode)
178+
}
179+
}
180+
181+
impl From<u16> for EntryMode {
182+
fn from(value: u16) -> Self {
183+
let kind: EntryKind = value.into();
184+
Self::from(kind)
185+
}
186+
}
187+
188+
impl From<EntryMode> for u16 {
189+
fn from(value: EntryMode) -> Self {
190+
value.value
78191
}
79192
}
80193

@@ -88,70 +201,73 @@ impl From<EntryMode> for EntryKind {
88201
impl EntryKind {
89202
/// Return the representation as used in the git internal format.
90203
pub fn as_octal_str(&self) -> &'static BStr {
204+
self.as_octal_bytes().as_bstr()
205+
}
206+
207+
/// Return the representation as used in the git internal format.
208+
pub fn as_octal_bytes(&self) -> &'static [u8] {
91209
use EntryKind::*;
92-
let bytes: &[u8] = match self {
210+
match self {
93211
Tree => b"40000",
94212
Blob => b"100644",
95213
BlobExecutable => b"100755",
96214
Link => b"120000",
97215
Commit => b"160000",
98-
};
99-
bytes.into()
216+
}
217+
}
218+
/// Return the representation as a human readable description
219+
pub fn as_descriptive_str(&self) -> &'static str {
220+
use EntryKind::*;
221+
match self {
222+
Tree => "tree",
223+
Blob => "blob",
224+
BlobExecutable => "exe",
225+
Link => "link",
226+
Commit => "commit",
227+
}
100228
}
101229
}
102230

103-
impl std::ops::Deref for EntryMode {
104-
type Target = u16;
105-
106-
fn deref(&self) -> &Self::Target {
107-
&self.0
231+
impl From<EntryKind> for EntryMode {
232+
fn from(value: EntryKind) -> Self {
233+
let mut git_representation = [b' '; 6];
234+
git_representation[..value.as_octal_str().len()].copy_from_slice(value.as_octal_str());
235+
EntryMode {
236+
git_representation,
237+
value: value as u16,
238+
}
108239
}
109240
}
110241

111-
const IFMT: u16 = 0o170000;
112-
113242
impl EntryMode {
114243
/// Discretize the raw mode into an enum with well-known state while dropping unnecessary details.
115244
pub const fn kind(&self) -> EntryKind {
116-
let etype = self.0 & IFMT;
117-
if etype == 0o100000 {
118-
if self.0 & 0o000100 == 0o000100 {
119-
EntryKind::BlobExecutable
120-
} else {
121-
EntryKind::Blob
122-
}
123-
} else if etype == EntryKind::Link as u16 {
124-
EntryKind::Link
125-
} else if etype == EntryKind::Tree as u16 {
126-
EntryKind::Tree
127-
} else {
128-
EntryKind::Commit
129-
}
130-
}
131-
132-
/// Return true if this entry mode represents a Tree/directory
133-
pub const fn is_tree(&self) -> bool {
134-
self.0 & IFMT == EntryKind::Tree as u16
245+
parse_entry_kind_from_value(self.value)
135246
}
136247

137248
/// Return true if this entry mode represents the commit of a submodule.
138249
pub const fn is_commit(&self) -> bool {
139-
self.0 & IFMT == EntryKind::Commit as u16
250+
matches!(self.kind(), EntryKind::Commit)
140251
}
141252

142253
/// Return true if this entry mode represents a symbolic link
143254
pub const fn is_link(&self) -> bool {
144-
self.0 & IFMT == EntryKind::Link as u16
255+
matches!(self.kind(), EntryKind::Link)
256+
}
257+
258+
/// Return true if this entry mode represents anything BUT Tree/directory
259+
pub const fn is_tree(&self) -> bool {
260+
matches!(self.kind(), EntryKind::Tree)
145261
}
146262

147263
/// Return true if this entry mode represents anything BUT Tree/directory
148264
pub const fn is_no_tree(&self) -> bool {
149-
self.0 & IFMT != EntryKind::Tree as u16
265+
!matches!(self.kind(), EntryKind::Tree)
150266
}
151267

152268
/// Return true if the entry is any kind of blob.
153269
pub const fn is_blob(&self) -> bool {
154-
self.0 & IFMT == 0o100000
270+
matches!(self.kind(), EntryKind::Blob | EntryKind::BlobExecutable)
155271
}
156272

157273
/// Return true if the entry is an executable blob.
@@ -167,37 +283,26 @@ impl EntryMode {
167283
)
168284
}
169285

170-
/// Represent the mode as descriptive string.
171-
pub const fn as_str(&self) -> &'static str {
172-
use EntryKind::*;
173-
match self.kind() {
174-
Tree => "tree",
175-
Blob => "blob",
176-
BlobExecutable => "exe",
177-
Link => "link",
178-
Commit => "commit",
286+
/// How many bytes of the backing representation are significant?
287+
#[allow(clippy::len_without_is_empty)]
288+
pub fn len(&self) -> usize {
289+
if let Some(delim) = self.git_representation.iter().position(|b| *b == b' ') {
290+
delim
291+
} else {
292+
self.git_representation.len()
179293
}
180294
}
181295

182296
/// Return the representation as used in the git internal format, which is octal and written
183297
/// to the `backing` buffer. The respective sub-slice that was written to is returned.
184-
pub fn as_bytes<'a>(&self, backing: &'a mut [u8; 6]) -> &'a BStr {
185-
if self.0 == 0 {
186-
std::slice::from_ref(&b'0')
187-
} else {
188-
let mut nb = 0;
189-
let mut n = self.0;
190-
while n > 0 {
191-
let remainder = (n % 8) as u8;
192-
backing[nb] = b'0' + remainder;
193-
n /= 8;
194-
nb += 1;
195-
}
196-
let res = &mut backing[..nb];
197-
res.reverse();
198-
res
199-
}
200-
.into()
298+
pub fn as_bytes(&self) -> &'_ [u8] {
299+
&self.git_representation[..self.len()]
300+
}
301+
302+
/// Return the representation as used in the git internal format, which is octal and written
303+
/// to the `backing` buffer. The respective sub-slice that was written to is returned.
304+
pub fn as_bstr(&self) -> &'_ BStr {
305+
self.as_bytes().as_bstr()
201306
}
202307
}
203308

0 commit comments

Comments
 (0)