Skip to content

Commit 8e66591

Browse files
committed
feat: tree-editing TBD
1 parent 71bf808 commit 8e66591

File tree

10 files changed

+697
-276
lines changed

10 files changed

+697
-276
lines changed

Cargo.lock

+9
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

gix-object/Cargo.toml

+3
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ gix-features = { version = "^0.38.2", path = "../gix-features", features = [
4141
"progress",
4242
] }
4343
gix-hash = { version = "^0.14.2", path = "../gix-hash" }
44+
gix-hashtable = { version = "^0.5.2", path = "../gix-hashtable" }
4445
gix-validate = { version = "^0.9.0", path = "../gix-validate" }
4546
gix-actor = { version = "^0.32.0", path = "../gix-actor" }
4647
gix-date = { version = "^0.9.0", path = "../gix-date" }
@@ -64,6 +65,8 @@ document-features = { version = "0.2.0", optional = true }
6465
criterion = "0.5.1"
6566
pretty_assertions = "1.0.0"
6667
gix-testtools = { path = "../tests/tools" }
68+
gix-odb = { path = "../gix-odb" }
69+
termtree = "0.5.1"
6770

6871
[package.metadata.docs.rs]
6972
all-features = true

gix-object/src/tree/editor.rs

+234
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,234 @@
1+
use crate::tree::EntryKind;
2+
use crate::{tree, Tree};
3+
use bstr::{BStr, BString, ByteSlice, ByteVec};
4+
use gix_hash::ObjectId;
5+
use gix_hashtable::hash_map::Entry;
6+
use std::cmp::Ordering;
7+
8+
/// The state needed to apply edits instantly to in-memory trees.
9+
///
10+
/// It's made so that each tree is looked at in the object database at most once, and held in memory for
11+
/// all edits until everything is flushed to write all changed trees.
12+
///
13+
/// The editor is optimized to edit existing trees, but can deal with building entirely new trees as well
14+
/// with some penalties.
15+
///
16+
/// ### Note
17+
///
18+
/// For reasons of efficiency, internally a SHA1 based hashmap is used to avoid having to store full paths
19+
/// to each edited tree. The chance of collision is low, but could be engineered to overwrite or write into
20+
/// an unintended tree.
21+
pub struct Editor<'a> {
22+
/// A way to lookup trees.
23+
find: &'a dyn crate::FindExt,
24+
/// All trees we currently hold in memory. Each of these may change while adding and removing entries.
25+
/// null-object-ids mark tree-entries whose value we don't know yet, they are placeholders that will be
26+
/// dropped when writing at the latest.
27+
trees: gix_hashtable::HashMap<ObjectId, Tree>,
28+
/// A buffer to build up paths when finding the tree to edit.
29+
path_buf: BString,
30+
}
31+
32+
/// Lifecycle
33+
impl<'a> Editor<'a> {
34+
/// Create a new editor that uses `root` as base for all edits. Use `find` to lookup existing
35+
/// trees when edits are made. Each tree will only be looked-up once and then edited in place from
36+
/// that point on.
37+
pub fn new(root: Tree, find: &'a dyn crate::FindExt) -> Self {
38+
Editor {
39+
find,
40+
trees: gix_hashtable::HashMap::from_iter(Some((empty_path_hash(), root))),
41+
path_buf: Vec::with_capacity(512).into(),
42+
}
43+
}
44+
}
45+
46+
/// Operations
47+
impl<'a> Editor<'a> {
48+
/// Write the entire in-memory state of all changed trees (and only changed trees) to `out`.
49+
///
50+
/// The last call to `out` will be the changed root tree, whose object-id will also be returned.
51+
/// `out` is free to do any kind of additional validation, like to assure that all entries in the tree exist.
52+
/// We don't assure that as there is no validation that inserted entries are valid object ids.
53+
///
54+
/// Future calls to [`upsert`](Self::upsert) or similar will keep working on the last seen state of the
55+
/// just-written root-tree.
56+
/// If this is not desired, use [set_root()](Self::set_root()).
57+
pub fn write<E>(&mut self, mut out: impl FnMut(&Tree) -> Result<ObjectId, E>) -> Result<ObjectId, E> {
58+
assert_ne!(self.trees.len(), 0, "there is at least the root tree");
59+
60+
// back is for children, front is for parents.
61+
let mut parents = vec![(
62+
None::<usize>,
63+
BString::default(),
64+
self.trees
65+
.remove(&empty_path_hash())
66+
.expect("root tree is always present"),
67+
)];
68+
let mut children = Vec::new();
69+
while let Some((parent_idx, mut rela_path, mut tree)) = children.pop().or_else(|| parents.pop()) {
70+
let mut all_entries_unchanged_or_written = true;
71+
for entry in &tree.entries {
72+
if entry.mode.is_tree() {
73+
let prev_len = push_path_component(&mut rela_path, &entry.filename);
74+
if let Some(sub_tree) = self.trees.remove(&path_hash(&rela_path)) {
75+
all_entries_unchanged_or_written = false;
76+
let next_parent_idx = parents.len();
77+
children.push((Some(next_parent_idx), rela_path.clone(), sub_tree));
78+
}
79+
rela_path.truncate(prev_len);
80+
}
81+
}
82+
if all_entries_unchanged_or_written {
83+
tree.entries.retain(|e| !e.oid.is_null());
84+
let tree_id = out(&tree)?;
85+
if let Some((_, _, parent_to_adjust)) =
86+
parent_idx.map(|idx| parents.get_mut(idx).expect("always present, pointing towards zero"))
87+
{
88+
let name = filename(rela_path.as_bstr());
89+
let entry_idx = parent_to_adjust
90+
.entries
91+
.binary_search_by(|e| cmp_entry_with_name(e, name, true))
92+
.expect("the parent always knows us by name");
93+
if tree.entries.is_empty() {
94+
parent_to_adjust.entries.remove(entry_idx);
95+
} else {
96+
parent_to_adjust.entries[entry_idx].oid = tree_id;
97+
}
98+
}
99+
if parents.is_empty() {
100+
debug_assert!(children.is_empty(), "we consume children before parents");
101+
debug_assert!(rela_path.is_empty(), "this should always be the root tree");
102+
103+
// There may be left-over trees if they are replaced with blobs for example.
104+
self.trees.clear();
105+
self.trees.insert(empty_path_hash(), tree);
106+
return Ok(tree_id);
107+
}
108+
} else {
109+
parents.push((parent_idx, rela_path, tree));
110+
}
111+
}
112+
113+
unreachable!("we exit as soon as everything is consumed")
114+
}
115+
116+
/// Insert a new entry of `kind` with `id` at `rela_path`, an iterator over each path component in the tree,
117+
/// like `a/b/c`. Names are matched case-sensitively.
118+
///
119+
/// Existing leaf-entries will be overwritten unconditionally, and it is assumed that `id` is available in the object database
120+
/// or will be made available at a later point to assure the integrity of the produced tree.
121+
///
122+
/// Intermediate trees will be created if they don't exist in the object database, otherwise they will be loaded and entries
123+
/// will be inserted into them instead.
124+
///
125+
/// Note that `id` can be [null](ObjectId::null()) to create a placeholder. These will not be written, and paths leading
126+
/// through them will not be considered a problem.
127+
///
128+
/// `id` can also be an empty tree, along with [the respective `kind`](EntryKind::Tree), even though that's normally not allowed
129+
/// in Git trees.
130+
pub fn upsert<I, C>(
131+
&mut self,
132+
rela_path: I,
133+
kind: EntryKind,
134+
id: ObjectId,
135+
) -> Result<&mut Self, crate::find::existing_object::Error>
136+
where
137+
I: IntoIterator<Item = C>,
138+
C: AsRef<BStr>,
139+
{
140+
let mut cursor = self.trees.get_mut(&empty_path_hash()).expect("root is always present");
141+
self.path_buf.clear();
142+
let mut rela_path = rela_path.into_iter().peekable();
143+
while let Some(name) = rela_path.next() {
144+
let name = name.as_ref();
145+
let is_last = rela_path.peek().is_none();
146+
match cursor
147+
.entries
148+
.binary_search_by(|e| cmp_entry_with_name(e, name, false))
149+
.or_else(|_| cursor.entries.binary_search_by(|e| cmp_entry_with_name(e, name, true)))
150+
{
151+
Ok(idx) => {
152+
let entry = &mut cursor.entries[idx];
153+
if is_last {
154+
entry.oid = id;
155+
entry.mode = kind.into();
156+
break;
157+
} else {
158+
// TODO: lookup existing tree if it is one, otherwise overwrite.
159+
entry.oid = id.kind().null();
160+
entry.mode = EntryKind::Tree.into();
161+
}
162+
}
163+
Err(insertion_idx) => {
164+
cursor.entries.insert(
165+
insertion_idx,
166+
tree::Entry {
167+
filename: name.into(),
168+
mode: if is_last { kind.into() } else { EntryKind::Tree.into() },
169+
oid: if is_last { id } else { id.kind().null() },
170+
},
171+
);
172+
if is_last {
173+
break;
174+
}
175+
}
176+
}
177+
debug_assert!(
178+
!is_last,
179+
"adding new trees makes sense only for intermediate components"
180+
);
181+
push_path_component(&mut self.path_buf, name);
182+
let path_id = path_hash(&self.path_buf);
183+
cursor = match self.trees.entry(path_id) {
184+
Entry::Occupied(e) => e.into_mut(),
185+
Entry::Vacant(e) => e.insert(Tree::default()),
186+
};
187+
}
188+
Ok(self)
189+
}
190+
191+
/// Set the root tree of the modification to `root`, assuring it has a well-known state.
192+
///
193+
/// Note that this erases all previous edits.
194+
///
195+
/// This is useful if the same editor is re-used for various trees.
196+
pub fn set_root(&mut self, root: Tree) -> &mut Self {
197+
self.trees.clear();
198+
self.trees.insert(empty_path_hash(), root);
199+
self
200+
}
201+
}
202+
203+
/// Compare the entry `a` with an entry named `filename`, which is treated as a tree if `is_tree` is `true`.
///
/// Names are compared byte by byte. If one name is a prefix of the other, the byte following the
/// shorter name decides, and a tree is treated as if its name were followed by `/`.
fn cmp_entry_with_name(a: &tree::Entry, filename: &BStr, is_tree: bool) -> Ordering {
    let shared_len = a.filename.len().min(filename.len());
    match a.filename[..shared_len].cmp(&filename[..shared_len]) {
        Ordering::Equal => {
            // `None` sorts before any byte, so a name that simply ends comes first.
            let lhs = a.filename.get(shared_len).or_else(|| a.mode.is_tree().then_some(&b'/'));
            let rhs = filename.get(shared_len).or_else(|| is_tree.then_some(&b'/'));
            lhs.cmp(&rhs)
        }
        unequal => unequal,
    }
}
211+
212+
/// Return the last component of the `/`-separated `path`, or all of `path` if it contains no `/`.
fn filename(path: &BStr) -> &BStr {
    match path.rfind_byte(b'/') {
        Some(slash_pos) => &path[slash_pos + 1..],
        None => path,
    }
}
215+
216+
fn empty_path_hash() -> ObjectId {
217+
gix_features::hash::hasher(gix_hash::Kind::Sha1).digest().into()
218+
}
219+
220+
/// Return the SHA1 of the bytes of `path`, which serves as key for the tree at that path.
fn path_hash(path: &[u8]) -> ObjectId {
    let mut state = gix_features::hash::hasher(gix_hash::Kind::Sha1);
    state.update(path);
    let digest = state.digest();
    digest.into()
}
225+
226+
fn push_path_component(base: &mut BString, component: &[u8]) -> usize {
227+
let prev_len = base.len();
228+
debug_assert!(base.last() != Some(&b'/'));
229+
if !base.is_empty() {
230+
base.push_byte(b'/');
231+
}
232+
base.push_str(component);
233+
prev_len
234+
}

gix-object/src/tree/mod.rs

+3
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,9 @@ use crate::{
55
tree,
66
};
77

8+
mod editor;
9+
pub use editor::Editor;
10+
811
mod ref_iter;
912
///
1013
pub mod write;

0 commit comments

Comments
 (0)