|
| 1 | +use crate::tree::EntryKind; |
| 2 | +use crate::{tree, Tree}; |
| 3 | +use bstr::{BStr, BString, ByteSlice, ByteVec}; |
| 4 | +use gix_hash::ObjectId; |
| 5 | +use gix_hashtable::hash_map::Entry; |
| 6 | +use std::cmp::Ordering; |
| 7 | + |
| 8 | +/// The state needed to apply edits instantly to in-memory trees. |
| 9 | +/// |
| 10 | +/// It's made so that each tree is looked at in the object database at most once, and held in memory for |
| 11 | +/// all edits until everything is flushed to write all changed trees. |
| 12 | +/// |
| 13 | +/// The editor is optimized to edit existing trees, but can deal with building entirely new trees as well |
| 14 | +/// with some penalties. |
| 15 | +/// |
| 16 | +/// ### Note |
| 17 | +/// |
| 18 | +/// For reasons of efficiency, internally a SHA1 based hashmap is used to avoid having to store full paths |
| 19 | +/// to each edited tree. The chance of collision is low, but could be engineered to overwrite or write into |
| 20 | +/// an unintended tree. |
| 21 | +pub struct Editor<'a> { |
| 22 | + /// A way to lookup trees. |
| 23 | + find: &'a dyn crate::FindExt, |
| 24 | + /// All trees we currently hold in memory. Each of these may change while adding and removing entries. |
| 25 | + /// null-object-ids mark tree-entries whose value we don't know yet, they are placeholders that will be |
| 26 | + /// dropped when writing at the latest. |
| 27 | + trees: gix_hashtable::HashMap<ObjectId, Tree>, |
| 28 | + /// A buffer to build up paths when finding the tree to edit. |
| 29 | + path_buf: BString, |
| 30 | + /// Our buffer for storing tree-data in, right before decoding it. |
| 31 | + tree_buf: Vec<u8>, |
| 32 | +} |
| 33 | + |
| 34 | +/// Lifecycle |
| 35 | +impl<'a> Editor<'a> { |
| 36 | + /// Create a new editor that uses `root` as base for all edits. Use `find` to lookup existing |
| 37 | + /// trees when edits are made. Each tree will only be looked-up once and then edited in place from |
| 38 | + /// that point on. |
| 39 | + pub fn new(root: Tree, find: &'a dyn crate::FindExt) -> Self { |
| 40 | + Editor { |
| 41 | + find, |
| 42 | + trees: gix_hashtable::HashMap::from_iter(Some((empty_path_hash(), root))), |
| 43 | + path_buf: Vec::with_capacity(256).into(), |
| 44 | + tree_buf: Vec::with_capacity(512), |
| 45 | + } |
| 46 | + } |
| 47 | +} |
| 48 | + |
| 49 | +/// Operations |
| 50 | +impl<'a> Editor<'a> { |
| 51 | + /// Write the entire in-memory state of all changed trees (and only changed trees) to `out`. |
| 52 | + /// |
| 53 | + /// The last call to `out` will be the changed root tree, whose object-id will also be returned. |
| 54 | + /// `out` is free to do any kind of additional validation, like to assure that all entries in the tree exist. |
| 55 | + /// We don't assure that as there is no validation that inserted entries are valid object ids. |
| 56 | + /// |
| 57 | + /// Future calls to [`upsert`](Self::upsert) or similar will keep working on the last seen state of the |
| 58 | + /// just-written root-tree. |
| 59 | + /// If this is not desired, use [set_root()](Self::set_root()). |
| 60 | + pub fn write<E>(&mut self, mut out: impl FnMut(&Tree) -> Result<ObjectId, E>) -> Result<ObjectId, E> { |
| 61 | + assert_ne!(self.trees.len(), 0, "there is at least the root tree"); |
| 62 | + |
| 63 | + // back is for children, front is for parents. |
| 64 | + let mut parents = vec![( |
| 65 | + None::<usize>, |
| 66 | + BString::default(), |
| 67 | + self.trees |
| 68 | + .remove(&empty_path_hash()) |
| 69 | + .expect("root tree is always present"), |
| 70 | + )]; |
| 71 | + let mut children = Vec::new(); |
| 72 | + while let Some((parent_idx, mut rela_path, mut tree)) = children.pop().or_else(|| parents.pop()) { |
| 73 | + let mut all_entries_unchanged_or_written = true; |
| 74 | + for entry in &tree.entries { |
| 75 | + if entry.mode.is_tree() { |
| 76 | + let prev_len = push_path_component(&mut rela_path, &entry.filename); |
| 77 | + if let Some(sub_tree) = self.trees.remove(&path_hash(&rela_path)) { |
| 78 | + all_entries_unchanged_or_written = false; |
| 79 | + let next_parent_idx = parents.len(); |
| 80 | + children.push((Some(next_parent_idx), rela_path.clone(), sub_tree)); |
| 81 | + } |
| 82 | + rela_path.truncate(prev_len); |
| 83 | + } |
| 84 | + } |
| 85 | + if all_entries_unchanged_or_written { |
| 86 | + tree.entries.retain(|e| !e.oid.is_null()); |
| 87 | + if let Some((_, _, parent_to_adjust)) = |
| 88 | + parent_idx.map(|idx| parents.get_mut(idx).expect("always present, pointing towards zero")) |
| 89 | + { |
| 90 | + let name = filename(rela_path.as_bstr()); |
| 91 | + let entry_idx = parent_to_adjust |
| 92 | + .entries |
| 93 | + .binary_search_by(|e| cmp_entry_with_name(e, name, true)) |
| 94 | + .expect("the parent always knows us by name"); |
| 95 | + if tree.entries.is_empty() { |
| 96 | + parent_to_adjust.entries.remove(entry_idx); |
| 97 | + } else { |
| 98 | + parent_to_adjust.entries[entry_idx].oid = out(&tree)?; |
| 99 | + } |
| 100 | + } else if parents.is_empty() { |
| 101 | + debug_assert!(children.is_empty(), "we consume children before parents"); |
| 102 | + debug_assert!(rela_path.is_empty(), "this should always be the root tree"); |
| 103 | + |
| 104 | + // There may be left-over trees if they are replaced with blobs for example. |
| 105 | + let root_tree_id = out(&tree)?; |
| 106 | + self.trees.clear(); |
| 107 | + self.trees.insert(empty_path_hash(), tree); |
| 108 | + return Ok(root_tree_id); |
| 109 | + } else if !tree.entries.is_empty() { |
| 110 | + out(&tree)?; |
| 111 | + } |
| 112 | + } else { |
| 113 | + parents.push((parent_idx, rela_path, tree)); |
| 114 | + } |
| 115 | + } |
| 116 | + |
| 117 | + unreachable!("we exit as soon as everything is consumed") |
| 118 | + } |
| 119 | + |
| 120 | + /// Insert a new entry of `kind` with `id` at `rela_path`, an iterator over each path component in the tree, |
| 121 | + /// like `a/b/c`. Names are matched case-sensitively. |
| 122 | + /// |
| 123 | + /// Existing leaf-entries will be overwritten unconditionally, and it is assumed that `id` is available in the object database |
| 124 | + /// or will be made available at a later point to assure the integrity of the produced tree. |
| 125 | + /// |
| 126 | + /// Intermediate trees will be created if they don't exist in the object database, otherwise they will be loaded and entries |
| 127 | + /// will be inserted into them instead. |
| 128 | + /// |
| 129 | + /// Note that `id` can be [null](ObjectId::null()) to create a placeholder. These will not be written, and paths leading |
| 130 | + /// through them will not be considered a problem. |
| 131 | + /// |
| 132 | + /// `id` can also be an empty tree, along with [the respective `kind`](EntryKind::Tree), even though that's normally not allowed |
| 133 | + /// in Git trees. |
| 134 | + pub fn upsert<I, C>( |
| 135 | + &mut self, |
| 136 | + rela_path: I, |
| 137 | + kind: EntryKind, |
| 138 | + id: ObjectId, |
| 139 | + ) -> Result<&mut Self, crate::find::existing_object::Error> |
| 140 | + where |
| 141 | + I: IntoIterator<Item = C>, |
| 142 | + C: AsRef<BStr>, |
| 143 | + { |
| 144 | + let mut cursor = self.trees.get_mut(&empty_path_hash()).expect("root is always present"); |
| 145 | + self.path_buf.clear(); |
| 146 | + let mut rela_path = rela_path.into_iter().peekable(); |
| 147 | + while let Some(name) = rela_path.next() { |
| 148 | + let name = name.as_ref(); |
| 149 | + let is_last = rela_path.peek().is_none(); |
| 150 | + let mut needs_sorting = false; |
| 151 | + let current_level_must_be_tree = !is_last || kind == EntryKind::Tree; |
| 152 | + let check_type_change = |entry: &tree::Entry| entry.mode.is_tree() != current_level_must_be_tree; |
| 153 | + let tree_to_lookup = match cursor |
| 154 | + .entries |
| 155 | + .binary_search_by(|e| cmp_entry_with_name(e, name, false)) |
| 156 | + .or_else(|file_insertion_idx| { |
| 157 | + cursor |
| 158 | + .entries |
| 159 | + .binary_search_by(|e| cmp_entry_with_name(e, name, true)) |
| 160 | + .map_err(|dir_insertion_index| { |
| 161 | + if current_level_must_be_tree { |
| 162 | + dir_insertion_index |
| 163 | + } else { |
| 164 | + file_insertion_idx |
| 165 | + } |
| 166 | + }) |
| 167 | + }) { |
| 168 | + Ok(idx) => { |
| 169 | + let entry = &mut cursor.entries[idx]; |
| 170 | + if is_last { |
| 171 | + // unconditionally overwrite what's there. |
| 172 | + entry.oid = id; |
| 173 | + needs_sorting = check_type_change(entry); |
| 174 | + entry.mode = kind.into(); |
| 175 | + None |
| 176 | + } else if entry.mode.is_tree() { |
| 177 | + // Possibly lookup the existing tree on our way down the path. |
| 178 | + Some(entry.oid) |
| 179 | + } else { |
| 180 | + // it is no tree, but we are traversing a path, so turn it into one. |
| 181 | + entry.oid = id.kind().null(); |
| 182 | + needs_sorting = check_type_change(entry); |
| 183 | + entry.mode = EntryKind::Tree.into(); |
| 184 | + None |
| 185 | + } |
| 186 | + } |
| 187 | + Err(insertion_idx) => { |
| 188 | + cursor.entries.insert( |
| 189 | + insertion_idx, |
| 190 | + tree::Entry { |
| 191 | + filename: name.into(), |
| 192 | + mode: if is_last { kind.into() } else { EntryKind::Tree.into() }, |
| 193 | + oid: if is_last { id } else { id.kind().null() }, |
| 194 | + }, |
| 195 | + ); |
| 196 | + if is_last { |
| 197 | + break; |
| 198 | + } |
| 199 | + None |
| 200 | + } |
| 201 | + }; |
| 202 | + if needs_sorting { |
| 203 | + cursor.entries.sort(); |
| 204 | + } |
| 205 | + if is_last { |
| 206 | + break; |
| 207 | + } |
| 208 | + push_path_component(&mut self.path_buf, name); |
| 209 | + let path_id = path_hash(&self.path_buf); |
| 210 | + cursor = match self.trees.entry(path_id) { |
| 211 | + Entry::Occupied(e) => e.into_mut(), |
| 212 | + Entry::Vacant(e) => e.insert( |
| 213 | + if let Some(tree_id) = tree_to_lookup.filter(|tree_id| !tree_id.is_empty_tree()) { |
| 214 | + self.find.find_tree(&tree_id, &mut self.tree_buf)?.into() |
| 215 | + } else { |
| 216 | + Tree::default() |
| 217 | + }, |
| 218 | + ), |
| 219 | + }; |
| 220 | + } |
| 221 | + Ok(self) |
| 222 | + } |
| 223 | + |
| 224 | + /// Set the root tree of the modification to `root`, assuring it has a well-known state. |
| 225 | + /// |
| 226 | + /// Note that this erases all previous edits. |
| 227 | + /// |
| 228 | + /// This is useful if the same editor is re-used for various trees. |
| 229 | + pub fn set_root(&mut self, root: Tree) -> &mut Self { |
| 230 | + self.trees.clear(); |
| 231 | + self.trees.insert(empty_path_hash(), root); |
| 232 | + self |
| 233 | + } |
| 234 | +} |
| 235 | + |
| 236 | +fn cmp_entry_with_name(a: &tree::Entry, filename: &BStr, is_tree: bool) -> Ordering { |
| 237 | + let common = a.filename.len().min(filename.len()); |
| 238 | + a.filename[..common].cmp(&filename[..common]).then_with(|| { |
| 239 | + let a = a.filename.get(common).or_else(|| a.mode.is_tree().then_some(&b'/')); |
| 240 | + let b = filename.get(common).or_else(|| is_tree.then_some(&b'/')); |
| 241 | + a.cmp(&b) |
| 242 | + }) |
| 243 | +} |
| 244 | + |
| 245 | +fn filename(path: &BStr) -> &BStr { |
| 246 | + path.rfind_byte(b'/').map_or(path, |pos| &path[pos + 1..]) |
| 247 | +} |
| 248 | + |
| 249 | +fn empty_path_hash() -> ObjectId { |
| 250 | + gix_features::hash::hasher(gix_hash::Kind::Sha1).digest().into() |
| 251 | +} |
| 252 | + |
| 253 | +fn path_hash(path: &[u8]) -> ObjectId { |
| 254 | + let mut hasher = gix_features::hash::hasher(gix_hash::Kind::Sha1); |
| 255 | + hasher.update(path); |
| 256 | + hasher.digest().into() |
| 257 | +} |
| 258 | + |
| 259 | +fn push_path_component(base: &mut BString, component: &[u8]) -> usize { |
| 260 | + let prev_len = base.len(); |
| 261 | + debug_assert!(base.last() != Some(&b'/')); |
| 262 | + if !base.is_empty() { |
| 263 | + base.push_byte(b'/'); |
| 264 | + } |
| 265 | + base.push_str(component); |
| 266 | + prev_len |
| 267 | +} |
0 commit comments