Skip to content

Commit e738acc

Browse files
committed
feat: add InMemoryPassThrough implementation.
An implementation of `Header`, `Write` and `Find` that can optionally write everything to an in-memory store and, if enabled, also read objects back from there. That way it can present a consistent view of objects from two locations.
1 parent b279957 commit e738acc

File tree

6 files changed

+275
-0
lines changed

6 files changed

+275
-0
lines changed

Cargo.lock

+1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

gix-odb/Cargo.toml

+1
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ serde = ["dep:serde", "gix-hash/serde", "gix-object/serde", "gix-pack/serde"]
2121

2222
[dependencies]
2323
gix-features = { version = "^0.38.2", path = "../gix-features", features = ["rustsha1", "walkdir", "zlib", "crc32"] }
24+
gix-hashtable = { version = "^0.5.2", path = "../gix-hashtable" }
2425
gix-hash = { version = "^0.14.2", path = "../gix-hash" }
2526
gix-date = { version = "^0.9.0", path = "../gix-date" }
2627
gix-path = { version = "^0.10.10", path = "../gix-path" }

gix-odb/src/lib.rs

+3
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,9 @@ pub fn sink(object_hash: gix_hash::Kind) -> Sink {
6666
}
6767
}
6868

69+
///
70+
pub mod memory;
71+
6972
mod sink;
7073

7174
///

gix-odb/src/memory.rs

+166
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,166 @@
1+
use crate::find::Header;
2+
use gix_object::Data;
3+
use std::cell::RefCell;
4+
use std::ops::{Deref, DerefMut};
5+
6+
/// An object database that reads from any implementation but writes to memory.
7+
/// Previously written objects can be returned from memory upon query, which
8+
/// makes the view of objects consistent, but its impact is temporary unless the in-memory
9+
/// objects (see [`take_object_memory()`](Proxy::take_object_memory)) are persisted in a separate step.
10+
///
11+
/// It's possible to turn off the memory by removing it from the instance.
12+
pub struct Proxy<T> {
13+
/// The actual odb implementation, consulted whenever an object isn't found in `memory`.
14+
inner: T,
15+
/// The kind of hash to produce when writing new objects into `memory`.
16+
object_hash: gix_hash::Kind,
17+
/// The storage for in-memory objects, wrapped in a `RefCell` as writes happen through `&self`.
18+
/// If `None`, the proxy will always read from and write-through to `inner`.
19+
memory: Option<RefCell<Storage>>,
20+
}
21+
22+
/// Lifecycle
23+
impl<T> Proxy<T> {
24+
/// Create a new instance using `odb` as actual object provider, with an empty in-memory store for
25+
/// objects that are to be written.
26+
/// Use `object_hash` to determine the kind of hash to produce when writing new objects.
27+
pub fn new(odb: T, object_hash: gix_hash::Kind) -> Proxy<T> {
28+
Proxy {
29+
inner: odb,
30+
object_hash,
31+
memory: Some(Default::default()),
32+
}
33+
}
34+
}
35+
36+
/// Lifecycle
37+
impl<T> Proxy<T> {
38+
/// Take all the objects in memory so far, with the memory storage itself and return it.
39+
///
40+
/// The instance will remain in a state where it won't be able to store objects in memory at all,
41+
/// they will now be stored in the underlying object database.
42+
///
43+
/// To avoid that, use [`reset_object_memory()`](Self::reset_object_memory()) or return the storage
44+
/// using [`set_object_memory()`](Self::set_object_memory()).
45+
pub fn take_object_memory(&mut self) -> Option<Storage> {
46+
self.memory.take().map(|mem| mem.into_inner())
47+
}
48+
49+
/// Set the object storage to contain only `new` objects, and return whichever objects were there previously.
50+
pub fn set_object_memory(&mut self, new: Storage) -> Option<Storage> {
51+
let previous = self.take_object_memory();
52+
self.memory = Some(RefCell::new(new));
53+
previous
54+
}
55+
56+
/// Reset the internal storage to be empty, and return the previous storage, with all objects
57+
/// it contained.
58+
///
59+
/// Note that this does nothing if this instance didn't contain object memory in the first place.
60+
/// In that case, set it explicitly.
61+
pub fn reset_object_memory(&self) -> Option<Storage> {
62+
self.memory.as_ref().map(|m| std::mem::take(&mut *m.borrow_mut()))
63+
}
64+
65+
/// Return the amount of objects currently stored in memory.
66+
pub fn num_objects_in_memory(&self) -> usize {
67+
self.memory.as_ref().map_or(0, |m| m.borrow().len())
68+
}
69+
}
70+
71+
impl<T> gix_object::Find for Proxy<T>
72+
where
73+
T: gix_object::Find,
74+
{
75+
fn try_find<'a>(
76+
&self,
77+
id: &gix_hash::oid,
78+
buffer: &'a mut Vec<u8>,
79+
) -> Result<Option<Data<'a>>, gix_object::find::Error> {
80+
if let Some(map) = self.memory.as_ref() {
81+
let map = map.borrow();
82+
if let Some((kind, data)) = map.get(id) {
83+
buffer.clear();
84+
buffer.extend_from_slice(data);
85+
return Ok(Some(Data {
86+
kind: *kind,
87+
data: &*buffer,
88+
}));
89+
}
90+
}
91+
self.inner.try_find(id, buffer)
92+
}
93+
}
94+
95+
impl<T> crate::Header for Proxy<T>
96+
where
97+
T: crate::Header,
98+
{
99+
fn try_header(&self, id: &gix_hash::oid) -> Result<Option<Header>, gix_object::find::Error> {
100+
if let Some(map) = self.memory.as_ref() {
101+
let map = map.borrow();
102+
if let Some((kind, data)) = map.get(id) {
103+
return Ok(Some(Header::Loose {
104+
kind: *kind,
105+
size: data.len() as u64,
106+
}));
107+
}
108+
}
109+
self.inner.try_header(id)
110+
}
111+
}
112+
113+
impl<T> crate::Write for Proxy<T>
114+
where
115+
T: crate::Write,
116+
{
117+
fn write_stream(
118+
&self,
119+
kind: gix_object::Kind,
120+
size: u64,
121+
from: &mut dyn std::io::Read,
122+
) -> Result<gix_hash::ObjectId, crate::write::Error> {
123+
let Some(map) = self.memory.as_ref() else {
124+
return self.inner.write_stream(kind, size, from);
125+
};
126+
127+
let mut buf = Vec::new();
128+
from.read_to_end(&mut buf)?;
129+
130+
let id = gix_object::compute_hash(self.object_hash, kind, &buf);
131+
map.borrow_mut().insert(id, (kind, buf));
132+
Ok(id)
133+
}
134+
}
135+
136+
impl<T> Deref for Proxy<T> {
137+
type Target = T;
138+
139+
fn deref(&self) -> &Self::Target {
140+
&self.inner
141+
}
142+
}
143+
144+
impl<T> DerefMut for Proxy<T> {
145+
fn deref_mut(&mut self) -> &mut Self::Target {
146+
&mut self.inner
147+
}
148+
}
149+
150+
/// A mapping between an object id and all data corresponding to an object, acting like a
/// `HashMap<ObjectId, (Kind, Vec<u8>)>` whose values hold the object kind and the object's bytes.
151+
#[derive(Default, Debug, Clone, Eq, PartialEq)]
152+
pub struct Storage(gix_hashtable::HashMap<gix_hash::ObjectId, (gix_object::Kind, Vec<u8>)>);
153+
154+
impl Deref for Storage {
155+
type Target = gix_hashtable::HashMap<gix_hash::ObjectId, (gix_object::Kind, Vec<u8>)>;
156+
157+
fn deref(&self) -> &Self::Target {
158+
&self.0
159+
}
160+
}
161+
162+
impl DerefMut for Storage {
163+
fn deref_mut(&mut self) -> &mut Self::Target {
164+
&mut self.0
165+
}
166+
}

gix-odb/tests/odb/memory.rs

+103
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
use crate::odb::hex_to_id;
2+
use gix_object::{tree, FindExt};
3+
use gix_odb::{Header, HeaderExt, Write};
4+
use gix_testtools::tempfile::TempDir;
5+
6+
/// Once the object memory was taken out of the proxy, existing objects can still be read,
/// and a newly written tree can be looked up again by header and by content.
#[test]
7+
fn without_memory() -> crate::Result {
8+
// `db_rw()` is used here, `_tmp` is kept alive alongside the proxy for the duration of the test.
let (mut odb, _tmp) = db_rw()?;
9+
let mut buf = Vec::new();
10+
// Disable the object memory - from here on the proxy holds no in-memory store.
let mem = odb.take_object_memory().expect("it starts out with memory set");
11+
assert_eq!(mem.len(), 0, "no object is stored initially");
12+
let existing = hex_to_id("21d3ba9a26b790a4858d67754ae05d04dfce4d0c");
13+
let tree = odb.find_tree(&existing, &mut buf).expect("present and valid");
14+
assert_eq!(tree.entries.len(), 1);
15+
odb.header(existing).expect("header can be found just the same");
16+
17+
// Derive a new tree from the existing one by adding an entry that reuses `existing` as its object id.
let mut tree = tree.to_owned();
18+
tree.entries.push(tree::Entry {
19+
mode: tree::EntryKind::Blob.into(),
20+
filename: "z-for-sorting_another-file-with-same-content".into(),
21+
oid: existing,
22+
});
23+
let new_tree_id = odb.write(&tree)?;
24+
assert_eq!(new_tree_id, hex_to_id("249b0b4106a5e9e7875e446a26468e22ec47a05c"));
25+
let actual = odb.header(new_tree_id).expect("header of new objects can be found");
26+
assert_eq!(actual.kind(), gix_object::Kind::Tree);
27+
assert_eq!(actual.size(), 104);
28+
29+
// The tree read back must be equal to the one that was written.
let new_tree = odb
30+
.find_tree(&new_tree_id, &mut buf)
31+
.expect("new tree is also available as object")
32+
.to_owned();
33+
assert_eq!(new_tree, tree);
34+
35+
Ok(())
36+
}
37+
38+
/// With the object memory in place, a newly written tree is counted as in-memory object and can
/// be looked up through the proxy until the memory is reset; putting the storage back restores it.
#[test]
39+
fn with_memory() -> crate::Result {
40+
let mut odb = db()?;
41+
// `*odb` dereferences to the wrapped database, so this counts its objects, not the ones in memory.
assert_eq!(
42+
(*odb).iter()?.count(),
43+
6,
44+
"let's be sure we didn't accidentally write anything"
45+
);
46+
let mut buf = Vec::new();
47+
let existing = hex_to_id("21d3ba9a26b790a4858d67754ae05d04dfce4d0c");
48+
let tree = odb.find_tree(&existing, &mut buf).expect("present and valid");
49+
assert_eq!(tree.entries.len(), 1);
50+
odb.header(existing).expect("header can be found just the same");
51+
assert_eq!(
52+
odb.num_objects_in_memory(),
53+
0,
54+
"nothing is stored when fetching objects - it's not an object cache"
55+
);
56+
57+
// Derive a new tree from the existing one by adding an entry that reuses `existing` as its object id.
let mut tree = tree.to_owned();
58+
tree.entries.push(tree::Entry {
59+
mode: tree::EntryKind::Blob.into(),
60+
filename: "z-for-sorting_another-file-with-same-content".into(),
61+
oid: existing,
62+
});
63+
let new_tree_id = odb.write(&tree)?;
64+
assert_eq!(new_tree_id, hex_to_id("249b0b4106a5e9e7875e446a26468e22ec47a05c"));
65+
let actual = odb.header(new_tree_id).expect("header of new objects can be found");
66+
assert_eq!(actual.kind(), gix_object::Kind::Tree);
67+
assert_eq!(actual.size(), 104);
68+
69+
// The tree read back must be equal to the one that was written.
let new_tree = odb
70+
.find_tree(&new_tree_id, &mut buf)
71+
.expect("new tree is also available as object")
72+
.to_owned();
73+
assert_eq!(new_tree, tree);
74+
75+
// Resetting hands out the storage with the written object and leaves an empty storage in the proxy.
let mem = odb.reset_object_memory().expect("memory is still available");
76+
assert_eq!(mem.len(), 1, "one new object was just written");
77+
78+
assert_eq!(
79+
odb.try_header(&new_tree_id)?,
80+
None,
81+
"without memory, the object can't be found anymore"
82+
);
83+
84+
// Put the objects back - what comes out is the empty storage left behind by the reset.
let prev_mem = odb.set_object_memory(mem).expect("reset means it's just cleared");
85+
assert_eq!(prev_mem.len(), 0, "nothing was stored after the reset");
86+
87+
assert_eq!(odb.num_objects_in_memory(), 1, "we put all previous objects back");
88+
89+
Ok(())
90+
}
91+
92+
fn db() -> crate::Result<gix_odb::memory::Proxy<gix_odb::Handle>> {
93+
let odb = gix_odb::at(
94+
gix_testtools::scripted_fixture_read_only_standalone("repo_with_loose_objects.sh")?.join(".git/objects"),
95+
)?;
96+
Ok(gix_odb::memory::Proxy::new(odb, gix_hash::Kind::Sha1))
97+
}
98+
99+
fn db_rw() -> crate::Result<(gix_odb::memory::Proxy<gix_odb::Handle>, TempDir)> {
100+
let tmp = gix_testtools::scripted_fixture_writable_standalone("repo_with_loose_objects.sh")?;
101+
let odb = gix_odb::at(tmp.path().join(".git/objects"))?;
102+
Ok((gix_odb::memory::Proxy::new(odb, gix_hash::Kind::Sha1), tmp))
103+
}

gix-odb/tests/odb/mod.rs

+1
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ fn db_small_packs() -> gix_odb::Handle {
1818
pub mod alternate;
1919
pub mod find;
2020
pub mod header;
21+
pub mod memory;
2122
pub mod regression;
2223
pub mod sink;
2324
pub mod store;

0 commit comments

Comments
 (0)