Skip to content

Commit bbdb34f

Browse files
committed
feat: cacheless iterator for hamt
1 parent fe4d5c1 commit bbdb34f

File tree

5 files changed

+208
-45
lines changed

5 files changed

+208
-45
lines changed

ipld/hamt/src/hamt.rs

Lines changed: 14 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ use multihash_codetable::Code;
1313
use serde::de::DeserializeOwned;
1414
use serde::{Serialize, Serializer};
1515

16-
use crate::iter::IterImpl;
16+
use crate::iter::{IterImpl, IterItem};
1717
use crate::node::Node;
1818
use crate::pointer::version::Version;
1919
use crate::{Config, Error, Hash, HashAlgorithm, Sha256, pointer::version};
@@ -372,12 +372,13 @@ where
372372
#[inline]
373373
pub fn for_each<F>(&self, mut f: F) -> Result<(), Error>
374374
where
375-
V: DeserializeOwned,
375+
K: Clone,
376+
V: DeserializeOwned + Clone,
376377
F: FnMut(&K, &V) -> anyhow::Result<()>,
377378
{
378379
for res in self {
379380
let (k, v) = res?;
380-
(f)(k, v)?;
381+
(f)(k.as_ref(), v.as_ref())?;
381382
}
382383
Ok(())
383384
}
@@ -430,7 +431,7 @@ where
430431
where
431432
K: Borrow<Q> + Clone,
432433
Q: Eq + Hash + ?Sized,
433-
V: DeserializeOwned,
434+
V: DeserializeOwned + Clone,
434435
F: FnMut(&K, &V) -> anyhow::Result<()>,
435436
{
436437
let mut iter = match &starting_key {
@@ -441,10 +442,10 @@ where
441442
let mut traversed = 0usize;
442443
for res in iter.by_ref().take(max.unwrap_or(usize::MAX)) {
443444
let (k, v) = res?;
444-
(f)(k, v)?;
445+
(f)(k.as_ref(), v.as_ref())?;
445446
traversed += 1;
446447
}
447-
let next = iter.next().transpose()?.map(|kv| kv.0).cloned();
448+
let next = iter.next().transpose()?.map(|kv| kv.0.as_ref().clone());
448449
Ok((traversed, next))
449450
}
450451

@@ -456,8 +457,8 @@ where
456457

457458
impl<BS, V, K, H, Ver> HamtImpl<BS, V, K, H, Ver>
458459
where
459-
K: DeserializeOwned + PartialOrd,
460-
V: DeserializeOwned,
460+
K: DeserializeOwned + PartialOrd + Clone,
461+
V: DeserializeOwned + Clone,
461462
Ver: Version,
462463
BS: Blockstore,
463464
{
@@ -513,10 +514,10 @@ where
513514
/// for res in hamt.iter_from(results.last().unwrap().0)?.skip(1) {
514515
/// results.push((res?));
515516
/// }
516-
/// results.sort_by_key(|kv| kv.1);
517+
/// results.sort_by_key(|kv| kv.1.clone());
517518
///
518519
/// // Assert that we got out what we put in.
519-
/// let results: Vec<_> = results.into_iter().map(|(k, v)|(k.clone(), v.clone())).collect();
520+
/// let results: Vec<_> = results.into_iter().map(|(k, v)|(k.clone(), v.as_ref().clone())).collect();
520521
/// assert_eq!(kvs, results);
521522
///
522523
/// # anyhow::Ok(())
@@ -533,12 +534,12 @@ where
533534

534535
impl<'a, BS, V, K, H, Ver> IntoIterator for &'a HamtImpl<BS, V, K, H, Ver>
535536
where
536-
K: DeserializeOwned + PartialOrd,
537-
V: DeserializeOwned,
537+
K: DeserializeOwned + PartialOrd + Clone,
538+
V: DeserializeOwned + Clone,
538539
Ver: Version,
539540
BS: Blockstore,
540541
{
541-
type Item = Result<(&'a K, &'a V), Error>;
542+
type Item = Result<(IterItem<'a, K>, IterItem<'a, V>), Error>;
542543
type IntoIter = IterImpl<'a, BS, V, K, H, Ver>;
543544

544545
fn into_iter(self) -> Self::IntoIter {

ipld/hamt/src/iter.rs

Lines changed: 162 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
// Copyright 2021-2023 Protocol Labs
22
// SPDX-License-Identifier: Apache-2.0, MIT
33
use std::borrow::Borrow;
4+
use std::fmt::Display;
45
use std::iter::FusedIterator;
56

67
use forest_hash_utils::BytesKey;
@@ -17,8 +18,8 @@ use crate::{Config, Error, Hash, HashAlgorithm, KeyValuePair, Sha256};
1718
pub struct IterImpl<'a, BS, V, K = BytesKey, H = Sha256, Ver = version::V3> {
1819
store: &'a BS,
1920
conf: &'a Config,
20-
stack: Vec<std::slice::Iter<'a, Pointer<K, V, H, Ver>>>,
21-
current: std::slice::Iter<'a, KeyValuePair<K, V>>,
21+
stack: Vec<StackItem<'a, Pointer<K, V, H, Ver>>>,
22+
current: StackItem<'a, KeyValuePair<K, V>>,
2223
}
2324

2425
/// Iterator over HAMT Key/Value tuples (hamt v0).
@@ -27,19 +28,90 @@ pub type Iterv0<'a, BS, V, K = BytesKey, H = Sha256> = IterImpl<'a, BS, V, K, H,
2728
/// Iterator over HAMT Key/Value tuples.
2829
pub type Iter<'a, BS, V, K = BytesKey, H = Sha256> = IterImpl<'a, BS, V, K, H, version::V3>;
2930

31+
enum StackItem<'a, V> {
32+
Iter(std::slice::Iter<'a, V>),
33+
IntoIter(std::vec::IntoIter<V>),
34+
}
35+
36+
impl<'a, V> From<std::slice::Iter<'a, V>> for StackItem<'a, V> {
37+
fn from(value: std::slice::Iter<'a, V>) -> Self {
38+
Self::Iter(value)
39+
}
40+
}
41+
42+
impl<'a, V> From<std::vec::IntoIter<V>> for StackItem<'a, V> {
43+
fn from(value: std::vec::IntoIter<V>) -> Self {
44+
Self::IntoIter(value)
45+
}
46+
}
47+
48+
impl<'a, V> Iterator for StackItem<'a, V> {
49+
type Item = IterItem<'a, V>;
50+
51+
fn next(&mut self) -> Option<Self::Item> {
52+
match self {
53+
Self::Iter(it) => it.next().map(|i| i.into()),
54+
Self::IntoIter(it) => it.next().map(|i| i.into()),
55+
}
56+
}
57+
}
58+
59+
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
60+
pub enum IterItem<'a, V> {
61+
Borrowed(&'a V),
62+
Owned(V),
63+
}
64+
65+
impl<V: Display> Display for IterItem<'_, V> {
66+
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
67+
self.as_ref().fmt(f)
68+
}
69+
}
70+
71+
impl<V> AsRef<V> for IterItem<'_, V> {
72+
fn as_ref(&self) -> &V {
73+
match self {
74+
Self::Borrowed(v) => v,
75+
Self::Owned(v) => v,
76+
}
77+
}
78+
}
79+
80+
impl<V: PartialEq> PartialEq<V> for IterItem<'_, V> {
81+
fn eq(&self, other: &V) -> bool {
82+
self.as_ref().eq(other)
83+
}
84+
85+
fn ne(&self, other: &V) -> bool {
86+
self.as_ref().ne(other)
87+
}
88+
}
89+
90+
impl<'a, V> From<V> for IterItem<'a, V> {
91+
fn from(value: V) -> Self {
92+
Self::Owned(value)
93+
}
94+
}
95+
96+
impl<'a, V> From<&'a V> for IterItem<'a, V> {
97+
fn from(value: &'a V) -> Self {
98+
Self::Borrowed(value)
99+
}
100+
}
101+
30102
impl<'a, K, V, BS, H, Ver> IterImpl<'a, BS, V, K, H, Ver>
31103
where
32-
K: DeserializeOwned,
33-
V: DeserializeOwned,
104+
K: DeserializeOwned + Clone,
105+
V: DeserializeOwned + Clone,
34106
Ver: Version,
35107
BS: Blockstore,
36108
{
37109
pub(crate) fn new(store: &'a BS, root: &'a Node<K, V, H, Ver>, conf: &'a Config) -> Self {
38110
Self {
39111
conf,
40112
store,
41-
stack: vec![root.pointers.iter()],
42-
current: [].iter(),
113+
stack: vec![root.pointers.iter().into()],
114+
current: [].iter().into(),
43115
}
44116
}
45117

@@ -56,24 +128,52 @@ where
56128
{
57129
let hashed_key = H::hash(key);
58130
let mut hash = HashBits::new(&hashed_key);
59-
let mut node = root;
131+
let mut node = IterItem::Borrowed(root);
60132
let mut stack = Vec::new();
61133
loop {
62134
let idx = hash.next(conf.bit_width)?;
63-
stack.push(node.pointers[node.index_for_bit_pos(idx)..].iter());
135+
match node.clone() {
136+
IterItem::Borrowed(node) => {
137+
stack.push(StackItem::from(
138+
node.pointers[node.index_for_bit_pos(idx)..].iter(),
139+
));
140+
}
141+
IterItem::Owned(node) => {
142+
stack.push(StackItem::from(
143+
node.pointers[node.index_for_bit_pos(idx)..]
144+
.to_vec()
145+
.into_iter(),
146+
));
147+
}
148+
}
64149
node = match stack.last_mut().unwrap().next() {
65150
Some(p) => match p {
66-
Pointer::Link { cid, cache } => cache.get_or_try_init(|| {
67-
Node::load(conf, store, cid, stack.len() as u32).map(Box::new)
68-
})?,
69-
Pointer::Dirty(node) => node,
70-
Pointer::Values(values) => {
151+
IterItem::Borrowed(Pointer::Link { cid, cache: _ }) => {
152+
Node::load(conf, store, cid, stack.len() as u32)?.into()
153+
}
154+
IterItem::Owned(Pointer::Link { cid, cache: _ }) => {
155+
Node::load(conf, store, &cid, stack.len() as u32)?.into()
156+
}
157+
IterItem::Borrowed(Pointer::Dirty(node)) => node.as_ref().into(),
158+
IterItem::Owned(Pointer::Dirty(node)) => (*node).into(),
159+
IterItem::Borrowed(Pointer::Values(values)) => {
160+
return match values.iter().position(|kv| kv.key().borrow() == key) {
161+
Some(offset) => Ok(Self {
162+
conf,
163+
store,
164+
stack,
165+
current: values[offset..].iter().into(),
166+
}),
167+
None => Err(Error::StartKeyNotFound),
168+
};
169+
}
170+
IterItem::Owned(Pointer::Values(values)) => {
71171
return match values.iter().position(|kv| kv.key().borrow() == key) {
72172
Some(offset) => Ok(Self {
73173
conf,
74174
store,
75175
stack,
76-
current: values[offset..].iter(),
176+
current: values[offset..].to_vec().into_iter().into(),
77177
}),
78178
None => Err(Error::StartKeyNotFound),
79179
};
@@ -92,33 +192,64 @@ where
92192
K: DeserializeOwned + PartialOrd,
93193
V: DeserializeOwned,
94194
{
95-
type Item = Result<(&'a K, &'a V), Error>;
195+
type Item = Result<(IterItem<'a, K>, IterItem<'a, V>), Error>;
96196

97197
fn next(&mut self) -> Option<Self::Item> {
98-
if let Some(v) = self.current.next() {
99-
return Some(Ok((v.key(), v.value())));
198+
match self.current.next() {
199+
Some(IterItem::Borrowed(v)) => return Some(Ok((v.key().into(), v.value().into()))),
200+
Some(IterItem::Owned(KeyValuePair(k, v))) => return Some(Ok((k.into(), v.into()))),
201+
_ => {}
100202
}
101203
loop {
102204
let Some(next) = self.stack.last_mut()?.next() else {
103205
self.stack.pop();
104206
continue;
105207
};
106208
match next {
107-
Pointer::Link { cid, cache } => {
108-
let node = match cache.get_or_try_init(|| {
109-
Node::load(self.conf, &self.store, cid, self.stack.len() as u32)
110-
.map(Box::new)
111-
}) {
112-
Ok(node) => node,
113-
Err(e) => return Some(Err(e)),
114-
};
115-
self.stack.push(node.pointers.iter())
209+
IterItem::Borrowed(Pointer::Link { cid, cache: _ }) => {
210+
let node =
211+
match Node::load(self.conf, &self.store, cid, self.stack.len() as u32) {
212+
Ok(node) => node,
213+
Err(e) => return Some(Err(e)),
214+
};
215+
self.stack.push(node.pointers.into_iter().into())
216+
}
217+
IterItem::Owned(Pointer::Link { cid, cache: _ }) => {
218+
let node =
219+
match Node::load(self.conf, &self.store, &cid, self.stack.len() as u32) {
220+
Ok(node) => node,
221+
Err(e) => return Some(Err(e)),
222+
};
223+
self.stack.push(node.pointers.into_iter().into())
224+
}
225+
IterItem::Borrowed(Pointer::Dirty(node)) => {
226+
self.stack.push(node.pointers.iter().into())
227+
}
228+
IterItem::Owned(Pointer::Dirty(node)) => {
229+
self.stack.push(node.pointers.into_iter().into())
230+
}
231+
IterItem::Borrowed(Pointer::Values(kvs)) => {
232+
self.current = kvs.iter().into();
233+
match self.current.next() {
234+
Some(IterItem::Borrowed(v)) => {
235+
return Some(Ok((v.key().into(), v.value().into())));
236+
}
237+
Some(IterItem::Owned(KeyValuePair(k, v))) => {
238+
return Some(Ok((k.into(), v.into())));
239+
}
240+
_ => {}
241+
}
116242
}
117-
Pointer::Dirty(node) => self.stack.push(node.pointers.iter()),
118-
Pointer::Values(kvs) => {
119-
self.current = kvs.iter();
120-
if let Some(v) = self.current.next() {
121-
return Some(Ok((v.key(), v.value())));
243+
IterItem::Owned(Pointer::Values(kvs)) => {
244+
self.current = kvs.into_iter().into();
245+
match self.current.next() {
246+
Some(IterItem::Borrowed(v)) => {
247+
return Some(Ok((v.key().into(), v.value().into())));
248+
}
249+
Some(IterItem::Owned(KeyValuePair(k, v))) => {
250+
return Some(Ok((k.into(), v.into())));
251+
}
252+
_ => {}
122253
}
123254
}
124255
}

ipld/hamt/src/lib.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ impl Default for Config {
7575

7676
type HashedKey = [u8; 32];
7777

78-
#[derive(Debug, Serialize, Deserialize, PartialEq)]
78+
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
7979
struct KeyValuePair<K, V>(K, V);
8080

8181
impl<K, V> KeyValuePair<K, V> {

ipld/hamt/src/node.rs

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,20 @@ pub(crate) struct Node<K, V, H, Ver = version::V3> {
2929
hash: PhantomData<H>,
3030
}
3131

32+
impl<K, V, H, Ver> Clone for Node<K, V, H, Ver>
33+
where
34+
K: Clone,
35+
V: Clone,
36+
{
37+
fn clone(&self) -> Self {
38+
Self {
39+
bitfield: self.bitfield.clone(),
40+
pointers: self.pointers.clone(),
41+
hash: Default::default(),
42+
}
43+
}
44+
}
45+
3246
impl<K: PartialEq, V: PartialEq, H, Ver> PartialEq for Node<K, V, H, Ver> {
3347
fn eq(&self, other: &Self) -> bool {
3448
(self.bitfield == other.bitfield) && (self.pointers == other.pointers)

ipld/hamt/src/pointer.rs

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,23 @@ pub(crate) enum Pointer<K, V, H, Ver = version::V3> {
4646
Dirty(Box<Node<K, V, H, Ver>>),
4747
}
4848

49+
impl<K, V, H, Ver> Clone for Pointer<K, V, H, Ver>
50+
where
51+
K: Clone,
52+
V: Clone,
53+
{
54+
fn clone(&self) -> Self {
55+
match self {
56+
Self::Values(v) => Self::Values(v.clone()),
57+
Self::Link { cid, cache: _ } => Self::Link {
58+
cid: *cid,
59+
cache: Default::default(),
60+
},
61+
Self::Dirty(n) => Self::Dirty(n.clone()),
62+
}
63+
}
64+
}
65+
4966
impl<K: PartialEq, V: PartialEq, H, Ver> PartialEq for Pointer<K, V, H, Ver> {
5067
fn eq(&self, other: &Self) -> bool {
5168
match (self, other) {

0 commit comments

Comments
 (0)