
Commit e62bc97

chore: update blake3 and use new hazmat api (#63)
* minimal change to adapt to the new guts api of upstream blake3
* Use new API everywhere
* Adapt to latest ~guts~ hazmat api
* Update hazmat dep
* Use published hazmat API
* Use div_ceil to get clippy to shut up. TBH I think the code was fine before, but whatever...
* require serde for mixed for now
* fix imports
* remove last mentions of guts
* remove test that compared guts hash_subtree with our own recursive_hash_subtree. The bug is gone now, and since this is now an official feature of blake3 we rely on their tests.
1 parent e5509af commit e62bc97
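
In short (summarized from the diffs below): the crate drops the iroh-blake3 fork and its blake3::guts API in favor of upstream blake3 1.8 and its blake3::hazmat module. A rough sketch of the old-to-new mapping; the helper names parent_cv_sketch and hash_subtree_sketch are illustrative only, the real helpers live in src/lib.rs:

use blake3::hazmat::{merge_subtrees_non_root, merge_subtrees_root, ChainingValue, HasherExt, Mode};

// guts::parent_cv(&l, &r, is_root) becomes merge_subtrees_root / merge_subtrees_non_root.
fn parent_cv_sketch(l: &blake3::Hash, r: &blake3::Hash, is_root: bool) -> blake3::Hash {
    let (l, r): (ChainingValue, ChainingValue) = (*l.as_bytes(), *r.as_bytes());
    if is_root {
        merge_subtrees_root(&l, &r, Mode::Hash)
    } else {
        blake3::Hash::from(merge_subtrees_non_root(&l, &r, Mode::Hash))
    }
}

// guts::hash_subtree(start_chunk, data, is_root) becomes a plain Hasher with an input
// offset, finalized as a non-root chaining value (chunks are 1024 bytes in BLAKE3).
fn hash_subtree_sketch(start_chunk: u64, data: &[u8], is_root: bool) -> blake3::Hash {
    if is_root {
        blake3::hash(data)
    } else {
        let mut hasher = blake3::Hasher::new();
        hasher.set_input_offset(start_chunk * 1024);
        hasher.update(data);
        blake3::Hash::from(hasher.finalize_non_root())
    }
}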

File tree

10 files changed: +65 -104 lines changed

Cargo.lock

+14-21
Generated file; diff not rendered.

Cargo.toml

+3-2
@@ -13,7 +13,7 @@ rust-version = "1.75"
 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
 
 [dependencies]
-iroh-blake3 = "1.4.3"
+
 range-collections = { version = "0.4.5", features = ["new_unchecked"] }
 smallvec = "1"
 
@@ -24,13 +24,14 @@ iroh-io = { version = "0.6.0", default-features = false, optional = true }
 positioned-io = { version = "0.3.1", default-features = false }
 genawaiter = { version = "0.99.1", features = ["futures03"], optional = true }
 tokio = { version = "1", features = ["sync"], default-features = false, optional = true }
+blake3 = { version = "1.8" }
 serde = { version = "1", features = ["derive"], optional = true }
 
 [features]
 serde = ["dep:serde", "bytes/serde"]
 tokio_fsm = ["dep:futures-lite", "dep:iroh-io"]
 validate = ["dep:genawaiter"]
-experimental-mixed = ["dep:tokio"]
+experimental-mixed = ["dep:tokio", "dep:serde"]
 default = ["tokio_fsm", "validate", "serde"]
 
 [dev-dependencies]

benches/tree_bench.rs

-5
@@ -58,11 +58,6 @@ fn hash_benches_large(c: &mut Criterion) {
             hasher.finalize()
         })
     });
-    c.bench_function("hash_subtree", |b| {
-        b.iter(|| {
-            blake3::guts::hash_subtree(0, &data, true);
-        })
-    });
 }
 
 criterion_group!(benches, offset_benches, iter_benches, hash_benches_large,);

src/io/fsm.rs

+5-5
@@ -14,7 +14,6 @@ use std::{
     result,
 };
 
-use blake3::guts::parent_cv;
 use bytes::Bytes;
 pub use iroh_io::{AsyncSliceReader, AsyncSliceWriter};
 use iroh_io::{AsyncStreamReader, AsyncStreamWriter};
@@ -30,6 +29,7 @@ use crate::{
         Leaf, Parent,
     },
     iter::{BaoChunk, ResponseIter},
+    parent_cv,
     rec::{encode_selected_rec, truncate_ranges, truncate_ranges_owned},
     BaoTree, BlockSize, ChunkRanges, ChunkRangesRef, TreeNode,
 };
@@ -757,8 +757,8 @@ mod validate {
 
     use super::Outboard;
     use crate::{
-        blake3, hash_subtree, io::LocalBoxFuture, rec::truncate_ranges, split, BaoTree, ChunkNum,
-        ChunkRangesRef, TreeNode,
+        blake3, hash_subtree, io::LocalBoxFuture, parent_cv, rec::truncate_ranges, split, BaoTree,
+        ChunkNum, ChunkRangesRef, TreeNode,
     };
 
     /// Given a data file and an outboard, compute all valid ranges.
@@ -867,7 +867,7 @@ mod validate {
                 // outboard is incomplete, we can't validate
                 return Ok(());
             };
-            let actual = blake3::guts::parent_cv(&l_hash, &r_hash, is_root);
+            let actual = parent_cv(&l_hash, &r_hash, is_root);
             if &actual != parent_hash {
                 // hash mismatch, we can't validate
                 return Ok(());
@@ -970,7 +970,7 @@ mod validate {
                 // outboard is incomplete, we can't validate
                 return Ok(());
             };
-            let actual = blake3::guts::parent_cv(&l_hash, &r_hash, is_root);
+            let actual = parent_cv(&l_hash, &r_hash, is_root);
             if &actual != parent_hash {
                 // hash mismatch, we can't validate
                 return Ok(());

src/io/mixed.rs

+6-9
@@ -1,16 +1,15 @@
 //! Read from sync, send to tokio sender
 use std::{future::Future, result};
 
+use blake3;
 use bytes::Bytes;
-use iroh_blake3 as blake3;
-use iroh_blake3::guts::parent_cv;
 use serde::{Deserialize, Serialize};
 use smallvec::SmallVec;
 
 use super::{sync::Outboard, EncodeError, Leaf, Parent};
 use crate::{
-    hash_subtree, iter::BaoChunk, rec::truncate_ranges, split_inner, ChunkNum, ChunkRangesRef,
-    TreeNode,
+    hash_subtree, iter::BaoChunk, parent_cv, rec::truncate_ranges, split_inner, ChunkNum,
+    ChunkRangesRef, TreeNode,
 };
 
 /// A content item for the bao streaming protocol.
@@ -230,7 +229,7 @@ pub fn traverse_selected_rec(
     emit_data: bool,
     res: &mut Vec<EncodedItem>,
 ) -> blake3::Hash {
-    use blake3::guts::{ChunkState, CHUNK_LEN};
+    use blake3::CHUNK_LEN;
     if data.len() <= CHUNK_LEN {
         if emit_data && !query.is_empty() {
             res.push(
@@ -241,9 +240,7 @@ pub fn traverse_selected_rec(
                 .into(),
             );
         }
-        let mut hasher = ChunkState::new(start_chunk.0);
-        hasher.update(&data);
-        hasher.finalize(is_root)
+        hash_subtree(start_chunk.0, &data, is_root)
     } else {
         let chunks = data.len() / CHUNK_LEN + (data.len() % CHUNK_LEN != 0) as usize;
         let chunks = chunks.next_power_of_two();
@@ -256,7 +253,7 @@ pub fn traverse_selected_rec(
         // for full ranges where the level is below min_level, we want to emit
         // just the data.
         //
-        // todo: maybe call into blake3::guts::hash_subtree directly for this case? it would be faster.
+        // todo: maybe call into blake3::hazmat::hash_subtree directly for this case? it would be faster.
         let full = query.is_all();
         let emit_parent = !query.is_empty() && (!full || level >= min_level);
         let hash_offset = if emit_parent {

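The commit message mentions moving to div_ceil for the chunk-count round-up; the context lines above still show the manual form, so the exact location of that change is not visible here. A hedged sketch of the equivalent split-point computation driving the recursion (split_point is an illustrative name, not a function in the crate):

// Only meaningful for data longer than one chunk: round the chunk count up,
// then up to the next power of two; the left subtree covers the first half.
fn split_point(len: usize) -> usize {
    const CHUNK_LEN: usize = 1024; // same value as blake3::CHUNK_LEN
    let chunks = len.div_ceil(CHUNK_LEN); // == len / CHUNK_LEN + (len % CHUNK_LEN != 0) as usize
    let chunks = chunks.next_power_of_two();
    (chunks / 2) * CHUNK_LEN // byte offset where data splits into left and right subtrees
}
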
src/io/sync.rs

+5-5
@@ -7,7 +7,6 @@ use std::{
     result,
 };
 
-use blake3::guts::parent_cv;
 use bytes::BytesMut;
 pub use positioned_io::{ReadAt, Size, WriteAt};
 use smallvec::SmallVec;
@@ -22,6 +21,7 @@ use crate::{
         Leaf, Parent,
     },
     iter::{BaoChunk, ResponseIterRef},
+    parent_cv,
     rec::encode_selected_rec,
     BaoTree, BlockSize, ChunkRangesRef, TreeNode,
 };
@@ -663,8 +663,8 @@ mod validate {
 
     use super::Outboard;
    use crate::{
-        blake3, hash_subtree, io::LocalBoxFuture, rec::truncate_ranges, split, BaoTree, ChunkNum,
-        ChunkRangesRef, TreeNode,
+        blake3, hash_subtree, io::LocalBoxFuture, parent_cv, rec::truncate_ranges, split, BaoTree,
+        ChunkNum, ChunkRangesRef, TreeNode,
     };
 
     /// Given a data file and an outboard, compute all valid ranges.
@@ -777,7 +777,7 @@ mod validate {
                 // outboard is incomplete, we can't validate
                 return Ok(());
             };
-            let actual = blake3::guts::parent_cv(&l_hash, &r_hash, is_root);
+            let actual = parent_cv(&l_hash, &r_hash, is_root);
             if &actual != parent_hash {
                 // hash mismatch, we can't validate
                 return Ok(());
@@ -879,7 +879,7 @@ mod validate {
                 // outboard is incomplete, we can't validate
                 return Ok(());
             };
-            let actual = blake3::guts::parent_cv(&l_hash, &r_hash, is_root);
+            let actual = parent_cv(&l_hash, &r_hash, is_root);
             if &actual != parent_hash {
                 // hash mismatch, we can't validate
                 return Ok(());

src/lib.rs

+22-24
@@ -214,7 +214,7 @@ mod tree;
 use iter::*;
 pub use tree::{BlockSize, ChunkNum};
 pub mod io;
-pub use iroh_blake3 as blake3;
+pub use blake3;
 
 #[cfg(all(test, feature = "tokio_fsm"))]
 mod tests;
@@ -232,34 +232,32 @@ pub type ByteRanges = range_collections::RangeSet2<u64>;
 /// [ChunkRanges] implements [`AsRef<ChunkRangesRef>`].
 pub type ChunkRangesRef = range_collections::RangeSetRef<ChunkNum>;
 
-/// Quickly hash a subtree
-///
-/// This is a wrapper that passes through to the blake3::guts implementation if the size is a power of 2, and
-/// falls back to a recursive implementation if it is not. There is a bug in the guts implementation that
-/// requires this workaround.
-pub fn hash_subtree(start_chunk: u64, data: &[u8], is_root: bool) -> blake3::Hash {
-    if data.len().is_power_of_two() {
-        blake3::guts::hash_subtree(start_chunk, data, is_root)
+fn hash_subtree(start_chunk: u64, data: &[u8], is_root: bool) -> blake3::Hash {
+    use blake3::hazmat::{ChainingValue, HasherExt};
+    if is_root {
+        debug_assert!(start_chunk == 0);
+        blake3::hash(data)
     } else {
-        recursive_hash_subtree(start_chunk, data, is_root)
+        let mut hasher = blake3::Hasher::new();
+        hasher.set_input_offset(start_chunk * 1024);
+        hasher.update(data);
+        let non_root_hash: ChainingValue = hasher.finalize_non_root();
+        blake3::Hash::from(non_root_hash)
     }
 }
 
-/// This is a recursive version of [`hash_subtree`], for testing.
-fn recursive_hash_subtree(start_chunk: u64, data: &[u8], is_root: bool) -> blake3::Hash {
-    use blake3::guts::{ChunkState, CHUNK_LEN};
-    if data.len() <= CHUNK_LEN {
-        let mut hasher = ChunkState::new(start_chunk);
-        hasher.update(data);
-        hasher.finalize(is_root)
+fn parent_cv(left_child: &blake3::Hash, right_child: &blake3::Hash, is_root: bool) -> blake3::Hash {
+    use blake3::hazmat::{merge_subtrees_non_root, merge_subtrees_root, ChainingValue, Mode};
+    let left_child: ChainingValue = *left_child.as_bytes();
+    let right_child: ChainingValue = *right_child.as_bytes();
+    if is_root {
+        merge_subtrees_root(&left_child, &right_child, Mode::Hash)
     } else {
-        let chunks = data.len() / CHUNK_LEN + (data.len() % CHUNK_LEN != 0) as usize;
-        let chunks = chunks.next_power_of_two();
-        let mid = chunks / 2;
-        let mid_bytes = mid * CHUNK_LEN;
-        let left = recursive_hash_subtree(start_chunk, &data[..mid_bytes], false);
-        let right = recursive_hash_subtree(start_chunk + mid as u64, &data[mid_bytes..], false);
-        blake3::guts::parent_cv(&left, &right, is_root)
+        blake3::Hash::from(merge_subtrees_non_root(
+            &left_child,
+            &right_child,
+            Mode::Hash,
+        ))
     }
 }

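A hedged consistency check for the new helpers (not part of this diff; hash_subtree and parent_cv are now crate-private, so this would sit in an in-crate test): hashing a buffer as the root must agree with plain blake3::hash, and merging the chaining values of the two halves must reproduce that root.

#[test]
fn hazmat_helpers_agree_with_blake3_hash() {
    // Four 1 KiB chunks: the BLAKE3 tree splits them into two 2-chunk subtrees.
    let data = vec![0xabu8; 4096];
    let root = hash_subtree(0, &data, true);
    assert_eq!(root, blake3::hash(&data));

    // Left subtree starts at chunk 0, right subtree at chunk 2 (byte offset 2048).
    let left = hash_subtree(0, &data[..2048], false);
    let right = hash_subtree(2, &data[2048..], false);
    assert_eq!(parent_cv(&left, &right, true), root);
}
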
src/rec.rs

+6-8
@@ -2,7 +2,7 @@
 //!
 //! Encocding is used to compute hashes, decoding is only used in tests as a
 //! reference implementation.
-use crate::{blake3, split_inner, ChunkNum, ChunkRangesRef};
+use crate::{blake3, hash_subtree, parent_cv, split_inner, ChunkNum, ChunkRangesRef};
 
 /// Given a set of chunk ranges, adapt them for a tree of the given size.
 ///
@@ -105,14 +105,12 @@ pub(crate) fn encode_selected_rec(
     emit_data: bool,
     res: &mut Vec<u8>,
 ) -> blake3::Hash {
-    use blake3::guts::{ChunkState, CHUNK_LEN};
+    use blake3::CHUNK_LEN;
     if data.len() <= CHUNK_LEN {
         if emit_data && !query.is_empty() {
             res.extend_from_slice(data);
         }
-        let mut hasher = ChunkState::new(start_chunk.0);
-        hasher.update(data);
-        hasher.finalize(is_root)
+        hash_subtree(start_chunk.0, data, is_root)
     } else {
         let chunks = data.len() / CHUNK_LEN + (data.len() % CHUNK_LEN != 0) as usize;
         let chunks = chunks.next_power_of_two();
@@ -125,7 +123,7 @@ pub(crate) fn encode_selected_rec(
         // for full ranges where the level is below min_level, we want to emit
         // just the data.
        //
-        // todo: maybe call into blake3::guts::hash_subtree directly for this case? it would be faster.
+        // todo: maybe call into blake3::hazmat::hash_subtree directly for this case? it would be faster.
         let full = query.is_all();
         let emit_parent = !query.is_empty() && (!full || level >= min_level);
         let hash_offset = if emit_parent {
@@ -159,7 +157,7 @@ pub(crate) fn encode_selected_rec(
             res[o..o + 32].copy_from_slice(left.as_bytes());
             res[o + 32..o + 64].copy_from_slice(right.as_bytes());
         }
-        blake3::guts::parent_cv(&left, &right, is_root)
+        parent_cv(&left, &right, is_root)
     }
 }
 
@@ -203,7 +201,7 @@ mod test_support {
         if ranges.is_empty() {
            return;
         }
-        use blake3::guts::CHUNK_LEN;
+        use blake3::CHUNK_LEN;
 
         if size <= CHUNK_LEN {
             emit(BaoChunk::Leaf {
