-
Notifications
You must be signed in to change notification settings - Fork 251
perf: deduplicate basic block operation batches in HasherOp replay #2493
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: next
Are you sure you want to change the base?
Changes from all commits
6abcd51
84e713d
04c708d
5d40337
1211e96
7bd3cf0
a36c79a
7216824
386a857
56eaccb
3a30253
0f95609
02103ed
929addf
4d09e5e
6630eba
672561e
a40a22b
6405f22
09ba52b
35c242d
1c00c2e
1f47f90
775ae96
11b205a
53b2eaf
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,9 +1,10 @@ | ||
| use alloc::{collections::VecDeque, sync::Arc, vec::Vec}; | ||
|
|
||
| use miden_air::trace::{ | ||
| RowIndex, | ||
| chiplets::hasher::{HasherState, STATE_WIDTH}, | ||
| use alloc::{ | ||
| collections::{BTreeMap, VecDeque, btree_map::Entry}, | ||
| sync::Arc, | ||
| vec::Vec, | ||
| }; | ||
|
|
||
| use miden_air::trace::chiplets::hasher::{HasherState, STATE_WIDTH}; | ||
| use miden_core::{ | ||
| Felt, ONE, Word, ZERO, | ||
| crypto::merkle::MerklePath, | ||
|
|
@@ -13,7 +14,7 @@ use miden_core::{ | |
| }; | ||
|
|
||
| use crate::{ | ||
| AdviceError, ContextId, ErrorContext, ExecutionError, | ||
| AdviceError, ContextId, ErrorContext, ExecutionError, RowIndex, | ||
| chiplets::CircuitEvaluation, | ||
| continuation_stack::ContinuationStack, | ||
| fast::FastProcessor, | ||
|
|
@@ -978,7 +979,7 @@ impl HasherInterface for HasherResponseReplay { | |
| pub enum HasherOp { | ||
| Permute([Felt; STATE_WIDTH]), | ||
| HashControlBlock((Word, Word, Felt, Word)), | ||
| HashBasicBlock((Vec<OpBatch>, Word)), | ||
| HashBasicBlock(Word), // Only stores the digest; op_batches are looked up from op_batches_map | ||
| BuildMerkleRoot((Word, MerklePath, Felt)), | ||
| UpdateMerkleRoot((Word, Word, MerklePath, Felt)), | ||
| } | ||
|
|
@@ -988,9 +989,22 @@ pub enum HasherOp { | |
| /// | ||
| /// The hasher requests are recorded during fast processor execution and then replayed during hasher | ||
| /// chiplet trace generation. | ||
| #[derive(Debug, Default)] | ||
| #[derive(Default)] | ||
| pub struct HasherRequestReplay { | ||
| hasher_ops: VecDeque<HasherOp>, | ||
| /// Deduplication map for basic block operation batches. | ||
| /// Maps from basic block digest to its operation batches, avoiding duplication when the same | ||
| /// basic block is entered multiple times. | ||
| op_batches_map: BTreeMap<Word, Vec<OpBatch>>, | ||
| } | ||
|
|
||
| impl core::fmt::Debug for HasherRequestReplay { | ||
| fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { | ||
| f.debug_struct("HasherRequestReplay") | ||
| .field("hasher_ops", &self.hasher_ops) | ||
| // Exclude op_batches_map from Debug output to maintain snapshot compatibility | ||
| .finish() | ||
| } | ||
| } | ||
|
|
||
| impl HasherRequestReplay { | ||
|
|
@@ -1012,8 +1026,40 @@ impl HasherRequestReplay { | |
| } | ||
|
|
||
| /// Records a `Hasher::hash_basic_block()` request. | ||
| /// | ||
| /// Deduplicates operation batches by storing them in a map keyed by the basic block digest. | ||
| /// If the same basic block is entered multiple times, only one copy of the operation batches | ||
| /// is stored. | ||
| pub fn record_hash_basic_block(&mut self, op_batches: Vec<OpBatch>, expected_hash: Word) { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Consider adding tests that verify deduplication works when the same basic block is executed multiple times. The existing tests pass, but none specifically check that deduplication actually occurs. Example: #[test]
fn test_hasher_request_replay_deduplicates_basic_blocks() {
let mut replay = HasherRequestReplay::default();
let digest: Word = [Felt::new(1), Felt::new(2), Felt::new(3), Felt::new(4)];
let op_batches = vec![OpBatch::new(vec![Operation::Add], Vec::new()).unwrap()];
replay.record_hash_basic_block(op_batches.clone(), digest);
replay.record_hash_basic_block(op_batches.clone(), digest);
replay.record_hash_basic_block(op_batches.clone(), digest);
assert_eq!(replay.op_batches_map().len(), 1);
let (hasher_ops, _) = replay.into_parts();
assert_eq!(hasher_ops.len(), 3);
} |
||
| self.hasher_ops.push_back(HasherOp::HashBasicBlock((op_batches, expected_hash))); | ||
| // Only store the op_batches if we haven't seen this digest before | ||
| // If the digest already exists, we verify that the op_batches match (they should, since | ||
| // the digest is computed from the op_batches) | ||
| match self.op_batches_map.entry(expected_hash) { | ||
| Entry::Vacant(entry) => { | ||
| entry.insert(op_batches); | ||
| }, | ||
| Entry::Occupied(entry) => { | ||
| // Digest already exists, skip storing (deduplication) | ||
| debug_assert_eq!( | ||
| entry.get(), | ||
| &op_batches, | ||
| "Same digest should always map to same op_batches" | ||
| ); | ||
| }, | ||
| } | ||
| // Store only the digest in the operation record | ||
| self.hasher_ops.push_back(HasherOp::HashBasicBlock(expected_hash)); | ||
| } | ||
|
|
||
| /// Returns a reference to the operation batches map for looking up batches during replay. | ||
| pub fn op_batches_map(&self) -> &BTreeMap<Word, Vec<OpBatch>> { | ||
| &self.op_batches_map | ||
| } | ||
|
|
||
| /// Consumes `HasherRequestReplay` and returns both the hasher operations and the operation | ||
| /// batches map. This allows accessing the map during iteration without cloning. | ||
| pub fn into_parts(self) -> (VecDeque<HasherOp>, BTreeMap<Word, Vec<OpBatch>>) { | ||
| (self.hasher_ops, self.op_batches_map) | ||
| } | ||
|
|
||
| /// Records a `Hasher::build_merkle_root()` request. | ||
|
|
@@ -1034,15 +1080,6 @@ impl HasherRequestReplay { | |
| } | ||
| } | ||
|
|
||
| impl IntoIterator for HasherRequestReplay { | ||
| type Item = HasherOp; | ||
| type IntoIter = <VecDeque<HasherOp> as IntoIterator>::IntoIter; | ||
|
|
||
| fn into_iter(self) -> Self::IntoIter { | ||
| self.hasher_ops.into_iter() | ||
| } | ||
| } | ||
|
|
||
| // STACK OVERFLOW REPLAY | ||
| // ================================================================================================ | ||
|
|
||
|
|
@@ -1175,3 +1212,40 @@ pub enum NodeExecutionState { | |
| /// This is used when completing execution of a control flow construct. | ||
| End(MastNodeId), | ||
| } | ||
|
|
||
| #[cfg(test)] | ||
| mod tests { | ||
| use miden_core::{Operation, mast::BasicBlockNodeBuilder}; | ||
|
|
||
| use super::*; | ||
|
|
||
| #[test] | ||
| fn test_hasher_request_replay_deduplicates_basic_blocks() { | ||
| let mut replay = HasherRequestReplay::default(); | ||
| let digest = Word::new([Felt::new(1), Felt::new(2), Felt::new(3), Felt::new(4)]); | ||
|
|
||
| // Create a simple basic block with one operation to get op_batches | ||
| let basic_block = | ||
| BasicBlockNodeBuilder::new(vec![Operation::Add], Vec::new()).build().unwrap(); | ||
| let op_batches = basic_block.op_batches().to_vec(); | ||
|
|
||
| // Record the same digest three times | ||
| replay.record_hash_basic_block(op_batches.clone(), digest); | ||
| replay.record_hash_basic_block(op_batches.clone(), digest); | ||
| replay.record_hash_basic_block(op_batches.clone(), digest); | ||
|
|
||
| // Verify that the map has only one entry (deduplication worked) | ||
| assert_eq!(replay.op_batches_map().len(), 1); | ||
|
|
||
| // Verify that hasher_ops has three entries (one for each record call) | ||
| let (hasher_ops, _) = replay.into_parts(); | ||
| assert_eq!(hasher_ops.len(), 3); | ||
|
|
||
| // Verify all three entries are HashBasicBlock with the same digest | ||
| let mut iter = hasher_ops.into_iter(); | ||
| assert!(matches!(iter.next(), Some(HasherOp::HashBasicBlock(h)) if h == digest)); | ||
| assert!(matches!(iter.next(), Some(HasherOp::HashBasicBlock(h)) if h == digest)); | ||
| assert!(matches!(iter.next(), Some(HasherOp::HashBasicBlock(h)) if h == digest)); | ||
| assert!(iter.next().is_none()); | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The `impl IntoIterator for HasherRequestReplay` still exists, but seems unused now. It only returns `hasser_ops` and drops the `op_batches_map`. We should delete it to avoid confusion with `into_parts`.