From 2c3a6751eb0e5f6b2bfe4d1210718aad996aa0d6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fran=C3=A7ois=20Garillot?= Date: Sun, 14 Dec 2025 11:47:36 -0500 Subject: [PATCH 01/19] feat(core): add CSR validation to OpToDecoratorIds --- core/src/mast/debuginfo/decorator_storage.rs | 135 ++++++++++++++++++- 1 file changed, 134 insertions(+), 1 deletion(-) diff --git a/core/src/mast/debuginfo/decorator_storage.rs b/core/src/mast/debuginfo/decorator_storage.rs index f63b410de9..24b839331d 100644 --- a/core/src/mast/debuginfo/decorator_storage.rs +++ b/core/src/mast/debuginfo/decorator_storage.rs @@ -1,4 +1,4 @@ -use alloc::vec::Vec; +use alloc::{string::{String, ToString}, vec::Vec}; use miden_utils_indexing::{Idx, IndexVec}; #[cfg(feature = "arbitrary")] @@ -175,6 +175,11 @@ impl OpToDecoratorIds { return Err(DecoratorIndexError::InternalStructure); } + // Check that op_indptr_for_dec_ids starts at 0 + if op_indptr_for_dec_ids[0] != 0 { + return Err(DecoratorIndexError::InternalStructure); + } + // Check that the last operation pointer doesn't exceed decorator IDs length let Some(&last_op_ptr) = op_indptr_for_dec_ids.last() else { return Err(DecoratorIndexError::InternalStructure); @@ -189,6 +194,12 @@ impl OpToDecoratorIds { } let node_slice = node_indptr_for_op_idx.as_slice(); + + // Check that node_indptr_for_op_idx starts at 0 + if node_slice[0] != 0 { + return Err(DecoratorIndexError::InternalStructure); + } + let Some(&last_node_ptr) = node_slice.last() else { return Err(DecoratorIndexError::InternalStructure); }; @@ -216,6 +227,78 @@ impl OpToDecoratorIds { }) } + /// Validate CSR structure integrity. + /// + /// Checks: + /// - All decorator IDs are valid (< decorator_count) + /// - op_indptr_for_dec_ids is monotonic, starts at 0, ends at decorator_ids.len() + /// - node_indptr_for_op_idx is monotonic, starts at 0, ends <= op_indptr_for_dec_ids.len()-1 + pub(crate) fn validate_csr(&self, decorator_count: usize) -> Result<(), String> { + // Validate all decorator IDs + for &dec_id in &self.decorator_ids { + if dec_id.to_usize() >= decorator_count { + return Err(format!("Invalid decorator ID {}: exceeds decorator count {}", + dec_id.to_usize(), decorator_count)); + } + } + + // Validate op_indptr_for_dec_ids + if self.op_indptr_for_dec_ids.is_empty() { + return Err("op_indptr_for_dec_ids cannot be empty".to_string()); + } + + if self.op_indptr_for_dec_ids[0] != 0 { + return Err("op_indptr_for_dec_ids must start at 0".to_string()); + } + + for window in self.op_indptr_for_dec_ids.windows(2) { + if window[0] > window[1] { + return Err(format!( + "op_indptr_for_dec_ids not monotonic: {} > {}", + window[0], window[1] + )); + } + } + + if *self.op_indptr_for_dec_ids.last().unwrap() != self.decorator_ids.len() { + return Err(format!( + "op_indptr_for_dec_ids end {} doesn't match decorator_ids length {}", + self.op_indptr_for_dec_ids.last().unwrap(), + self.decorator_ids.len() + )); + } + + // Validate node_indptr_for_op_idx + let node_slice = self.node_indptr_for_op_idx.as_slice(); + if node_slice.is_empty() { + return Err("node_indptr_for_op_idx cannot be empty".to_string()); + } + + if node_slice[0] != 0 { + return Err("node_indptr_for_op_idx must start at 0".to_string()); + } + + for window in node_slice.windows(2) { + if window[0] > window[1] { + return Err(format!( + "node_indptr_for_op_idx not monotonic: {} > {}", + window[0], window[1] + )); + } + } + + let max_node_ptr = self.op_indptr_for_dec_ids.len().saturating_sub(1); + if *node_slice.last().unwrap() > max_node_ptr { + return Err(format!( + "node_indptr_for_op_idx end {} exceeds op_indptr bounds {}", + node_slice.last().unwrap(), + max_node_ptr + )); + } + + Ok(()) + } + pub fn is_empty(&self) -> bool { self.node_indptr_for_op_idx.is_empty() } @@ -1276,6 +1359,56 @@ mod tests { } } + #[test] + fn test_validate_csr_valid() { + let storage = create_standard_test_storage(); + assert!(storage.validate_csr(6).is_ok()); + } + + #[test] + fn test_validate_csr_invalid_decorator_id() { + let storage = create_standard_test_storage(); + // decorator_count=3 but storage has IDs up to 5 + let result = storage.validate_csr(3); + assert!(result.is_err()); + assert!(result.unwrap_err().contains("Invalid decorator ID")); + } + + #[test] + fn test_validate_csr_not_monotonic() { + let decorator_ids = vec![test_decorator_id(0), test_decorator_id(1)]; + let op_indptr_for_dec_ids = vec![0, 2, 1]; // Not monotonic! + let mut node_indptr_for_op_idx = IndexVec::new(); + node_indptr_for_op_idx.push(0).unwrap(); + node_indptr_for_op_idx.push(2).unwrap(); + + let storage = OpToDecoratorIds::from_components( + decorator_ids, + op_indptr_for_dec_ids, + node_indptr_for_op_idx, + ); + + // from_components should catch this + assert!(storage.is_err()); + } + + #[test] + fn test_validate_csr_wrong_start() { + let decorator_ids = vec![test_decorator_id(0)]; + let op_indptr_for_dec_ids = vec![1, 1]; // Should start at 0! + let mut node_indptr_for_op_idx = IndexVec::new(); + node_indptr_for_op_idx.push(0).unwrap(); + node_indptr_for_op_idx.push(1).unwrap(); + + let storage = OpToDecoratorIds::from_components( + decorator_ids, + op_indptr_for_dec_ids, + node_indptr_for_op_idx, + ); + + assert!(storage.is_err()); + } + #[cfg(feature = "arbitrary")] proptest! { /// Property test that verifies decorator_links_for_node always produces a valid iterator From 04f6f16e4fbcfac946ec32fffbfaf012f97feb1a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fran=C3=A7ois=20Garillot?= Date: Sun, 14 Dec 2025 11:47:39 -0500 Subject: [PATCH 02/19] feat(core): add CSR validation to NodeToDecoratorIds --- .../mast/debuginfo/node_decorator_storage.rs | 180 +++++++++++++++++- 1 file changed, 179 insertions(+), 1 deletion(-) diff --git a/core/src/mast/debuginfo/node_decorator_storage.rs b/core/src/mast/debuginfo/node_decorator_storage.rs index 0290804c68..21c369d6c2 100644 --- a/core/src/mast/debuginfo/node_decorator_storage.rs +++ b/core/src/mast/debuginfo/node_decorator_storage.rs @@ -1,4 +1,7 @@ -use alloc::vec::Vec; +use alloc::{ + string::{String, ToString}, + vec::Vec, +}; #[cfg(feature = "serde")] use serde::{Deserialize, Serialize}; @@ -59,6 +62,107 @@ impl NodeToDecoratorIds { } } + /// Create a NodeToDecoratorIds from raw components. + /// + /// Used during deserialization. Validation happens separately via `validate_csr()`. + pub fn from_components( + before_enter_decorators: Vec, + after_exit_decorators: Vec, + node_indptr_for_before: IndexVec, + node_indptr_for_after: IndexVec, + ) -> Result { + let storage = Self { + before_enter_decorators, + after_exit_decorators, + node_indptr_for_before, + node_indptr_for_after, + }; + + // Basic structural validation (full validation happens via validate_csr) + let before_slice = storage.node_indptr_for_before.as_slice(); + let after_slice = storage.node_indptr_for_after.as_slice(); + + if !before_slice.is_empty() && before_slice[0] != 0 { + return Err("node_indptr_for_before must start at 0".to_string()); + } + + if !after_slice.is_empty() && after_slice[0] != 0 { + return Err("node_indptr_for_after must start at 0".to_string()); + } + + Ok(storage) + } + + /// Validate CSR structure integrity. + /// + /// Checks: + /// - All decorator IDs are valid (< decorator_count) + /// - Both indptr arrays are monotonic, start at 0, end at respective decorator vector lengths + pub(crate) fn validate_csr(&self, decorator_count: usize) -> Result<(), String> { + // Validate all decorator IDs + for &dec_id in self.before_enter_decorators.iter().chain(self.after_exit_decorators.iter()) { + if dec_id.to_usize() >= decorator_count { + return Err(format!( + "Invalid decorator ID {}: exceeds decorator count {}", + dec_id.to_usize(), + decorator_count + )); + } + } + + // Validate before_enter CSR + let before_slice = self.node_indptr_for_before.as_slice(); + if !before_slice.is_empty() { + if before_slice[0] != 0 { + return Err("node_indptr_for_before must start at 0".to_string()); + } + + for window in before_slice.windows(2) { + if window[0] > window[1] { + return Err(format!( + "node_indptr_for_before not monotonic: {} > {}", + window[0], window[1] + )); + } + } + + if *before_slice.last().unwrap() != self.before_enter_decorators.len() { + return Err(format!( + "node_indptr_for_before end {} doesn't match before_enter_decorators length {}", + before_slice.last().unwrap(), + self.before_enter_decorators.len() + )); + } + } + + // Validate after_exit CSR + let after_slice = self.node_indptr_for_after.as_slice(); + if !after_slice.is_empty() { + if after_slice[0] != 0 { + return Err("node_indptr_for_after must start at 0".to_string()); + } + + for window in after_slice.windows(2) { + if window[0] > window[1] { + return Err(format!( + "node_indptr_for_after not monotonic: {} > {}", + window[0], window[1] + )); + } + } + + if *after_slice.last().unwrap() != self.after_exit_decorators.len() { + return Err(format!( + "node_indptr_for_after end {} doesn't match after_exit_decorators length {}", + after_slice.last().unwrap(), + self.after_exit_decorators.len() + )); + } + } + + Ok(()) + } + /// Creates a new empty `NodeToDecoratorIds` with specified capacity. pub fn with_capacity( nodes_capacity: usize, @@ -206,3 +310,77 @@ impl Default for NodeToDecoratorIds { Self::new() } } + +#[cfg(test)] +mod tests { + use super::*; + + fn test_decorator_id(value: u32) -> DecoratorId { + DecoratorId(value) + } + + #[test] + fn test_from_components_valid() { + let before = vec![test_decorator_id(0), test_decorator_id(1)]; + let after = vec![test_decorator_id(2)]; + + let mut before_indptr = IndexVec::new(); + before_indptr.push(0).unwrap(); + before_indptr.push(2).unwrap(); + + let mut after_indptr = IndexVec::new(); + after_indptr.push(0).unwrap(); + after_indptr.push(1).unwrap(); + + let storage = NodeToDecoratorIds::from_components( + before, + after, + before_indptr, + after_indptr, + ); + + assert!(storage.is_ok()); + } + + #[test] + fn test_validate_csr_valid() { + let mut before_indptr = IndexVec::new(); + before_indptr.push(0).unwrap(); + before_indptr.push(1).unwrap(); + + let mut after_indptr = IndexVec::new(); + after_indptr.push(0).unwrap(); + after_indptr.push(1).unwrap(); + + let storage = NodeToDecoratorIds::from_components( + vec![test_decorator_id(0)], + vec![test_decorator_id(1)], + before_indptr, + after_indptr, + ).unwrap(); + + assert!(storage.validate_csr(3).is_ok()); + } + + #[test] + fn test_validate_csr_invalid_decorator_id() { + let mut before_indptr = IndexVec::new(); + before_indptr.push(0).unwrap(); + before_indptr.push(1).unwrap(); + + let mut after_indptr = IndexVec::new(); + after_indptr.push(0).unwrap(); + after_indptr.push(0).unwrap(); + + let storage = NodeToDecoratorIds::from_components( + vec![test_decorator_id(5)], // ID too high + vec![], + before_indptr, + after_indptr, + ).unwrap(); + + let result = storage.validate_csr(3); + assert!(result.is_err()); + assert!(result.unwrap_err().contains("Invalid decorator ID")); + } +} From f2b7029a930e3bf7dd54d32aaca3e0598107ee3b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fran=C3=A7ois=20Garillot?= Date: Sun, 14 Dec 2025 11:47:42 -0500 Subject: [PATCH 03/19] feat(core): add validation method to DebugInfo --- core/src/mast/debuginfo/mod.rs | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/core/src/mast/debuginfo/mod.rs b/core/src/mast/debuginfo/mod.rs index 40a1ffdc13..e0f4c89820 100644 --- a/core/src/mast/debuginfo/mod.rs +++ b/core/src/mast/debuginfo/mod.rs @@ -40,7 +40,7 @@ //! method, which removes decorators while preserving critical information. This allows //! backward compatibility while enabling size optimization for deployment. -use alloc::{collections::BTreeMap, sync::Arc, vec::Vec}; +use alloc::{collections::BTreeMap, string::String, sync::Arc, vec::Vec}; use miden_utils_indexing::{Idx, IndexVec}; #[cfg(feature = "serde")] @@ -324,6 +324,27 @@ impl DebugInfo { self.procedure_names.clear(); } + // VALIDATION + // -------------------------------------------------------------------------------------------- + + /// Validate the integrity of the DebugInfo structure. + /// + /// This validates: + /// - All CSR structures in op_decorator_storage + /// - All CSR structures in node_decorator_storage + /// - All decorator IDs reference valid decorators + pub(crate) fn validate(&self) -> Result<(), String> { + let decorator_count = self.decorators.len(); + + // Validate OpToDecoratorIds CSR + self.op_decorator_storage.validate_csr(decorator_count)?; + + // Validate NodeToDecoratorIds CSR + self.node_decorator_storage.validate_csr(decorator_count)?; + + Ok(()) + } + // TEST HELPERS // -------------------------------------------------------------------------------------------- From 7274bce15dbf1db54c07b4d224f905a9fc7f33f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fran=C3=A7ois=20Garillot?= Date: Sun, 14 Dec 2025 11:47:44 -0500 Subject: [PATCH 04/19] feat(core): implement Serializable for DebugInfo --- core/src/mast/debuginfo/decorator_storage.rs | 6 +- core/src/mast/debuginfo/mod.rs | 94 +++++++++++++++++++- core/src/mast/serialization/mod.rs | 2 +- 3 files changed, 97 insertions(+), 5 deletions(-) diff --git a/core/src/mast/debuginfo/decorator_storage.rs b/core/src/mast/debuginfo/decorator_storage.rs index 24b839331d..69d3fcbc57 100644 --- a/core/src/mast/debuginfo/decorator_storage.rs +++ b/core/src/mast/debuginfo/decorator_storage.rs @@ -57,7 +57,7 @@ use crate::mast::{DecoratedOpLink, DecoratorId, MastNodeId}; pub struct OpToDecoratorIds { /// All the decorator IDs per operation per node, in a CSR relationship with /// node_indptr_for_op_idx and op_indptr_for_dec_ids - decorator_ids: Vec, + pub(crate) decorator_ids: Vec, /// For the node whose operation indices are in /// `op_indptr_for_dec_ids[node_start..node_end]`, /// the indices of its i-th operation are at: @@ -65,10 +65,10 @@ pub struct OpToDecoratorIds { /// decorator_ids[op_indptr_for_dec_ids[node_start + i].. /// op_indptr_for_dec_ids[node_start + i + 1]] /// ``` - op_indptr_for_dec_ids: Vec, + pub(crate) op_indptr_for_dec_ids: Vec, /// The decorated operation indices for the n-th node are at /// `op_indptr_for_dec_ids[node_indptr_for_op_idx[n]..node_indptr_for_op_idx[n+1]]` - node_indptr_for_op_idx: IndexVec, + pub(crate) node_indptr_for_op_idx: IndexVec, } /// Error type for decorator index mapping operations diff --git a/core/src/mast/debuginfo/mod.rs b/core/src/mast/debuginfo/mod.rs index e0f4c89820..e37baa4223 100644 --- a/core/src/mast/debuginfo/mod.rs +++ b/core/src/mast/debuginfo/mod.rs @@ -40,13 +40,14 @@ //! method, which removes decorators while preserving critical information. This allows //! backward compatibility while enabling size optimization for deployment. -use alloc::{collections::BTreeMap, string::String, sync::Arc, vec::Vec}; +use alloc::{collections::BTreeMap, string::{String, ToString}, sync::Arc, vec::Vec}; use miden_utils_indexing::{Idx, IndexVec}; #[cfg(feature = "serde")] use serde::{Deserialize, Serialize}; use super::{Decorator, DecoratorId, MastForestError, MastNodeId}; +use crate::utils::{ByteWriter, Serializable}; use crate::{LexicographicWord, Word}; mod decorator_storage; @@ -355,6 +356,97 @@ impl DebugInfo { } } +impl Serializable for DebugInfo { + fn write_into(&self, target: &mut W) { + use crate::mast::serialization::decorator::DecoratorDataBuilder; + + // 1. Serialize decorators (data, string table, infos) + let mut decorator_data_builder = DecoratorDataBuilder::new(); + for decorator in self.decorators.iter() { + decorator_data_builder.add_decorator(decorator); + } + let (decorator_data, decorator_infos, string_table) = decorator_data_builder.finalize(); + + decorator_data.write_into(target); + string_table.write_into(target); + + // Write decorator count and infos + target.write_usize(decorator_infos.len()); + for decorator_info in decorator_infos { + decorator_info.write_into(target); + } + + // 2. Serialize error codes + let error_codes: alloc::collections::BTreeMap = self + .error_codes + .iter() + .map(|(k, v)| (*k, v.to_string())) + .collect(); + error_codes.write_into(target); + + // 3. Serialize OpToDecoratorIds CSR (dense representation) + // Dense representation: serialize indptr arrays as-is (no sparse encoding). + // Analysis shows sparse saves <1KB even with 90% empty nodes, not worth complexity. + // See measurement: https://gist.github.com/huitseeker/7379e2eecffd7020ae577e986057a400 + + // decorator_ids as Vec + let decorator_ids_u32: Vec = self + .op_decorator_storage + .decorator_ids + .iter() + .map(|id| u32::from(*id)) + .collect(); + decorator_ids_u32.write_into(target); + + // op_indptr_for_dec_ids + self.op_decorator_storage.op_indptr_for_dec_ids.write_into(target); + + // node_indptr_for_op_idx as Vec + let node_indptr_vec: Vec = self + .op_decorator_storage + .node_indptr_for_op_idx + .as_slice() + .to_vec(); + node_indptr_vec.write_into(target); + + // 4. Serialize NodeToDecoratorIds CSR (dense representation) + + // before_enter_decorators as Vec + let before_decorators_u32: Vec = self + .node_decorator_storage + .before_enter_decorators + .iter() + .map(|id| u32::from(*id)) + .collect(); + before_decorators_u32.write_into(target); + + // after_exit_decorators as Vec + let after_decorators_u32: Vec = self + .node_decorator_storage + .after_exit_decorators + .iter() + .map(|id| u32::from(*id)) + .collect(); + after_decorators_u32.write_into(target); + + // node_indptr_for_before as Vec + let before_indptr_vec: Vec = self + .node_decorator_storage + .node_indptr_for_before + .as_slice() + .to_vec(); + before_indptr_vec.write_into(target); + + // node_indptr_for_after as Vec + let after_indptr_vec: Vec = self + .node_decorator_storage + .node_indptr_for_after + .as_slice() + .to_vec(); + after_indptr_vec.write_into(target); + } +} + impl Default for DebugInfo { fn default() -> Self { Self::new() diff --git a/core/src/mast/serialization/mod.rs b/core/src/mast/serialization/mod.rs index 57b16bcad8..42be1a8c2d 100644 --- a/core/src/mast/serialization/mod.rs +++ b/core/src/mast/serialization/mod.rs @@ -58,7 +58,7 @@ use crate::{ utils::{ByteReader, ByteWriter, Deserializable, DeserializationError, Serializable}, }; -mod decorator; +pub(crate) mod decorator; mod info; use info::MastNodeInfo; From 413ea333aeb04619262fb3aecb3c580ebd6f86fa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fran=C3=A7ois=20Garillot?= Date: Sun, 14 Dec 2025 11:47:47 -0500 Subject: [PATCH 05/19] feat(core): implement Deserializable for DebugInfo --- core/src/mast/debuginfo/mod.rs | 147 ++++++++++++++++++++++++++++- core/src/mast/serialization/mod.rs | 4 +- crates/utils-indexing/src/lib.rs | 9 ++ 3 files changed, 156 insertions(+), 4 deletions(-) diff --git a/core/src/mast/debuginfo/mod.rs b/core/src/mast/debuginfo/mod.rs index e37baa4223..e1de397ed2 100644 --- a/core/src/mast/debuginfo/mod.rs +++ b/core/src/mast/debuginfo/mod.rs @@ -47,8 +47,10 @@ use miden_utils_indexing::{Idx, IndexVec}; use serde::{Deserialize, Serialize}; use super::{Decorator, DecoratorId, MastForestError, MastNodeId}; -use crate::utils::{ByteWriter, Serializable}; -use crate::{LexicographicWord, Word}; +use crate::{ + LexicographicWord, Word, + utils::{ByteReader, ByteWriter, Deserializable, DeserializationError, Serializable}, +}; mod decorator_storage; pub use decorator_storage::{ @@ -447,6 +449,147 @@ impl Serializable for DebugInfo { } } +impl Deserializable for DebugInfo { + fn read_from(source: &mut R) -> Result { + use crate::mast::serialization::decorator::DecoratorInfo; + + // 1. Read decorator data and string table + let decorator_data: Vec = Deserializable::read_from(source)?; + let string_table: crate::mast::serialization::StringTable = Deserializable::read_from(source)?; + + let decorator_count: usize = source.read_usize()?; + let mut decorator_infos = Vec::with_capacity(decorator_count); + for _ in 0..decorator_count { + decorator_infos.push(DecoratorInfo::read_from(source)?); + } + + // 2. Reconstruct decorators + let mut decorators = IndexVec::new(); + for decorator_info in decorator_infos { + let decorator = decorator_info + .try_into_decorator(&string_table, &decorator_data)?; + decorators.push(decorator).map_err(|_| { + DeserializationError::InvalidValue( + "Failed to add decorator to IndexVec".to_string(), + ) + })?; + } + + // 3. Read error codes + let error_codes_raw: alloc::collections::BTreeMap = + Deserializable::read_from(source)?; + let error_codes: alloc::collections::BTreeMap> = + error_codes_raw + .into_iter() + .map(|(k, v)| (k, alloc::sync::Arc::from(v.as_str()))) + .collect(); + + // 4. Read OpToDecoratorIds CSR (dense representation) + + // decorator_ids from Vec + let decorator_ids_u32: Vec = Deserializable::read_from(source)?; + let decorator_ids: Vec = decorator_ids_u32 + .into_iter() + .map(|id| { + if id as usize >= decorators.len() { + return Err(DeserializationError::InvalidValue(format!( + "DecoratorId {} exceeds decorator count {}", + id, + decorators.len() + ))); + } + Ok(DecoratorId(id)) + }) + .collect::, _>>()?; + + // op_indptr_for_dec_ids + let op_indptr_for_dec_ids: Vec = Deserializable::read_from(source)?; + + // node_indptr_for_op_idx from Vec + let node_indptr_vec: Vec = Deserializable::read_from(source)?; + let node_indptr_for_op_idx = IndexVec::from_raw(node_indptr_vec); + + let op_decorator_storage = OpToDecoratorIds::from_components( + decorator_ids, + op_indptr_for_dec_ids, + node_indptr_for_op_idx, + ) + .map_err(|e| DeserializationError::InvalidValue(e.to_string()))?; + + // 5. Read NodeToDecoratorIds CSR (dense representation) + + // before_enter_decorators from Vec + let before_decorators_u32: Vec = Deserializable::read_from(source)?; + let before_enter_decorators: Vec = before_decorators_u32 + .into_iter() + .map(|id| { + if id as usize >= decorators.len() { + return Err(DeserializationError::InvalidValue(format!( + "DecoratorId {} exceeds decorator count {}", + id, + decorators.len() + ))); + } + Ok(DecoratorId(id)) + }) + .collect::, _>>()?; + + // after_exit_decorators from Vec + let after_decorators_u32: Vec = Deserializable::read_from(source)?; + let after_exit_decorators: Vec = after_decorators_u32 + .into_iter() + .map(|id| { + if id as usize >= decorators.len() { + return Err(DeserializationError::InvalidValue(format!( + "DecoratorId {} exceeds decorator count {}", + id, + decorators.len() + ))); + } + Ok(DecoratorId(id)) + }) + .collect::, _>>()?; + + // node_indptr_for_before from Vec + let before_indptr_vec: Vec = Deserializable::read_from(source)?; + let node_indptr_for_before = IndexVec::from_raw(before_indptr_vec); + + // node_indptr_for_after from Vec + let after_indptr_vec: Vec = Deserializable::read_from(source)?; + let node_indptr_for_after = IndexVec::from_raw(after_indptr_vec); + + let node_decorator_storage = NodeToDecoratorIds::from_components( + before_enter_decorators, + after_exit_decorators, + node_indptr_for_before, + node_indptr_for_after, + ) + .map_err(|e| DeserializationError::InvalidValue(e.to_string()))?; + + // 6. Read procedure names (marked serde(skip) but we deserialize manually) + let procedure_names_raw: BTreeMap = Deserializable::read_from(source)?; + let procedure_names: BTreeMap> = procedure_names_raw + .into_iter() + .map(|(k, v)| (LexicographicWord::from(k), Arc::from(v.as_str()))) + .collect(); + + // 7. Construct and validate DebugInfo + let debug_info = DebugInfo { + decorators, + op_decorator_storage, + node_decorator_storage, + error_codes, + procedure_names, + }; + + debug_info + .validate() + .map_err(|e| DeserializationError::InvalidValue(format!("DebugInfo validation failed: {}", e)))?; + + Ok(debug_info) + } +} + impl Default for DebugInfo { fn default() -> Self { Self::new() diff --git a/core/src/mast/serialization/mod.rs b/core/src/mast/serialization/mod.rs index 42be1a8c2d..9f9e5b12db 100644 --- a/core/src/mast/serialization/mod.rs +++ b/core/src/mast/serialization/mod.rs @@ -49,7 +49,6 @@ use alloc::{ }; use decorator::{DecoratorDataBuilder, DecoratorInfo}; -use string_table::StringTable; use super::{DecoratedOpLink, DecoratorId, MastForest, MastNode, MastNodeId}; use crate::{ @@ -68,7 +67,8 @@ use basic_blocks::{BasicBlockDataBuilder, BasicBlockDataDecoder}; use crate::DecoratorList; -mod string_table; +pub(crate) mod string_table; +pub(crate) use string_table::StringTable; #[cfg(test)] mod tests; diff --git a/crates/utils-indexing/src/lib.rs b/crates/utils-indexing/src/lib.rs index 4dcfc877c0..6b1ce63d5c 100644 --- a/crates/utils-indexing/src/lib.rs +++ b/crates/utils-indexing/src/lib.rs @@ -145,6 +145,15 @@ impl IndexVec { self.raw } + /// Create an IndexVec from a raw Vec. + /// + /// This is useful for deserialization or when reconstructing an IndexVec + /// from previously extracted components. + #[inline] + pub fn from_raw(raw: Vec) -> Self { + Self { raw, _m: PhantomData } + } + /// Remove an element at the specified index and return it. pub fn swap_remove(&mut self, index: usize) -> T { self.raw.swap_remove(index) From 4a13c7f48f0c73569a8744375227d0f840150073 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fran=C3=A7ois=20Garillot?= Date: Sun, 14 Dec 2025 11:47:50 -0500 Subject: [PATCH 06/19] feat(core): use direct DebugInfo serialization in MastForest --- core/src/mast/serialization/mod.rs | 60 ++---------------------------- 1 file changed, 4 insertions(+), 56 deletions(-) diff --git a/core/src/mast/serialization/mod.rs b/core/src/mast/serialization/mod.rs index 9f9e5b12db..a091da2549 100644 --- a/core/src/mast/serialization/mod.rs +++ b/core/src/mast/serialization/mod.rs @@ -113,12 +113,6 @@ impl Serializable for MastForest { fn write_into(&self, target: &mut W) { let mut basic_block_data_builder = BasicBlockDataBuilder::new(); - // Set up "before enter" and "after exit" decorators by `MastNodeId` - let mut before_enter_decorators: Vec<(usize, Vec)> = Vec::new(); - let mut after_exit_decorators: Vec<(usize, Vec)> = Vec::new(); - - let mut basic_block_decorators: Vec<(usize, Vec)> = Vec::new(); - // magic & version target.write_bytes(MAGIC); target.write_bytes(&VERSION); @@ -140,26 +134,8 @@ impl Serializable for MastForest { .map(|(mast_node_id, mast_node)| { let node_id = MastNodeId::new_unchecked(mast_node_id as u32); - // Use centralized NodeToDecoratorIds for node-level decorators - let before_decorators = self.before_enter_decorators(node_id); - if !before_decorators.is_empty() { - before_enter_decorators.push((mast_node_id, before_decorators.to_vec())); - } - - let after_decorators = self.after_exit_decorators(node_id); - if !after_decorators.is_empty() { - after_exit_decorators.push((mast_node_id, after_decorators.to_vec())); - } - let ops_offset = if let MastNode::Block(basic_block) = mast_node { - let ops_offset = basic_block_data_builder.encode_basic_block(basic_block); - - // Serialize decorators with padded indices - let padded_decorators: Vec<(usize, crate::mast::DecoratorId)> = - basic_block.indexed_decorator_iter(self).collect(); - basic_block_decorators.push((mast_node_id, padded_decorators)); - - ops_offset + basic_block_data_builder.encode_basic_block(basic_block) } else { 0 }; @@ -177,38 +153,10 @@ impl Serializable for MastForest { } self.advice_map.write_into(target); - let error_codes: BTreeMap = - self.debug_info.error_codes().map(|(k, v)| (*k, v.to_string())).collect(); - error_codes.write_into(target); - - // Write procedure names - let procedure_names: BTreeMap = - self.debug_info.procedure_names().map(|(k, v)| (k, v.to_string())).collect(); - procedure_names.write_into(target); - - // write all decorator data below - - let mut decorator_data_builder = DecoratorDataBuilder::new(); - for decorator in self.debug_info.decorators() { - decorator_data_builder.add_decorator(decorator) - } - - let (decorator_data, decorator_infos, string_table) = decorator_data_builder.finalize(); - - // decorator data buffers - decorator_data.write_into(target); - string_table.write_into(target); - - // Write decorator infos - for decorator_info in decorator_infos { - decorator_info.write_into(target); - } - - basic_block_decorators.write_into(target); - // Write "before enter" and "after exit" decorators - before_enter_decorators.write_into(target); - after_exit_decorators.write_into(target); + // Serialize DebugInfo directly (includes decorators, error_codes, CSR structures, + // and procedure_names) + self.debug_info.write_into(target); } } From 8dc158a82b722e76bd5b7f8933e8950ce2a02ac1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fran=C3=A7ois=20Garillot?= Date: Sun, 14 Dec 2025 11:47:53 -0500 Subject: [PATCH 07/19] feat(core): implement builder-based deserialization for MastForest --- core/src/mast/node/basic_block_node/mod.rs | 56 +++++-- core/src/mast/node/call_node.rs | 4 +- core/src/mast/node/dyn_node.rs | 10 +- core/src/mast/node/external.rs | 7 +- core/src/mast/node/join_node.rs | 3 - core/src/mast/node/loop_node.rs | 3 - core/src/mast/node/mast_forest_contributor.rs | 29 ++-- core/src/mast/node/split_node.rs | 3 - core/src/mast/serialization/info.rs | 5 +- core/src/mast/serialization/mod.rs | 156 ++---------------- 10 files changed, 77 insertions(+), 199 deletions(-) diff --git a/core/src/mast/node/basic_block_node/mod.rs b/core/src/mast/node/basic_block_node/mod.rs index d95263c97b..f7468790c2 100644 --- a/core/src/mast/node/basic_block_node/mod.rs +++ b/core/src/mast/node/basic_block_node/mod.rs @@ -1347,15 +1347,6 @@ impl BasicBlockNodeBuilder { } } - /// Used to initialize decorators for the [`BasicBlockNodeBuilder`]. Replaces the existing - /// decorators with the given ['DecoratorList']. - pub(crate) fn set_decorators(&mut self, decorators: DecoratorList) { - match &mut self.operation_data { - OperationData::Raw { decorators: dec, .. } => *dec = decorators, - OperationData::Batched { decorators: dec, .. } => *dec = decorators, - } - } - /// Builds the BasicBlockNode with the specified decorators. pub fn build(self) -> Result { let (op_batches, digest, padded_decorators) = match self.operation_data { @@ -1417,9 +1408,50 @@ impl BasicBlockNodeBuilder { self, forest: &mut MastForest, ) -> Result { - // BasicBlockNode doesn't have child dependencies, so relaxed validation is the same - // as normal validation. We delegate to the normal method for consistency. - self.add_to_forest(forest) + // For deserialization: decorators are already in forest.debug_info, + // so we don't register them again. We just create the node. + + let future_node_id = MastNodeId::new_unchecked(forest.nodes.len() as u32); + + // Process based on operation data type + let (op_batches, digest) = match self.operation_data { + OperationData::Raw { operations, decorators: _ } => { + if operations.is_empty() { + return Err(MastForestError::EmptyBasicBlock); + } + + // Batch operations (adds padding NOOPs) + let (op_batches, computed_digest) = batch_and_hash_ops(operations); + + // Use the forced digest if provided, otherwise use the computed digest + let digest = self.digest.unwrap_or(computed_digest); + + (op_batches, digest) + }, + OperationData::Batched { op_batches, decorators: _ } => { + if op_batches.is_empty() { + return Err(MastForestError::EmptyBasicBlock); + } + + // For batched operations, digest must be set + let digest = self.digest.expect("digest must be set for batched operations"); + + (op_batches, digest) + }, + }; + + // Create the node in the forest with Linked variant + // Note: Decorators are already in forest.debug_info from deserialization + let node_id = forest + .nodes + .push(MastNode::Block(BasicBlockNode { + op_batches, + digest, + decorators: DecoratorStore::Linked { id: future_node_id }, + })) + .map_err(|_| MastForestError::TooManyNodes)?; + + Ok(node_id) } } diff --git a/core/src/mast/node/call_node.rs b/core/src/mast/node/call_node.rs index b18a8d44c8..a31c17a27e 100644 --- a/core/src/mast/node/call_node.rs +++ b/core/src/mast/node/call_node.rs @@ -521,10 +521,8 @@ impl CallNodeBuilder { let future_node_id = MastNodeId::new_unchecked(forest.nodes.len() as u32); - // Store node-level decorators in the centralized NodeToDecoratorIds for efficient access - forest.register_node_decorators(future_node_id, &self.before_enter, &self.after_exit); - // Create the node in the forest with Linked variant from the start + // Note: Decorators are already in forest.debug_info from deserialization // Move the data directly without intermediate cloning let node_id = forest .nodes diff --git a/core/src/mast/node/dyn_node.rs b/core/src/mast/node/dyn_node.rs index d4e62eb9db..65e1823892 100644 --- a/core/src/mast/node/dyn_node.rs +++ b/core/src/mast/node/dyn_node.rs @@ -446,14 +446,11 @@ impl DynNodeBuilder { /// /// Note: This is not part of the `MastForestContributor` trait because it's only /// intended for internal use during deserialization. - /// - /// For DynNode, this is equivalent to the normal `add_to_forest` since dyn nodes - /// don't have child nodes to validate. pub(in crate::mast) fn add_to_forest_relaxed( self, forest: &mut MastForest, ) -> Result { - // Use the same digest computation as in build() + // Use the forced digest if provided, otherwise use the default digest let digest = if let Some(forced_digest) = self.digest { forced_digest } else if self.is_dyncall { @@ -462,12 +459,11 @@ impl DynNodeBuilder { DynNode::DYN_DEFAULT_DIGEST }; + // Determine the node ID that will be assigned let future_node_id = MastNodeId::new_unchecked(forest.nodes.len() as u32); - // Store node-level decorators in the centralized NodeToDecoratorIds for efficient access - forest.register_node_decorators(future_node_id, &self.before_enter, &self.after_exit); - // Create the node in the forest with Linked variant from the start + // Note: Decorators are already in forest.debug_info from deserialization // Move the data directly without intermediate cloning let node_id = forest .nodes diff --git a/core/src/mast/node/external.rs b/core/src/mast/node/external.rs index 4a4dff2e4a..3117b6db4c 100644 --- a/core/src/mast/node/external.rs +++ b/core/src/mast/node/external.rs @@ -360,18 +360,13 @@ impl ExternalNodeBuilder { /// /// Note: This is not part of the `MastForestContributor` trait because it's only /// intended for internal use during deserialization. - /// - /// For ExternalNode, this is equivalent to the normal `add_to_forest` since external nodes - /// don't have child nodes to validate. pub(in crate::mast) fn add_to_forest_relaxed( self, forest: &mut MastForest, ) -> Result { + // Determine the node ID that will be assigned let future_node_id = MastNodeId::new_unchecked(forest.nodes.len() as u32); - // Store node-level decorators in the centralized NodeToDecoratorIds for efficient access - forest.register_node_decorators(future_node_id, &self.before_enter, &self.after_exit); - // Create the node in the forest with Linked variant from the start // Move the data directly without intermediate cloning let node_id = forest diff --git a/core/src/mast/node/join_node.rs b/core/src/mast/node/join_node.rs index e8cf02d80d..95edab2642 100644 --- a/core/src/mast/node/join_node.rs +++ b/core/src/mast/node/join_node.rs @@ -486,9 +486,6 @@ impl JoinNodeBuilder { let future_node_id = MastNodeId::new_unchecked(forest.nodes.len() as u32); - // Store node-level decorators in the centralized NodeToDecoratorIds for efficient access - forest.register_node_decorators(future_node_id, &self.before_enter, &self.after_exit); - // Create the node in the forest with Linked variant from the start // Move the data directly without intermediate cloning let node_id = forest diff --git a/core/src/mast/node/loop_node.rs b/core/src/mast/node/loop_node.rs index c0be163dc4..4e8dd0ec87 100644 --- a/core/src/mast/node/loop_node.rs +++ b/core/src/mast/node/loop_node.rs @@ -422,9 +422,6 @@ impl LoopNodeBuilder { let future_node_id = MastNodeId::new_unchecked(forest.nodes.len() as u32); - // Store node-level decorators in the centralized NodeToDecoratorIds for efficient access - forest.register_node_decorators(future_node_id, &self.before_enter, &self.after_exit); - // Create the node in the forest with Linked variant from the start // Move the data directly without intermediate cloning let node_id = forest diff --git a/core/src/mast/node/mast_forest_contributor.rs b/core/src/mast/node/mast_forest_contributor.rs index 5c57ae27f4..25b62abd4a 100644 --- a/core/src/mast/node/mast_forest_contributor.rs +++ b/core/src/mast/node/mast_forest_contributor.rs @@ -88,27 +88,24 @@ impl MastNodeBuilder { } } - /// Add this node to a forest using relaxed validation. + /// Adds the node from this builder to the forest without validation, used during + /// deserialization. /// - /// This method is used during deserialization where nodes may reference child nodes - /// that haven't been added to the forest yet. The child node IDs have already been - /// validated against the expected final node count during the `try_into_mast_node_builder` - /// step, so we can safely skip validation here. - /// - /// Note: This is not part of the `MastForestContributor` trait because it's only - /// intended for internal use during deserialization. + /// This method bypasses normal validation and directly creates nodes with + /// `DecoratorStore::Linked`. It should only be used during deserialization where the forest + /// structure is being reconstructed. pub(in crate::mast) fn add_to_forest_relaxed( self, - forest: &mut MastForest, + mast_forest: &mut MastForest, ) -> Result { match self { - MastNodeBuilder::BasicBlock(builder) => builder.add_to_forest_relaxed(forest), - MastNodeBuilder::Call(builder) => builder.add_to_forest_relaxed(forest), - MastNodeBuilder::Dyn(builder) => builder.add_to_forest_relaxed(forest), - MastNodeBuilder::External(builder) => builder.add_to_forest_relaxed(forest), - MastNodeBuilder::Join(builder) => builder.add_to_forest_relaxed(forest), - MastNodeBuilder::Loop(builder) => builder.add_to_forest_relaxed(forest), - MastNodeBuilder::Split(builder) => builder.add_to_forest_relaxed(forest), + MastNodeBuilder::BasicBlock(builder) => builder.add_to_forest_relaxed(mast_forest), + MastNodeBuilder::Call(builder) => builder.add_to_forest_relaxed(mast_forest), + MastNodeBuilder::Dyn(builder) => builder.add_to_forest_relaxed(mast_forest), + MastNodeBuilder::External(builder) => builder.add_to_forest_relaxed(mast_forest), + MastNodeBuilder::Join(builder) => builder.add_to_forest_relaxed(mast_forest), + MastNodeBuilder::Loop(builder) => builder.add_to_forest_relaxed(mast_forest), + MastNodeBuilder::Split(builder) => builder.add_to_forest_relaxed(mast_forest), } } } diff --git a/core/src/mast/node/split_node.rs b/core/src/mast/node/split_node.rs index 6b344ac52a..3fddea0c48 100644 --- a/core/src/mast/node/split_node.rs +++ b/core/src/mast/node/split_node.rs @@ -446,9 +446,6 @@ impl SplitNodeBuilder { let future_node_id = MastNodeId::new_unchecked(forest.nodes.len() as u32); - // Store node-level decorators in the centralized NodeToDecoratorIds for efficient access - forest.register_node_decorators(future_node_id, &self.before_enter, &self.after_exit); - // Create the node in the forest with Linked variant from the start // Move the data directly without intermediate cloning let node_id = forest diff --git a/core/src/mast/serialization/info.rs b/core/src/mast/serialization/info.rs index 4b6b4a5f5c..4b471d99dd 100644 --- a/core/src/mast/serialization/info.rs +++ b/core/src/mast/serialization/info.rs @@ -89,11 +89,12 @@ impl MastNodeInfo { Ok(MastNodeBuilder::Call(builder)) }, MastNodeType::Dyn => { - let builder = crate::mast::node::DynNodeBuilder::new_dyn(); + let builder = crate::mast::node::DynNodeBuilder::new_dyn().with_digest(self.digest); Ok(MastNodeBuilder::Dyn(builder)) }, MastNodeType::Dyncall => { - let builder = crate::mast::node::DynNodeBuilder::new_dyncall(); + let builder = + crate::mast::node::DynNodeBuilder::new_dyncall().with_digest(self.digest); Ok(MastNodeBuilder::Dyn(builder)) }, MastNodeType::External => { diff --git a/core/src/mast/serialization/mod.rs b/core/src/mast/serialization/mod.rs index a091da2549..72f5df54f3 100644 --- a/core/src/mast/serialization/mod.rs +++ b/core/src/mast/serialization/mod.rs @@ -48,12 +48,9 @@ use alloc::{ vec::Vec, }; -use decorator::{DecoratorDataBuilder, DecoratorInfo}; - -use super::{DecoratedOpLink, DecoratorId, MastForest, MastNode, MastNodeId}; +use super::{MastForest, MastNode, MastNodeId}; use crate::{ AdviceMap, - mast::{MastForestContributor, MastNodeBuilder}, utils::{ByteReader, ByteWriter, Deserializable, DeserializationError, Serializable}, }; @@ -65,8 +62,6 @@ use info::MastNodeInfo; mod basic_blocks; use basic_blocks::{BasicBlockDataBuilder, BasicBlockDataDecoder}; -use crate::DecoratorList; - pub(crate) mod string_table; pub(crate) use string_table::StringTable; @@ -130,10 +125,7 @@ impl Serializable for MastForest { let mast_node_infos: Vec = self .nodes .iter() - .enumerate() - .map(|(mast_node_id, mast_node)| { - let node_id = MastNodeId::new_unchecked(mast_node_id as u32); - + .map(|mast_node| { let ops_offset = if let MastNode::Block(basic_block) = mast_node { basic_block_data_builder.encode_basic_block(basic_block) } else { @@ -167,7 +159,7 @@ impl Deserializable for MastForest { // Reading sections metadata let node_count = source.read_usize()?; - let decorator_count = source.read_usize()?; + let _decorator_count = source.read_usize()?; // Reading procedure roots let roots: Vec = Deserializable::read_from(source)?; @@ -179,73 +171,27 @@ impl Deserializable for MastForest { let advice_map = AdviceMap::read_from(source)?; - let error_codes: BTreeMap = Deserializable::read_from(source)?; - let error_codes: BTreeMap> = - error_codes.into_iter().map(|(k, v)| (k, Arc::from(v))).collect(); - - // Reading procedure names - let procedure_names: BTreeMap = Deserializable::read_from(source)?; - let procedure_names: BTreeMap> = - procedure_names.into_iter().map(|(k, v)| (k, Arc::from(v))).collect(); - - // Reading Decorators - let decorator_data: Vec = Deserializable::read_from(source)?; - let string_table: StringTable = Deserializable::read_from(source)?; - let decorator_infos = decorator_infos_iter(source, decorator_count); + // Deserialize DebugInfo directly (includes decorators, error_codes, CSR structures, + // and procedure_names) + let debug_info = super::DebugInfo::read_from(source)?; // Constructing MastForest - let mut mast_forest = { + let mast_forest = { let mut mast_forest = MastForest::new(); - for decorator_info in decorator_infos { - let decorator_info = decorator_info?; - let decorator = - decorator_info.try_into_decorator(&string_table, &decorator_data)?; + // Set the fully deserialized debug_info - it already contains all mappings + mast_forest.debug_info = debug_info; - mast_forest.add_decorator(decorator).map_err(|e| { - DeserializationError::InvalidValue(format!( - "failed to add decorator to MAST forest while deserializing: {e}", - )) - })?; - } - - // nodes + // Convert node infos to builders let basic_block_data_decoder = BasicBlockDataDecoder::new(&basic_block_data); - let mut mast_builders = mast_node_infos + let mast_builders = mast_node_infos .into_iter() .map(|node_info| { node_info.try_into_mast_node_builder(node_count, &basic_block_data_decoder) }) .collect::, _>>()?; - let basic_block_decorators: Vec<(usize, DecoratorList)> = - read_block_decorators(source, mast_forest.debug_info.num_decorators())?; - for (node_id, decorator_list) in basic_block_decorators { - match &mut mast_builders[node_id] { - MastNodeBuilder::BasicBlock(basic_block) => { - basic_block.set_decorators(decorator_list); - }, - other => { - return Err(DeserializationError::InvalidValue(format!( - "expected mast node with id {node_id} to be a basic block, found {other:?}" - ))); - }, - } - } - - // read "before enter" and "after exit" decorators, and update the corresponding nodes - let before_enter_decorators: Vec<(usize, Vec)> = - read_before_after_decorators(source, mast_forest.debug_info.num_decorators())?; - for (node_id, decorator_ids) in before_enter_decorators { - mast_builders[node_id].append_before_enter(decorator_ids); - } - - let after_exit_decorators: Vec<(usize, Vec)> = - read_before_after_decorators(source, mast_forest.debug_info.num_decorators())?; - for (node_id, decorator_ids) in after_exit_decorators { - mast_builders[node_id].append_after_exit(decorator_ids); - } - + // Add all builders to forest using relaxed validation for mast_node_builder in mast_builders { mast_node_builder.add_to_forest_relaxed(&mut mast_forest).map_err(|e| { DeserializationError::InvalidValue(format!( @@ -266,13 +212,6 @@ impl Deserializable for MastForest { mast_forest }; - mast_forest.debug_info.clear_error_codes(); - mast_forest - .debug_info - .extend_error_codes(error_codes.iter().map(|(k, v)| (*k, v.clone()))); - - mast_forest.debug_info.clear_procedure_names(); - // Validate that all procedure name digests correspond to procedure roots in the forest for digest in procedure_names.keys() { if mast_forest.find_procedure_root(*digest).is_none() { @@ -283,8 +222,6 @@ impl Deserializable for MastForest { } } - mast_forest.debug_info.extend_procedure_names(procedure_names); - Ok(mast_forest) } } @@ -312,47 +249,6 @@ fn read_and_validate_version( Ok(version) } -fn read_block_decorators( - source: &mut R, - decorator_count: usize, -) -> Result, DeserializationError> { - let vec_len: usize = source.read()?; - let mut out_vec: Vec<_> = Vec::with_capacity(vec_len); - - for _ in 0..vec_len { - let node_id: usize = source.read()?; - - let decorator_vec_len: usize = source.read()?; - let mut inner_vec: Vec = Vec::with_capacity(decorator_vec_len); - for _ in 0..decorator_vec_len { - let op_id: usize = source.read()?; - let decorator_id = DecoratorId::from_u32_bounded(source.read()?, decorator_count)?; - inner_vec.push((op_id, decorator_id)); - } - - out_vec.push((node_id, inner_vec)); - } - - Ok(out_vec) -} - -fn decorator_infos_iter<'a, R>( - source: &'a mut R, - decorator_count: usize, -) -> impl Iterator> + 'a -where - R: ByteReader + 'a, -{ - let mut remaining = decorator_count; - core::iter::from_fn(move || { - if remaining == 0 { - return None; - } - remaining -= 1; - Some(DecoratorInfo::read_from(source)) - }) -} - fn node_infos_iter<'a, R>( source: &'a mut R, node_count: usize, @@ -369,31 +265,3 @@ where Some(MastNodeInfo::read_from(source)) }) } - -/// Reads the `before_enter_decorators` and `after_exit_decorators` of the serialized `MastForest` -/// format. -/// -/// Note that we need this custom format because we cannot implement `Deserializable` for -/// `DecoratorId` (in favor of using [`DecoratorId::from_u32_bounded`]). -fn read_before_after_decorators( - source: &mut R, - decorator_count: usize, -) -> Result)>, DeserializationError> { - let vec_len: usize = source.read()?; - let mut out_vec: Vec<_> = Vec::with_capacity(vec_len); - - for _ in 0..vec_len { - let node_id: usize = source.read()?; - - let inner_vec_len: usize = source.read()?; - let mut inner_vec: Vec = Vec::with_capacity(inner_vec_len); - for _ in 0..inner_vec_len { - let decorator_id = DecoratorId::from_u32_bounded(source.read()?, decorator_count)?; - inner_vec.push(decorator_id); - } - - out_vec.push((node_id, inner_vec)); - } - - Ok(out_vec) -} From ebb895291cebd888c92c48ce2e50945cbb4af238 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fran=C3=A7ois=20Garillot?= Date: Sun, 14 Dec 2025 11:47:56 -0500 Subject: [PATCH 08/19] test(core): add comprehensive DebugInfo serialization tests and CSR handling fixes Adds comprehensive round-trip tests for MastForest serialization/deserialization with direct DebugInfo handling, including edge cases for CSR (Compressed Sparse Row) format. Tests cover: - Basic MastForest with decorators roundtrip - Empty CSR arrays for nodes without decorators - Sentinel values in op_indptr for empty nodes after decorated nodes - Multiple operations and decorators per node - All node types with various decorator configurations CSR handling fixes: - Allow empty CSR arrays when nodes have no decorators - Add sentinel values to op_indptr for proper indexing --- core/src/mast/debuginfo/decorator_storage.rs | 177 ++++++++++++++++-- core/src/mast/debuginfo/mod.rs | 56 +++--- .../mast/debuginfo/node_decorator_storage.rs | 28 ++- core/src/mast/serialization/tests.rs | 134 +++++++++++++ 4 files changed, 340 insertions(+), 55 deletions(-) diff --git a/core/src/mast/debuginfo/decorator_storage.rs b/core/src/mast/debuginfo/decorator_storage.rs index 69d3fcbc57..f52fb12f98 100644 --- a/core/src/mast/debuginfo/decorator_storage.rs +++ b/core/src/mast/debuginfo/decorator_storage.rs @@ -1,4 +1,7 @@ -use alloc::{string::{String, ToString}, vec::Vec}; +use alloc::{ + string::{String, ToString}, + vec::Vec, +}; use miden_utils_indexing::{Idx, IndexVec}; #[cfg(feature = "arbitrary")] @@ -145,7 +148,7 @@ impl OpToDecoratorIds { op_indptr_for_dec_ids: Vec, node_indptr_for_op_idx: IndexVec, ) -> Result { - // Empty structures are valid (no nodes, no decorators) + // Completely empty structures are valid (no nodes, no decorators) if decorator_ids.is_empty() && op_indptr_for_dec_ids.is_empty() && node_indptr_for_op_idx.is_empty() @@ -157,8 +160,9 @@ impl OpToDecoratorIds { }); } - // Nodes but no decorators: empty decorator_ids/op_indptr, node_indptr with all zeros + // Nodes with no decorators are valid if decorator_ids.is_empty() && op_indptr_for_dec_ids.is_empty() { + // All node pointers must be 0 if node_indptr_for_op_idx.iter().all(|&ptr| ptr == 0) { return Ok(Self { decorator_ids, @@ -203,6 +207,7 @@ impl OpToDecoratorIds { let Some(&last_node_ptr) = node_slice.last() else { return Err(DecoratorIndexError::InternalStructure); }; + // Node pointers must be valid indices into op_indptr if last_node_ptr > op_indptr_for_dec_ids.len() - 1 { return Err(DecoratorIndexError::InternalStructure); } @@ -234,11 +239,31 @@ impl OpToDecoratorIds { /// - op_indptr_for_dec_ids is monotonic, starts at 0, ends at decorator_ids.len() /// - node_indptr_for_op_idx is monotonic, starts at 0, ends <= op_indptr_for_dec_ids.len()-1 pub(crate) fn validate_csr(&self, decorator_count: usize) -> Result<(), String> { + // Completely empty structures are valid (no nodes, no decorators) + if self.decorator_ids.is_empty() + && self.op_indptr_for_dec_ids.is_empty() + && self.node_indptr_for_op_idx.is_empty() + { + return Ok(()); + } + + // Nodes with no decorators are valid + if self.decorator_ids.is_empty() && self.op_indptr_for_dec_ids.is_empty() { + // All node pointers must be 0 + if !self.node_indptr_for_op_idx.iter().all(|&ptr| ptr == 0) { + return Err("node pointers must all be 0 when there are no decorators".to_string()); + } + return Ok(()); + } + // Validate all decorator IDs for &dec_id in &self.decorator_ids { if dec_id.to_usize() >= decorator_count { - return Err(format!("Invalid decorator ID {}: exceeds decorator count {}", - dec_id.to_usize(), decorator_count)); + return Err(format!( + "Invalid decorator ID {}: exceeds decorator count {}", + dec_id.to_usize(), + decorator_count + )); } } @@ -287,6 +312,7 @@ impl OpToDecoratorIds { } } + // Node pointers must be valid indices into op_indptr let max_node_ptr = self.op_indptr_for_dec_ids.len().saturating_sub(1); if *node_slice.last().unwrap() > max_node_ptr { return Err(format!( @@ -367,8 +393,15 @@ impl OpToDecoratorIds { } if decorators_info.is_empty() { - // Empty node: no operations at all, just set the end pointer equal to start - // This creates a node with an empty operations range + // Empty node needs sentinel if it follows decorated nodes + // CSR requires all node_indptr values to be valid op_indptr indices. + // If op_start == op_indptr.len(), add a sentinel so the pointer stays in bounds. + if op_start == self.op_indptr_for_dec_ids.len() + && !self.op_indptr_for_dec_ids.is_empty() + { + self.op_indptr_for_dec_ids.push(self.decorator_ids.len()); + } + self.node_indptr_for_op_idx .push(op_start) .map_err(|_| DecoratorIndexError::OperationIndex { node, operation: op_start })?; @@ -387,7 +420,8 @@ impl OpToDecoratorIds { // final sentinel for this node self.op_indptr_for_dec_ids.push(self.decorator_ids.len()); - // Push end pointer for this node (index of last op pointer) + // Push end pointer for this node (index of last op pointer, which is the sentinel) + // This is len()-1 because we just pushed the sentinel above let end_ops = self.op_indptr_for_dec_ids.len() - 1; self.node_indptr_for_op_idx .push(end_ops) @@ -836,7 +870,7 @@ mod tests { #[test] fn test_from_components_invalid_structure() { - // Empty structures are now valid + // Empty structures are valid let result = OpToDecoratorIds::from_components(vec![], vec![], IndexVec::new()); assert!(result.is_ok()); @@ -1182,14 +1216,20 @@ mod tests { assert!(range0.end > range0.start, "Node with decorators should have non-empty range"); assert!(range1.end > range1.start, "Node with decorators should have non-empty range"); - // Empty node should have range pointing to the end of op_indptr_for_dec_ids array - // This is expected behavior: empty nodes get the range at the end of the array + // Empty node should have range pointing to a sentinel that was added when the empty node + // was created. The sentinel is added at the position that would have been "past the end" + // before the empty node was added, making the empty node's range valid. let op_indptr_len = storage.op_indptr_for_dec_ids.len(); + // The empty node should point to the sentinel (which is now at len()-1 after being added) + let expected_empty_pos = op_indptr_len - 1; + assert_eq!( + range2.start, expected_empty_pos, + "Empty node should point to the sentinel that was added for it" + ); assert_eq!( - range2.start, op_indptr_len, - "Empty node should point to end of op_indptr array" + range2.end, expected_empty_pos, + "Empty node should have empty range at the sentinel" ); - assert_eq!(range2.end, op_indptr_len, "Empty node should have empty range at array end"); // Test 2: decorator_ids_for_node() should work for empty nodes // This should not panic - the iterator should be empty even though the range points to @@ -1451,3 +1491,112 @@ mod tests { } } } + +#[cfg(test)] +mod sparse_debug_tests { + use alloc::vec; + + use super::*; + + fn test_decorator_id(value: u32) -> DecoratorId { + DecoratorId(value) + } + + #[test] + fn test_sparse_case_manual() { + // Manually create what should be produced by sparse decorator case: + // 10 nodes, decorators only at nodes 0 and 5 + + // Just node 0 with decorator at op 0 + let decorator_ids = vec![test_decorator_id(0)]; + let op_indptr_for_dec_ids = vec![0, 1, 1]; // op 0: [0,1), sentinel + let mut node_indptr = IndexVec::new(); + node_indptr.push(0).unwrap(); // node 0 starts at op index 0 + node_indptr.push(2).unwrap(); // node 0 ends at op index 2 (sentinel) + + // Validate this structure + let result = + OpToDecoratorIds::from_components(decorator_ids, op_indptr_for_dec_ids, node_indptr); + + assert!(result.is_ok(), "Single node with decorator should validate: {:?}", result); + } + + #[test] + fn test_sparse_case_two_nodes() { + // Two nodes: node 0 with decorator, node 1 empty + let decorator_ids = vec![test_decorator_id(0)]; + let op_indptr_for_dec_ids = vec![0, 1, 1]; // op 0: [0,1), sentinel at 1 + let mut node_indptr = IndexVec::new(); + node_indptr.push(0).unwrap(); // node 0 starts at op index 0 + node_indptr.push(2).unwrap(); // node 0 ends at op index 2 + node_indptr.push(2).unwrap(); // node 1 (empty) points to same location + + let result = + OpToDecoratorIds::from_components(decorator_ids, op_indptr_for_dec_ids, node_indptr); + + assert!( + result.is_ok(), + "Two nodes (one with decorator, one empty) should validate: {:?}", + result + ); + } + + #[test] + fn test_sparse_debuginfo_round_trip() { + use alloc::collections::BTreeMap; + + use crate::{ + Decorator, + mast::debuginfo::{DebugInfo, NodeToDecoratorIds}, + utils::{Deserializable, Serializable}, + }; + + // Create a sparse CSR structure like we'd see with 10 nodes where only 0 and 5 have + // decorators + let decorator_ids = vec![test_decorator_id(0), test_decorator_id(1)]; + + // Node 0: op 0 has decorator 0 + // Nodes 1-4: empty + // Node 5: op 0 has decorator 1 + let op_indptr_for_dec_ids = vec![0, 1, 1, 1, 2, 2]; // ops with their decorator ranges + let mut node_indptr_for_op_idx = IndexVec::new(); + node_indptr_for_op_idx.push(0).unwrap(); // node 0 + node_indptr_for_op_idx.push(2).unwrap(); // node 0 end + node_indptr_for_op_idx.push(2).unwrap(); // node 1 (empty) + node_indptr_for_op_idx.push(2).unwrap(); // node 2 (empty) + node_indptr_for_op_idx.push(2).unwrap(); // node 3 (empty) + node_indptr_for_op_idx.push(2).unwrap(); // node 4 (empty) + node_indptr_for_op_idx.push(4).unwrap(); // node 5 + + let op_storage = OpToDecoratorIds::from_components( + decorator_ids.clone(), + op_indptr_for_dec_ids, + node_indptr_for_op_idx, + ) + .expect("CSR structure should be valid"); + + // Create a minimal DebugInfo + let mut decorators = crate::mast::debuginfo::IndexVec::new(); + decorators.push(Decorator::Trace(0)).unwrap(); + decorators.push(Decorator::Trace(5)).unwrap(); + + let node_storage = NodeToDecoratorIds::new(); + let error_codes = BTreeMap::new(); + + let debug_info = DebugInfo { + decorators, + op_decorator_storage: op_storage, + node_decorator_storage: node_storage, + error_codes, + procedure_names: BTreeMap::new(), + }; + + // Serialize and deserialize + let bytes = debug_info.to_bytes(); + let deserialized = + DebugInfo::read_from_bytes(&bytes).expect("Should deserialize successfully"); + + // Verify + assert_eq!(debug_info.num_decorators(), deserialized.num_decorators()); + } +} diff --git a/core/src/mast/debuginfo/mod.rs b/core/src/mast/debuginfo/mod.rs index e1de397ed2..49433dafbc 100644 --- a/core/src/mast/debuginfo/mod.rs +++ b/core/src/mast/debuginfo/mod.rs @@ -40,7 +40,12 @@ //! method, which removes decorators while preserving critical information. This allows //! backward compatibility while enabling size optimization for deployment. -use alloc::{collections::BTreeMap, string::{String, ToString}, sync::Arc, vec::Vec}; +use alloc::{ + collections::BTreeMap, + string::{String, ToString}, + sync::Arc, + vec::Vec, +}; use miden_utils_indexing::{Idx, IndexVec}; #[cfg(feature = "serde")] @@ -379,11 +384,8 @@ impl Serializable for DebugInfo { } // 2. Serialize error codes - let error_codes: alloc::collections::BTreeMap = self - .error_codes - .iter() - .map(|(k, v)| (*k, v.to_string())) - .collect(); + let error_codes: alloc::collections::BTreeMap = + self.error_codes.iter().map(|(k, v)| (*k, v.to_string())).collect(); error_codes.write_into(target); // 3. Serialize OpToDecoratorIds CSR (dense representation) @@ -404,12 +406,11 @@ impl Serializable for DebugInfo { self.op_decorator_storage.op_indptr_for_dec_ids.write_into(target); // node_indptr_for_op_idx as Vec - let node_indptr_vec: Vec = self - .op_decorator_storage + self.op_decorator_storage .node_indptr_for_op_idx .as_slice() - .to_vec(); - node_indptr_vec.write_into(target); + .to_vec() + .write_into(target); // 4. Serialize NodeToDecoratorIds CSR (dense representation) @@ -432,19 +433,13 @@ impl Serializable for DebugInfo { after_decorators_u32.write_into(target); // node_indptr_for_before as Vec - let before_indptr_vec: Vec = self - .node_decorator_storage - .node_indptr_for_before - .as_slice() - .to_vec(); + let before_indptr_vec: Vec = + self.node_decorator_storage.node_indptr_for_before.as_slice().to_vec(); before_indptr_vec.write_into(target); // node_indptr_for_after as Vec - let after_indptr_vec: Vec = self - .node_decorator_storage - .node_indptr_for_after - .as_slice() - .to_vec(); + let after_indptr_vec: Vec = + self.node_decorator_storage.node_indptr_for_after.as_slice().to_vec(); after_indptr_vec.write_into(target); } } @@ -455,7 +450,8 @@ impl Deserializable for DebugInfo { // 1. Read decorator data and string table let decorator_data: Vec = Deserializable::read_from(source)?; - let string_table: crate::mast::serialization::StringTable = Deserializable::read_from(source)?; + let string_table: crate::mast::serialization::StringTable = + Deserializable::read_from(source)?; let decorator_count: usize = source.read_usize()?; let mut decorator_infos = Vec::with_capacity(decorator_count); @@ -466,8 +462,7 @@ impl Deserializable for DebugInfo { // 2. Reconstruct decorators let mut decorators = IndexVec::new(); for decorator_info in decorator_infos { - let decorator = decorator_info - .try_into_decorator(&string_table, &decorator_data)?; + let decorator = decorator_info.try_into_decorator(&string_table, &decorator_data)?; decorators.push(decorator).map_err(|_| { DeserializationError::InvalidValue( "Failed to add decorator to IndexVec".to_string(), @@ -478,11 +473,10 @@ impl Deserializable for DebugInfo { // 3. Read error codes let error_codes_raw: alloc::collections::BTreeMap = Deserializable::read_from(source)?; - let error_codes: alloc::collections::BTreeMap> = - error_codes_raw - .into_iter() - .map(|(k, v)| (k, alloc::sync::Arc::from(v.as_str()))) - .collect(); + let error_codes: alloc::collections::BTreeMap> = error_codes_raw + .into_iter() + .map(|(k, v)| (k, alloc::sync::Arc::from(v.as_str()))) + .collect(); // 4. Read OpToDecoratorIds CSR (dense representation) @@ -582,9 +576,9 @@ impl Deserializable for DebugInfo { procedure_names, }; - debug_info - .validate() - .map_err(|e| DeserializationError::InvalidValue(format!("DebugInfo validation failed: {}", e)))?; + debug_info.validate().map_err(|e| { + DeserializationError::InvalidValue(format!("DebugInfo validation failed: {}", e)) + })?; Ok(debug_info) } diff --git a/core/src/mast/debuginfo/node_decorator_storage.rs b/core/src/mast/debuginfo/node_decorator_storage.rs index 21c369d6c2..db7b2fbc65 100644 --- a/core/src/mast/debuginfo/node_decorator_storage.rs +++ b/core/src/mast/debuginfo/node_decorator_storage.rs @@ -99,8 +99,18 @@ impl NodeToDecoratorIds { /// - All decorator IDs are valid (< decorator_count) /// - Both indptr arrays are monotonic, start at 0, end at respective decorator vector lengths pub(crate) fn validate_csr(&self, decorator_count: usize) -> Result<(), String> { + // Completely empty structures are valid (no nodes, no decorators) + if self.before_enter_decorators.is_empty() + && self.after_exit_decorators.is_empty() + && self.node_indptr_for_before.is_empty() + && self.node_indptr_for_after.is_empty() + { + return Ok(()); + } + // Validate all decorator IDs - for &dec_id in self.before_enter_decorators.iter().chain(self.after_exit_decorators.iter()) { + for &dec_id in self.before_enter_decorators.iter().chain(self.after_exit_decorators.iter()) + { if dec_id.to_usize() >= decorator_count { return Err(format!( "Invalid decorator ID {}: exceeds decorator count {}", @@ -332,12 +342,8 @@ mod tests { after_indptr.push(0).unwrap(); after_indptr.push(1).unwrap(); - let storage = NodeToDecoratorIds::from_components( - before, - after, - before_indptr, - after_indptr, - ); + let storage = + NodeToDecoratorIds::from_components(before, after, before_indptr, after_indptr); assert!(storage.is_ok()); } @@ -357,7 +363,8 @@ mod tests { vec![test_decorator_id(1)], before_indptr, after_indptr, - ).unwrap(); + ) + .unwrap(); assert!(storage.validate_csr(3).is_ok()); } @@ -373,11 +380,12 @@ mod tests { after_indptr.push(0).unwrap(); let storage = NodeToDecoratorIds::from_components( - vec![test_decorator_id(5)], // ID too high + vec![test_decorator_id(5)], // ID too high vec![], before_indptr, after_indptr, - ).unwrap(); + ) + .unwrap(); let result = storage.validate_csr(3); assert!(result.is_err()); diff --git a/core/src/mast/serialization/tests.rs b/core/src/mast/serialization/tests.rs index 44588b6ebe..ec6ef1a475 100644 --- a/core/src/mast/serialization/tests.rs +++ b/core/src/mast/serialization/tests.rs @@ -1120,3 +1120,137 @@ mod proptests { } } } + +// COMPREHENSIVE DEBUGINFO ROUND-TRIP TESTS +// ================================================================================================ + +/// Test DebugInfo serialization with empty decorators (no decorators at all) +#[test] +fn test_debuginfo_serialization_empty() { + use crate::{ + Operation, + mast::{BasicBlockNodeBuilder, MastForest}, + }; + + // Create forest with no decorators + let mut forest = MastForest::new(); + + // Add a simple basic block with no decorators + let ops = vec![Operation::Noop; 4]; + let block_id = BasicBlockNodeBuilder::new(ops, Vec::new()).add_to_forest(&mut forest).unwrap(); + forest.make_root(block_id); + + // Serialize and deserialize + let bytes = forest.to_bytes(); + let deserialized = MastForest::read_from_bytes(&bytes).unwrap(); + + // Verify + assert_eq!(forest.num_nodes(), deserialized.num_nodes()); + assert_eq!(forest.decorators().len(), 0); + assert_eq!(deserialized.decorators().len(), 0); +} + +/// Test DebugInfo serialization with sparse decorators (20% of nodes have decorators) +#[test] +fn test_debuginfo_serialization_sparse() { + use crate::{ + Decorator, Operation, + mast::{BasicBlockNodeBuilder, MastForest}, + }; + + let mut forest = MastForest::new(); + + // Create 10 blocks, only 2 with decorators (20% sparse) + for i in 0..10 { + let ops = vec![Operation::Noop; 4]; + + if i % 5 == 0 { + // Add decorator at position 0 for nodes 0 and 5 + let decorator_id = forest.add_decorator(Decorator::Trace(i)).unwrap(); + BasicBlockNodeBuilder::new(ops, vec![(0, decorator_id)]) + .add_to_forest(&mut forest) + .unwrap(); + } else { + BasicBlockNodeBuilder::new(ops, Vec::new()).add_to_forest(&mut forest).unwrap(); + } + } + + // Serialize and deserialize + let bytes = forest.to_bytes(); + let deserialized = MastForest::read_from_bytes(&bytes).unwrap(); + + // Verify decorator count + assert_eq!(forest.decorators().len(), 2); + assert_eq!(deserialized.decorators().len(), 2); + + // Verify decorators are at correct nodes + for i in 0..10 { + let node_id = crate::mast::MastNodeId::new_unchecked(i); + let orig_decorators = forest.decorator_indices_for_op(node_id, 0); + let deser_decorators = deserialized.decorator_indices_for_op(node_id, 0); + + assert_eq!(orig_decorators, deser_decorators, "Decorators at node {} should match", i); + } +} + +/// Test DebugInfo serialization with dense decorators (80% of nodes have decorators) +#[test] +fn test_debuginfo_serialization_dense() { + use crate::{ + Decorator, Operation, + mast::{BasicBlockNodeBuilder, MastForest}, + }; + + let mut forest = MastForest::new(); + + // Create 10 blocks, 8 with decorators (80% dense) + for i in 0..10 { + let ops = vec![Operation::Noop; 4]; + + if i < 8 { + // Add decorator at position 0 for first 8 nodes + let decorator_id = forest.add_decorator(Decorator::Trace(i)).unwrap(); + BasicBlockNodeBuilder::new(ops, vec![(0, decorator_id)]) + .add_to_forest(&mut forest) + .unwrap(); + } else { + BasicBlockNodeBuilder::new(ops, Vec::new()).add_to_forest(&mut forest).unwrap(); + } + } + + // Serialize and deserialize + let bytes = forest.to_bytes(); + let deserialized = MastForest::read_from_bytes(&bytes).unwrap(); + + // Verify decorator count + assert_eq!(forest.decorators().len(), 8); + assert_eq!(deserialized.decorators().len(), 8); + + // Verify decorators are at correct nodes + for i in 0..10 { + let node_id = crate::mast::MastNodeId::new_unchecked(i); + let orig_decorators = forest.decorator_indices_for_op(node_id, 0); + let deser_decorators = deserialized.decorator_indices_for_op(node_id, 0); + + assert_eq!(orig_decorators, deser_decorators, "Decorators at node {} should match", i); + + // Verify expected decorator presence + if i < 8 { + assert_eq!(orig_decorators.len(), 1, "Node {} should have 1 decorator", i); + assert_eq!( + deser_decorators.len(), + 1, + "Node {} should have 1 decorator after deserialization", + i + ); + } else { + assert_eq!(orig_decorators.len(), 0, "Node {} should have no decorators", i); + assert_eq!( + deser_decorators.len(), + 0, + "Node {} should have no decorators after deserialization", + i + ); + } + } +} From 08289d1ddfa0a682d9459b5c4291a7f72fbac171 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fran=C3=A7ois=20Garillot?= Date: Sun, 14 Dec 2025 11:50:16 -0500 Subject: [PATCH 09/19] chore: Changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index ef6637e4b3..27dd55cd7f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -28,6 +28,7 @@ - Added validation of `core_trace_fragment_size` in `ExecutionOptions` ([#2528](https://github.com/0xMiden/miden-vm/pull/2528)). - [BREAKING] Change serialization of `BasicBlockNode`s to use padded indices ([#2466](https://github.com/0xMiden/miden-vm/pull/2466/)). - Change padded serialization of `BasicBlockNode`s to use delta-encoded metadata ([#2469](https://github.com/0xMiden/miden-vm/pull/2469/)). +- Change (de)serialization of `MastForest` to directly (de)serialize DebugInfo ([#2470](https://github.com/0xMiden/miden-vm/pull/2470/)). ## 0.20.2 (TBD) - Fix issue where decorator access was not bypassed properly in release mode ([#2529](https://github.com/0xMiden/miden-vm/pull/2529)). From 6a66b4b64a2acb765b946c51535337a1cdb0f9f5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fran=C3=A7ois=20Garillot?= Date: Sun, 14 Dec 2025 11:53:47 -0500 Subject: [PATCH 10/19] doc(core): update serialization magic number changelog --- core/src/mast/serialization/mod.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/core/src/mast/serialization/mod.rs b/core/src/mast/serialization/mod.rs index 72f5df54f3..8e4fa4b633 100644 --- a/core/src/mast/serialization/mod.rs +++ b/core/src/mast/serialization/mod.rs @@ -98,7 +98,9 @@ const MAGIC: &[u8; 5] = b"MAST\0"; /// Version history: /// - [0, 0, 0]: Initial format /// - [0, 0, 1]: Added batch metadata to basic blocks (operations serialized in padded form with -/// indptr, padding, and group metadata for exact OpBatch reconstruction) +/// indptr, padding, and group metadata for exact OpBatch reconstruction). Direct decorator +/// serialization in CSR format (eliminates per-node decorator sections and round-trip +/// conversions). const VERSION: [u8; 3] = [0, 0, 1]; // MAST FOREST SERIALIZATION/DESERIALIZATION From e34f0c8103665438b388e09792b58ffdaa52c2d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fran=C3=A7ois=20Garillot?= Date: Sun, 14 Dec 2025 22:47:14 -0500 Subject: [PATCH 11/19] refactor(debuginfo): improve API and code organization - Changed `OpToDecoratorIds::from_components` and `NodeToDecoratorIds::from_components` visibility from `pub` to `pub(super)` as they're only used internally for deserialization - Added serialization helper methods `write_into()` and `read_from()` to `OpToDecoratorIds` and `NodeToDecoratorIds` to break down large DebugInfo serialization implementation - Refactored DebugInfo (de)serialization to call helper methods instead of directly accessing CSR fields - Moved decorator storage tests to separate file (`decorator_storage/tests.rs`) following codebase pattern, reducing main file from 1669 to 871 lines - Added comment clarifying wire format compatibility for unused `_decorator_count` variable in MastForest deserialization --- core/src/mast/debuginfo/decorator_storage.rs | 1602 ----------------- .../mast/debuginfo/decorator_storage/mod.rs | 870 +++++++++ .../mast/debuginfo/decorator_storage/tests.rs | 782 ++++++++ core/src/mast/debuginfo/mod.rs | 134 +- .../mast/debuginfo/node_decorator_storage.rs | 90 +- core/src/mast/serialization/mod.rs | 2 +- 6 files changed, 1747 insertions(+), 1733 deletions(-) delete mode 100644 core/src/mast/debuginfo/decorator_storage.rs create mode 100644 core/src/mast/debuginfo/decorator_storage/mod.rs create mode 100644 core/src/mast/debuginfo/decorator_storage/tests.rs diff --git a/core/src/mast/debuginfo/decorator_storage.rs b/core/src/mast/debuginfo/decorator_storage.rs deleted file mode 100644 index f52fb12f98..0000000000 --- a/core/src/mast/debuginfo/decorator_storage.rs +++ /dev/null @@ -1,1602 +0,0 @@ -use alloc::{ - string::{String, ToString}, - vec::Vec, -}; - -use miden_utils_indexing::{Idx, IndexVec}; -#[cfg(feature = "arbitrary")] -use proptest::prelude::*; -#[cfg(feature = "serde")] -use serde::{Deserialize, Serialize}; - -use crate::mast::{DecoratedOpLink, DecoratorId, MastNodeId}; - -/// A two-level compressed sparse row (CSR) representation for indexing decorator IDs per -/// operation per node. -/// -/// This structure provides efficient access to decorator IDs in a hierarchical manner: -/// 1. First level: Node -> Operations (operations belong to nodes) -/// 2. Second level: Operation -> Decorator IDs (decorator IDs belong to operations) -/// -/// The data layout follows CSR format at both levels: -/// -/// - `decorator_ids` contains all the DecoratorId values in a single flat array. These are the -/// actual decorator identifiers that need to be accessed. We store them contiguously to minimize -/// memory overhead and improve cache locality when iterating. -/// -/// - `op_indptr_for_dec_ids` stores pointer indices that map operations to their position within -/// the `decorator_ids` array. For each operation, it contains the start index where that -/// operation's decorator IDs begin in the flat storage. -/// -/// - `node_indptr_for_op_idx` stores pointer indices that map nodes to their position within the -/// `op_indptr_for_dec_ids` array. For each node, it contains the start index where that node's -/// operation pointers begin. -/// -/// Together, these three arrays form a two-level index structure that allows efficient -/// lookup of decorator IDs for any operation in any node, while minimizing memory usage -/// for sparse decorator data. -/// -/// # Example -/// -/// Consider this COO (Coordinate format) representation: -/// ```text -/// Node 0, Op 0: [decorator_id_0, decorator_id_1] -/// Node 0, Op 1: [decorator_id_2] -/// Node 1, Op 0: [decorator_id_3, decorator_id_4, decorator_id_5] -/// ``` -/// -/// This would be stored as: -/// ```text -/// decorator_ids: [0, 1, 2, 3, 4, 5] -/// op_indptr_for_dec_ids: [0, 2, 3, 6] // Node 0: ops [0,2], [2,3]; Node 1: ops [3,6] -/// node_indptr_for_op_idx: [0, 2, 3] // Node 0: [0,2], Node 1: [2,3] -/// ``` -/// -/// See the unit test `test_csr_and_coo_produce_same_elements` for a comprehensive example -/// demonstrating how this encoding works and verifying round-trip conversion from COO to CSR. -#[derive(Debug, Clone, PartialEq, Eq)] -#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] -#[cfg_attr(all(feature = "arbitrary", test), miden_test_serde_macros::serde_test)] -pub struct OpToDecoratorIds { - /// All the decorator IDs per operation per node, in a CSR relationship with - /// node_indptr_for_op_idx and op_indptr_for_dec_ids - pub(crate) decorator_ids: Vec, - /// For the node whose operation indices are in - /// `op_indptr_for_dec_ids[node_start..node_end]`, - /// the indices of its i-th operation are at: - /// ```text - /// decorator_ids[op_indptr_for_dec_ids[node_start + i].. - /// op_indptr_for_dec_ids[node_start + i + 1]] - /// ``` - pub(crate) op_indptr_for_dec_ids: Vec, - /// The decorated operation indices for the n-th node are at - /// `op_indptr_for_dec_ids[node_indptr_for_op_idx[n]..node_indptr_for_op_idx[n+1]]` - pub(crate) node_indptr_for_op_idx: IndexVec, -} - -/// Error type for decorator index mapping operations -#[derive(Debug, PartialEq, Eq, thiserror::Error)] -pub enum DecoratorIndexError { - /// Node index out of bounds - #[error("Invalid node index {0:?}")] - NodeIndex(MastNodeId), - /// Operation index out of bounds for the given node - #[error("Invalid operation index {operation} for node {node:?}")] - OperationIndex { node: MastNodeId, operation: usize }, - /// Invalid internal data structure (corrupted pointers) - #[error("Invalid internal data structure in OpToDecoratorIds")] - InternalStructure, -} - -impl OpToDecoratorIds { - /// Create a new empty OpToDecoratorIds with the specified capacity. - /// - /// # Arguments - /// * `nodes_capacity` - Expected number of nodes - /// * `operations_capacity` - Expected total number of operations across all nodes - /// * `decorator_ids_capacity` - Expected total number of decorator IDs across all operations - pub fn with_capacity( - nodes_capacity: usize, - operations_capacity: usize, - decorator_ids_capacity: usize, - ) -> Self { - Self { - decorator_ids: Vec::with_capacity(decorator_ids_capacity), - op_indptr_for_dec_ids: Vec::with_capacity(operations_capacity + 1), - node_indptr_for_op_idx: IndexVec::with_capacity(nodes_capacity + 1), - } - } - - /// Create a new empty OpToDecoratorIds. - pub fn new() -> Self { - Self::with_capacity(0, 0, 0) - } - - /// Create a OpToDecoratorIds from raw CSR components. - /// - /// This is useful for deserialization or testing purposes where you need to reconstruct - /// the data structure from its raw components. - /// - /// # Arguments - /// * `decorator_ids` - Flat storage of all decorator IDs. These are the actual decorator - /// identifiers that will be accessed through the index structure. - /// * `op_indptr_for_dec_ids` - Pointer indices for operations within decorator_ids. These must - /// be monotonically increasing and within bounds of `decorator_ids`. The slice must not be - /// empty, and the first element must be 0. - /// * `node_indptr_for_op_idx` - Pointer indices for nodes within op_indptr_for_dec_ids. These - /// must be monotonically increasing and within bounds of `op_indptr_for_dec_ids`. The slice - /// must not be empty, and the first element must be 0. - /// - /// # Returns - /// An error if the internal structure is inconsistent. Common issues that cause errors: - /// - Empty `op_indptr_for_dec_ids` or `node_indptr_for_op_idx` vectors - /// - Non-zero first element in either pointer array - /// - Decreasing pointer values (pointers must be monotonically non-decreasing) - /// - Pointer values that exceed the bounds of the arrays they index into - /// - Invalid ranges (start > end) in any pointer window - /// - /// # Validation Restrictions - /// The following constraints are enforced between components: - /// - `op_indptr_for_dec_ids` length must be >= 1 (for the sentinel) - /// - `node_indptr_for_op_idx` length must be >= 1 (for the sentinel) - /// - Last value in `op_indptr_for_dec_ids` must be <= `decorator_ids.len()` - /// - Last value in `node_indptr_for_op_idx` must be <= `op_indptr_for_dec_ids.len() - 1` - /// - Both `op_indptr_for_dec_ids` and `node_indptr_for_op_idx` must be strictly monotonic (each - /// successive value must be >= the previous one) - pub(crate) fn from_components( - decorator_ids: Vec, - op_indptr_for_dec_ids: Vec, - node_indptr_for_op_idx: IndexVec, - ) -> Result { - // Completely empty structures are valid (no nodes, no decorators) - if decorator_ids.is_empty() - && op_indptr_for_dec_ids.is_empty() - && node_indptr_for_op_idx.is_empty() - { - return Ok(Self { - decorator_ids, - op_indptr_for_dec_ids, - node_indptr_for_op_idx, - }); - } - - // Nodes with no decorators are valid - if decorator_ids.is_empty() && op_indptr_for_dec_ids.is_empty() { - // All node pointers must be 0 - if node_indptr_for_op_idx.iter().all(|&ptr| ptr == 0) { - return Ok(Self { - decorator_ids, - op_indptr_for_dec_ids, - node_indptr_for_op_idx, - }); - } else { - return Err(DecoratorIndexError::InternalStructure); - } - } - - // Validate the structure - if op_indptr_for_dec_ids.is_empty() { - return Err(DecoratorIndexError::InternalStructure); - } - - // Check that op_indptr_for_dec_ids starts at 0 - if op_indptr_for_dec_ids[0] != 0 { - return Err(DecoratorIndexError::InternalStructure); - } - - // Check that the last operation pointer doesn't exceed decorator IDs length - let Some(&last_op_ptr) = op_indptr_for_dec_ids.last() else { - return Err(DecoratorIndexError::InternalStructure); - }; - if last_op_ptr > decorator_ids.len() { - return Err(DecoratorIndexError::InternalStructure); - } - - // Check that node pointers are within bounds of operation pointers - if node_indptr_for_op_idx.is_empty() { - return Err(DecoratorIndexError::InternalStructure); - } - - let node_slice = node_indptr_for_op_idx.as_slice(); - - // Check that node_indptr_for_op_idx starts at 0 - if node_slice[0] != 0 { - return Err(DecoratorIndexError::InternalStructure); - } - - let Some(&last_node_ptr) = node_slice.last() else { - return Err(DecoratorIndexError::InternalStructure); - }; - // Node pointers must be valid indices into op_indptr - if last_node_ptr > op_indptr_for_dec_ids.len() - 1 { - return Err(DecoratorIndexError::InternalStructure); - } - - // Ensure monotonicity of pointers - for window in op_indptr_for_dec_ids.windows(2) { - if window[0] > window[1] { - return Err(DecoratorIndexError::InternalStructure); - } - } - - for window in node_slice.windows(2) { - if window[0] > window[1] { - return Err(DecoratorIndexError::InternalStructure); - } - } - - Ok(Self { - decorator_ids, - op_indptr_for_dec_ids, - node_indptr_for_op_idx, - }) - } - - /// Validate CSR structure integrity. - /// - /// Checks: - /// - All decorator IDs are valid (< decorator_count) - /// - op_indptr_for_dec_ids is monotonic, starts at 0, ends at decorator_ids.len() - /// - node_indptr_for_op_idx is monotonic, starts at 0, ends <= op_indptr_for_dec_ids.len()-1 - pub(crate) fn validate_csr(&self, decorator_count: usize) -> Result<(), String> { - // Completely empty structures are valid (no nodes, no decorators) - if self.decorator_ids.is_empty() - && self.op_indptr_for_dec_ids.is_empty() - && self.node_indptr_for_op_idx.is_empty() - { - return Ok(()); - } - - // Nodes with no decorators are valid - if self.decorator_ids.is_empty() && self.op_indptr_for_dec_ids.is_empty() { - // All node pointers must be 0 - if !self.node_indptr_for_op_idx.iter().all(|&ptr| ptr == 0) { - return Err("node pointers must all be 0 when there are no decorators".to_string()); - } - return Ok(()); - } - - // Validate all decorator IDs - for &dec_id in &self.decorator_ids { - if dec_id.to_usize() >= decorator_count { - return Err(format!( - "Invalid decorator ID {}: exceeds decorator count {}", - dec_id.to_usize(), - decorator_count - )); - } - } - - // Validate op_indptr_for_dec_ids - if self.op_indptr_for_dec_ids.is_empty() { - return Err("op_indptr_for_dec_ids cannot be empty".to_string()); - } - - if self.op_indptr_for_dec_ids[0] != 0 { - return Err("op_indptr_for_dec_ids must start at 0".to_string()); - } - - for window in self.op_indptr_for_dec_ids.windows(2) { - if window[0] > window[1] { - return Err(format!( - "op_indptr_for_dec_ids not monotonic: {} > {}", - window[0], window[1] - )); - } - } - - if *self.op_indptr_for_dec_ids.last().unwrap() != self.decorator_ids.len() { - return Err(format!( - "op_indptr_for_dec_ids end {} doesn't match decorator_ids length {}", - self.op_indptr_for_dec_ids.last().unwrap(), - self.decorator_ids.len() - )); - } - - // Validate node_indptr_for_op_idx - let node_slice = self.node_indptr_for_op_idx.as_slice(); - if node_slice.is_empty() { - return Err("node_indptr_for_op_idx cannot be empty".to_string()); - } - - if node_slice[0] != 0 { - return Err("node_indptr_for_op_idx must start at 0".to_string()); - } - - for window in node_slice.windows(2) { - if window[0] > window[1] { - return Err(format!( - "node_indptr_for_op_idx not monotonic: {} > {}", - window[0], window[1] - )); - } - } - - // Node pointers must be valid indices into op_indptr - let max_node_ptr = self.op_indptr_for_dec_ids.len().saturating_sub(1); - if *node_slice.last().unwrap() > max_node_ptr { - return Err(format!( - "node_indptr_for_op_idx end {} exceeds op_indptr bounds {}", - node_slice.last().unwrap(), - max_node_ptr - )); - } - - Ok(()) - } - - pub fn is_empty(&self) -> bool { - self.node_indptr_for_op_idx.is_empty() - } - - /// Get the number of nodes in this storage. - pub fn num_nodes(&self) -> usize { - if self.node_indptr_for_op_idx.is_empty() { - 0 - } else { - self.node_indptr_for_op_idx.len() - 1 - } - } - - /// Get the total number of decorator IDs across all operations. - pub fn num_decorator_ids(&self) -> usize { - self.decorator_ids.len() - } - - /// Add decorator information for a node incrementally. - /// - /// This method allows building up the OpToDecoratorIds structure by adding - /// decorator IDs for nodes in sequential order only. - /// - /// # Arguments - /// * `node` - The node ID to add decorator IDs for. Must be the next sequential node. - /// * `decorators_info` - Vector of (operation_index, decorator_id) tuples. The operation - /// indices should be sorted (as guaranteed by validate_decorators). Operations not present in - /// this vector will have no decorator IDs. - /// - /// # Returns - /// Ok(()) if successful, Err(DecoratorIndexError) if the node is not the next sequential - /// node. - /// - /// # Behavior - /// - Can only add decorator IDs for the next sequential node ID - /// - Automatically creates empty operations for gaps in operation indices - /// - Maintains the two-level CSR structure invariant - pub fn add_decorator_info_for_node( - &mut self, - node: MastNodeId, - decorators_info: Vec<(usize, DecoratorId)>, - ) -> Result<(), DecoratorIndexError> { - // Enforce sequential node ids - let expected = MastNodeId::new_unchecked(self.num_nodes() as u32); - if node < expected { - return Err(DecoratorIndexError::NodeIndex(node)); - } - // Create empty nodes for gaps in node indices - for idx in expected.0..node.0 { - self.add_decorator_info_for_node(MastNodeId::new_unchecked(idx), vec![]) - .unwrap(); - } - - // Start of this node's operations is the current length (do NOT reuse previous sentinel) - let op_start = self.op_indptr_for_dec_ids.len(); - - // Maintain node CSR: node_indptr[i] = start index for node i - if self.node_indptr_for_op_idx.is_empty() { - self.node_indptr_for_op_idx - .push(op_start) - .map_err(|_| DecoratorIndexError::OperationIndex { node, operation: op_start })?; - } else { - // Overwrite the previous "end" slot to become this node's start - let last = MastNodeId::new_unchecked((self.node_indptr_for_op_idx.len() - 1) as u32); - self.node_indptr_for_op_idx[last] = op_start; - } - - if decorators_info.is_empty() { - // Empty node needs sentinel if it follows decorated nodes - // CSR requires all node_indptr values to be valid op_indptr indices. - // If op_start == op_indptr.len(), add a sentinel so the pointer stays in bounds. - if op_start == self.op_indptr_for_dec_ids.len() - && !self.op_indptr_for_dec_ids.is_empty() - { - self.op_indptr_for_dec_ids.push(self.decorator_ids.len()); - } - - self.node_indptr_for_op_idx - .push(op_start) - .map_err(|_| DecoratorIndexError::OperationIndex { node, operation: op_start })?; - } else { - // Build op->decorator CSR for this node - let max_op_idx = decorators_info.last().unwrap().0; // input is sorted by op index - let mut it = decorators_info.into_iter().peekable(); - - for op in 0..=max_op_idx { - // pointer to start of decorator IDs for op - self.op_indptr_for_dec_ids.push(self.decorator_ids.len()); - while it.peek().is_some_and(|(i, _)| *i == op) { - self.decorator_ids.push(it.next().unwrap().1); - } - } - // final sentinel for this node - self.op_indptr_for_dec_ids.push(self.decorator_ids.len()); - - // Push end pointer for this node (index of last op pointer, which is the sentinel) - // This is len()-1 because we just pushed the sentinel above - let end_ops = self.op_indptr_for_dec_ids.len() - 1; - self.node_indptr_for_op_idx - .push(end_ops) - .map_err(|_| DecoratorIndexError::OperationIndex { node, operation: end_ops })?; - } - - Ok(()) - } - - /// Get the number of decorator IDs for a specific operation within a node. - /// - /// # Arguments - /// * `node` - The node ID - /// * `operation` - The operation index within the node - /// - /// # Returns - /// The number of decorator IDs for the operation, or an error if indices are invalid. - pub fn num_decorator_ids_for_operation( - &self, - node: MastNodeId, - operation: usize, - ) -> Result { - self.decorator_ids_for_operation(node, operation).map(|slice| slice.len()) - } - - /// Get all decorator IDs for a specific operation within a node. - /// - /// # Arguments - /// * `node` - The node ID - /// * `operation` - The operation index within the node - /// - /// # Returns - /// A slice of decorator IDs for the operation, or an error if indices are invalid. - pub fn decorator_ids_for_operation( - &self, - node: MastNodeId, - operation: usize, - ) -> Result<&[DecoratorId], DecoratorIndexError> { - let op_range = self.operation_range_for_node(node)?; - // that operation does not have listed decorator indices - if operation >= op_range.len() { - return Ok(&[]); - } - - let op_start_idx = op_range.start + operation; - if op_start_idx + 1 >= self.op_indptr_for_dec_ids.len() { - return Err(DecoratorIndexError::InternalStructure); - } - - let dec_start = self.op_indptr_for_dec_ids[op_start_idx]; - let dec_end = self.op_indptr_for_dec_ids[op_start_idx + 1]; - - if dec_start > dec_end || dec_end > self.decorator_ids.len() { - return Err(DecoratorIndexError::InternalStructure); - } - - Ok(&self.decorator_ids[dec_start..dec_end]) - } - - /// Get an iterator over all operations and their decorator IDs for a given node. - /// - /// # Arguments - /// * `node` - The node ID - /// - /// # Returns - /// An iterator yielding (operation_index, decorator_ids_slice) tuples, or an error if the node - /// is invalid. - pub fn decorator_ids_for_node( - &self, - node: MastNodeId, - ) -> Result, DecoratorIndexError> { - let op_range = self.operation_range_for_node(node)?; - let num_ops = op_range.len(); - - Ok((0..num_ops).map(move |op_idx| { - let op_start_idx = op_range.start + op_idx; - let dec_start = self.op_indptr_for_dec_ids[op_start_idx]; - let dec_end = self.op_indptr_for_dec_ids[op_start_idx + 1]; - (op_idx, &self.decorator_ids[dec_start..dec_end]) - })) - } - - /// Named, zero-alloc view flattened to `(relative_op_idx, DecoratorId)`. - pub fn decorator_links_for_node<'a>( - &'a self, - node: MastNodeId, - ) -> Result, DecoratorIndexError> { - let op_range = self.operation_range_for_node(node)?; // [start .. end) in op-pointer space - Ok(DecoratedLinks::new( - op_range.start, - op_range.end, - &self.op_indptr_for_dec_ids, - &self.decorator_ids, - )) - } - - /// Check if a specific operation within a node has any decorator IDs. - /// - /// # Arguments - /// * `node` - The node ID - /// * `operation` - The operation index within the node - /// - /// # Returns - /// True if the operation has at least one decorator ID, false otherwise, or an error if indices - /// are invalid. - pub fn operation_has_decorator_ids( - &self, - node: MastNodeId, - operation: usize, - ) -> Result { - self.num_decorator_ids_for_operation(node, operation).map(|count| count > 0) - } - - /// Get the range of operation indices for a given node. - /// - /// # Arguments - /// * `node` - The node ID - /// - /// # Returns - /// A range representing the start and end (exclusive) operation indices for the node. - pub fn operation_range_for_node( - &self, - node: MastNodeId, - ) -> Result, DecoratorIndexError> { - let node_slice = self.node_indptr_for_op_idx.as_slice(); - let node_idx = node.to_usize(); - - if node_idx + 1 >= node_slice.len() { - return Err(DecoratorIndexError::NodeIndex(node)); - } - - let start = node_slice[node_idx]; - let end = node_slice[node_idx + 1]; - - if start > end || end > self.op_indptr_for_dec_ids.len() { - return Err(DecoratorIndexError::InternalStructure); - } - - Ok(start..end) - } -} - -impl Default for OpToDecoratorIds { - fn default() -> Self { - Self::new() - } -} - -/// Immutable view over all `(op_idx, DecoratorId)` pairs for a node. -/// Uses the two-level CSR encoded by `node_indptr_for_op_idx` and `op_indptr_for_dec_idx`. -pub struct DecoratedLinks<'a> { - // Absolute op-pointer index range for this node: [start_op .. end_op) - start_op: usize, - end_op: usize, - - // CSR arrays (borrowed; not owned) - op_indptr_for_dec_idx: &'a [usize], // len = total_ops + 1 - decorator_indices: &'a [DecoratorId], -} - -impl<'a> DecoratedLinks<'a> { - fn new( - start_op: usize, - end_op: usize, - op_indptr_for_dec_idx: &'a [usize], - decorator_indices: &'a [DecoratorId], - ) -> Self { - Self { - start_op, - end_op, - op_indptr_for_dec_idx, - decorator_indices, - } - } - - /// Total number of `(op_idx, DecoratorId)` pairs in this view (exact). - #[inline] - pub fn len_pairs(&self) -> usize { - let s = self.op_indptr_for_dec_idx[self.start_op]; - let e = self.op_indptr_for_dec_idx[self.end_op]; - e - s - } - - #[inline] - pub fn is_empty(&self) -> bool { - self.len_pairs() == 0 - } -} - -/// The concrete, zero-alloc iterator over `(relative_op_idx, DecoratorId)`. -pub struct DecoratedLinksIter<'a> { - // absolute op-pointer indices (into op_indptr_for_dec_idx) - cur_op: usize, - end_op: usize, - base_op: usize, // for relative index = cur_op - base_op - - // inner slice [inner_i .. inner_end) indexes into decorator_indices - inner_i: usize, - inner_end: usize, - - // borrowed CSR arrays - op_indptr_for_dec_idx: &'a [usize], - decorator_indices: &'a [DecoratorId], - - // exact count of remaining pairs - remaining: usize, -} - -impl<'a> IntoIterator for DecoratedLinks<'a> { - type Item = DecoratedOpLink; - type IntoIter = DecoratedLinksIter<'a>; - - fn into_iter(self) -> Self::IntoIter { - // Precompute the exact number of pairs in the node's range. - let remaining = { - // Add bounds check to prevent panic on empty storage - if self.start_op >= self.op_indptr_for_dec_idx.len() - || self.end_op > self.op_indptr_for_dec_idx.len() // Note: end_op can be equal to len - || self.start_op > self.end_op - // Invalid range - { - 0 - } else { - // For valid ranges, compute the actual number of decorator pairs - let mut total = 0; - for op_idx in self.start_op..self.end_op { - let start_idx = self.op_indptr_for_dec_idx[op_idx]; - let end_idx = if op_idx + 1 < self.op_indptr_for_dec_idx.len() { - self.op_indptr_for_dec_idx[op_idx + 1] - } else { - self.op_indptr_for_dec_idx.len() - }; - total += end_idx - start_idx; - } - total - } - }; - - // Initialize inner range to the first op (if any). - let (inner_i, inner_end) = if self.start_op < self.end_op - && self.start_op + 1 < self.op_indptr_for_dec_idx.len() - { - let s0 = self.op_indptr_for_dec_idx[self.start_op]; - let e0 = self.op_indptr_for_dec_idx[self.start_op + 1]; - (s0, e0) - } else { - (0, 0) - }; - - DecoratedLinksIter { - cur_op: self.start_op, - end_op: self.end_op, - base_op: self.start_op, - inner_i, - inner_end, - op_indptr_for_dec_idx: self.op_indptr_for_dec_idx, - decorator_indices: self.decorator_indices, - remaining, - } - } -} - -impl<'a> DecoratedLinksIter<'a> { - #[inline] - fn advance_outer(&mut self) -> bool { - self.cur_op += 1; - if self.cur_op >= self.end_op { - return false; - } - // Bounds check: ensure cur_op and cur_op+1 are within the pointer array - if self.cur_op + 1 >= self.op_indptr_for_dec_idx.len() { - return false; - } - let s = self.op_indptr_for_dec_idx[self.cur_op]; - let e = self.op_indptr_for_dec_idx[self.cur_op + 1]; - self.inner_i = s; - self.inner_end = e; - true - } -} - -impl<'a> Iterator for DecoratedLinksIter<'a> { - type Item = DecoratedOpLink; - - #[inline] - fn next(&mut self) -> Option { - while self.cur_op < self.end_op { - // Emit from current op's decorator slice if non-empty - if self.inner_i < self.inner_end { - let rel_op = self.cur_op - self.base_op; // relative op index within the node - let id = self.decorator_indices[self.inner_i]; - self.inner_i += 1; - self.remaining -= 1; - return Some((rel_op, id)); - } - // Move to next operation (which might be empty as well) - if !self.advance_outer() { - break; - } - } - None - } - - #[inline] - fn size_hint(&self) -> (usize, Option) { - (self.remaining, Some(self.remaining)) - } -} - -impl<'a> ExactSizeIterator for DecoratedLinksIter<'a> { - #[inline] - fn len(&self) -> usize { - self.remaining - } -} - -#[cfg(feature = "arbitrary")] -impl Arbitrary for OpToDecoratorIds { - type Parameters = proptest::collection::SizeRange; - type Strategy = BoxedStrategy; - - fn arbitrary_with(size: Self::Parameters) -> Self::Strategy { - use proptest::{collection::vec, prelude::*}; - - // Generate small, controlled structures to avoid infinite loops - // Keep node and operation counts small and bounded, always at least 1 - (1usize..40, 1usize..=72) // (max_nodes, max_ops_per_node) - .prop_flat_map(move |(max_nodes, max_ops_per_node)| { - vec( - // Generate (node_id, op_id, decorator_id) with bounded values - (0..max_nodes, 0..max_ops_per_node, any::()), - size.clone(), // Limit total entries to size - ) - .prop_map(move |coo_data| { - // Build the OpToDecoratorIds incrementally - let mut mapping = OpToDecoratorIds::new(); - - // Group by node_id, then by op_id to maintain sorted order - use alloc::collections::BTreeMap; - let mut node_to_ops: BTreeMap>> = BTreeMap::new(); - - for (node_id, op_id, decorator_id) in coo_data { - node_to_ops - .entry(node_id as u32) - .or_default() - .entry(op_id as u32) - .or_default() - .push(decorator_id); - } - - // Add nodes in order to satisfy sequential constraint - for (node_id, ops_map) in node_to_ops { - let mut decorators_info: Vec<(usize, DecoratorId)> = Vec::new(); - for (op_id, decorator_ids) in ops_map { - for decorator_id in decorator_ids { - decorators_info.push((op_id as usize, DecoratorId(decorator_id))); - } - } - // Sort by operation index to meet add_decorator_info_for_node requirements - decorators_info.sort_by_key(|(op_idx, _)| *op_idx); - - mapping - .add_decorator_info_for_node( - MastNodeId::new_unchecked(node_id), - decorators_info, - ) - .unwrap(); - } - - mapping - }) - }) - .boxed() - } -} - -#[cfg(test)] -mod tests { - use miden_utils_indexing::IndexVec; - - use super::*; - - /// Helper function to create a test DecoratorId - fn test_decorator_id(value: u32) -> DecoratorId { - DecoratorId(value) - } - - /// Helper function to create a test MastNodeId - fn test_node_id(value: u32) -> MastNodeId { - MastNodeId::new_unchecked(value) - } - - /// Helper to create standard test storage with 2 nodes, 3 operations, 6 decorator IDs - /// Structure: Node 0: Op 0 -> [0, 1], Op 1 -> [2]; Node 1: Op 0 -> [3, 4, 5] - fn create_standard_test_storage() -> OpToDecoratorIds { - let decorator_ids = vec![ - test_decorator_id(0), - test_decorator_id(1), - test_decorator_id(2), - test_decorator_id(3), - test_decorator_id(4), - test_decorator_id(5), - ]; - let op_indptr_for_dec_ids = vec![0, 2, 3, 6]; - let mut node_indptr_for_op_idx = IndexVec::new(); - node_indptr_for_op_idx.push(0).expect("test setup: IndexVec capacity exceeded"); - node_indptr_for_op_idx.push(2).expect("test setup: IndexVec capacity exceeded"); - node_indptr_for_op_idx.push(3).expect("test setup: IndexVec capacity exceeded"); - - OpToDecoratorIds::from_components( - decorator_ids, - op_indptr_for_dec_ids, - node_indptr_for_op_idx, - ) - .unwrap() - } - - #[test] - fn test_constructors() { - // Test new() - let storage = OpToDecoratorIds::new(); - assert_eq!(storage.num_nodes(), 0); - assert_eq!(storage.num_decorator_ids(), 0); - - // Test with_capacity() - let storage = OpToDecoratorIds::with_capacity(10, 20, 30); - assert_eq!(storage.num_nodes(), 0); - assert_eq!(storage.num_decorator_ids(), 0); - - // Test default() - let storage = OpToDecoratorIds::default(); - assert_eq!(storage.num_nodes(), 0); - assert_eq!(storage.num_decorator_ids(), 0); - } - - #[test] - fn test_from_components_simple() { - // Create a simple structure: - // Node 0: Op 0 -> [0, 1], Op 1 -> [2] - // Node 1: Op 0 -> [3, 4, 5] - let storage = create_standard_test_storage(); - - assert_eq!(storage.num_nodes(), 2); - assert_eq!(storage.num_decorator_ids(), 6); - } - - #[test] - fn test_from_components_invalid_structure() { - // Empty structures are valid - let result = OpToDecoratorIds::from_components(vec![], vec![], IndexVec::new()); - assert!(result.is_ok()); - - // Test with operation pointer exceeding decorator indices - let result = OpToDecoratorIds::from_components( - vec![test_decorator_id(0)], - vec![0, 2], // Points to index 2 but we only have 1 decorator - IndexVec::new(), - ); - assert_eq!(result, Err(DecoratorIndexError::InternalStructure)); - - // Test with non-monotonic operation pointers - let result = OpToDecoratorIds::from_components( - vec![test_decorator_id(0), test_decorator_id(1)], - vec![0, 2, 1], // 2 > 1, should be monotonic - IndexVec::new(), - ); - assert_eq!(result, Err(DecoratorIndexError::InternalStructure)); - } - - #[test] - fn test_data_access_methods() { - let storage = create_standard_test_storage(); - - // Test decorator_ids_for_operation - let decorators = storage.decorator_ids_for_operation(test_node_id(0), 0).unwrap(); - assert_eq!(decorators, &[test_decorator_id(0), test_decorator_id(1)]); - - let decorators = storage.decorator_ids_for_operation(test_node_id(0), 1).unwrap(); - assert_eq!(decorators, &[test_decorator_id(2)]); - - let decorators = storage.decorator_ids_for_operation(test_node_id(1), 0).unwrap(); - assert_eq!(decorators, &[test_decorator_id(3), test_decorator_id(4), test_decorator_id(5)]); - - // Test decorator_ids_for_node - let decorators: Vec<_> = storage.decorator_ids_for_node(test_node_id(0)).unwrap().collect(); - assert_eq!(decorators.len(), 2); - assert_eq!(decorators[0], (0, &[test_decorator_id(0), test_decorator_id(1)][..])); - assert_eq!(decorators[1], (1, &[test_decorator_id(2)][..])); - - let decorators: Vec<_> = storage.decorator_ids_for_node(test_node_id(1)).unwrap().collect(); - assert_eq!(decorators.len(), 1); - assert_eq!( - decorators[0], - (0, &[test_decorator_id(3), test_decorator_id(4), test_decorator_id(5)][..]) - ); - - // Test operation_has_decorator_ids - assert!(storage.operation_has_decorator_ids(test_node_id(0), 0).unwrap()); - assert!(storage.operation_has_decorator_ids(test_node_id(0), 1).unwrap()); - assert!(storage.operation_has_decorator_ids(test_node_id(1), 0).unwrap()); - assert!(!storage.operation_has_decorator_ids(test_node_id(0), 2).unwrap()); - - // Test num_decorator_ids_for_operation - assert_eq!(storage.num_decorator_ids_for_operation(test_node_id(0), 0).unwrap(), 2); - assert_eq!(storage.num_decorator_ids_for_operation(test_node_id(0), 1).unwrap(), 1); - assert_eq!(storage.num_decorator_ids_for_operation(test_node_id(1), 0).unwrap(), 3); - assert_eq!(storage.num_decorator_ids_for_operation(test_node_id(0), 2).unwrap(), 0); - - // Test invalid operation returns empty slice - let decorators = storage.decorator_ids_for_operation(test_node_id(0), 2).unwrap(); - assert_eq!(decorators, &[]); - } - - #[test] - fn test_empty_nodes_basic_functionality() { - // Test 1: Empty nodes created via from_components (original - // test_empty_nodes_and_operations) - { - // Create a structure with empty nodes/operations - let decorator_indices = vec![]; - let op_indptr_for_dec_idx = vec![0, 0, 0]; // 2 operations, both empty - let mut node_indptr_for_op_idx = IndexVec::new(); - node_indptr_for_op_idx.push(0).expect("test setup: IndexVec capacity exceeded"); - node_indptr_for_op_idx.push(2).expect("test setup: IndexVec capacity exceeded"); - - let storage = OpToDecoratorIds::from_components( - decorator_indices, - op_indptr_for_dec_idx, - node_indptr_for_op_idx, - ) - .unwrap(); - - assert_eq!(storage.num_nodes(), 1); - assert_eq!(storage.num_decorator_ids(), 0); - - // Empty decorators - let decorators = storage.decorator_ids_for_operation(test_node_id(0), 0).unwrap(); - assert_eq!(decorators, &[]); - - // Operation has no decorators - assert!(!storage.operation_has_decorator_ids(test_node_id(0), 0).unwrap()); - } - - // Test 2: Empty nodes created via add_decorator_info_for_node (original - // test_decorator_ids_for_node_with_empty_nodes) - { - let mut storage = OpToDecoratorIds::new(); - - // Add node 0 with no decorators (empty node) - storage.add_decorator_info_for_node(test_node_id(0), vec![]).unwrap(); - - // Test 2a: operation_range_for_node should be empty for node with no decorators - let range = storage.operation_range_for_node(test_node_id(0)); - assert!(range.is_ok(), "operation_range_for_node should return Ok for empty node"); - let range = range.unwrap(); - assert_eq!(range, 0..0, "Empty node should have empty operations range"); - - // Test 2b: decorator_ids_for_node should return an empty iterator - let result = storage.decorator_ids_for_node(test_node_id(0)); - assert!(result.is_ok(), "decorator_ids_for_node should return Ok for empty node"); - // The iterator should be empty - let decorators: Vec<_> = result.unwrap().collect(); - assert_eq!(decorators, Vec::<(usize, &[DecoratorId])>::new()); - - // Test 2c: decorator_links_for_node should return an empty iterator - let result = storage.decorator_links_for_node(test_node_id(0)); - assert!(result.is_ok(), "decorator_links_for_node should return Ok for empty node"); - let links: Vec<_> = result.unwrap().into_iter().collect(); - assert_eq!(links, Vec::<(usize, DecoratorId)>::new()); - - // Test 2d: Basic access methods on empty node - assert_eq!(storage.num_nodes(), 1); - assert_eq!(storage.num_decorator_ids(), 0); - } - } - - #[test] - fn test_debug_impl() { - let storage = OpToDecoratorIds::new(); - let debug_str = format!("{:?}", storage); - assert!(debug_str.contains("OpToDecoratorIds")); - } - - #[test] - fn test_clone_and_equality() { - let decorator_indices = vec![ - test_decorator_id(0), - test_decorator_id(1), - test_decorator_id(2), - test_decorator_id(3), - test_decorator_id(4), - test_decorator_id(5), - ]; - let op_indptr_for_dec_idx = vec![0, 2, 3, 6]; - let mut node_indptr_for_op_idx = IndexVec::new(); - node_indptr_for_op_idx.push(0).expect("test setup: IndexVec capacity exceeded"); - node_indptr_for_op_idx.push(2).expect("test setup: IndexVec capacity exceeded"); - node_indptr_for_op_idx.push(3).expect("test setup: IndexVec capacity exceeded"); - - let storage1 = OpToDecoratorIds::from_components( - decorator_indices.clone(), - op_indptr_for_dec_idx.clone(), - node_indptr_for_op_idx.clone(), - ) - .unwrap(); - - let storage2 = storage1.clone(); - assert_eq!(storage1, storage2); - - // Modify one and ensure they're no longer equal - let different_decorators = vec![test_decorator_id(10)]; - let mut different_node_indptr = IndexVec::new(); - different_node_indptr.push(0).expect("test setup: IndexVec capacity exceeded"); - different_node_indptr.push(1).expect("test setup: IndexVec capacity exceeded"); - - let storage3 = OpToDecoratorIds::from_components( - different_decorators, - vec![0, 1], - different_node_indptr, - ) - .unwrap(); - - assert_ne!(storage1, storage3); - } - - #[test] - fn test_add_decorator_info_functionality() { - // Test 1: Basic multi-node functionality - let mut storage = OpToDecoratorIds::new(); - - // Add decorators for node 0 - let decorators_info = vec![ - (0, test_decorator_id(10)), - (0, test_decorator_id(11)), - (2, test_decorator_id(12)), - ]; - storage.add_decorator_info_for_node(test_node_id(0), decorators_info).unwrap(); - - assert_eq!(storage.num_nodes(), 1); - assert_eq!(storage.num_decorator_ids_for_operation(test_node_id(0), 0).unwrap(), 2); - assert_eq!(storage.num_decorator_ids_for_operation(test_node_id(0), 2).unwrap(), 1); - - // Add node 1 with simple decorators - storage - .add_decorator_info_for_node(test_node_id(1), vec![(0, test_decorator_id(20))]) - .unwrap(); - assert_eq!(storage.num_nodes(), 2); - - let node1_op0 = storage.decorator_ids_for_operation(test_node_id(1), 0).unwrap(); - assert_eq!(node1_op0, &[test_decorator_id(20)]); - - // Test 2: Sequential constraint validation - let mut storage2 = OpToDecoratorIds::new(); - storage2 - .add_decorator_info_for_node(test_node_id(0), vec![(0, test_decorator_id(10))]) - .unwrap(); - - // Adding node 1 should succeed - storage2 - .add_decorator_info_for_node(test_node_id(1), vec![(0, test_decorator_id(30))]) - .unwrap(); - assert_eq!(storage2.num_nodes(), 2); - - // Try to add node 0 again - should fail - let result = - storage2.add_decorator_info_for_node(test_node_id(0), vec![(0, test_decorator_id(40))]); - assert_eq!(result, Err(DecoratorIndexError::NodeIndex(test_node_id(0)))); - - // Test 3: Empty input handling (creates empty nodes with no operations) - let mut storage3 = OpToDecoratorIds::new(); - let result = storage3.add_decorator_info_for_node(test_node_id(0), vec![]); - assert_eq!(result, Ok(())); - assert_eq!(storage3.num_nodes(), 1); // Should create empty node - - // Empty node should have no operations (accessing any operation should return empty) - let decorators = storage3.decorator_ids_for_operation(test_node_id(0), 0).unwrap(); - assert_eq!(decorators, &[]); - - // Should be able to add next node after empty node - storage3 - .add_decorator_info_for_node(test_node_id(1), vec![(0, test_decorator_id(100))]) - .unwrap(); - assert_eq!(storage3.num_nodes(), 2); - - // Test 4: Operations with gaps - let mut storage4 = OpToDecoratorIds::new(); - let gap_decorators = vec![ - (0, test_decorator_id(10)), - (0, test_decorator_id(11)), // operation 0 has 2 decorators - (3, test_decorator_id(12)), // operation 3 has 1 decorator - (4, test_decorator_id(13)), // operation 4 has 1 decorator - ]; - storage4.add_decorator_info_for_node(test_node_id(0), gap_decorators).unwrap(); - - assert_eq!(storage4.num_decorator_ids_for_operation(test_node_id(0), 0).unwrap(), 2); - assert_eq!(storage4.num_decorator_ids_for_operation(test_node_id(0), 1).unwrap(), 0); - assert_eq!(storage4.num_decorator_ids_for_operation(test_node_id(0), 2).unwrap(), 0); - assert_eq!(storage4.num_decorator_ids_for_operation(test_node_id(0), 3).unwrap(), 1); - assert_eq!(storage4.num_decorator_ids_for_operation(test_node_id(0), 4).unwrap(), 1); - - // Test accessing operations without decorators returns empty slice - let op1_decorators = storage4.decorator_ids_for_operation(test_node_id(0), 1).unwrap(); - assert_eq!(op1_decorators, &[]); - - // Test 5: Your specific use case - mixed empty and non-empty nodes - let mut storage5 = OpToDecoratorIds::new(); - - // node 0 with decorators - storage5 - .add_decorator_info_for_node( - test_node_id(0), - vec![(0, test_decorator_id(1)), (1, test_decorator_id(0))], - ) - .unwrap(); - - // node 1 with no decorators (empty) - storage5.add_decorator_info_for_node(test_node_id(1), vec![]).unwrap(); - - // node 2 with decorators - storage5 - .add_decorator_info_for_node( - test_node_id(2), - vec![(1, test_decorator_id(1)), (2, test_decorator_id(2))], - ) - .unwrap(); - - assert_eq!(storage5.num_nodes(), 3); - - // Verify node 0: op 0 has [1], op 1 has [0] - assert_eq!( - storage5.decorator_ids_for_operation(test_node_id(0), 0).unwrap(), - &[test_decorator_id(1)] - ); - assert_eq!( - storage5.decorator_ids_for_operation(test_node_id(0), 1).unwrap(), - &[test_decorator_id(0)] - ); - - // Verify node 1: has no operations at all, any operation access returns empty - assert_eq!(storage5.decorator_ids_for_operation(test_node_id(1), 0).unwrap(), &[]); - - // Verify node 2: op 0 has [], op 1 has [1], op 2 has [2] - assert_eq!(storage5.decorator_ids_for_operation(test_node_id(2), 0).unwrap(), &[]); - assert_eq!( - storage5.decorator_ids_for_operation(test_node_id(2), 1).unwrap(), - &[test_decorator_id(1)] - ); - assert_eq!( - storage5.decorator_ids_for_operation(test_node_id(2), 2).unwrap(), - &[test_decorator_id(2)] - ); - } - - #[test] - fn test_empty_nodes_edge_cases() { - // Test edge cases with empty nodes (nodes with no decorators) - // This consolidates test_decorator_ids_for_node_mixed_scenario and - // test_decorated_links_overflow_bug - - let mut storage = OpToDecoratorIds::new(); - - // Set up mixed scenario: some nodes have decorators, some don't - // Node 0: Has decorators - storage - .add_decorator_info_for_node( - test_node_id(0), - vec![(0, test_decorator_id(10)), (2, test_decorator_id(20))], - ) - .unwrap(); - - // Node 1: Has decorators - storage - .add_decorator_info_for_node( - test_node_id(1), - vec![ - (0, test_decorator_id(30)), - (0, test_decorator_id(31)), - (3, test_decorator_id(32)), - ], - ) - .unwrap(); - - // Node 2: No decorators (empty node) - this is the edge case we're testing - storage.add_decorator_info_for_node(test_node_id(2), vec![]).unwrap(); - - // Test 1: Verify range handling for empty nodes - let range0 = storage.operation_range_for_node(test_node_id(0)).unwrap(); - let range1 = storage.operation_range_for_node(test_node_id(1)).unwrap(); - let range2 = storage.operation_range_for_node(test_node_id(2)).unwrap(); - - // Nodes with decorators should have non-empty ranges - assert!(range0.end > range0.start, "Node with decorators should have non-empty range"); - assert!(range1.end > range1.start, "Node with decorators should have non-empty range"); - - // Empty node should have range pointing to a sentinel that was added when the empty node - // was created. The sentinel is added at the position that would have been "past the end" - // before the empty node was added, making the empty node's range valid. - let op_indptr_len = storage.op_indptr_for_dec_ids.len(); - // The empty node should point to the sentinel (which is now at len()-1 after being added) - let expected_empty_pos = op_indptr_len - 1; - assert_eq!( - range2.start, expected_empty_pos, - "Empty node should point to the sentinel that was added for it" - ); - assert_eq!( - range2.end, expected_empty_pos, - "Empty node should have empty range at the sentinel" - ); - - // Test 2: decorator_ids_for_node() should work for empty nodes - // This should not panic - the iterator should be empty even though the range points to - // array end - let result = storage.decorator_ids_for_node(test_node_id(2)); - assert!(result.is_ok(), "decorator_ids_for_node should work for node with no decorators"); - let decorators: Vec<_> = result.unwrap().collect(); - assert_eq!(decorators, Vec::<(usize, &[DecoratorId])>::new()); - - // Test 3: decorator_links_for_node() should work for empty nodes (tests overflow bug) - // This tests the specific overflow bug in DecoratedLinks iterator - let result = storage.decorator_links_for_node(test_node_id(2)); - assert!(result.is_ok(), "decorator_links_for_node should return Ok for empty node"); - - let links = result.unwrap(); - // This should not panic, even when iterating - let collected: Vec<_> = links.into_iter().collect(); - assert_eq!(collected, Vec::<(usize, DecoratorId)>::new()); - - // Test 4: Multiple iterations should work (regression test for iterator reuse) - let result2 = storage.decorator_links_for_node(test_node_id(2)); - assert!( - result2.is_ok(), - "decorator_links_for_node should work repeatedly for empty node" - ); - let links2 = result2.unwrap(); - let collected2: Vec<_> = links2.into_iter().collect(); - assert_eq!(collected2, Vec::<(usize, DecoratorId)>::new()); - - // Test 5: Multiple iterations of decorator_ids_for_node should also work - let result3 = storage.decorator_ids_for_node(test_node_id(2)); - assert!(result3.is_ok(), "decorator_ids_for_node should work repeatedly for empty node"); - let decorators2: Vec<_> = result3.unwrap().collect(); - assert_eq!(decorators2, Vec::<(usize, &[DecoratorId])>::new()); - } - - #[test] - fn test_decorator_links_for_node_flattened() { - let storage = create_standard_test_storage(); - let n0 = MastNodeId::new_unchecked(0); - let flat: Vec<_> = storage.decorator_links_for_node(n0).unwrap().into_iter().collect(); - // Node 0: Op0 -> [0,1], Op1 -> [2] - assert_eq!(flat, vec![(0, DecoratorId(0)), (0, DecoratorId(1)), (1, DecoratorId(2)),]); - - let n1 = MastNodeId::new_unchecked(1); - let flat1: Vec<_> = storage.decorator_links_for_node(n1).unwrap().into_iter().collect(); - // Node 1: Op0 -> [3,4,5] - assert_eq!(flat1, vec![(0, DecoratorId(3)), (0, DecoratorId(4)), (0, DecoratorId(5)),]); - } - - #[test] - /// This test verifies that the CSR encoding described in the OpToDecoratorIds struct - /// documentation correctly represents COO data. It also validates all accessor methods - /// work as expected. Keep this test in sync with the documentation example (adding nodes - /// to this test if you add nodes to the documentation example, and vice versa). - fn test_csr_and_coo_produce_same_elements() { - // Build a COO representation manually - let coo_data = vec![ - // Node 0 - (MastNodeId::new_unchecked(0), 0, DecoratorId(10)), - (MastNodeId::new_unchecked(0), 0, DecoratorId(11)), - (MastNodeId::new_unchecked(0), 1, DecoratorId(12)), - (MastNodeId::new_unchecked(0), 2, DecoratorId(13)), - // Node 1 - (MastNodeId::new_unchecked(1), 0, DecoratorId(20)), - (MastNodeId::new_unchecked(1), 2, DecoratorId(21)), - (MastNodeId::new_unchecked(1), 2, DecoratorId(22)), - // Node 2 (empty node, should still work) - // Node 3 - (MastNodeId::new_unchecked(3), 0, DecoratorId(30)), - ]; - - // Build COO representation as a HashMap for easy lookup during verification - let mut coo_map: alloc::collections::BTreeMap<(MastNodeId, usize), Vec> = - alloc::collections::BTreeMap::new(); - for (node, op_idx, decorator_id) in &coo_data { - coo_map.entry((*node, *op_idx)).or_default().push(*decorator_id); - } - - // Build CSR representation using the builder API - let mut csr_storage = OpToDecoratorIds::new(); - - // Node 0: Op0 -> [10,11], Op1 -> [12], Op2 -> [13] - csr_storage - .add_decorator_info_for_node( - MastNodeId::new_unchecked(0), - vec![ - (0, DecoratorId(10)), - (0, DecoratorId(11)), - (1, DecoratorId(12)), - (2, DecoratorId(13)), - ], - ) - .unwrap(); - - // Node 1: Op0 -> [20], Op2 -> [21,22] - csr_storage - .add_decorator_info_for_node( - MastNodeId::new_unchecked(1), - vec![(0, DecoratorId(20)), (2, DecoratorId(21)), (2, DecoratorId(22))], - ) - .unwrap(); - - // Node 2: empty - csr_storage - .add_decorator_info_for_node(MastNodeId::new_unchecked(2), vec![]) - .unwrap(); - - // Node 3: Op0 -> [30] - csr_storage - .add_decorator_info_for_node(MastNodeId::new_unchecked(3), vec![(0, DecoratorId(30))]) - .unwrap(); - - // Verify that CSR and COO produce the same elements - for node_idx in 0..4 { - let node_id = MastNodeId::new_unchecked(node_idx); - - // Get all operations for this node from CSR - let op_range = csr_storage.operation_range_for_node(node_id).unwrap(); - let num_ops = op_range.len(); - - // For each operation in this node - for op_idx in 0..num_ops { - // Get decorator IDs from CSR - let csr_decorator_ids = - csr_storage.decorator_ids_for_operation(node_id, op_idx).unwrap(); - - // Get decorator IDs from COO map - let coo_key = (node_id, op_idx); - let coo_decorator_ids = - coo_map.get(&coo_key).map_or(&[] as &[DecoratorId], |v| v.as_slice()); - - // They should be the same - assert_eq!( - csr_decorator_ids, coo_decorator_ids, - "CSR and COO should produce the same decorator IDs for node {:?}, op {}", - node_id, op_idx - ); - } - } - - // Also verify using the flattened iterator approach - for node_idx in 0..4 { - let node_id = MastNodeId::new_unchecked(node_idx); - - // Get flattened view from CSR - let csr_flat: Vec<(usize, DecoratorId)> = - csr_storage.decorator_links_for_node(node_id).unwrap().into_iter().collect(); - - // Build expected from COO map - let mut expected_flat = Vec::new(); - for ((node, op_idx), decorator_ids) in &coo_map { - if *node == node_id { - for decorator_id in decorator_ids { - expected_flat.push((*op_idx, *decorator_id)); - } - } - } - // Sort by operation index then decorator ID for consistent comparison - expected_flat.sort_by_key(|(op_idx, dec_id)| (*op_idx, u32::from(*dec_id))); - - assert_eq!( - csr_flat, expected_flat, - "Flattened CSR and COO should produce the same elements for node {:?}", - node_id - ); - } - } - - #[test] - fn test_validate_csr_valid() { - let storage = create_standard_test_storage(); - assert!(storage.validate_csr(6).is_ok()); - } - - #[test] - fn test_validate_csr_invalid_decorator_id() { - let storage = create_standard_test_storage(); - // decorator_count=3 but storage has IDs up to 5 - let result = storage.validate_csr(3); - assert!(result.is_err()); - assert!(result.unwrap_err().contains("Invalid decorator ID")); - } - - #[test] - fn test_validate_csr_not_monotonic() { - let decorator_ids = vec![test_decorator_id(0), test_decorator_id(1)]; - let op_indptr_for_dec_ids = vec![0, 2, 1]; // Not monotonic! - let mut node_indptr_for_op_idx = IndexVec::new(); - node_indptr_for_op_idx.push(0).unwrap(); - node_indptr_for_op_idx.push(2).unwrap(); - - let storage = OpToDecoratorIds::from_components( - decorator_ids, - op_indptr_for_dec_ids, - node_indptr_for_op_idx, - ); - - // from_components should catch this - assert!(storage.is_err()); - } - - #[test] - fn test_validate_csr_wrong_start() { - let decorator_ids = vec![test_decorator_id(0)]; - let op_indptr_for_dec_ids = vec![1, 1]; // Should start at 0! - let mut node_indptr_for_op_idx = IndexVec::new(); - node_indptr_for_op_idx.push(0).unwrap(); - node_indptr_for_op_idx.push(1).unwrap(); - - let storage = OpToDecoratorIds::from_components( - decorator_ids, - op_indptr_for_dec_ids, - node_indptr_for_op_idx, - ); - - assert!(storage.is_err()); - } - - #[cfg(feature = "arbitrary")] - proptest! { - /// Property test that verifies decorator_links_for_node always produces a valid iterator - /// that can be fully consumed without panicking for any OpToDecoratorIds. - #[test] - fn decorator_links_for_node_always_iterates_complete( - mapping in any::() - ) { - // Skip empty mappings since they have no nodes to test - if mapping.num_nodes() == 0 { - return Ok(()); - } - - // Test every valid node in the mapping - for node_idx in 0..mapping.num_nodes() { - let node_id = MastNodeId::new_unchecked(node_idx as u32); - - // Call decorator_links_for_node - this should never return an error for valid nodes - let result = mapping.decorator_links_for_node(node_id); - - // The result should always be Ok for valid node indices - prop_assume!(result.is_ok(), "decorator_links_for_node should succeed for valid node"); - - let decorated_links = result.unwrap(); - - // Convert to iterator and collect all items - this should complete without panicking - let collected: Vec<(usize, DecoratorId)> = decorated_links.into_iter().collect(); - - // The collected items should match what we get from decorator_ids_for_node - let expected_items: Vec<(usize, DecoratorId)> = mapping - .decorator_ids_for_node(node_id) - .unwrap() - .flat_map(|(op_idx, decorator_ids)| { - decorator_ids.iter().map(move |&decorator_id| (op_idx, decorator_id)) - }) - .collect(); - - prop_assert_eq!(collected, expected_items); - } - } - } -} - -#[cfg(test)] -mod sparse_debug_tests { - use alloc::vec; - - use super::*; - - fn test_decorator_id(value: u32) -> DecoratorId { - DecoratorId(value) - } - - #[test] - fn test_sparse_case_manual() { - // Manually create what should be produced by sparse decorator case: - // 10 nodes, decorators only at nodes 0 and 5 - - // Just node 0 with decorator at op 0 - let decorator_ids = vec![test_decorator_id(0)]; - let op_indptr_for_dec_ids = vec![0, 1, 1]; // op 0: [0,1), sentinel - let mut node_indptr = IndexVec::new(); - node_indptr.push(0).unwrap(); // node 0 starts at op index 0 - node_indptr.push(2).unwrap(); // node 0 ends at op index 2 (sentinel) - - // Validate this structure - let result = - OpToDecoratorIds::from_components(decorator_ids, op_indptr_for_dec_ids, node_indptr); - - assert!(result.is_ok(), "Single node with decorator should validate: {:?}", result); - } - - #[test] - fn test_sparse_case_two_nodes() { - // Two nodes: node 0 with decorator, node 1 empty - let decorator_ids = vec![test_decorator_id(0)]; - let op_indptr_for_dec_ids = vec![0, 1, 1]; // op 0: [0,1), sentinel at 1 - let mut node_indptr = IndexVec::new(); - node_indptr.push(0).unwrap(); // node 0 starts at op index 0 - node_indptr.push(2).unwrap(); // node 0 ends at op index 2 - node_indptr.push(2).unwrap(); // node 1 (empty) points to same location - - let result = - OpToDecoratorIds::from_components(decorator_ids, op_indptr_for_dec_ids, node_indptr); - - assert!( - result.is_ok(), - "Two nodes (one with decorator, one empty) should validate: {:?}", - result - ); - } - - #[test] - fn test_sparse_debuginfo_round_trip() { - use alloc::collections::BTreeMap; - - use crate::{ - Decorator, - mast::debuginfo::{DebugInfo, NodeToDecoratorIds}, - utils::{Deserializable, Serializable}, - }; - - // Create a sparse CSR structure like we'd see with 10 nodes where only 0 and 5 have - // decorators - let decorator_ids = vec![test_decorator_id(0), test_decorator_id(1)]; - - // Node 0: op 0 has decorator 0 - // Nodes 1-4: empty - // Node 5: op 0 has decorator 1 - let op_indptr_for_dec_ids = vec![0, 1, 1, 1, 2, 2]; // ops with their decorator ranges - let mut node_indptr_for_op_idx = IndexVec::new(); - node_indptr_for_op_idx.push(0).unwrap(); // node 0 - node_indptr_for_op_idx.push(2).unwrap(); // node 0 end - node_indptr_for_op_idx.push(2).unwrap(); // node 1 (empty) - node_indptr_for_op_idx.push(2).unwrap(); // node 2 (empty) - node_indptr_for_op_idx.push(2).unwrap(); // node 3 (empty) - node_indptr_for_op_idx.push(2).unwrap(); // node 4 (empty) - node_indptr_for_op_idx.push(4).unwrap(); // node 5 - - let op_storage = OpToDecoratorIds::from_components( - decorator_ids.clone(), - op_indptr_for_dec_ids, - node_indptr_for_op_idx, - ) - .expect("CSR structure should be valid"); - - // Create a minimal DebugInfo - let mut decorators = crate::mast::debuginfo::IndexVec::new(); - decorators.push(Decorator::Trace(0)).unwrap(); - decorators.push(Decorator::Trace(5)).unwrap(); - - let node_storage = NodeToDecoratorIds::new(); - let error_codes = BTreeMap::new(); - - let debug_info = DebugInfo { - decorators, - op_decorator_storage: op_storage, - node_decorator_storage: node_storage, - error_codes, - procedure_names: BTreeMap::new(), - }; - - // Serialize and deserialize - let bytes = debug_info.to_bytes(); - let deserialized = - DebugInfo::read_from_bytes(&bytes).expect("Should deserialize successfully"); - - // Verify - assert_eq!(debug_info.num_decorators(), deserialized.num_decorators()); - } -} diff --git a/core/src/mast/debuginfo/decorator_storage/mod.rs b/core/src/mast/debuginfo/decorator_storage/mod.rs new file mode 100644 index 0000000000..af08efe786 --- /dev/null +++ b/core/src/mast/debuginfo/decorator_storage/mod.rs @@ -0,0 +1,870 @@ +use alloc::{ + string::{String, ToString}, + vec::Vec, +}; + +use miden_utils_indexing::{Idx, IndexVec}; +#[cfg(feature = "arbitrary")] +use proptest::prelude::*; +#[cfg(feature = "serde")] +use serde::{Deserialize, Serialize}; + +use crate::mast::{DecoratedOpLink, DecoratorId, MastNodeId}; + +/// A two-level compressed sparse row (CSR) representation for indexing decorator IDs per +/// operation per node. +/// +/// This structure provides efficient access to decorator IDs in a hierarchical manner: +/// 1. First level: Node -> Operations (operations belong to nodes) +/// 2. Second level: Operation -> Decorator IDs (decorator IDs belong to operations) +/// +/// The data layout follows CSR format at both levels: +/// +/// - `decorator_ids` contains all the DecoratorId values in a single flat array. These are the +/// actual decorator identifiers that need to be accessed. We store them contiguously to minimize +/// memory overhead and improve cache locality when iterating. +/// +/// - `op_indptr_for_dec_ids` stores pointer indices that map operations to their position within +/// the `decorator_ids` array. For each operation, it contains the start index where that +/// operation's decorator IDs begin in the flat storage. +/// +/// - `node_indptr_for_op_idx` stores pointer indices that map nodes to their position within the +/// `op_indptr_for_dec_ids` array. For each node, it contains the start index where that node's +/// operation pointers begin. +/// +/// Together, these three arrays form a two-level index structure that allows efficient +/// lookup of decorator IDs for any operation in any node, while minimizing memory usage +/// for sparse decorator data. +/// +/// # Example +/// +/// Consider this COO (Coordinate format) representation: +/// ```text +/// Node 0, Op 0: [decorator_id_0, decorator_id_1] +/// Node 0, Op 1: [decorator_id_2] +/// Node 1, Op 0: [decorator_id_3, decorator_id_4, decorator_id_5] +/// ``` +/// +/// This would be stored as: +/// ```text +/// decorator_ids: [0, 1, 2, 3, 4, 5] +/// op_indptr_for_dec_ids: [0, 2, 3, 6] // Node 0: ops [0,2], [2,3]; Node 1: ops [3,6] +/// node_indptr_for_op_idx: [0, 2, 3] // Node 0: [0,2], Node 1: [2,3] +/// ``` +/// +/// See the unit test `test_csr_and_coo_produce_same_elements` for a comprehensive example +/// demonstrating how this encoding works and verifying round-trip conversion from COO to CSR. +#[derive(Debug, Clone, PartialEq, Eq)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(all(feature = "arbitrary", test), miden_test_serde_macros::serde_test)] +pub struct OpToDecoratorIds { + /// All the decorator IDs per operation per node, in a CSR relationship with + /// node_indptr_for_op_idx and op_indptr_for_dec_ids + pub(super) decorator_ids: Vec, + /// For the node whose operation indices are in + /// `op_indptr_for_dec_ids[node_start..node_end]`, + /// the indices of its i-th operation are at: + /// ```text + /// decorator_ids[op_indptr_for_dec_ids[node_start + i].. + /// op_indptr_for_dec_ids[node_start + i + 1]] + /// ``` + pub(super) op_indptr_for_dec_ids: Vec, + /// The decorated operation indices for the n-th node are at + /// `op_indptr_for_dec_ids[node_indptr_for_op_idx[n]..node_indptr_for_op_idx[n+1]]` + pub(super) node_indptr_for_op_idx: IndexVec, +} + +/// Error type for decorator index mapping operations +#[derive(Debug, PartialEq, Eq, thiserror::Error)] +pub enum DecoratorIndexError { + /// Node index out of bounds + #[error("Invalid node index {0:?}")] + NodeIndex(MastNodeId), + /// Operation index out of bounds for the given node + #[error("Invalid operation index {operation} for node {node:?}")] + OperationIndex { node: MastNodeId, operation: usize }, + /// Invalid internal data structure (corrupted pointers) + #[error("Invalid internal data structure in OpToDecoratorIds")] + InternalStructure, +} + +impl OpToDecoratorIds { + /// Create a new empty OpToDecoratorIds with the specified capacity. + /// + /// # Arguments + /// * `nodes_capacity` - Expected number of nodes + /// * `operations_capacity` - Expected total number of operations across all nodes + /// * `decorator_ids_capacity` - Expected total number of decorator IDs across all operations + pub fn with_capacity( + nodes_capacity: usize, + operations_capacity: usize, + decorator_ids_capacity: usize, + ) -> Self { + Self { + decorator_ids: Vec::with_capacity(decorator_ids_capacity), + op_indptr_for_dec_ids: Vec::with_capacity(operations_capacity + 1), + node_indptr_for_op_idx: IndexVec::with_capacity(nodes_capacity + 1), + } + } + + /// Create a new empty OpToDecoratorIds. + pub fn new() -> Self { + Self::with_capacity(0, 0, 0) + } + + /// Create a OpToDecoratorIds from raw CSR components. + /// + /// This is useful for deserialization or testing purposes where you need to reconstruct + /// the data structure from its raw components. + /// + /// # Arguments + /// * `decorator_ids` - Flat storage of all decorator IDs. These are the actual decorator + /// identifiers that will be accessed through the index structure. + /// * `op_indptr_for_dec_ids` - Pointer indices for operations within decorator_ids. These must + /// be monotonically increasing and within bounds of `decorator_ids`. The slice must not be + /// empty, and the first element must be 0. + /// * `node_indptr_for_op_idx` - Pointer indices for nodes within op_indptr_for_dec_ids. These + /// must be monotonically increasing and within bounds of `op_indptr_for_dec_ids`. The slice + /// must not be empty, and the first element must be 0. + /// + /// # Returns + /// An error if the internal structure is inconsistent. Common issues that cause errors: + /// - Empty `op_indptr_for_dec_ids` or `node_indptr_for_op_idx` vectors + /// - Non-zero first element in either pointer array + /// - Decreasing pointer values (pointers must be monotonically non-decreasing) + /// - Pointer values that exceed the bounds of the arrays they index into + /// - Invalid ranges (start > end) in any pointer window + /// + /// # Validation Restrictions + /// The following constraints are enforced between components: + /// - `op_indptr_for_dec_ids` length must be >= 1 (for the sentinel) + /// - `node_indptr_for_op_idx` length must be >= 1 (for the sentinel) + /// - Last value in `op_indptr_for_dec_ids` must be <= `decorator_ids.len()` + /// - Last value in `node_indptr_for_op_idx` must be <= `op_indptr_for_dec_ids.len() - 1` + /// - Both `op_indptr_for_dec_ids` and `node_indptr_for_op_idx` must be strictly monotonic (each + /// successive value must be >= the previous one) + pub(super) fn from_components( + decorator_ids: Vec, + op_indptr_for_dec_ids: Vec, + node_indptr_for_op_idx: IndexVec, + ) -> Result { + // Completely empty structures are valid (no nodes, no decorators) + if decorator_ids.is_empty() + && op_indptr_for_dec_ids.is_empty() + && node_indptr_for_op_idx.is_empty() + { + return Ok(Self { + decorator_ids, + op_indptr_for_dec_ids, + node_indptr_for_op_idx, + }); + } + + // Nodes with no decorators are valid + if decorator_ids.is_empty() && op_indptr_for_dec_ids.is_empty() { + // All node pointers must be 0 + if node_indptr_for_op_idx.iter().all(|&ptr| ptr == 0) { + return Ok(Self { + decorator_ids, + op_indptr_for_dec_ids, + node_indptr_for_op_idx, + }); + } else { + return Err(DecoratorIndexError::InternalStructure); + } + } + + // Validate the structure + if op_indptr_for_dec_ids.is_empty() { + return Err(DecoratorIndexError::InternalStructure); + } + + // Check that op_indptr_for_dec_ids starts at 0 + if op_indptr_for_dec_ids[0] != 0 { + return Err(DecoratorIndexError::InternalStructure); + } + + // Check that the last operation pointer doesn't exceed decorator IDs length + let Some(&last_op_ptr) = op_indptr_for_dec_ids.last() else { + return Err(DecoratorIndexError::InternalStructure); + }; + if last_op_ptr > decorator_ids.len() { + return Err(DecoratorIndexError::InternalStructure); + } + + // Check that node pointers are within bounds of operation pointers + if node_indptr_for_op_idx.is_empty() { + return Err(DecoratorIndexError::InternalStructure); + } + + let node_slice = node_indptr_for_op_idx.as_slice(); + + // Check that node_indptr_for_op_idx starts at 0 + if node_slice[0] != 0 { + return Err(DecoratorIndexError::InternalStructure); + } + + let Some(&last_node_ptr) = node_slice.last() else { + return Err(DecoratorIndexError::InternalStructure); + }; + // Node pointers must be valid indices into op_indptr + if last_node_ptr > op_indptr_for_dec_ids.len() - 1 { + return Err(DecoratorIndexError::InternalStructure); + } + + // Ensure monotonicity of pointers + for window in op_indptr_for_dec_ids.windows(2) { + if window[0] > window[1] { + return Err(DecoratorIndexError::InternalStructure); + } + } + + for window in node_slice.windows(2) { + if window[0] > window[1] { + return Err(DecoratorIndexError::InternalStructure); + } + } + + Ok(Self { + decorator_ids, + op_indptr_for_dec_ids, + node_indptr_for_op_idx, + }) + } + + /// Validate CSR structure integrity. + /// + /// Checks: + /// - All decorator IDs are valid (< decorator_count) + /// - op_indptr_for_dec_ids is monotonic, starts at 0, ends at decorator_ids.len() + /// - node_indptr_for_op_idx is monotonic, starts at 0, ends <= op_indptr_for_dec_ids.len()-1 + pub(super) fn validate_csr(&self, decorator_count: usize) -> Result<(), String> { + // Completely empty structures are valid (no nodes, no decorators) + if self.decorator_ids.is_empty() + && self.op_indptr_for_dec_ids.is_empty() + && self.node_indptr_for_op_idx.is_empty() + { + return Ok(()); + } + + // Nodes with no decorators are valid + if self.decorator_ids.is_empty() && self.op_indptr_for_dec_ids.is_empty() { + // All node pointers must be 0 + if !self.node_indptr_for_op_idx.iter().all(|&ptr| ptr == 0) { + return Err("node pointers must all be 0 when there are no decorators".to_string()); + } + return Ok(()); + } + + // Validate all decorator IDs + for &dec_id in &self.decorator_ids { + if dec_id.to_usize() >= decorator_count { + return Err(format!( + "Invalid decorator ID {}: exceeds decorator count {}", + dec_id.to_usize(), + decorator_count + )); + } + } + + // Validate op_indptr_for_dec_ids + if self.op_indptr_for_dec_ids.is_empty() { + return Err("op_indptr_for_dec_ids cannot be empty".to_string()); + } + + if self.op_indptr_for_dec_ids[0] != 0 { + return Err("op_indptr_for_dec_ids must start at 0".to_string()); + } + + for window in self.op_indptr_for_dec_ids.windows(2) { + if window[0] > window[1] { + return Err(format!( + "op_indptr_for_dec_ids not monotonic: {} > {}", + window[0], window[1] + )); + } + } + + if *self.op_indptr_for_dec_ids.last().unwrap() != self.decorator_ids.len() { + return Err(format!( + "op_indptr_for_dec_ids end {} doesn't match decorator_ids length {}", + self.op_indptr_for_dec_ids.last().unwrap(), + self.decorator_ids.len() + )); + } + + // Validate node_indptr_for_op_idx + let node_slice = self.node_indptr_for_op_idx.as_slice(); + if node_slice.is_empty() { + return Err("node_indptr_for_op_idx cannot be empty".to_string()); + } + + if node_slice[0] != 0 { + return Err("node_indptr_for_op_idx must start at 0".to_string()); + } + + for window in node_slice.windows(2) { + if window[0] > window[1] { + return Err(format!( + "node_indptr_for_op_idx not monotonic: {} > {}", + window[0], window[1] + )); + } + } + + // Node pointers must be valid indices into op_indptr + let max_node_ptr = self.op_indptr_for_dec_ids.len().saturating_sub(1); + if *node_slice.last().unwrap() > max_node_ptr { + return Err(format!( + "node_indptr_for_op_idx end {} exceeds op_indptr bounds {}", + node_slice.last().unwrap(), + max_node_ptr + )); + } + + Ok(()) + } + + pub fn is_empty(&self) -> bool { + self.node_indptr_for_op_idx.is_empty() + } + + /// Serialize this OpToDecoratorIds into the target writer. + pub(super) fn write_into(&self, target: &mut W) { + use crate::utils::Serializable; + + // Serialize decorator_ids as u32 vector + let decorator_ids_u32: Vec = + self.decorator_ids.iter().map(|id| u32::from(*id)).collect(); + decorator_ids_u32.write_into(target); + + // Serialize op_indptr_for_dec_ids + let op_indptr_u32: Vec = + self.op_indptr_for_dec_ids.iter().map(|&idx| idx as u32).collect(); + op_indptr_u32.write_into(target); + + // Serialize node_indptr_for_op_idx + let node_indptr_u32: Vec = + self.node_indptr_for_op_idx.iter().map(|&idx| idx as u32).collect(); + node_indptr_u32.write_into(target); + } + + /// Deserialize OpToDecoratorIds from the source reader. + pub(super) fn read_from( + source: &mut R, + decorator_count: usize, + ) -> Result { + use crate::utils::{Deserializable, DeserializationError}; + + // Deserialize decorator_ids + let decorator_ids_u32: Vec = Deserializable::read_from(source)?; + let decorator_ids: Vec = decorator_ids_u32 + .into_iter() + .map(|id| { + if id as usize >= decorator_count { + return Err(DeserializationError::InvalidValue(format!( + "DecoratorId {} exceeds decorator count {}", + id, decorator_count + ))); + } + Ok(DecoratorId(id)) + }) + .collect::, _>>()?; + + // Deserialize op_indptr_for_dec_ids + let op_indptr_u32: Vec = Deserializable::read_from(source)?; + let op_indptr_for_dec_ids: Vec = + op_indptr_u32.iter().map(|&idx| idx as usize).collect(); + + // Deserialize node_indptr_for_op_idx + let node_indptr_u32: Vec = Deserializable::read_from(source)?; + let node_indptr_for_op_idx: Vec = + node_indptr_u32.iter().map(|&idx| idx as usize).collect(); + + // Build the structure + let result = Self::from_components( + decorator_ids, + op_indptr_for_dec_ids, + IndexVec::from_raw(node_indptr_for_op_idx), + ) + .map_err(|e| DeserializationError::InvalidValue(e.to_string()))?; + + // Validate CSR structure + result.validate_csr(decorator_count).map_err(|e| { + DeserializationError::InvalidValue(format!("OpToDecoratorIds validation failed: {e}")) + })?; + + Ok(result) + } + + /// Get the number of nodes in this storage. + pub fn num_nodes(&self) -> usize { + if self.node_indptr_for_op_idx.is_empty() { + 0 + } else { + self.node_indptr_for_op_idx.len() - 1 + } + } + + /// Get the total number of decorator IDs across all operations. + pub fn num_decorator_ids(&self) -> usize { + self.decorator_ids.len() + } + + /// Add decorator information for a node incrementally. + /// + /// This method allows building up the OpToDecoratorIds structure by adding + /// decorator IDs for nodes in sequential order only. + /// + /// # Arguments + /// * `node` - The node ID to add decorator IDs for. Must be the next sequential node. + /// * `decorators_info` - Vector of (operation_index, decorator_id) tuples. The operation + /// indices should be sorted (as guaranteed by validate_decorators). Operations not present in + /// this vector will have no decorator IDs. + /// + /// # Returns + /// Ok(()) if successful, Err(DecoratorIndexError) if the node is not the next sequential + /// node. + /// + /// # Behavior + /// - Can only add decorator IDs for the next sequential node ID + /// - Automatically creates empty operations for gaps in operation indices + /// - Maintains the two-level CSR structure invariant + pub fn add_decorator_info_for_node( + &mut self, + node: MastNodeId, + decorators_info: Vec<(usize, DecoratorId)>, + ) -> Result<(), DecoratorIndexError> { + // Enforce sequential node ids + let expected = MastNodeId::new_unchecked(self.num_nodes() as u32); + if node < expected { + return Err(DecoratorIndexError::NodeIndex(node)); + } + // Create empty nodes for gaps in node indices + for idx in expected.0..node.0 { + self.add_decorator_info_for_node(MastNodeId::new_unchecked(idx), vec![]) + .unwrap(); + } + + // Start of this node's operations is the current length (do NOT reuse previous sentinel) + let op_start = self.op_indptr_for_dec_ids.len(); + + // Maintain node CSR: node_indptr[i] = start index for node i + if self.node_indptr_for_op_idx.is_empty() { + self.node_indptr_for_op_idx + .push(op_start) + .map_err(|_| DecoratorIndexError::OperationIndex { node, operation: op_start })?; + } else { + // Overwrite the previous "end" slot to become this node's start + let last = MastNodeId::new_unchecked((self.node_indptr_for_op_idx.len() - 1) as u32); + self.node_indptr_for_op_idx[last] = op_start; + } + + if decorators_info.is_empty() { + // Empty node needs sentinel if it follows decorated nodes + // CSR requires all node_indptr values to be valid op_indptr indices. + // If op_start == op_indptr.len(), add a sentinel so the pointer stays in bounds. + if op_start == self.op_indptr_for_dec_ids.len() + && !self.op_indptr_for_dec_ids.is_empty() + { + self.op_indptr_for_dec_ids.push(self.decorator_ids.len()); + } + + self.node_indptr_for_op_idx + .push(op_start) + .map_err(|_| DecoratorIndexError::OperationIndex { node, operation: op_start })?; + } else { + // Build op->decorator CSR for this node + let max_op_idx = decorators_info.last().unwrap().0; // input is sorted by op index + let mut it = decorators_info.into_iter().peekable(); + + for op in 0..=max_op_idx { + // pointer to start of decorator IDs for op + self.op_indptr_for_dec_ids.push(self.decorator_ids.len()); + while it.peek().is_some_and(|(i, _)| *i == op) { + self.decorator_ids.push(it.next().unwrap().1); + } + } + // final sentinel for this node + self.op_indptr_for_dec_ids.push(self.decorator_ids.len()); + + // Push end pointer for this node (index of last op pointer, which is the sentinel) + // This is len()-1 because we just pushed the sentinel above + let end_ops = self.op_indptr_for_dec_ids.len() - 1; + self.node_indptr_for_op_idx + .push(end_ops) + .map_err(|_| DecoratorIndexError::OperationIndex { node, operation: end_ops })?; + } + + Ok(()) + } + + /// Get the number of decorator IDs for a specific operation within a node. + /// + /// # Arguments + /// * `node` - The node ID + /// * `operation` - The operation index within the node + /// + /// # Returns + /// The number of decorator IDs for the operation, or an error if indices are invalid. + pub fn num_decorator_ids_for_operation( + &self, + node: MastNodeId, + operation: usize, + ) -> Result { + self.decorator_ids_for_operation(node, operation).map(|slice| slice.len()) + } + + /// Get all decorator IDs for a specific operation within a node. + /// + /// # Arguments + /// * `node` - The node ID + /// * `operation` - The operation index within the node + /// + /// # Returns + /// A slice of decorator IDs for the operation, or an error if indices are invalid. + pub fn decorator_ids_for_operation( + &self, + node: MastNodeId, + operation: usize, + ) -> Result<&[DecoratorId], DecoratorIndexError> { + let op_range = self.operation_range_for_node(node)?; + // that operation does not have listed decorator indices + if operation >= op_range.len() { + return Ok(&[]); + } + + let op_start_idx = op_range.start + operation; + if op_start_idx + 1 >= self.op_indptr_for_dec_ids.len() { + return Err(DecoratorIndexError::InternalStructure); + } + + let dec_start = self.op_indptr_for_dec_ids[op_start_idx]; + let dec_end = self.op_indptr_for_dec_ids[op_start_idx + 1]; + + if dec_start > dec_end || dec_end > self.decorator_ids.len() { + return Err(DecoratorIndexError::InternalStructure); + } + + Ok(&self.decorator_ids[dec_start..dec_end]) + } + + /// Get an iterator over all operations and their decorator IDs for a given node. + /// + /// # Arguments + /// * `node` - The node ID + /// + /// # Returns + /// An iterator yielding (operation_index, decorator_ids_slice) tuples, or an error if the node + /// is invalid. + pub fn decorator_ids_for_node( + &self, + node: MastNodeId, + ) -> Result, DecoratorIndexError> { + let op_range = self.operation_range_for_node(node)?; + let num_ops = op_range.len(); + + Ok((0..num_ops).map(move |op_idx| { + let op_start_idx = op_range.start + op_idx; + let dec_start = self.op_indptr_for_dec_ids[op_start_idx]; + let dec_end = self.op_indptr_for_dec_ids[op_start_idx + 1]; + (op_idx, &self.decorator_ids[dec_start..dec_end]) + })) + } + + /// Named, zero-alloc view flattened to `(relative_op_idx, DecoratorId)`. + pub fn decorator_links_for_node<'a>( + &'a self, + node: MastNodeId, + ) -> Result, DecoratorIndexError> { + let op_range = self.operation_range_for_node(node)?; // [start .. end) in op-pointer space + Ok(DecoratedLinks::new( + op_range.start, + op_range.end, + &self.op_indptr_for_dec_ids, + &self.decorator_ids, + )) + } + + /// Check if a specific operation within a node has any decorator IDs. + /// + /// # Arguments + /// * `node` - The node ID + /// * `operation` - The operation index within the node + /// + /// # Returns + /// True if the operation has at least one decorator ID, false otherwise, or an error if indices + /// are invalid. + pub fn operation_has_decorator_ids( + &self, + node: MastNodeId, + operation: usize, + ) -> Result { + self.num_decorator_ids_for_operation(node, operation).map(|count| count > 0) + } + + /// Get the range of operation indices for a given node. + /// + /// # Arguments + /// * `node` - The node ID + /// + /// # Returns + /// A range representing the start and end (exclusive) operation indices for the node. + pub fn operation_range_for_node( + &self, + node: MastNodeId, + ) -> Result, DecoratorIndexError> { + let node_slice = self.node_indptr_for_op_idx.as_slice(); + let node_idx = node.to_usize(); + + if node_idx + 1 >= node_slice.len() { + return Err(DecoratorIndexError::NodeIndex(node)); + } + + let start = node_slice[node_idx]; + let end = node_slice[node_idx + 1]; + + if start > end || end > self.op_indptr_for_dec_ids.len() { + return Err(DecoratorIndexError::InternalStructure); + } + + Ok(start..end) + } +} + +impl Default for OpToDecoratorIds { + fn default() -> Self { + Self::new() + } +} + +/// Immutable view over all `(op_idx, DecoratorId)` pairs for a node. +/// Uses the two-level CSR encoded by `node_indptr_for_op_idx` and `op_indptr_for_dec_idx`. +pub struct DecoratedLinks<'a> { + // Absolute op-pointer index range for this node: [start_op .. end_op) + start_op: usize, + end_op: usize, + + // CSR arrays (borrowed; not owned) + op_indptr_for_dec_idx: &'a [usize], // len = total_ops + 1 + decorator_indices: &'a [DecoratorId], +} + +impl<'a> DecoratedLinks<'a> { + fn new( + start_op: usize, + end_op: usize, + op_indptr_for_dec_idx: &'a [usize], + decorator_indices: &'a [DecoratorId], + ) -> Self { + Self { + start_op, + end_op, + op_indptr_for_dec_idx, + decorator_indices, + } + } + + /// Total number of `(op_idx, DecoratorId)` pairs in this view (exact). + #[inline] + pub fn len_pairs(&self) -> usize { + let s = self.op_indptr_for_dec_idx[self.start_op]; + let e = self.op_indptr_for_dec_idx[self.end_op]; + e - s + } + + #[inline] + pub fn is_empty(&self) -> bool { + self.len_pairs() == 0 + } +} + +/// The concrete, zero-alloc iterator over `(relative_op_idx, DecoratorId)`. +pub struct DecoratedLinksIter<'a> { + // absolute op-pointer indices (into op_indptr_for_dec_idx) + cur_op: usize, + end_op: usize, + base_op: usize, // for relative index = cur_op - base_op + + // inner slice [inner_i .. inner_end) indexes into decorator_indices + inner_i: usize, + inner_end: usize, + + // borrowed CSR arrays + op_indptr_for_dec_idx: &'a [usize], + decorator_indices: &'a [DecoratorId], + + // exact count of remaining pairs + remaining: usize, +} + +impl<'a> IntoIterator for DecoratedLinks<'a> { + type Item = DecoratedOpLink; + type IntoIter = DecoratedLinksIter<'a>; + + fn into_iter(self) -> Self::IntoIter { + // Precompute the exact number of pairs in the node's range. + let remaining = { + // Add bounds check to prevent panic on empty storage + if self.start_op >= self.op_indptr_for_dec_idx.len() + || self.end_op > self.op_indptr_for_dec_idx.len() // Note: end_op can be equal to len + || self.start_op > self.end_op + // Invalid range + { + 0 + } else { + // For valid ranges, compute the actual number of decorator pairs + let mut total = 0; + for op_idx in self.start_op..self.end_op { + let start_idx = self.op_indptr_for_dec_idx[op_idx]; + let end_idx = if op_idx + 1 < self.op_indptr_for_dec_idx.len() { + self.op_indptr_for_dec_idx[op_idx + 1] + } else { + self.op_indptr_for_dec_idx.len() + }; + total += end_idx - start_idx; + } + total + } + }; + + // Initialize inner range to the first op (if any). + let (inner_i, inner_end) = if self.start_op < self.end_op + && self.start_op + 1 < self.op_indptr_for_dec_idx.len() + { + let s0 = self.op_indptr_for_dec_idx[self.start_op]; + let e0 = self.op_indptr_for_dec_idx[self.start_op + 1]; + (s0, e0) + } else { + (0, 0) + }; + + DecoratedLinksIter { + cur_op: self.start_op, + end_op: self.end_op, + base_op: self.start_op, + inner_i, + inner_end, + op_indptr_for_dec_idx: self.op_indptr_for_dec_idx, + decorator_indices: self.decorator_indices, + remaining, + } + } +} + +impl<'a> DecoratedLinksIter<'a> { + #[inline] + fn advance_outer(&mut self) -> bool { + self.cur_op += 1; + if self.cur_op >= self.end_op { + return false; + } + // Bounds check: ensure cur_op and cur_op+1 are within the pointer array + if self.cur_op + 1 >= self.op_indptr_for_dec_idx.len() { + return false; + } + let s = self.op_indptr_for_dec_idx[self.cur_op]; + let e = self.op_indptr_for_dec_idx[self.cur_op + 1]; + self.inner_i = s; + self.inner_end = e; + true + } +} + +impl<'a> Iterator for DecoratedLinksIter<'a> { + type Item = DecoratedOpLink; + + #[inline] + fn next(&mut self) -> Option { + while self.cur_op < self.end_op { + // Emit from current op's decorator slice if non-empty + if self.inner_i < self.inner_end { + let rel_op = self.cur_op - self.base_op; // relative op index within the node + let id = self.decorator_indices[self.inner_i]; + self.inner_i += 1; + self.remaining -= 1; + return Some((rel_op, id)); + } + // Move to next operation (which might be empty as well) + if !self.advance_outer() { + break; + } + } + None + } + + #[inline] + fn size_hint(&self) -> (usize, Option) { + (self.remaining, Some(self.remaining)) + } +} + +impl<'a> ExactSizeIterator for DecoratedLinksIter<'a> { + #[inline] + fn len(&self) -> usize { + self.remaining + } +} + +#[cfg(feature = "arbitrary")] +impl Arbitrary for OpToDecoratorIds { + type Parameters = proptest::collection::SizeRange; + type Strategy = BoxedStrategy; + + fn arbitrary_with(size: Self::Parameters) -> Self::Strategy { + use proptest::{collection::vec, prelude::*}; + + // Generate small, controlled structures to avoid infinite loops + // Keep node and operation counts small and bounded, always at least 1 + (1usize..40, 1usize..=72) // (max_nodes, max_ops_per_node) + .prop_flat_map(move |(max_nodes, max_ops_per_node)| { + vec( + // Generate (node_id, op_id, decorator_id) with bounded values + (0..max_nodes, 0..max_ops_per_node, any::()), + size.clone(), // Limit total entries to size + ) + .prop_map(move |coo_data| { + // Build the OpToDecoratorIds incrementally + let mut mapping = OpToDecoratorIds::new(); + + // Group by node_id, then by op_id to maintain sorted order + use alloc::collections::BTreeMap; + let mut node_to_ops: BTreeMap>> = BTreeMap::new(); + + for (node_id, op_id, decorator_id) in coo_data { + node_to_ops + .entry(node_id as u32) + .or_default() + .entry(op_id as u32) + .or_default() + .push(decorator_id); + } + + // Add nodes in order to satisfy sequential constraint + for (node_id, ops_map) in node_to_ops { + let mut decorators_info: Vec<(usize, DecoratorId)> = Vec::new(); + for (op_id, decorator_ids) in ops_map { + for decorator_id in decorator_ids { + decorators_info.push((op_id as usize, DecoratorId(decorator_id))); + } + } + // Sort by operation index to meet add_decorator_info_for_node requirements + decorators_info.sort_by_key(|(op_idx, _)| *op_idx); + + mapping + .add_decorator_info_for_node( + MastNodeId::new_unchecked(node_id), + decorators_info, + ) + .unwrap(); + } + + mapping + }) + }) + .boxed() + } +} + +#[cfg(test)] +mod tests; diff --git a/core/src/mast/debuginfo/decorator_storage/tests.rs b/core/src/mast/debuginfo/decorator_storage/tests.rs new file mode 100644 index 0000000000..25d1dc10ba --- /dev/null +++ b/core/src/mast/debuginfo/decorator_storage/tests.rs @@ -0,0 +1,782 @@ +use miden_utils_indexing::IndexVec; + +use super::*; + +/// Helper function to create a test DecoratorId +fn test_decorator_id(value: u32) -> DecoratorId { + DecoratorId(value) +} + +/// Helper function to create a test MastNodeId +fn test_node_id(value: u32) -> MastNodeId { + MastNodeId::new_unchecked(value) +} + +/// Helper to create standard test storage with 2 nodes, 3 operations, 6 decorator IDs +/// Structure: Node 0: Op 0 -> [0, 1], Op 1 -> [2]; Node 1: Op 0 -> [3, 4, 5] +fn create_standard_test_storage() -> OpToDecoratorIds { + let decorator_ids = vec![ + test_decorator_id(0), + test_decorator_id(1), + test_decorator_id(2), + test_decorator_id(3), + test_decorator_id(4), + test_decorator_id(5), + ]; + let op_indptr_for_dec_ids = vec![0, 2, 3, 6]; + let mut node_indptr_for_op_idx = IndexVec::new(); + node_indptr_for_op_idx.push(0).expect("test setup: IndexVec capacity exceeded"); + node_indptr_for_op_idx.push(2).expect("test setup: IndexVec capacity exceeded"); + node_indptr_for_op_idx.push(3).expect("test setup: IndexVec capacity exceeded"); + + OpToDecoratorIds::from_components(decorator_ids, op_indptr_for_dec_ids, node_indptr_for_op_idx) + .unwrap() +} + +#[test] +fn test_constructors() { + // Test new() + let storage = OpToDecoratorIds::new(); + assert_eq!(storage.num_nodes(), 0); + assert_eq!(storage.num_decorator_ids(), 0); + + // Test with_capacity() + let storage = OpToDecoratorIds::with_capacity(10, 20, 30); + assert_eq!(storage.num_nodes(), 0); + assert_eq!(storage.num_decorator_ids(), 0); + + // Test default() + let storage = OpToDecoratorIds::default(); + assert_eq!(storage.num_nodes(), 0); + assert_eq!(storage.num_decorator_ids(), 0); +} + +#[test] +fn test_from_components_simple() { + // Create a simple structure: + // Node 0: Op 0 -> [0, 1], Op 1 -> [2] + // Node 1: Op 0 -> [3, 4, 5] + let storage = create_standard_test_storage(); + + assert_eq!(storage.num_nodes(), 2); + assert_eq!(storage.num_decorator_ids(), 6); +} + +#[test] +fn test_from_components_invalid_structure() { + // Empty structures are valid + let result = OpToDecoratorIds::from_components(vec![], vec![], IndexVec::new()); + assert!(result.is_ok()); + + // Test with operation pointer exceeding decorator indices + let result = OpToDecoratorIds::from_components( + vec![test_decorator_id(0)], + vec![0, 2], // Points to index 2 but we only have 1 decorator + IndexVec::new(), + ); + assert_eq!(result, Err(DecoratorIndexError::InternalStructure)); + + // Test with non-monotonic operation pointers + let result = OpToDecoratorIds::from_components( + vec![test_decorator_id(0), test_decorator_id(1)], + vec![0, 2, 1], // 2 > 1, should be monotonic + IndexVec::new(), + ); + assert_eq!(result, Err(DecoratorIndexError::InternalStructure)); +} + +#[test] +fn test_data_access_methods() { + let storage = create_standard_test_storage(); + + // Test decorator_ids_for_operation + let decorators = storage.decorator_ids_for_operation(test_node_id(0), 0).unwrap(); + assert_eq!(decorators, &[test_decorator_id(0), test_decorator_id(1)]); + + let decorators = storage.decorator_ids_for_operation(test_node_id(0), 1).unwrap(); + assert_eq!(decorators, &[test_decorator_id(2)]); + + let decorators = storage.decorator_ids_for_operation(test_node_id(1), 0).unwrap(); + assert_eq!(decorators, &[test_decorator_id(3), test_decorator_id(4), test_decorator_id(5)]); + + // Test decorator_ids_for_node + let decorators: Vec<_> = storage.decorator_ids_for_node(test_node_id(0)).unwrap().collect(); + assert_eq!(decorators.len(), 2); + assert_eq!(decorators[0], (0, &[test_decorator_id(0), test_decorator_id(1)][..])); + assert_eq!(decorators[1], (1, &[test_decorator_id(2)][..])); + + let decorators: Vec<_> = storage.decorator_ids_for_node(test_node_id(1)).unwrap().collect(); + assert_eq!(decorators.len(), 1); + assert_eq!( + decorators[0], + (0, &[test_decorator_id(3), test_decorator_id(4), test_decorator_id(5)][..]) + ); + + // Test operation_has_decorator_ids + assert!(storage.operation_has_decorator_ids(test_node_id(0), 0).unwrap()); + assert!(storage.operation_has_decorator_ids(test_node_id(0), 1).unwrap()); + assert!(storage.operation_has_decorator_ids(test_node_id(1), 0).unwrap()); + assert!(!storage.operation_has_decorator_ids(test_node_id(0), 2).unwrap()); + + // Test num_decorator_ids_for_operation + assert_eq!(storage.num_decorator_ids_for_operation(test_node_id(0), 0).unwrap(), 2); + assert_eq!(storage.num_decorator_ids_for_operation(test_node_id(0), 1).unwrap(), 1); + assert_eq!(storage.num_decorator_ids_for_operation(test_node_id(1), 0).unwrap(), 3); + assert_eq!(storage.num_decorator_ids_for_operation(test_node_id(0), 2).unwrap(), 0); + + // Test invalid operation returns empty slice + let decorators = storage.decorator_ids_for_operation(test_node_id(0), 2).unwrap(); + assert_eq!(decorators, &[]); +} + +#[test] +fn test_empty_nodes_basic_functionality() { + // Test 1: Empty nodes created via from_components (original + // test_empty_nodes_and_operations) + { + // Create a structure with empty nodes/operations + let decorator_indices = vec![]; + let op_indptr_for_dec_idx = vec![0, 0, 0]; // 2 operations, both empty + let mut node_indptr_for_op_idx = IndexVec::new(); + node_indptr_for_op_idx.push(0).expect("test setup: IndexVec capacity exceeded"); + node_indptr_for_op_idx.push(2).expect("test setup: IndexVec capacity exceeded"); + + let storage = OpToDecoratorIds::from_components( + decorator_indices, + op_indptr_for_dec_idx, + node_indptr_for_op_idx, + ) + .unwrap(); + + assert_eq!(storage.num_nodes(), 1); + assert_eq!(storage.num_decorator_ids(), 0); + + // Empty decorators + let decorators = storage.decorator_ids_for_operation(test_node_id(0), 0).unwrap(); + assert_eq!(decorators, &[]); + + // Operation has no decorators + assert!(!storage.operation_has_decorator_ids(test_node_id(0), 0).unwrap()); + } + + // Test 2: Empty nodes created via add_decorator_info_for_node (original + // test_decorator_ids_for_node_with_empty_nodes) + { + let mut storage = OpToDecoratorIds::new(); + + // Add node 0 with no decorators (empty node) + storage.add_decorator_info_for_node(test_node_id(0), vec![]).unwrap(); + + // Test 2a: operation_range_for_node should be empty for node with no decorators + let range = storage.operation_range_for_node(test_node_id(0)); + assert!(range.is_ok(), "operation_range_for_node should return Ok for empty node"); + let range = range.unwrap(); + assert_eq!(range, 0..0, "Empty node should have empty operations range"); + + // Test 2b: decorator_ids_for_node should return an empty iterator + let result = storage.decorator_ids_for_node(test_node_id(0)); + assert!(result.is_ok(), "decorator_ids_for_node should return Ok for empty node"); + // The iterator should be empty + let decorators: Vec<_> = result.unwrap().collect(); + assert_eq!(decorators, Vec::<(usize, &[DecoratorId])>::new()); + + // Test 2c: decorator_links_for_node should return an empty iterator + let result = storage.decorator_links_for_node(test_node_id(0)); + assert!(result.is_ok(), "decorator_links_for_node should return Ok for empty node"); + let links: Vec<_> = result.unwrap().into_iter().collect(); + assert_eq!(links, Vec::<(usize, DecoratorId)>::new()); + + // Test 2d: Basic access methods on empty node + assert_eq!(storage.num_nodes(), 1); + assert_eq!(storage.num_decorator_ids(), 0); + } +} + +#[test] +fn test_debug_impl() { + let storage = OpToDecoratorIds::new(); + let debug_str = format!("{:?}", storage); + assert!(debug_str.contains("OpToDecoratorIds")); +} + +#[test] +fn test_clone_and_equality() { + let decorator_indices = vec![ + test_decorator_id(0), + test_decorator_id(1), + test_decorator_id(2), + test_decorator_id(3), + test_decorator_id(4), + test_decorator_id(5), + ]; + let op_indptr_for_dec_idx = vec![0, 2, 3, 6]; + let mut node_indptr_for_op_idx = IndexVec::new(); + node_indptr_for_op_idx.push(0).expect("test setup: IndexVec capacity exceeded"); + node_indptr_for_op_idx.push(2).expect("test setup: IndexVec capacity exceeded"); + node_indptr_for_op_idx.push(3).expect("test setup: IndexVec capacity exceeded"); + + let storage1 = OpToDecoratorIds::from_components( + decorator_indices.clone(), + op_indptr_for_dec_idx.clone(), + node_indptr_for_op_idx.clone(), + ) + .unwrap(); + + let storage2 = storage1.clone(); + assert_eq!(storage1, storage2); + + // Modify one and ensure they're no longer equal + let different_decorators = vec![test_decorator_id(10)]; + let mut different_node_indptr = IndexVec::new(); + different_node_indptr.push(0).expect("test setup: IndexVec capacity exceeded"); + different_node_indptr.push(1).expect("test setup: IndexVec capacity exceeded"); + + let storage3 = + OpToDecoratorIds::from_components(different_decorators, vec![0, 1], different_node_indptr) + .unwrap(); + + assert_ne!(storage1, storage3); +} + +#[test] +fn test_add_decorator_info_functionality() { + // Test 1: Basic multi-node functionality + let mut storage = OpToDecoratorIds::new(); + + // Add decorators for node 0 + let decorators_info = vec![ + (0, test_decorator_id(10)), + (0, test_decorator_id(11)), + (2, test_decorator_id(12)), + ]; + storage.add_decorator_info_for_node(test_node_id(0), decorators_info).unwrap(); + + assert_eq!(storage.num_nodes(), 1); + assert_eq!(storage.num_decorator_ids_for_operation(test_node_id(0), 0).unwrap(), 2); + assert_eq!(storage.num_decorator_ids_for_operation(test_node_id(0), 2).unwrap(), 1); + + // Add node 1 with simple decorators + storage + .add_decorator_info_for_node(test_node_id(1), vec![(0, test_decorator_id(20))]) + .unwrap(); + assert_eq!(storage.num_nodes(), 2); + + let node1_op0 = storage.decorator_ids_for_operation(test_node_id(1), 0).unwrap(); + assert_eq!(node1_op0, &[test_decorator_id(20)]); + + // Test 2: Sequential constraint validation + let mut storage2 = OpToDecoratorIds::new(); + storage2 + .add_decorator_info_for_node(test_node_id(0), vec![(0, test_decorator_id(10))]) + .unwrap(); + + // Adding node 1 should succeed + storage2 + .add_decorator_info_for_node(test_node_id(1), vec![(0, test_decorator_id(30))]) + .unwrap(); + assert_eq!(storage2.num_nodes(), 2); + + // Try to add node 0 again - should fail + let result = + storage2.add_decorator_info_for_node(test_node_id(0), vec![(0, test_decorator_id(40))]); + assert_eq!(result, Err(DecoratorIndexError::NodeIndex(test_node_id(0)))); + + // Test 3: Empty input handling (creates empty nodes with no operations) + let mut storage3 = OpToDecoratorIds::new(); + let result = storage3.add_decorator_info_for_node(test_node_id(0), vec![]); + assert_eq!(result, Ok(())); + assert_eq!(storage3.num_nodes(), 1); // Should create empty node + + // Empty node should have no operations (accessing any operation should return empty) + let decorators = storage3.decorator_ids_for_operation(test_node_id(0), 0).unwrap(); + assert_eq!(decorators, &[]); + + // Should be able to add next node after empty node + storage3 + .add_decorator_info_for_node(test_node_id(1), vec![(0, test_decorator_id(100))]) + .unwrap(); + assert_eq!(storage3.num_nodes(), 2); + + // Test 4: Operations with gaps + let mut storage4 = OpToDecoratorIds::new(); + let gap_decorators = vec![ + (0, test_decorator_id(10)), + (0, test_decorator_id(11)), // operation 0 has 2 decorators + (3, test_decorator_id(12)), // operation 3 has 1 decorator + (4, test_decorator_id(13)), // operation 4 has 1 decorator + ]; + storage4.add_decorator_info_for_node(test_node_id(0), gap_decorators).unwrap(); + + assert_eq!(storage4.num_decorator_ids_for_operation(test_node_id(0), 0).unwrap(), 2); + assert_eq!(storage4.num_decorator_ids_for_operation(test_node_id(0), 1).unwrap(), 0); + assert_eq!(storage4.num_decorator_ids_for_operation(test_node_id(0), 2).unwrap(), 0); + assert_eq!(storage4.num_decorator_ids_for_operation(test_node_id(0), 3).unwrap(), 1); + assert_eq!(storage4.num_decorator_ids_for_operation(test_node_id(0), 4).unwrap(), 1); + + // Test accessing operations without decorators returns empty slice + let op1_decorators = storage4.decorator_ids_for_operation(test_node_id(0), 1).unwrap(); + assert_eq!(op1_decorators, &[]); + + // Test 5: Your specific use case - mixed empty and non-empty nodes + let mut storage5 = OpToDecoratorIds::new(); + + // node 0 with decorators + storage5 + .add_decorator_info_for_node( + test_node_id(0), + vec![(0, test_decorator_id(1)), (1, test_decorator_id(0))], + ) + .unwrap(); + + // node 1 with no decorators (empty) + storage5.add_decorator_info_for_node(test_node_id(1), vec![]).unwrap(); + + // node 2 with decorators + storage5 + .add_decorator_info_for_node( + test_node_id(2), + vec![(1, test_decorator_id(1)), (2, test_decorator_id(2))], + ) + .unwrap(); + + assert_eq!(storage5.num_nodes(), 3); + + // Verify node 0: op 0 has [1], op 1 has [0] + assert_eq!( + storage5.decorator_ids_for_operation(test_node_id(0), 0).unwrap(), + &[test_decorator_id(1)] + ); + assert_eq!( + storage5.decorator_ids_for_operation(test_node_id(0), 1).unwrap(), + &[test_decorator_id(0)] + ); + + // Verify node 1: has no operations at all, any operation access returns empty + assert_eq!(storage5.decorator_ids_for_operation(test_node_id(1), 0).unwrap(), &[]); + + // Verify node 2: op 0 has [], op 1 has [1], op 2 has [2] + assert_eq!(storage5.decorator_ids_for_operation(test_node_id(2), 0).unwrap(), &[]); + assert_eq!( + storage5.decorator_ids_for_operation(test_node_id(2), 1).unwrap(), + &[test_decorator_id(1)] + ); + assert_eq!( + storage5.decorator_ids_for_operation(test_node_id(2), 2).unwrap(), + &[test_decorator_id(2)] + ); +} + +#[test] +fn test_empty_nodes_edge_cases() { + // Test edge cases with empty nodes (nodes with no decorators) + // This consolidates test_decorator_ids_for_node_mixed_scenario and + // test_decorated_links_overflow_bug + + let mut storage = OpToDecoratorIds::new(); + + // Set up mixed scenario: some nodes have decorators, some don't + // Node 0: Has decorators + storage + .add_decorator_info_for_node( + test_node_id(0), + vec![(0, test_decorator_id(10)), (2, test_decorator_id(20))], + ) + .unwrap(); + + // Node 1: Has decorators + storage + .add_decorator_info_for_node( + test_node_id(1), + vec![ + (0, test_decorator_id(30)), + (0, test_decorator_id(31)), + (3, test_decorator_id(32)), + ], + ) + .unwrap(); + + // Node 2: No decorators (empty node) - this is the edge case we're testing + storage.add_decorator_info_for_node(test_node_id(2), vec![]).unwrap(); + + // Test 1: Verify range handling for empty nodes + let range0 = storage.operation_range_for_node(test_node_id(0)).unwrap(); + let range1 = storage.operation_range_for_node(test_node_id(1)).unwrap(); + let range2 = storage.operation_range_for_node(test_node_id(2)).unwrap(); + + // Nodes with decorators should have non-empty ranges + assert!(range0.end > range0.start, "Node with decorators should have non-empty range"); + assert!(range1.end > range1.start, "Node with decorators should have non-empty range"); + + // Empty node should have range pointing to a sentinel that was added when the empty node + // was created. The sentinel is added at the position that would have been "past the end" + // before the empty node was added, making the empty node's range valid. + let op_indptr_len = storage.op_indptr_for_dec_ids.len(); + // The empty node should point to the sentinel (which is now at len()-1 after being added) + let expected_empty_pos = op_indptr_len - 1; + assert_eq!( + range2.start, expected_empty_pos, + "Empty node should point to the sentinel that was added for it" + ); + assert_eq!( + range2.end, expected_empty_pos, + "Empty node should have empty range at the sentinel" + ); + + // Test 2: decorator_ids_for_node() should work for empty nodes + // This should not panic - the iterator should be empty even though the range points to + // array end + let result = storage.decorator_ids_for_node(test_node_id(2)); + assert!(result.is_ok(), "decorator_ids_for_node should work for node with no decorators"); + let decorators: Vec<_> = result.unwrap().collect(); + assert_eq!(decorators, Vec::<(usize, &[DecoratorId])>::new()); + + // Test 3: decorator_links_for_node() should work for empty nodes (tests overflow bug) + // This tests the specific overflow bug in DecoratedLinks iterator + let result = storage.decorator_links_for_node(test_node_id(2)); + assert!(result.is_ok(), "decorator_links_for_node should return Ok for empty node"); + + let links = result.unwrap(); + // This should not panic, even when iterating + let collected: Vec<_> = links.into_iter().collect(); + assert_eq!(collected, Vec::<(usize, DecoratorId)>::new()); + + // Test 4: Multiple iterations should work (regression test for iterator reuse) + let result2 = storage.decorator_links_for_node(test_node_id(2)); + assert!( + result2.is_ok(), + "decorator_links_for_node should work repeatedly for empty node" + ); + let links2 = result2.unwrap(); + let collected2: Vec<_> = links2.into_iter().collect(); + assert_eq!(collected2, Vec::<(usize, DecoratorId)>::new()); + + // Test 5: Multiple iterations of decorator_ids_for_node should also work + let result3 = storage.decorator_ids_for_node(test_node_id(2)); + assert!(result3.is_ok(), "decorator_ids_for_node should work repeatedly for empty node"); + let decorators2: Vec<_> = result3.unwrap().collect(); + assert_eq!(decorators2, Vec::<(usize, &[DecoratorId])>::new()); +} + +#[test] +fn test_decorator_links_for_node_flattened() { + let storage = create_standard_test_storage(); + let n0 = MastNodeId::new_unchecked(0); + let flat: Vec<_> = storage.decorator_links_for_node(n0).unwrap().into_iter().collect(); + // Node 0: Op0 -> [0,1], Op1 -> [2] + assert_eq!(flat, vec![(0, DecoratorId(0)), (0, DecoratorId(1)), (1, DecoratorId(2)),]); + + let n1 = MastNodeId::new_unchecked(1); + let flat1: Vec<_> = storage.decorator_links_for_node(n1).unwrap().into_iter().collect(); + // Node 1: Op0 -> [3,4,5] + assert_eq!(flat1, vec![(0, DecoratorId(3)), (0, DecoratorId(4)), (0, DecoratorId(5)),]); +} + +#[test] +/// This test verifies that the CSR encoding described in the OpToDecoratorIds struct +/// documentation correctly represents COO data. It also validates all accessor methods +/// work as expected. Keep this test in sync with the documentation example (adding nodes +/// to this test if you add nodes to the documentation example, and vice versa). +fn test_csr_and_coo_produce_same_elements() { + // Build a COO representation manually + let coo_data = vec![ + // Node 0 + (MastNodeId::new_unchecked(0), 0, DecoratorId(10)), + (MastNodeId::new_unchecked(0), 0, DecoratorId(11)), + (MastNodeId::new_unchecked(0), 1, DecoratorId(12)), + (MastNodeId::new_unchecked(0), 2, DecoratorId(13)), + // Node 1 + (MastNodeId::new_unchecked(1), 0, DecoratorId(20)), + (MastNodeId::new_unchecked(1), 2, DecoratorId(21)), + (MastNodeId::new_unchecked(1), 2, DecoratorId(22)), + // Node 2 (empty node, should still work) + // Node 3 + (MastNodeId::new_unchecked(3), 0, DecoratorId(30)), + ]; + + // Build COO representation as a HashMap for easy lookup during verification + let mut coo_map: alloc::collections::BTreeMap<(MastNodeId, usize), Vec> = + alloc::collections::BTreeMap::new(); + for (node, op_idx, decorator_id) in &coo_data { + coo_map.entry((*node, *op_idx)).or_default().push(*decorator_id); + } + + // Build CSR representation using the builder API + let mut csr_storage = OpToDecoratorIds::new(); + + // Node 0: Op0 -> [10,11], Op1 -> [12], Op2 -> [13] + csr_storage + .add_decorator_info_for_node( + MastNodeId::new_unchecked(0), + vec![ + (0, DecoratorId(10)), + (0, DecoratorId(11)), + (1, DecoratorId(12)), + (2, DecoratorId(13)), + ], + ) + .unwrap(); + + // Node 1: Op0 -> [20], Op2 -> [21,22] + csr_storage + .add_decorator_info_for_node( + MastNodeId::new_unchecked(1), + vec![(0, DecoratorId(20)), (2, DecoratorId(21)), (2, DecoratorId(22))], + ) + .unwrap(); + + // Node 2: empty + csr_storage + .add_decorator_info_for_node(MastNodeId::new_unchecked(2), vec![]) + .unwrap(); + + // Node 3: Op0 -> [30] + csr_storage + .add_decorator_info_for_node(MastNodeId::new_unchecked(3), vec![(0, DecoratorId(30))]) + .unwrap(); + + // Verify that CSR and COO produce the same elements + for node_idx in 0..4 { + let node_id = MastNodeId::new_unchecked(node_idx); + + // Get all operations for this node from CSR + let op_range = csr_storage.operation_range_for_node(node_id).unwrap(); + let num_ops = op_range.len(); + + // For each operation in this node + for op_idx in 0..num_ops { + // Get decorator IDs from CSR + let csr_decorator_ids = + csr_storage.decorator_ids_for_operation(node_id, op_idx).unwrap(); + + // Get decorator IDs from COO map + let coo_key = (node_id, op_idx); + let coo_decorator_ids = + coo_map.get(&coo_key).map_or(&[] as &[DecoratorId], |v| v.as_slice()); + + // They should be the same + assert_eq!( + csr_decorator_ids, coo_decorator_ids, + "CSR and COO should produce the same decorator IDs for node {:?}, op {}", + node_id, op_idx + ); + } + } + + // Also verify using the flattened iterator approach + for node_idx in 0..4 { + let node_id = MastNodeId::new_unchecked(node_idx); + + // Get flattened view from CSR + let csr_flat: Vec<(usize, DecoratorId)> = + csr_storage.decorator_links_for_node(node_id).unwrap().into_iter().collect(); + + // Build expected from COO map + let mut expected_flat = Vec::new(); + for ((node, op_idx), decorator_ids) in &coo_map { + if *node == node_id { + for decorator_id in decorator_ids { + expected_flat.push((*op_idx, *decorator_id)); + } + } + } + // Sort by operation index then decorator ID for consistent comparison + expected_flat.sort_by_key(|(op_idx, dec_id)| (*op_idx, u32::from(*dec_id))); + + assert_eq!( + csr_flat, expected_flat, + "Flattened CSR and COO should produce the same elements for node {:?}", + node_id + ); + } +} + +#[test] +fn test_validate_csr_valid() { + let storage = create_standard_test_storage(); + assert!(storage.validate_csr(6).is_ok()); +} + +#[test] +fn test_validate_csr_invalid_decorator_id() { + let storage = create_standard_test_storage(); + // decorator_count=3 but storage has IDs up to 5 + let result = storage.validate_csr(3); + assert!(result.is_err()); + assert!(result.unwrap_err().contains("Invalid decorator ID")); +} + +#[test] +fn test_validate_csr_not_monotonic() { + let decorator_ids = vec![test_decorator_id(0), test_decorator_id(1)]; + let op_indptr_for_dec_ids = vec![0, 2, 1]; // Not monotonic! + let mut node_indptr_for_op_idx = IndexVec::new(); + node_indptr_for_op_idx.push(0).unwrap(); + node_indptr_for_op_idx.push(2).unwrap(); + + let storage = OpToDecoratorIds::from_components( + decorator_ids, + op_indptr_for_dec_ids, + node_indptr_for_op_idx, + ); + + // from_components should catch this + assert!(storage.is_err()); +} + +#[test] +fn test_validate_csr_wrong_start() { + let decorator_ids = vec![test_decorator_id(0)]; + let op_indptr_for_dec_ids = vec![1, 1]; // Should start at 0! + let mut node_indptr_for_op_idx = IndexVec::new(); + node_indptr_for_op_idx.push(0).unwrap(); + node_indptr_for_op_idx.push(1).unwrap(); + + let storage = OpToDecoratorIds::from_components( + decorator_ids, + op_indptr_for_dec_ids, + node_indptr_for_op_idx, + ); + + assert!(storage.is_err()); +} + +#[cfg(feature = "arbitrary")] +proptest! { + /// Property test that verifies decorator_links_for_node always produces a valid iterator + /// that can be fully consumed without panicking for any OpToDecoratorIds. + #[test] + fn decorator_links_for_node_always_iterates_complete( + mapping in any::() + ) { + // Skip empty mappings since they have no nodes to test + if mapping.num_nodes() == 0 { + return Ok(()); + } + + // Test every valid node in the mapping + for node_idx in 0..mapping.num_nodes() { + let node_id = MastNodeId::new_unchecked(node_idx as u32); + + // Call decorator_links_for_node - this should never return an error for valid nodes + let result = mapping.decorator_links_for_node(node_id); + + // The result should always be Ok for valid node indices + prop_assume!(result.is_ok(), "decorator_links_for_node should succeed for valid node"); + + let decorated_links = result.unwrap(); + + // Convert to iterator and collect all items - this should complete without panicking + let collected: Vec<(usize, DecoratorId)> = decorated_links.into_iter().collect(); + + // The collected items should match what we get from decorator_ids_for_node + let expected_items: Vec<(usize, DecoratorId)> = mapping + .decorator_ids_for_node(node_id) + .unwrap() + .flat_map(|(op_idx, decorator_ids)| { + decorator_ids.iter().map(move |&decorator_id| (op_idx, decorator_id)) + }) + .collect(); + + prop_assert_eq!(collected, expected_items); + } + } +} + +// Sparse debug tests +use alloc::vec; + +#[test] +fn test_sparse_case_manual() { + // Manually create what should be produced by sparse decorator case: + // 10 nodes, decorators only at nodes 0 and 5 + + // Just node 0 with decorator at op 0 + let decorator_ids = vec![test_decorator_id(0)]; + let op_indptr_for_dec_ids = vec![0, 1, 1]; // op 0: [0,1), sentinel + let mut node_indptr = IndexVec::new(); + node_indptr.push(0).unwrap(); // node 0 starts at op index 0 + node_indptr.push(2).unwrap(); // node 0 ends at op index 2 (sentinel) + + // Validate this structure + let result = + OpToDecoratorIds::from_components(decorator_ids, op_indptr_for_dec_ids, node_indptr); + + assert!(result.is_ok(), "Single node with decorator should validate: {:?}", result); +} + +#[test] +fn test_sparse_case_two_nodes() { + // Two nodes: node 0 with decorator, node 1 empty + let decorator_ids = vec![test_decorator_id(0)]; + let op_indptr_for_dec_ids = vec![0, 1, 1]; // op 0: [0,1), sentinel at 1 + let mut node_indptr = IndexVec::new(); + node_indptr.push(0).unwrap(); // node 0 starts at op index 0 + node_indptr.push(2).unwrap(); // node 0 ends at op index 2 + node_indptr.push(2).unwrap(); // node 1 (empty) points to same location + + let result = + OpToDecoratorIds::from_components(decorator_ids, op_indptr_for_dec_ids, node_indptr); + + assert!( + result.is_ok(), + "Two nodes (one with decorator, one empty) should validate: {:?}", + result + ); +} + +#[test] +fn test_sparse_debuginfo_round_trip() { + use alloc::collections::BTreeMap; + + use crate::{ + Decorator, + mast::debuginfo::{DebugInfo, NodeToDecoratorIds}, + utils::{Deserializable, Serializable}, + }; + + // Create a sparse CSR structure like we'd see with 10 nodes where only 0 and 5 have + // decorators + let decorator_ids = vec![test_decorator_id(0), test_decorator_id(1)]; + + // Node 0: op 0 has decorator 0 + // Nodes 1-4: empty + // Node 5: op 0 has decorator 1 + let op_indptr_for_dec_ids = vec![0, 1, 1, 1, 2, 2]; // ops with their decorator ranges + let mut node_indptr_for_op_idx = IndexVec::new(); + node_indptr_for_op_idx.push(0).unwrap(); // node 0 + node_indptr_for_op_idx.push(2).unwrap(); // node 0 end + node_indptr_for_op_idx.push(2).unwrap(); // node 1 (empty) + node_indptr_for_op_idx.push(2).unwrap(); // node 2 (empty) + node_indptr_for_op_idx.push(2).unwrap(); // node 3 (empty) + node_indptr_for_op_idx.push(2).unwrap(); // node 4 (empty) + node_indptr_for_op_idx.push(4).unwrap(); // node 5 + + let op_storage = OpToDecoratorIds::from_components( + decorator_ids.clone(), + op_indptr_for_dec_ids, + node_indptr_for_op_idx, + ) + .expect("CSR structure should be valid"); + + // Create a minimal DebugInfo + let mut decorators = crate::mast::debuginfo::IndexVec::new(); + decorators.push(Decorator::Trace(0)).unwrap(); + decorators.push(Decorator::Trace(5)).unwrap(); + + let node_storage = NodeToDecoratorIds::new(); + let error_codes = BTreeMap::new(); + + let debug_info = DebugInfo { + decorators, + op_decorator_storage: op_storage, + node_decorator_storage: node_storage, + error_codes, + }; + + // Serialize and deserialize + let bytes = debug_info.to_bytes(); + let deserialized = DebugInfo::read_from_bytes(&bytes).expect("Should deserialize successfully"); + + // Verify + assert_eq!(debug_info.num_decorators(), deserialized.num_decorators()); +} diff --git a/core/src/mast/debuginfo/mod.rs b/core/src/mast/debuginfo/mod.rs index 49433dafbc..6f856f1c83 100644 --- a/core/src/mast/debuginfo/mod.rs +++ b/core/src/mast/debuginfo/mod.rs @@ -341,7 +341,7 @@ impl DebugInfo { /// - All CSR structures in op_decorator_storage /// - All CSR structures in node_decorator_storage /// - All decorator IDs reference valid decorators - pub(crate) fn validate(&self) -> Result<(), String> { + pub(super) fn validate(&self) -> Result<(), String> { let decorator_count = self.decorators.len(); // Validate OpToDecoratorIds CSR @@ -392,55 +392,15 @@ impl Serializable for DebugInfo { // Dense representation: serialize indptr arrays as-is (no sparse encoding). // Analysis shows sparse saves <1KB even with 90% empty nodes, not worth complexity. // See measurement: https://gist.github.com/huitseeker/7379e2eecffd7020ae577e986057a400 - - // decorator_ids as Vec - let decorator_ids_u32: Vec = self - .op_decorator_storage - .decorator_ids - .iter() - .map(|id| u32::from(*id)) - .collect(); - decorator_ids_u32.write_into(target); - - // op_indptr_for_dec_ids - self.op_decorator_storage.op_indptr_for_dec_ids.write_into(target); - - // node_indptr_for_op_idx as Vec - self.op_decorator_storage - .node_indptr_for_op_idx - .as_slice() - .to_vec() - .write_into(target); + self.op_decorator_storage.write_into(target); // 4. Serialize NodeToDecoratorIds CSR (dense representation) + self.node_decorator_storage.write_into(target); - // before_enter_decorators as Vec - let before_decorators_u32: Vec = self - .node_decorator_storage - .before_enter_decorators - .iter() - .map(|id| u32::from(*id)) - .collect(); - before_decorators_u32.write_into(target); - - // after_exit_decorators as Vec - let after_decorators_u32: Vec = self - .node_decorator_storage - .after_exit_decorators - .iter() - .map(|id| u32::from(*id)) - .collect(); - after_decorators_u32.write_into(target); - - // node_indptr_for_before as Vec - let before_indptr_vec: Vec = - self.node_decorator_storage.node_indptr_for_before.as_slice().to_vec(); - before_indptr_vec.write_into(target); - - // node_indptr_for_after as Vec - let after_indptr_vec: Vec = - self.node_decorator_storage.node_indptr_for_after.as_slice().to_vec(); - after_indptr_vec.write_into(target); + // 5. Serialize procedure names + let procedure_names: BTreeMap = + self.procedure_names().map(|(k, v)| (k, v.to_string())).collect(); + procedure_names.write_into(target); } } @@ -479,86 +439,10 @@ impl Deserializable for DebugInfo { .collect(); // 4. Read OpToDecoratorIds CSR (dense representation) - - // decorator_ids from Vec - let decorator_ids_u32: Vec = Deserializable::read_from(source)?; - let decorator_ids: Vec = decorator_ids_u32 - .into_iter() - .map(|id| { - if id as usize >= decorators.len() { - return Err(DeserializationError::InvalidValue(format!( - "DecoratorId {} exceeds decorator count {}", - id, - decorators.len() - ))); - } - Ok(DecoratorId(id)) - }) - .collect::, _>>()?; - - // op_indptr_for_dec_ids - let op_indptr_for_dec_ids: Vec = Deserializable::read_from(source)?; - - // node_indptr_for_op_idx from Vec - let node_indptr_vec: Vec = Deserializable::read_from(source)?; - let node_indptr_for_op_idx = IndexVec::from_raw(node_indptr_vec); - - let op_decorator_storage = OpToDecoratorIds::from_components( - decorator_ids, - op_indptr_for_dec_ids, - node_indptr_for_op_idx, - ) - .map_err(|e| DeserializationError::InvalidValue(e.to_string()))?; + let op_decorator_storage = OpToDecoratorIds::read_from(source, decorators.len())?; // 5. Read NodeToDecoratorIds CSR (dense representation) - - // before_enter_decorators from Vec - let before_decorators_u32: Vec = Deserializable::read_from(source)?; - let before_enter_decorators: Vec = before_decorators_u32 - .into_iter() - .map(|id| { - if id as usize >= decorators.len() { - return Err(DeserializationError::InvalidValue(format!( - "DecoratorId {} exceeds decorator count {}", - id, - decorators.len() - ))); - } - Ok(DecoratorId(id)) - }) - .collect::, _>>()?; - - // after_exit_decorators from Vec - let after_decorators_u32: Vec = Deserializable::read_from(source)?; - let after_exit_decorators: Vec = after_decorators_u32 - .into_iter() - .map(|id| { - if id as usize >= decorators.len() { - return Err(DeserializationError::InvalidValue(format!( - "DecoratorId {} exceeds decorator count {}", - id, - decorators.len() - ))); - } - Ok(DecoratorId(id)) - }) - .collect::, _>>()?; - - // node_indptr_for_before from Vec - let before_indptr_vec: Vec = Deserializable::read_from(source)?; - let node_indptr_for_before = IndexVec::from_raw(before_indptr_vec); - - // node_indptr_for_after from Vec - let after_indptr_vec: Vec = Deserializable::read_from(source)?; - let node_indptr_for_after = IndexVec::from_raw(after_indptr_vec); - - let node_decorator_storage = NodeToDecoratorIds::from_components( - before_enter_decorators, - after_exit_decorators, - node_indptr_for_before, - node_indptr_for_after, - ) - .map_err(|e| DeserializationError::InvalidValue(e.to_string()))?; + let node_decorator_storage = NodeToDecoratorIds::read_from(source, decorators.len())?; // 6. Read procedure names (marked serde(skip) but we deserialize manually) let procedure_names_raw: BTreeMap = Deserializable::read_from(source)?; diff --git a/core/src/mast/debuginfo/node_decorator_storage.rs b/core/src/mast/debuginfo/node_decorator_storage.rs index db7b2fbc65..78191aa352 100644 --- a/core/src/mast/debuginfo/node_decorator_storage.rs +++ b/core/src/mast/debuginfo/node_decorator_storage.rs @@ -36,19 +36,19 @@ use crate::{ #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] pub struct NodeToDecoratorIds { /// All `before_enter` decorators, concatenated across all nodes. - pub before_enter_decorators: Vec, + pub(super) before_enter_decorators: Vec, /// All `after_exit` decorators, concatenated across all nodes. - pub after_exit_decorators: Vec, + pub(super) after_exit_decorators: Vec, /// Index pointers for before_enter decorators: the range for node `i` is /// ```text /// node_indptr_for_before[i]..node_indptr_for_before[i+1] /// ``` - pub node_indptr_for_before: IndexVec, + pub(super) node_indptr_for_before: IndexVec, /// Index pointers for after_exit decorators: the range for node `i` is /// ```text /// node_indptr_for_after[i]..node_indptr_for_after[i+1] /// ``` - pub node_indptr_for_after: IndexVec, + pub(super) node_indptr_for_after: IndexVec, } impl NodeToDecoratorIds { @@ -98,7 +98,7 @@ impl NodeToDecoratorIds { /// Checks: /// - All decorator IDs are valid (< decorator_count) /// - Both indptr arrays are monotonic, start at 0, end at respective decorator vector lengths - pub(crate) fn validate_csr(&self, decorator_count: usize) -> Result<(), String> { + pub(super) fn validate_csr(&self, decorator_count: usize) -> Result<(), String> { // Completely empty structures are valid (no nodes, no decorators) if self.before_enter_decorators.is_empty() && self.after_exit_decorators.is_empty() @@ -313,6 +313,86 @@ impl NodeToDecoratorIds { pub fn is_empty(&self) -> bool { self.len() == 0 } + + // SERIALIZATION HELPERS + // -------------------------------------------------------------------------------------------- + + /// Write this CSR structure to a target using dense representation. + pub(super) fn write_into(&self, target: &mut W) { + use crate::utils::Serializable; + + // before_enter_decorators as Vec + let before_decorators_u32: Vec = + self.before_enter_decorators.iter().map(|id| u32::from(*id)).collect(); + before_decorators_u32.write_into(target); + + // after_exit_decorators as Vec + let after_decorators_u32: Vec = + self.after_exit_decorators.iter().map(|id| u32::from(*id)).collect(); + after_decorators_u32.write_into(target); + + // node_indptr_for_before as Vec + let before_indptr_vec: Vec = self.node_indptr_for_before.as_slice().to_vec(); + before_indptr_vec.write_into(target); + + // node_indptr_for_after as Vec + let after_indptr_vec: Vec = self.node_indptr_for_after.as_slice().to_vec(); + after_indptr_vec.write_into(target); + } + + /// Read this CSR structure from a source, validating decorator IDs against decorator_count. + pub(super) fn read_from( + source: &mut R, + decorator_count: usize, + ) -> Result { + use crate::utils::Deserializable; + + // before_enter_decorators from Vec + let before_decorators_u32: Vec = Deserializable::read_from(source)?; + let before_enter_decorators: Vec = before_decorators_u32 + .into_iter() + .map(|id| { + if id as usize >= decorator_count { + return Err(crate::utils::DeserializationError::InvalidValue(format!( + "DecoratorId {} exceeds decorator count {}", + id, decorator_count + ))); + } + Ok(DecoratorId(id)) + }) + .collect::, _>>()?; + + // after_exit_decorators from Vec + let after_decorators_u32: Vec = Deserializable::read_from(source)?; + let after_exit_decorators: Vec = after_decorators_u32 + .into_iter() + .map(|id| { + if id as usize >= decorator_count { + return Err(crate::utils::DeserializationError::InvalidValue(format!( + "DecoratorId {} exceeds decorator count {}", + id, decorator_count + ))); + } + Ok(DecoratorId(id)) + }) + .collect::, _>>()?; + + // node_indptr_for_before from Vec + let before_indptr_vec: Vec = Deserializable::read_from(source)?; + let node_indptr_for_before = IndexVec::from_raw(before_indptr_vec); + + // node_indptr_for_after from Vec + let after_indptr_vec: Vec = Deserializable::read_from(source)?; + let node_indptr_for_after = IndexVec::from_raw(after_indptr_vec); + + Self::from_components( + before_enter_decorators, + after_exit_decorators, + node_indptr_for_before, + node_indptr_for_after, + ) + .map_err(|e| crate::utils::DeserializationError::InvalidValue(e.to_string())) + } } impl Default for NodeToDecoratorIds { diff --git a/core/src/mast/serialization/mod.rs b/core/src/mast/serialization/mod.rs index 8e4fa4b633..f700566467 100644 --- a/core/src/mast/serialization/mod.rs +++ b/core/src/mast/serialization/mod.rs @@ -161,7 +161,7 @@ impl Deserializable for MastForest { // Reading sections metadata let node_count = source.read_usize()?; - let _decorator_count = source.read_usize()?; + let _decorator_count = source.read_usize()?; // Read for wire format compatibility // Reading procedure roots let roots: Vec = Deserializable::read_from(source)?; From 2df94420509d2aa8799337e106cd222fcfef42cf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fran=C3=A7ois=20Garillot?= Date: Mon, 15 Dec 2025 00:44:05 -0500 Subject: [PATCH 12/19] refactor: address review feedback - Update validation doc comments to reflect valid edge cases - Update serialization format description in mod.rs - Simplify decorator_infos serialization using Vec::write_into --- .../mast/debuginfo/decorator_storage/mod.rs | 22 +++++------- .../mast/debuginfo/decorator_storage/tests.rs | 1 + core/src/mast/debuginfo/mod.rs | 14 ++------ core/src/mast/serialization/mod.rs | 36 ++++++------------- 4 files changed, 21 insertions(+), 52 deletions(-) diff --git a/core/src/mast/debuginfo/decorator_storage/mod.rs b/core/src/mast/debuginfo/decorator_storage/mod.rs index af08efe786..d6c9a28d66 100644 --- a/core/src/mast/debuginfo/decorator_storage/mod.rs +++ b/core/src/mast/debuginfo/decorator_storage/mod.rs @@ -127,22 +127,16 @@ impl OpToDecoratorIds { /// must be monotonically increasing and within bounds of `op_indptr_for_dec_ids`. The slice /// must not be empty, and the first element must be 0. /// - /// # Returns - /// An error if the internal structure is inconsistent. Common issues that cause errors: - /// - Empty `op_indptr_for_dec_ids` or `node_indptr_for_op_idx` vectors - /// - Non-zero first element in either pointer array - /// - Decreasing pointer values (pointers must be monotonically non-decreasing) - /// - Pointer values that exceed the bounds of the arrays they index into - /// - Invalid ranges (start > end) in any pointer window + /// # Valid Edge Cases + /// - All three vectors empty (no nodes, no decorators) + /// - Empty decorator vectors with node pointers all zero (nodes exist but have no decorators) /// - /// # Validation Restrictions - /// The following constraints are enforced between components: - /// - `op_indptr_for_dec_ids` length must be >= 1 (for the sentinel) - /// - `node_indptr_for_op_idx` length must be >= 1 (for the sentinel) + /// # Validation Rules + /// For non-empty structures: + /// - Pointer arrays must start at zero + /// - Pointers must be monotonically non-decreasing /// - Last value in `op_indptr_for_dec_ids` must be <= `decorator_ids.len()` - /// - Last value in `node_indptr_for_op_idx` must be <= `op_indptr_for_dec_ids.len() - 1` - /// - Both `op_indptr_for_dec_ids` and `node_indptr_for_op_idx` must be strictly monotonic (each - /// successive value must be >= the previous one) + /// - Last value in `node_indptr_for_op_idx` must be < `op_indptr_for_dec_ids.len()` pub(super) fn from_components( decorator_ids: Vec, op_indptr_for_dec_ids: Vec, diff --git a/core/src/mast/debuginfo/decorator_storage/tests.rs b/core/src/mast/debuginfo/decorator_storage/tests.rs index 25d1dc10ba..8e309d9a36 100644 --- a/core/src/mast/debuginfo/decorator_storage/tests.rs +++ b/core/src/mast/debuginfo/decorator_storage/tests.rs @@ -771,6 +771,7 @@ fn test_sparse_debuginfo_round_trip() { op_decorator_storage: op_storage, node_decorator_storage: node_storage, error_codes, + procedure_names: BTreeMap::new(), }; // Serialize and deserialize diff --git a/core/src/mast/debuginfo/mod.rs b/core/src/mast/debuginfo/mod.rs index 6f856f1c83..e1af0eead7 100644 --- a/core/src/mast/debuginfo/mod.rs +++ b/core/src/mast/debuginfo/mod.rs @@ -376,12 +376,7 @@ impl Serializable for DebugInfo { decorator_data.write_into(target); string_table.write_into(target); - - // Write decorator count and infos - target.write_usize(decorator_infos.len()); - for decorator_info in decorator_infos { - decorator_info.write_into(target); - } + decorator_infos.write_into(target); // 2. Serialize error codes let error_codes: alloc::collections::BTreeMap = @@ -412,12 +407,7 @@ impl Deserializable for DebugInfo { let decorator_data: Vec = Deserializable::read_from(source)?; let string_table: crate::mast::serialization::StringTable = Deserializable::read_from(source)?; - - let decorator_count: usize = source.read_usize()?; - let mut decorator_infos = Vec::with_capacity(decorator_count); - for _ in 0..decorator_count { - decorator_infos.push(DecoratorInfo::read_from(source)?); - } + let decorator_infos: Vec = Deserializable::read_from(source)?; // 2. Reconstruct decorators let mut decorators = IndexVec::new(); diff --git a/core/src/mast/serialization/mod.rs b/core/src/mast/serialization/mod.rs index f700566467..678c404c67 100644 --- a/core/src/mast/serialization/mod.rs +++ b/core/src/mast/serialization/mod.rs @@ -21,32 +21,16 @@ //! (advice map section) //! - Advice map (AdviceMap) //! -//! (error_codes map section) -//! - Error codes map (BTreeMap) -//! -//! (procedure_names map section) -//! - Procedure names map (BTreeMap) -//! -//! (decorator data section) +//! (DebugInfo section) //! - Decorator data //! - String table -//! -//! (decorator info section) -//! - decorator infos (`Vec`) -//! -//! (basic block decorator lists section) -//! - basic block decorator lists (`Vec<(MastNodeId, Vec<(usize, DecoratorId)>)>`) -//! -//! (before enter and after exit decorators section) -//! - before enter decorators (`Vec<(MastNodeId, Vec)>`) -//! - after exit decorators (`Vec<(MastNodeId, Vec)>`) - -use alloc::{ - collections::BTreeMap, - string::{String, ToString}, - sync::Arc, - vec::Vec, -}; +//! - Decorator infos (`Vec`) +//! - Error codes map (BTreeMap) +//! - OpToDecoratorIds CSR (operation-indexed decorators) +//! - NodeToDecoratorIds CSR (before_enter and after_exit decorators) +//! - Procedure names map (BTreeMap) + +use alloc::vec::Vec; use super::{MastForest, MastNode, MastNodeId}; use crate::{ @@ -215,8 +199,8 @@ impl Deserializable for MastForest { }; // Validate that all procedure name digests correspond to procedure roots in the forest - for digest in procedure_names.keys() { - if mast_forest.find_procedure_root(*digest).is_none() { + for (digest, _) in mast_forest.debug_info.procedure_names() { + if mast_forest.find_procedure_root(digest).is_none() { return Err(DeserializationError::InvalidValue(format!( "procedure name references digest that is not a procedure root: {:?}", digest From ade4dfb2f88e1152d6db71d8391e3246b4d21d30 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fran=C3=A7ois=20Garillot?= Date: Wed, 7 Jan 2026 02:03:39 -0500 Subject: [PATCH 13/19] refactor: decorator_storage/mod.rs -> decorator_storage.rs --- .../debuginfo/{decorator_storage/mod.rs => decorator_storage.rs} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename core/src/mast/debuginfo/{decorator_storage/mod.rs => decorator_storage.rs} (100%) diff --git a/core/src/mast/debuginfo/decorator_storage/mod.rs b/core/src/mast/debuginfo/decorator_storage.rs similarity index 100% rename from core/src/mast/debuginfo/decorator_storage/mod.rs rename to core/src/mast/debuginfo/decorator_storage.rs From ae8ad0e8fcbb9259c5949d1459e38eeef96049d6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fran=C3=A7ois=20Garillot?= Date: Wed, 7 Jan 2026 02:10:31 -0500 Subject: [PATCH 14/19] refactor: move procedure name validation to MastForest::validate --- core/src/mast/mod.rs | 11 +++++++++++ core/src/mast/serialization/mod.rs | 13 ++++--------- 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/core/src/mast/mod.rs b/core/src/mast/mod.rs index 3d6e06da23..550ac96c36 100644 --- a/core/src/mast/mod.rs +++ b/core/src/mast/mod.rs @@ -617,6 +617,7 @@ impl MastForest { /// at assembly time rather than dynamically during execution, and adds comprehensive /// structural validation to prevent deserialization-time panics. pub fn validate(&self) -> Result<(), MastForestError> { + // Validate basic block batch invariants for (node_id_idx, node) in self.nodes.iter().enumerate() { let node_id = MastNodeId::new_unchecked(node_id_idx.try_into().expect("too many nodes")); @@ -626,6 +627,14 @@ impl MastForest { })?; } } + + // Validate that all procedure name digests correspond to procedure roots in the forest + for (digest, _) in self.debug_info.procedure_names() { + if self.find_procedure_root(digest).is_none() { + return Err(MastForestError::InvalidProcedureNameDigest(digest)); + } + } + Ok(()) } } @@ -1015,6 +1024,8 @@ pub enum MastForestError { DigestRequiredForDeserialization, #[error("invalid batch in basic block node {0:?}: {1}")] InvalidBatchPadding(MastNodeId, String), + #[error("procedure name references digest that is not a procedure root: {0:?}")] + InvalidProcedureNameDigest(Word), } // Custom serde implementations for MastForest that handle linked decorators properly diff --git a/core/src/mast/serialization/mod.rs b/core/src/mast/serialization/mod.rs index 678c404c67..7eebd2f690 100644 --- a/core/src/mast/serialization/mod.rs +++ b/core/src/mast/serialization/mod.rs @@ -198,15 +198,10 @@ impl Deserializable for MastForest { mast_forest }; - // Validate that all procedure name digests correspond to procedure roots in the forest - for (digest, _) in mast_forest.debug_info.procedure_names() { - if mast_forest.find_procedure_root(digest).is_none() { - return Err(DeserializationError::InvalidValue(format!( - "procedure name references digest that is not a procedure root: {:?}", - digest - ))); - } - } + // Note: validation of deserialized MastForests (e.g., checking that procedure name + // digests correspond to procedure roots) is intentionally not performed here. + // Callers should use MastForest::validate() if validation is needed. + // See issue #2445 for the plan to address MastForest validation holistically. Ok(mast_forest) } From e3a8a463b98545fed8e473df4288cf7eb05044ad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fran=C3=A7ois=20Garillot?= Date: Wed, 7 Jan 2026 02:15:32 -0500 Subject: [PATCH 15/19] fix: comment on decorator_count --- core/src/mast/serialization/mod.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/core/src/mast/serialization/mod.rs b/core/src/mast/serialization/mod.rs index 7eebd2f690..bb05e7eebd 100644 --- a/core/src/mast/serialization/mod.rs +++ b/core/src/mast/serialization/mod.rs @@ -100,6 +100,7 @@ impl Serializable for MastForest { // decorator & node counts target.write_usize(self.nodes.len()); + // Expected to be used in #2504. Remove if this issue is resolved without using. target.write_usize(self.debug_info.num_decorators()); // roots @@ -145,6 +146,7 @@ impl Deserializable for MastForest { // Reading sections metadata let node_count = source.read_usize()?; + // Expected to be used in #2504. Remove if this issue is resolved without using. let _decorator_count = source.read_usize()?; // Read for wire format compatibility // Reading procedure roots From 98074d150835046613b711561d6ac9b906c294db Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fran=C3=A7ois=20Garillot?= Date: Wed, 7 Jan 2026 02:19:09 -0500 Subject: [PATCH 16/19] chore: remove superfluous saturating_sub --- core/src/mast/debuginfo/decorator_storage.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/mast/debuginfo/decorator_storage.rs b/core/src/mast/debuginfo/decorator_storage.rs index d6c9a28d66..a87be5867d 100644 --- a/core/src/mast/debuginfo/decorator_storage.rs +++ b/core/src/mast/debuginfo/decorator_storage.rs @@ -307,7 +307,7 @@ impl OpToDecoratorIds { } // Node pointers must be valid indices into op_indptr - let max_node_ptr = self.op_indptr_for_dec_ids.len().saturating_sub(1); + let max_node_ptr = self.op_indptr_for_dec_ids.len() - 1; if *node_slice.last().unwrap() > max_node_ptr { return Err(format!( "node_indptr_for_op_idx end {} exceeds op_indptr bounds {}", From 633bf9dd28880f7380433f42115e6839d3b7bd9a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fran=C3=A7ois=20Garillot?= Date: Wed, 7 Jan 2026 04:48:23 -0500 Subject: [PATCH 17/19] refactor: address review comments on DebugInfo serialization - Add Deserializable impl for DecoratorId to simplify deserialization - Simplify write_into/read_from in OpToDecoratorIds and NodeToDecoratorIds to use DecoratorId's serialization traits directly - Serialize indptr arrays as u32 (indices into u32-bounded arrays) - Update serialization format description in module docs - Add comment about procedure name validation at MastForest level - Update trusted source comment for deserialization - Remove unnecessary pub(super) from struct fields --- core/src/mast/debuginfo/decorator_storage.rs | 45 +++------ core/src/mast/debuginfo/mod.rs | 4 +- .../mast/debuginfo/node_decorator_storage.rs | 97 ++++++++----------- core/src/mast/mod.rs | 12 ++- core/src/mast/serialization/mod.rs | 44 ++++----- 5 files changed, 93 insertions(+), 109 deletions(-) diff --git a/core/src/mast/debuginfo/decorator_storage.rs b/core/src/mast/debuginfo/decorator_storage.rs index a87be5867d..7c535a4c8b 100644 --- a/core/src/mast/debuginfo/decorator_storage.rs +++ b/core/src/mast/debuginfo/decorator_storage.rs @@ -60,7 +60,7 @@ use crate::mast::{DecoratedOpLink, DecoratorId, MastNodeId}; pub struct OpToDecoratorIds { /// All the decorator IDs per operation per node, in a CSR relationship with /// node_indptr_for_op_idx and op_indptr_for_dec_ids - pub(super) decorator_ids: Vec, + decorator_ids: Vec, /// For the node whose operation indices are in /// `op_indptr_for_dec_ids[node_start..node_end]`, /// the indices of its i-th operation are at: @@ -68,10 +68,10 @@ pub struct OpToDecoratorIds { /// decorator_ids[op_indptr_for_dec_ids[node_start + i].. /// op_indptr_for_dec_ids[node_start + i + 1]] /// ``` - pub(super) op_indptr_for_dec_ids: Vec, + op_indptr_for_dec_ids: Vec, /// The decorated operation indices for the n-th node are at /// `op_indptr_for_dec_ids[node_indptr_for_op_idx[n]..node_indptr_for_op_idx[n+1]]` - pub(super) node_indptr_for_op_idx: IndexVec, + node_indptr_for_op_idx: IndexVec, } /// Error type for decorator index mapping operations @@ -327,17 +327,15 @@ impl OpToDecoratorIds { pub(super) fn write_into(&self, target: &mut W) { use crate::utils::Serializable; - // Serialize decorator_ids as u32 vector - let decorator_ids_u32: Vec = - self.decorator_ids.iter().map(|id| u32::from(*id)).collect(); - decorator_ids_u32.write_into(target); + // Serialize decorator_ids (DecoratorId serializes as u32) + self.decorator_ids.write_into(target); - // Serialize op_indptr_for_dec_ids + // Serialize indptr arrays as u32. These are indices into u32-bounded arrays, + // so any value > u32::MAX would be invalid. let op_indptr_u32: Vec = self.op_indptr_for_dec_ids.iter().map(|&idx| idx as u32).collect(); op_indptr_u32.write_into(target); - // Serialize node_indptr_for_op_idx let node_indptr_u32: Vec = self.node_indptr_for_op_idx.iter().map(|&idx| idx as u32).collect(); node_indptr_u32.write_into(target); @@ -350,40 +348,29 @@ impl OpToDecoratorIds { ) -> Result { use crate::utils::{Deserializable, DeserializationError}; - // Deserialize decorator_ids - let decorator_ids_u32: Vec = Deserializable::read_from(source)?; - let decorator_ids: Vec = decorator_ids_u32 - .into_iter() - .map(|id| { - if id as usize >= decorator_count { - return Err(DeserializationError::InvalidValue(format!( - "DecoratorId {} exceeds decorator count {}", - id, decorator_count - ))); - } - Ok(DecoratorId(id)) - }) - .collect::, _>>()?; + // Deserialize decorator_ids (DecoratorId deserializes from u32) + let decorator_ids: Vec = Deserializable::read_from(source)?; - // Deserialize op_indptr_for_dec_ids + // Deserialize indptr arrays from u32. These are indices into u32-bounded arrays, + // so any value > u32::MAX would be invalid. let op_indptr_u32: Vec = Deserializable::read_from(source)?; let op_indptr_for_dec_ids: Vec = - op_indptr_u32.iter().map(|&idx| idx as usize).collect(); + op_indptr_u32.into_iter().map(|idx| idx as usize).collect(); - // Deserialize node_indptr_for_op_idx let node_indptr_u32: Vec = Deserializable::read_from(source)?; let node_indptr_for_op_idx: Vec = - node_indptr_u32.iter().map(|&idx| idx as usize).collect(); + node_indptr_u32.into_iter().map(|idx| idx as usize).collect(); // Build the structure let result = Self::from_components( decorator_ids, op_indptr_for_dec_ids, - IndexVec::from_raw(node_indptr_for_op_idx), + IndexVec::try_from(node_indptr_for_op_idx) + .map_err(|e| DeserializationError::InvalidValue(e.to_string()))?, ) .map_err(|e| DeserializationError::InvalidValue(e.to_string()))?; - // Validate CSR structure + // Validate CSR structure (including decorator ID bounds) result.validate_csr(decorator_count).map_err(|e| { DeserializationError::InvalidValue(format!("OpToDecoratorIds validation failed: {e}")) })?; diff --git a/core/src/mast/debuginfo/mod.rs b/core/src/mast/debuginfo/mod.rs index e1af0eead7..9e6f0762e0 100644 --- a/core/src/mast/debuginfo/mod.rs +++ b/core/src/mast/debuginfo/mod.rs @@ -434,7 +434,9 @@ impl Deserializable for DebugInfo { // 5. Read NodeToDecoratorIds CSR (dense representation) let node_decorator_storage = NodeToDecoratorIds::read_from(source, decorators.len())?; - // 6. Read procedure names (marked serde(skip) but we deserialize manually) + // 6. Read procedure names + // Note: Procedure name digests are validated at the MastForest level (in + // MastForest::validate) to ensure they reference actual procedures in the forest. let procedure_names_raw: BTreeMap = Deserializable::read_from(source)?; let procedure_names: BTreeMap> = procedure_names_raw .into_iter() diff --git a/core/src/mast/debuginfo/node_decorator_storage.rs b/core/src/mast/debuginfo/node_decorator_storage.rs index 78191aa352..e756b67679 100644 --- a/core/src/mast/debuginfo/node_decorator_storage.rs +++ b/core/src/mast/debuginfo/node_decorator_storage.rs @@ -36,19 +36,19 @@ use crate::{ #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] pub struct NodeToDecoratorIds { /// All `before_enter` decorators, concatenated across all nodes. - pub(super) before_enter_decorators: Vec, + before_enter_decorators: Vec, /// All `after_exit` decorators, concatenated across all nodes. - pub(super) after_exit_decorators: Vec, + after_exit_decorators: Vec, /// Index pointers for before_enter decorators: the range for node `i` is /// ```text /// node_indptr_for_before[i]..node_indptr_for_before[i+1] /// ``` - pub(super) node_indptr_for_before: IndexVec, + node_indptr_for_before: IndexVec, /// Index pointers for after_exit decorators: the range for node `i` is /// ```text /// node_indptr_for_after[i]..node_indptr_for_after[i+1] /// ``` - pub(super) node_indptr_for_after: IndexVec, + node_indptr_for_after: IndexVec, } impl NodeToDecoratorIds { @@ -321,23 +321,19 @@ impl NodeToDecoratorIds { pub(super) fn write_into(&self, target: &mut W) { use crate::utils::Serializable; - // before_enter_decorators as Vec - let before_decorators_u32: Vec = - self.before_enter_decorators.iter().map(|id| u32::from(*id)).collect(); - before_decorators_u32.write_into(target); + // Serialize decorator vectors (DecoratorId serializes as u32) + self.before_enter_decorators.write_into(target); + self.after_exit_decorators.write_into(target); - // after_exit_decorators as Vec - let after_decorators_u32: Vec = - self.after_exit_decorators.iter().map(|id| u32::from(*id)).collect(); - after_decorators_u32.write_into(target); + // Serialize indptr arrays as u32. These are indices into u32-bounded arrays, + // so any value > u32::MAX would be invalid. + let before_indptr_u32: Vec = + self.node_indptr_for_before.iter().map(|&idx| idx as u32).collect(); + before_indptr_u32.write_into(target); - // node_indptr_for_before as Vec - let before_indptr_vec: Vec = self.node_indptr_for_before.as_slice().to_vec(); - before_indptr_vec.write_into(target); - - // node_indptr_for_after as Vec - let after_indptr_vec: Vec = self.node_indptr_for_after.as_slice().to_vec(); - after_indptr_vec.write_into(target); + let after_indptr_u32: Vec = + self.node_indptr_for_after.iter().map(|&idx| idx as u32).collect(); + after_indptr_u32.write_into(target); } /// Read this CSR structure from a source, validating decorator IDs against decorator_count. @@ -347,51 +343,40 @@ impl NodeToDecoratorIds { ) -> Result { use crate::utils::Deserializable; - // before_enter_decorators from Vec - let before_decorators_u32: Vec = Deserializable::read_from(source)?; - let before_enter_decorators: Vec = before_decorators_u32 - .into_iter() - .map(|id| { - if id as usize >= decorator_count { - return Err(crate::utils::DeserializationError::InvalidValue(format!( - "DecoratorId {} exceeds decorator count {}", - id, decorator_count - ))); - } - Ok(DecoratorId(id)) - }) - .collect::, _>>()?; - - // after_exit_decorators from Vec - let after_decorators_u32: Vec = Deserializable::read_from(source)?; - let after_exit_decorators: Vec = after_decorators_u32 - .into_iter() - .map(|id| { - if id as usize >= decorator_count { - return Err(crate::utils::DeserializationError::InvalidValue(format!( - "DecoratorId {} exceeds decorator count {}", - id, decorator_count - ))); - } - Ok(DecoratorId(id)) - }) - .collect::, _>>()?; + // Deserialize decorator vectors (DecoratorId deserializes from u32) + let before_enter_decorators: Vec = Deserializable::read_from(source)?; + let after_exit_decorators: Vec = Deserializable::read_from(source)?; - // node_indptr_for_before from Vec - let before_indptr_vec: Vec = Deserializable::read_from(source)?; - let node_indptr_for_before = IndexVec::from_raw(before_indptr_vec); + // Deserialize indptr arrays from u32. These are indices into u32-bounded arrays, + // so any value > u32::MAX would be invalid. + let before_indptr_u32: Vec = Deserializable::read_from(source)?; + let node_indptr_for_before = IndexVec::try_from( + before_indptr_u32.into_iter().map(|idx| idx as usize).collect::>(), + ) + .map_err(|e| crate::utils::DeserializationError::InvalidValue(e.to_string()))?; - // node_indptr_for_after from Vec - let after_indptr_vec: Vec = Deserializable::read_from(source)?; - let node_indptr_for_after = IndexVec::from_raw(after_indptr_vec); + let after_indptr_u32: Vec = Deserializable::read_from(source)?; + let node_indptr_for_after = IndexVec::try_from( + after_indptr_u32.into_iter().map(|idx| idx as usize).collect::>(), + ) + .map_err(|e| crate::utils::DeserializationError::InvalidValue(e.to_string()))?; - Self::from_components( + let result = Self::from_components( before_enter_decorators, after_exit_decorators, node_indptr_for_before, node_indptr_for_after, ) - .map_err(|e| crate::utils::DeserializationError::InvalidValue(e.to_string())) + .map_err(|e| crate::utils::DeserializationError::InvalidValue(e.to_string()))?; + + // Validate CSR structure (including decorator ID bounds) + result.validate_csr(decorator_count).map_err(|e| { + crate::utils::DeserializationError::InvalidValue(format!( + "NodeToDecoratorIds validation failed: {e}" + )) + })?; + + Ok(result) } } diff --git a/core/src/mast/mod.rs b/core/src/mast/mod.rs index 550ac96c36..7ad58e555b 100644 --- a/core/src/mast/mod.rs +++ b/core/src/mast/mod.rs @@ -29,7 +29,10 @@ pub use node::{ use crate::{ AdviceMap, AssemblyOp, Decorator, Felt, Idx, LexicographicWord, Word, - utils::{ByteWriter, DeserializationError, Serializable, hash_string_to_word}, + utils::{ + ByteReader, ByteWriter, Deserializable, DeserializationError, Serializable, + hash_string_to_word, + }, }; mod debuginfo; @@ -989,6 +992,13 @@ impl Serializable for DecoratorId { } } +impl Deserializable for DecoratorId { + fn read_from(source: &mut R) -> Result { + let value = u32::read_from(source)?; + Ok(Self(value)) + } +} + /// Derives an error code from an error message by hashing the message and returning the 0th element /// of the resulting [`Word`]. pub fn error_code_from_msg(msg: impl AsRef) -> Felt { diff --git a/core/src/mast/serialization/mod.rs b/core/src/mast/serialization/mod.rs index bb05e7eebd..a6e5ff5797 100644 --- a/core/src/mast/serialization/mod.rs +++ b/core/src/mast/serialization/mod.rs @@ -1,34 +1,33 @@ //! The serialization format of MastForest is as follows: //! //! (Metadata) -//! - MAGIC -//! - VERSION +//! - MAGIC (5 bytes) +//! - VERSION (3 bytes) //! -//! (sections metadata) -//! - nodes length (`usize`) -//! - decorator data section offset (`usize`) (not implemented, see issue #1580) -//! - decorators length (`usize`) +//! (Counts) +//! - nodes count (`usize`) +//! - decorators count (`usize`) - reserved for future use in lazy loading (#2504) //! -//! (procedure roots section) -//! - procedure roots (`Vec`) +//! (Procedure roots section) +//! - procedure roots (`Vec` as MastNodeId values) //! -//! (basic block data section) -//! - basic block data +//! (Basic block data section) +//! - basic block data (padded operations + batch metadata) //! -//! (node info section) +//! (Node info section) //! - MAST node infos (`Vec`) //! -//! (advice map section) -//! - Advice map (AdviceMap) +//! (Advice map section) +//! - Advice map (`AdviceMap`) //! //! (DebugInfo section) -//! - Decorator data -//! - String table +//! - Decorator data (raw bytes for decorator payloads) +//! - String table (deduplicated strings) //! - Decorator infos (`Vec`) -//! - Error codes map (BTreeMap) -//! - OpToDecoratorIds CSR (operation-indexed decorators) -//! - NodeToDecoratorIds CSR (before_enter and after_exit decorators) -//! - Procedure names map (BTreeMap) +//! - Error codes map (`BTreeMap`) +//! - OpToDecoratorIds CSR (operation-indexed decorators, dense representation) +//! - NodeToDecoratorIds CSR (before_enter and after_exit decorators, dense representation) +//! - Procedure names map (`BTreeMap`) use alloc::vec::Vec; @@ -200,10 +199,11 @@ impl Deserializable for MastForest { mast_forest }; - // Note: validation of deserialized MastForests (e.g., checking that procedure name + // Note: Full validation of deserialized MastForests (e.g., checking that procedure name // digests correspond to procedure roots) is intentionally not performed here. - // Callers should use MastForest::validate() if validation is needed. - // See issue #2445 for the plan to address MastForest validation holistically. + // The serialized format is expected to come from a trusted source (e.g., the assembler + // or a verified package). Callers should use MastForest::validate() if validation of + // untrusted input is needed. Ok(mast_forest) } From 90133eeb2f33b48a868f6d60b66a493dd3e8c01c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fran=C3=A7ois=20Garillot?= Date: Wed, 7 Jan 2026 04:48:30 -0500 Subject: [PATCH 18/19] feat(utils-indexing): implement Serializable/Deserializable for IndexVec Add TryFrom> impl that validates length <= u32::MAX, enforcing the u32-indexed invariant. Deserialization uses this for validation. --- Cargo.lock | 1 + crates/utils-indexing/Cargo.toml | 1 + crates/utils-indexing/src/lib.rs | 53 ++++++++++++++++++++++++++------ 3 files changed, 46 insertions(+), 9 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 74ce246c14..2a558accd5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1538,6 +1538,7 @@ dependencies = [ name = "miden-utils-indexing" version = "0.21.0" dependencies = [ + "miden-crypto", "serde", "thiserror", ] diff --git a/crates/utils-indexing/Cargo.toml b/crates/utils-indexing/Cargo.toml index 44871d50ef..44f03c0fc5 100644 --- a/crates/utils-indexing/Cargo.toml +++ b/crates/utils-indexing/Cargo.toml @@ -19,6 +19,7 @@ serde = ["dep:serde"] [dependencies] # Required dependencies +miden-crypto.workspace = true thiserror.workspace = true # Optional dependencies diff --git a/crates/utils-indexing/src/lib.rs b/crates/utils-indexing/src/lib.rs index 6b1ce63d5c..c0794e6a14 100644 --- a/crates/utils-indexing/src/lib.rs +++ b/crates/utils-indexing/src/lib.rs @@ -145,15 +145,6 @@ impl IndexVec { self.raw } - /// Create an IndexVec from a raw Vec. - /// - /// This is useful for deserialization or when reconstructing an IndexVec - /// from previously extracted components. - #[inline] - pub fn from_raw(raw: Vec) -> Self { - Self { raw, _m: PhantomData } - } - /// Remove an element at the specified index and return it. pub fn swap_remove(&mut self, index: usize) -> T { self.raw.swap_remove(index) @@ -316,6 +307,50 @@ impl<'a, I: Idx, T> IntoIterator for &'a IndexVec { } } +impl TryFrom> for IndexVec { + type Error = IndexedVecError; + + /// Create an IndexVec from a Vec. + /// + /// Returns an error if the Vec length exceeds u32::MAX. + fn try_from(raw: Vec) -> Result { + if raw.len() > u32::MAX as usize { + return Err(IndexedVecError::TooManyItems); + } + Ok(Self { raw, _m: PhantomData }) + } +} + +// SERIALIZATION +// ================================================================================================ + +use miden_crypto::utils::{ + ByteReader, ByteWriter, Deserializable, DeserializationError, Serializable, +}; + +impl Serializable for IndexVec +where + I: Idx, + T: Serializable, +{ + fn write_into(&self, target: &mut W) { + self.as_slice().write_into(target); + } +} + +impl Deserializable for IndexVec +where + I: Idx, + T: Deserializable, +{ + fn read_from(source: &mut R) -> Result { + let vec: Vec = Deserializable::read_from(source)?; + IndexVec::try_from(vec).map_err(|_| { + DeserializationError::InvalidValue("IndexVec length exceeds u32::MAX".into()) + }) + } +} + #[cfg(test)] mod tests { use alloc::string::{String, ToString}; From 7dc6b6e4a884a65f5c9d31eee4eb87916a6c73a7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fran=C3=A7ois=20Garillot?= Date: Wed, 7 Jan 2026 05:43:20 -0500 Subject: [PATCH 19/19] refactor: serialize CSR indptr arrays as usize directly The indptr arrays (op_indptr_for_dec_ids, node_indptr_for_op_idx, node_indptr_for_before, node_indptr_for_after) are indices that can exceed u32::MAX since decorator_ids can grow arbitrarily large due to decorator reuse across many operations. Use winter-utils Serializable/Deserializable impls for usize and the IndexVec impls to serialize these arrays directly. --- core/src/mast/debuginfo/decorator_storage.rs | 39 ++++--------------- .../mast/debuginfo/node_decorator_storage.rs | 31 +++------------ 2 files changed, 13 insertions(+), 57 deletions(-) diff --git a/core/src/mast/debuginfo/decorator_storage.rs b/core/src/mast/debuginfo/decorator_storage.rs index 7c535a4c8b..126c3b9b1b 100644 --- a/core/src/mast/debuginfo/decorator_storage.rs +++ b/core/src/mast/debuginfo/decorator_storage.rs @@ -327,18 +327,9 @@ impl OpToDecoratorIds { pub(super) fn write_into(&self, target: &mut W) { use crate::utils::Serializable; - // Serialize decorator_ids (DecoratorId serializes as u32) self.decorator_ids.write_into(target); - - // Serialize indptr arrays as u32. These are indices into u32-bounded arrays, - // so any value > u32::MAX would be invalid. - let op_indptr_u32: Vec = - self.op_indptr_for_dec_ids.iter().map(|&idx| idx as u32).collect(); - op_indptr_u32.write_into(target); - - let node_indptr_u32: Vec = - self.node_indptr_for_op_idx.iter().map(|&idx| idx as u32).collect(); - node_indptr_u32.write_into(target); + self.op_indptr_for_dec_ids.write_into(target); + self.node_indptr_for_op_idx.write_into(target); } /// Deserialize OpToDecoratorIds from the source reader. @@ -348,29 +339,15 @@ impl OpToDecoratorIds { ) -> Result { use crate::utils::{Deserializable, DeserializationError}; - // Deserialize decorator_ids (DecoratorId deserializes from u32) let decorator_ids: Vec = Deserializable::read_from(source)?; + let op_indptr_for_dec_ids: Vec = Deserializable::read_from(source)?; + let node_indptr_for_op_idx: IndexVec = + Deserializable::read_from(source)?; - // Deserialize indptr arrays from u32. These are indices into u32-bounded arrays, - // so any value > u32::MAX would be invalid. - let op_indptr_u32: Vec = Deserializable::read_from(source)?; - let op_indptr_for_dec_ids: Vec = - op_indptr_u32.into_iter().map(|idx| idx as usize).collect(); - - let node_indptr_u32: Vec = Deserializable::read_from(source)?; - let node_indptr_for_op_idx: Vec = - node_indptr_u32.into_iter().map(|idx| idx as usize).collect(); - - // Build the structure - let result = Self::from_components( - decorator_ids, - op_indptr_for_dec_ids, - IndexVec::try_from(node_indptr_for_op_idx) - .map_err(|e| DeserializationError::InvalidValue(e.to_string()))?, - ) - .map_err(|e| DeserializationError::InvalidValue(e.to_string()))?; + let result = + Self::from_components(decorator_ids, op_indptr_for_dec_ids, node_indptr_for_op_idx) + .map_err(|e| DeserializationError::InvalidValue(e.to_string()))?; - // Validate CSR structure (including decorator ID bounds) result.validate_csr(decorator_count).map_err(|e| { DeserializationError::InvalidValue(format!("OpToDecoratorIds validation failed: {e}")) })?; diff --git a/core/src/mast/debuginfo/node_decorator_storage.rs b/core/src/mast/debuginfo/node_decorator_storage.rs index e756b67679..3c976941df 100644 --- a/core/src/mast/debuginfo/node_decorator_storage.rs +++ b/core/src/mast/debuginfo/node_decorator_storage.rs @@ -321,19 +321,10 @@ impl NodeToDecoratorIds { pub(super) fn write_into(&self, target: &mut W) { use crate::utils::Serializable; - // Serialize decorator vectors (DecoratorId serializes as u32) self.before_enter_decorators.write_into(target); self.after_exit_decorators.write_into(target); - - // Serialize indptr arrays as u32. These are indices into u32-bounded arrays, - // so any value > u32::MAX would be invalid. - let before_indptr_u32: Vec = - self.node_indptr_for_before.iter().map(|&idx| idx as u32).collect(); - before_indptr_u32.write_into(target); - - let after_indptr_u32: Vec = - self.node_indptr_for_after.iter().map(|&idx| idx as u32).collect(); - after_indptr_u32.write_into(target); + self.node_indptr_for_before.write_into(target); + self.node_indptr_for_after.write_into(target); } /// Read this CSR structure from a source, validating decorator IDs against decorator_count. @@ -343,23 +334,12 @@ impl NodeToDecoratorIds { ) -> Result { use crate::utils::Deserializable; - // Deserialize decorator vectors (DecoratorId deserializes from u32) let before_enter_decorators: Vec = Deserializable::read_from(source)?; let after_exit_decorators: Vec = Deserializable::read_from(source)?; - // Deserialize indptr arrays from u32. These are indices into u32-bounded arrays, - // so any value > u32::MAX would be invalid. - let before_indptr_u32: Vec = Deserializable::read_from(source)?; - let node_indptr_for_before = IndexVec::try_from( - before_indptr_u32.into_iter().map(|idx| idx as usize).collect::>(), - ) - .map_err(|e| crate::utils::DeserializationError::InvalidValue(e.to_string()))?; - - let after_indptr_u32: Vec = Deserializable::read_from(source)?; - let node_indptr_for_after = IndexVec::try_from( - after_indptr_u32.into_iter().map(|idx| idx as usize).collect::>(), - ) - .map_err(|e| crate::utils::DeserializationError::InvalidValue(e.to_string()))?; + let node_indptr_for_before: IndexVec = + Deserializable::read_from(source)?; + let node_indptr_for_after: IndexVec = Deserializable::read_from(source)?; let result = Self::from_components( before_enter_decorators, @@ -369,7 +349,6 @@ impl NodeToDecoratorIds { ) .map_err(|e| crate::utils::DeserializationError::InvalidValue(e.to_string()))?; - // Validate CSR structure (including decorator ID bounds) result.validate_csr(decorator_count).map_err(|e| { crate::utils::DeserializationError::InvalidValue(format!( "NodeToDecoratorIds validation failed: {e}"