Add stripped (debug-info-free) serialization for `MastForest`.

Summary of the patch below:
  * CHANGELOG.md: record the new API.
  * core/src/mast/mod.rs: add the public `MastForest::write_stripped` entry point.
  * core/src/mast/serialization/mod.rs: the header becomes 4 magic bytes + 1 flags byte +
    3 version bytes; flag bit 0 marks a forest written without its DebugInfo section, and
    any other flag bit is rejected on read.
  * core/src/mast/serialization/tests.rs: header layout, stripped round-trip and
    unknown-flag rejection tests.

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 27dd55cd7f..639a3c5933 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -29,6 +29,7 @@
 - [BREAKING] Change serialization of `BasicBlockNode`s to use padded indices ([#2466](https://github.com/0xMiden/miden-vm/pull/2466/)).
 - Change padded serialization of `BasicBlockNode`s to use delta-encoded metadata ([#2469](https://github.com/0xMiden/miden-vm/pull/2469/)).
 - Change (de)serialization of `MastForest` to directly (de)serialize DebugInfo ([#2470](https://github.com/0xMiden/miden-vm/pull/2470/)).
+- Add API to serialize the `MastForest` without `DebugInfo` ([#2549](https://github.com/0xMiden/miden-vm/pull/2549)).
 
 ## 0.20.2 (TBD)
 - Fix issue where decorator access was not bypassed properly in release mode ([#2529](https://github.com/0xMiden/miden-vm/pull/2529)).
diff --git a/core/src/mast/mod.rs b/core/src/mast/mod.rs
index 7ad58e555b..cdb04b7625 100644
--- a/core/src/mast/mod.rs
+++ b/core/src/mast/mod.rs
@@ -443,6 +443,39 @@ impl MastForest {
     pub fn advice_map_mut(&mut self) -> &mut AdviceMap {
         &mut self.advice_map
     }
+
+    // SERIALIZATION
+    // --------------------------------------------------------------------------------------------
+
+    /// Serializes this MastForest without debug information.
+    ///
+    /// This produces a smaller output by omitting decorators, error codes, and procedure names.
+    /// The resulting bytes can be deserialized with the standard [`Deserializable`] impl,
+    /// which auto-detects the format and creates an empty [`DebugInfo`].
+    ///
+    /// Use this for production builds where debug info is not needed.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use miden_core::{mast::MastForest, utils::Serializable};
+    ///
+    /// let forest = MastForest::new();
+    ///
+    /// // Full serialization (with debug info)
+    /// let full_bytes = forest.to_bytes();
+    ///
+    /// // Stripped serialization (without debug info)
+    /// let mut stripped_bytes = Vec::new();
+    /// forest.write_stripped(&mut stripped_bytes);
+    ///
+    /// // Both can be deserialized the same way
+    /// // let restored = MastForest::read_from_bytes(&stripped_bytes).unwrap();
+    /// ```
+    pub fn write_stripped<W: ByteWriter>(&self, target: &mut W) {
+        use serialization::StrippedMastForest;
+        StrippedMastForest(self).write_into(target);
+    }
 }
 
 // ------------------------------------------------------------------------------------------------
diff --git a/core/src/mast/serialization/mod.rs b/core/src/mast/serialization/mod.rs
index a6e5ff5797..c7fc71c1e1 100644
--- a/core/src/mast/serialization/mod.rs
+++ b/core/src/mast/serialization/mod.rs
@@ -1,12 +1,11 @@
 //! The serialization format of MastForest is as follows:
 //!
 //! (Metadata)
-//! - MAGIC (5 bytes)
-//! - VERSION (3 bytes)
+//! - MAGIC (4 bytes) + FLAGS (1 byte) + VERSION (3 bytes)
 //!
 //! (Counts)
 //! - nodes count (`usize`)
-//! - decorators count (`usize`) - reserved for future use in lazy loading (#2504)
+//! - decorators count (`usize`) - 0 if stripped, reserved for future use in lazy loading (#2504)
 //!
 //! (Procedure roots section)
 //! - procedure roots (`Vec<u32>` as MastNodeId values)
@@ -20,7 +19,7 @@
 //! (Advice map section)
 //! - Advice map (`AdviceMap`)
 //!
-//! (DebugInfo section)
+//! (DebugInfo section - omitted if FLAGS bit 0 is set)
 //! - Decorator data (raw bytes for decorator payloads)
 //! - String table (deduplicated strings)
 //! - Decorator infos (`Vec`)
@@ -28,6 +27,12 @@
 //! - OpToDecoratorIds CSR (operation-indexed decorators, dense representation)
 //! - NodeToDecoratorIds CSR (before_enter and after_exit decorators, dense representation)
 //! - Procedure names map (`BTreeMap`)
+//!
+//! # Stripped Format
+//!
+//! When serializing with [`MastForest::write_stripped`], the FLAGS byte has bit 0 set
+//! and the entire DebugInfo section is omitted. Deserialization auto-detects the format
+//! and creates an empty `DebugInfo` with valid CSR structures when reading stripped files.
 
 use alloc::vec::Vec;
 
@@ -69,8 +74,27 @@ type StringIndex = usize;
 // CONSTANTS
 // ================================================================================================
 
-/// Magic string for detecting that a file is binary-encoded MAST.
-const MAGIC: &[u8; 5] = b"MAST\0";
+/// Magic bytes for detecting that a file is binary-encoded MAST.
+///
+/// The format uses 4 bytes for identification followed by a flags byte:
+/// - Bytes 0-3: `b"MAST"` - Magic identifier
+/// - Byte 4: Flags byte (see [`FLAG_STRIPPED`] and [`FLAGS_RESERVED_MASK`] constants)
+///
+/// This design repurposes the original null terminator (`b"MAST\0"`) as a flags byte,
+/// maintaining backward compatibility: old files have flags=0x00 (the null byte),
+/// which means "debug info present".
+const MAGIC: &[u8; 4] = b"MAST";
+
+/// Flag indicating debug info is stripped from the serialized MastForest.
+///
+/// When this bit is set in the flags byte, the DebugInfo section is omitted entirely.
+/// The deserializer will create an empty `DebugInfo` with valid CSR structures.
+const FLAG_STRIPPED: u8 = 0x01;
+
+/// Mask for reserved flag bits that must be zero.
+///
+/// Bits 1-7 are reserved for future use. If any are set, deserialization fails.
+const FLAGS_RESERVED_MASK: u8 = 0xfe;
 
 /// The format version.
 ///
@@ -83,7 +107,7 @@ const MAGIC: &[u8; 5] = b"MAST\0";
 /// - [0, 0, 1]: Added batch metadata to basic blocks (operations serialized in padded form with
 ///   indptr, padding, and group metadata for exact OpBatch reconstruction). Direct decorator
 ///   serialization in CSR format (eliminates per-node decorator sections and round-trip
-///   conversions).
+///   conversions). Header changed from `MAST\0` to `MAST` + flags byte.
 const VERSION: [u8; 3] = [0, 0, 1];
 
 // MAST FOREST SERIALIZATION/DESERIALIZATION
@@ -91,16 +115,28 @@ const VERSION: [u8; 3] = [0, 0, 1];
 
 impl Serializable for MastForest {
     fn write_into<W: ByteWriter>(&self, target: &mut W) {
+        self.write_into_with_options(target, false);
+    }
+}
+
+impl MastForest {
+    /// Internal serialization with options.
+    ///
+    /// When `stripped` is true, the DebugInfo section is omitted and the FLAGS byte
+    /// has bit 0 set.
+    fn write_into_with_options<W: ByteWriter>(&self, target: &mut W, stripped: bool) {
         let mut basic_block_data_builder = BasicBlockDataBuilder::new();
 
-        // magic & version
+        // magic & flags
         target.write_bytes(MAGIC);
+        target.write_u8(if stripped { FLAG_STRIPPED } else { 0x00 });
+
+        // version
         target.write_bytes(&VERSION);
 
-        // decorator & node counts
+        // node & decorator counts
         target.write_usize(self.nodes.len());
-        // Expected to be used in #2504. Remove if this issue is resolved without using.
-        target.write_usize(self.debug_info.num_decorators());
+        target.write_usize(if stripped { 0 } else { self.debug_info.num_decorators() });
 
         // roots
         let roots: Vec<u32> = self.roots.iter().copied().map(u32::from).collect();
@@ -132,20 +168,20 @@ impl Serializable for MastForest {
 
         self.advice_map.write_into(target);
 
-        // Serialize DebugInfo directly (includes decorators, error_codes, CSR structures,
-        // and procedure_names)
-        self.debug_info.write_into(target);
+        // Serialize DebugInfo only if not stripped
+        if !stripped {
+            self.debug_info.write_into(target);
+        }
     }
 }
 
 impl Deserializable for MastForest {
     fn read_from<R: ByteReader>(source: &mut R) -> Result<Self, DeserializationError> {
-        read_and_validate_magic(source)?;
-        read_and_validate_version(source)?;
+        let flags = read_and_validate_header(source)?;
+        let is_stripped = flags & FLAG_STRIPPED != 0;
 
         // Reading sections metadata
         let node_count = source.read_usize()?;
-        // Expected to be used in #2504. Remove if this issue is resolved without using.
         let _decorator_count = source.read_usize()?; // Read for wire format compatibility
 
         // Reading procedure roots
@@ -158,9 +194,12 @@ impl Deserializable for MastForest {
 
         let advice_map = AdviceMap::read_from(source)?;
 
-        // Deserialize DebugInfo directly (includes decorators, error_codes, CSR structures,
-        // and procedure_names)
-        let debug_info = super::DebugInfo::read_from(source)?;
+        // Deserialize DebugInfo or create empty one if stripped
+        let debug_info = if is_stripped {
+            super::DebugInfo::empty_for_nodes(node_count)
+        } else {
+            super::DebugInfo::read_from(source)?
+        };
 
         // Constructing MastForest
         let mast_forest = {
@@ -209,27 +248,37 @@ impl Deserializable for MastForest {
     }
 }
 
-fn read_and_validate_magic<R: ByteReader>(source: &mut R) -> Result<[u8; 5], DeserializationError> {
-    let magic: [u8; 5] = source.read_array()?;
+/// Reads and validates the MAST header (magic, flags, version).
+///
+/// Returns the flags byte on success.
+fn read_and_validate_header<R: ByteReader>(source: &mut R) -> Result<u8, DeserializationError> {
+    // Read magic
+    let magic: [u8; 4] = source.read_array()?;
     if magic != *MAGIC {
         return Err(DeserializationError::InvalidValue(format!(
             "Invalid magic bytes. Expected '{:?}', got '{:?}'",
             *MAGIC, magic
         )));
     }
-    Ok(magic)
-}
 
-fn read_and_validate_version<R: ByteReader>(
-    source: &mut R,
-) -> Result<[u8; 3], DeserializationError> {
+    // Read and validate flags
+    let flags: u8 = source.read_u8()?;
+    if flags & FLAGS_RESERVED_MASK != 0 {
+        return Err(DeserializationError::InvalidValue(format!(
+            "Unknown flags set in MAST header: {:#04x}. Reserved bits must be zero.",
+            flags & FLAGS_RESERVED_MASK
+        )));
+    }
+
+    // Read and validate version
     let version: [u8; 3] = source.read_array()?;
     if version != VERSION {
         return Err(DeserializationError::InvalidValue(format!(
             "Unsupported version. Got '{version:?}', but only '{VERSION:?}' is supported",
         )));
     }
-    Ok(version)
+
+    Ok(flags)
 }
 
 fn node_infos_iter<'a, R>(
@@ -248,3 +297,22 @@ where
         Some(MastNodeInfo::read_from(source))
     })
 }
+
+// STRIPPED SERIALIZATION
+// ================================================================================================
+
+/// Wrapper for serializing a [`MastForest`] without debug information.
+///
+/// This newtype enables an alternative serialization format that omits the DebugInfo section,
+/// producing smaller output files suitable for production deployment where debug info is not
+/// needed.
+///
+/// The resulting bytes can be deserialized with the standard [`Deserializable`] impl for
+/// [`MastForest`], which auto-detects the format via the flags byte in the header.
+pub(super) struct StrippedMastForest<'a>(pub(super) &'a MastForest);
+
+impl Serializable for StrippedMastForest<'_> {
+    fn write_into<W: ByteWriter>(&self, target: &mut W) {
+        self.0.write_into_with_options(target, true);
+    }
+}
diff --git a/core/src/mast/serialization/tests.rs b/core/src/mast/serialization/tests.rs
index ec6ef1a475..b0a2fd5232 100644
--- a/core/src/mast/serialization/tests.rs
+++ b/core/src/mast/serialization/tests.rs
@@ -570,14 +570,16 @@ fn mast_forest_deserialize_invalid_ops_offset_fails() {
     use crate::utils::SliceReader;
     let mut reader = SliceReader::new(&serialized);
 
-    let _: [u8; 8] = reader.read_array().unwrap(); // magic + version
+    let _: [u8; 8] = reader.read_array().unwrap(); // magic (4) + flags (1) + version (3)
     let _node_count: usize = reader.read().unwrap();
     let _decorator_count: usize = reader.read().unwrap();
     let _roots: Vec<u32> = Deserializable::read_from(&mut reader).unwrap();
     let basic_block_data: Vec<u8> = Deserializable::read_from(&mut reader).unwrap();
 
-    // Calculate offset to MastNodeInfo
-    let node_info_offset = 5 + 3 + 8 + 8 + 8 + 4 + 8 + basic_block_data.len();
+    // Calculate offset to MastNodeInfo:
+    // magic (4) + flags (1) + version (3) + node_count (8) + decorator_count (8) +
+    // roots_len (8) + 1 root (4) + bb_data_len (8) + bb_data
+    let node_info_offset = 4 + 1 + 3 + 8 + 8 + 8 + 4 + 8 + basic_block_data.len();
 
     // Corrupt the ops_offset field with an out-of-bounds value
     let block_discriminant: u64 = 3;
@@ -876,6 +878,161 @@ fn test_batched_construction_preserves_structure() {
 // PROPTEST-BASED ROUND-TRIP SERIALIZATION TESTS
 // ================================================================================================
 
+/// Test that the new header format is backward compatible (flags=0x00).
+#[test]
+fn test_header_backward_compatible() {
+    let mut forest = MastForest::new();
+    let block_id = BasicBlockNodeBuilder::new(vec![Operation::Add], Vec::new())
+        .add_to_forest(&mut forest)
+        .unwrap();
+    forest.make_root(block_id);
+
+    let bytes = forest.to_bytes();
+
+    // Check header structure: MAST (4 bytes) + flags (1 byte) + version (3 bytes)
+    assert_eq!(&bytes[0..4], b"MAST", "Magic should be MAST");
+    assert_eq!(bytes[4], 0x00, "Flags should be 0x00 for full serialization");
+    assert_eq!(&bytes[5..8], &[0, 0, 1], "Version should be [0, 0, 1]");
+}
+
+/// Test that stripped serialization produces smaller output than full serialization.
+#[test]
+fn test_stripped_serialization_smaller_than_full() {
+    let mut forest = MastForest::new();
+
+    // Add decorators
+    let decorator_id = forest.add_decorator(Decorator::Trace(42)).unwrap();
+
+    let operations = vec![Operation::Add, Operation::Mul, Operation::Drop];
+    let block_id = BasicBlockNodeBuilder::new(operations, vec![(0, decorator_id)])
+        .add_to_forest(&mut forest)
+        .unwrap();
+    forest.make_root(block_id);
+
+    // Add procedure name for more debug info
+    let digest = forest[block_id].digest();
+    forest.insert_procedure_name(digest, "test_proc".into());
+
+    let full_bytes = forest.to_bytes();
+
+    let mut stripped_bytes = Vec::new();
+    forest.write_stripped(&mut stripped_bytes);
+
+    assert!(
+        stripped_bytes.len() < full_bytes.len(),
+        "Stripped ({} bytes) should be smaller than full ({} bytes)",
+        stripped_bytes.len(),
+        full_bytes.len()
+    );
+}
+
+/// Test that stripped serialization round-trips correctly with empty DebugInfo.
+#[test]
+fn test_stripped_serialization_roundtrip() {
+    let mut forest = MastForest::new();
+
+    // Add decorators
+    let decorator_id = forest.add_decorator(Decorator::Trace(42)).unwrap();
+
+    let operations = vec![Operation::Add, Operation::Mul, Operation::Drop];
+    let block_id = BasicBlockNodeBuilder::new(operations, vec![(0, decorator_id)])
+        .add_to_forest(&mut forest)
+        .unwrap();
+    forest.make_root(block_id);
+
+    // Add procedure name and error code
+    let digest = forest[block_id].digest();
+    forest.insert_procedure_name(digest, "test_proc".into());
+    let _ = forest.register_error("test error".into());
+
+    // Serialize stripped
+    let mut stripped_bytes = Vec::new();
+    forest.write_stripped(&mut stripped_bytes);
+
+    // Deserialize
+    let restored = MastForest::read_from_bytes(&stripped_bytes).unwrap();
+
+    // Verify structure is preserved
+    assert_eq!(forest.num_nodes(), restored.num_nodes());
+    assert_eq!(forest.procedure_roots().len(), restored.procedure_roots().len());
+
+    // Verify debug info is empty
+    assert!(
+        restored.debug_info.is_empty(),
+        "DebugInfo should be empty after stripped roundtrip"
+    );
+    assert_eq!(restored.decorators().len(), 0);
+    assert_eq!(restored.procedure_name(&digest), None);
+}
+
+/// Test that stripped serialization sets the correct header flags.
+#[test]
+fn test_stripped_header_flags() {
+    let mut forest = MastForest::new();
+    let block_id = BasicBlockNodeBuilder::new(vec![Operation::Add], Vec::new())
+        .add_to_forest(&mut forest)
+        .unwrap();
+    forest.make_root(block_id);
+
+    let mut stripped_bytes = Vec::new();
+    forest.write_stripped(&mut stripped_bytes);
+
+    // Check header structure
+    assert_eq!(&stripped_bytes[0..4], b"MAST", "Magic should be MAST");
+    assert_eq!(stripped_bytes[4], 0x01, "Flags should be 0x01 for stripped serialization");
+    assert_eq!(&stripped_bytes[5..8], &[0, 0, 1], "Version should be [0, 0, 1]");
+}
+
+/// Test that node digests are preserved in stripped serialization.
+#[test]
+fn test_stripped_preserves_digests() {
+    let mut forest = MastForest::new();
+
+    let decorator_id = forest.add_decorator(Decorator::Trace(1)).unwrap();
+
+    let block1_id = BasicBlockNodeBuilder::new(vec![Operation::Add], vec![(0, decorator_id)])
+        .add_to_forest(&mut forest)
+        .unwrap();
+    let block2_id = BasicBlockNodeBuilder::new(vec![Operation::Mul], Vec::new())
+        .add_to_forest(&mut forest)
+        .unwrap();
+    let join_id = JoinNodeBuilder::new([block1_id, block2_id]).add_to_forest(&mut forest).unwrap();
+    forest.make_root(join_id);
+
+    // Capture original digests
+    let original_digests: Vec<_> = forest.nodes().iter().map(|n| n.digest()).collect();
+
+    // Stripped roundtrip
+    let mut stripped_bytes = Vec::new();
+    forest.write_stripped(&mut stripped_bytes);
+    let restored = MastForest::read_from_bytes(&stripped_bytes).unwrap();
+
+    // Verify digests match
+    let restored_digests: Vec<_> = restored.nodes().iter().map(|n| n.digest()).collect();
+    assert_eq!(original_digests, restored_digests, "Node digests should be preserved");
+}
+
+/// Test that deserialization rejects unknown flags.
+#[test]
+fn test_deserialize_rejects_unknown_flags() {
+    let mut forest = MastForest::new();
+    let block_id = BasicBlockNodeBuilder::new(vec![Operation::Add], Vec::new())
+        .add_to_forest(&mut forest)
+        .unwrap();
+    forest.make_root(block_id);
+
+    let mut bytes = forest.to_bytes();
+
+    // Set an unknown flag (bit 1)
+    bytes[4] = 0x02;
+
+    let result = MastForest::read_from_bytes(&bytes);
+    assert_matches!(
+        result,
+        Err(DeserializationError::InvalidValue(msg)) if msg.contains("Reserved bits must be zero")
+    );
+}
+
 mod proptests {
     use proptest::{prelude::*, strategy::Just};
 
@@ -1118,6 +1275,55 @@ mod proptests {
                 );
             }
         }
+
+        /// Property test: stripped serialization should preserve node structure
+        #[test]
+        fn proptest_stripped_roundtrip(
+            forest in any_with::<MastForest>(MastForestParams {
+                decorators: 10,
+                blocks: 1..=5,
+                max_joins: 3,
+                max_splits: 2,
+                max_loops: 2,
+                max_calls: 2,
+                max_syscalls: 0,
+                max_externals: 1,
+                max_dyns: 1,
+            })
+        ) {
+            // Stripped serialization
+            let mut stripped_bytes = Vec::new();
+            forest.write_stripped(&mut stripped_bytes);
+
+            // Deserialize
+            let restored = MastForest::read_from_bytes(&stripped_bytes)
+                .expect("Stripped deserialization should succeed");
+
+            // Verify node count matches
+            prop_assert_eq!(
+                forest.num_nodes(),
+                restored.num_nodes(),
+                "Node count should match"
+            );
+
+            // Verify all node digests match
+            for (idx, original) in forest.nodes().iter().enumerate() {
+                let node_id = crate::mast::MastNodeId::new_unchecked(idx as u32);
+                let restored_node = &restored[node_id];
+
+                prop_assert_eq!(
+                    original.digest(),
+                    restored_node.digest(),
+                    "Node {:?} digest mismatch", node_id
+                );
+            }
+
+            // Verify debug info is empty
+            prop_assert!(
+                restored.debug_info.is_empty(),
+                "DebugInfo should be empty after stripped roundtrip"
+            );
+        }
     }
 }
 