diff --git a/CHANGELOG.md b/CHANGELOG.md index 25a0433038..ef6637e4b3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -27,6 +27,7 @@ - [BREAKING] Change backend from winterfell to Plonky3 ([#2472](https://github.com/0xMiden/miden-vm/pull/2472)). - Added validation of `core_trace_fragment_size` in `ExecutionOptions` ([#2528](https://github.com/0xMiden/miden-vm/pull/2528)). - [BREAKING] Change serialization of `BasicBlockNode`s to use padded indices ([#2466](https://github.com/0xMiden/miden-vm/pull/2466/)). +- Change padded serialization of `BasicBlockNode`s to use delta-encoded metadata ([#2469](https://github.com/0xMiden/miden-vm/pull/2469/)). ## 0.20.2 (TBD) - Fix issue where decorator access was not bypassed properly in release mode ([#2529](https://github.com/0xMiden/miden-vm/pull/2529)). diff --git a/Cargo.lock b/Cargo.lock index 1c08d662b2..74ce246c14 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1274,6 +1274,7 @@ dependencies = [ "num-traits", "proptest", "proptest-derive", + "rstest", "serde", "serde_json", "thiserror", diff --git a/core/Cargo.toml b/core/Cargo.toml index d5aae58826..43ebbd4111 100644 --- a/core/Cargo.toml +++ b/core/Cargo.toml @@ -66,5 +66,6 @@ criterion = { workspace = true } insta.workspace = true miden-test-serde-macros.workspace = true proptest.workspace = true +rstest = { version = "0.26" } serde_json = { workspace = true } miden-utils-testing.workspace = true diff --git a/core/src/mast/node/basic_block_node/mod.rs b/core/src/mast/node/basic_block_node/mod.rs index ad3ffd9932..d95263c97b 100644 --- a/core/src/mast/node/basic_block_node/mod.rs +++ b/core/src/mast/node/basic_block_node/mod.rs @@ -526,7 +526,7 @@ impl BasicBlockNode { let indptr = batch.indptr(); let ops = batch.ops(); - // indptr should be monotonic non-decreasing + // indptr should be monotonic non-decreasing in valid prefix for i in 0..batch.num_groups() { if indptr[i] > indptr[i + 1] { return Err(format!( @@ -540,8 +540,32 @@ impl BasicBlockNode { } } + // Full array must be monotonic for serialization (delta encoding) + for i in 0..indptr.len() - 1 { + if indptr[i] > indptr[i + 1] { + return Err(format!( + "Batch {}: indptr[{}] {} > indptr[{}] {} - full array not monotonic (required for serialization)", + batch_idx, + i, + indptr[i], + i + 1, + indptr[i + 1] + )); + } + } + // All indptr values should be within ops bounds let ops_len = ops.len(); + + // Final indptr value must equal ops.len() + if indptr[indptr.len() - 1] != ops_len { + return Err(format!( + "Batch {}: final indptr value {} doesn't match ops.len() {}", + batch_idx, + indptr[indptr.len() - 1], + ops_len + )); + } for (i, &indptr_val) in indptr.iter().enumerate().take(batch.num_groups() + 1) { if indptr_val > ops_len { return Err(format!( diff --git a/core/src/mast/node/basic_block_node/op_batch.rs b/core/src/mast/node/basic_block_node/op_batch.rs index 26f684765d..ff6af9392b 100644 --- a/core/src/mast/node/basic_block_node/op_batch.rs +++ b/core/src/mast/node/basic_block_node/op_batch.rs @@ -17,22 +17,20 @@ use super::{BATCH_SIZE, Felt, GROUP_SIZE, Operation, ZERO}; pub struct OpBatch { /// A list of operations in this batch, including padding noops. pub(super) ops: Vec, - /// An array of indexes in the ops array, marking the beginning and end of each group. + /// Indexes marking the start and end of each group in the ops array. /// - /// The array maintains the invariant that the i-th group (i <= BATCH_SIZE-1) is at - /// `self.ops[self.indptr[i]..self.indptr[i+1]]`. + /// Group i is at `self.ops[self.indptr[i]..self.indptr[i+1]]`. Groups with immediate values + /// have zero-length slices. /// - /// By convention, the groups containing immediate values have a zero-length slice of the ops - /// array. + /// Only `[0..num_groups+1]` is valid data. The tail is undefined but must be monotonic + /// (filled with final ops count) for delta encoding during serialization. pub(super) indptr: [usize; Self::BATCH_SIZE_PLUS_ONE], - /// An array of bits representing whether a group had undergone padding + /// Whether each group had padding added. Only `[0..num_groups]` is valid. pub(super) padding: [bool; BATCH_SIZE], - /// Value of groups in the batch, which includes operations and immediate values. + /// Group hashes and immediate values. Only `[0..num_groups]` is valid. pub(super) groups: [Felt; BATCH_SIZE], - /// Number of groups in this batch. - /// - /// The arrays above are meaningful in their [0..self.num_groups] prefix - /// (or [0..self.num_groups + 1] in the case of the indptr array). + /// Number of groups. Determines valid prefix sizes: indptr `[0..num_groups+1]`, padding and + /// groups `[0..num_groups]`. pub(super) num_groups: usize, } @@ -134,7 +132,62 @@ impl OpBatch { groups: [Felt; BATCH_SIZE], num_groups: usize, ) -> Self { - Self { ops, indptr, padding, groups, num_groups } + let batch = Self { ops, indptr, padding, groups, num_groups }; + #[cfg(debug_assertions)] + batch.validate_invariants(); + batch + } + + /// Validates invariants in debug builds: num_groups in range, indptr monotonic (full array), + /// final indptr equals ops.len(). + #[cfg(debug_assertions)] + fn validate_invariants(&self) { + // Validate num_groups is in valid range + assert!( + self.num_groups <= BATCH_SIZE, + "num_groups {} exceeds BATCH_SIZE {}", + self.num_groups, + BATCH_SIZE + ); + + // Validate indptr starts at 0 + assert_eq!(self.indptr[0], 0, "indptr must start at 0, got {}", self.indptr[0]); + + // Validate monotonicity in the semantically valid prefix [0..num_groups+1] + for i in 0..self.num_groups { + assert!( + self.indptr[i] <= self.indptr[i + 1], + "indptr not monotonic in valid prefix: indptr[{}]={} > indptr[{}]={}", + i, + self.indptr[i], + i + 1, + self.indptr[i + 1] + ); + } + + // Validate monotonicity across ENTIRE array (required for serialization) + for i in 0..Self::BATCH_SIZE_PLUS_ONE - 1 { + assert!( + self.indptr[i] <= self.indptr[i + 1], + "indptr not monotonic at index {}: indptr[{}]={} > indptr[{}]={} \ + (full array monotonicity required for delta encoding)", + i, + i, + self.indptr[i], + i + 1, + self.indptr[i + 1] + ); + } + + // Validate final indptr value matches ops length + let final_indptr = self.indptr[Self::BATCH_SIZE_PLUS_ONE - 1]; + assert_eq!( + final_indptr, + self.ops.len(), + "final indptr value {} doesn't match ops.len() {}", + final_indptr, + self.ops.len() + ); } /// Returns the (op_group_idx, op_idx_in_group) given an operation index in the batch. Returns @@ -327,13 +380,26 @@ impl OpBatchAccumulator { self.pad_if_needed(); self.finalize_indptr(); - OpBatch { + // Fill the unused tail of indptr array with the final value to maintain monotonicity + // This is required for delta encoding which expects indptr to be monotonically + // non-decreasing + let final_ops_count = self.ops.len(); + for i in self.next_group_idx..OpBatch::BATCH_SIZE_PLUS_ONE { + self.indptr[i] = final_ops_count; + } + + let batch = OpBatch { ops: self.ops, indptr: self.indptr, padding: self.padding, groups: self.groups, num_groups: self.next_group_idx, - } + }; + + #[cfg(debug_assertions)] + batch.validate_invariants(); + + batch } // HELPER METHODS diff --git a/core/src/mast/node/basic_block_node/snapshots/miden_core__mast__node__basic_block_node__tests__batch_ops_1.snap b/core/src/mast/node/basic_block_node/snapshots/miden_core__mast__node__basic_block_node__tests__batch_ops_1.snap index e5f60bfb11..278ad59536 100644 --- a/core/src/mast/node/basic_block_node/snapshots/miden_core__mast__node__basic_block_node__tests__batch_ops_1.snap +++ b/core/src/mast/node/basic_block_node/snapshots/miden_core__mast__node__basic_block_node__tests__batch_ops_1.snap @@ -1,5 +1,6 @@ --- source: core/src/mast/node/basic_block_node/tests.rs +assertion_line: 16 expression: batches --- [ @@ -10,13 +11,13 @@ expression: batches indptr: [ 0, 1, - 0, - 0, - 0, - 0, - 0, - 0, - 0, + 1, + 1, + 1, + 1, + 1, + 1, + 1, ], padding: [ false, diff --git a/core/src/mast/node/basic_block_node/snapshots/miden_core__mast__node__basic_block_node__tests__batch_ops_2.snap b/core/src/mast/node/basic_block_node/snapshots/miden_core__mast__node__basic_block_node__tests__batch_ops_2.snap index 6c5fb8f11d..7aa2d6032c 100644 --- a/core/src/mast/node/basic_block_node/snapshots/miden_core__mast__node__basic_block_node__tests__batch_ops_2.snap +++ b/core/src/mast/node/basic_block_node/snapshots/miden_core__mast__node__basic_block_node__tests__batch_ops_2.snap @@ -1,5 +1,6 @@ --- source: core/src/mast/node/basic_block_node/tests.rs +assertion_line: 30 expression: batches --- [ @@ -11,13 +12,13 @@ expression: batches indptr: [ 0, 2, - 0, - 0, - 0, - 0, - 0, - 0, - 0, + 2, + 2, + 2, + 2, + 2, + 2, + 2, ], padding: [ false, diff --git a/core/src/mast/node/basic_block_node/snapshots/miden_core__mast__node__basic_block_node__tests__batch_ops_3.snap b/core/src/mast/node/basic_block_node/snapshots/miden_core__mast__node__basic_block_node__tests__batch_ops_3.snap index 5e064c86fc..7af7a80b76 100644 --- a/core/src/mast/node/basic_block_node/snapshots/miden_core__mast__node__basic_block_node__tests__batch_ops_3.snap +++ b/core/src/mast/node/basic_block_node/snapshots/miden_core__mast__node__basic_block_node__tests__batch_ops_3.snap @@ -1,5 +1,6 @@ --- source: core/src/mast/node/basic_block_node/tests.rs +assertion_line: 44 expression: batches --- [ @@ -15,12 +16,12 @@ expression: batches 0, 3, 3, - 0, - 0, - 0, - 0, - 0, - 0, + 3, + 3, + 3, + 3, + 3, + 3, ], padding: [ true, diff --git a/core/src/mast/node/basic_block_node/snapshots/miden_core__mast__node__basic_block_node__tests__batch_ops_5.snap b/core/src/mast/node/basic_block_node/snapshots/miden_core__mast__node__basic_block_node__tests__batch_ops_5.snap index da56b45aa4..44ed6ac46d 100644 --- a/core/src/mast/node/basic_block_node/snapshots/miden_core__mast__node__basic_block_node__tests__batch_ops_5.snap +++ b/core/src/mast/node/basic_block_node/snapshots/miden_core__mast__node__basic_block_node__tests__batch_ops_5.snap @@ -1,5 +1,6 @@ --- source: core/src/mast/node/basic_block_node/tests.rs +assertion_line: 101 expression: batches --- [ @@ -72,12 +73,12 @@ expression: batches 0, 2, 2, - 0, - 0, - 0, - 0, - 0, - 0, + 2, + 2, + 2, + 2, + 2, + 2, ], padding: [ true, diff --git a/core/src/mast/node/basic_block_node/snapshots/miden_core__mast__node__basic_block_node__tests__batch_ops_6.snap b/core/src/mast/node/basic_block_node/snapshots/miden_core__mast__node__basic_block_node__tests__batch_ops_6.snap index e5d2b73b55..3a4935201c 100644 --- a/core/src/mast/node/basic_block_node/snapshots/miden_core__mast__node__basic_block_node__tests__batch_ops_6.snap +++ b/core/src/mast/node/basic_block_node/snapshots/miden_core__mast__node__basic_block_node__tests__batch_ops_6.snap @@ -1,5 +1,6 @@ --- source: core/src/mast/node/basic_block_node/tests.rs +assertion_line: 139 expression: batches --- [ @@ -26,10 +27,10 @@ expression: batches 9, 9, 10, - 0, - 0, - 0, - 0, + 10, + 10, + 10, + 10, ], padding: [ false, diff --git a/core/src/mast/node/basic_block_node/snapshots/miden_core__mast__node__basic_block_node__tests__batch_ops_7.snap b/core/src/mast/node/basic_block_node/snapshots/miden_core__mast__node__basic_block_node__tests__batch_ops_7.snap index 038c2b3d76..cd940c0e2f 100644 --- a/core/src/mast/node/basic_block_node/snapshots/miden_core__mast__node__basic_block_node__tests__batch_ops_7.snap +++ b/core/src/mast/node/basic_block_node/snapshots/miden_core__mast__node__basic_block_node__tests__batch_ops_7.snap @@ -1,5 +1,6 @@ --- source: core/src/mast/node/basic_block_node/tests.rs +assertion_line: 171 expression: batches --- [ @@ -25,10 +26,10 @@ expression: batches 10, 10, 11, - 0, - 0, - 0, - 0, + 11, + 11, + 11, + 11, ], padding: [ false, diff --git a/core/src/mast/node/basic_block_node/snapshots/miden_core__mast__node__basic_block_node__tests__batch_ops_8.snap b/core/src/mast/node/basic_block_node/snapshots/miden_core__mast__node__basic_block_node__tests__batch_ops_8.snap index 9290ad8328..26017b0a87 100644 --- a/core/src/mast/node/basic_block_node/snapshots/miden_core__mast__node__basic_block_node__tests__batch_ops_8.snap +++ b/core/src/mast/node/basic_block_node/snapshots/miden_core__mast__node__basic_block_node__tests__batch_ops_8.snap @@ -1,5 +1,6 @@ --- source: core/src/mast/node/basic_block_node/tests.rs +assertion_line: 203 expression: batches --- [ @@ -27,10 +28,10 @@ expression: batches 9, 11, 11, - 0, - 0, - 0, - 0, + 11, + 11, + 11, + 11, ], padding: [ true, diff --git a/core/src/mast/node/basic_block_node/snapshots/miden_core__mast__node__basic_block_node__tests__batch_ops_9.snap b/core/src/mast/node/basic_block_node/snapshots/miden_core__mast__node__basic_block_node__tests__batch_ops_9.snap index 50400c4a9e..da7e882355 100644 --- a/core/src/mast/node/basic_block_node/snapshots/miden_core__mast__node__basic_block_node__tests__batch_ops_9.snap +++ b/core/src/mast/node/basic_block_node/snapshots/miden_core__mast__node__basic_block_node__tests__batch_ops_9.snap @@ -1,5 +1,6 @@ --- source: core/src/mast/node/basic_block_node/tests.rs +assertion_line: 246 expression: batches --- [ @@ -78,12 +79,12 @@ expression: batches 0, 2, 2, - 0, - 0, - 0, - 0, - 0, - 0, + 2, + 2, + 2, + 2, + 2, + 2, ], padding: [ false, diff --git a/core/src/mast/serialization/basic_blocks.rs b/core/src/mast/serialization/basic_blocks.rs index 0737ecb8b2..adffdf7062 100644 --- a/core/src/mast/serialization/basic_blocks.rs +++ b/core/src/mast/serialization/basic_blocks.rs @@ -4,17 +4,17 @@ //! //! - Padded operations (variable size) //! - Batch count (4 bytes) -//! - Indptr array per batch (9 bytes each) +//! - Delta-encoded indptr per batch (4 bytes each: 8 deltas × 4 bits, packed) //! - Padding flags per batch (1 byte each, bit-packed) //! -//! **Total**: `ops_size + 4 + (10 * num_batches)` bytes +//! **Total**: `ops_size + 4 + (5 * num_batches)` bytes use alloc::vec::Vec; use super::NodeDataOffset; use crate::{ Operation, - mast::BasicBlockNode, + mast::{BasicBlockNode, OP_GROUP_SIZE}, utils::{ByteReader, DeserializationError, Serializable, SliceReader}, }; @@ -54,13 +54,11 @@ impl BasicBlockDataBuilder { // Write number of batches (num_batches as u32).write_into(&mut self.node_data); - // Write indptr arrays for each batch (9 u8s per batch, since max index is 72) + // Write delta-encoded indptr arrays for each batch (4 bytes per batch) for batch in op_batches { let indptr = batch.indptr(); - for &idx in indptr { - debug_assert!(idx <= 72, "batch index {} exceeds maximum of 72", idx); - (idx as u8).write_into(&mut self.node_data); - } + let packed = pack_indptr_deltas(indptr); + packed.write_into(&mut self.node_data); } // Write padding metadata (1 byte per batch, bit-packed) @@ -84,6 +82,58 @@ impl BasicBlockDataBuilder { } } +// INDPTR DELTA ENCODING +// ================================================================================================ + +/// Packs 8 indptr deltas into 4 bytes (4 bits each). Elides indptr[0] which is always 0. +/// +/// Requires full array monotonicity. OpBatch only semantically uses the `[0..num_groups+1]` +/// prefix, but the tail must be filled (with final ops count) to avoid underflow when +/// computing deltas for serialization. +fn pack_indptr_deltas(indptr: &[usize; 9]) -> [u8; 4] { + debug_assert_eq!(indptr[0], 0, "indptr must start at 0"); + + let mut packed = [0u8; 4]; + for i in 0..8 { + let delta = indptr[i + 1] - indptr[i]; + debug_assert!(delta <= 9, "delta {} exceeds maximum of 9", delta); + + let byte_idx = i / 2; + let nibble_shift = (i % 2) * 4; + packed[byte_idx] |= (delta as u8) << nibble_shift; + } + packed +} + +/// Unpacks 4 bytes of delta-encoded indptr into a full indptr array. +/// +/// Validates that each delta is in [0, GROUP_SIZE] and reconstructs the cumulative indptr array +/// starting from the implicit indptr[0] = 0. +/// +/// # Errors +/// +/// Returns `DeserializationError::InvalidValue` if any delta exceeds GROUP_SIZE. +fn unpack_indptr_deltas(packed: &[u8; 4]) -> Result<[usize; 9], DeserializationError> { + let mut indptr = [0usize; 9]; + + for i in 0..8 { + let byte_idx = i / 2; + let nibble_shift = (i % 2) * 4; + let delta = ((packed[byte_idx] >> nibble_shift) & 0x0f) as usize; + + if delta > OP_GROUP_SIZE { + return Err(DeserializationError::InvalidValue(format!( + "indptr delta {} exceeds maximum of {} at position {} (operation groups comprise at most {} ops)", + delta, OP_GROUP_SIZE, i, OP_GROUP_SIZE + ))); + } + + indptr[i + 1] = indptr[i] + delta; + } + + Ok(indptr) +} + // BASIC BLOCK DATA DECODER // ================================================================================================ @@ -127,14 +177,11 @@ impl BasicBlockDataDecoder<'_> { let num_batches: u32 = ops_data_reader.read()?; let num_batches = num_batches as usize; - // Read indptr arrays (9 u8s per batch) + // Read delta-encoded indptr arrays (4 bytes per batch) let mut batch_indptrs: Vec<[usize; 9]> = Vec::with_capacity(num_batches); for _ in 0..num_batches { - let mut indptr = [0usize; 9]; - for idx in indptr.iter_mut() { - let val: u8 = ops_data_reader.read()?; - *idx = val as usize; - } + let packed: [u8; 4] = ops_data_reader.read()?; + let indptr = unpack_indptr_deltas(&packed)?; batch_indptrs.push(indptr); } @@ -207,3 +254,37 @@ impl BasicBlockDataDecoder<'_> { Ok(op_batches) } } + +#[cfg(test)] +mod tests { + use alloc::string::ToString; + + use rstest::rstest; + + use super::*; + + #[rstest] + #[case::all_empty([0, 0, 0, 0, 0, 0, 0, 0, 0])] + #[case::max_deltas([0, 9, 18, 27, 36, 45, 54, 63, 72])] + #[case::min_non_zero_deltas([0, 1, 2, 3, 4, 5, 6, 7, 8])] + #[case::mixed_deltas([0, 3, 6, 9, 12, 15, 18, 21, 24])] + #[case::some_zero_deltas([0, 0, 5, 5, 10, 10, 15, 15, 20])] + fn test_pack_unpack_indptr_roundtrip(#[case] indptr: [usize; 9]) { + let packed = pack_indptr_deltas(&indptr); + let unpacked = unpack_indptr_deltas(&packed).unwrap(); + assert_eq!(indptr, unpacked); + } + + #[rstest] + #[case::delta_10_position_0([0x0a, 0x00, 0x00, 0x00], "delta 10 exceeds maximum of 9")] + #[case::delta_15_position_0([0x0f, 0x00, 0x00, 0x00], "delta 15 exceeds maximum of 9")] + #[case::delta_10_position_1([0x0a, 0x00, 0x00, 0x00], "delta 10 exceeds maximum of 9")] + #[case::delta_11_position_3([0x00, 0xb0, 0x00, 0x00], "delta 11 exceeds maximum of 9")] + #[case::delta_14_position_7([0x00, 0x00, 0x00, 0x0e], "delta 14 exceeds maximum of 9")] + fn test_unpack_invalid_delta(#[case] packed: [u8; 4], #[case] expected_msg: &str) { + let result = unpack_indptr_deltas(&packed); + assert!(result.is_err()); + let err_msg = result.unwrap_err().to_string(); + assert!(err_msg.contains(expected_msg)); + } +}