diff --git a/Cargo.toml b/Cargo.toml index 5962f18..82fc7be 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -41,10 +41,10 @@ thiserror = "2.0" ssz = { package = "ethereum_ssz", version = "0.10.0" } -p3-field = { git = "https://github.com/Plonky3/Plonky3.git", rev = "a33a312" } -p3-baby-bear = { git = "https://github.com/Plonky3/Plonky3.git", rev = "a33a312" } -p3-koala-bear = { git = "https://github.com/Plonky3/Plonky3.git", rev = "a33a312" } -p3-symmetric = { git = "https://github.com/Plonky3/Plonky3.git", rev = "a33a312" } +p3-field = { git = "https://github.com/Plonky3/Plonky3.git", rev = "d421e32" } +p3-baby-bear = { git = "https://github.com/Plonky3/Plonky3.git", rev = "d421e32" } +p3-koala-bear = { git = "https://github.com/Plonky3/Plonky3.git", rev = "d421e32" } +p3-symmetric = { git = "https://github.com/Plonky3/Plonky3.git", rev = "d421e32" } [dev-dependencies] criterion = "0.7" diff --git a/src/array.rs b/src/array.rs index c144725..a21f965 100644 --- a/src/array.rs +++ b/src/array.rs @@ -26,6 +26,19 @@ impl<const N: usize> DerefMut for FieldArray<N> { } } +impl<const N: usize> FieldArray<N> { + /// View a mutable slice of `FieldArray` as a mutable slice of raw `[F; N]` arrays. + /// + /// This is a zero-cost transmute enabled by the `#[repr(transparent)]` layout. + #[inline] + pub fn as_raw_slice_mut(s: &mut [Self]) -> &mut [[F; N]] { + // SAFETY: `FieldArray` is `#[repr(transparent)]` over `[F; N]`. + // + // So `&mut [FieldArray]` and `&mut [[F; N]]` have identical layouts. + unsafe { &mut *(std::ptr::from_mut::<[Self]>(s) as *mut [[F; N]]) } + } +} + impl<const N: usize> From<[F; N]> for FieldArray<N> { fn from(arr: [F; N]) -> Self { Self(arr) diff --git a/src/simd_utils.rs b/src/simd_utils.rs index a181a6d..57a1f42 100644 --- a/src/simd_utils.rs +++ b/src/simd_utils.rs @@ -31,39 +31,6 @@ pub fn pack_array<const N: usize>(data: &[FieldArray<N>]) -> [PackedF; N] { array::from_fn(|i| PackedF::from_fn(|j| data[j][i])) } -/// Unpacks SIMD vertical layout back into scalar arrays. 
-/// -/// Transposes from vertical layout `[PackedF; N]` to horizontal layout `[FieldArray; WIDTH]`. -/// -/// This is the inverse operation of `pack_array`. The output buffer must be preallocated -/// with size `[WIDTH]` where `WIDTH = PackedF::WIDTH`, and each element is a `FieldArray`. -/// -/// Input layout (vertical): each PackedF holds one element from each array -/// ```text -/// packed_data[0] = PackedF([a0, b0, c0, ...]) -/// packed_data[1] = PackedF([a1, b1, c1, ...]) -/// packed_data[2] = PackedF([a2, b2, c2, ...]) -/// ... -/// ``` -/// -/// Output layout (horizontal): each FieldArray is one complete array -/// ```text -/// output[0] = FieldArray([a0, a1, a2, ..., aN]) -/// output[1] = FieldArray([b0, b1, b2, ..., bN]) -/// output[2] = FieldArray([c0, c1, c2, ..., cN]) -/// ... -/// ``` -#[inline] -pub fn unpack_array<const N: usize>(packed_data: &[PackedF; N], output: &mut [FieldArray<N>]) { - // Optimized for cache locality: iterate over output lanes first - #[allow(clippy::needless_range_loop)] - for j in 0..PackedF::WIDTH { - for i in 0..N { - output[j].0[i] = packed_data[i].as_slice()[j]; - } - } -} - /// Pack even-indexed FieldArrays (stride 2) directly into destination. /// /// Packs `data[0], data[2], data[4], ...` into `dest[offset..offset+N]`. 
@@ -145,25 +112,6 @@ mod tests { } } - #[test] - fn test_unpack_array_simple() { - // Create packed data - let packed: [PackedF; 2] = [ - PackedF::from_fn(|i| F::from_u64(i as u64)), - PackedF::from_fn(|i| F::from_u64((i + 100) as u64)), - ]; - - // Unpack - let mut output = [FieldArray([F::ZERO; 2]); PackedF::WIDTH]; - unpack_array(&packed, &mut output); - - // Verify - for (lane, arr) in output.iter().enumerate() { - assert_eq!(arr[0], F::from_u64(lane as u64)); - assert_eq!(arr[1], F::from_u64((lane + 100) as u64)); - } - } - #[test] fn test_pack_preserves_element_order() { // Create data where each array has sequential values @@ -189,26 +137,6 @@ mod tests { } } - #[test] - fn test_unpack_preserves_element_order() { - // Create packed data with known pattern - let packed: [PackedF; 3] = [ - PackedF::from_fn(|i| F::from_u64((i * 3) as u64)), - PackedF::from_fn(|i| F::from_u64((i * 3 + 1) as u64)), - PackedF::from_fn(|i| F::from_u64((i * 3 + 2) as u64)), - ]; - - let mut output = [FieldArray([F::ZERO; 3]); PackedF::WIDTH]; - unpack_array(&packed, &mut output); - - // Verify each array has sequential values - for (lane, arr) in output.iter().enumerate() { - assert_eq!(arr[0], F::from_u64((lane * 3) as u64)); - assert_eq!(arr[1], F::from_u64((lane * 3 + 1) as u64)); - assert_eq!(arr[2], F::from_u64((lane * 3 + 2) as u64)); - } - } - proptest! 
{ #[test] fn proptest_pack_unpack_roundtrip( @@ -224,7 +152,7 @@ mod tests { // Pack and unpack let packed = pack_array(&original); let mut unpacked = [FieldArray([F::ZERO; 10]); PackedF::WIDTH]; - unpack_array(&packed, &mut unpacked); + PackedF::unpack_into(&packed, FieldArray::as_raw_slice_mut(&mut unpacked)); // Verify roundtrip prop_assert_eq!(original, unpacked); diff --git a/src/symmetric/tweak_hash/poseidon.rs b/src/symmetric/tweak_hash/poseidon.rs index 7ab2d7b..3f3c885 100644 --- a/src/symmetric/tweak_hash/poseidon.rs +++ b/src/symmetric/tweak_hash/poseidon.rs @@ -9,7 +9,7 @@ use crate::TWEAK_SEPARATOR_FOR_TREE_HASH; use crate::array::FieldArray; use crate::poseidon2_16; use crate::poseidon2_24; -use crate::simd_utils::{pack_array, pack_even_into, pack_fn_into, pack_odd_into, unpack_array}; +use crate::simd_utils::{pack_array, pack_even_into, pack_fn_into, pack_odd_into}; use crate::symmetric::prf::Pseudorandom; use crate::symmetric::tweak_hash::chain; use crate::{F, PackedF}; @@ -418,7 +418,7 @@ impl< ); // Unpack directly to output slice - unpack_array(&packed_parents, parents_chunk); + PackedF::unpack_into(&packed_parents, FieldArray::as_raw_slice_mut(parents_chunk)); }); // Handle remainder (elements that don't fill a complete SIMD batch) @@ -618,7 +618,7 @@ impl< // // Convert from vertical packing back to scalar layout. // Each lane becomes one leaf in the output slice. - unpack_array(&packed_leaves, leaves_chunk); + PackedF::unpack_into(&packed_leaves, FieldArray::as_raw_slice_mut(leaves_chunk)); }); // HANDLE REMAINDER EPOCHS