diff --git a/CHANGELOG.md b/CHANGELOG.md
index 4a6c0f9799..b84bf90d9c 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,6 +4,7 @@
#### Enhancements
+- Added new `memcopy` procedure for the `std::mem` module. ([#2361](https://github.com/0xMiden/miden-vm/pull/2361)).
- Added new `adv.push_mapval_count` instruction ([#2349](https://github.com/0xMiden/miden-vm/pull/2349)).
- Added new `memcopy_elements` procedure for the `std::mem` module ([#2352](https://github.com/0xMiden/miden-vm/pull/2352)).
- Added new `peek` procedure for the `std::collections::smt` module ([#2387](https://github.com/0xMiden/miden-vm/pull/2387)).
diff --git a/docs/src/user_docs/libcore/mem.md b/docs/src/user_docs/libcore/mem.md
index 641bf7a157..e82b572279 100644
--- a/docs/src/user_docs/libcore/mem.md
+++ b/docs/src/user_docs/libcore/mem.md
@@ -8,8 +8,9 @@ Module `miden::core::mem` contains a set of utility procedures for working with
| Procedure | Description |
| -------------------------------------- | ------------- |
+| `memcopy` | Copies `n` elements from `read_ptr` to `write_ptr`.
If possible, this procedure will copy word-aligned words internally using `memcopy_words` procedure, decreasing the total number of cycles required to make a copy.
It is advised to copy small chunks of memory (num_elements < 12) using `memcopy_elements` procedure instead, since it won't have a computation logic overhead.
**Inputs:** `[n, read_ptr, write_ptr]`
**Outputs:** `[]`
Total cycles:
- Read and write pointers are mutually word-unaligned: $27 + 14 * num\_elements$
- if `n` is less than 12 elements: 27 + 14 * num_elements
- Read and write pointers are mutually word-aligned: $74 + 14 * num\_prefix\_elements + 16 * num\_words + 14 * num\_suffix\_elements$
|
| `memcopy_words` | Copies `n` words from `read_ptr` to `write_ptr`.
`read_ptr` and `write_ptr` pointers *must be* word-aligned.
**Inputs:** `[n, read_ptr, write_ptr]`
**Outputs:** `[]`
Total cycles: $15 + 16 * num\_words$ |
-| `memcopy_elements` | Copies `n` elements from `read_ptr` to `write_ptr`.
**Inputs:** `[n, read_ptr, write_ptr]`
**Outputs:** `[]`
Total cycles: $7 + 14 * num\_elements$ |
+| `memcopy_elements` | Copies `n` elements from `read_ptr` to `write_ptr`.
It is advised to copy big chunks of memory (num_elements >= 12) using `memcopy` procedure instead, since it will handle the memory more efficiently.
**Inputs:** `[n, read_ptr, write_ptr]`
**Outputs:** `[]`
Total cycles: $7 + 14 * num\_elements$ |
| `pipe_double_words_to_memory` | Copies an even number of words from the advice_stack to memory.
**Inputs:** `[C, B, A, write_ptr, end_ptr]`
**Outputs:** `[C, B, A, write_ptr]`
Notice that the `end_ptr - write_ptr` value must be positive and a multiple of 8.
Total cycles: $9 + 6 * num\_word\_pairs$ |
| `pipe_words_to_memory` | Copies an arbitrary number of words from the advice stack to memory.
**Inputs:** `[num_words, write_ptr]`
**Outputs:** `[C, B, A, write_ptr']`
Total cycles:- Even `num_words`: $43 + 9 * num\_words / 2$
- Odd `num_words`: $60 + 9 * round\_down(num\_words / 2)$
|
| `pipe_preimage_to_memory` | Moves an arbitrary number of words from the advice stack to memory and asserts it matches the commitment.
**Inputs:** `[num_words, write_ptr, COMMITMENT]`
**Outputs:** `[write_ptr']`
Total cycles:- Even `num_words`: $62 + 9 * num\_words / 2$
- Odd `num_words`: $79 + 9 * round\_down(num\_words / 2)$
|
diff --git a/libcore/asm/mem.masm b/libcore/asm/mem.masm
index 939f1e2041..4f5b424d02 100644
--- a/libcore/asm/mem.masm
+++ b/libcore/asm/mem.masm
@@ -2,6 +2,137 @@ use miden::core::crypto::hashes::rpo256
# ===== MEMORY FUNCTIONS ==========================================================================
+#! Copies `n` elements from `read_ptr` to `write_ptr`.
+#!
+#! If possible, this procedure will copy word-aligned words internally using `memcopy_words`
+#! procedure, decreasing the total number of cycles required to make a copy.
+#!
+#! It is advised to copy small chunks of memory (num_elements < 12) using `memcopy_elements`
+#! procedure instead, since it won't have a computation logic overhead.
+#!
+#! Inputs: [n, read_ptr, write_ptr]
+#! Outputs: []
+#!
+#! Where:
+#! - n is the number of elements which should be copied.
+#! - read_ptr is the memory pointer where the elements to copy are stored.
+#! - write_ptr is the memory pointer where the elements will be copied.
+#!
+#! Panics if:
+#! - read_ptr and write_ptr are not a u32 values.
+#!
+#! Total cycles:
+#! - if read and write pointers are mutually word-unaligned: 27 + 14 * num_elements
+#! - if `n` is less than 12 elements: 27 + 14 * num_elements
+#! - if read and write pointers are mutually mutually word-aligned, it's equal to the sum of:
+#! - prologue: 19 cycles
+#! - prefix handling: 31 + 14 * num_prefix_elements, where num_prefix_elements is
+#! the number of elements required to make the read (and write) pointer word-aligned
+#! - words handling: 15 + 16 * num_words
+#! - suffix handling: 9 + 14 * num_suffix_elements
+#! - in total: 74 + 14 * num_prefix_elements + 16 * num_words + 14 * num_suffix_elements
+pub proc memcopy
+ # check whether the difference between `read_ptr` and `write_ptr` is a multiple of 4, otherwise
+ # these pointers will never be simultaneously word-alined, so we won't be able to use
+ # `memcopy_words`
+ movdn.2 u32assert2.err="memory addresses must be valid u32 values" # 2 cycles
+ # => [read_ptr, write_ptr, n]
+
+ # Compute the remainders of dividing read_ptr and write_ptr by 4. Compare them to get the
+ # "alignment flag".
+ dup.1 u32mod.4 dup.1 u32mod.4 eq # 11 cycles
+ # => [alignment_flag, read_ptr, write_ptr, n]
+
+ # if the number of elements to copy is small (less than 12), use only the `memcopy_elements`
+ # procedure
+ dup.3 u32lt.12 or # 6 cycles
+ # => [alignment_flag, read_ptr, write_ptr, n]
+
+ if.true
+ # if read pointer and write pointer are mutually word-aligned (i.e.
+ # (read_ptr - write_ptr) mod 4 = 0) we will be able to use `memcopy_words` in the cycle to
+ # optimize the copy
+ # => [read_ptr, write_ptr, n]
+
+ ### "prefix" handling #####################################################################
+
+ # copy `m` values, where `m` is the number of elements needed to make pointers word-aligned.
+ # If `m` > `n` (meaning we don't have enough values to make pointers word-aligned), copy all
+ # `n` available values.
+
+ # get the number of elements required to make read_ptr multiple of 4
+ dup.2 exec.elements_to_next_multiple_of_4 # 20 cycles
+ # => [m, read_ptr, write_ptr, n]
+
+ # update the total number of values left to copy
+ movup.3 dup.1 sub movdn.3 # 5 cycles
+ # => [m, read_ptr, write_ptr, n - m]
+
+ # copy the prefix elements
+ exec.memcopy_elements_internal # 4 + 14 * num_prefix_elements cycles
+ # => [0, read_ptr + m, write_ptr + m, n - m]
+
+ # rearrange the stack for word copy
+ drop movup.2 # 2 cycles
+ # => [n - m, read_ptr + m, write_ptr + m]
+
+ ### "words" handling ######################################################################
+
+ # get the number of words we should handle and the number of remaining felt values
+ u32divmod.4 # 2 cycles
+ # => [values_left, num_words, read_ptr', write_ptr']
+
+ # copy the words
+ movdn.3 exec.memcopy_words_internal # 13 + 16 * num_words cycles
+ # => [0, read_ptr'', write_ptr'', values_left]
+
+ ### "suffix" handling #####################################################################
+
+ # handle the remaining values
+ drop movup.2 exec.memcopy_elements # 9 + 14 * num_suffix_elements cycles
+ # => []
+ else
+ # if read pointer and write pointer are not word-aligned between each other, use
+ # `memcopy_elements` to copy values one-by-one
+ # => [read_ptr, write_ptr, n]
+
+ movup.2 exec.memcopy_elements # 8 + 14 * num_elements cycles
+ # => []
+ end
+ # => []
+end
+
+#! Returns the number of elements required to make the provided memory pointer multiple of 4, or `n`
+#! if it is smaller than the above described value.
+#!
+#! Inputs: [n, ptr]
+#! Outputs: [m, ptr]
+#!
+#! Where:
+#! - n is the value that limits the result from below: the result cannot be less than it.
+#! - ptr is the memory pointer relative to which the next multiple of 4 should be computed.
+#! - m is the value equal to the number of elements required to make the provided pointer multiple
+#! of 4 or `n` if it is smaller than the above described value.
+#!
+#! Total cycles: 19 cycles
+proc elements_to_next_multiple_of_4
+ # get the quotient and the remainder of the division by 4
+ dup.1 u32divmod.4 # 3 cycles
+ # => [ptr mod 4, ptr / 4, n, ptr]
+
+ # get the memory pointer which is next multiple of 4
+ neq.0 add mul.4 # 5 cycles
+ # => [ptr + to_next_4, n, ptr]
+
+ # get the number of values required to make the pointer multiple of 4
+ dup.2 sub # 3 cycles
+ # => [to_next_4, n, ptr]
+
+ # leave the smallest of the two values on the stack
+ dup.1 dup.1 u32gte cdrop # 8 cycles
+ # => [m, ptr]
+end
+
#! Copies `n` words from `read_ptr` to `write_ptr`.
#!
#! `read_ptr` and `write_ptr` pointers *must be* word-aligned.
@@ -16,52 +147,77 @@ use miden::core::crypto::hashes::rpo256
#!
#! Total cycles: 15 + 16 * num_words
pub proc memcopy_words
- # The loop variable is changed with an add instead of sub because the former
- # uses one fewer cycle. So here the counter is negated. (1 cycles)
- # stack: [-n, read_ptr, write_ptr, ...]
- neg
+ exec.memcopy_words_internal
+ # => [n = 0, read_ptr + 4n, write_ptr + 4n]
+
+ # clean the stack (3 cycles)
+ drop drop drop
+ # => []
+end
- # Pad the stack because mem_load overwrites it (4 cycles)
- # stack: [0, 0, 0, 0, -n, read_ptr, write_ptr, ...]
- padw
+#! Copies `n` words from `read_ptr` to `write_ptr`.
+#!
+#! `read_ptr` and `write_ptr` pointers *must be* word-aligned.
+#!
+#! Inputs: [n, read_ptr, write_ptr]
+#! Outputs: [n = 0, read_ptr + 4n, write_ptr + 4n]
+#!
+#! Where:
+#! - n is the number of words which should be copied.
+#! - read_ptr is the memory pointer where the words to copy are stored.
+#! - write_ptr is the memory pointer where the words will be copied.
+#!
+#! Total cycles: 12 + 16 * num_words
+proc memcopy_words_internal
+ # The loop variable is changed with an add instead of sub because the former
+ # uses one fewer cycle. So here the counter is negated. (1 cycles)
+ # stack: [-n, read_ptr, write_ptr, ...]
+ neg
- # check loop condition (3 cycles)
- # stack: [b, 0, 0, 0, 0, -n, read_ptr, write_ptr, ...]
- dup.4 neq.0
+ # Pad the stack because mem_load overwrites it (4 cycles)
+ # stack: [0, 0, 0, 0, -n, read_ptr, write_ptr, ...]
+ padw
- # LOOP: [0, 0, 0, 0, -n, read_ptr, write_ptr, ...]
- # while(n!=0) (16 cycles)
- while.true
- # perform read (2 cycles)
- # stack: [a3, a2, a1, a0, -n, read_ptr, write_ptr, ...]
- dup.5 mem_loadw_be
+ # check loop condition (3 cycles)
+ # stack: [b, 0, 0, 0, 0, -n, read_ptr, write_ptr, ...]
+ dup.4 neq.0
- # perform write (2 cycles)
- # stack: [a3, a2, a1, a0, -n, read_ptr, write_ptr, ...]
- dup.6 mem_storew_be
+ # LOOP: [0, 0, 0, 0, -n, read_ptr, write_ptr, ...]
+ # while(n != 0) (16 cycles)
+ while.true
+ # perform read (2 cycles)
+ # stack: [a3, a2, a1, a0, -n, read_ptr, write_ptr, ...]
+ dup.5 mem_loadw_be
- # note: the values of `A` are no longer necessary, use 0 to signal its a
- # padding
- # stack: [-n, read_ptr, write_ptr, x, 0, 0, 0, 0, ...]
- swapw
+ # perform write (2 cycles)
+ # stack: [a3, a2, a1, a0, -n, read_ptr, write_ptr, ...]
+ dup.6 mem_storew_be
- # stack: [-n+1, read_ptr+4, write_ptr+4, x, 0, 0, 0, 0, ...]
- # update counters (9 cycles)
- add.1 movup.3 movup.3 add.4 movup.3 add.4 movup.3
+ # note: the values of `A` are no longer necessary, use 0 to signal its a
+ # padding
+ # stack: [-n, read_ptr, write_ptr, x, 0, 0, 0, 0, ...]
+ swapw
- # stack: [0, 0, 0, 0, -n+1, read_ptr+4, write_ptr+4, x, ...]
- swapw
+ # stack: [-n+1, read_ptr+4, write_ptr+4, x, 0, 0, 0, 0, ...]
+ # update counters (9 cycles)
+ add.1 movup.3 movup.3 add.4 movup.3 add.4 movup.3
- dup.4 neq.0 # while(n!=0) (3 cycles)
- end
+ # stack: [0, 0, 0, 0, -n+1, read_ptr+4, write_ptr+4, x, ...]
+ swapw
+
+ dup.4 neq.0 # while(n != 0) (3 cycles)
+ end
- # clean stack (7 cycles)
- # stack: [...]
- dropw drop drop drop
+ # clean stack (4 cycles)
+ dropw
+ # => [-n+n = 0, read_ptr + 4n, write_ptr + 4n]
end
#! Copies `n` elements from `read_ptr` to `write_ptr`.
#!
+#! It is advised to copy big chunks of memory (num_elements >= 12) using `memcopy` procedure
+#! instead, since it will handle the memory more efficiently.
+#!
#! Inputs: [n, read_ptr, write_ptr]
#! Outputs: []
#!
@@ -72,6 +228,26 @@ end
#!
#! Total cycles: 7 + 14 * num_elements
pub proc memcopy_elements
+ exec.memcopy_elements_internal
+ # => [n = 0, read_ptr + n, write_ptr + n]
+
+ # clean stack (3 cycles)
+ drop drop drop
+ # => []
+end
+
+#! Copies `n` elements from `read_ptr` to `write_ptr`.
+#!
+#! Inputs: [n, read_ptr, write_ptr]
+#! Outputs: [n = 0, read_ptr + n, write_ptr + n]
+#!
+#! Where:
+#! - n is the number of elements which should be copied.
+#! - read_ptr is the memory pointer where the elements to copy are stored.
+#! - write_ptr is the memory pointer where the elements will be copied.
+#!
+#! Total cycles: 4 + 14 * num_elements
+proc memcopy_elements_internal
# The loop variable is changed with an `add` instead of `sub` because the former uses one fewer
# cycle. So here the counter is negated. (1 cycle)
neg
@@ -99,9 +275,6 @@ pub proc memcopy_elements
# => [loop_flag, -n+1, read_ptr+1, write_ptr+1]
end
# => [n = 0, read_ptr + n, write_ptr + n]
-
- # clean stack (3 cycles)
- drop drop drop
end
#! Copies an even number of words from the advice_stack to memory, computing their permutation.
@@ -151,68 +324,68 @@ end
#! - Even `num_words`: 43 + 9 * num_words / 2
#! - Odd `num_words`: 60 + 9 * round_down(num_words / 2)
pub proc pipe_words_to_memory
- # check if there is an odd number of words (6 cycles)
- dup is_odd
- # => [is_odd, num_words, write_ptr, ...]
-
- # copy is_odd, it defines if last word requires padding (2 cycles)
- dup movdn.3
- # => [is_odd, num_words, write_ptr, needs_padding, ...]
-
- # compute `end_ptr` with an even number of words (7 cycles)
- sub mul.4 dup.1 add swap
- # => [write_ptr, end_ptr, needs_padding, ...]
-
- # Prepare the capacity word. We use the padding rule which sets the first capacity
- # element to `len % 8` where `len` is the length of the hashed sequence. Since `len % 8`
- # is either equal to 0 or 4, this is determined by the `needs_padding` flag multiplied
- # by 4. (6 cycles)
- dup.2 mul.4 push.0.0.0
- # => [A, write_ptr, end_ptr, needs_padding, ...]
-
- # set initial hasher state (8 cycles)
- padw padw
- # => [C, B, A, write_ptr, end_ptr, needs_padding, ...]
-
- # (9 + 6 * num_words cycles)
- exec.pipe_double_words_to_memory
- # => [C, B, A, write_ptr, needs_padding, ...]
-
- # (4 cycles)
- movup.13
- # => [needs_padding, C, B, A, write_ptr, ...]
-
- # if(needs_padding) (17 cycles)
- if.true
- # Rescue Prime Optimized uses overwrite mode, drop `C`. (4 cycles)
- dropw
- # => [B, A, write_ptr, ...]
-
- # Overwrite the `B` with the new data (1 cycles)
- adv_loadw
- # => [B', A, write_ptr, ...]
-
- # - get the memory address that B' should be saved to
- # - update the write_ptr to point to the next address (4 cycles)
- movup.8 dup.0 add.4 movdn.5
- # => [write_ptr, B', write_ptr+4, A, ...]
-
- # save data to memory (1 cycles)
- mem_storew_be
- # => [B', write_ptr+1, A, ...]
-
- # Fix write_ptr position (2 cycles)
- movup.4 movdn.8
- # => [B', A, write_ptr+1, ...]
-
- # Push padding word (4 cycles)
- padw
- # => [C, B', A, write_ptr+1, ...]
-
- # Run RPO permutation (1 cycles)
- exec.rpo256::permute
- # => [C', B', A', write_ptr+1, ...]
- end
+ # check if there is an odd number of words (6 cycles)
+ dup is_odd
+ # => [is_odd, num_words, write_ptr, ...]
+
+ # copy is_odd, it defines if last word requires padding (2 cycles)
+ dup movdn.3
+ # => [is_odd, num_words, write_ptr, needs_padding, ...]
+
+ # compute `end_ptr` with an even number of words (7 cycles)
+ sub mul.4 dup.1 add swap
+ # => [write_ptr, end_ptr, needs_padding, ...]
+
+ # Prepare the capacity word. We use the padding rule which sets the first capacity
+ # element to `len % 8` where `len` is the length of the hashed sequence. Since `len % 8`
+ # is either equal to 0 or 4, this is determined by the `needs_padding` flag multiplied
+ # by 4. (6 cycles)
+ dup.2 mul.4 push.0.0.0
+ # => [A, write_ptr, end_ptr, needs_padding, ...]
+
+ # set initial hasher state (8 cycles)
+ padw padw
+ # => [C, B, A, write_ptr, end_ptr, needs_padding, ...]
+
+ # (9 + 6 * num_words cycles)
+ exec.pipe_double_words_to_memory
+ # => [C, B, A, write_ptr, needs_padding, ...]
+
+ # (4 cycles)
+ movup.13
+ # => [needs_padding, C, B, A, write_ptr, ...]
+
+ # if(needs_padding) (17 cycles)
+ if.true
+ # Rescue Prime Optimized uses overwrite mode, drop `C`. (4 cycles)
+ dropw
+ # => [B, A, write_ptr, ...]
+
+ # Overwrite the `B` with the new data (1 cycles)
+ adv_loadw
+ # => [B', A, write_ptr, ...]
+
+ # - get the memory address that B' should be saved to
+ # - update the write_ptr to point to the next address (4 cycles)
+ movup.8 dup.0 add.4 movdn.5
+ # => [write_ptr, B', write_ptr+4, A, ...]
+
+ # save data to memory (1 cycles)
+ mem_storew_be
+ # => [B', write_ptr+1, A, ...]
+
+ # Fix write_ptr position (2 cycles)
+ movup.4 movdn.8
+ # => [B', A, write_ptr+1, ...]
+
+ # Push padding word (4 cycles)
+ padw
+ # => [C, B', A, write_ptr+1, ...]
+
+ # Run RPO permutation (1 cycle)
+ exec.rpo256::permute
+ # => [C', B', A', write_ptr+1, ...]
+ end
end
#! Moves an arbitrary number of words from the advice stack to memory and asserts it matches the
@@ -232,21 +405,21 @@ end
#! - Even `num_words`: 62 + 9 * num_words / 2
#! - Odd `num_words`: 79 + 9 * round_down(num_words / 2)
pub proc pipe_preimage_to_memory
- # Copies the advice stack data to memory
- exec.pipe_words_to_memory
- # => [C, B, A, write_ptr', COMMITMENT, ...]
+ # Copies the advice stack data to memory
+ exec.pipe_words_to_memory
+ # => [C, B, A, write_ptr', COMMITMENT, ...]
- # Leave only the digest on the stack
- exec.rpo256::squeeze_digest
- # => [B, write_ptr', COMMITMENT, ...]
+ # Leave only the digest on the stack
+ exec.rpo256::squeeze_digest
+ # => [B, write_ptr', COMMITMENT, ...]
- # Save the write_ptr (2 cycles)
- movup.4 movdn.8
- # => [HASH, COMMITMENT, write_ptr', ...]
+ # Save the write_ptr (2 cycles)
+ movup.4 movdn.8
+ # => [HASH, COMMITMENT, write_ptr', ...]
- # Check the COMMITMENT (10 cycles)
- assert_eqw
- # => [write_ptr', ...]
+ # Check the COMMITMENT (10 cycles)
+ assert_eqw
+ # => [write_ptr', ...]
end
#! Moves an even number of words from the advice stack to memory and asserts that their sequential
@@ -264,32 +437,32 @@ end
#!
#! Total cycles: 56 + 3 * num_words / 2
pub proc pipe_double_words_preimage_to_memory
- # Assert precondition (8 cycles).
- dup is_odd assertz.err="pipe_double_words_preimage_to_memory: num_words must be even"
- # => [num_words, write_ptr, COMMITMENT, ...]
-
- # Compute and move end_ptr (5 cycles)
- mul.4 dup.1 add swap
- # => [write_ptr, end_ptr, COMMITMENT, ...]
-
- # Setup the initial hasher state, which starts with a capacity word `A`.
- # For us, the capacity word is entirely empty.
- # (12 cycles).
- exec.rpo256::init_no_padding
- # => [C, B, A, write_ptr, end_ptr, COMMITMENT, ...]
-
- # (9 + 3 * num_words cycles)
- # (e.g., 25 cycles for 4 words)
- exec.pipe_double_words_to_memory
- # => [C, B, A, write_ptr', COMMITMENT, ...]
-
- # Leave just the digest on the stack (9 cycles).
- exec.rpo256::squeeze_digest
- # => [B, write_ptr', COMMITMENT, ...]
-
- # Move write_ptr out of the way so we can assert the commitment (2 cycles).
- movup.4 movdn.8
-
- # Assert the commitment (11 cycles).
- assert_eqw.err="pipe_double_words_preimage_to_memory: COMMITMENT does not match"
+ # Assert precondition (8 cycles).
+ dup is_odd assertz.err="pipe_double_words_preimage_to_memory: num_words must be even"
+ # => [num_words, write_ptr, COMMITMENT, ...]
+
+ # Compute and move end_ptr (5 cycles)
+ mul.4 dup.1 add swap
+ # => [write_ptr, end_ptr, COMMITMENT, ...]
+
+ # Setup the initial hasher state, which starts with a capacity word `A`.
+ # For us, the capacity word is entirely empty.
+ # (12 cycles).
+ exec.rpo256::init_no_padding
+ # => [C, B, A, write_ptr, end_ptr, COMMITMENT, ...]
+
+ # (9 + 3 * num_words cycles)
+ # (e.g., 25 cycles for 4 words)
+ exec.pipe_double_words_to_memory
+ # => [C, B, A, write_ptr', COMMITMENT, ...]
+
+ # Leave just the digest on the stack (9 cycles).
+ exec.rpo256::squeeze_digest
+ # => [B, write_ptr', COMMITMENT, ...]
+
+ # Move write_ptr out of the way so we can assert the commitment (2 cycles).
+ movup.4 movdn.8
+
+ # Assert the commitment (11 cycles).
+ assert_eqw.err="pipe_double_words_preimage_to_memory: COMMITMENT does not match"
end
diff --git a/libcore/docs/collections/smt.md b/libcore/docs/collections/smt.md
index 793fc358a2..eec4cfdd16 100644
--- a/libcore/docs/collections/smt.md
+++ b/libcore/docs/collections/smt.md
@@ -4,4 +4,4 @@
| ----------- | ------------- |
| get | Returns the value located under the specified key in the Sparse Merkle Tree defined by the
specified root.
If no values had been previously inserted under the specified key, an empty word (i.e.,
[ZERO; 4]) is returned.
Inputs:
Operand stack: [K, R, ...]
Outputs:
Operand stack: [V, R, ...]
Fails if the tree with the specified root does not exits in the VM's advice provider.
Cycles
Leaf empty: 66 cycles
Leaf single: 115 cycles
Leaf multiple, non-empty value: 189 + 3 * pair_count
Leaf multiple, empty value: 186 + 3 * pair_count
|
| set | Inserts the specified value under the specified key in a Sparse Merkle Tree defined by the
specified root. If the insert is successful, the old value located under the specified key
is returned via the stack.
If the VALUE is an empty word (i.e., [ZERO; 4]), the new state of the tree is guaranteed to
be equivalent to the state as if the updated value was never inserted.
Inputs:
Operand stack: [V, K, R, ...]
Outputs:
Operand stack: [V_old, R_new, ...]
Fails if the tree with the specified root does not exits in the VM's advice provider.
Cycles
Leaf empty
removal: 74 cycles
insertion: 133 cycles
Leaf single
removal: 227 cycles
insertion (leaf remains single): 205
insertion (leaf becomes multiple): X cycles
Leaf multiple
X cycles
|
-| peek | Emits the "miden::core::collections::smt::smt_peek" event, which pushes onto the advice stack the
value associated with the specified key in a Sparse Merkle Tree defined by the specified root.
If no value was previously associated with the specified key, [ZERO; 4] is pushed onto the advice
stack.
Inputs:
Operand stack: [KEY, ROOT]
Advice stack: []
Outputs:
Operand stack: [KEY, ROOT]
Advice stack: [VALUE]
Panics if:
- the tree with the specified root does not exits in the VM's advice provider.
Cycles: 3 cycles
|
+| peek | Emits the "core::collections::smt::smt_peek" event, which pushes onto the advice stack the
value associated with the specified key in a Sparse Merkle Tree defined by the specified root.
If no value was previously associated with the specified key, [ZERO; 4] is pushed onto the advice
stack.
Inputs:
Operand stack: [KEY, ROOT]
Advice stack: []
Outputs:
Operand stack: [KEY, ROOT]
Advice stack: [VALUE]
Panics if:
- the tree with the specified root does not exits in the VM's advice provider.
Cycles: 3 cycles
|
diff --git a/libcore/docs/crypto/dsa/ecdsa_k256_keccak.md b/libcore/docs/crypto/dsa/ecdsa_k256_keccak.md
index 05b5708a09..b04347bd63 100644
--- a/libcore/docs/crypto/dsa/ecdsa_k256_keccak.md
+++ b/libcore/docs/crypto/dsa/ecdsa_k256_keccak.md
@@ -4,4 +4,4 @@
| ----------- | ------------- |
| verify_ecdsa_k256_keccak | Verifies an secp256k1 ECDSA signature compatible with `miden-crypto::ecdsa_k256_keccak`.
This wrapper mirrors the materialization performed in `miden-crypto::ecdsa_k256_keccak`: given
a public key commitment and the original message, it reconstructs the calldata expected by the
precompile (public key bytes, Keccak256(message), signature). The public key and signature are
supplied via the advice stack, and can be obtained with the `ecdsa_k256_keccak` function.
Inputs:
Operand stack: [PK_COMM, MSG, ...]
Advice stack: [PK[9] \| SIG_BYTES[17] \| ...]
Outputs:
Operand stack: []
Advice stack: []
Local memory layout (element addresses):
- locaddr[0 ..9 ] : compressed public key (33 bytes packed as 9 felts)
- locaddr[12..20] : message bytes (MSG written as eight u32 limbs)
- locaddr[20..28] : keccak256(message) digest (8 felts)
- locaddr[28..45] : signature (66 bytes packed as 17 felts)
The procedure traps if the public key in the advice stack does not hash to `PK_COMM`;
otherwise it returns cleanly after emitting the deferred verification request.
|
| verify | Verifies an ECDSA signature with pre-hashed message using deferred execution.
The caller provides the public key, the pre-hashed message digest, and the signature data in
memory. This routine forwards the request to the host precompile and returns the boolean result.
In typical flows the digest is obtained from `keccak256::hash_bytes`, but any 32-byte prehash
is accepted.
Input: `[pk_ptr, digest_ptr, sig_ptr, ...]`
Output: `[result, ...]`
Where:
- `pk_ptr`: word-aligned memory address containing the 33-byte compressed secp256k1 public key
- `digest_ptr`: word-aligned memory address containing the 32-byte message digest
- `sig_ptr`: word-aligned memory address containing the 66-byte signature
- `result`: 1 if the signature is valid, 0 if invalid
All data must be stored in memory as packed u32 values (little-endian), with unused bytes
in the final u32 set to zero.
|
-| verify_impl | Internal implementation of ECDSA signature verification via deferred computation.
Emits an event to trigger the precompile handler, reads the verification result from
the advice stack, and computes the commitment and tag for tracking deferred verification.
This procedure mimics the `ecdsa_secp256k1::PublicKey::verify_prehash()` function from
`miden-crypto`, which takes a pre-hashed message that the caller must provide
(e.g. obtained using the keccak256 precompile).
Input: `[pk_ptr, digest_ptr, sig_ptr, ...]`
Output: `[COMM, TAG, result, ...]`
Where:
- `pk_ptr`: word-aligned memory address containing 33-byte public key
- `digest_ptr`: word-aligned memory address containing 32-byte digest
- `sig_ptr`: word-aligned memory address containing 66-byte signature
- `COMM`: commitment to calldata computed as
`Rpo256(Rpo256(Rpo256(pk) \|\| Rpo256(digest)) \|\| Rpo256(sig))`
- `TAG`: `[ECDSA_VERIFY_EVENT, result, 0, 0]`
- `result`: 1 if signature is valid, 0 if invalid
|
+| verify_impl | Internal implementation of ECDSA signature verification via deferred computation.
Emits an event to trigger the precompile handler, reads the verification result from
the advice stack, and computes the commitment and tag for tracking deferred verification.
This procedure mimics the `ecdsa_secp256k1::PublicKey::verify_prehash()` function from
`miden-crypto`, which takes a pre-hashed message that the caller must provide
(e.g. obtained using the keccak256 precompile).
Input: `[pk_ptr, digest_ptr, sig_ptr, ...]`
Output: `[COMM, TAG, result, ...]`
Where:
- `pk_ptr`: word-aligned memory address containing 33-byte public key
- `digest_ptr`: word-aligned memory address containing 32-byte digest
- `sig_ptr`: word-aligned memory address containing 66-byte signature
- `COMM`: commitment to calldata computed as
`rpo256256(rpo256256(rpo256256(pk) \|\| rpo256256(digest)) \|\| rpo256256(sig))`
- `TAG`: `[ECDSA_VERIFY_EVENT, result, 0, 0]`
- `result`: 1 if signature is valid, 0 if invalid
|
diff --git a/libcore/docs/crypto/hashes/rpo256.md b/libcore/docs/crypto/hashes/rpo256.md
index aac4c36c51..c60458cd1f 100644
--- a/libcore/docs/crypto/hashes/rpo256.md
+++ b/libcore/docs/crypto/hashes/rpo256.md
@@ -1,4 +1,5 @@
-## miden::core::crypto::hashes::rpo
+
+## miden::core::crypto::hashes::rpo256
| Procedure | Description |
| ----------- | ------------- |
| init_no_padding | Prepares the top of the stack with the hasher initial state.
This procedures does not handle padding, therefore, the user is expected to
consume an amount of data which is a multiple of the rate (2 words).
Input: []
Output: [PERM, PERM, PERM, ...]
Cycles: 12
|
diff --git a/libcore/docs/mem.md b/libcore/docs/mem.md
index f3db7d15ff..f008a35719 100644
--- a/libcore/docs/mem.md
+++ b/libcore/docs/mem.md
@@ -2,8 +2,9 @@
## miden::core::mem
| Procedure | Description |
| ----------- | ------------- |
+| memcopy | Copies `n` elements from `read_ptr` to `write_ptr`.
If possible, this procedure will copy word-aligned words internally using `memcopy_words`
procedure, decreasing the total number of cycles required to make a copy.
It is advised to copy small chunks of memory (num_elements < 12) using `memcopy_elements`
procedure instead, since it won't have a computation logic overhead.
Inputs: [n, read_ptr, write_ptr]
Outputs: []
Where:
- n is the number of elements which should be copied.
- read_ptr is the memory pointer where the elements to copy are stored.
- write_ptr is the memory pointer where the elements will be copied.
Panics if:
- read_ptr and write_ptr are not a u32 values.
Total cycles:
- if read and write pointers are mutually word-unaligned: 27 + 14 * num_elements
- if `n` is less than 12 elements: 27 + 14 * num_elements
- if read and write pointers are mutually mutually word-aligned, it's equal to the sum of:
- prologue: 19 cycles
- prefix handling: 31 + 14 * num_prefix_elements, where num_prefix_elements is
the number of elements required to make the read (and write) pointer word-aligned
- words handling: 15 + 16 * num_words
- suffix handling: 9 + 14 * num_suffix_elements
- in total: 74 + 14 * num_prefix_elements + 16 * num_words + 14 * num_suffix_elements
|
| memcopy_words | Copies `n` words from `read_ptr` to `write_ptr`.
`read_ptr` and `write_ptr` pointers *must be* word-aligned.
Inputs: [n, read_ptr, write_ptr]
Outputs: []
Where:
- n is the number of words which should be copied.
- read_ptr is the memory pointer where the words to copy are stored.
- write_ptr is the memory pointer where the words will be copied.
Total cycles: 15 + 16 * num_words
|
-| memcopy_elements | Copies `n` elements from `read_ptr` to `write_ptr`.
Inputs: [n, read_ptr, write_ptr]
Outputs: []
Where:
- n is the number of elements which should be copied.
- read_ptr is the memory pointer where the elements to copy are stored.
- write_ptr is the memory pointer where the elements will be copied.
Total cycles: 7 + 14 * num_elements
|
+| memcopy_elements | Copies `n` elements from `read_ptr` to `write_ptr`.
It is advised to copy big chunks of memory (num_elements >= 12) using `memcopy` procedure
instead, since it will handle the memory more efficiently.
Inputs: [n, read_ptr, write_ptr]
Outputs: []
Where:
- n is the number of elements which should be copied.
- read_ptr is the memory pointer where the elements to copy are stored.
- write_ptr is the memory pointer where the elements will be copied.
Total cycles: 7 + 14 * num_elements
|
| pipe_double_words_to_memory | Copies an even number of words from the advice_stack to memory, computing their permutation.
Inputs: [C, B, A, write_ptr, end_ptr]
Outputs: [C, B, A, write_ptr]
Where:
- A is the capacity of the RPO hasher state
- C and B are the rate portion of the RPO hasher state
- write_ptr is the memory pointer where the words will be copied.
- end_ptr is the memory pointer where the copying should end.
Notice that the `end_ptr - write_ptr` value must be positive and a multiple of 8.
Total cycles: 9 + 6 * num_word_pairs
|
| pipe_words_to_memory | Copies an arbitrary number of words from the advice stack to memory, computing their permutation.
Inputs: [num_words, write_ptr]
Outputs: [C, B, A, write_ptr']
Where:
- num_words is the number of words which will be copied to the memory.
- write_ptr is the memory pointer where the words will be copied.
- write_ptr' is the memory pointer to the end of the copied words.
- A is the capacity of the RPO hasher state
- C and B are the rate portion of the RPO hasher state
Total cycles:
- Even `num_words`: 43 + 9 * num_words / 2
- Odd `num_words`: 60 + 9 * round_down(num_words / 2)
|
| pipe_preimage_to_memory | Moves an arbitrary number of words from the advice stack to memory and asserts it matches the
commitment.
Inputs: [num_words, write_ptr, COMMITMENT]
Outputs: [write_ptr']
Where:
- num_words is the number of words which will be copied to the memory.
- write_ptr is the memory pointer where the words will be copied.
- write_ptr' is the memory pointer to the end of the copied words.
- COMMITMENT is the commitment that the one calculated during this procedure will be compared
with.
Total cycles:
- Even `num_words`: 62 + 9 * num_words / 2
- Odd `num_words`: 79 + 9 * round_down(num_words / 2)
|
diff --git a/libcore/tests/mem/mod.rs b/libcore/tests/mem/mod.rs
index 5a912035f8..f05622aa4e 100644
--- a/libcore/tests/mem/mod.rs
+++ b/libcore/tests/mem/mod.rs
@@ -5,6 +5,171 @@ use miden_utils_testing::{
build_expected_perm, felt_slice_to_ints,
};
+#[test]
+fn test_memcopy() {
+ // prepare testing data
+ use miden_libcore::CoreLibrary;
+
+ let store_testing_data_source = "
+ push.1.2.3.4.1000 mem_storew_be dropw
+ push.5.6.7.8.1004 mem_storew_be dropw
+ push.9.10.11.12.1008 mem_storew_be dropw
+ push.13.14.15.16.1012 mem_storew_be dropw
+ push.17.18.19.20.1016 mem_storew_be dropw
+ ";
+
+ let stdlib = CoreLibrary::default();
+
+ fn executed_process_from_source(source: String, stdlib: &CoreLibrary) -> Process {
+ let assembler = miden_assembly::Assembler::default()
+ .with_dynamic_library(stdlib)
+ .expect("failed to load stdlib");
+ let program: Program =
+ assembler.assemble_program(source).expect("Failed to compile test source.");
+ let mut host = DefaultHost::default().with_library(stdlib).unwrap();
+
+ let mut process = Process::new(
+ program.kernel().clone(),
+ StackInputs::default(),
+ AdviceInputs::default(),
+ ExecutionOptions::default(),
+ );
+
+ process.execute(&program, &mut host).unwrap();
+
+ process
+ }
+
+ // Case 1: read pointer and write pointer are mutually unaligned
+
+ let source = format!(
+ "
+ use miden::core::mem
+
+ begin
+ {store_testing_data_source}
+
+ push.2002.1001.18 exec.mem::memcopy
+ end
+ "
+ );
+
+ let process = executed_process_from_source(source, &stdlib);
+
+ for addr in 2002..2020 {
+ assert_eq!(
+ process.chiplets.memory.get_value(ContextId::root(), addr).unwrap(),
+ Felt::from(addr - 2000),
+ "Address {}",
+ addr
+ );
+ }
+
+ // Case 2: pointers are word-aligned; essentially only the "words" handler will work
+
+ let source = format!(
+ "
+ use miden::core::mem
+
+ begin
+ {store_testing_data_source}
+
+ push.2000.1000.16 exec.mem::memcopy
+ end
+ "
+ );
+
+ let process = executed_process_from_source(source, &stdlib);
+
+ for addr in 2000..2016 {
+ assert_eq!(
+ process.chiplets.memory.get_value(ContextId::root(), addr).unwrap(),
+ Felt::from(addr - 1999),
+ "Address {}",
+ addr
+ );
+ }
+
+ // Case 3: pointers are mutually word-aligned (have equal mis-aligning from the next
+ // word-aligned address); we have enough copy values to run all three handlers: prefix,
+ // words, and suffix
+
+ let source = format!(
+ "
+ use miden::core::mem
+
+ begin
+ {store_testing_data_source}
+
+ push.2001.1001.18 exec.mem::memcopy
+ end
+ "
+ );
+
+ let process = executed_process_from_source(source, &stdlib);
+
+ for addr in 2001..2019 {
+ assert_eq!(
+ process.chiplets.memory.get_value(ContextId::root(), addr).unwrap(),
+ Felt::from(addr - 1999),
+ "Address {}",
+ addr
+ );
+ }
+
+ // Case 4: pointers are mutually word-aligned (have equal mis-aligning from the next
+ // word-aligned address); we have enough copy values to run only prefix and suffix handlers.
+
+ let source = format!(
+ "
+ use miden::core::mem
+
+ begin
+ {store_testing_data_source}
+
+ push.2001.1001.5 exec.mem::memcopy
+ end
+ "
+ );
+
+ let process = executed_process_from_source(source, &stdlib);
+
+ for addr in 2001..2006 {
+ assert_eq!(
+ process.chiplets.memory.get_value(ContextId::root(), addr).unwrap(),
+ Felt::from(addr - 1999),
+ "Address {}",
+ addr
+ );
+ }
+
+ // Case 5: pointers are mutually word-aligned (have equal mis-aligning from the next
+ // word-aligned address); we have enough copy values to run only suffix handler.
+
+ let source = format!(
+ "
+ use miden::core::mem
+
+ begin
+ {store_testing_data_source}
+
+ push.2001.1001.2 exec.mem::memcopy
+ end
+ "
+ );
+
+ let process = executed_process_from_source(source, &stdlib);
+
+ for addr in 2001..2003 {
+ assert_eq!(
+ process.chiplets.memory.get_value(ContextId::root(), addr).unwrap(),
+ Felt::from(addr - 1999),
+ "Address {}",
+ addr
+ );
+ }
+}
+
#[test]
fn test_memcopy_words() {
use miden_libcore::CoreLibrary;