diff --git a/miden-crypto/src/merkle/smt/forest/mod.rs b/miden-crypto/src/merkle/smt/forest/mod.rs index b80efe903..f9b38ce6e 100644 --- a/miden-crypto/src/merkle/smt/forest/mod.rs +++ b/miden-crypto/src/merkle/smt/forest/mod.rs @@ -126,7 +126,7 @@ impl SmtForest { /// Inserts the specified key-value pair into an SMT with the specified root. This will also /// add a new root to the forest. Returns the new root. /// - /// Returns an error if an SMT with the specified root is not in the forest, these is not + /// Returns an error if an SMT with the specified root is not in the forest, there is not /// enough data in the forest to perform the insert, or if the insert would create a leaf /// with too many entries. pub fn insert(&mut self, root: Word, key: Word, value: Word) -> Result { @@ -136,7 +136,7 @@ impl SmtForest { /// Inserts the specified key-value pairs into an SMT with the specified root. This will also /// add a single new root to the forest for the entire batch of inserts. Returns the new root. /// - /// Returns an error if an SMT with the specified root is not in the forest, these is not + /// Returns an error if an SMT with the specified root is not in the forest, there is not /// enough data in the forest to perform the insert, or if the insert would create a leaf /// with too many entries. pub fn batch_insert( diff --git a/miden-crypto/src/merkle/smt/large_forest/backend.rs b/miden-crypto/src/merkle/smt/large_forest/backend.rs new file mode 100644 index 000000000..5c33a0dc2 --- /dev/null +++ b/miden-crypto/src/merkle/smt/large_forest/backend.rs @@ -0,0 +1,123 @@ +//! This file contains the [`Backend`] trait for the [`LargeSmtForest`] implementation and the +//! supporting types it needs. 
+ +use alloc::{boxed::Box, vec::Vec}; +use core::fmt::Debug; + +use thiserror::Error; + +use crate::{ + Word, + merkle::{ + MerkleError, + smt::{ + SmtProof, + full::SMT_DEPTH, + large_forest::{ + history::VersionId, + operation::{SmtForestUpdateBatch, SmtUpdateBatch}, + }, + }, + }, +}; +// TYPE ALIASES +// ================================================================================================ + +/// The mutation set used by the forest backends. +/// +/// At the moment this is used for _reverse_ mutations that "undo" the changes made to the tree(s), +/// but may be harmonised with [`SmtUpdateBatch`] in the future. For more information on its use for +/// reverse mutations, see [`crate::merkle::smt::SparseMerkleTree::apply_mutations_with_reversion`]. +pub type MutationSet = crate::merkle::smt::MutationSet; + +// BACKEND +// ================================================================================================ + +/// The backing storage for the SMT forest, providing the necessary high-level methods for +/// performing operations on the full trees that make up the forest, while allowing the forest +/// itself to be storage agnostic. +/// +/// # Backend Data Storage +/// +/// Having a generic [`Backend`] provides no guarantees to the user about how it stores data and +/// what patterns are used for data access under the hood. It is, however, guaranteed to store +/// _only_ the data necessary to describe the latest state of each tree in the forest. +pub trait Backend +where + Self: Debug, +{ + // QUERIES + // ============================================================================================ + + /// Returns an opening for the specified `key` in the SMT with the specified `root`. + fn open(&self, root: Word, key: Word) -> Result; + + /// Returns the value associated with the provided `key` in the SMT with the provided `root`, or + /// [`None`] if no such value exists. 
+ fn get(&self, root: Word, key: Word) -> Result>; + + /// Returns the version of the tree with the provided `root`. + fn version(&self, root: Word) -> Result; + + /// Returns an iterator over all the tree roots and versions that the backend knows about. + /// + /// The iteration order is unspecified. + fn versions(&self) -> Result>; + + // SINGLE-TREE MODIFIERS + // ============================================================================================ + + /// Performs the provided `updates` on the tree with the provided `root`, returning the mutation + /// set that will revert the changes made to the tree. + /// + /// Implementations must guarantee the following behavior, with non-conforming implementations + /// considered to be a bug: + /// + /// - At most one new root must be added to the forest for the entire batch. + /// - If applying the provided `updates` results in no changes to the tree, no new tree must be + /// allocated. + fn update_tree( + &mut self, + root: Word, + new_version: VersionId, + updates: SmtUpdateBatch, + ) -> Result; + + // MULTI-TREE MODIFIERS + // ============================================================================================ + + /// Performs the provided `updates` on the forest, setting all new tree states to have the + /// provided `new_version` and returning a vector of the mutation sets that reverse the changes + /// to each changed tree. + /// + /// Implementations must guarantee the following behaviour, with non-conforming implementations + /// considered to be a bug: + /// + /// - At most one new root must be added to the forest for each target root in the provided + /// `updates`. + /// - If applying the provided `updates` results in no changes to a given lineage of trees in + /// the forest, then no new tree must be allocated in that lineage. 
+ fn update_forest( + &mut self, + new_version: VersionId, + updates: SmtForestUpdateBatch, + ) -> Result>; +} + +// BACKEND ERROR +// ================================================================================================ + +/// The error type for use within Backends. +#[derive(Debug, Error)] +pub enum BackendError { + /// Raised when there is an error with the merkle tree semantics within the backend. + #[error(transparent)] + Merkle(#[from] MerkleError), + + /// Raised for arbitrary other errors within the backend. + #[error(transparent)] + Other(#[from] Box), +} + +/// The result type for use with backends. +pub type Result = core::result::Result; diff --git a/miden-crypto/src/merkle/smt/large_forest/error.rs b/miden-crypto/src/merkle/smt/large_forest/error.rs index 14ca4cf3e..8176215b4 100644 --- a/miden-crypto/src/merkle/smt/large_forest/error.rs +++ b/miden-crypto/src/merkle/smt/large_forest/error.rs @@ -1,21 +1,51 @@ //! This module contains the error types and helpers for working with errors from the large SMT //! forest. +use alloc::boxed::Box; + use thiserror::Error; -use crate::merkle::{MerkleError, smt::large_forest::history::error::HistoryError}; +use crate::{ + Word, + merkle::{ + MerkleError, + smt::large_forest::{backend::BackendError, history::error::HistoryError}, + }, +}; + +// LARGE SMT FOREST ERROR +// ================================================================================================ -/// The errors returned by operations on the large SMT forest. -/// -/// This type primarily serves to wrap more specific error types from various subsystems into a -/// generic interface type. +/// The type of errors returned by operations on the large SMT forest. #[derive(Debug, Error)] pub enum LargeSmtForestError { + /// Errors in the history subsystem of the forest. #[error(transparent)] HistoryError(#[from] HistoryError), + /// Raised when an attempt is made to modify a frozen tree. 
+ #[error("Attempted to modify non-current tree with root {0}")] + InvalidModification(Word), + + /// Errors with the merkle tree operations of the forest. #[error(transparent)] MerkleError(#[from] MerkleError), + + /// Raised for arbitrary other errors. + #[error(transparent)] + Other(#[from] Box), +} + +/// We want to forward backend errors specifically when we can, so we manually implement the +/// conversion. +impl From for LargeSmtForestError { + fn from(value: BackendError) -> Self { + match value { + BackendError::Merkle(e) => LargeSmtForestError::from(e), + BackendError::Other(e) => LargeSmtForestError::from(e), + } + } } -pub mod history {} +/// The result type for use within the large SMT forest portion of the library. +pub type Result = core::result::Result; diff --git a/miden-crypto/src/merkle/smt/large_forest/history/mod.rs b/miden-crypto/src/merkle/smt/large_forest/history/mod.rs index 37ce4daf0..097f04741 100644 --- a/miden-crypto/src/merkle/smt/large_forest/history/mod.rs +++ b/miden-crypto/src/merkle/smt/large_forest/history/mod.rs @@ -39,7 +39,7 @@ use core::fmt::Debug; use error::{HistoryError, Result}; use crate::{ - Map, Set, Word, + Map, Word, merkle::{ NodeIndex, smt::{LeafIndex, SMT_DEPTH}, @@ -87,6 +87,7 @@ pub type VersionId = u64; /// The versions are _cumulative_, meaning that querying the history must account for changes from /// the current tree that take place in versions that are not the queried version or the current /// tree. +#[allow(dead_code)] // Temporary #[derive(Clone, Debug)] pub struct History { /// The maximum number of historical versions to be stored. @@ -110,6 +111,7 @@ pub struct History { deltas: VecDeque, } +#[allow(dead_code)] // Temporary impl History { /// Constructs a new history container, containing at most `max_count` historical versions for /// a tree. @@ -136,13 +138,23 @@ impl History { /// Returns all the roots that the history knows about. 
/// + /// The iteration order of the roots is guaranteed to move backward in time, with earlier items + /// being roots from versions closer to the present. + /// /// # Complexity /// - /// Calling this method requires a traversal of all the versions and is hence linear in the - /// number of history versions. - #[must_use] - pub fn roots(&self) -> Set { - self.deltas.iter().map(|d| d.root).collect() + /// Calling this method provides an iterator whose consumption requires a traversal of all the + /// versions. The method's complexity is thus `O(n)` in the number of versions. + pub fn roots(&self) -> impl Iterator { + self.deltas.iter().rev().map(|d| d.root) + } + + /// Gets the version corresponding to the provided `root`, or returns [`None`] if the provided + /// `root` is not found within this history. + pub fn version(&self, root: Word) -> Option { + self.deltas + .iter() + .find_map(|d| if d.root == root { Some(d.version_id) } else { None }) } /// Returns `true` if `root` is in the history and `false` otherwise. @@ -302,6 +314,7 @@ impl History { // ================================================================================================ /// A read-only view of the history overlay on the tree at a specified place in the history. +#[allow(dead_code)] // Temporary #[derive(Debug)] pub struct HistoryView<'history> { /// The index of the target version in the history. @@ -311,6 +324,7 @@ pub struct HistoryView<'history> { history: &'history History, } +#[allow(dead_code)] // Temporary impl<'history> HistoryView<'history> { /// Constructs a new history view that acts as a single overlay of the state represented by the /// oldest delta for which `f` returns true. @@ -413,6 +427,7 @@ struct Delta { pub leaves: LeafChanges, } +#[allow(dead_code)] // Temporary impl Delta { /// Creates a new delta with the provided `root`, and representing the provided /// changes to `nodes` and `leaves` in the merkle tree. 
diff --git a/miden-crypto/src/merkle/smt/large_forest/history/tests.rs b/miden-crypto/src/merkle/smt/large_forest/history/tests.rs index b3b763704..f559c9f24 100644 --- a/miden-crypto/src/merkle/smt/large_forest/history/tests.rs +++ b/miden-crypto/src/merkle/smt/large_forest/history/tests.rs @@ -1,6 +1,8 @@ #![cfg(feature = "std")] //! The functional tests for the history component. +use alloc::vec::Vec; + use p3_field::PrimeCharacteristicRing; use super::{CompactLeaf, History, LeafChanges, NodeChanges, error::Result}; @@ -32,7 +34,7 @@ fn roots() -> Result<()> { history.add_version(root_2, 1, nodes.clone(), leaves.clone())?; // We should be able to get all the roots. - let roots = history.roots(); + let roots = history.roots().collect::>(); assert_eq!(roots.len(), 2); assert!(roots.contains(&root_1)); assert!(roots.contains(&root_2)); diff --git a/miden-crypto/src/merkle/smt/large_forest/mod.rs b/miden-crypto/src/merkle/smt/large_forest/mod.rs index e67a5f27a..accffc95a 100644 --- a/miden-crypto/src/merkle/smt/large_forest/mod.rs +++ b/miden-crypto/src/merkle/smt/large_forest/mod.rs @@ -1,7 +1,382 @@ -//! A high-performance sparse merkle tree forest backed by pluggable storage. +//! A high-performance sparse merkle tree forest with pluggable backends. +//! +//! # Semantic Layout +//! +//! Much like `SparseMerkleTree`, the forest stores trees of depth 64 that use the compact leaf +//! optimization to uniquely store 256-bit elements. This reduces both the size of a merkle path, +//! and the computational work necessary to perform queries into the trees. +//! +//! # Storing Trees and Versions +//! +//! The usage of an SMT forest is conceptually split into two parts: a collection that is able to +//! store **multiple, unrelated trees**, and a container for **multiple versions of those trees**. +//! Both of these use-cases are supported by the forest, but have an explicit delineation between +//! them in both the API and the implementation. 
This has two impacts that a client of the forest +//! must understand. +//! +//! - When using a [`Backend`] that can persist data, **only the current full tree state is +//! persisted**, while **the historical data will not be**. This is designed into the structure of +//! the forest, and does not depend on the choice of storage backend. +//! - It is more expensive to query a given tree at an older point in its history than it is to +//! query it at a newer point, and querying at the current tree will always take the least time. +//! +//! # Data Storage +//! +//! The SMT forest is parametrised over the [`Backend`] implementation that it uses. These backends +//! may have significantly varied performance characteristics, and hence any performance analysis of +//! the forest should be done in conjunction with a specific backend. The forest itself takes pains +//! to not make any assumptions about properties of the backend in use. +//! +//! Take care to read the documentation of the specific [`Backend`] that you are planning to use in +//! order to understand its performance, gotchas, and other such details. +mod backend; mod error; mod history; +mod operation; +mod property_tests; +mod root; +mod tests; -pub use error::LargeSmtForestError; -pub use history::{History, HistoryView, error::HistoryError}; +pub use backend::{Backend, BackendError}; +pub use error::{LargeSmtForestError, Result}; +pub use operation::{ForestOperation, SmtForestUpdateBatch, SmtUpdateBatch}; +pub use root::RootInfo; + +use crate::{ + Map, Set, Word, + merkle::{ + EmptySubtreeRoots, MerkleError, + smt::{ + SMT_DEPTH, SmtProof, + large_forest::history::{History, VersionId}, + }, + }, +}; + +// SPARSE MERKLE TREE FOREST +// ================================================================================================ + +/// A high-performance forest of sparse merkle trees with pluggable storage.
+/// +/// # Current and Frozen Trees +/// +/// Trees in the forest fall into two categories: +/// +/// 1. **Current:** These trees represent the latest version of their 'tree lineage' and can be +/// modified to generate a new tree version in the forest. +/// 2. **Frozen:** These are historical versions of trees that are no longer current, and are +/// considered 'frozen' and hence cannot be modified to generate a new tree version in the +/// forest. This is because being able to do so would effectively create a "fork" in the history, +/// and hence allow the forest to yield potentially invalid responses with regard to the +/// blockchain history. +/// +/// If an attempt is made to modify a frozen tree, the method in question will yield a +/// [`LargeSmtForestError::InvalidModification`] error as doing so represents a programmer bug. +/// +/// # Performance +/// +/// The performance characteristics of this forest depend heavily on the choice of underlying +/// [`Backend`] implementation. Where something more specific can be said about a particular method +/// call, the documentation for that method will state it. +#[allow(dead_code)] // Temporarily +#[derive(Debug)] +pub struct LargeSmtForest { + /// The backend for storing the full trees that exist as part of the forest. It makes no + /// guarantees as to where the tree data is stored, and **must not be exposed** in the API of + /// the forest for correctness. + backend: B, + + /// The container for the historical versions of each tree stored in the forest, identified by + /// the _current root_ of that tree. + /// + /// This should contain an entry for every tree lineage contained in the forest, under the root + /// of its current tree version. + histories: Map, + + /// A set tracking which lineage histories in `histories` contain actual deltas in order to + /// speed up querying. + /// + /// It must always be maintained as a subset of `histories.keys()`.
+ non_empty_histories: Set, +} + +// CONSTRUCTION AND BASIC QUERIES +// ================================================================================================ + +/// These functions deal with the creation of new forest instances, and hence rely on the ability to +/// query storage to do so. +/// +/// # Performance +/// +/// All the methods in this impl block require access to the underlying [`Backend`] instance to +/// return results. This means that their performance will depend heavily on the specific instance +/// with which the forest was constructed. +/// +/// Where anything more specific can be said about performance, the method documentation will +/// contain more detail. +impl LargeSmtForest { + /// Constructs a new forest backed by the provided `backend`. + /// + /// The constructor will treat whatever state is contained within the provided `backend` as the + /// starting state for the forest. This means that, if you pass a newly-initialized storage, the + /// forest will start in an empty state. Similarly, if you pass a `backend` that already + /// contains some data (loaded from disk, for example), then the forest will start in that state + /// instead. + /// + /// # Errors + /// + /// - [`LargeSmtForestError::Other`] if the forest cannot be started up correctly using the + /// provided `backend`. + pub fn new(_backend: B) -> Result { + todo!("LargeSmtForest::new") + } +} + +/// These methods provide the ability to perform basic queries on the forest without the need to +/// access the underlying tree storage. +/// +/// # Performance +/// +/// All of these methods can be performed fully in-memory, and hence their performance is +/// predictable on a given machine regardless of the choice of [`Backend`] instance being used by +/// the forest. +impl LargeSmtForest { + /// Returns an iterator over all roots that the forest knows about, including those from all + /// historical versions. + /// + /// The iteration order of the roots is unspecified. 
+ pub fn roots(&self) -> impl Iterator { + self.histories + .keys() + .cloned() + .chain(self.histories.values().flat_map(|h| h.roots())) + } + + /// Returns an iterator over the roots for the latest version of every tree in the forest. + /// + /// The iteration order is unspecified. + pub fn current_roots(&self) -> impl Iterator { + self.histories.keys().cloned() + } + + /// Returns an iterator over the historical roots in the forest belonging to the lineage with + /// the provided `current_root`. + /// + /// The iteration order of the roots is guaranteed to move backward in time, with earlier items + /// being roots from versions closer to the present. It does _not_ include the specified + /// `current_root`. + /// + /// # Errors + /// + /// - [`LargeSmtForestError::MerkleError`] if `current_root` is not the current root of any + /// lineage in the forest. + pub fn historical_roots(&self, current_root: Word) -> Result> { + self.histories + .get(&current_root) + .map(|h| h.roots()) + .ok_or(MerkleError::RootNotInStore(current_root).into()) + } + + /// Returns the number of trees in the forest that have unique identity. + /// + /// This is **not** the number of unique tree lineages in the forest, as it includes all + /// historical trees as well. For that, see [`Self::lineage_count`]. + pub fn tree_count(&self) -> usize { + self.roots().count() + } + + /// Returns the number of unique tree lineages in the forest. + /// + /// This is **not** the number of unique trees in the forest, as it does not include all + /// versions in each lineage. For that, see [`Self::tree_count`]. + pub fn lineage_count(&self) -> usize { + self.histories.iter().len() + } + + /// Returns `true` if the provided `root` points to a tree that is the latest version, and + /// `false` otherwise. + /// + /// A tree being the latest version is one that can be modified to yield a new version. In other + /// words it does not represent a historical tree version.
+ pub fn is_latest_version(&self, root: Word) -> bool { + self.histories.contains_key(&root) || *EmptySubtreeRoots::entry(SMT_DEPTH, 0) == root + } +} + +// QUERIES +// ================================================================================================ + +/// These methods pertain to non-mutating queries about the data stored in the forest. They differ +/// from the simple queries in the previous block by requiring access to the backend to function. +/// +/// # Performance +/// +/// All the methods in this impl block require access to the underlying [`Backend`] instance to +/// return results. This means that their performance will depend heavily on the specific instance +/// with which the forest was constructed. +/// +/// Where anything more specific can be said about performance, the method documentation will +/// contain more detail. +impl LargeSmtForest { + /// Returns an opening for the specified `key` in the SMT with the specified `root`, or [`None`] + /// if there is no tree with the specified `root` in the forest. + /// + /// # Errors + /// + /// - [`LargeSmtForestError::MerkleError`] if no tree with the provided `root` exists in the + /// forest, or if the forest does not contain sufficient data to provide an opening for `key`. + pub fn open(&self, _root: Word, _key: Word) -> Result> { + todo!("LargeSmtForest::open") + } + + /// Returns the value associated with the provided `key` in the SMT with the provided `root`, or + /// [`None`] if no such value exists. + /// + /// # Errors + /// + /// - [`LargeSmtForestError::MerkleError`] if no tree with the provided `root` exists in the + /// forest, or if the forest does not contain sufficient data to get the value for `key`. + pub fn get(&self, _root: Word, _key: Word) -> Result> { + todo!("LargeSmtForest::get") + } + + /// Returns data describing what information the forest knows about the provided `root`. 
+ pub fn knows_root(&self, root: Word) -> Result { + if self.histories.contains_key(&root) { + Ok(RootInfo::LatestVersion(self.backend.version(root)?)) + } else if let Some(v) = self.histories.values().find_map(|h| h.version(root)) { + Ok(RootInfo::HistoricalVersion(v)) + } else if root == *EmptySubtreeRoots::entry(SMT_DEPTH, 0) { + Ok(RootInfo::EmptyTree) + } else { + Ok(RootInfo::Missing) + } + } +} + +// SINGLE-TREE MODIFIERS +// ================================================================================================ + +/// These methods pertain to modifications that can be made to a single tree in the forest. They +/// exploit parallelism within the single target tree wherever possible. +/// +/// # Performance +/// +/// All the methods in this impl block require access to the underlying [`Backend`] instance to +/// return results. This means that their performance will depend heavily on the specific instance +/// with which the forest was constructed. +/// +/// Where anything more specific can be said about performance, the method documentation will +/// contain more detail. +#[allow(dead_code)] // Temporarily +impl LargeSmtForest { + /// Performs the provided `updates` on the tree with the provided `root`, adding a single new + /// root to the forest (corresponding to `new_version`) for the entire batch and returning that + /// root. + /// + /// If applying the provided `updates` results in no changes to the tree, then `root` will be + /// returned unchanged and no new tree will be allocated. + /// + /// # Errors + /// + /// - [`LargeSmtForestError::InvalidModification`] if `root` corresponds to a tree that is not + /// the latest in its lineage. + /// - [`LargeSmtForestError::MerkleError`] if `root` is not a root known by the forest.
+ pub fn update_tree( + &mut self, + _root: Word, + _new_version: VersionId, + _updates: SmtUpdateBatch, + ) -> Result { + todo!("LargeSmtForest::modify_tree") + } +} + +// MULTI-TREE MODIFIERS +// ================================================================================================ + +/// These methods pertain to modifications that can be made to multiple trees in the forest at once. +/// They exploit parallelism both between trees and within trees wherever possible. +/// +/// # Performance +/// +/// All the methods in this impl block require access to the underlying [`Backend`] instance to +/// return results. This means that their performance will depend heavily on the specific instance +/// with which the forest was constructed. +/// +/// Where anything more specific can be said about performance, the method documentation will +/// contain more detail. +impl LargeSmtForest { + /// Performs the provided `updates` on the forest, adding at most one new root with version + /// `new_version` to the forest for each target root in `updates` and returning a mapping + /// from old root to new root. + /// + /// If applying the associated batch to any given lineage in the forest results in no changes to + /// that tree, the initial root for that lineage will be returned and no new tree will be + /// allocated. + /// + /// # Errors + /// + /// - [`LargeSmtForestError::InvalidModification`] if any root in the batch corresponds to a + /// tree that is not the latest in its lineage. + /// - [`LargeSmtForestError::MerkleError`] if any root in the batch is not a root known by the + /// forest. + pub fn update_forest( + &mut self, + _new_version: VersionId, + _updates: SmtForestUpdateBatch, + ) -> Result> { + todo!("LargeSmtForest::modify_forest") + } + + /// Removes all tree versions in the forest that are older than the provided `version`. 
+ /// + /// In the case that the current version of a given tree in the forest is older than `version`, + /// that current version is retained. + /// + /// # Errors + /// + /// - [`LargeSmtForestError::Other`] if the backend cannot be accessed to get the full tree + /// versions. + /// + /// # Panics + /// + /// - If there is no history that corresponds to one of the trees that is fully stored. + pub fn truncate(&mut self, version: VersionId) -> Result<()> { + // Truncation in the history is defined such that it never removes a version that could + // possibly serve as the latest delta for a newer version. This is because it cannot safely + // know if a version `v` is between the latest delta `d` and the current version `c`, as it + // has no knowledge of the current version. + // + // Thus, if we have a version `v` such that `d <= v < c`, we need to retain the reversion + // delta `d` in the history to correctly service queries for `v`. If, however, we have `d < + // c <= v` we need to explicitly remove the last delta as well. + // + // To that end, we handle the latter case first, by explicitly calling `History::clear()`. + self.backend.versions()?.for_each(|(root, v)| { + if version >= v { + self.histories + .get_mut(&root) + .expect( + "A full tree did not have a corresponding history, but is required + to", + ) + .clear(); + self.non_empty_histories.remove(&root); + } + }); + + // The other case is `v < c`, which is handled simply by the truncation mechanism in the + // history as we want. In other words, it retains the necessary delta, and so we can just + // call it here. 
+ self.non_empty_histories.iter().for_each(|h| { + self.histories + .get_mut(h) + .expect("Histories did not contain an entry corresponding to a tree") + .truncate(version); + }); + + Ok(()) + } +} diff --git a/miden-crypto/src/merkle/smt/large_forest/operation.rs b/miden-crypto/src/merkle/smt/large_forest/operation.rs new file mode 100644 index 000000000..fb9140c22 --- /dev/null +++ b/miden-crypto/src/merkle/smt/large_forest/operation.rs @@ -0,0 +1,265 @@ +//! This module contains the definition of the [`ForestOperation`] type that encapsulates the +//! possible modifications made to a tree, as well as the concept of a [`SmtUpdateBatch`] of +//! operations to be performed on a single tree in the forest. This is then extended to +//! [`SmtForestUpdateBatch`], which defines a batch of operations across multiple trees. + +use alloc::vec::Vec; + +use crate::{Map, Set, Word}; + +// FOREST OPERATION +// ================================================================================================ + +/// The operations that can be performed on an arbitrary leaf in a tree in a forest. +#[derive(Clone, Debug, Eq, PartialEq)] +pub enum ForestOperation { + /// An insertion of `value` under `key` into the tree. + /// + /// If `key` already exists in the tree, the associated value will be replaced with `value` + /// instead. + Insert { key: Word, value: Word }, + + /// The removal of the `key` and its associated value from the tree. + Remove { key: Word }, +} +impl ForestOperation { + /// Insert the provided `value` into a tree under the provided `key`. + pub fn insert(key: Word, value: Word) -> Self { + Self::Insert { key, value } + } + + /// Remove the provided `key` and its associated value from a tree. + pub fn remove(key: Word) -> Self { + Self::Remove { key } + } + + /// Retrieves the key from the operation. + pub fn key(&self) -> Word { + match self { + ForestOperation::Insert { key, .. 
} => *key, + ForestOperation::Remove { key } => *key, + } + } +} + +// TREE BATCH +// ================================================================================================ + +/// A batch of operations that can be performed on an arbitrary tree in a forest. +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct SmtUpdateBatch { + /// The operations to be performed on a tree. + operations: Vec, +} +impl SmtUpdateBatch { + /// Creates an empty batch of operations, to which operations can subsequently be added using + /// the `add_*` methods. + pub fn empty() -> Self { + Self { operations: vec![] } + } + + /// Creates a batch containing the provided `operations`, retained in the order in which the + /// iterator yields them. + pub fn new(operations: impl Iterator) -> Self { + Self { + operations: operations.collect::>(), + } + } + + /// Adds the provided `operations` to the batch. + pub fn add_operations(&mut self, operations: impl Iterator) { + self.operations.extend(operations); + } + + /// Adds the [`ForestOperation::Insert`] operation for the provided `key` and `value` pair to + /// the batch. + pub fn add_insert(&mut self, key: Word, value: Word) { + self.operations.push(ForestOperation::insert(key, value)); + } + + /// Adds the [`ForestOperation::Remove`] operation for the provided `key` to the batch. + pub fn add_remove(&mut self, key: Word) { + self.operations.push(ForestOperation::remove(key)); + } + + /// Consumes the batch as a vector of operations, containing the last operation for any given + /// `key` in the case that multiple operations per key are encountered. + /// + /// This vector is guaranteed to be sorted by the key on which an operation is performed. + pub fn consume(self) -> Vec { + // As we want to keep the LAST operation for each key, rather than the first, we filter in + // reverse.
+ let mut seen_keys: Set = Set::new(); + let mut ops = self + .operations + .into_iter() + .rev() + .filter(|o| seen_keys.insert(o.key())) + .collect::>(); + ops.sort_by_key(|o| o.key()); + ops + } +} + +impl From for SmtUpdateBatch +where + I: Iterator, +{ + fn from(value: I) -> Self { + Self::new(value) + } +} + +impl From for Vec { + /// The vector is guaranteed to be sorted by the key on which an operation is performed, and to + /// only contain the _last_ operation to be performed on any given key. + fn from(value: SmtUpdateBatch) -> Self { + value.consume() + } +} + +impl Default for SmtUpdateBatch { + fn default() -> Self { + Self::empty() + } +} + +// FOREST BATCH +// ================================================================================================ + +/// A batch of operations that can be performed on an arbitrary forest, consisting of operations +/// associated with specified trees in that forest. +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct SmtForestUpdateBatch { + /// The operations associated with each targeted tree in the forest. + operations: Map, +} + +impl SmtForestUpdateBatch { + /// Creates a new, empty, batch of operations. + pub fn empty() -> Self { + Self { operations: Map::new() } + } + + /// Adds the provided `operations` to be performed on the tree with the provided `root`. + pub fn add_operations( + &mut self, + root: Word, + operations: impl Iterator, + ) { + let batch = self.operations.entry(root).or_insert_with(SmtUpdateBatch::empty); + batch.add_operations(operations); + } + + /// Gets the batch of operations for the tree with the provided `root` for inspection and/or + /// modification. + /// + /// It is assumed that calling this means that the caller wants to insert operations into the + /// associated batch, so a batch will be created even if one was not previously present. 
+ pub fn operations(&mut self, root: Word) -> &mut SmtUpdateBatch { + self.operations.entry(root).or_insert_with(SmtUpdateBatch::empty) + } + + /// Consumes the batch as a map of batches, with each individual batch guaranteed to be in + /// sorted order and contain only the last operation in the batch for any given key. + pub fn consume(self) -> Map> { + self.operations.into_iter().map(|(k, v)| (k, v.consume())).collect() + } +} + +// TESTS +// ================================================================================================ + +#[cfg(feature = "std")] +#[cfg(test)] +mod test { + use itertools::Itertools; + + use super::*; + use crate::rand::test_utils::rand_value; + + #[test] + fn tree_batch() { + // We start by creating an empty tree batch. + let mut batch = SmtUpdateBatch::empty(); + + // Let's make three operations on different keys... + let o1_key: Word = rand_value(); + let o1_value: Word = rand_value(); + let o2_key: Word = rand_value(); + let o3_key: Word = rand_value(); + let o3_value: Word = rand_value(); + + let o1 = ForestOperation::insert(o1_key, o1_value); + let o2 = ForestOperation::remove(o2_key); + let o3 = ForestOperation::insert(o3_key, o3_value); + + // ... and stick them in the batch in various ways. + batch.add_operations(vec![o1.clone()].into_iter()); + batch.add_remove(o2_key); + batch.add_insert(o3_key, o3_value); + + // We save a copy of the batch for later as we have more testing to do. + let batch_tmp = batch.clone(); + + // If we then consume the batch, we should have the operations ordered by their key. + let ops = batch.consume(); + assert!(ops.is_sorted_by_key(|o| o.key())); + + // Let's now make two additional operations with keys that overlap with keys from the first + // three... + let o4_key = o2_key; + let o4_value: Word = rand_value(); + let o5_key = o1_key; + + let o4 = ForestOperation::insert(o4_key, o4_value); + let o5 = ForestOperation::remove(o5_key); + + // ... and also stick them into the batch. 
+ let mut batch = batch_tmp; + batch.add_operations(vec![o4.clone(), o5.clone()].into_iter()); + + // Now if we consume the batch we should have three operations, and they should be the last + // operation for each key. + let ops = batch.consume(); + + assert_eq!(ops.len(), 3); + assert!(ops.is_sorted_by_key(|o| o.key())); + + assert!(ops.contains(&o3)); + assert!(ops.contains(&o4)); + assert!(!ops.contains(&o2)); + assert!(ops.contains(&o5)); + assert!(!ops.contains(&o1)); + } + + #[test] + fn forest_batch() { + // We can start by creating an empty forest batch. + let mut batch = SmtForestUpdateBatch::empty(); + + // Let's start by adding a few operations to a tree. + let t1_root: Word = rand_value(); + let t1_o1 = ForestOperation::insert(rand_value(), rand_value()); + let t1_o2 = ForestOperation::remove(rand_value()); + batch.add_operations(t1_root, vec![t1_o1, t1_o2].into_iter()); + + // We can also add them differently. + let t2_root: Word = rand_value(); + let t2_o1 = ForestOperation::remove(rand_value()); + let t2_o2 = ForestOperation::insert(rand_value(), rand_value()); + batch.operations(t2_root).add_operations(vec![t2_o1, t2_o2].into_iter()); + + // When we consume the batch, each per-tree batch should be unique by key and sorted. + let ops = batch.consume(); + assert_eq!(ops.len(), 2); + + let t1_ops = ops.get(&t1_root).unwrap(); + assert!(t1_ops.is_sorted_by_key(|o| o.key())); + assert_eq!(t1_ops.iter().unique_by(|o| o.key()).count(), 2); + + let t2_ops = ops.get(&t2_root).unwrap(); + assert!(t2_ops.is_sorted_by_key(|o| o.key())); + assert_eq!(t2_ops.iter().unique_by(|o| o.key()).count(), 2); + } +} diff --git a/miden-crypto/src/merkle/smt/large_forest/property_tests.rs b/miden-crypto/src/merkle/smt/large_forest/property_tests.rs new file mode 100644 index 000000000..b59f0e435 --- /dev/null +++ b/miden-crypto/src/merkle/smt/large_forest/property_tests.rs @@ -0,0 +1 @@ +//! This module contains the property tests for the SMT forest. 
diff --git a/miden-crypto/src/merkle/smt/large_forest/root.rs b/miden-crypto/src/merkle/smt/large_forest/root.rs new file mode 100644 index 000000000..c007e0fb8 --- /dev/null +++ b/miden-crypto/src/merkle/smt/large_forest/root.rs @@ -0,0 +1,21 @@ +//! This module contains utility types for working with roots as part of the forest. + +use crate::merkle::smt::large_forest::history::VersionId; + +/// Information about the role that a queried root plays in the forest. +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +pub enum RootInfo { + /// The queried root corresponds to a tree that is the latest version of a given tree in the + /// forest. + LatestVersion(VersionId), + + /// The queried root corresponds to a tree that is _not_ the latest version of a given tree in + /// the forest. + HistoricalVersion(VersionId), + + /// The queried root corresponds to the empty tree. + EmptyTree, + + /// The queried root does not belong to any tree that the forest knows about. + Missing, +} diff --git a/miden-crypto/src/merkle/smt/large_forest/tests.rs b/miden-crypto/src/merkle/smt/large_forest/tests.rs new file mode 100644 index 000000000..49c284ebd --- /dev/null +++ b/miden-crypto/src/merkle/smt/large_forest/tests.rs @@ -0,0 +1 @@ +//! This module contains the handwritten tests for the SMT forest. diff --git a/miden-crypto/src/merkle/smt/mod.rs b/miden-crypto/src/merkle/smt/mod.rs index 6c57c95cb..fb45be911 100644 --- a/miden-crypto/src/merkle/smt/mod.rs +++ b/miden-crypto/src/merkle/smt/mod.rs @@ -29,7 +29,10 @@ pub use large::{ pub use large::{RocksDbConfig, RocksDbStorage}; mod large_forest; -pub use large_forest::{History, HistoryError, HistoryView, LargeSmtForestError}; +pub use large_forest::{ + Backend, BackendError, ForestOperation, LargeSmtForest, LargeSmtForestError, RootInfo, + SmtForestUpdateBatch, SmtUpdateBatch, +}; mod simple; pub use simple::{SimpleSmt, SimpleSmtProof};