diff --git a/miden-crypto/src/merkle/smt/large_forest/error/mod.rs b/miden-crypto/src/merkle/smt/large_forest/error/mod.rs
index 75c7d73b0..5ab8f797d 100644
--- a/miden-crypto/src/merkle/smt/large_forest/error/mod.rs
+++ b/miden-crypto/src/merkle/smt/large_forest/error/mod.rs
@@ -5,9 +5,12 @@ pub mod subtree;
 
 use thiserror::Error;
 
-use crate::merkle::{
-    MerkleError,
-    smt::large_forest::{error::prefix::PrefixError, history::error::HistoryError, storage},
+use crate::{
+    Word,
+    merkle::{
+        MerkleError,
+        smt::large_forest::{error::prefix::PrefixError, history::error::HistoryError, storage},
+    },
 };
 
 // LARGE SMT FOREST ERROR
@@ -31,6 +34,10 @@ pub enum LargeSmtForestError {
     /// Errors with the in-memory tree prefixes in the forest.
     #[error(transparent)]
     PrefixError(#[from] PrefixError),
+
+    /// Raised when an attempt is made to modify a frozen tree, identified here by its root.
+    #[error("Attempted to modify frozen tree with root {0}")]
+    InvalidModification(Word),
 }
 
 /// The result type for use within the large SMT forest portion of the library.
diff --git a/miden-crypto/src/merkle/smt/large_forest/mod.rs b/miden-crypto/src/merkle/smt/large_forest/mod.rs
index 9a49dd199..3fa41c5aa 100644
--- a/miden-crypto/src/merkle/smt/large_forest/mod.rs
+++ b/miden-crypto/src/merkle/smt/large_forest/mod.rs
@@ -37,7 +37,9 @@
 
 mod error;
 pub mod history;
+pub mod operation;
 mod prefix;
+pub mod root;
 pub mod storage;
 pub mod utils;
 
@@ -45,17 +47,47 @@ pub use error::{LargeSmtForestError, Result};
 pub use storage::{Storage, StorageError, StoredTreeHandle};
 pub use utils::SubtreeLevels;
 
-use crate::{Map, Word, merkle::smt::large_forest::prefix::InMemoryPrefix};
-
+use crate::{
+    Map, Set, Word,
+    merkle::{
+        EmptySubtreeRoots,
+        smt::{
+            SMT_DEPTH, SmtProof,
+            large_forest::{
+                history::{History, VersionId},
+                operation::{SmtForestUpdateBatch, SmtUpdateBatch},
+                prefix::InMemoryPrefix,
+                root::RootInfo,
+            },
+        },
+    },
+};
 // SPARSE MERKLE TREE FOREST
 // ================================================================================================
 
 /// A high-performance forest of sparse merkle trees with pluggable storage.
 ///
+/// # Current and Frozen Trees
+///
+/// Trees in the forest fall into two categories:
+///
+/// 1. **Current:** These trees represent the latest version of their 'tree lineage' and can be
+///    modified to generate a new tree version in the forest.
+/// 2. **Frozen:** These are historical versions of trees that are no longer current, and are
+///    considered 'frozen' and hence cannot be modified to generate a new tree version in the
+///    forest. This is because being able to do so would effectively create a "fork" in the history,
+///    and hence allow the forest to yield potentially invalid responses with regard to the
+///    blockchain history.
+///
+/// If an attempt is made to modify a frozen tree, the method in question will yield an
+/// [`LargeSmtForestError::InvalidModification`] error as doing so represents a programmer bug.
+///
 /// # Performance
 ///
-/// The performance characteristics of this forest
-#[allow(dead_code)] // Temporary, while the tree gets built.
+/// The performance characteristics of this forest depend heavily on the choice of underlying
+/// [`Storage`] implementation. Where something more specific can be said about a particular method
+/// call, the documentation for that method will state it.
+#[allow(dead_code)] // Temporary, while the forest gets built.
 #[derive(Debug)]
 pub struct LargeSmtForest<S: Storage> {
     /// The underlying data storage for the portion of the tree that is not guaranteed to be in
@@ -68,13 +100,41 @@ pub struct LargeSmtForest<S: Storage> {
 
     /// The container for the in-memory prefixes of each tree stored in the forest, identified by
     /// their current root.
+    ///
+    /// Must contain an entry for every root that has an entry in both [`Self::histories`] and
+    /// [`Self::full_tree_versions`].
     prefixes: Map<Word, InMemoryPrefix>,
+
+    /// The container for the historical versions of each tree stored in the forest, identified by
+    /// the current root.
+    ///
+    /// Must contain an entry for every root that has an entry in both [`Self::prefixes`] and
+    /// [`Self::full_tree_versions`].
+    histories: Map<Word, History>,
+
+    /// A mapping from the roots of the full trees stored in this forest to their corresponding
+    /// versions.
+    ///
+    /// Must contain an entry for every root that has an entry in both [`Self::prefixes`] and
+    /// [`Self::histories`].
+    full_tree_versions: Map<Word, VersionId>,
 }
 
-impl<S: Storage> LargeSmtForest<S> {
-    // CONSTRUCTORS
-    // --------------------------------------------------------------------------------------------
+// CONSTRUCTION AND BASIC QUERIES
+// ================================================================================================
 
+/// These functions deal with the creation of new forest instances, and hence rely on the ability to
+/// query storage to do so.
+///
+/// # Performance
+///
+/// All the methods in this impl block require access to the underlying [`Storage`] instance to
+/// return results. This means that their performance will depend heavily on the specific instance
+/// with which the forest was constructed.
+///
+/// Where anything more specific can be said about performance, the method documentation will
+/// contain more detail.
+impl<S: Storage> LargeSmtForest<S> {
     /// Constructs a new forest backed by the provided `storage`.
     ///
     /// The constructor will treat whatever state is contained within the provided `storage` as the
@@ -87,6 +147,196 @@ impl<S: Storage> LargeSmtForest<S> {
     /// - [`LargeSmtForestError::StorageError`] if the forest cannot be started up correctly from
     ///   storage.
     pub fn new(_storage: S) -> Result<Self> {
-        todo!()
+        todo!("LargeSmtForest::new")
+    }
+}
+
+/// These methods provide the ability to perform basic queries on the forest without the need to
+/// access the underlying tree storage.
+///
+/// # Performance
+///
+/// All of these methods can be performed fully in-memory, and hence their performance is
+/// predictable on a given machine regardless of the choice of [`Storage`] instance for the forest.
+impl<S: Storage> LargeSmtForest<S> {
+    /// Returns a set of all the roots that the forest knows about, including those of all
+    /// versions.
+    pub fn roots(&self) -> Set<Word> {
+        let mut roots: Set<Word> = self.prefixes.keys().copied().collect();
+        self.histories.values().for_each(|h| roots.extend(h.roots()));
+        roots
+    }
+
+    /// Returns the number of trees in the forest.
+    pub fn tree_count(&self) -> usize {
+        // History::num_versions does not account for the 'current version' so we add one to each of
+        // those counts, and then we add one overall to account for the "phantom empty tree".
+        self.histories.values().map(|h| h.num_versions() + 1).sum::<usize>() + 1
+    }
+
+    /// Returns `true` if the provided `root` points to a tree that is the latest version, and
+    /// `false` otherwise.
+    ///
+    /// A tree being the latest version is one that can be modified to yield a new version.
+    pub fn is_latest_version(&self, root: Word) -> bool {
+        self.prefixes.contains_key(&root) || *EmptySubtreeRoots::entry(SMT_DEPTH, 0) == root
+    }
+}
+
+// QUERIES
+// ================================================================================================
+
+/// These methods pertain to non-mutating queries about the data stored in the forest. They differ
+/// from the simple queries in the previous block by requiring access to storage to function.
+///
+/// # Performance
+///
+/// All the methods in this impl block require access to the underlying [`Storage`] instance to
+/// return results. This means that their performance will depend heavily on the specific instance
+/// with which the forest was constructed.
+///
+/// Where anything more specific can be said about performance, the method documentation will
+/// contain more detail.
+impl<S: Storage> LargeSmtForest<S> {
+    /// Returns an opening for the specified `key` in the SMT with the specified `root`.
+    ///
+    /// # Errors
+    ///
+    /// - [`LargeSmtForestError::StorageError`] if an error occurs when trying to read from storage.
+    /// - [`LargeSmtForestError::MerkleError`] if no tree with the provided `root` exists in the
+    ///   forest, or if the forest does not contain sufficient data to provide an opening for `key`.
+    pub fn open(&self, _root: Word, _key: Word) -> Result<SmtProof> {
+        todo!("LargeSmtForest::open")
+    }
+
+    /// Returns data describing what information the forest knows about the provided `root`.
+    pub fn contains_root(&self, root: Word) -> RootInfo {
+        if self.prefixes.contains_key(&root) {
+            RootInfo::LatestVersion
+        } else if self.histories.values().any(|h| h.is_known_root(root)) {
+            // Histories are keyed by the current root of their lineage, so a historical root is
+            // never a key of `histories`; each history has to be asked about it instead.
+            RootInfo::HistoricalVersion
+        } else if root == *EmptySubtreeRoots::entry(SMT_DEPTH, 0) {
+            RootInfo::EmptyTree
+        } else {
+            RootInfo::Missing
+        }
+    }
+}
+
+// SINGLE-TREE MODIFIERS
+// ================================================================================================
+
+/// These methods pertain to modifications that can be made to a single tree in the forest. They
+/// exploit parallelism within the single target tree wherever possible.
+///
+/// # Performance
+///
+/// All the methods in this impl block require access to the underlying [`Storage`] instance to
+/// return results. This means that their performance will depend heavily on the specific instance
+/// with which the forest was constructed.
+///
+/// Where anything more specific can be said about performance, the method documentation will
+/// contain more detail.
+#[allow(dead_code)] // Temporary, while the forest gets built.
+impl<S: Storage> LargeSmtForest<S> {
+    /// Performs the provided `operations` on the tree with the provided `root`, adding a single new
+    /// root to the forest for the entire batch and returning that root.
+    ///
+    /// If applying the `operations` results in no changes to the tree, then `root` will be returned
+    /// unchanged and no new tree will be allocated.
+    ///
+    /// # Errors
+    ///
+    /// - [`LargeSmtForestError::InvalidModification`] if `root` refers to a frozen tree.
+    /// - [`LargeSmtForestError::StorageError`] if an error occurs when trying to access storage.
+    // NOTE(review): `SmtUpdateBatch` carries its own version as well as `_new_version` being passed
+    // here; confirm which of the two is authoritative once this is implemented.
+    pub fn batch_modify(
+        &mut self,
+        _root: Word,
+        _new_version: VersionId,
+        _operations: SmtUpdateBatch,
+    ) -> Result<Word> {
+        todo!("LargeSmtForest::batch_modify")
+    }
+
+    /// Inserts the specified `key`, `value` pair into the tree in the forest with the specified
+    /// `root`, returning the new root of that tree.
+    ///
+    /// Any insertion operation where `root` is equal to the root of the empty tree will generate a
+    /// new unique tree in the forest, rather than adding history to an existing tree.
+    ///
+    /// # Errors
+    ///
+    /// - [`LargeSmtForestError::InvalidModification`] if `root` refers to a frozen tree.
+    /// - [`LargeSmtForestError::StorageError`] if an error occurs when trying to access storage.
+    // NOTE(review): the documentation describes a `key`, `value` pair, but the third parameter is
+    // an `SmtProof`; confirm whether this should be `_value: Word` before implementing.
+    fn insert(&mut self, _root: Word, _key: Word, _proof: SmtProof) -> Result<Word> {
+        todo!("LargeSmtForest::insert")
+    }
+
+    /// Removes the `key` and its associated value from the tree specified by `root`, returning the
+    /// new root of the tree after performing that modification.
+    ///
+    /// Note that if `key` does not exist in the tree with the provided `root`, then `root` will be
+    /// returned unchanged and no new tree will be allocated.
+    ///
+    /// # Errors
+    ///
+    /// - [`LargeSmtForestError::InvalidModification`] if `root` refers to a frozen tree.
+    /// - [`LargeSmtForestError::StorageError`] if an error occurs when trying to access storage.
+    fn remove(&mut self, _root: Word, _key: Word) -> Result<Word> {
+        todo!("LargeSmtForest::remove")
+    }
+}
+
+// MULTI-TREE MODIFIERS
+// ================================================================================================
+
+/// These methods pertain to modifications that can be made to multiple trees in the forest at once.
+/// They exploit parallelism both between trees and within trees wherever possible.
+///
+/// # Performance
+///
+/// All the methods in this impl block require access to the underlying [`Storage`] instance to
+/// return results. This means that their performance will depend heavily on the specific instance
+/// with which the forest was constructed.
+///
+/// Where anything more specific can be said about performance, the method documentation will
+/// contain more detail.
+impl<S: Storage> LargeSmtForest<S> {
+    /// Performs the provided `operations` on the forest, adding at most one new root to the forest
+    /// for each target root in `operations`, returning a mapping from old root to new root.
+    ///
+    /// If applying the associated batch to any given tree in the forest results in no changes to
+    /// the tree, the initial root will be returned and no new tree will be allocated.
+    ///
+    /// # Errors
+    ///
+    /// - [`LargeSmtForestError::InvalidModification`] if any target root refers to a frozen tree.
+    /// - [`LargeSmtForestError::StorageError`] if an error occurs when trying to access storage.
+    pub fn batch_modify_forest(
+        &mut self,
+        _operations: SmtForestUpdateBatch,
+    ) -> Result<Map<Word, Word>> {
+        todo!("LargeSmtForest::batch_modify_forest")
+    }
+
+    /// Removes all tree versions in the forest that are older than the provided `version`.
+    ///
+    /// In the case that the current version of a given tree in the forest is older than `version`,
+    /// that current version is nevertheless retained.
+    ///
+    /// # Panics
+    ///
+    /// - If a root in `full_tree_versions` has no corresponding entry in `histories`, as this
+    ///   violates the invariants of the forest.
+    pub fn truncate(&mut self, version: VersionId) {
+        // We start by clearing any history for which the `version` corresponds to the latest
+        // version and hence the full tree.
+        //
+        // NOTE(review): trees whose full version is strictly older than `version` are left to
+        // `History::truncate` below; confirm that it drops their whole history, or compare with
+        // `<=` here instead.
+        for (root, full_version) in &self.full_tree_versions {
+            if *full_version == version {
+                self.histories
+                    .get_mut(root)
+                    .expect("A full tree did not have a corresponding history, but is required to")
+                    .clear();
+            }
+        }
+
+        // Then we just run through all the histories and truncate them to this version if needed,
+        // which provides the correct behaviour.
+        for history in self.histories.values_mut() {
+            history.truncate(version);
+        }
     }
 }
diff --git a/miden-crypto/src/merkle/smt/large_forest/operation.rs b/miden-crypto/src/merkle/smt/large_forest/operation.rs
new file mode 100644
index 000000000..73e327d53
--- /dev/null
+++ b/miden-crypto/src/merkle/smt/large_forest/operation.rs
@@ -0,0 +1,260 @@
+//! This module contains the definition of the [`Operation`] type that encapsulates the possible
+//! modifications made to a tree, as well as the concept of a [`SmtUpdateBatch`] of operations
+//! to be performed on a single tree in the forest. This is then extended to
+//! [`SmtForestUpdateBatch`], which defines a batch of operations across multiple trees.
+
+use alloc::vec::Vec;
+
+use crate::{Map, Set, Word, merkle::smt::large_forest::history::VersionId};
+
+// FOREST OPERATION
+// ================================================================================================
+
+/// The operations that can be performed on an arbitrary leaf in a tree in a forest.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub enum Operation {
+    /// An insertion of `value` under `key` into the tree.
+    ///
+    /// If `key` already exists in the tree, the associated value will be replaced with `value`
+    /// instead.
+    Insert { key: Word, value: Word },
+
+    /// The removal of the `key` and its associated value from the tree.
+    Remove { key: Word },
+}
+
+impl Operation {
+    /// Insert the provided `value` into a tree under the provided `key`.
+    pub fn insert(key: Word, value: Word) -> Self {
+        Self::Insert { key, value }
+    }
+
+    /// Remove the provided `key` and its associated value from a tree.
+    pub fn remove(key: Word) -> Self {
+        Self::Remove { key }
+    }
+
+    /// Retrieves the key that the operation acts upon, regardless of the kind of operation.
+    pub fn key(&self) -> Word {
+        match self {
+            Self::Insert { key, .. } | Self::Remove { key } => *key,
+        }
+    }
+}
+
+// TREE BATCH
+// ================================================================================================
+
+/// A batch of operations that can be performed on an arbitrary tree in a forest.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct SmtUpdateBatch {
+    /// The operations to be performed on a tree, in the order in which they were added.
+    operations: Vec<Operation>,
+
+    /// The version that corresponds to the tree produced by applying these `operations`.
+    version: VersionId,
+}
+
+impl SmtUpdateBatch {
+    /// Creates an empty batch of operations that, when applied, will produce a tree with the
+    /// provided `version`.
+    pub fn empty(version: VersionId) -> Self {
+        Self { operations: Vec::new(), version }
+    }
+
+    /// Creates a batch containing the provided `operations` that will produce a tree with the
+    /// provided `version` when applied.
+    pub fn new(version: VersionId, operations: impl IntoIterator<Item = Operation>) -> Self {
+        Self { operations: operations.into_iter().collect(), version }
+    }
+
+    /// Adds the provided `operations` to the end of the batch, preserving their order.
+    pub fn add_operations(&mut self, operations: impl IntoIterator<Item = Operation>) {
+        self.operations.extend(operations);
+    }
+
+    /// Adds the [`Operation::Insert`] operation for the provided `key` and `value` pair to the
+    /// batch.
+    pub fn add_insert(&mut self, key: Word, value: Word) {
+        self.operations.push(Operation::insert(key, value));
+    }
+
+    /// Adds the [`Operation::Remove`] operation for the provided `key` to the batch.
+    pub fn add_remove(&mut self, key: Word) {
+        self.operations.push(Operation::remove(key));
+    }
+
+    /// Consumes the batch as a vector of operations, containing the last operation for any given
+    /// `key` in the case that multiple operations per key are encountered.
+    ///
+    /// This vector is guaranteed to be sorted by the key on which an operation is performed.
+    pub fn consume(self) -> Vec<Operation> {
+        // As we want to keep the LAST operation for each key, rather than the first, we filter in
+        // reverse. `Set::insert` returns `false` for a key that has already been seen, which drops
+        // every earlier operation on that key.
+        let mut seen_keys: Set<Word> = Set::new();
+        let mut ops = self
+            .operations
+            .into_iter()
+            .rev()
+            .filter(|o| seen_keys.insert(o.key()))
+            .collect::<Vec<_>>();
+
+        // Keys are unique at this point, so an unstable sort yields the same result as a stable one.
+        ops.sort_unstable_by_key(|o| o.key());
+        ops
+    }
+}
+
+impl From<SmtUpdateBatch> for Vec<Operation> {
+    /// The vector is guaranteed to be sorted by the key on which an operation is performed, and to
+    /// only contain the _last_ operation to be performed on any given key.
+    fn from(value: SmtUpdateBatch) -> Self {
+        value.consume()
+    }
+}
+
+// FOREST BATCH
+// ================================================================================================
+
+/// A batch of operations that can be performed on an arbitrary forest, consisting of operations
+/// associated with specified trees in that forest.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct SmtForestUpdateBatch {
+    /// The operations associated with each targeted tree in the forest, keyed by the root of the
+    /// tree that they target.
+    operations: Map<Word, SmtUpdateBatch>,
+}
+
+impl SmtForestUpdateBatch {
+    /// Creates a new, empty, batch of operations.
+    pub fn empty() -> Self {
+        Self { operations: Map::new() }
+    }
+
+    /// Adds the provided `operations` to be performed on the tree with the provided `root` to
+    /// produce a tree with the provided `version` when applied.
+    ///
+    /// If a batch already exists for `root`, the `operations` are appended to it and the provided
+    /// `version` is ignored in favour of the version that batch was created with.
+    pub fn add_operations(
+        &mut self,
+        root: Word,
+        version: VersionId,
+        operations: impl IntoIterator<Item = Operation>,
+    ) {
+        self.operations(root, version).add_operations(operations.into_iter());
+    }
+
+    /// Gets the batch of operations for the tree with the provided `root` for inspection and/or
+    /// modification.
+    ///
+    /// It is assumed that calling this means that the caller wants to insert operations into the
+    /// associated batch, so a batch will be created even if one was not previously present. If no
+    /// such batch exists, a new one will be created using the provided `version_if_ne` as the
+    /// version.
+    pub fn operations(&mut self, root: Word, version_if_ne: VersionId) -> &mut SmtUpdateBatch {
+        self.operations
+            .entry(root)
+            .or_insert_with(|| SmtUpdateBatch::empty(version_if_ne))
+    }
+
+    /// Consumes the batch as a map of batches, with each individual batch guaranteed to be in
+    /// sorted order and contain only the last operation in the batch for any given key.
+    pub fn consume(self) -> Map<Word, Vec<Operation>> {
+        self.operations
+            .into_iter()
+            .map(|(root, batch)| (root, batch.consume()))
+            .collect()
+    }
+}
+
+// TESTS
+// ================================================================================================
+
+#[cfg(feature = "std")]
+#[cfg(test)]
+mod test {
+    use itertools::Itertools;
+
+    use super::*;
+    use crate::rand::test_utils::rand_value;
+
+    #[test]
+    fn tree_batch() {
+        // We start by creating an empty tree batch.
+        let mut batch = SmtUpdateBatch::empty(0);
+
+        // Let's make three operations on different keys...
+        let o1_key: Word = rand_value();
+        let o1_value: Word = rand_value();
+        let o2_key: Word = rand_value();
+        let o3_key: Word = rand_value();
+        let o3_value: Word = rand_value();
+
+        let o1 = Operation::insert(o1_key, o1_value);
+        let o2 = Operation::remove(o2_key);
+        let o3 = Operation::insert(o3_key, o3_value);
+
+        // ... and stick them in the batch in various ways
+        batch.add_operations(vec![o1.clone()].into_iter());
+        batch.add_remove(o2_key);
+        batch.add_insert(o3_key, o3_value);
+
+        // We save a copy of the batch for later as we have more testing to do.
+        let batch_tmp = batch.clone();
+
+        // If we then consume the batch, we should have the operations ordered by their key.
+        let ops = batch.consume();
+        assert!(ops.is_sorted_by_key(|o| o.key()));
+
+        // Let's now make two additional operations with keys that overlap with keys from the first
+        // three...
+        let o4_key = o2_key;
+        let o4_value: Word = rand_value();
+        let o5_key = o1_key;
+
+        let o4 = Operation::insert(o4_key, o4_value);
+        let o5 = Operation::remove(o5_key);
+
+        // ... and also stick them into the batch.
+        let mut batch = batch_tmp;
+        batch.add_operations(vec![o4.clone(), o5.clone()].into_iter());
+
+        // Now if we consume the batch we should have three operations, and they should be the last
+        // operation for each key.
+        let ops = batch.consume();
+
+        assert_eq!(ops.len(), 3);
+        assert!(ops.is_sorted_by_key(|o| o.key()));
+
+        // `o4` shares its key with `o2` and `o5` shares its key with `o1`, so only the later
+        // operation of each pair may survive; `o3` has a key of its own and is kept as-is.
+        assert!(ops.contains(&o3));
+        assert!(ops.contains(&o4));
+        assert!(!ops.contains(&o2));
+        assert!(ops.contains(&o5));
+        assert!(!ops.contains(&o1));
+    }
+
+    #[test]
+    fn forest_batch() {
+        // We can start by creating an empty forest batch.
+        let mut batch = SmtForestUpdateBatch::empty();
+
+        // Let's start by adding a few operations to a tree.
+        let t1_root: Word = rand_value();
+        let t1_o1 = Operation::insert(rand_value(), rand_value());
+        let t1_o2 = Operation::remove(rand_value());
+        batch.add_operations(t1_root, 0, vec![t1_o1, t1_o2].into_iter());
+
+        // We can also add them differently, by first getting hold of the per-tree batch.
+        let t2_root: Word = rand_value();
+        let t2_o1 = Operation::remove(rand_value());
+        let t2_o2 = Operation::insert(rand_value(), rand_value());
+        batch.operations(t2_root, 1).add_operations(vec![t2_o1, t2_o2].into_iter());
+
+        // When we consume the batch, each per-tree batch should be unique by key and sorted.
+        let ops = batch.consume();
+        assert_eq!(ops.len(), 2);
+
+        let t1_ops = ops.get(&t1_root).unwrap();
+        assert!(t1_ops.is_sorted_by_key(|o| o.key()));
+        assert_eq!(t1_ops.iter().unique_by(|o| o.key()).count(), 2);
+
+        let t2_ops = ops.get(&t2_root).unwrap();
+        assert!(t2_ops.is_sorted_by_key(|o| o.key()));
+        assert_eq!(t2_ops.iter().unique_by(|o| o.key()).count(), 2);
+    }
+}
diff --git a/miden-crypto/src/merkle/smt/large_forest/prefix.rs b/miden-crypto/src/merkle/smt/large_forest/prefix.rs
index 04b963c4d..4e2bf1ba3 100644
--- a/miden-crypto/src/merkle/smt/large_forest/prefix.rs
+++ b/miden-crypto/src/merkle/smt/large_forest/prefix.rs
@@ -22,7 +22,7 @@ use crate::{
 
 /// An in-memory tree prefix that stores all nodes for the first `n` levels of the tree in
 /// fully-materialised form.
-#[allow(dead_code)] // Temporarily
+#[allow(dead_code)] // Temporary
 #[derive(Clone, Debug, Eq, PartialEq)]
 pub struct InMemoryPrefix {
     /// The number of levels that are stored in the prefix, including the root level.
@@ -38,7 +38,7 @@ pub struct InMemoryPrefix {
     pub nodes: Vec,
 }
 
-#[allow(dead_code)] // Temporarily
+#[allow(dead_code)] // Temporary
 impl InMemoryPrefix {
     // CONSTRUCTORS
     // --------------------------------------------------------------------------------------------
diff --git a/miden-crypto/src/merkle/smt/large_forest/root.rs b/miden-crypto/src/merkle/smt/large_forest/root.rs
new file mode 100644
index 000000000..17e79cd55
--- /dev/null
+++ b/miden-crypto/src/merkle/smt/large_forest/root.rs
@@ -0,0 +1,19 @@
+//! This module contains utility types for working with roots as part of the forest.
+
+/// Information about the role that a queried root plays in the forest.
+///
+/// The variants are mutually exclusive, so a queried root is described by exactly one of them.
+#[derive(Copy, Clone, Debug, Eq, PartialEq)]
+pub enum RootInfo {
+    /// The queried root corresponds to a tree that is the latest version of a given tree in the
+    /// forest.
+    LatestVersion,
+
+    /// The queried root corresponds to a tree that is _not_ the latest version of a given tree in
+    /// the forest.
+    HistoricalVersion,
+
+    /// The queried root corresponds to the empty tree.
+    EmptyTree,
+
+    /// The queried root does not belong to any tree that the forest knows about.
+    Missing,
+}
diff --git a/miden-crypto/src/merkle/smt/large_forest/utils.rs b/miden-crypto/src/merkle/smt/large_forest/utils.rs
index f3269c50e..f30feaae8 100644
--- a/miden-crypto/src/merkle/smt/large_forest/utils.rs
+++ b/miden-crypto/src/merkle/smt/large_forest/utils.rs
@@ -37,9 +37,9 @@ pub type LinearIndex = u64;
 /// Any instance of this type should see that the following properties hold:
 ///
 /// - The root is a level of its own. This is level 0, to follow the convention used by
-///   [`crate::merkle::smt::NodeIndex`]. This means that if the level count begins at the top of the
-///   tree, it should include the root level. By way of example, a tree with 8 leaves has _4_ levels
-///   in this counting.
+///   [`NodeIndex`]. This means that if the level count begins at the top of the tree, it should
+///   include the root level. By way of example, a tree with 8 leaves has _4_ levels in this
+///   counting.
 /// - You cannot have a zero number of levels, which is enforced by construction.
 /// - The number of levels cannot exceed [`MAX_NUM_SUBTREE_LEVELS`]
 #[derive(Clone, Copy, Debug, Eq, Ord, PartialEq, PartialOrd)]
diff --git a/miden-crypto/src/merkle/smt/mod.rs b/miden-crypto/src/merkle/smt/mod.rs
index 138995144..27b41873d 100644
--- a/miden-crypto/src/merkle/smt/mod.rs
+++ b/miden-crypto/src/merkle/smt/mod.rs
@@ -29,7 +29,6 @@ pub use large::{
 pub use large::{RocksDbConfig, RocksDbStorage};
 
 pub mod large_forest;
-
 pub use large_forest::{
     LargeSmtForest, LargeSmtForestError, Storage as ForestStorage, StorageError, StoredTreeHandle,
     SubtreeLevels,