diff --git a/CHANGELOG.md b/CHANGELOG.md index 23aa7bcf90..ec4a252ee1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,7 @@ - [BREAKING] Upgraded direct `rand` dependencies to 0.10, updating RNG trait bounds and removing direct `rand_hc` usage ([#995](https://github.com/0xMiden/crypto/pull/995)). - perf: fuse per-group accumulator and defer allocations ([#1008](https://github.com/0xMiden/crypto/pull/1008)) - [BREAKING] Reorganized `miden-lifted-stark` internals: consolidated `align`, `bitrev`, `horner`, and `packing` helpers under a new `util` module; moved `reconstruct_quotient` onto `LiftedCoset`; removed the legacy `fri::*` re-export facade ([#1000](https://github.com/0xMiden/crypto/pull/1000)). +- [BREAKING] Extracted `BackendReader`, allowing `LargeSmtForest` to work with read-only storage backends ([#986](https://github.com/0xMiden/crypto/pull/986)). ## 0.25.0 (2026-05-01) diff --git a/Makefile b/Makefile index 0c8d54df82..f1f35d3e4f 100644 --- a/Makefile +++ b/Makefile @@ -105,7 +105,7 @@ test-p3-parallel: ## Run Miden STARK crate tests with the parallel feature enabl .PHONY: test-large-smt test-large-smt: ## Run large SMT unit tests and RocksDB integration tests - cargo nextest run --success-output immediate --profile large-smt --cargo-profile test-release --features rocksdb + cargo nextest run --success-output immediate --profile large-smt --cargo-profile test-release --features persistent-forest .PHONY: test test: test-default test-no-std test-docs test-large-smt ## Run all tests except concurrent SMT tests diff --git a/miden-crypto/src/merkle/smt/large/mod.rs b/miden-crypto/src/merkle/smt/large/mod.rs index cda428cc11..b5b91378af 100644 --- a/miden-crypto/src/merkle/smt/large/mod.rs +++ b/miden-crypto/src/merkle/smt/large/mod.rs @@ -336,7 +336,7 @@ type MutatedLeaves = (MutatedSubtreeLeaves, Map, Map, /// /// `LargeSmt` implements [`Clone`] when its storage is cloneable. The in-memory top is shared and /// detaches on mutation. -#[derive(Debug)] +#[derive(Clone, Debug)] pub struct LargeSmt { storage: S, /// Shared flat array representation of in-memory nodes. @@ -351,17 +351,6 @@ pub struct LargeSmt { entry_count: usize, } -impl Clone for LargeSmt { - fn clone(&self) -> Self { - Self { - storage: self.storage.clone(), - in_memory_nodes: self.in_memory_nodes.clone(), - leaf_count: self.leaf_count, - entry_count: self.entry_count, - } - } -} - impl LargeSmt { // CONSTANTS // -------------------------------------------------------------------------------------------- @@ -478,6 +467,7 @@ impl LargeSmt { >::get_inner_node(self, index) } + // Triggers copy-on-write: clones the shared node array only if other references exist. pub(crate) fn in_memory_nodes_mut(&mut self) -> &mut [Word] { Arc::make_mut(&mut self.in_memory_nodes) } @@ -526,11 +516,6 @@ impl LargeSmt { entry_count: self.entry_count, }) } -} - -impl LargeSmt { - // STATE MUTATORS - // -------------------------------------------------------------------------------------------- /// Inserts a value at the specified key, returning the previous value associated with that key. /// Recall that by definition, any key that hasn't been updated is associated with diff --git a/miden-crypto/src/merkle/smt/large_forest/backend/memory/mod.rs b/miden-crypto/src/merkle/smt/large_forest/backend/memory/mod.rs index 0b6aa1a475..e5a6001af1 100644 --- a/miden-crypto/src/merkle/smt/large_forest/backend/memory/mod.rs +++ b/miden-crypto/src/merkle/smt/large_forest/backend/memory/mod.rs @@ -12,7 +12,7 @@ use crate::{ merkle::smt::{ LeafIndex, SMT_DEPTH, Smt, SmtLeaf, SmtProof, VersionId, large_forest::{ - Backend, + Backend, BackendReader, backend::{BackendError, MutationSet, Result}, operation::{SmtForestUpdateBatch, SmtUpdateBatch}, root::{LineageId, TreeEntry, TreeWithRoot}, @@ -20,6 +20,51 @@ use crate::{ }, }; +// IN-MEMORY BACKEND SNAPSHOT +// ================================================================================================ + +/// A read-only, point-in-time snapshot of an [`InMemoryBackend`]. +/// +/// This type intentionally implements only [`BackendReader`], not [`Backend`]. It is returned by +/// [`InMemoryBackend::reader`] to hand out a detached copy of the backend state without exposing +/// any mutation capabilities. +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct InMemoryBackendSnapshot(InMemoryBackend); + +impl BackendReader for InMemoryBackendSnapshot { + fn open(&self, lineage: LineageId, key: Word) -> Result { + self.0.open(lineage, key) + } + + fn get_leaf(&self, lineage: LineageId, leaf_index: LeafIndex) -> Result { + self.0.get_leaf(lineage, leaf_index) + } + + fn get(&self, lineage: LineageId, key: Word) -> Result> { + self.0.get(lineage, key) + } + + fn version(&self, lineage: LineageId) -> Result { + self.0.version(lineage) + } + + fn lineages(&self) -> Result> { + self.0.lineages() + } + + fn trees(&self) -> Result> { + self.0.trees() + } + + fn entry_count(&self, lineage: LineageId) -> Result { + self.0.entry_count(lineage) + } + + fn entries(&self, lineage: LineageId) -> Result>> { + self.0.entries(lineage) + } +} + // IN-MEMORY BACKEND // ================================================================================================ @@ -37,12 +82,17 @@ impl InMemoryBackend { let trees = Map::default(); Self { trees } } + + /// Converts this backend into a read-only snapshot. + pub fn into_snapshot(self) -> InMemoryBackendSnapshot { + InMemoryBackendSnapshot(self) + } } -// BACKEND TRAIT +// BACKEND READER TRAIT // ================================================================================================ -impl Backend for InMemoryBackend { +impl BackendReader for InMemoryBackend { /// Returns an opening for the specified `key` in the SMT with the specified `lineage`. /// /// # Errors @@ -134,6 +184,17 @@ impl Backend for InMemoryBackend { let tree = self.trees.get(&lineage).ok_or(BackendError::UnknownLineage(lineage))?; Ok(tree.tree.entries().map(|(k, v)| Ok(TreeEntry { key: *k, value: *v }))) } +} + +// BACKEND TRAIT +// ================================================================================================ + +impl Backend for InMemoryBackend { + type Reader = InMemoryBackendSnapshot; + + fn reader(&self) -> Result { + Ok(self.clone().into_snapshot()) + } /// Adds the provided `lineage` to the forest. /// diff --git a/miden-crypto/src/merkle/smt/large_forest/backend/memory/property_tests.rs b/miden-crypto/src/merkle/smt/large_forest/backend/memory/property_tests.rs index 466ab48294..b8b5266acd 100644 --- a/miden-crypto/src/merkle/smt/large_forest/backend/memory/property_tests.rs +++ b/miden-crypto/src/merkle/smt/large_forest/backend/memory/property_tests.rs @@ -9,7 +9,7 @@ use proptest::prelude::*; use crate::{ EMPTY_WORD, merkle::smt::{ - Backend, Smt, SmtForestUpdateBatch, SmtUpdateBatch, TreeWithRoot, + Backend, BackendReader, Smt, SmtForestUpdateBatch, SmtUpdateBatch, TreeWithRoot, large_forest::{ InMemoryBackend, test_utils::{ diff --git a/miden-crypto/src/merkle/smt/large_forest/backend/memory/tests.rs b/miden-crypto/src/merkle/smt/large_forest/backend/memory/tests.rs index 3268043ae8..3725f71a7a 100644 --- a/miden-crypto/src/merkle/smt/large_forest/backend/memory/tests.rs +++ b/miden-crypto/src/merkle/smt/large_forest/backend/memory/tests.rs @@ -13,7 +13,7 @@ use itertools::Itertools; use crate::{ EMPTY_WORD, Word, merkle::smt::{ - Backend, BackendError, Smt, SmtForestUpdateBatch, SmtUpdateBatch, VersionId, + Backend, BackendError, BackendReader, Smt, SmtForestUpdateBatch, SmtUpdateBatch, VersionId, large_forest::{ InMemoryBackend, backend::Result, diff --git a/miden-crypto/src/merkle/smt/large_forest/backend/mod.rs b/miden-crypto/src/merkle/smt/large_forest/backend/mod.rs index fc6076fcc4..a225ea1767 100644 --- a/miden-crypto/src/merkle/smt/large_forest/backend/mod.rs +++ b/miden-crypto/src/merkle/smt/large_forest/backend/mod.rs @@ -25,17 +25,18 @@ use crate::{ }, }; -// BACKEND +// BACKEND READER // ================================================================================================ -/// The backing storage for the SMT forest, providing the necessary high-level methods for -/// performing operations on the full trees that make up the forest, while allowing the forest -/// itself to be storage agnostic. +/// The read-only interface for the SMT forest storage backend. +/// +/// This trait provides the query operations necessary to read the full trees that make up the +/// forest. It is a supertrait of [`Backend`], which extends it with write operations. /// /// # Backend Data Storage /// -/// Having a generic [`Backend`] provides no guarantees to the user about how it stores data and -/// what patterns are used for data access under the hood. It is, however, guaranteed to store +/// Having a generic [`BackendReader`] provides no guarantees to the user about how it stores data +/// and what patterns are used for data access under the hood. It is, however, guaranteed to store /// _only_ the data necessary to describe the latest state of each tree in the forest. /// /// # Error Handling @@ -56,12 +57,6 @@ use crate::{ /// /// # Expected Behavior /// -/// Certain methods on this trait (e.g. [`Backend::update_tree`]) provide behaviors expected for -/// that method. These combine with the following trait-level behavior requirements to become part -/// of the contract of the method, but a portion that cannot be encoded in the type system. Any -/// failure to conform to these expected behaviors is **considered a bug in the implementation** of -/// the backend, and must be rectified. -/// /// The following behavior is expected of all methods in implementations of this trait: /// /// - For any failure derived from user input (see _User-Derived Errors_ above), the data and the @@ -70,13 +65,10 @@ use crate::{ /// caller by returning a variant of [`BackendError`] that is **not [`BackendError::Internal`]**. /// Methods may place additional constraints on which errors are used to signal certain failures. /// Such failures should not lead to data corruption of any persistent data. -pub trait Backend +pub trait BackendReader where Self: Debug, { - // QUERIES - // ============================================================================================ - /// Returns an opening for the specified `key` in the SMT with the specified `lineage`. /// /// It is the responsibility of the forest to ensure lineage existence before querying the @@ -148,6 +140,31 @@ where /// - `None` will be returned upon successful completion, or at any time after an error has been /// returned. fn entries(&self, lineage: LineageId) -> Result>>; +} + +// BACKEND +// ================================================================================================ + +/// The full read-write interface for the SMT forest storage backend. +/// +/// This trait extends [`BackendReader`] with mutation operations, allowing the forest to add new +/// lineages and update existing ones. +/// +/// # Implementation Contract +/// +/// Method-level doc comments describe invariants that cannot be encoded in the type system. +/// Implementations are responsible for upholding them. +pub trait Backend: BackendReader { + /// The read-only view type returned by [`Self::reader`]. + /// + /// The returned type implements [`BackendReader`] but not [`Backend`], providing a read-only + /// guarantee. Implementations may return either a point-in-time snapshot or a live view, but + /// the view must always reflect a consistent committed state (not partial writes). Holding the + /// reader must not block writes in any way. + type Reader: BackendReader; + + /// Returns a read-only view of this backend that observes its current state. + fn reader(&self) -> Result; // SINGLE-TREE MODIFIERS // ============================================================================================ diff --git a/miden-crypto/src/merkle/smt/large_forest/backend/persistent/mod.rs b/miden-crypto/src/merkle/smt/large_forest/backend/persistent/mod.rs index b03e1b7ce3..a0fc6614da 100644 --- a/miden-crypto/src/merkle/smt/large_forest/backend/persistent/mod.rs +++ b/miden-crypto/src/merkle/smt/large_forest/backend/persistent/mod.rs @@ -32,6 +32,7 @@ mod internal; mod iterator; mod keys; mod property_tests; +mod snapshot; mod tests; mod tree_metadata; @@ -43,14 +44,15 @@ use miden_serde_utils::{Deserializable, DeserializationError, Serializable}; use num::Integer; use rayon::prelude::*; use rocksdb as db; +pub use snapshot::PersistentBackendReader; use super::{BackendError, Result}; use crate::{ EMPTY_WORD, Map, Word, merkle::{ - EmptySubtreeRoots, MerkleError, NodeIndex, SparseMerklePath, + EmptySubtreeRoots, MerkleError, NodeIndex, smt::{ - Backend, InnerNode, LeafIndex, LineageId, NodeMutation, NodeMutations, SMT_DEPTH, + Backend, BackendReader, LeafIndex, LineageId, NodeMutation, NodeMutations, SMT_DEPTH, SmtForestUpdateBatch, SmtLeaf, SmtLeafError, SmtProof, SmtUpdateBatch, StorageUpdateParts, StorageUpdates, Subtree, SubtreeError, TreeEntry, TreeWithRoot, VersionId, @@ -145,69 +147,10 @@ const MIN_LINEAGES_IN_BATCH_TO_PARALLELIZE: usize = 5; /// The minimum number of items per rayon chunk when parallelizing deserialization and extraction. const CHUNKING_UNIT: usize = 100; -// PERSISTENT BACKEND -// ================================================================================================ - -/// The persistent backend for the SMT forest, providing durable storage for the latest tree in each -/// lineage in the forest. -#[derive(Debug)] -pub struct PersistentBackend { - /// The underlying database. - /// - /// # Layout - /// - /// The data on each tree is stored across a series of RocksDB column families, along with - /// additional metadata. The layout is fixed (for the moment), and has the following column - /// families. - /// - /// - [`LEAVES_CF`]: Stores the [`SmtLeaf`] data, keyed by a [`LeafKey`] instance. - /// - [`METADATA_CF`]: Stores a [`TreeMetadata`] instance for each tree, keyed by - /// [`LineageId`]. This acts like a mirror of the in-memory `lineages` data, which exists to - /// speed up common queries. - /// - `SUBTREE_XX_CF`: Stores the [`Subtree`]s with their root at level `XX` in the backend, - /// keyed on the [`SubtreeKey`]. - db: Arc, - - /// An in-memory cache of the tree metadata enabling the more rapid servicing of certain kinds - /// of queries. - /// - /// Care must be taken that this is _always_ kept in sync with the on-disk copy in the - /// [`METADATA_CF`] column. - lineages: HashMap, - - /// Whether writes should be synchronously flushed to disk. - /// - /// Setting this to true will result in reduced throughput but may result in higher durability - /// in the presence of crashes. - sync_writes: bool, -} - -// CONSTRUCTION +// BACKEND READER TRAIT // ================================================================================================ -/// This block contains functions for the construction of the persistent backend. -impl PersistentBackend { - /// Constructs an instance of the persistent backend, either opening or creating the data store - /// at the location specified in the `config`. - /// - /// # Errors - /// - /// - [`BackendError::CorruptedData`] if data corruption is encountered when loading the forest - /// from disk. - /// - [`BackendError::Internal`] if the backend cannot be started up properly. - pub fn load(config: Config) -> Result { - let db = Arc::new(Self::build_db_with_options(&config)?); - let lineages = Self::read_all_metadata(db.clone())?; - let sync_writes = config.sync_writes; - - Ok(Self { db, lineages, sync_writes }) - } -} - -// BACKEND TRAIT -// ================================================================================================ - -impl Backend for PersistentBackend { +impl BackendReader for PersistentBackend { /// Returns an opening for the specified `key` in the SMT with the specified `lineage`. /// /// # Errors @@ -215,44 +158,13 @@ impl Backend for PersistentBackend { /// - [`BackendError::UnknownLineage`] if the provided `lineage` is not known by the backend. /// - [`BackendError::Internal`] if the backing database cannot be accessed for some reason. fn open(&self, lineage: LineageId, key: Word) -> Result { - // We fail early if we don't know about the lineage in question, as querying further could - // cause very strange behavior. - if !self.lineages.contains_key(&lineage) { - return Err(BackendError::UnknownLineage(lineage)); - } - - // We get our leaf first. - let leaf = self - .load_leaf_for(lineage, key)? - .unwrap_or_else(|| SmtLeaf::new_empty(LeafIndex::from(key))); - - // We then have to load both the corresponding leaf, and the siblings for its path out of - // storage. - let leaf_index: NodeIndex = LeafIndex::from(key).into(); - - // We calculate the roots of the subtrees in order to know their keys for loading. As an - // opening only ever needs to retrieve 8 subtrees we just do this sequentially. - let subtree_roots = (0..SMT_DEPTH / SUBTREE_DEPTH) - .scan(leaf_index.parent(), |cursor, _| { - let subtree_root = Subtree::find_subtree_root(*cursor); - *cursor = subtree_root.parent(); - Some(subtree_root) - }) - .collect::>(); - - // Doing this as a separate step exhibits better performance than loading these subtrees - // inline in the path creation. This appears to be due to better pipelining and - // branch-predictor behavior. - let mut subtree_cache = HashMap::::new(); - for root in subtree_roots { - let maybe_tree = self.load_subtree(SubtreeKey { lineage, index: root })?; - subtree_cache.insert(root, maybe_tree.unwrap_or_else(|| Subtree::new(root))); - } - - let merkle_path = self.compute_path(leaf_index, &subtree_cache); - - // This is safe to do unchecked as we ensure that the path is valid by construction. - Ok(SmtProof::new_unchecked(merkle_path, leaf)) + snapshot::open_proof( + &self.lineages, + lineage, + key, + |l, k| self.load_leaf_for(l, k), + |k| self.load_subtree(k), + ) } /// Returns the leaf stored at `leaf_index` in the SMT with the specified `lineage`. @@ -367,6 +279,26 @@ impl Backend for PersistentBackend { // its type, so we delegate to our custom entries iterator impl. Ok(PersistentBackendEntriesIterator::new(lineage, pfx_iterator)) } +} + +// BACKEND TRAIT +// ================================================================================================ + +impl Backend for PersistentBackend { + type Reader = PersistentBackendReader; + + fn reader(&self) -> Result { + let snapshot = self.db.snapshot(); + // SAFETY: `SnapshotInner` holds both the snapshot and `Arc`, and its `Drop` impl + // drops the snapshot before decrementing the Arc. This guarantees the DB outlives the + // snapshot, making the 'static transmute sound. + let snapshot: db::Snapshot<'static> = unsafe { mem::transmute(snapshot) }; + Ok(PersistentBackendReader::new( + Arc::clone(&self.db), + snapshot, + Arc::clone(&self.lineages), + )) + } /// Adds the provided `lineage` to the forest with the provided `version` and sets the /// associated tree to have the value created by applying `updates` to the empty tree, returning @@ -652,38 +584,71 @@ impl Backend for PersistentBackend { } } -// INTERNAL / UTILITY +// PERSISTENT BACKEND // ================================================================================================ -/// This block contains methods for internal use only that provide useful functionality for the -/// implementation of the backend. +/// The persistent backend for the SMT forest, providing durable storage for the latest tree in each +/// lineage in the forest. +#[derive(Debug)] +pub struct PersistentBackend { + /// The underlying database. + /// + /// # Layout + /// + /// The data on each tree is stored across a series of RocksDB column families, along with + /// additional metadata. The layout is fixed (for the moment), and has the following column + /// families. + /// + /// - [`LEAVES_CF`]: Stores the [`SmtLeaf`] data, keyed by a [`LeafKey`] instance. + /// - [`METADATA_CF`]: Stores a [`TreeMetadata`] instance for each tree, keyed by + /// [`LineageId`]. This acts like a mirror of the in-memory `lineages` data, which exists to + /// speed up common queries. + /// - `SUBTREE_XX_CF`: Stores the [`Subtree`]s with their root at level `XX` in the backend, + /// keyed on the [`SubtreeKey`]. + db: Arc, + + /// An in-memory cache of the tree metadata enabling the more rapid servicing of certain kinds + /// of queries. + /// + /// Wrapped in an `Arc` for copy-on-write sharing with reader snapshots. Readers clone the + /// `Arc` cheaply; mutations use `Arc::make_mut` to fork a private copy only when needed. + /// + /// Care must be taken that this is _always_ kept in sync with the on-disk copy in the + /// [`METADATA_CF`] column. + lineages: Arc>, + + /// Whether writes should be synchronously flushed to disk. + /// + /// Setting this to true will result in reduced throughput but may result in higher durability + /// in the presence of crashes. + sync_writes: bool, +} + impl PersistentBackend { - /// Computes the merkle path for the provided `lineage` beginning at the provided `leaf_index` - /// using the pre-loaded `subtrees`. - fn compute_path( - &self, - mut leaf_index: NodeIndex, - subtrees: &HashMap, - ) -> SparseMerklePath { - let mut path = Vec::with_capacity(SMT_DEPTH as usize); - - while leaf_index.depth() > 0 { - let is_right = leaf_index.is_position_odd(); - leaf_index = leaf_index.parent(); - - let root = Subtree::find_subtree_root(leaf_index); - let subtree = &subtrees[&root]; // Known to exist by construction. - let InnerNode { left, right } = - subtree.get_inner_node(leaf_index).unwrap_or_else(|| { - EmptySubtreeRoots::get_inner_node(SMT_DEPTH, leaf_index.depth()) - }); + /// Constructs an instance of the persistent backend, either opening or creating the data store + /// at the location specified in the `config`. + /// + /// # Errors + /// + /// - [`BackendError::CorruptedData`] if data corruption is encountered when loading the forest + /// from disk. + /// - [`BackendError::Internal`] if the backend cannot be started up properly. + pub fn load(config: Config) -> Result { + let db = Arc::new(Self::build_db_with_options(&config)?); + let lineages = Arc::new(Self::read_all_metadata(db.clone())?); + let sync_writes = config.sync_writes; - path.push(if is_right { left } else { right }); - } + Ok(Self { db, lineages, sync_writes }) + } - SparseMerklePath::from_sized_iter(path).expect("Always succeeds by construction") + // Triggers copy-on-write: clones the shared lineages map only if other references exist. + pub(crate) fn lineages_mut(&mut self) -> &mut HashMap { + Arc::make_mut(&mut self.lineages) } + // INTERNAL / UTILITY + // -------------------------------------------------------------------------------------------- + /// Performs `updates` on the tree in the specified lineage, assigning the new tree the /// provided `new_version`. /// @@ -1363,14 +1328,10 @@ impl PersistentBackend { Ok(()) } -} -// INTERNAL / STARTUP -// ================================================================================================ + // INTERNAL / STARTUP + // -------------------------------------------------------------------------------------------- -/// This impl block contains internal functionality to do with starting up the backend and -/// performing its initialization work. -impl PersistentBackend { /// Sets up the basic configuration for the underlying RocksDB database. fn build_db_with_options(config: &Config) -> Result { let mut db_opts = db::Options::default(); @@ -1495,7 +1456,7 @@ impl PersistentBackend { // If it hasn't errored, we can now safely update the in-memory metadata cache. Ok(metadata .map(|(l, d, r)| { - self.lineages.insert(l, d); + self.lineages_mut().insert(l, d); (l, r) }) .collect()) diff --git a/miden-crypto/src/merkle/smt/large_forest/backend/persistent/property_tests.rs b/miden-crypto/src/merkle/smt/large_forest/backend/persistent/property_tests.rs index 2f30129432..f3db714442 100644 --- a/miden-crypto/src/merkle/smt/large_forest/backend/persistent/property_tests.rs +++ b/miden-crypto/src/merkle/smt/large_forest/backend/persistent/property_tests.rs @@ -10,7 +10,7 @@ use super::tests::default_backend; use crate::{ EMPTY_WORD, merkle::smt::{ - Backend, Smt, SmtForestUpdateBatch, SmtUpdateBatch, TreeWithRoot, + Backend, BackendReader, Smt, SmtForestUpdateBatch, SmtUpdateBatch, TreeWithRoot, large_forest::test_utils::{ arbitrary_batch, arbitrary_lineage, arbitrary_version, arbitrary_word, }, diff --git a/miden-crypto/src/merkle/smt/large_forest/backend/persistent/snapshot.rs b/miden-crypto/src/merkle/smt/large_forest/backend/persistent/snapshot.rs new file mode 100644 index 0000000000..d36a86f4d7 --- /dev/null +++ b/miden-crypto/src/merkle/smt/large_forest/backend/persistent/snapshot.rs @@ -0,0 +1,279 @@ +use alloc::{sync::Arc, vec::Vec}; +use core::mem::ManuallyDrop; +use std::collections::HashMap; + +use miden_serde_utils::{Deserializable, Serializable}; +use rocksdb as db; + +use super::{ + super::{BackendError, Result}, + LEAVES_CF, + iterator::PersistentBackendEntriesIterator, + keys::{LeafKey, SubtreeKey}, + subtree_cf_name, + tree_metadata::TreeMetadata, +}; +use crate::{ + Word, + merkle::{ + EmptySubtreeRoots, NodeIndex, SparseMerklePath, + smt::{ + BackendReader, InnerNode, LeafIndex, LineageId, SMT_DEPTH, SmtLeaf, SmtProof, Subtree, + TreeEntry, TreeWithRoot, VersionId, full::concurrent::SUBTREE_DEPTH, + }, + }, +}; + +// PERSISTENT BACKEND SNAPSHOT INNER +// ================================================================================================ + +/// Inner state shared by all clones of a [`PersistentBackendReader`]. +/// +/// Pairs a RocksDB point-in-time snapshot with the `Arc` that owns the database, so that +/// the database is guaranteed to outlive the snapshot. +/// +/// # Safety +/// +/// `snapshot` contains an internal pointer into the `DB` allocation. `db` must not be dropped +/// (i.e. its refcount must not reach zero) while `snapshot` is live. The `Drop` impl enforces +/// this by explicitly dropping `snapshot` before the `Arc` field is automatically decremented. +pub(super) struct SnapshotInner { + /// The RocksDB snapshot providing the consistent read view. + /// + /// The `'static` lifetime is a sound lie: the real lifetime is tied to `db`. The `Drop` impl + /// guarantees we drop this before `db`. + snapshot: ManuallyDrop>, + /// Keeps the database alive for at least as long as `snapshot`. + db: Arc, + /// Point-in-time view of the lineage metadata, shared with the backend via copy-on-write. + lineages: Arc>, +} + +impl Drop for SnapshotInner { + fn drop(&mut self) { + // SAFETY: Drop the snapshot before the Arc refcount is decremented. + unsafe { + ManuallyDrop::drop(&mut self.snapshot); + } + } +} + +impl core::fmt::Debug for SnapshotInner { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + f.debug_struct("SnapshotInner").finish_non_exhaustive() + } +} + +// PERSISTENT BACKEND READER +// ================================================================================================ + +/// A read-only, point-in-time snapshot of a [`PersistentBackend`]. +/// +/// This type intentionally implements only [`BackendReader`], not [`Backend`]. It is returned by +/// [`PersistentBackend::reader`] to provide read-only access to a consistent snapshot of the +/// backend state without exposing any mutation capabilities. +/// +/// All reads go through a RocksDB snapshot, so the view is frozen at the instant +/// [`PersistentBackend::reader`] was called — concurrent writes to the underlying database are +/// invisible to this reader. +/// +/// Cloning is O(1): both the snapshot and the lineage metadata are owned by the inner `Arc`. +#[derive(Clone, Debug)] +pub struct PersistentBackendReader { + inner: Arc, +} + +impl PersistentBackendReader { + pub(super) fn new( + db: Arc, + snapshot: db::Snapshot<'static>, + lineages: Arc>, + ) -> Self { + Self { + inner: Arc::new(SnapshotInner { + snapshot: ManuallyDrop::new(snapshot), + db, + lineages, + }), + } + } + + fn load_subtree(&self, tree_key: SubtreeKey) -> Result> { + let cf = self.subtree_cf(tree_key.index)?; + let key_bytes = tree_key.to_bytes(); + let result = match self.inner.snapshot.get_cf(cf, key_bytes) { + Ok(Some(bytes)) => Some(Subtree::from_vec(tree_key.index, &bytes)?), + Ok(None) => None, + Err(e) => return Err(e.into()), + }; + Ok(result) + } + + fn load_leaf_raw(&self, key: &LeafKey) -> Result> { + let col = self.cf(LEAVES_CF)?; + let key_bytes = key.to_bytes(); + let leaf_bytes = self.inner.snapshot.get_cf(col, key_bytes)?; + Ok(match leaf_bytes { + Some(bytes) => Some(SmtLeaf::read_from_bytes_with_budget(&bytes, bytes.len())?), + None => None, + }) + } + + fn load_leaf_for(&self, lineage: LineageId, key: Word) -> Result> { + let key = LeafKey { + lineage, + index: LeafIndex::from(key).position(), + }; + self.load_leaf_raw(&key) + } + + #[inline(always)] + fn subtree_cf(&self, index: NodeIndex) -> Result<&db::ColumnFamily> { + self.subtree_cf_depth(index.depth()) + } + + #[inline(always)] + fn subtree_cf_depth(&self, depth: u8) -> Result<&db::ColumnFamily> { + let cf_name = subtree_cf_name(depth); + self.cf(cf_name) + } + + #[inline(always)] + fn cf(&self, name: &str) -> Result<&db::ColumnFamily> { + self.inner.db.cf_handle(name).ok_or_else(|| { + BackendError::internal_from_message(format!("Could not load column with name {name}")) + }) + } +} + +impl BackendReader for PersistentBackendReader { + fn open(&self, lineage: LineageId, key: Word) -> Result { + open_proof( + &self.inner.lineages, + lineage, + key, + |l, k| self.load_leaf_for(l, k), + |k| self.load_subtree(k), + ) + } + + fn get_leaf(&self, lineage: LineageId, leaf_index: LeafIndex) -> Result { + if !self.inner.lineages.contains_key(&lineage) { + return Err(BackendError::UnknownLineage(lineage)); + } + let key = LeafKey { lineage, index: leaf_index.position() }; + Ok(self.load_leaf_raw(&key)?.unwrap_or_else(|| SmtLeaf::new_empty(leaf_index))) + } + + fn get(&self, lineage: LineageId, key: Word) -> Result> { + if !self.inner.lineages.contains_key(&lineage) { + return Err(BackendError::UnknownLineage(lineage)); + } + let leaf = self.load_leaf_for(lineage, key)?; + Ok(leaf.and_then(|l| { + let val = l.get_value(&key); + val.and_then(|e| if e.is_empty() { None } else { Some(e) }) + })) + } + + fn version(&self, lineage: LineageId) -> Result { + let metadata = + self.inner.lineages.get(&lineage).ok_or(BackendError::UnknownLineage(lineage))?; + Ok(metadata.version) + } + + fn lineages(&self) -> Result> { + Ok(self.inner.lineages.keys().copied()) + } + + fn trees(&self) -> Result> { + Ok(self + .inner + .lineages + .iter() + .map(|(l, m)| TreeWithRoot::new(*l, m.version, m.root_value))) + } + + fn entry_count(&self, lineage: LineageId) -> Result { + let metadata = + self.inner.lineages.get(&lineage).ok_or(BackendError::UnknownLineage(lineage))?; + Ok(metadata.entry_count.try_into().expect("Count of entries should fit into usize")) + } + + fn entries(&self, lineage: LineageId) -> Result>> { + if !self.inner.lineages.contains_key(&lineage) { + return Err(BackendError::UnknownLineage(lineage)); + } + let lineage_bytes = lineage.to_bytes(); + let cf = self.cf(LEAVES_CF)?; + let mut read_opts = db::ReadOptions::default(); + read_opts.set_prefix_same_as_start(true); + let pfx_iterator = self.inner.snapshot.iterator_cf_opt( + cf, + read_opts, + db::IteratorMode::From(&lineage_bytes, db::Direction::Forward), + ); + Ok(PersistentBackendEntriesIterator::new(lineage, pfx_iterator)) + } +} + +// HELPERS +// ================================================================================================ + +fn compute_merkle_path( + mut leaf_index: NodeIndex, + subtrees: &HashMap, +) -> SparseMerklePath { + let mut path = Vec::with_capacity(SMT_DEPTH as usize); + + while leaf_index.depth() > 0 { + let is_right = leaf_index.is_position_odd(); + leaf_index = leaf_index.parent(); + + let root = Subtree::find_subtree_root(leaf_index); + let subtree = &subtrees[&root]; + let InnerNode { left, right } = subtree + .get_inner_node(leaf_index) + .unwrap_or_else(|| EmptySubtreeRoots::get_inner_node(SMT_DEPTH, leaf_index.depth())); + + path.push(if is_right { left } else { right }); + } + + SparseMerklePath::from_sized_iter(path).expect("Always succeeds by construction") +} + +pub(super) fn open_proof( + lineages: &HashMap, + lineage: LineageId, + key: Word, + load_leaf: impl Fn(LineageId, Word) -> Result>, + load_subtree: impl Fn(SubtreeKey) -> Result>, +) -> Result { + if !lineages.contains_key(&lineage) { + return Err(BackendError::UnknownLineage(lineage)); + } + + let leaf = load_leaf(lineage, key)?.unwrap_or_else(|| SmtLeaf::new_empty(LeafIndex::from(key))); + let leaf_index: NodeIndex = LeafIndex::from(key).into(); + + // An opening needs exactly one subtree per level; collect their roots up front so we can + // load them all before constructing the path. + let subtree_roots = (0..SMT_DEPTH / SUBTREE_DEPTH) + .scan(leaf_index.parent(), |cursor, _| { + let subtree_root = Subtree::find_subtree_root(*cursor); + *cursor = subtree_root.parent(); + Some(subtree_root) + }) + .collect::>(); + + // Loading subtrees as a separate step (rather than inline during path construction) + // exhibits better performance due to improved pipelining and branch-predictor behavior. + let mut subtree_cache = HashMap::::new(); + for root in subtree_roots { + let maybe_tree = load_subtree(SubtreeKey { lineage, index: root })?; + subtree_cache.insert(root, maybe_tree.unwrap_or_else(|| Subtree::new(root))); + } + + let merkle_path = compute_merkle_path(leaf_index, &subtree_cache); + Ok(SmtProof::new_unchecked(merkle_path, leaf)) +} diff --git a/miden-crypto/src/merkle/smt/large_forest/backend/persistent/tests.rs b/miden-crypto/src/merkle/smt/large_forest/backend/persistent/tests.rs index 57645b96a4..d3d769ff6c 100644 --- a/miden-crypto/src/merkle/smt/large_forest/backend/persistent/tests.rs +++ b/miden-crypto/src/merkle/smt/large_forest/backend/persistent/tests.rs @@ -15,8 +15,8 @@ use super::{PersistentBackend, Result}; use crate::{ EMPTY_WORD, Word, merkle::smt::{ - Backend, BackendError, LineageId, Smt, SmtForestUpdateBatch, SmtUpdateBatch, TreeEntry, - TreeWithRoot, VersionId, large_forest::backend::persistent::config::Config, + Backend, BackendError, BackendReader, LineageId, Smt, SmtForestUpdateBatch, SmtUpdateBatch, + TreeEntry, TreeWithRoot, VersionId, large_forest::backend::persistent::config::Config, }, rand::test_utils::ContinuousRng, }; @@ -822,3 +822,84 @@ fn update_forest() -> Result<()> { Ok(()) } + +#[test] +fn reader_snapshot_isolation() -> Result<()> { + // Writes committed to the backend after the reader is created must be invisible to the reader. + let (_dir, mut backend) = default_backend()?; + let mut rng = ContinuousRng::new([0xc7; 32]); + let version: VersionId = rng.value(); + + // Add lineage_1 and create the reader while lineage_2 does not yet exist. + let lineage_1: LineageId = rng.value(); + let k1: Word = rng.value(); + let v1: Word = rng.value(); + let mut ops = SmtUpdateBatch::default(); + ops.add_insert(k1, v1); + backend.add_lineage(lineage_1, version, ops)?; + + let reader = backend.reader()?; + + // Now add lineage_2 after the reader was created. + let lineage_2: LineageId = rng.value(); + let k2: Word = rng.value(); + let v2: Word = rng.value(); + let mut ops = SmtUpdateBatch::default(); + ops.add_insert(k2, v2); + backend.add_lineage(lineage_2, version, ops)?; + + // Also mutate lineage_1 after the snapshot. + let k3: Word = rng.value(); + let v3: Word = rng.value(); + let mut ops = SmtUpdateBatch::default(); + ops.add_insert(k3, v3); + backend.update_tree(lineage_1, version + 1, ops)?; + + // The reader must not see lineage_2 at all. + assert_eq!(reader.lineages()?.count(), 1); + assert!(!reader.lineages()?.any(|l| l == lineage_2)); + assert_matches!(reader.open(lineage_2, k2).unwrap_err(), BackendError::UnknownLineage(l) if l == lineage_2); + assert_matches!(reader.get(lineage_2, k2).unwrap_err(), BackendError::UnknownLineage(l) if l == lineage_2); + + // The reader must see lineage_1 at the pre-snapshot state (k3 absent, version unchanged). + assert_eq!(reader.version(lineage_1)?, version); + assert_eq!(reader.entry_count(lineage_1)?, 1); + assert!(reader.get(lineage_1, k3)?.is_none()); + assert_eq!(reader.get(lineage_1, k1)?, Some(v1)); + + Ok(()) +} + +#[test] +fn reader_clone() -> Result<()> { + // Cloning a reader must produce an independent handle to the same snapshot. + let (_dir, mut backend) = default_backend()?; + let mut rng = ContinuousRng::new([0xc8; 32]); + let version: VersionId = rng.value(); + + let lineage_1: LineageId = rng.value(); + let k1: Word = rng.value(); + let v1: Word = rng.value(); + let mut ops = SmtUpdateBatch::default(); + ops.add_insert(k1, v1); + backend.add_lineage(lineage_1, version, ops)?; + + let reader = backend.reader()?; + let reader_clone = reader.clone(); + + // Write to the backend after cloning — neither handle should see it. + let lineage_2: LineageId = rng.value(); + let mut ops = SmtUpdateBatch::default(); + ops.add_insert(rng.value(), rng.value()); + backend.add_lineage(lineage_2, version, ops)?; + + // Both handles see exactly lineage_1 and agree on its data. + for r in [&reader, &reader_clone] { + assert_eq!(r.lineages()?.count(), 1); + assert!(r.lineages()?.any(|l| l == lineage_1)); + assert_eq!(r.get(lineage_1, k1)?, Some(v1)); + assert_matches!(r.get(lineage_2, k1).unwrap_err(), BackendError::UnknownLineage(l) if l == lineage_2); + } + + Ok(()) +} diff --git a/miden-crypto/src/merkle/smt/large_forest/mod.rs b/miden-crypto/src/merkle/smt/large_forest/mod.rs index a5fbb88ed3..f7600ed873 100644 --- a/miden-crypto/src/merkle/smt/large_forest/mod.rs +++ b/miden-crypto/src/merkle/smt/large_forest/mod.rs @@ -314,10 +314,14 @@ mod utils; use alloc::vec::Vec; use core::num::NonZeroU8; -pub use backend::{Backend, BackendError, memory::InMemoryBackend}; +pub use backend::{ + Backend, BackendError, BackendReader, + memory::{InMemoryBackend, InMemoryBackendSnapshot}, +}; #[cfg(feature = "persistent-forest")] pub use backend::{ - persistent::PersistentBackend, persistent::config::Config as PersistentBackendConfig, + persistent::config::Config as PersistentBackendConfig, + persistent::{PersistentBackend, PersistentBackendReader}, }; pub use config::{Config, DEFAULT_MAX_HISTORY_VERSIONS, MIN_HISTORY_VERSIONS}; pub use error::{LargeSmtForestError, Result}; @@ -346,8 +350,8 @@ use crate::{ /// A high-performance forest of sparse merkle trees with pluggable storage backends. /// /// See the module documentation for more information. -#[derive(Debug)] -pub struct LargeSmtForest { +#[derive(Clone, Debug)] +pub struct LargeSmtForest { /// The configuration for how the forest functions. config: Config, @@ -369,17 +373,6 @@ pub struct LargeSmtForest { non_empty_histories: Set, } -impl Clone for LargeSmtForest { - fn clone(&self) -> Self { - Self { - config: self.config.clone(), - backend: self.backend.clone(), - lineage_data: self.lineage_data.clone(), - non_empty_histories: self.non_empty_histories.clone(), - } - } -} - // CONSTRUCTION AND BASIC QUERIES // ================================================================================================ @@ -394,7 +387,7 @@ impl Clone for LargeSmtForest { /// /// Where anything more specific can be said about performance, the method documentation will /// contain more detail. -impl LargeSmtForest { +impl LargeSmtForest { /// Constructs a new forest backed by the provided `backend` using the default [`Config`] for /// the forest's behavior. /// @@ -471,7 +464,7 @@ impl LargeSmtForest { /// All of these methods can be performed fully in-memory, and hence their performance is /// predictable on a given machine regardless of the choice of [`Backend`] instance being used by /// the forest. -impl LargeSmtForest { +impl LargeSmtForest { /// Returns an iterator that yields all the (uniquely identified) roots that the forest knows /// about, including those from historical versions. /// @@ -576,7 +569,7 @@ impl LargeSmtForest { /// /// Where anything more specific can be said about performance, the method documentation will /// contain more detail. -impl LargeSmtForest { +impl LargeSmtForest { /// Returns an opening for the specified `key` in the specified `tree`, regardless of whether /// the `tree` has a value associated with `key` or not. /// @@ -1127,7 +1120,7 @@ impl LargeSmtForest { /// This block contains internal functions that exist to de-duplicate or modularize functionality /// within the forest. These should not be exposed. -impl LargeSmtForest { +impl LargeSmtForest { /// Applies the history delta given by `history_view` on top of the provided `full_tree_leaf` to /// produce the correct leaf for a historical opening. /// @@ -1230,6 +1223,22 @@ impl LargeSmtForest { } } +impl LargeSmtForest { + /// Returns a read-only `LargeSmtForest` backed by a reader view of this forest's backend. + /// + /// The new forest shares the same config, lineage data, and history as `self`, and its backend + /// is a point-in-time snapshot produced by [`Backend::reader`]. The returned forest's backend + /// type is `B::Reader: BackendReader`, so it cannot be used for mutations. + pub fn reader(&self) -> Result> { + Ok(LargeSmtForest { + config: self.config.clone(), + backend: self.backend.reader()?, + lineage_data: self.lineage_data.clone(), + non_empty_histories: self.non_empty_histories.clone(), + }) + } +} + // TESTING FUNCTIONALITY // ================================================================================================ @@ -1237,7 +1246,7 @@ impl LargeSmtForest { /// inspect the internal state of the forest that are unsafe to make part of the forest's public /// API. #[cfg(test)] -impl LargeSmtForest { +impl LargeSmtForest { /// Gets an immutable reference to the underlying backend of the forest. pub fn get_backend(&self) -> &B { &self.backend diff --git a/miden-crypto/src/merkle/smt/large_forest/test_utils.rs b/miden-crypto/src/merkle/smt/large_forest/test_utils.rs index ea6aac4f05..f9d3ea9098 100644 --- a/miden-crypto/src/merkle/smt/large_forest/test_utils.rs +++ b/miden-crypto/src/merkle/smt/large_forest/test_utils.rs @@ -14,9 +14,9 @@ use proptest::prelude::*; use crate::{ EMPTY_WORD, Map, ONE, ZERO, merkle::smt::{ - Backend, ForestInMemoryBackend, ForestOperation, LargeSmtForest, LeafIndex, LineageId, - MAX_LEAF_ENTRIES, RootInfo, SMT_DEPTH, Smt, SmtForestUpdateBatch, SmtProof, SmtUpdateBatch, - TreeId, VersionId, + Backend, BackendReader, ForestInMemoryBackend, ForestOperation, LargeSmtForest, LeafIndex, + LineageId, MAX_LEAF_ENTRIES, RootInfo, SMT_DEPTH, Smt, SmtForestUpdateBatch, SmtProof, + SmtUpdateBatch, TreeId, VersionId, large_forest::{ backend::{BackendError, Result as BackendResult}, root::{TreeEntry, TreeWithRoot}, @@ -162,7 +162,7 @@ pub fn sorted_tree_entries(tree: &Smt) -> Vec { /// Sorts forest entries explicitly by `(key, value)` so tests compare observable contents rather /// than relying on unspecified iterator ordering. -pub fn sorted_forest_entries( +pub fn sorted_forest_entries( forest: &LargeSmtForest, tree: TreeId, ) -> Result, TestCaseError> { @@ -180,7 +180,7 @@ fn word_to_option(value: Word) -> Option { } /// Asserts that the forest and reference tree agree on entries, counts, key lookups, and openings. -pub fn assert_tree_queries_match( +pub fn assert_tree_queries_match( forest: &LargeSmtForest, tree_id: TreeId, reference: &Smt, @@ -207,7 +207,7 @@ pub fn assert_tree_queries_match( } /// Asserts that the forest metadata for `lineage` matches the provided sequence of versions. -pub fn assert_lineage_metadata( +pub fn assert_lineage_metadata( forest: &LargeSmtForest, lineage: LineageId, versions: &[(VersionId, Word)], @@ -275,7 +275,7 @@ impl>> Iterator for FallibleIter } } -impl Backend for FallibleEntriesBackend { +impl BackendReader for FallibleEntriesBackend { fn open(&self, lineage: LineageId, key: Word) -> BackendResult { self.inner.open(lineage, key) } @@ -315,6 +315,14 @@ impl Backend for FallibleEntriesBackend { let inner_iter = self.inner.entries(lineage)?; Ok(FallibleIter { inner: inner_iter, count: 0 }) } +} + +impl Backend for FallibleEntriesBackend { + type Reader = ::Reader; + + fn reader(&self) -> BackendResult { + self.inner.reader() + } fn add_lineage( &mut self, diff --git a/miden-crypto/src/merkle/smt/large_forest/tests.rs b/miden-crypto/src/merkle/smt/large_forest/tests.rs index ea09019101..cb6411f757 100644 --- a/miden-crypto/src/merkle/smt/large_forest/tests.rs +++ b/miden-crypto/src/merkle/smt/large_forest/tests.rs @@ -18,8 +18,9 @@ use crate::{ merkle::{ EmptySubtreeRoots, smt::{ - Backend, ForestInMemoryBackend, ForestOperation, LargeSmtForest, LargeSmtForestError, - RootInfo, Smt, SmtForestUpdateBatch, SmtUpdateBatch, TreeId, VersionId, + BackendReader, ForestInMemoryBackend, ForestOperation, LargeSmtForest, + LargeSmtForestError, RootInfo, Smt, SmtForestUpdateBatch, SmtUpdateBatch, TreeId, + VersionId, large_forest::{ LineageData, history::{ChangedKeys, History, NodeChanges}, diff --git a/miden-crypto/src/merkle/smt/mod.rs b/miden-crypto/src/merkle/smt/mod.rs index 6a0f6988cb..9e788b0dd1 100644 --- a/miden-crypto/src/merkle/smt/mod.rs +++ b/miden-crypto/src/merkle/smt/mod.rs @@ -30,14 +30,18 @@ pub use large::{RocksDbConfig, RocksDbSnapshotStorage, RocksDbStorage}; mod large_forest; pub use large_forest::{ - Backend, BackendError, Config as ForestConfig, + Backend, BackendError, BackendReader, Config as ForestConfig, DEFAULT_MAX_HISTORY_VERSIONS as FOREST_DEFAULT_MAX_HISTORY_VERSIONS, ForestOperation, - InMemoryBackend as ForestInMemoryBackend, LargeSmtForest, LargeSmtForestError, LineageId, - MIN_HISTORY_VERSIONS as FOREST_MIN_HISTORY_VERSIONS, RootInfo, SmtForestUpdateBatch, + InMemoryBackend as ForestInMemoryBackend, + InMemoryBackendSnapshot as ForestInMemoryBackendReader, LargeSmtForest, LargeSmtForestError, + LineageId, MIN_HISTORY_VERSIONS as FOREST_MIN_HISTORY_VERSIONS, RootInfo, SmtForestUpdateBatch, SmtUpdateBatch, TreeEntry, TreeId, TreeWithRoot, VersionId, }; #[cfg(feature = "persistent-forest")] -pub use large_forest::{PersistentBackend as ForestPersistentBackend, PersistentBackendConfig}; +pub use large_forest::{ + PersistentBackend as ForestPersistentBackend, PersistentBackendConfig, + PersistentBackendReader as ForestPersistentBackendReader, +}; mod simple; pub use simple::{SimpleSmt, SimpleSmtProof};