@@ -28,7 +28,10 @@ use crate::{
2828use ahash:: AHasher ;
2929use commonware_codec:: Codec ;
3030use commonware_cryptography:: { Digest , Hasher } ;
31- use commonware_utils:: bitmap:: { Prunable as BitMap , Readable as BitmapReadable } ;
31+ use commonware_utils:: {
32+ bitmap:: { Prunable as BitMap , Readable as BitmapReadable } ,
33+ sync:: { RwLock , RwLockReadGuard , RwLockWriteGuard } ,
34+ } ;
3235use std:: {
3336 collections:: { BTreeSet , HashMap } ,
3437 hash:: BuildHasherDefault ,
@@ -86,11 +89,12 @@ impl<const N: usize> ChunkOverlay<N> {
8689 chunk[ rel / 8 ] |= 1 << ( rel % 8 ) ;
8790 }
8891
89- /// Clear a single bit (used for superseded locations).
90- /// Skips locations in pruned chunks — those bits are already inactive.
91- fn clear_bit < B : BitmapReadable < N > > ( & mut self , base : & B , loc : u64 ) {
92+ /// Clear a single bit (used for superseded locations). `pruned_chunks` is passed in by the
93+ /// caller so the hot loop in `build_chunk_overlay` reads it once rather than per call.
94+ /// Skips locations in pruned chunks since those bits are already inactive.
95+ fn clear_bit < B : BitmapReadable < N > > ( & mut self , base : & B , pruned_chunks : usize , loc : u64 ) {
9296 let idx = BitMap :: < N > :: to_chunk_index ( loc) ;
93- if idx < base . pruned_chunks ( ) {
97+ if idx < pruned_chunks {
9498 return ;
9599 }
96100 let rel = ( loc % Self :: CHUNK_BITS ) as usize ;
@@ -279,11 +283,43 @@ where
279283 bitmap_parent : BitmapBatch < N > ,
280284}
281285
282- /// A speculative batch of operations whose root digest has been computed,
283- /// in contrast to [`UnmerkleizedBatch`].
286+ /// A speculative batch of operations whose root digest has been computed, in contrast to
287+ /// [`UnmerkleizedBatch`].
288+ ///
289+ /// Wraps an [`any::batch::MerkleizedBatch`] and adds the bitmap and grafted MMR state needed to
290+ /// compute the canonical root.
291+ ///
292+ /// # Branch validity
293+ ///
294+ /// A `MerkleizedBatch` is a branch-scoped view rooted at a specific committed prefix of the DB. It
295+ /// is not an immutable snapshot.
296+ ///
297+ /// Internally, the batch chain terminates in the DB's committed bitmap via `BitmapBatch::Base`.
298+ /// That committed bitmap evolves in place as [`Db::apply_batch`](super::db::Db::apply_batch),
299+ /// [`Db::prune`](super::db::Db::prune), and [`Db::rewind`](super::db::Db::rewind) update the DB.
284300///
285- /// Wraps an [`any::batch::MerkleizedBatch`] and adds the bitmap and grafted MMR state needed
286- /// to compute the canonical root.
301+ /// Reads through this batch's chain, constructing child batches from it, and applying it later are
302+ /// only semantically correct while its ancestor chain is still the committed prefix of the DB. In
303+ /// other words, every successful [`apply_batch`](super::db::Db::apply_batch) since this batch was
304+ /// merkleized must have applied an ancestor of this batch.
305+ ///
306+ /// Once a non-ancestor batch is applied, this batch and all of its descendants become invalid
307+ /// objects. The library does not guard against continued use after that point.
308+ ///
309+ /// Applying an invalid batch is caught by the any-layer staleness check and returns
310+ /// [`Error::StaleBatch`] without mutating committed state, so `apply_batch` itself cannot corrupt
311+ /// the DB. The one exception is equal-size sibling branches (where both branches have the same
312+ /// total operation count): the staleness check is size-based and cannot distinguish them, so
313+ /// applying a descendant of one sibling after the other was already applied can silently corrupt
314+ /// snapshot/log state. Callers must not apply batches from an orphaned branch.
315+ ///
316+ /// Rules of thumb:
317+ /// - Drop any `Arc<MerkleizedBatch>` you no longer intend to apply.
318+ /// - Extending a batch after `apply_batch` has consumed it (building a child off the just-applied
319+ /// parent) is safe. The committed bitmap now equals the parent's post-apply state, so child reads
320+ /// are consistent.
321+ /// - Extending a batch after a different branch has been applied is not safe. Do not call `get`,
322+ /// `new_batch`, or `apply_batch` on that branch again.
287323pub struct MerkleizedBatch < F : Graftable , D : Digest , U : update:: Update + Send + Sync , const N : usize >
288324where
289325 Operation < F , U > : Send + Sync ,
@@ -446,13 +482,14 @@ where
446482{
447483 let total_bits = base. len ( ) + batch_len as u64 ;
448484 let mut overlay = ChunkOverlay :: new ( total_bits) ;
485+ let pruned_chunks = base. pruned_chunks ( ) ;
449486
450487 // 1. CommitFloor (last op) is always active.
451488 let commit_loc = batch_base + batch_len as u64 - 1 ;
452489 overlay. set_bit ( base, commit_loc) ;
453490
454491 // 2. Inactivate previous CommitFloor.
455- overlay. clear_bit ( base, batch_base - 1 ) ;
492+ overlay. clear_bit ( base, pruned_chunks , batch_base - 1 ) ;
456493
457494 // 3. Set active bits + clear superseded locations from the diff.
458495 for ( key, entry) in diff {
@@ -473,7 +510,7 @@ where
473510 }
474511 }
475512 if let Some ( old) = prev_loc {
476- overlay. clear_bit ( base, * old) ;
513+ overlay. clear_bit ( base, pruned_chunks , * old) ;
477514 }
478515 }
479516
@@ -595,7 +632,6 @@ where
595632 // compute_db_root sees newly completed chunks. Using bitmap_parent alone would miss chunks
596633 // that transitioned from partial to complete in this batch.
597634 let bitmap_batch = BitmapBatch :: Layer ( Arc :: new ( BitmapBatchLayer {
598- pruned_chunks : bitmap_parent. pruned_chunks ( ) ,
599635 parent : bitmap_parent. clone ( ) ,
600636 overlay : Arc :: new ( overlay) ,
601637 } ) ) ;
@@ -637,13 +673,86 @@ where
637673 } ) )
638674}
639675
640- /// Immutable bitmap state at any point in a batch chain.
676+ /// The committed bitmap shared between the [`Db`](super::db::Db) and live batches.
677+ ///
678+ /// Wrapped in a [`RwLock`] so that [`Db::apply_batch`](super::db::Db::apply_batch),
679+ /// [`Db::prune`](super::db::Db::prune), and [`Db::rewind`](super::db::Db::rewind) can mutate
680+ /// the bitmap in place while live batches concurrently read through it.
681+ ///
682+ /// # Reading through invalid batches
641683///
642- /// Mirrors the [`crate::merkle::batch::MerkleizedBatch`] pattern.
684+ /// The bitmap behind this lock represents *committed* state. If a caller holds a
685+ /// [`MerkleizedBatch`] that has become invalid (see its "Branch validity" docs for the
686+ /// conditions), reads through that batch's chain will silently return inconsistent data (the
687+ /// chain's overlays mixed with post-divergence committed chunks). The library does not guard
688+ /// against this; callers must avoid reading through invalid batches.
689+ pub ( crate ) struct SharedBitmap < const N : usize > {
690+ inner : RwLock < BitMap < N > > ,
691+ }
692+
693+ impl < const N : usize > SharedBitmap < N > {
694+ pub ( crate ) const fn new ( bitmap : BitMap < N > ) -> Self {
695+ Self {
696+ inner : RwLock :: new ( bitmap) ,
697+ }
698+ }
699+
700+ /// Acquire a shared read guard over the committed bitmap. Kept private so external callers
701+ /// go through [`BitmapReadable`] (which doesn't expose a guard across `.await`).
702+ fn read ( & self ) -> RwLockReadGuard < ' _ , BitMap < N > > {
703+ self . inner . read ( )
704+ }
705+
706+ /// Acquire an exclusive write guard. By convention only
707+ /// [`Db::apply_batch`](super::db::Db::apply_batch), [`Db::prune`](super::db::Db::prune), and
708+ /// [`Db::rewind`](super::db::Db::rewind) mutate the shared bitmap.
709+ pub ( super ) fn write ( & self ) -> RwLockWriteGuard < ' _ , BitMap < N > > {
710+ self . inner . write ( )
711+ }
712+ }
713+
714+ impl < const N : usize > std:: fmt:: Debug for SharedBitmap < N > {
715+ fn fmt ( & self , f : & mut std:: fmt:: Formatter < ' _ > ) -> std:: fmt:: Result {
716+ f. debug_struct ( "SharedBitmap" )
717+ . field ( "bitmap_len" , & BitmapReadable :: < N > :: len ( & * self . read ( ) ) )
718+ . finish ( )
719+ }
720+ }
721+
722+ /// [`BitmapReadable`] over the DB's committed bitmap. Each call acquires the read lock briefly.
723+ impl < const N : usize > BitmapReadable < N > for SharedBitmap < N > {
724+ fn complete_chunks ( & self ) -> usize {
725+ self . read ( ) . complete_chunks ( )
726+ }
727+
728+ fn get_chunk ( & self , idx : usize ) -> [ u8 ; N ] {
729+ * self . read ( ) . get_chunk ( idx)
730+ }
731+
732+ fn last_chunk ( & self ) -> ( [ u8 ; N ] , u64 ) {
733+ let guard = self . read ( ) ;
734+ let ( chunk, bits) = guard. last_chunk ( ) ;
735+ ( * chunk, bits)
736+ }
737+
738+ fn pruned_chunks ( & self ) -> usize {
739+ self . read ( ) . pruned_chunks ( )
740+ }
741+
742+ fn len ( & self ) -> u64 {
743+ BitmapReadable :: < N > :: len ( & * self . read ( ) )
744+ }
745+ }
746+
747+ /// A view of the committed bitmap plus zero or more speculative overlay `Layer`s.
748+ ///
749+ /// The chain terminates in a `Base` that references the shared committed bitmap. No validity
750+ /// check is performed. Callers must ensure they only read through batches whose chains are
751+ /// still valid prefixes of committed state (see [`SharedBitmap`]'s docs).
643752#[ derive( Clone , Debug ) ]
644753pub ( crate ) enum BitmapBatch < const N : usize > {
645- /// Committed bitmap (chain terminal) .
646- Base ( Arc < BitMap < N > > ) ,
754+ /// Chain terminal: shared reference to the committed bitmap .
755+ Base ( Arc < SharedBitmap < N > > ) ,
647756 /// Speculative layer on top of a parent batch.
648757 Layer ( Arc < BitmapBatchLayer < N > > ) ,
649758}
@@ -654,10 +763,6 @@ pub(crate) struct BitmapBatchLayer<const N: usize> {
654763 pub ( crate ) parent : BitmapBatch < N > ,
655764 /// Chunk-level overlay: materialized bytes for every chunk that differs from parent.
656765 pub ( crate ) overlay : Arc < ChunkOverlay < N > > ,
657- /// Pruned-chunk count, copied from `parent` at construction. Invariant across the whole
658- /// layer chain (pruning only happens on the committed base), so caching here lets
659- /// `BitmapBatch::pruned_chunks` return in O(1) instead of walking to the Base.
660- pub ( crate ) pruned_chunks : usize ,
661766}
662767
663768impl < const N : usize > BitmapBatch < N > {
@@ -675,7 +780,7 @@ impl<const N: usize> BitmapReadable<N> for BitmapBatch<N> {
675780 let mut current = self ;
676781 loop {
677782 match current {
678- Self :: Base ( bm ) => return * bm . get_chunk ( idx) ,
783+ Self :: Base ( shared ) => return shared . get_chunk ( idx) ,
679784 Self :: Layer ( layer) => {
680785 if let Some ( & chunk) = layer. overlay . get ( idx) {
681786 return chunk;
@@ -702,96 +807,23 @@ impl<const N: usize> BitmapReadable<N> for BitmapBatch<N> {
702807 }
703808
704809 fn pruned_chunks ( & self ) -> usize {
705- match self {
706- Self :: Base ( bm) => bm. pruned_chunks ( ) ,
707- Self :: Layer ( layer) => layer. pruned_chunks ,
810+ let mut current = self ;
811+ loop {
812+ match current {
813+ Self :: Base ( shared) => return shared. pruned_chunks ( ) ,
814+ Self :: Layer ( layer) => current = & layer. parent ,
815+ }
708816 }
709817 }
710818
711819 fn len ( & self ) -> u64 {
712820 match self {
713- Self :: Base ( bm ) => BitmapReadable :: < N > :: len ( bm . as_ref ( ) ) ,
821+ Self :: Base ( shared ) => BitmapReadable :: < N > :: len ( shared . as_ref ( ) ) ,
714822 Self :: Layer ( layer) => layer. overlay . len ,
715823 }
716824 }
717825}
718826
719- impl < const N : usize > BitmapBatch < N > {
720- /// Apply a chunk overlay to this bitmap. When `self` is `Base` with sole ownership, writes
721- /// overlay chunks directly into the bitmap. Otherwise creates a new `Layer`.
722- pub ( super ) fn apply_overlay ( & mut self , overlay : Arc < ChunkOverlay < N > > ) {
723- // Fast path: write overlay chunks directly into the Base bitmap.
724- if let Self :: Base ( base) = self {
725- if let Some ( bitmap) = Arc :: get_mut ( base) {
726- // Extend bitmap to the overlay's length.
727- bitmap. extend_to ( overlay. len ) ;
728- // Overwrite dirty chunks.
729- for ( & idx, chunk_bytes) in & overlay. chunks {
730- if idx >= bitmap. pruned_chunks ( ) {
731- bitmap. set_chunk_by_index ( idx, chunk_bytes) ;
732- }
733- }
734- return ;
735- }
736- }
737-
738- // Slow path: create a new layer.
739- let pruned_chunks = self . pruned_chunks ( ) ;
740- let parent = self . clone ( ) ;
741- * self = Self :: Layer ( Arc :: new ( BitmapBatchLayer {
742- parent,
743- overlay,
744- pruned_chunks,
745- } ) ) ;
746- }
747-
748- /// Flatten all layers back to a single `Base(Arc<BitMap<N>>)`.
749- ///
750- /// After flattening, the new `Base` Arc has refcount 1 (assuming no external clones
751- /// are held).
752- pub ( super ) fn flatten ( & mut self ) {
753- if matches ! ( self , Self :: Base ( _) ) {
754- return ;
755- }
756-
757- // Take ownership of the chain so that Arc refcounts are not
758- // artificially inflated by a clone.
759- let mut owned = std:: mem:: replace ( self , Self :: Base ( Arc :: new ( BitMap :: default ( ) ) ) ) ;
760-
761- // Collect overlays from tip to base.
762- let mut overlays = Vec :: new ( ) ;
763- let base = loop {
764- match owned {
765- Self :: Base ( bm) => break bm,
766- Self :: Layer ( layer) => match Arc :: try_unwrap ( layer) {
767- Ok ( inner) => {
768- overlays. push ( inner. overlay ) ;
769- owned = inner. parent ;
770- }
771- Err ( arc) => {
772- overlays. push ( arc. overlay . clone ( ) ) ;
773- owned = arc. parent . clone ( ) ;
774- }
775- } ,
776- }
777- } ;
778-
779- // Apply overlays from base to tip.
780- let mut bitmap = Arc :: try_unwrap ( base) . unwrap_or_else ( |arc| ( * arc) . clone ( ) ) ;
781- for overlay in overlays. into_iter ( ) . rev ( ) {
782- // Extend bitmap to the overlay's length.
783- bitmap. extend_to ( overlay. len ) ;
784- // Apply dirty chunks.
785- for ( & idx, chunk_bytes) in & overlay. chunks {
786- if idx >= bitmap. pruned_chunks ( ) {
787- bitmap. set_chunk_by_index ( idx, chunk_bytes) ;
788- }
789- }
790- }
791- * self = Self :: Base ( Arc :: new ( bitmap) ) ;
792- }
793- }
794-
795827impl < F : Graftable , D : Digest , U : update:: Update + Send + Sync , const N : usize >
796828 MerkleizedBatch < F , D , U , N >
797829where
@@ -818,6 +850,10 @@ where
818850 /// All uncommitted ancestors in the chain must be kept alive until the child (or any
819851 /// descendant) is merkleized. Dropping an uncommitted ancestor causes data
820852 /// loss detected at `apply_batch` time.
853+ ///
854+ /// This is only valid while `self` is still on the winning branch. If a different branch has
855+ /// been applied since `self` was created, `self` is no longer a valid parent and must not be
856+ /// extended.
821857 pub fn new_batch < H > ( self : & Arc < Self > ) -> UnmerkleizedBatch < F , H , U , N >
822858 where
823859 H : Hasher < Digest = D > ,
@@ -830,6 +866,9 @@ where
830866 }
831867
832868 /// Read through: local diff -> ancestor diffs -> committed DB.
869+ ///
870+ /// This is only valid while `self` remains on the committed prefix. If a non-ancestor batch
871+ /// has been applied since `self` was merkleized, do not read through it.
833872 pub async fn get < E , C , I , H > (
834873 & self ,
835874 key : & U :: Key ,
@@ -855,13 +894,17 @@ where
855894 H : Hasher ,
856895 Operation < F , U > : Codec ,
857896{
858- /// Create an initial [`MerkleizedBatch`] from the committed DB state.
897+ /// Create an initial [`MerkleizedBatch`] from the current committed DB state.
898+ ///
899+ /// The returned batch is rooted at the current committed prefix, but it is not a persistent
900+ /// snapshot across later divergent commits. If some other branch is applied afterward, this
901+ /// batch is no longer valid and must not be read through, extended, or applied.
859902 pub fn to_batch ( & self ) -> Arc < MerkleizedBatch < F , H :: Digest , U , N > > {
860903 let grafted = self . grafted_snapshot ( ) ;
861904 Arc :: new ( MerkleizedBatch {
862905 inner : self . any . to_batch ( ) ,
863906 grafted,
864- bitmap : self . status . clone ( ) ,
907+ bitmap : BitmapBatch :: Base ( Arc :: clone ( & self . status ) ) ,
865908 canonical_root : self . root ,
866909 } )
867910 }
@@ -1301,33 +1344,4 @@ mod tests {
13011344 // Empty bitmap, tip = 0: no candidates.
13021345 assert_eq ! ( scan. next_candidate( Location :: new( 0 ) , 0 ) , None ) ;
13031346 }
1304-
1305- #[ test]
1306- fn test_apply_overlay ( ) {
1307- // Base: 8 bits all set, sole owner -> fast path.
1308- let base = make_bitmap ( & [ true ; 8 ] ) ;
1309- let mut batch = BitmapBatch :: Base ( Arc :: new ( base) ) ;
1310-
1311- let mut overlay = ChunkOverlay :: new ( 12 ) ;
1312- let mut c0 = [ 0u8 ; N ] ;
1313- c0[ 0 ] = 0b1111_0111 ; // bits 0-7 set except bit 3
1314- c0[ 1 ] = 0b0000_0100 ; // bit 10 set
1315- overlay. chunks . insert ( 0 , c0) ;
1316- batch. apply_overlay ( Arc :: new ( overlay) ) ;
1317-
1318- // Fast path keeps Base, extends length, applies chunks.
1319- assert ! ( matches!( batch, BitmapBatch :: Base ( _) ) ) ;
1320- assert_eq ! ( batch. len( ) , 12 ) ;
1321- assert_eq ! ( batch. get_chunk( 0 ) [ 0 ] & ( 1 << 3 ) , 0 ) ;
1322- assert_ne ! ( batch. get_chunk( 0 ) [ 1 ] & ( 1 << 2 ) , 0 ) ;
1323-
1324- // Shared Arc -> slow path creates Layer.
1325- let BitmapBatch :: Base ( ref base_arc) = batch else {
1326- panic ! ( "expected Base" ) ;
1327- } ;
1328- let _shared = Arc :: clone ( base_arc) ;
1329- let overlay2 = ChunkOverlay :: new ( 12 ) ;
1330- batch. apply_overlay ( Arc :: new ( overlay2) ) ;
1331- assert ! ( matches!( batch, BitmapBatch :: Layer ( _) ) ) ;
1332- }
13331347}
0 commit comments