Skip to content

Commit 19e4685

Browse files
fix bitmap batch parent chain growth with rwlock
1 parent ad618b3 commit 19e4685

4 files changed

Lines changed: 320 additions & 290 deletions

File tree

storage/src/qmdb/current/batch.rs

Lines changed: 146 additions & 132 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,10 @@ use crate::{
2828
use ahash::AHasher;
2929
use commonware_codec::Codec;
3030
use commonware_cryptography::{Digest, Hasher};
31-
use commonware_utils::bitmap::{Prunable as BitMap, Readable as BitmapReadable};
31+
use commonware_utils::{
32+
bitmap::{Prunable as BitMap, Readable as BitmapReadable},
33+
sync::{RwLock, RwLockReadGuard, RwLockWriteGuard},
34+
};
3235
use std::{
3336
collections::{BTreeSet, HashMap},
3437
hash::BuildHasherDefault,
@@ -86,11 +89,12 @@ impl<const N: usize> ChunkOverlay<N> {
8689
chunk[rel / 8] |= 1 << (rel % 8);
8790
}
8891

89-
/// Clear a single bit (used for superseded locations).
90-
/// Skips locations in pruned chunks — those bits are already inactive.
91-
fn clear_bit<B: BitmapReadable<N>>(&mut self, base: &B, loc: u64) {
92+
/// Clear a single bit (used for superseded locations). `pruned_chunks` is passed in by the
93+
/// caller so the hot loop in `build_chunk_overlay` reads it once rather than per call.
94+
/// Skips locations in pruned chunks since those bits are already inactive.
95+
fn clear_bit<B: BitmapReadable<N>>(&mut self, base: &B, pruned_chunks: usize, loc: u64) {
9296
let idx = BitMap::<N>::to_chunk_index(loc);
93-
if idx < base.pruned_chunks() {
97+
if idx < pruned_chunks {
9498
return;
9599
}
96100
let rel = (loc % Self::CHUNK_BITS) as usize;
@@ -279,11 +283,43 @@ where
279283
bitmap_parent: BitmapBatch<N>,
280284
}
281285

282-
/// A speculative batch of operations whose root digest has been computed,
283-
/// in contrast to [`UnmerkleizedBatch`].
286+
/// A speculative batch of operations whose root digest has been computed, in contrast to
287+
/// [`UnmerkleizedBatch`].
288+
///
289+
/// Wraps an [`any::batch::MerkleizedBatch`] and adds the bitmap and grafted MMR state needed to
290+
/// compute the canonical root.
291+
///
292+
/// # Branch validity
293+
///
294+
/// A `MerkleizedBatch` is a branch-scoped view rooted at a specific committed prefix of the DB. It
295+
/// is not an immutable snapshot.
296+
///
297+
/// Internally, the batch chain terminates in the DB's committed bitmap via `BitmapBatch::Base`.
298+
/// That committed bitmap evolves in place as [`Db::apply_batch`](super::db::Db::apply_batch),
299+
/// [`Db::prune`](super::db::Db::prune), and [`Db::rewind`](super::db::Db::rewind) update the DB.
284300
///
285-
/// Wraps an [`any::batch::MerkleizedBatch`] and adds the bitmap and grafted MMR state needed
286-
/// to compute the canonical root.
301+
/// Reads through this batch's chain, constructing child batches from it, and applying it later are
302+
/// only semantically correct while its ancestor chain is still the committed prefix of the DB. In
303+
/// other words, every successful [`apply_batch`](super::db::Db::apply_batch) since this batch was
304+
/// merkleized must have applied an ancestor of this batch.
305+
///
306+
/// Once a non-ancestor batch is applied, this batch and all of its descendants become invalid
307+
/// objects. The library does not guard against continued use after that point.
308+
///
309+
/// Applying an invalid batch is caught by the any-layer staleness check and returns
310+
/// [`Error::StaleBatch`] without mutating committed state, so `apply_batch` itself cannot corrupt
311+
/// the DB. The one exception is equal-size sibling branches (where both branches have the same
312+
/// total operation count): the staleness check is size-based and cannot distinguish them, so
313+
/// applying a descendant of one sibling after the other was already applied can silently corrupt
314+
/// snapshot/log state. Callers must not apply batches from an orphaned branch.
315+
///
316+
/// Rules of thumb:
317+
/// - Drop any `Arc<MerkleizedBatch>` you no longer intend to apply.
318+
/// - Extending a batch after `apply_batch` has consumed it (building a child off the just-applied
319+
/// parent) is safe. The committed bitmap now equals the parent's post-apply state, so child reads
320+
/// are consistent.
321+
/// - Extending a batch after a different branch has been applied is not safe. Do not call `get`,
322+
/// `new_batch`, or `apply_batch` on that branch again.
287323
pub struct MerkleizedBatch<F: Graftable, D: Digest, U: update::Update + Send + Sync, const N: usize>
288324
where
289325
Operation<F, U>: Send + Sync,
@@ -446,13 +482,14 @@ where
446482
{
447483
let total_bits = base.len() + batch_len as u64;
448484
let mut overlay = ChunkOverlay::new(total_bits);
485+
let pruned_chunks = base.pruned_chunks();
449486

450487
// 1. CommitFloor (last op) is always active.
451488
let commit_loc = batch_base + batch_len as u64 - 1;
452489
overlay.set_bit(base, commit_loc);
453490

454491
// 2. Inactivate previous CommitFloor.
455-
overlay.clear_bit(base, batch_base - 1);
492+
overlay.clear_bit(base, pruned_chunks, batch_base - 1);
456493

457494
// 3. Set active bits + clear superseded locations from the diff.
458495
for (key, entry) in diff {
@@ -473,7 +510,7 @@ where
473510
}
474511
}
475512
if let Some(old) = prev_loc {
476-
overlay.clear_bit(base, *old);
513+
overlay.clear_bit(base, pruned_chunks, *old);
477514
}
478515
}
479516

@@ -595,7 +632,6 @@ where
595632
// compute_db_root sees newly completed chunks. Using bitmap_parent alone would miss chunks
596633
// that transitioned from partial to complete in this batch.
597634
let bitmap_batch = BitmapBatch::Layer(Arc::new(BitmapBatchLayer {
598-
pruned_chunks: bitmap_parent.pruned_chunks(),
599635
parent: bitmap_parent.clone(),
600636
overlay: Arc::new(overlay),
601637
}));
@@ -637,13 +673,86 @@ where
637673
}))
638674
}
639675

640-
/// Immutable bitmap state at any point in a batch chain.
676+
/// The committed bitmap shared between the [`Db`](super::db::Db) and live batches.
677+
///
678+
/// Wrapped in a [`RwLock`] so that [`Db::apply_batch`](super::db::Db::apply_batch),
679+
/// [`Db::prune`](super::db::Db::prune), and [`Db::rewind`](super::db::Db::rewind) can mutate
680+
/// the bitmap in place while live batches concurrently read through it.
681+
///
682+
/// # Reading through invalid batches
641683
///
642-
/// Mirrors the [`crate::merkle::batch::MerkleizedBatch`] pattern.
684+
/// The bitmap behind this lock represents *committed* state. If a caller holds a
685+
/// [`MerkleizedBatch`] that has become invalid (see its "Branch validity" docs for the
686+
/// conditions), reads through that batch's chain will silently return inconsistent data (the
687+
/// chain's overlays mixed with post-divergence committed chunks). The library does not guard
688+
/// against this; callers must avoid reading through invalid batches.
689+
pub(crate) struct SharedBitmap<const N: usize> {
690+
inner: RwLock<BitMap<N>>,
691+
}
692+
693+
impl<const N: usize> SharedBitmap<N> {
694+
pub(crate) const fn new(bitmap: BitMap<N>) -> Self {
695+
Self {
696+
inner: RwLock::new(bitmap),
697+
}
698+
}
699+
700+
/// Acquire a shared read guard over the committed bitmap. Kept private so external callers
701+
/// go through [`BitmapReadable`] (which doesn't expose a guard across `.await`).
702+
fn read(&self) -> RwLockReadGuard<'_, BitMap<N>> {
703+
self.inner.read()
704+
}
705+
706+
/// Acquire an exclusive write guard. By convention only
707+
/// [`Db::apply_batch`](super::db::Db::apply_batch), [`Db::prune`](super::db::Db::prune), and
708+
/// [`Db::rewind`](super::db::Db::rewind) mutate the shared bitmap.
709+
pub(super) fn write(&self) -> RwLockWriteGuard<'_, BitMap<N>> {
710+
self.inner.write()
711+
}
712+
}
713+
714+
impl<const N: usize> std::fmt::Debug for SharedBitmap<N> {
715+
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
716+
f.debug_struct("SharedBitmap")
717+
.field("bitmap_len", &BitmapReadable::<N>::len(&*self.read()))
718+
.finish()
719+
}
720+
}
721+
722+
/// [`BitmapReadable`] over the DB's committed bitmap. Each call acquires the read lock briefly.
723+
impl<const N: usize> BitmapReadable<N> for SharedBitmap<N> {
724+
fn complete_chunks(&self) -> usize {
725+
self.read().complete_chunks()
726+
}
727+
728+
fn get_chunk(&self, idx: usize) -> [u8; N] {
729+
*self.read().get_chunk(idx)
730+
}
731+
732+
fn last_chunk(&self) -> ([u8; N], u64) {
733+
let guard = self.read();
734+
let (chunk, bits) = guard.last_chunk();
735+
(*chunk, bits)
736+
}
737+
738+
fn pruned_chunks(&self) -> usize {
739+
self.read().pruned_chunks()
740+
}
741+
742+
fn len(&self) -> u64 {
743+
BitmapReadable::<N>::len(&*self.read())
744+
}
745+
}
746+
747+
/// A view of the committed bitmap plus zero or more speculative overlay `Layer`s.
748+
///
749+
/// The chain terminates in a `Base` that references the shared committed bitmap. No validity
750+
/// check is performed. Callers must ensure they only read through batches whose chains are
751+
/// still valid prefixes of committed state (see [`SharedBitmap`]'s docs).
643752
#[derive(Clone, Debug)]
644753
pub(crate) enum BitmapBatch<const N: usize> {
645-
/// Committed bitmap (chain terminal).
646-
Base(Arc<BitMap<N>>),
754+
/// Chain terminal: shared reference to the committed bitmap.
755+
Base(Arc<SharedBitmap<N>>),
647756
/// Speculative layer on top of a parent batch.
648757
Layer(Arc<BitmapBatchLayer<N>>),
649758
}
@@ -654,10 +763,6 @@ pub(crate) struct BitmapBatchLayer<const N: usize> {
654763
pub(crate) parent: BitmapBatch<N>,
655764
/// Chunk-level overlay: materialized bytes for every chunk that differs from parent.
656765
pub(crate) overlay: Arc<ChunkOverlay<N>>,
657-
/// Pruned-chunk count, copied from `parent` at construction. Invariant across the whole
658-
/// layer chain (pruning only happens on the committed base), so caching here lets
659-
/// `BitmapBatch::pruned_chunks` return in O(1) instead of walking to the Base.
660-
pub(crate) pruned_chunks: usize,
661766
}
662767

663768
impl<const N: usize> BitmapBatch<N> {
@@ -675,7 +780,7 @@ impl<const N: usize> BitmapReadable<N> for BitmapBatch<N> {
675780
let mut current = self;
676781
loop {
677782
match current {
678-
Self::Base(bm) => return *bm.get_chunk(idx),
783+
Self::Base(shared) => return shared.get_chunk(idx),
679784
Self::Layer(layer) => {
680785
if let Some(&chunk) = layer.overlay.get(idx) {
681786
return chunk;
@@ -702,96 +807,23 @@ impl<const N: usize> BitmapReadable<N> for BitmapBatch<N> {
702807
}
703808

704809
fn pruned_chunks(&self) -> usize {
705-
match self {
706-
Self::Base(bm) => bm.pruned_chunks(),
707-
Self::Layer(layer) => layer.pruned_chunks,
810+
let mut current = self;
811+
loop {
812+
match current {
813+
Self::Base(shared) => return shared.pruned_chunks(),
814+
Self::Layer(layer) => current = &layer.parent,
815+
}
708816
}
709817
}
710818

711819
fn len(&self) -> u64 {
712820
match self {
713-
Self::Base(bm) => BitmapReadable::<N>::len(bm.as_ref()),
821+
Self::Base(shared) => BitmapReadable::<N>::len(shared.as_ref()),
714822
Self::Layer(layer) => layer.overlay.len,
715823
}
716824
}
717825
}
718826

719-
impl<const N: usize> BitmapBatch<N> {
720-
/// Apply a chunk overlay to this bitmap. When `self` is `Base` with sole ownership, writes
721-
/// overlay chunks directly into the bitmap. Otherwise creates a new `Layer`.
722-
pub(super) fn apply_overlay(&mut self, overlay: Arc<ChunkOverlay<N>>) {
723-
// Fast path: write overlay chunks directly into the Base bitmap.
724-
if let Self::Base(base) = self {
725-
if let Some(bitmap) = Arc::get_mut(base) {
726-
// Extend bitmap to the overlay's length.
727-
bitmap.extend_to(overlay.len);
728-
// Overwrite dirty chunks.
729-
for (&idx, chunk_bytes) in &overlay.chunks {
730-
if idx >= bitmap.pruned_chunks() {
731-
bitmap.set_chunk_by_index(idx, chunk_bytes);
732-
}
733-
}
734-
return;
735-
}
736-
}
737-
738-
// Slow path: create a new layer.
739-
let pruned_chunks = self.pruned_chunks();
740-
let parent = self.clone();
741-
*self = Self::Layer(Arc::new(BitmapBatchLayer {
742-
parent,
743-
overlay,
744-
pruned_chunks,
745-
}));
746-
}
747-
748-
/// Flatten all layers back to a single `Base(Arc<BitMap<N>>)`.
749-
///
750-
/// After flattening, the new `Base` Arc has refcount 1 (assuming no external clones
751-
/// are held).
752-
pub(super) fn flatten(&mut self) {
753-
if matches!(self, Self::Base(_)) {
754-
return;
755-
}
756-
757-
// Take ownership of the chain so that Arc refcounts are not
758-
// artificially inflated by a clone.
759-
let mut owned = std::mem::replace(self, Self::Base(Arc::new(BitMap::default())));
760-
761-
// Collect overlays from tip to base.
762-
let mut overlays = Vec::new();
763-
let base = loop {
764-
match owned {
765-
Self::Base(bm) => break bm,
766-
Self::Layer(layer) => match Arc::try_unwrap(layer) {
767-
Ok(inner) => {
768-
overlays.push(inner.overlay);
769-
owned = inner.parent;
770-
}
771-
Err(arc) => {
772-
overlays.push(arc.overlay.clone());
773-
owned = arc.parent.clone();
774-
}
775-
},
776-
}
777-
};
778-
779-
// Apply overlays from base to tip.
780-
let mut bitmap = Arc::try_unwrap(base).unwrap_or_else(|arc| (*arc).clone());
781-
for overlay in overlays.into_iter().rev() {
782-
// Extend bitmap to the overlay's length.
783-
bitmap.extend_to(overlay.len);
784-
// Apply dirty chunks.
785-
for (&idx, chunk_bytes) in &overlay.chunks {
786-
if idx >= bitmap.pruned_chunks() {
787-
bitmap.set_chunk_by_index(idx, chunk_bytes);
788-
}
789-
}
790-
}
791-
*self = Self::Base(Arc::new(bitmap));
792-
}
793-
}
794-
795827
impl<F: Graftable, D: Digest, U: update::Update + Send + Sync, const N: usize>
796828
MerkleizedBatch<F, D, U, N>
797829
where
@@ -818,6 +850,10 @@ where
818850
/// All uncommitted ancestors in the chain must be kept alive until the child (or any
819851
/// descendant) is merkleized. Dropping an uncommitted ancestor causes data
820852
/// loss detected at `apply_batch` time.
853+
///
854+
/// This is only valid while `self` is still on the winning branch. If a different branch has
855+
/// been applied since `self` was created, `self` is no longer a valid parent and must not be
856+
/// extended.
821857
pub fn new_batch<H>(self: &Arc<Self>) -> UnmerkleizedBatch<F, H, U, N>
822858
where
823859
H: Hasher<Digest = D>,
@@ -830,6 +866,9 @@ where
830866
}
831867

832868
/// Read through: local diff -> ancestor diffs -> committed DB.
869+
///
870+
/// This is only valid while `self` remains on the committed prefix. If a non-ancestor batch
871+
/// has been applied since `self` was merkleized, do not read through it.
833872
pub async fn get<E, C, I, H>(
834873
&self,
835874
key: &U::Key,
@@ -855,13 +894,17 @@ where
855894
H: Hasher,
856895
Operation<F, U>: Codec,
857896
{
858-
/// Create an initial [`MerkleizedBatch`] from the committed DB state.
897+
/// Create an initial [`MerkleizedBatch`] from the current committed DB state.
898+
///
899+
/// The returned batch is rooted at the current committed prefix, but it is not a persistent
900+
/// snapshot across later divergent commits. If some other branch is applied afterward, this
901+
/// batch is no longer valid and must not be read through, extended, or applied.
859902
pub fn to_batch(&self) -> Arc<MerkleizedBatch<F, H::Digest, U, N>> {
860903
let grafted = self.grafted_snapshot();
861904
Arc::new(MerkleizedBatch {
862905
inner: self.any.to_batch(),
863906
grafted,
864-
bitmap: self.status.clone(),
907+
bitmap: BitmapBatch::Base(Arc::clone(&self.status)),
865908
canonical_root: self.root,
866909
})
867910
}
@@ -1301,33 +1344,4 @@ mod tests {
13011344
// Empty bitmap, tip = 0: no candidates.
13021345
assert_eq!(scan.next_candidate(Location::new(0), 0), None);
13031346
}
1304-
1305-
#[test]
1306-
fn test_apply_overlay() {
1307-
// Base: 8 bits all set, sole owner -> fast path.
1308-
let base = make_bitmap(&[true; 8]);
1309-
let mut batch = BitmapBatch::Base(Arc::new(base));
1310-
1311-
let mut overlay = ChunkOverlay::new(12);
1312-
let mut c0 = [0u8; N];
1313-
c0[0] = 0b1111_0111; // bits 0-7 set except bit 3
1314-
c0[1] = 0b0000_0100; // bit 10 set
1315-
overlay.chunks.insert(0, c0);
1316-
batch.apply_overlay(Arc::new(overlay));
1317-
1318-
// Fast path keeps Base, extends length, applies chunks.
1319-
assert!(matches!(batch, BitmapBatch::Base(_)));
1320-
assert_eq!(batch.len(), 12);
1321-
assert_eq!(batch.get_chunk(0)[0] & (1 << 3), 0);
1322-
assert_ne!(batch.get_chunk(0)[1] & (1 << 2), 0);
1323-
1324-
// Shared Arc -> slow path creates Layer.
1325-
let BitmapBatch::Base(ref base_arc) = batch else {
1326-
panic!("expected Base");
1327-
};
1328-
let _shared = Arc::clone(base_arc);
1329-
let overlay2 = ChunkOverlay::new(12);
1330-
batch.apply_overlay(Arc::new(overlay2));
1331-
assert!(matches!(batch, BitmapBatch::Layer(_)));
1332-
}
13331347
}

0 commit comments

Comments
 (0)