Skip to content

Commit e68d29b

Browse files
fix bitmap batch parent chain growth with rwlock
1 parent 5cce12d commit e68d29b

4 files changed

Lines changed: 277 additions & 289 deletions

File tree

storage/src/qmdb/current/batch.rs

Lines changed: 126 additions & 131 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,10 @@ use crate::{
2828
use ahash::AHasher;
2929
use commonware_codec::Codec;
3030
use commonware_cryptography::{Digest, Hasher};
31-
use commonware_utils::bitmap::{Prunable as BitMap, Readable as BitmapReadable};
31+
use commonware_utils::{
32+
bitmap::{Prunable as BitMap, Readable as BitmapReadable},
33+
sync::{RwLock, RwLockReadGuard, RwLockWriteGuard},
34+
};
3235
use std::{
3336
collections::{BTreeSet, HashMap},
3437
hash::BuildHasherDefault,
@@ -86,11 +89,12 @@ impl<const N: usize> ChunkOverlay<N> {
8689
chunk[rel / 8] |= 1 << (rel % 8);
8790
}
8891

89-
/// Clear a single bit (used for superseded locations).
90-
/// Skips locations in pruned chunks — those bits are already inactive.
91-
fn clear_bit<B: BitmapReadable<N>>(&mut self, base: &B, loc: u64) {
92+
/// Clear a single bit (used for superseded locations). `pruned_chunks` is passed in by the
93+
/// caller so the hot loop in `build_chunk_overlay` reads it once rather than per call.
94+
/// Skips locations in pruned chunks since those bits are already inactive.
95+
fn clear_bit<B: BitmapReadable<N>>(&mut self, base: &B, pruned_chunks: usize, loc: u64) {
9296
let idx = BitMap::<N>::to_chunk_index(loc);
93-
if idx < base.pruned_chunks() {
97+
if idx < pruned_chunks {
9498
return;
9599
}
96100
let rel = (loc % Self::CHUNK_BITS) as usize;
@@ -279,11 +283,35 @@ where
279283
bitmap_parent: BitmapBatch<N>,
280284
}
281285

282-
/// A speculative batch of operations whose root digest has been computed,
283-
/// in contrast to [`UnmerkleizedBatch`].
286+
/// A speculative batch of operations whose root digest has been computed, in contrast to
287+
/// [`UnmerkleizedBatch`].
288+
///
289+
/// Wraps an [`any::batch::MerkleizedBatch`] and adds the bitmap and grafted MMR state needed to
290+
/// compute the canonical root.
291+
///
292+
/// # Stale reads
293+
///
294+
/// A `MerkleizedBatch` holds a reference to the DB's committed bitmap via the internal
295+
/// `BitmapBatch::Base` variant. That bitmap evolves in place as
296+
/// [`Db::apply_batch`](super::db::Db::apply_batch) commits batches.
297+
/// Reads through this batch's chain (via its own `get`, by constructing child batches, or by
298+
/// anything that walks the bitmap chain) are only semantically correct if this batch's chain is
299+
/// still a valid prefix of the DB's committed state. That is, every
300+
/// [`apply_batch`](super::db::Db::apply_batch) call since this batch was merkleized has applied an
301+
/// ancestor of *this* batch.
302+
///
303+
/// **The library does not guard against stale reads.** If you apply an unrelated sibling or any
304+
/// other non-ancestor batch and then read through this one, chunk reads fall through to the
305+
/// (now-advanced) committed bitmap while your overlays are still applied on top, producing
306+
/// incoherent state that doesn't correspond to any valid DB view.
284307
///
285-
/// Wraps an [`any::batch::MerkleizedBatch`] and adds the bitmap and grafted MMR state needed
286-
/// to compute the canonical root.
308+
/// Rules of thumb:
309+
/// - Drop any `Arc<MerkleizedBatch>` you no longer intend to apply.
310+
/// - Extending a batch after `apply_batch` has consumed it (building a child off the just-applied
311+
/// parent) is safe. The committed bitmap now equals the parent's post-apply state, so child
312+
/// reads are consistent.
313+
/// - Extending a batch after a *different* branch has been applied is not safe and will silently
314+
/// return wrong data.
287315
pub struct MerkleizedBatch<F: Graftable, D: Digest, U: update::Update + Send + Sync, const N: usize>
288316
where
289317
Operation<F, U>: Send + Sync,
@@ -446,13 +474,14 @@ where
446474
{
447475
let total_bits = base.len() + batch_len as u64;
448476
let mut overlay = ChunkOverlay::new(total_bits);
477+
let pruned_chunks = base.pruned_chunks();
449478

450479
// 1. CommitFloor (last op) is always active.
451480
let commit_loc = batch_base + batch_len as u64 - 1;
452481
overlay.set_bit(base, commit_loc);
453482

454483
// 2. Inactivate previous CommitFloor.
455-
overlay.clear_bit(base, batch_base - 1);
484+
overlay.clear_bit(base, pruned_chunks, batch_base - 1);
456485

457486
// 3. Set active bits + clear superseded locations from the diff.
458487
for (key, entry) in diff {
@@ -473,7 +502,7 @@ where
473502
}
474503
}
475504
if let Some(old) = prev_loc {
476-
overlay.clear_bit(base, *old);
505+
overlay.clear_bit(base, pruned_chunks, *old);
477506
}
478507
}
479508

@@ -595,7 +624,6 @@ where
595624
// compute_db_root sees newly completed chunks. Using bitmap_parent alone would miss chunks
596625
// that transitioned from partial to complete in this batch.
597626
let bitmap_batch = BitmapBatch::Layer(Arc::new(BitmapBatchLayer {
598-
pruned_chunks: bitmap_parent.pruned_chunks(),
599627
parent: bitmap_parent.clone(),
600628
overlay: Arc::new(overlay),
601629
}));
@@ -637,13 +665,86 @@ where
637665
}))
638666
}
639667

640-
/// Immutable bitmap state at any point in a batch chain.
668+
/// The committed bitmap shared between the [`Db`](super::db::Db) and live batches.
641669
///
642-
/// Mirrors the [`crate::merkle::batch::MerkleizedBatch`] pattern.
670+
/// Wrapped in a [`RwLock`] so that [`Db::apply_batch`](super::db::Db::apply_batch),
671+
/// [`Db::prune`](super::db::Db::prune), and [`Db::rewind`](super::db::Db::rewind) can mutate
672+
/// the bitmap in place while live batches concurrently read through it.
673+
///
674+
/// # Stale reads
675+
///
676+
/// The bitmap behind this lock represents *committed* state. If a caller holds a `MerkleizedBatch`
677+
/// whose chain is no longer a valid prefix of committed state (e.g., an orphaned sibling branch
678+
/// after the other branch was applied), reads through that chain's `Base` will silently return
679+
/// inconsistent data (the chain's overlays mixed with post-divergence committed chunks). The
680+
/// library does not guard against this; callers must avoid reading through stale batches.
681+
pub(crate) struct SharedBitmap<const N: usize> {
682+
inner: RwLock<BitMap<N>>,
683+
}
684+
685+
impl<const N: usize> SharedBitmap<N> {
686+
pub(crate) const fn new(bitmap: BitMap<N>) -> Self {
687+
Self {
688+
inner: RwLock::new(bitmap),
689+
}
690+
}
691+
692+
/// Acquire a shared read guard over the committed bitmap. Kept private so external callers
693+
/// go through [`BitmapReadable`] (which doesn't expose a guard across `.await`).
694+
fn read(&self) -> RwLockReadGuard<'_, BitMap<N>> {
695+
self.inner.read()
696+
}
697+
698+
/// Acquire an exclusive write guard. By convention only
699+
/// [`Db::apply_batch`](super::db::Db::apply_batch), [`Db::prune`](super::db::Db::prune), and
700+
/// [`Db::rewind`](super::db::Db::rewind) mutate the shared bitmap.
701+
pub(super) fn write(&self) -> RwLockWriteGuard<'_, BitMap<N>> {
702+
self.inner.write()
703+
}
704+
}
705+
706+
impl<const N: usize> std::fmt::Debug for SharedBitmap<N> {
707+
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
708+
f.debug_struct("SharedBitmap")
709+
.field("bitmap_len", &BitmapReadable::<N>::len(&*self.read()))
710+
.finish()
711+
}
712+
}
713+
714+
/// [`BitmapReadable`] over the DB's committed bitmap. Each call acquires the read lock briefly.
715+
impl<const N: usize> BitmapReadable<N> for SharedBitmap<N> {
716+
fn complete_chunks(&self) -> usize {
717+
self.read().complete_chunks()
718+
}
719+
720+
fn get_chunk(&self, idx: usize) -> [u8; N] {
721+
*self.read().get_chunk(idx)
722+
}
723+
724+
fn last_chunk(&self) -> ([u8; N], u64) {
725+
let guard = self.read();
726+
let (chunk, bits) = guard.last_chunk();
727+
(*chunk, bits)
728+
}
729+
730+
fn pruned_chunks(&self) -> usize {
731+
self.read().pruned_chunks()
732+
}
733+
734+
fn len(&self) -> u64 {
735+
BitmapReadable::<N>::len(&*self.read())
736+
}
737+
}
738+
739+
/// A view of the committed bitmap plus zero or more speculative overlay `Layer`s.
740+
///
741+
/// The chain terminates in a `Base` that references the shared committed bitmap. No staleness
742+
/// check is performed. Callers must ensure they only read through batches whose chains are
743+
/// still valid prefixes of committed state (see [`SharedBitmap`]'s docs).
643744
#[derive(Clone, Debug)]
644745
pub(crate) enum BitmapBatch<const N: usize> {
645-
/// Committed bitmap (chain terminal).
646-
Base(Arc<BitMap<N>>),
746+
/// Chain terminal: shared reference to the committed bitmap.
747+
Base(Arc<SharedBitmap<N>>),
647748
/// Speculative layer on top of a parent batch.
648749
Layer(Arc<BitmapBatchLayer<N>>),
649750
}
@@ -654,10 +755,6 @@ pub(crate) struct BitmapBatchLayer<const N: usize> {
654755
pub(crate) parent: BitmapBatch<N>,
655756
/// Chunk-level overlay: materialized bytes for every chunk that differs from parent.
656757
pub(crate) overlay: Arc<ChunkOverlay<N>>,
657-
/// Pruned-chunk count, copied from `parent` at construction. Invariant across the whole
658-
/// layer chain (pruning only happens on the committed base), so caching here lets
659-
/// `BitmapBatch::pruned_chunks` return in O(1) instead of walking to the Base.
660-
pub(crate) pruned_chunks: usize,
661758
}
662759

663760
impl<const N: usize> BitmapBatch<N> {
@@ -675,7 +772,7 @@ impl<const N: usize> BitmapReadable<N> for BitmapBatch<N> {
675772
let mut current = self;
676773
loop {
677774
match current {
678-
Self::Base(bm) => return *bm.get_chunk(idx),
775+
Self::Base(shared) => return shared.get_chunk(idx),
679776
Self::Layer(layer) => {
680777
if let Some(&chunk) = layer.overlay.get(idx) {
681778
return chunk;
@@ -702,96 +799,23 @@ impl<const N: usize> BitmapReadable<N> for BitmapBatch<N> {
702799
}
703800

704801
fn pruned_chunks(&self) -> usize {
705-
match self {
706-
Self::Base(bm) => bm.pruned_chunks(),
707-
Self::Layer(layer) => layer.pruned_chunks,
802+
let mut current = self;
803+
loop {
804+
match current {
805+
Self::Base(shared) => return shared.pruned_chunks(),
806+
Self::Layer(layer) => current = &layer.parent,
807+
}
708808
}
709809
}
710810

711811
fn len(&self) -> u64 {
712812
match self {
713-
Self::Base(bm) => BitmapReadable::<N>::len(bm.as_ref()),
813+
Self::Base(shared) => BitmapReadable::<N>::len(shared.as_ref()),
714814
Self::Layer(layer) => layer.overlay.len,
715815
}
716816
}
717817
}
718818

719-
impl<const N: usize> BitmapBatch<N> {
720-
/// Apply a chunk overlay to this bitmap. When `self` is `Base` with sole ownership, writes
721-
/// overlay chunks directly into the bitmap. Otherwise creates a new `Layer`.
722-
pub(super) fn apply_overlay(&mut self, overlay: Arc<ChunkOverlay<N>>) {
723-
// Fast path: write overlay chunks directly into the Base bitmap.
724-
if let Self::Base(base) = self {
725-
if let Some(bitmap) = Arc::get_mut(base) {
726-
// Extend bitmap to the overlay's length.
727-
bitmap.extend_to(overlay.len);
728-
// Overwrite dirty chunks.
729-
for (&idx, chunk_bytes) in &overlay.chunks {
730-
if idx >= bitmap.pruned_chunks() {
731-
bitmap.set_chunk_by_index(idx, chunk_bytes);
732-
}
733-
}
734-
return;
735-
}
736-
}
737-
738-
// Slow path: create a new layer.
739-
let pruned_chunks = self.pruned_chunks();
740-
let parent = self.clone();
741-
*self = Self::Layer(Arc::new(BitmapBatchLayer {
742-
parent,
743-
overlay,
744-
pruned_chunks,
745-
}));
746-
}
747-
748-
/// Flatten all layers back to a single `Base(Arc<BitMap<N>>)`.
749-
///
750-
/// After flattening, the new `Base` Arc has refcount 1 (assuming no external clones
751-
/// are held).
752-
pub(super) fn flatten(&mut self) {
753-
if matches!(self, Self::Base(_)) {
754-
return;
755-
}
756-
757-
// Take ownership of the chain so that Arc refcounts are not
758-
// artificially inflated by a clone.
759-
let mut owned = std::mem::replace(self, Self::Base(Arc::new(BitMap::default())));
760-
761-
// Collect overlays from tip to base.
762-
let mut overlays = Vec::new();
763-
let base = loop {
764-
match owned {
765-
Self::Base(bm) => break bm,
766-
Self::Layer(layer) => match Arc::try_unwrap(layer) {
767-
Ok(inner) => {
768-
overlays.push(inner.overlay);
769-
owned = inner.parent;
770-
}
771-
Err(arc) => {
772-
overlays.push(arc.overlay.clone());
773-
owned = arc.parent.clone();
774-
}
775-
},
776-
}
777-
};
778-
779-
// Apply overlays from base to tip.
780-
let mut bitmap = Arc::try_unwrap(base).unwrap_or_else(|arc| (*arc).clone());
781-
for overlay in overlays.into_iter().rev() {
782-
// Extend bitmap to the overlay's length.
783-
bitmap.extend_to(overlay.len);
784-
// Apply dirty chunks.
785-
for (&idx, chunk_bytes) in &overlay.chunks {
786-
if idx >= bitmap.pruned_chunks() {
787-
bitmap.set_chunk_by_index(idx, chunk_bytes);
788-
}
789-
}
790-
}
791-
*self = Self::Base(Arc::new(bitmap));
792-
}
793-
}
794-
795819
impl<F: Graftable, D: Digest, U: update::Update + Send + Sync, const N: usize>
796820
MerkleizedBatch<F, D, U, N>
797821
where
@@ -861,7 +885,7 @@ where
861885
Arc::new(MerkleizedBatch {
862886
inner: self.any.to_batch(),
863887
grafted,
864-
bitmap: self.status.clone(),
888+
bitmap: BitmapBatch::Base(Arc::clone(&self.status)),
865889
canonical_root: self.root,
866890
})
867891
}
@@ -1301,33 +1325,4 @@ mod tests {
13011325
// Empty bitmap, tip = 0: no candidates.
13021326
assert_eq!(scan.next_candidate(Location::new(0), 0), None);
13031327
}
1304-
1305-
#[test]
1306-
fn test_apply_overlay() {
1307-
// Base: 8 bits all set, sole owner -> fast path.
1308-
let base = make_bitmap(&[true; 8]);
1309-
let mut batch = BitmapBatch::Base(Arc::new(base));
1310-
1311-
let mut overlay = ChunkOverlay::new(12);
1312-
let mut c0 = [0u8; N];
1313-
c0[0] = 0b1111_0111; // bits 0-7 set except bit 3
1314-
c0[1] = 0b0000_0100; // bit 10 set
1315-
overlay.chunks.insert(0, c0);
1316-
batch.apply_overlay(Arc::new(overlay));
1317-
1318-
// Fast path keeps Base, extends length, applies chunks.
1319-
assert!(matches!(batch, BitmapBatch::Base(_)));
1320-
assert_eq!(batch.len(), 12);
1321-
assert_eq!(batch.get_chunk(0)[0] & (1 << 3), 0);
1322-
assert_ne!(batch.get_chunk(0)[1] & (1 << 2), 0);
1323-
1324-
// Shared Arc -> slow path creates Layer.
1325-
let BitmapBatch::Base(ref base_arc) = batch else {
1326-
panic!("expected Base");
1327-
};
1328-
let _shared = Arc::clone(base_arc);
1329-
let overlay2 = ChunkOverlay::new(12);
1330-
batch.apply_overlay(Arc::new(overlay2));
1331-
assert!(matches!(batch, BitmapBatch::Layer(_)));
1332-
}
13331328
}

0 commit comments

Comments
 (0)