Skip to content

Commit 26982f6

Browse files
[Storage] PR 3627 feedback (#3635)
Co-authored-by: Roberto Bayardo <roberto@commonware.xyz>
1 parent 0fc4391 commit 26982f6

2 files changed

Lines changed: 189 additions & 14 deletions

File tree

storage/src/qmdb/current/batch.rs

Lines changed: 130 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -634,6 +634,7 @@ where
634634
let bitmap_batch = BitmapBatch::Layer(Arc::new(BitmapBatchLayer {
635635
parent: bitmap_parent.clone(),
636636
overlay: Arc::new(overlay),
637+
shared: Arc::clone(bitmap_parent.shared()),
637638
}));
638639

639640
// Compute canonical root. The grafted batch alone cannot resolve committed nodes,
@@ -772,19 +773,19 @@ pub(crate) struct BitmapBatchLayer<const N: usize> {
772773
pub(crate) parent: BitmapBatch<N>,
773774
/// Chunk-level overlay: materialized bytes for every chunk that differs from parent.
774775
pub(crate) overlay: Arc<ChunkOverlay<N>>,
776+
/// Cached terminal [`SharedBitmap`] so [`BitmapBatch::shared`] and
777+
/// [`BitmapBatch::pruned_chunks`] answer in O(1) instead of walking the chain.
778+
pub(crate) shared: Arc<SharedBitmap<N>>,
775779
}
776780

777781
impl<const N: usize> BitmapBatch<N> {
778782
const CHUNK_SIZE_BITS: u64 = BitMap::<N>::CHUNK_SIZE_BITS;
779783

780-
/// Walk to the terminal [`SharedBitmap`] at the bottom of the chain.
784+
/// Return the terminal [`SharedBitmap`] at the bottom of the chain.
781785
fn shared(&self) -> &Arc<SharedBitmap<N>> {
782-
let mut current = self;
783-
loop {
784-
match current {
785-
Self::Base(s) => return s,
786-
Self::Layer(layer) => current = &layer.parent,
787-
}
786+
match self {
787+
Self::Base(s) => s,
788+
Self::Layer(layer) => &layer.shared,
788789
}
789790
}
790791

@@ -808,6 +809,7 @@ impl<const N: usize> BitmapBatch<N> {
808809
result = Self::Layer(Arc::new(BitmapBatchLayer {
809810
parent: result,
810811
overlay,
812+
shared: Arc::clone(shared),
811813
}));
812814
}
813815
result
@@ -852,13 +854,7 @@ impl<const N: usize> BitmapReadable<N> for BitmapBatch<N> {
852854
}
853855

854856
fn pruned_chunks(&self) -> usize {
855-
let mut current = self;
856-
loop {
857-
match current {
858-
Self::Base(shared) => return shared.pruned_chunks(),
859-
Self::Layer(layer) => current = &layer.parent,
860-
}
861-
}
857+
self.shared().pruned_chunks()
862858
}
863859

864860
fn len(&self) -> u64 {
@@ -1389,4 +1385,124 @@ mod tests {
13891385
// Empty bitmap, tip = 0: no candidates.
13901386
assert_eq!(scan.next_candidate(Location::new(0), 0), None);
13911387
}
1388+
1389+
// ---- trim_committed tests ----
1390+
//
1391+
// `trim_committed` is called from `MerkleizedBatch::new_batch` to strip any `Layer`s whose
1392+
// overlays have already been absorbed into the shared committed bitmap by a prior apply.
1393+
// The implementation is a single loop that collects uncommitted overlays top-down and
1394+
// rebuilds a fresh chain rooted at `Base`. These tests cover distinct input shapes directly,
1395+
// without going through the full Db/batch machinery, so the function's structural output
1396+
// can be asserted.
1397+
1398+
/// Build a chain `Base(shared) -> Layer(len=L1) -> Layer(len=L2) -> ...` from a list of
1399+
/// overlay lengths (bottom to top). Each constructed `Layer` caches `shared` per the
1400+
/// struct's invariant.
1401+
fn make_chain(shared: &Arc<SharedBitmap<N>>, overlay_lens: &[u64]) -> BitmapBatch<N> {
1402+
let mut chain = BitmapBatch::Base(Arc::clone(shared));
1403+
for &len in overlay_lens {
1404+
chain = BitmapBatch::Layer(Arc::new(BitmapBatchLayer {
1405+
parent: chain,
1406+
overlay: Arc::new(ChunkOverlay::new(len)),
1407+
shared: Arc::clone(shared),
1408+
}));
1409+
}
1410+
chain
1411+
}
1412+
1413+
/// Walk a chain and return its overlay lengths in bottom-to-top order. Used to assert the
1414+
/// structural output of `trim_committed` without touching private fields. Panics if the
1415+
/// chain isn't terminated by a single `Base` at the bottom.
1416+
fn chain_overlays(batch: &BitmapBatch<N>) -> Vec<u64> {
1417+
let mut lens = Vec::new();
1418+
let mut current = batch;
1419+
while let BitmapBatch::Layer(layer) = current {
1420+
lens.push(layer.overlay.len);
1421+
current = &layer.parent;
1422+
}
1423+
assert!(matches!(current, BitmapBatch::Base(_)));
1424+
lens.reverse();
1425+
lens
1426+
}
1427+
1428+
/// Input is already a bare `Base` with no speculative layers on top — the loop body never
1429+
/// runs, `kept` stays empty, and the result is a freshly constructed `Base` pointing at the
1430+
/// same `SharedBitmap`. Real-world trigger: `MerkleizedBatch::new_batch` on a batch whose
1431+
/// chain was previously trimmed flat (e.g., immediately after an apply collapsed everything).
1432+
#[test]
1433+
fn trim_committed_already_base() {
1434+
let shared = Arc::new(SharedBitmap::<N>::new(make_bitmap(&[true; 64])));
1435+
let base = BitmapBatch::Base(Arc::clone(&shared));
1436+
let result = base.trim_committed();
1437+
// Still `Base`, pointing at the same shared terminal.
1438+
match result {
1439+
BitmapBatch::Base(s) => assert!(Arc::ptr_eq(&s, &shared)),
1440+
BitmapBatch::Layer(_) => panic!("expected Base"),
1441+
}
1442+
}
1443+
1444+
/// Every layer has been absorbed by prior applies — the loop breaks on the first iteration
1445+
/// and `kept` stays empty, so the result is a bare `Base`. This is the steady-state
1446+
/// "extend a just-applied batch" flow: after `apply_batch(A)`, `A`'s own layer has
1447+
/// `overlay.len == committed` and the next `new_batch` call should start from a clean
1448+
/// terminal.
1449+
#[test]
1450+
fn trim_committed_all_committed() {
1451+
// `shared.len() == 64`; the single layer's `overlay.len == 32 (<= 64)`, so it's committed.
1452+
let shared = Arc::new(SharedBitmap::<N>::new(make_bitmap(&[true; 64])));
1453+
let chain = make_chain(&shared, &[32]);
1454+
let result = chain.trim_committed();
1455+
// Collapsed to a bare Base, pointing at the original shared.
1456+
match result {
1457+
BitmapBatch::Base(s) => assert!(Arc::ptr_eq(&s, &shared)),
1458+
BitmapBatch::Layer(_) => panic!("expected Base after full trim"),
1459+
}
1460+
}
1461+
1462+
/// Every layer is still speculative — the loop walks all the way to `Base` without
1463+
/// breaking, and `kept` holds every overlay. The rebuilt chain is structurally equivalent
1464+
/// to the input (same overlay lens, same shared terminal). Real-world trigger: speculating
1465+
/// multiple batches deep (A, then B off A, then C off B) without `apply_batch` in between.
1466+
#[test]
1467+
fn trim_committed_none_committed() {
1468+
// `shared.len() == 32`; both overlays have `len > 32`, so neither is committed.
1469+
let shared = Arc::new(SharedBitmap::<N>::new(make_bitmap(&[true; 32])));
1470+
let chain = make_chain(&shared, &[64, 96]);
1471+
let result = chain.trim_committed();
1472+
// Structure must be preserved in bottom-to-top order.
1473+
assert_eq!(chain_overlays(&result), vec![64, 96]);
1474+
}
1475+
1476+
/// Exactly one layer is uncommitted (the newest) on top of a committed prefix — the
1477+
/// dominant pattern in chained growth. The loop collects the one uncommitted overlay, and
1478+
/// the rebuild produces `Layer(Base, overlay_B)`. Also verifies the rebuilt layer carries
1479+
/// the cached `shared` reference correctly. Real-world trigger: apply parent A, then B
1480+
/// held alive off A, then `B.new_batch()` to build C.
1481+
#[test]
1482+
fn trim_committed_exactly_one_uncommitted() {
1483+
// `shared.len() == 64`; committed layer (`overlay.len == 64`) + uncommitted (`96`).
1484+
let shared = Arc::new(SharedBitmap::<N>::new(make_bitmap(&[true; 64])));
1485+
let chain = make_chain(&shared, &[64, 96]);
1486+
let result = chain.trim_committed();
1487+
// The committed layer is gone; only the uncommitted overlay remains.
1488+
assert_eq!(chain_overlays(&result), vec![96]);
1489+
// And the rebuilt layer's `shared` field still points at the original terminal.
1490+
assert!(Arc::ptr_eq(result.shared(), &shared));
1491+
}
1492+
1493+
/// Two or more uncommitted layers on top of a committed prefix — exercises the loop's
1494+
/// iterated `kept.push` and the rebuild's iterated `Arc::new(BitmapBatchLayer)`, including
1495+
/// the cached `shared` wire-through on every reconstructed layer. Real-world trigger:
1496+
/// build A, then B off A, then C off B; apply only A; then call `C.new_batch()`.
1497+
#[test]
1498+
fn trim_committed_multiple_uncommitted() {
1499+
// `shared.len() == 64`; committed layer (64), then two uncommitted (96, 128).
1500+
let shared = Arc::new(SharedBitmap::<N>::new(make_bitmap(&[true; 64])));
1501+
let chain = make_chain(&shared, &[64, 96, 128]);
1502+
let result = chain.trim_committed();
1503+
// Committed layer dropped; uncommitted pair preserved in order.
1504+
assert_eq!(chain_overlays(&result), vec![96, 128]);
1505+
// Every reconstructed layer must still cache the original shared terminal.
1506+
assert!(Arc::ptr_eq(result.shared(), &shared));
1507+
}
13921508
}

storage/src/qmdb/current/mod.rs

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2970,6 +2970,65 @@ pub mod tests {
29702970
});
29712971
}
29722972

2973+
/// Build a child batch from a still-live parent whose apply was followed by a prune, then
2974+
/// merkleize and apply the child. The parent's `BitmapBatch` chain terminates in the shared
2975+
/// committed bitmap, and `prune` mutates that bitmap's pruning boundary in place. When the
2976+
/// child is constructed via `parent.new_batch()`, the internal `trim_committed` call must
2977+
/// observe the advanced boundary and produce a correct child chain; merkleize and apply must
2978+
/// then produce correct state for keys at and beyond the advanced floor.
2979+
#[test_traced("INFO")]
2980+
fn test_current_live_batch_child_after_prune() {
2981+
let executor = deterministic::Runner::default();
2982+
executor.start(|context| async move {
2983+
let ctx = context.with_label("db");
2984+
let mut db: UnorderedVariableDb = UnorderedVariableDb::init(
2985+
ctx.clone(),
2986+
variable_config::<OneCap>("child-after-prune", &ctx),
2987+
)
2988+
.await
2989+
.unwrap();
2990+
2991+
// Seed enough ops to span multiple bitmap chunks.
2992+
let mut seed = db.new_batch();
2993+
for i in 0u64..300 {
2994+
seed = seed.write(key(i), Some(val(i)));
2995+
}
2996+
let seed_m = seed.merkleize(&db, None).await.unwrap();
2997+
db.apply_batch(seed_m).await.unwrap();
2998+
db.commit().await.unwrap();
2999+
3000+
// Overwrite keys 0..250 so the inactivity floor advances past chunk 0.
3001+
let mut a_batch = db.new_batch();
3002+
for i in 0u64..250 {
3003+
a_batch = a_batch.write(key(i), Some(val(i + 10_000)));
3004+
}
3005+
let a = a_batch.merkleize(&db, None).await.unwrap();
3006+
db.apply_batch(Arc::clone(&a)).await.unwrap();
3007+
db.commit().await.unwrap();
3008+
3009+
// Prune while `a` is still live. Mutates the shared bitmap's pruning boundary in place.
3010+
let floor = db.inactivity_floor_loc();
3011+
db.prune(floor).await.unwrap();
3012+
3013+
// Extend `a` into `b` AFTER the prune. Building `b` off `a` triggers
3014+
// `trim_committed` on `a`'s chain, which must correctly see the advanced pruning
3015+
// boundary on the shared bitmap.
3016+
let b = a
3017+
.new_batch::<Sha256>()
3018+
.write(key(300), Some(val(300)))
3019+
.merkleize(&db, None)
3020+
.await
3021+
.unwrap();
3022+
3023+
db.apply_batch(b).await.unwrap();
3024+
assert_eq!(db.get(&key(0)).await.unwrap(), Some(val(10_000)));
3025+
assert_eq!(db.get(&key(249)).await.unwrap(), Some(val(10_249)));
3026+
assert_eq!(db.get(&key(300)).await.unwrap(), Some(val(300)));
3027+
3028+
db.destroy().await.unwrap();
3029+
});
3030+
}
3031+
29733032
/// Regression: applying a batch after its ancestor Arc is dropped (without
29743033
/// committing) must still apply the ancestor's bitmap pushes/clears and
29753034
/// snapshot diffs.

0 commit comments

Comments
 (0)