Skip to content

Commit 9737a09

Browse files
committed
PR 3627 nits
1 parent 094c8ee commit 9737a09

3 files changed

Lines changed: 228 additions & 44 deletions

File tree

storage/src/qmdb/benches/chained_growth.rs

Lines changed: 35 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,8 @@
33
//! Setup (untimed): seed `NUM_KEYS` keys, then grow a chain of `PREBUILT_CHAIN` batches applying
44
//! each parent while the child is still alive.
55
//!
6-
//! Timed: do `batches` more merkleize + apply iterations on top of the pre-built chain, with a
7-
//! single random update per batch so each overlay covers a tiny fraction of chunks.
6+
//! Timed: do `batches` more merkleize + apply iterations on top of the pre-built chain, with
7+
//! `updates` random updates per batch (varied across `UPDATES_PER_BATCH_SET`).
88
99
use crate::common::{make_fixed_value, Digest, WRITE_BUFFER_SIZE};
1010
use commonware_cryptography::{Hasher, Sha256};
@@ -87,17 +87,17 @@ fn cur_fix_cfg(
8787
/// Number of pre-populated keys in the seeded database.
8888
const NUM_KEYS: u64 = 1_000_000;
8989

90-
/// Random updates per batch. One update means each batch's chunk overlay covers ~1 / num_chunks
91-
/// of the bitmap, forcing chain reads to walk deep before finding a matching layer.
92-
const UPDATES_PER_BATCH: u64 = 1;
93-
9490
/// Number of batches grown during the untimed seed phase, producing a Db::status chain of this
9591
/// depth that subsequent reads must walk through.
96-
const PREBUILT_CHAIN: u64 = 10_000;
92+
const PREBUILT_CHAIN: u64 = 1_000;
9793

9894
/// Number of additional batches to grow during the timed region.
9995
const GROW_COUNTS: [u64; 1] = [100];
10096

97+
/// Random updates per batch. The sparse value exercises the chain-walk read path; the dense value
98+
/// exercises the merkleize path under many per-batch chunk writes.
99+
const UPDATES_PER_BATCH_SET: [u64; 2] = [1, 10_000];
100+
101101
#[derive(Debug, Clone, Copy)]
102102
enum CurrentVariant {
103103
UnorderedFixed32,
@@ -191,18 +191,19 @@ async fn run_chained_growth<
191191
Fork: Fn(&C::Merkleized) -> C::Batch,
192192
>(
193193
mut db: C,
194+
updates_per_batch: u64,
194195
grow: u64,
195196
fork_child: Fork,
196197
) -> Duration {
197198
seed_db(&mut db, NUM_KEYS).await;
198199
let mut rng = StdRng::seed_from_u64(99);
199200

200201
// Pre-build a deep chain (untimed).
201-
let initial = write_random_updates(db.new_batch(), UPDATES_PER_BATCH, NUM_KEYS, &mut rng);
202+
let initial = write_random_updates(db.new_batch(), updates_per_batch, NUM_KEYS, &mut rng);
202203
let mut parent = initial.merkleize(&db, None).await.unwrap();
203204
for _ in 0..PREBUILT_CHAIN {
204205
let child_batch =
205-
write_random_updates(fork_child(&parent), UPDATES_PER_BATCH, NUM_KEYS, &mut rng);
206+
write_random_updates(fork_child(&parent), updates_per_batch, NUM_KEYS, &mut rng);
206207
let child = child_batch.merkleize(&db, None).await.unwrap();
207208
db.apply_batch(parent).await.unwrap();
208209
parent = child;
@@ -216,7 +217,7 @@ async fn run_chained_growth<
216217
let start = Instant::now();
217218
for _ in 0..grow {
218219
let child_batch =
219-
write_random_updates(fork_child(&parent), UPDATES_PER_BATCH, NUM_KEYS, &mut rng);
220+
write_random_updates(fork_child(&parent), updates_per_batch, NUM_KEYS, &mut rng);
220221
let child = child_batch.merkleize(&db, None).await.unwrap();
221222
black_box(child.root());
222223
db.apply_batch(parent).await.unwrap();
@@ -232,26 +233,30 @@ async fn run_chained_growth<
232233
fn bench_chained_growth(c: &mut Criterion) {
233234
let runner = tokio::Runner::new(Config::default());
234235
for batches in GROW_COUNTS {
235-
for &variant in &CURRENT_VARIANTS {
236-
c.bench_function(
237-
&format!(
238-
"{}/variant={} batches={batches}",
239-
module_path!(),
240-
variant.name()
241-
),
242-
|b| {
243-
b.to_async(&runner).iter_custom(|iters| async move {
244-
let ctx = context::get::<Context>();
245-
let mut total = Duration::ZERO;
246-
for _ in 0..iters {
247-
with_current_db!(ctx.clone(), variant, |mut db| {
248-
total += run_chained_growth(db, batches, |p| p.new_batch()).await;
249-
});
250-
}
251-
total
252-
});
253-
},
254-
);
236+
for updates in UPDATES_PER_BATCH_SET {
237+
for &variant in &CURRENT_VARIANTS {
238+
c.bench_function(
239+
&format!(
240+
"{}/variant={} updates={updates} batches={batches}",
241+
module_path!(),
242+
variant.name()
243+
),
244+
|b| {
245+
b.to_async(&runner).iter_custom(|iters| async move {
246+
let ctx = context::get::<Context>();
247+
let mut total = Duration::ZERO;
248+
for _ in 0..iters {
249+
with_current_db!(ctx.clone(), variant, |mut db| {
250+
total +=
251+
run_chained_growth(db, updates, batches, |p| p.new_batch())
252+
.await;
253+
});
254+
}
255+
total
256+
});
257+
},
258+
);
259+
}
255260
}
256261
}
257262
}

storage/src/qmdb/current/batch.rs

Lines changed: 134 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -634,6 +634,7 @@ where
634634
let bitmap_batch = BitmapBatch::Layer(Arc::new(BitmapBatchLayer {
635635
parent: bitmap_parent.clone(),
636636
overlay: Arc::new(overlay),
637+
shared: Arc::clone(bitmap_parent.shared()),
637638
}));
638639

639640
// Compute canonical root. The grafted batch alone cannot resolve committed nodes,
@@ -763,19 +764,19 @@ pub(crate) struct BitmapBatchLayer<const N: usize> {
763764
pub(crate) parent: BitmapBatch<N>,
764765
/// Chunk-level overlay: materialized bytes for every chunk that differs from parent.
765766
pub(crate) overlay: Arc<ChunkOverlay<N>>,
767+
/// Cached terminal [`SharedBitmap`] so [`BitmapBatch::shared`] and
768+
/// [`BitmapBatch::pruned_chunks`] answer in O(1) instead of walking the chain.
769+
pub(crate) shared: Arc<SharedBitmap<N>>,
766770
}
767771

768772
impl<const N: usize> BitmapBatch<N> {
769773
const CHUNK_SIZE_BITS: u64 = BitMap::<N>::CHUNK_SIZE_BITS;
770774

771-
/// Walk to the terminal [`SharedBitmap`] at the bottom of the chain.
775+
/// Return the terminal [`SharedBitmap`] at the bottom of the chain.
772776
fn shared(&self) -> &Arc<SharedBitmap<N>> {
773-
let mut current = self;
774-
loop {
775-
match current {
776-
Self::Base(s) => return s,
777-
Self::Layer(layer) => current = &layer.parent,
778-
}
777+
match self {
778+
Self::Base(s) => s,
779+
Self::Layer(layer) => &layer.shared,
779780
}
780781
}
781782

@@ -817,6 +818,7 @@ impl<const N: usize> BitmapBatch<N> {
817818
return Self::Layer(Arc::new(BitmapBatchLayer {
818819
parent: Self::Base(Arc::clone(shared)),
819820
overlay: Arc::clone(&layer.overlay),
821+
shared: Arc::clone(shared),
820822
}));
821823
}
822824
}
@@ -835,6 +837,7 @@ impl<const N: usize> BitmapBatch<N> {
835837
result = Self::Layer(Arc::new(BitmapBatchLayer {
836838
parent: result,
837839
overlay,
840+
shared: Arc::clone(shared),
838841
}));
839842
}
840843
result
@@ -879,13 +882,7 @@ impl<const N: usize> BitmapReadable<N> for BitmapBatch<N> {
879882
}
880883

881884
fn pruned_chunks(&self) -> usize {
882-
let mut current = self;
883-
loop {
884-
match current {
885-
Self::Base(shared) => return shared.pruned_chunks(),
886-
Self::Layer(layer) => current = &layer.parent,
887-
}
888-
}
885+
self.shared().pruned_chunks()
889886
}
890887

891888
fn len(&self) -> u64 {
@@ -1416,4 +1413,127 @@ mod tests {
14161413
// Empty bitmap, tip = 0: no candidates.
14171414
assert_eq!(scan.next_candidate(Location::new(0), 0), None);
14181415
}
1416+
1417+
// ---- trim_committed tests ----
1418+
//
1419+
// `trim_committed` is called from `MerkleizedBatch::new_batch` to strip any `Layer`s whose
1420+
// overlays have already been absorbed into the shared committed bitmap by a prior apply.
1421+
// It has five branches; each test below exercises one of them directly, without going
1422+
// through the full Db/batch machinery, so the function's structural output can be asserted.
1423+
1424+
/// Build a chain `Base(shared) -> Layer(len=L1) -> Layer(len=L2) -> ...` from a list of
1425+
/// overlay lengths (bottom to top). Each constructed `Layer` caches `shared` per the
1426+
/// struct's invariant.
1427+
fn make_chain(shared: &Arc<SharedBitmap<N>>, overlay_lens: &[u64]) -> BitmapBatch<N> {
1428+
let mut chain = BitmapBatch::Base(Arc::clone(shared));
1429+
for &len in overlay_lens {
1430+
chain = BitmapBatch::Layer(Arc::new(BitmapBatchLayer {
1431+
parent: chain,
1432+
overlay: Arc::new(ChunkOverlay::new(len)),
1433+
shared: Arc::clone(shared),
1434+
}));
1435+
}
1436+
chain
1437+
}
1438+
1439+
/// Walk a chain and return its overlay lengths in bottom-to-top order. Used to assert the
1440+
/// structural output of `trim_committed` without touching private fields. Panics if the
1441+
/// chain isn't terminated by a single `Base` at the bottom.
1442+
fn chain_overlays(batch: &BitmapBatch<N>) -> Vec<u64> {
1443+
let mut lens = Vec::new();
1444+
let mut current = batch;
1445+
while let BitmapBatch::Layer(layer) = current {
1446+
lens.push(layer.overlay.len);
1447+
current = &layer.parent;
1448+
}
1449+
assert!(matches!(current, BitmapBatch::Base(_)));
1450+
lens.reverse();
1451+
lens
1452+
}
1453+
1454+
/// Branch 1: input is already a bare `Base` with no speculative layers on top. This is the
1455+
/// input `MerkleizedBatch::new_batch` sees when a caller builds a child off a batch whose
1456+
/// chain was previously trimmed flat (e.g., immediately after apply collapsed everything).
1457+
/// `trim_committed` must short-circuit and return an equivalent `Base` pointing at the
1458+
/// same `SharedBitmap`; no new allocation should be needed for the terminal.
1459+
#[test]
1460+
fn trim_committed_already_base() {
1461+
let shared = Arc::new(SharedBitmap::<N>::new(make_bitmap(&[true; 64])));
1462+
let base = BitmapBatch::Base(Arc::clone(&shared));
1463+
let result = base.trim_committed();
1464+
// Must still be `Base`, and pointer-equal to the input's shared terminal.
1465+
match result {
1466+
BitmapBatch::Base(s) => assert!(Arc::ptr_eq(&s, &shared)),
1467+
BitmapBatch::Layer(_) => panic!("expected Base"),
1468+
}
1469+
}
1470+
1471+
/// Branch 2: the shared committed bitmap has caught up to (or past) the top layer's
1472+
/// `overlay.len`, meaning every layer in the chain has been absorbed by prior applies.
1473+
/// This is the steady-state "extend a just-applied batch" flow: after `apply_batch(A)`,
1474+
/// `A`'s own layer has `overlay.len == committed`. `trim_committed` must collapse the
1475+
/// entire chain down to a bare `Base` so the new child starts from a clean terminal.
1476+
#[test]
1477+
fn trim_committed_all_committed() {
1478+
// `shared.len() == 64`; the single layer's `overlay.len == 32 (<= 64)`, so it's committed.
1479+
let shared = Arc::new(SharedBitmap::<N>::new(make_bitmap(&[true; 64])));
1480+
let chain = make_chain(&shared, &[32]);
1481+
let result = chain.trim_committed();
1482+
// Collapsed to a bare Base, pointing at the original shared.
1483+
match result {
1484+
BitmapBatch::Base(s) => assert!(Arc::ptr_eq(&s, &shared)),
1485+
BitmapBatch::Layer(_) => panic!("expected Base after full trim"),
1486+
}
1487+
}
1488+
1489+
/// Branch 3: every layer in the chain is still speculative (none have been applied), so
1490+
/// the committed bitmap hasn't caught up to any layer's `overlay.len`. `trim_committed`
1491+
/// must return the chain unchanged. This is the "speculate ahead without applying" case:
1492+
/// a caller building multiple batches deep (A, then B off A, then C off B) without
1493+
/// `apply_batch` in between.
1494+
#[test]
1495+
fn trim_committed_none_committed() {
1496+
// `shared.len() == 32`; both overlays have `len > 32`, so neither is committed.
1497+
let shared = Arc::new(SharedBitmap::<N>::new(make_bitmap(&[true; 32])));
1498+
let chain = make_chain(&shared, &[64, 96]);
1499+
let result = chain.trim_committed();
1500+
// Structure must be preserved in bottom-to-top order.
1501+
assert_eq!(chain_overlays(&result), vec![64, 96]);
1502+
}
1503+
1504+
/// Branch 4: exactly one layer is uncommitted (the newest), and everything below it has
1505+
/// been absorbed. This is the dominant pattern in chained growth: apply parent A, then
1506+
/// B held alive off A, then call `B.new_batch()` to build C. Roberto's fast path avoids
1507+
/// the general-case allocation dance and constructs the result directly as
1508+
/// `Layer(Base, overlay_B)` with a single `Arc::new`. The test also verifies the rebuilt
1509+
/// layer carries the cached `shared` reference correctly.
1510+
#[test]
1511+
fn trim_committed_exactly_one_uncommitted() {
1512+
// `shared.len() == 64`; committed layer (`overlay.len == 64`) + uncommitted (`96`).
1513+
let shared = Arc::new(SharedBitmap::<N>::new(make_bitmap(&[true; 64])));
1514+
let chain = make_chain(&shared, &[64, 96]);
1515+
let result = chain.trim_committed();
1516+
// The committed layer is gone; only the uncommitted overlay remains.
1517+
assert_eq!(chain_overlays(&result), vec![96]);
1518+
// And the rebuilt layer's `shared` field still points at the original terminal.
1519+
assert!(Arc::ptr_eq(result.shared(), &shared));
1520+
}
1521+
1522+
/// Branch 5: two or more uncommitted layers on top of a committed prefix. This is the
1523+
/// general-case path: the function has to walk the uncommitted tail, collect the overlays,
1524+
/// and rebuild a fresh chain rooted at `Base`. Exercises the loop that wires the cached
1525+
/// `shared` field into each reconstructed layer. Real-world trigger: build A, then B off A,
1526+
/// then C off B; apply only A; then call `C.new_batch()` — the trim sees B and C as
1527+
/// uncommitted, with A already absorbed.
1528+
#[test]
1529+
fn trim_committed_multiple_uncommitted() {
1530+
// `shared.len() == 64`; committed layer (64), then two uncommitted (96, 128).
1531+
let shared = Arc::new(SharedBitmap::<N>::new(make_bitmap(&[true; 64])));
1532+
let chain = make_chain(&shared, &[64, 96, 128]);
1533+
let result = chain.trim_committed();
1534+
// Committed layer dropped; uncommitted pair preserved in order.
1535+
assert_eq!(chain_overlays(&result), vec![96, 128]);
1536+
// Every reconstructed layer must still cache the original shared terminal.
1537+
assert!(Arc::ptr_eq(result.shared(), &shared));
1538+
}
14191539
}

storage/src/qmdb/current/mod.rs

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2969,6 +2969,65 @@ pub mod tests {
29692969
});
29702970
}
29712971

2972+
/// Build a child batch from a still-live parent whose apply was followed by a prune, then
2973+
/// merkleize and apply the child. The parent's `BitmapBatch` chain terminates in the shared
2974+
/// committed bitmap, and `prune` mutates that bitmap's pruning boundary in place. When the
2975+
/// child is constructed via `parent.new_batch()`, the internal `trim_committed` call must
2976+
/// observe the advanced boundary and produce a correct child chain; merkleize and apply must
2977+
/// then produce correct state for keys at and beyond the advanced floor.
2978+
#[test_traced("INFO")]
2979+
fn test_current_live_batch_child_after_prune() {
2980+
let executor = deterministic::Runner::default();
2981+
executor.start(|context| async move {
2982+
let ctx = context.with_label("db");
2983+
let mut db: UnorderedVariableDb = UnorderedVariableDb::init(
2984+
ctx.clone(),
2985+
variable_config::<OneCap>("child-after-prune", &ctx),
2986+
)
2987+
.await
2988+
.unwrap();
2989+
2990+
// Seed enough ops to span multiple bitmap chunks.
2991+
let mut seed = db.new_batch();
2992+
for i in 0u64..300 {
2993+
seed = seed.write(key(i), Some(val(i)));
2994+
}
2995+
let seed_m = seed.merkleize(&db, None).await.unwrap();
2996+
db.apply_batch(seed_m).await.unwrap();
2997+
db.commit().await.unwrap();
2998+
2999+
// Overwrite keys 0..250 so the inactivity floor advances past chunk 0.
3000+
let mut a_batch = db.new_batch();
3001+
for i in 0u64..250 {
3002+
a_batch = a_batch.write(key(i), Some(val(i + 10_000)));
3003+
}
3004+
let a = a_batch.merkleize(&db, None).await.unwrap();
3005+
db.apply_batch(Arc::clone(&a)).await.unwrap();
3006+
db.commit().await.unwrap();
3007+
3008+
// Prune while `a` is still live. Mutates the shared bitmap's pruning boundary in place.
3009+
let floor = db.inactivity_floor_loc();
3010+
db.prune(floor).await.unwrap();
3011+
3012+
// Extend `a` into `b` AFTER the prune. Building `b` off `a` triggers
3013+
// `trim_committed` on `a`'s chain, which must correctly see the advanced pruning
3014+
// boundary on the shared bitmap.
3015+
let b = a
3016+
.new_batch::<Sha256>()
3017+
.write(key(300), Some(val(300)))
3018+
.merkleize(&db, None)
3019+
.await
3020+
.unwrap();
3021+
3022+
db.apply_batch(b).await.unwrap();
3023+
assert_eq!(db.get(&key(0)).await.unwrap(), Some(val(10_000)));
3024+
assert_eq!(db.get(&key(249)).await.unwrap(), Some(val(10_249)));
3025+
assert_eq!(db.get(&key(300)).await.unwrap(), Some(val(300)));
3026+
3027+
db.destroy().await.unwrap();
3028+
});
3029+
}
3030+
29723031
/// Regression: applying a batch after its ancestor Arc is dropped (without
29733032
/// committing) must still apply the ancestor's bitmap pushes/clears and
29743033
/// snapshot diffs.

0 commit comments

Comments
 (0)