Skip to content

Commit 5da3189

Browse files
[commonware-storage/mmr/bitmap] add persisting of pruned portion of authenticated bitmap (#805)
1 parent b36ba66 commit 5da3189

2 files changed

Lines changed: 203 additions & 44 deletions

File tree

storage/src/mmr/bitmap.rs

Lines changed: 193 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,25 @@
1-
//! An authenticatable bitmap.
2-
3-
use crate::mmr::{
4-
iterator::leaf_num_to_pos, mem::Mmr, verification::Proof, verification::Storage, Error,
1+
//! An authenticated bitmap.
2+
//!
3+
//! The authenticated bitmap is an in-memory data structure that does not persist its contents
4+
//! other than the data corresponding to its "pruned" section, allowing full restoration by
5+
//! "replaying" any unpruned elements.
6+
//!
7+
//! Authentication is provided by a Merkle tree that is maintained over the bitmap, with each leaf
8+
//! covering a chunk of N bytes. This Merkle tree isn't balanced, but instead mimics the structure
9+
//! of an MMR with an equivalent number of leaves. This structure reduces overhead of updating the
10+
//! most recently added elements, and (more importantly) simplifies aligning the bitmap with an MMR
11+
//! over elements whose activity state is reflected by the bitmap.
12+
13+
use crate::{
14+
metadata::{Config as MConfig, Metadata},
15+
mmr::{iterator::leaf_num_to_pos, mem::Mmr, verification::Proof, verification::Storage, Error},
516
};
17+
use commonware_codec::DecodeExt;
618
use commonware_cryptography::Hasher as CHasher;
19+
use commonware_runtime::{Clock, Metrics, Storage as RStorage};
20+
use commonware_utils::array::prefixed_u64::U64;
721
use std::collections::VecDeque;
22+
use tracing::{error, warn};
823

924
/// Implements the [Storage] trait for generating inclusion proofs over the bitmap.
1025
struct BitmapStorage<'a, H: CHasher> {
@@ -33,6 +48,9 @@ impl<H: CHasher + Send + Sync> Storage<H::Digest> for BitmapStorage<'_, H> {
3348
///
3449
/// Merkelization of the bitmap is performed over chunks of N bytes. If the goal is to minimize
3550
/// proof sizes, choose an N that is equal to the size or double the size of the hasher's digest.
51+
///
52+
/// Warning: Even though we use u64 identifiers for bits, on 32-bit machines, the maximum
53+
/// addressable bit is limited to (u32::MAX * N * 8).
3654
pub struct Bitmap<H: CHasher, const N: usize> {
3755
/// The bitmap itself, in chunks of size N bytes. The number of valid bits in the last chunk is
3856
/// given by `self.next_bit`. Within each byte, lowest order bits are treated as coming before
@@ -54,6 +72,9 @@ pub struct Bitmap<H: CHasher, const N: usize> {
5472
/// update overhead for elements being appended or updated near the tip compared to a more
5573
/// typical balanced Merkle tree.
5674
mmr: Mmr<H>,
75+
76+
/// The number of bitmap chunks that have been pruned.
77+
pruned_chunks: usize,
5778
}
5879

5980
impl<H: CHasher, const N: usize> Default for Bitmap<H, N> {
@@ -62,6 +83,12 @@ impl<H: CHasher, const N: usize> Default for Bitmap<H, N> {
6283
}
6384
}
6485

86+
/// Prefix used for the metadata key identifying node digests.
87+
const NODE_PREFIX: u8 = 0;
88+
89+
/// Prefix used for the metadata key identifying the pruned_chunks value.
90+
const PRUNED_CHUNKS_PREFIX: u8 = 1;
91+
6592
impl<H: CHasher, const N: usize> Bitmap<H, N> {
6693
/// The size of a chunk in bytes.
6794
pub const CHUNK_SIZE: usize = N;
@@ -77,19 +104,100 @@ impl<H: CHasher, const N: usize> Bitmap<H, N> {
77104
bitmap,
78105
next_bit: 0,
79106
mmr: Mmr::new(),
107+
pruned_chunks: 0,
80108
}
81109
}
82110

83-
/// Return the number of bitmap chunks that have been pruned.
84-
#[inline]
85-
fn pruned_chunks(&self) -> usize {
86-
self.mmr.pruned_to_pos() as usize
111+
/// Restore the fully pruned state of a bitmap from the metadata in the given partition. (The
112+
/// caller must still replay retained elements to restore its full state.)
113+
///
114+
/// The metadata must store the number of pruned chunks and the pinned hashes corresponding to
115+
/// that pruning boundary.
116+
pub async fn restore_pruned<C: RStorage + Metrics + Clock>(
117+
context: C,
118+
partition: String,
119+
) -> Result<Self, Error> {
120+
let metadata_cfg = MConfig { partition };
121+
let metadata = Metadata::init(context.with_label("metadata"), metadata_cfg).await?;
122+
123+
let key: U64 = U64::new(PRUNED_CHUNKS_PREFIX, 0);
124+
let pruned_chunks = match metadata.get(&key) {
125+
Some(bytes) => u64::from_be_bytes(
126+
bytes
127+
.as_slice()
128+
.try_into()
129+
.expect("pruned_chunks bytes could not be converted to u64"),
130+
),
131+
None => {
132+
warn!("bitmap metadata does not contain pruned chunks, initializing as empty");
133+
0
134+
}
135+
} as usize;
136+
if pruned_chunks == 0 {
137+
return Ok(Self::new());
138+
}
139+
let mmr_size = leaf_num_to_pos(pruned_chunks as u64);
140+
141+
let mut pinned_nodes = Vec::new();
142+
for (index, pos) in Proof::<H>::nodes_to_pin(mmr_size).enumerate() {
143+
let Some(bytes) = metadata.get(&U64::new(NODE_PREFIX, index as u64)) else {
144+
error!(size = mmr_size, pos, "missing pinned node");
145+
return Err(Error::MissingNode(pos));
146+
};
147+
let digest = H::Digest::decode(bytes.as_ref());
148+
let Ok(digest) = digest else {
149+
error!(
150+
size = mmr_size,
151+
pos, "could not convert node bytes to digest"
152+
);
153+
return Err(Error::MissingNode(pos));
154+
};
155+
pinned_nodes.push(digest);
156+
}
157+
158+
metadata.close().await?;
159+
160+
let mmr = Mmr::<H>::init(Vec::new(), mmr_size, pinned_nodes);
161+
162+
Ok(Self {
163+
bitmap: VecDeque::from([[0u8; N]]),
164+
next_bit: 0,
165+
mmr,
166+
pruned_chunks,
167+
})
168+
}
169+
170+
/// Write the information necessary to restore the bitmap in its fully pruned state at its last
171+
/// pruning boundary. Restoring the entire bitmap state is then possible by replaying the
172+
/// retained elements.
173+
pub async fn write_pruned<C: RStorage + Metrics + Clock>(
174+
&self,
175+
context: C,
176+
partition: String,
177+
) -> Result<(), Error> {
178+
let metadata_cfg = MConfig { partition };
179+
let mut metadata = Metadata::init(context.with_label("metadata"), metadata_cfg).await?;
180+
metadata.clear();
181+
182+
// Write the number of pruned chunks.
183+
let key = U64::new(PRUNED_CHUNKS_PREFIX, 0);
184+
metadata.put(key, self.pruned_chunks.to_be_bytes().to_vec());
185+
186+
// Write the pinned nodes.
187+
let mmr_size = leaf_num_to_pos(self.pruned_chunks as u64);
188+
for (i, digest) in Proof::<H>::nodes_to_pin(mmr_size).enumerate() {
189+
let digest = self.mmr.get_node_unchecked(digest);
190+
let key = U64::new(NODE_PREFIX, i as u64);
191+
metadata.put(key, digest.to_vec());
192+
}
193+
194+
metadata.close().await.map_err(Error::MetadataError)
87195
}
88196

89197
/// Return the number of bits currently stored in the bitmap, irrespective of any pruning.
90198
#[inline]
91199
pub fn bit_count(&self) -> u64 {
92-
(self.pruned_chunks() + self.bitmap.len()) as u64 * Self::CHUNK_SIZE_BITS
200+
(self.pruned_chunks + self.bitmap.len()) as u64 * Self::CHUNK_SIZE_BITS
93201
- Self::CHUNK_SIZE_BITS
94202
+ self.next_bit
95203
}
@@ -98,15 +206,16 @@ impl<H: CHasher, const N: usize> Bitmap<H, N> {
98206
/// if the referenced bit is greater than the number of bits in the bitmap.
99207
pub fn prune_to_bit(&mut self, bit_offset: u64) {
100208
let chunk_pos = Self::chunk_pos(bit_offset);
101-
let pruned_pos = self.pruned_chunks();
102-
if chunk_pos < pruned_pos {
209+
if chunk_pos < self.pruned_chunks {
103210
return;
104211
}
105212

106-
let chunk_index = chunk_pos - pruned_pos;
213+
let chunk_index = chunk_pos - self.pruned_chunks;
107214
self.bitmap.drain(0..chunk_index);
215+
self.pruned_chunks = chunk_pos;
108216

109-
self.mmr.prune_to_pos(chunk_pos as u64);
217+
let mmr_pos = leaf_num_to_pos(chunk_pos as u64);
218+
self.mmr.prune_to_pos(mmr_pos);
110219
}
111220

112221
/// Return the last chunk of the bitmap.
@@ -204,10 +313,9 @@ impl<H: CHasher, const N: usize> Bitmap<H, N> {
204313
fn chunk_index(&self, bit_offset: u64) -> usize {
205314
assert!(bit_offset < self.bit_count(), "out of bounds");
206315
let chunk_pos = Self::chunk_pos(bit_offset);
207-
let pruned_pos = self.pruned_chunks();
208-
assert!(chunk_pos >= pruned_pos, "bit pruned");
316+
assert!(chunk_pos >= self.pruned_chunks, "bit pruned");
209317

210-
chunk_pos - pruned_pos
318+
chunk_pos - self.pruned_chunks
211319
}
212320

213321
// Convert a bit offset into the position of the chunk it belongs to.
@@ -319,6 +427,7 @@ impl<H: CHasher, const N: usize> Bitmap<H, N> {
319427
mod tests {
320428
use super::*;
321429
use commonware_cryptography::{hash, Sha256};
430+
use commonware_macros::test_traced;
322431
use commonware_runtime::{deterministic, Runner as _};
323432

324433
fn test_chunk<const N: usize>(s: &[u8]) -> [u8; N] {
@@ -337,9 +446,9 @@ mod tests {
337446
executor.start(|_| async move {
338447
let mut bitmap = Bitmap::<Sha256, 32>::new();
339448
assert_eq!(bitmap.bit_count(), 0);
340-
assert_eq!(bitmap.pruned_chunks(), 0);
449+
assert_eq!(bitmap.pruned_chunks, 0);
341450
bitmap.prune_to_bit(0);
342-
assert_eq!(bitmap.pruned_chunks(), 0);
451+
assert_eq!(bitmap.pruned_chunks, 0);
343452
assert_eq!(bitmap.last_chunk(), &[0u8; 32]);
344453

345454
// Add a single bit
@@ -353,7 +462,7 @@ mod tests {
353462
assert_eq!(bitmap.bit_count(), 1);
354463
assert!(bitmap.last_chunk() != &[0u8; 32]);
355464
// Pruning should be a no-op since we're not beyond a chunk boundary.
356-
assert_eq!(bitmap.pruned_chunks(), 0);
465+
assert_eq!(bitmap.pruned_chunks, 0);
357466
assert_eq!(root, bitmap.root(&mut hasher));
358467

359468
// Fill up a full chunk
@@ -374,7 +483,7 @@ mod tests {
374483
// Now pruning all bits should matter.
375484
bitmap.prune_to_bit(256);
376485
assert_eq!(bitmap.bit_count(), 256);
377-
assert_eq!(bitmap.pruned_chunks(), 1);
486+
assert_eq!(bitmap.pruned_chunks, 1);
378487
assert_eq!(root, bitmap.root(&mut hasher));
379488
// Last chunk should be empty again
380489
assert_eq!(bitmap.last_chunk(), &[0u8; 32]);
@@ -501,6 +610,7 @@ mod tests {
501610

502611
// Confirm pruning everything doesn't affect the root hash.
503612
bitmap.prune_to_bit(bitmap.bit_count());
613+
assert_eq!(bitmap.pruned_chunks, 3);
504614
assert_eq!(bitmap.bit_count(), 256 * 3 + 1);
505615
assert_eq!(newer_root, bitmap.root(&mut hasher));
506616
}
@@ -592,25 +702,74 @@ mod tests {
592702
"proving bit {} after flipping should have failed",
593703
i
594704
);
705+
}
706+
}
707+
})
708+
}
595709

596-
let (proof, chunk) = bitmap.proof(&mut hasher, i).await.unwrap();
710+
#[test_traced]
711+
fn test_bitmap_persistence() {
712+
const PARTITION: &str = "bitmap_test";
713+
const FULL_CHUNK_COUNT: usize = 100;
597714

598-
// Proof should verify for the original chunk containing the bit.
599-
assert!(
600-
Bitmap::verify_bit_inclusion(&mut hasher, &proof, &chunk, i, &root),
601-
"failed to prove bit {}",
602-
i
603-
);
715+
let executor = deterministic::Runner::default();
716+
executor.start(|context| async move {
717+
// Initializing from an empty partition should result in an empty bitmap.
718+
let mut bitmap =
719+
Bitmap::<Sha256, 32>::restore_pruned(context.clone(), PARTITION.to_string())
720+
.await
721+
.unwrap();
722+
assert_eq!(bitmap.bit_count(), 0);
604723

605-
// Flip the bit in the chunk and make sure the proof fails.
606-
let corrupted = flip_bit(i, &chunk);
607-
assert!(
608-
!Bitmap::verify_bit_inclusion(&mut hasher, &proof, &corrupted, i, &root),
609-
"proving bit {} after flipping should have failed",
610-
i
724+
// Add a non-trivial amount of data.
725+
let mut hasher = Sha256::new();
726+
for i in 0..FULL_CHUNK_COUNT {
727+
bitmap.append_chunk_unchecked(
728+
&mut hasher,
729+
&test_chunk(format!("test{}", i).as_bytes()),
730+
);
731+
}
732+
let chunk_aligned_root = bitmap.root(&mut hasher);
733+
734+
// Add a few extra bits beyond the last chunk boundary.
735+
bitmap.append_byte_unchecked(&mut hasher, 0xA6);
736+
bitmap.append(&mut hasher, true);
737+
bitmap.append(&mut hasher, false);
738+
bitmap.append(&mut hasher, true);
739+
let root = bitmap.root(&mut hasher);
740+
741+
// prune 10 chunks at a time and make sure replay will restore the bitmap every time.
742+
for i in (10..=FULL_CHUNK_COUNT).step_by(10) {
743+
bitmap.prune_to_bit(i as u64 * Bitmap::<Sha256, 32>::CHUNK_SIZE_BITS);
744+
bitmap
745+
.write_pruned(context.clone(), PARTITION.to_string())
746+
.await
747+
.unwrap();
748+
bitmap =
749+
Bitmap::<Sha256, 32>::restore_pruned(context.clone(), PARTITION.to_string())
750+
.await
751+
.unwrap();
752+
let _ = bitmap.root(&mut hasher);
753+
754+
// Replay missing chunks.
755+
for j in i..FULL_CHUNK_COUNT {
756+
bitmap.append_chunk_unchecked(
757+
&mut hasher,
758+
&test_chunk(format!("test{}", j).as_bytes()),
611759
);
760+
let _ = bitmap.root(&mut hasher);
612761
}
762+
assert_eq!(bitmap.pruned_chunks, i);
763+
assert_eq!(bitmap.bit_count(), FULL_CHUNK_COUNT as u64 * 256);
764+
assert_eq!(bitmap.root(&mut hasher), chunk_aligned_root);
765+
766+
// Replay missing partial chunk.
767+
bitmap.append_byte_unchecked(&mut hasher, 0xA6);
768+
bitmap.append(&mut hasher, true);
769+
bitmap.append(&mut hasher, false);
770+
bitmap.append(&mut hasher, true);
771+
assert_eq!(bitmap.root(&mut hasher), root);
613772
}
614-
})
773+
});
615774
}
616775
}

storage/src/mmr/journaled.rs

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,12 @@ impl<E: RStorage + Clock + Metrics, H: Hasher> Storage<H::Digest> for Mmr<E, H>
7878
}
7979
}
8080

81+
/// Prefix used for node keys in the metadata (the `u8` prefix of a prefixed `U64` key).
82+
const NODE_PREFIX: u8 = 0;
83+
84+
/// Prefix used for the key storing the prune_to_pos position in the metadata.
85+
const PRUNE_TO_POS_PREFIX: u8 = 1;
86+
8187
impl<E: RStorage + Clock + Metrics, H: Hasher> Mmr<E, H> {
8288
/// Initialize a new `Mmr` instance.
8389
pub async fn init(context: E, cfg: Config) -> Result<Self, Error> {
@@ -107,7 +113,7 @@ impl<E: RStorage + Clock + Metrics, H: Hasher> Mmr<E, H> {
107113
// Make sure the journal's oldest retained node is as expected based on the last pruning
108114
// boundary stored in metadata. If they don't match, prune the journal to the appropriate
109115
// location.
110-
let key: U64 = U64::new(Self::PRUNE_TO_POS_PREFIX, 0);
116+
let key: U64 = U64::new(PRUNE_TO_POS_PREFIX, 0);
111117
let metadata_prune_pos = match metadata.get(&key) {
112118
Some(bytes) => u64::from_be_bytes(
113119
bytes
@@ -211,7 +217,7 @@ impl<E: RStorage + Clock + Metrics, H: Hasher> Mmr<E, H> {
211217
journal: &Journal<E, H::Digest>,
212218
pos: u64,
213219
) -> Result<H::Digest, Error> {
214-
if let Some(bytes) = metadata.get(&U64::new(0, pos)) {
220+
if let Some(bytes) = metadata.get(&U64::new(NODE_PREFIX, pos)) {
215221
debug!(pos, "read node from metadata");
216222
let digest = H::Digest::decode(bytes.as_ref());
217223
let Ok(digest) = digest else {
@@ -331,12 +337,6 @@ impl<E: RStorage + Clock + Metrics, H: Hasher> Mmr<E, H> {
331337
Ok(())
332338
}
333339

334-
/// Prefix used for nodes in the metadata prefixed U8 key.
335-
const NODE_PREFIX: u8 = 0;
336-
337-
/// Prefix used for the key storing the prune_to_pos position in the metadata.
338-
const PRUNE_TO_POS_PREFIX: u8 = 1;
339-
340340
/// Compute and add required nodes for the given pruning point to the metadata, and write it to
341341
/// disk. Return the computed set of required nodes.
342342
async fn update_metadata(
@@ -347,11 +347,11 @@ impl<E: RStorage + Clock + Metrics, H: Hasher> Mmr<E, H> {
347347
for pos in Proof::<H>::nodes_to_pin(prune_to_pos) {
348348
let digest = self.get_node(pos).await?.unwrap();
349349
self.metadata
350-
.put(U64::new(Self::NODE_PREFIX, pos), digest.to_vec());
350+
.put(U64::new(NODE_PREFIX, pos), digest.to_vec());
351351
pinned_nodes.insert(pos, digest);
352352
}
353353

354-
let key: U64 = U64::new(Self::PRUNE_TO_POS_PREFIX, 0);
354+
let key: U64 = U64::new(PRUNE_TO_POS_PREFIX, 0);
355355
self.metadata.put(key, prune_to_pos.to_be_bytes().into());
356356

357357
self.metadata.sync().await.map_err(Error::MetadataError)?;

0 commit comments

Comments
 (0)