Skip to content

Commit d8eff36

Browse files
committed
improve partitioned filter size estimation
1 parent caf4227 commit d8eff36

File tree

2 files changed

+8
-13
lines changed

2 files changed

+8
-13
lines changed

src/table/filter/mod.rs

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -40,17 +40,19 @@ impl BloomConstructionPolicy {
4040
}
4141
}
4242

43+
/// Returns the estimated filter size in bytes.
4344
#[must_use]
44-
pub fn estimated_key_bits(&self, n: usize) -> f32 {
45+
pub fn estimated_filter_size(&self, n: usize) -> usize {
4546
#[expect(
4647
clippy::cast_precision_loss,
4748
reason = "truncation is fine because this is an estimation"
4849
)]
4950
match self {
50-
Self::BitsPerKey(bpk) => *bpk,
51+
Self::BitsPerKey(bpk) => (*bpk * (n as f32)) as usize / 8,
5152
Self::FalsePositiveRate(fpr) => {
5253
let m = StandardBloomFilterBuilder::calculate_m(n, *fpr);
53-
(m / n) as f32
54+
let bpk = (m / n) as f32;
55+
(bpk * (n as f32)) as usize / 8
5456
}
5557
}
5658
}

src/table/writer/filter/partitioned.rs

Lines changed: 3 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -163,16 +163,9 @@ impl<W: std::io::Write + std::io::Seek> FilterWriter<W> for PartitionedFilterWri
163163
fn register_key(&mut self, key: &UserKey) -> crate::Result<()> {
164164
self.bloom_hash_buffer.push(Builder::get_hash(key));
165165

166-
#[expect(
167-
clippy::cast_possible_truncation,
168-
clippy::cast_sign_loss,
169-
reason = "truncation is fine because this is an estimation"
170-
)]
171-
let estimated_key_bits =
172-
self.bloom_policy
173-
.estimated_key_bits(self.bloom_hash_buffer.len()) as usize;
174-
175-
self.approx_filter_size += estimated_key_bits;
166+
self.approx_filter_size = self
167+
.bloom_policy
168+
.estimated_filter_size(self.bloom_hash_buffer.len());
176169

177170
self.last_key = Some(key.clone());
178171

0 commit comments

Comments
 (0)