Skip to content

Commit d8bb37b

Browse files
committed
replace RoaringTreemap by TwoLevelRoaringBitmap
Benchmark results on insert_range (only bench available for 64-bit):

group                       roaring64                                    treemap
-----                       ---------                                    -------
from_empty_1000             1.00     87.6±4.76ns    10.6 GElem/sec       1.65    144.2±1.28ns     6.5 GElem/sec
from_empty_10000            1.00    166.8±9.32ns    55.8 GElem/sec       1.28    213.6±7.26ns    43.6 GElem/sec
from_empty_8589934590       1.01    151.5±0.99ms    52.8 GElem/sec       1.00    149.7±1.00ms    53.5 GElem/sec
pre_populated_1000          1.00   139.3±19.83ns     6.7 GElem/sec       1.30   180.8±20.70ns     5.2 GElem/sec
pre_populated_10000         1.00   235.4±83.29ns    39.6 GElem/sec       1.26  295.9±106.25ns    31.5 GElem/sec
pre_populated_8589934590    1.00     74.8±2.56ms   107.0 GElem/sec       1.01     75.3±1.82ms   106.3 GElem/sec
1 parent c7bab3c commit d8bb37b

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

43 files changed

+1275
-3384
lines changed

benchmarks/benches/lib.rs

Lines changed: 33 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,14 @@
1-
use itertools::Itertools;
2-
use std::cmp::Reverse;
3-
use std::ops::{BitAnd, BitAndAssign, BitOr, BitOrAssign, BitXor, BitXorAssign, Sub, SubAssign};
4-
5-
use criterion::measurement::Measurement;
1+
use crate::datasets::Datasets;
62
use criterion::{
7-
black_box, criterion_group, criterion_main, BatchSize, BenchmarkGroup, BenchmarkId, Criterion,
8-
Throughput,
3+
black_box, criterion_group, criterion_main, measurement::Measurement, BatchSize,
4+
BenchmarkGroup, BenchmarkId, Criterion, Throughput,
5+
};
6+
use itertools::Itertools;
7+
use roaring::{MultiOps, Roaring32, Roaring64};
8+
use std::{
9+
cmp::Reverse,
10+
ops::{BitAnd, BitAndAssign, BitOr, BitOrAssign, BitXor, BitXorAssign, Sub, SubAssign},
911
};
10-
11-
use roaring::{MultiOps, Roaring32};
12-
13-
use crate::datasets::Datasets;
1412

1513
mod datasets;
1614

@@ -670,29 +668,29 @@ fn insert_range_bitmap(c: &mut Criterion) {
670668
}
671669
}
672670

673-
// fn insert_range_treemap(c: &mut Criterion) {
674-
// for &size in &[1_000_u64, 10_000u64, 2 * (u32::MAX as u64)] {
675-
// let mut group = c.benchmark_group("insert_range_treemap");
676-
// group.throughput(criterion::Throughput::Elements(size));
677-
// group.bench_function(format!("from_empty_{}", size), |b| {
678-
// let bm = RoaringTreemap::new();
679-
// b.iter_batched(
680-
// || bm.clone(),
681-
// |mut bm| black_box(bm.insert_range(0..size)),
682-
// criterion::BatchSize::SmallInput,
683-
// )
684-
// });
685-
// group.bench_function(format!("pre_populated_{}", size), |b| {
686-
// let mut bm = RoaringTreemap::new();
687-
// bm.insert_range(0..size);
688-
// b.iter_batched(
689-
// || bm.clone(),
690-
// |mut bm| black_box(bm.insert_range(0..size)),
691-
// criterion::BatchSize::SmallInput,
692-
// )
693-
// });
694-
// }
695-
// }
671+
/// Benchmarks `Roaring64::insert_range(0..size)` for three range sizes
/// (1_000, 10_000 and `2 * u32::MAX`), once starting from an empty bitmap
/// (`from_empty_{size}`) and once starting from a bitmap that already holds
/// the whole range (`pre_populated_{size}`).
fn insert_range_roaring64(c: &mut Criterion) {
672+
for &size in &[1_000_u64, 10_000u64, 2 * (u32::MAX as u64)] {
673+
// A group with the same name is opened for every size; the size only
// shows up in the benchmark function names below.
let mut group = c.benchmark_group("insert_range_roaring64");
674+
// Throughput is reported in elements, one element per inserted value.
group.throughput(criterion::Throughput::Elements(size));
675+
group.bench_function(format!("from_empty_{}", size), |b| {
676+
let bm = Roaring64::new();
677+
b.iter_batched(
678+
// Setup closure: hands a fresh copy of the empty bitmap to each run.
|| bm.clone(),
679+
// Routine closure: the insertion being measured.
|mut bm| black_box(bm.insert_range(0..size)),
680+
criterion::BatchSize::SmallInput,
681+
)
682+
});
683+
group.bench_function(format!("pre_populated_{}", size), |b| {
684+
// Fill the bitmap once up front so the measured insert only hits
// values that are already present.
let mut bm = Roaring64::new();
685+
bm.insert_range(0..size);
686+
b.iter_batched(
687+
// Setup closure: clones the pre-populated bitmap for each run.
|| bm.clone(),
688+
|mut bm| black_box(bm.insert_range(0..size)),
689+
criterion::BatchSize::SmallInput,
690+
)
691+
});
692+
}
693+
}
696694

697695
criterion_group!(
698696
benches,
@@ -711,7 +709,7 @@ criterion_group!(
711709
remove,
712710
remove_range_bitmap,
713711
insert_range_bitmap,
714-
// insert_range_treemap,
712+
insert_range_roaring64,
715713
iteration,
716714
is_empty,
717715
serialization,

src/lib.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -20,15 +20,15 @@ use std::fmt;
2020

2121
mod core;
2222

23-
/// A compressed bitmap with u64 values. Implemented as a `BTreeMap` of `RoaringBitmap`s.
24-
// pub mod treemap;
25-
// pub use treemap::RoaringTreemap;
2623
mod value;
2724
pub use value::{ContainerKey, Value, ValueRange};
2825

2926
mod roaring32;
3027
pub use roaring32::Roaring32;
3128

29+
mod roaring64;
30+
pub use roaring64::Roaring64;
31+
3232
pub use self::core::RoaringBitmap;
3333

3434
/// An error type that is returned when an iterator isn't sorted.
@@ -53,7 +53,7 @@ impl fmt::Display for NonSortedIntegers {
5353
impl Error for NonSortedIntegers {}
5454

5555
/// A [`Iterator::collect`] blanket implementation that provides extra methods for [`Roaring32`]
56-
/// and [`RoaringTreemap`].
56+
/// and [`Roaring64`].
5757
///
5858
/// When merging multiple bitmap with the same operation it's usually faster to call the
5959
/// method in this trait than to write your own for loop and merging the bitmaps yourself.

src/roaring64.rs

Lines changed: 140 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,140 @@
1+
use crate::{ContainerKey, RoaringBitmap, Value, ValueRange};
2+
use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt};
3+
use std::{
4+
io,
5+
ops::{Bound, RangeBounds, RangeInclusive},
6+
};
7+
8+
/// A compressed bitmap for 64-bit values.
9+
///
/// This is [`RoaringBitmap`] instantiated with `u64`. The `Value`
/// implementation for `u64` in this module splits each value into a 48-bit
/// container key (`value >> 16`) and a 16-bit index (`value as u16`).
///
10+
/// # Examples
11+
///
12+
/// ```rust
13+
/// use roaring::Roaring64;
14+
///
15+
/// let mut rb = Roaring64::new();
16+
///
17+
/// // insert all primes less than 10
18+
/// rb.insert(2);
19+
/// rb.insert(3);
20+
/// rb.insert(5);
21+
/// rb.insert(7);
22+
/// println!("total bits set to true: {}", rb.len());
23+
/// ```
24+
pub type Roaring64 = RoaringBitmap<u64>;
25+
26+
// `Value` implementation for 64-bit integers: the upper 48 bits of a value
// form its container key and the lower 16 bits its index.
impl Value for u64 {
27+
type Key = u64;
28+
type Range = RangeInclusive<Self>;
29+
30+
/// Splits a value into `(key, index)`: `key` is the value shifted right by
/// 16 bits (so it fits in 48 bits), `index` is the low 16 bits.
fn split(self) -> (Self::Key, u16) {
31+
(self >> 16, self as u16)
32+
}
33+
34+
/// Rebuilds a value from `(key, index)`; the inverse of `split` for keys
/// that fit in 48 bits. Bits of `key` above bit 47 are shifted out by
/// `<< 16`. The addition cannot carry because the low 16 bits of
/// `key << 16` are always zero.
fn join(key: Self::Key, index: u16) -> Self {
35+
(key << 16) + u64::from(index)
36+
}
37+
38+
/// Normalizes any `RangeBounds<u64>` into an inclusive `start..=end` range.
///
/// Returns `None` when the range is empty (`end < start`) or when an
/// excluded bound has no inclusive equivalent (excluded start at
/// `u64::MAX`, excluded end at `0`).
fn range(range: impl RangeBounds<Self>) -> Option<Self::Range> {
39+
let start: u64 = match range.start_bound() {
40+
Bound::Included(&i) => i,
41+
// An excluded start at `u64::MAX` overflows: nothing can follow it.
Bound::Excluded(&i) => i.checked_add(1)?,
42+
Bound::Unbounded => 0,
43+
};
44+
let end: u64 = match range.end_bound() {
45+
Bound::Included(&i) => i,
46+
// An excluded end at `0` underflows: nothing can precede it.
Bound::Excluded(&i) => i.checked_sub(1)?,
47+
Bound::Unbounded => u64::MAX,
48+
};
49+
50+
if end < start {
51+
return None;
52+
}
53+
54+
Some(start..=end)
55+
}
56+
57+
/// Upper limit on the number of containers; always `usize::MAX` here.
fn max_containers() -> usize {
58+
// Theoretically 2^48: one container per key, and keys are `value >> 16`.
59+
// Realistically we're probably capped at usize anyway.
// NOTE(review): on 64-bit targets 2^48 is below `usize::MAX`, so the
// returned value is looser than the theoretical limit — confirm callers
// only need an upper limit.
60+
usize::MAX
61+
}
62+
}
63+
64+
// `ContainerKey` implementation for 64-bit bitmaps: keys are read and written
// as 48-bit little-endian integers.
impl ContainerKey for u64 {
65+
/// Returns 6, i.e. 48 bits expressed in bytes — presumably the serialized
/// size of one key, matching `write_u48` / `read_u48` below.
#[inline(always)]
66+
fn size() -> usize {
67+
// Key is coded on 48-bit, the 16 upper ones are unused.
68+
6
69+
}
70+
71+
/// Writes the key to `writer` as a 48-bit little-endian integer.
///
/// NOTE(review): byteorder documents a panic when the value does not fit
/// in 48 bits; keys produced by `u64::split` always fit — confirm no other
/// source of keys exists.
fn write(self, writer: &mut impl WriteBytesExt) -> io::Result<()> {
72+
writer.write_u48::<LittleEndian>(self)
73+
}
74+
75+
/// Reads a key from `reader` as a 48-bit little-endian integer.
fn read(reader: &mut impl ReadBytesExt) -> io::Result<Self> {
76+
reader.read_u48::<LittleEndian>()
77+
}
78+
}
79+
80+
// `ValueRange` implementation for inclusive ranges of `u64` values, expressed
// in terms of the `(key, index)` split defined by `Value for u64`.
impl ValueRange<u64> for RangeInclusive<u64> {
81+
type KeyIterator = RangeInclusive<u64>;
82+
83+
/// Returns the `(key, index)` split of the first value of the range.
fn start(&self) -> (<u64 as Value>::Key, u16) {
84+
// `self.start()` resolves to the inherent `RangeInclusive::start`
// (inherent methods take precedence over trait methods), not to this
// trait method.
self.start().split()
85+
}
86+
87+
/// Returns the `(key, index)` split of the last (inclusive) value of the range.
fn end(&self) -> (<u64 as Value>::Key, u16) {
88+
// Same resolution as in `start`: this is the inherent `RangeInclusive::end`.
self.end().split()
89+
}
90+
91+
/// Returns how many container keys the range spans, both ends included.
fn containers_count(&self) -> usize {
92+
// The fully qualified form selects the trait methods above instead of
// the inherent `RangeInclusive` accessors.
let start = ValueRange::start(self).0;
93+
let end = ValueRange::end(self).0;
94+
// Keys fit in 48 bits, so the difference is below 2^48.
// NOTE(review): `as usize` truncates where `usize` is narrower than
// 64 bits — confirm 32-bit targets are not a concern here.
(end - start) as usize + 1
95+
}
96+
97+
/// Consumes the range and returns the inclusive range of container keys it touches.
fn keys(self) -> Self::KeyIterator {
98+
let start = ValueRange::start(&self).0;
99+
let end = ValueRange::end(&self).0;
100+
start..=end
101+
}
102+
}
103+
104+
// Unit tests for the `Value` implementation of `u64`.
#[cfg(test)]
105+
mod test {
106+
use super::*;
107+
108+
// `split` yields (upper 48 bits, lower 16 bits); cases sit around the
// 16-bit, 32-bit and 64-bit boundaries.
#[test]
109+
fn split() {
110+
assert_eq!((0x0000_0000_0000u64, 0x0000u16), 0x0000_0000_0000_0000u64.split());
111+
assert_eq!((0x0000_0000_0000u64, 0x0001u16), 0x0000_0000_0000_0001u64.split());
112+
assert_eq!((0x0000_0000_FFFFu64, 0xFFFEu16), 0x0000_0000_FFFF_FFFEu64.split());
113+
assert_eq!((0x0000_0000_FFFFu64, 0xFFFFu16), 0x0000_0000_FFFF_FFFFu64.split());
114+
assert_eq!((0x0000_0001_0000u64, 0x0000u16), 0x0000_0001_0000_0000u64.split());
115+
assert_eq!((0x0000_0001_0000u64, 0x0001u16), 0x0000_0001_0000_0001u64.split());
116+
assert_eq!((0xFFFF_FFFF_FFFFu64, 0xFFFEu16), 0xFFFF_FFFF_FFFF_FFFEu64.split());
117+
assert_eq!((0xFFFF_FFFF_FFFFu64, 0xFFFFu16), 0xFFFF_FFFF_FFFF_FFFFu64.split());
118+
}
119+
120+
// `join` is checked on the same (key, index) pairs as `split`, in the
// opposite direction.
#[test]
121+
fn join() {
122+
assert_eq!(0x0000_0000_0000_0000u64, u64::join(0x0000_0000_0000u64, 0x0000u16));
123+
assert_eq!(0x0000_0000_0000_0001u64, u64::join(0x0000_0000_0000u64, 0x0001u16));
124+
assert_eq!(0x0000_0000_FFFF_FFFEu64, u64::join(0x0000_0000_FFFFu64, 0xFFFEu16));
125+
assert_eq!(0x0000_0000_FFFF_FFFFu64, u64::join(0x0000_0000_FFFFu64, 0xFFFFu16));
126+
assert_eq!(0x0000_0001_0000_0000u64, u64::join(0x0000_0001_0000u64, 0x0000u16));
127+
assert_eq!(0x0000_0001_0000_0001u64, u64::join(0x0000_0001_0000u64, 0x0001u16));
128+
assert_eq!(0xFFFF_FFFF_FFFF_FFFEu64, u64::join(0xFFFF_FFFF_FFFFu64, 0xFFFEu16));
129+
assert_eq!(0xFFFF_FFFF_FFFF_FFFFu64, u64::join(0xFFFF_FFFF_FFFFu64, 0xFFFFu16));
130+
}
131+
132+
// `range` covers half-open, unbounded, empty and single-value inputs.
#[test]
133+
fn range() {
134+
assert_eq!(Some(1..=5), u64::range(1..6));
135+
assert_eq!(Some(1..=u64::MAX), u64::range(1..));
136+
assert_eq!(Some(0..=u64::MAX), u64::range(..));
137+
assert_eq!(None, u64::range(5..5));
138+
assert_eq!(Some(16..=16), u64::range(16..=16))
139+
}
140+
}

src/treemap/arbitrary.rs

Lines changed: 0 additions & 17 deletions
This file was deleted.

0 commit comments

Comments
 (0)