Skip to content

Commit 9412a7b

Browse files
committed
union with serialized bitmap
1 parent c54275c commit 9412a7b

File tree

2 files changed

+174
-0
lines changed

2 files changed

+174
-0
lines changed

src/bitmap/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ mod cmp;
1212
mod inherent;
1313
mod iter;
1414
mod ops;
15+
mod ops_with_serialized;
1516
#[cfg(feature = "serde")]
1617
mod serde;
1718
mod serialization;

src/bitmap/ops_with_serialized.rs

Lines changed: 173 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,173 @@
1+
use bytemuck::cast_slice_mut;
2+
use byteorder::{LittleEndian, ReadBytesExt};
3+
use std::{
4+
io::{self, Read},
5+
mem::size_of,
6+
};
7+
8+
use crate::RoaringBitmap;
9+
10+
use super::{
11+
container::Container,
12+
store::{ArrayStore, BitmapStore, Store},
13+
};
14+
15+
/// Cookie expected as the first little-endian `u32` of the serialized input.
/// When the cookie equals this value, the next `u32` is read as the number of
/// containers and an offset header is expected after the descriptions.
const SERIAL_COOKIE_NO_RUNCONTAINER: u32 = 12346;
16+
/// Value compared against the low 16 bits of the cookie. A match is rejected
/// with a "run containers are unsupported" error.
const SERIAL_COOKIE: u16 = 12347;
17+
18+
impl RoaringBitmap {
19+
pub fn union_with_serialized(&mut self, mut reader: impl Read) -> io::Result<()> {
20+
let (size, has_offsets) = {
21+
let cookie = reader.read_u32::<LittleEndian>()?;
22+
if cookie == SERIAL_COOKIE_NO_RUNCONTAINER {
23+
(reader.read_u32::<LittleEndian>()? as usize, true)
24+
} else if (cookie as u16) == SERIAL_COOKIE {
25+
return Err(io::Error::new(io::ErrorKind::Other, "run containers are unsupported"));
26+
} else {
27+
return Err(io::Error::new(io::ErrorKind::Other, "unknown cookie value"));
28+
}
29+
};
30+
31+
if size > u16::MAX as usize + 1 {
32+
return Err(io::Error::new(io::ErrorKind::Other, "size is greater than supported"));
33+
}
34+
35+
let mut description_bytes = vec![0u8; size * 4];
36+
reader.read_exact(&mut description_bytes)?;
37+
let mut description_bytes = &description_bytes[..];
38+
39+
if has_offsets {
40+
let mut offsets = vec![0u8; size * 4];
41+
reader.read_exact(&mut offsets)?;
42+
drop(offsets); // Not useful when deserializing into memory
43+
}
44+
45+
for _ in 0..size {
46+
let key = description_bytes.read_u16::<LittleEndian>()?;
47+
let len = u64::from(description_bytes.read_u16::<LittleEndian>()?) + 1;
48+
49+
if len <= 4096 {
50+
match self.containers.binary_search_by_key(&key, |c| c.key) {
51+
Ok(loc) => {
52+
let container = &mut self.containers[loc];
53+
54+
for _ in 0..len {
55+
let mut value = [0u8; size_of::<u16>()];
56+
reader.read_exact(value.as_mut())?;
57+
// TODO: since this is sorted it could probably be faster
58+
let value = u16::from_le_bytes(value);
59+
container.insert(value);
60+
}
61+
}
62+
Err(loc) => {
63+
let mut values = vec![0u16; len as usize];
64+
reader.read_exact(cast_slice_mut(&mut values))?;
65+
values.iter_mut().for_each(|n| *n = u16::from_le(*n));
66+
67+
let array = ArrayStore::from_vec_unchecked(values);
68+
let mut container = Container::new(key);
69+
container.store = Store::Array(array);
70+
self.containers.insert(loc, container);
71+
}
72+
}
73+
} else {
74+
match self.containers.binary_search_by_key(&key, |c| c.key) {
75+
Ok(loc) => {
76+
let current_store = std::mem::take(&mut self.containers[loc].store);
77+
78+
let mut values = Box::new([0; 1024]);
79+
reader.read_exact(cast_slice_mut(&mut values[..]))?;
80+
values.iter_mut().for_each(|n| *n = u64::from_le(*n));
81+
82+
let mut store = BitmapStore::from_unchecked(len, values);
83+
84+
match current_store {
85+
Store::Array(array) => array.into_iter().for_each(|el| {
86+
store.insert(el);
87+
}),
88+
Store::Bitmap(bitmap_store) => store |= &bitmap_store,
89+
};
90+
91+
self.containers[loc].store = Store::Bitmap(store);
92+
}
93+
Err(loc) => {
94+
let mut values = Box::new([0; 1024]);
95+
reader.read_exact(cast_slice_mut(&mut values[..]))?;
96+
values.iter_mut().for_each(|n| *n = u64::from_le(*n));
97+
98+
let array = BitmapStore::from_unchecked(len, values);
99+
let mut container = Container::new(key);
100+
container.store = Store::Bitmap(array);
101+
self.containers.insert(loc, container);
102+
}
103+
}
104+
};
105+
}
106+
Ok(())
107+
}
108+
}
109+
110+
#[cfg(test)]
111+
mod test {
112+
use crate::RoaringBitmap;
113+
use proptest::prelude::*;
114+
115+
proptest! {
116+
#[test]
117+
fn prop_or_with_serialized(
118+
mut a in RoaringBitmap::arbitrary(),
119+
b in RoaringBitmap::arbitrary()
120+
) {
121+
let union = &a | &b;
122+
123+
let mut b_ser = Vec::new();
124+
b.serialize_into(&mut b_ser).unwrap();
125+
a.union_with_serialized(&*b_ser).unwrap();
126+
127+
prop_assert_eq!(a, union);
128+
}
129+
}
130+
131+
#[test]
132+
fn or_with_serialized() {
133+
let unions = [
134+
(RoaringBitmap::new(), RoaringBitmap::new()),
135+
(RoaringBitmap::from_sorted_iter([0]).unwrap(), RoaringBitmap::new()),
136+
(RoaringBitmap::new(), RoaringBitmap::from_sorted_iter([0]).unwrap()),
137+
(
138+
RoaringBitmap::from_sorted_iter([0]).unwrap(),
139+
RoaringBitmap::from_sorted_iter([0]).unwrap(),
140+
),
141+
(
142+
RoaringBitmap::from_sorted_iter([0]).unwrap(),
143+
RoaringBitmap::from_sorted_iter([1]).unwrap(),
144+
),
145+
(
146+
RoaringBitmap::from_sorted_iter([0]).unwrap(),
147+
RoaringBitmap::from_sorted_iter(0..3000).unwrap(),
148+
),
149+
(
150+
RoaringBitmap::from_sorted_iter([]).unwrap(),
151+
RoaringBitmap::from_sorted_iter(0..3000).unwrap(),
152+
),
153+
(
154+
RoaringBitmap::from_sorted_iter(0..3000).unwrap(),
155+
RoaringBitmap::from_sorted_iter([3001]).unwrap(),
156+
),
157+
(
158+
RoaringBitmap::from_sorted_iter(0..3000).unwrap(),
159+
RoaringBitmap::from_sorted_iter(3000..6000).unwrap(),
160+
),
161+
];
162+
163+
for (mut a, b) in unions {
164+
let union = &a | &b;
165+
166+
let mut b_ser = Vec::new();
167+
b.serialize_into(&mut b_ser).unwrap();
168+
a.union_with_serialized(&*b_ser).unwrap();
169+
170+
assert_eq!(a, union, "When testing: {a:?} | {b:?}");
171+
}
172+
}
173+
}

0 commit comments

Comments
 (0)