Skip to content

Commit 60968d6

Browse files
authored
Try #255:
2 parents 516cd80 + 9a59e63 commit 60968d6

File tree

6 files changed

+78
-19
lines changed

6 files changed

+78
-19
lines changed

src/bitmap/container.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ use std::ops::{
66
use super::store::{self, Store};
77
use super::util;
88

9-
const ARRAY_LIMIT: u64 = 4096;
9+
/// Largest cardinality for which an array store is chosen over a bitmap store,
/// both when deserializing a container and when pre-sizing a new store.
pub const ARRAY_LIMIT: u64 = 4096;
1010

1111
#[derive(PartialEq, Clone)]
1212
pub struct Container {

src/bitmap/serialization.rs

Lines changed: 52 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -3,15 +3,19 @@ use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt};
33
use std::convert::{Infallible, TryFrom};
44
use std::error::Error;
55
use std::io;
6+
use std::ops::RangeInclusive;
67

7-
use super::container::Container;
8-
use crate::bitmap::store::{ArrayStore, BitmapStore, Store};
8+
use crate::bitmap::container::{Container, ARRAY_LIMIT};
9+
use crate::bitmap::store::{ArrayStore, BitmapStore, Store, BITMAP_LENGTH};
910
use crate::RoaringBitmap;
1011

1112
const SERIAL_COOKIE_NO_RUNCONTAINER: u32 = 12346;
1213
const SERIAL_COOKIE: u16 = 12347;
13-
// TODO: Need this once run containers are supported
14-
// const NO_OFFSET_THRESHOLD: u8 = 4;
14+
// In the run-container format, the offset header is only present when the
// number of containers is at least this value.
const NO_OFFSET_THRESHOLD: usize = 4;
15+
16+
// Sizes of header structures
17+
// Bytes per container description: a u16 key followed by a u16 holding (cardinality - 1).
const DESCRIPTION_BYTES: usize = 4;
18+
// Bytes per container in the offset header (read and discarded when deserializing into memory).
const OFFSET_BYTES: usize = 4;
1519

1620
impl RoaringBitmap {
1721
/// Return the size in bytes of the serialized output.
@@ -163,49 +167,81 @@ impl RoaringBitmap {
163167
B: Fn(u64, Box<[u64; 1024]>) -> Result<BitmapStore, BErr>,
164168
BErr: Error + Send + Sync + 'static,
165169
{
166-
let (size, has_offsets) = {
170+
// First read the cookie to determine which version of the format we are reading
171+
let (size, has_offsets, has_run_containers) = {
167172
let cookie = reader.read_u32::<LittleEndian>()?;
168173
if cookie == SERIAL_COOKIE_NO_RUNCONTAINER {
169-
(reader.read_u32::<LittleEndian>()? as usize, true)
174+
(reader.read_u32::<LittleEndian>()? as usize, true, false)
170175
} else if (cookie as u16) == SERIAL_COOKIE {
171-
return Err(io::Error::new(io::ErrorKind::Other, "run containers are unsupported"));
176+
let size = ((cookie >> 16) + 1) as usize;
177+
(size, size >= NO_OFFSET_THRESHOLD, true)
172178
} else {
173179
return Err(io::Error::new(io::ErrorKind::Other, "unknown cookie value"));
174180
}
175181
};
176182

183+
// Read the run container bitmap if necessary
184+
let run_container_bitmap = if has_run_containers {
185+
let mut bitmap = vec![0u8; (size + 7) / 8];
186+
reader.read_exact(&mut bitmap)?;
187+
Some(bitmap)
188+
} else {
189+
None
190+
};
191+
177192
if size > u16::MAX as usize + 1 {
178193
return Err(io::Error::new(io::ErrorKind::Other, "size is greater than supported"));
179194
}
180195

181-
let mut description_bytes = vec![0u8; size * 4];
196+
// Read the container descriptions
197+
let mut description_bytes = vec![0u8; size * DESCRIPTION_BYTES];
182198
reader.read_exact(&mut description_bytes)?;
183199
let mut description_bytes = &description_bytes[..];
184200

185201
if has_offsets {
186-
let mut offsets = vec![0u8; size * 4];
202+
let mut offsets = vec![0u8; size * OFFSET_BYTES];
187203
reader.read_exact(&mut offsets)?;
188204
drop(offsets); // Not useful when deserializing into memory
189205
}
190206

191207
let mut containers = Vec::with_capacity(size);
192208

193-
for _ in 0..size {
209+
// Read each container
210+
for i in 0..size {
194211
let key = description_bytes.read_u16::<LittleEndian>()?;
195-
let len = u64::from(description_bytes.read_u16::<LittleEndian>()?) + 1;
212+
let cardinality = u64::from(description_bytes.read_u16::<LittleEndian>()?) + 1;
213+
214+
// If the run container bitmap is present, check if this container is a run container
215+
let is_run_container =
216+
run_container_bitmap.as_ref().map_or(false, |bm| bm[i / 8] & (1 << (i % 8)) != 0);
217+
218+
let store = if is_run_container {
219+
let runs = reader.read_u16::<LittleEndian>()?;
220+
let mut intervals = vec![[0, 0]; runs as usize];
221+
reader.read_exact(cast_slice_mut(&mut intervals))?;
222+
intervals.iter_mut().for_each(|[s, len]| {
223+
*s = u16::from_le(*s);
224+
*len = u16::from_le(*len);
225+
});
196226

197-
let store = if len <= 4096 {
198-
let mut values = vec![0; len as usize];
227+
let cardinality = intervals.iter().map(|[_, len]| *len as usize).sum();
228+
let mut store = Store::with_capacity(cardinality);
229+
intervals.into_iter().for_each(|[s, len]| {
230+
store.insert_range(RangeInclusive::new(s, s + len));
231+
});
232+
store
233+
} else if cardinality <= ARRAY_LIMIT {
234+
let mut values = vec![0; cardinality as usize];
199235
reader.read_exact(cast_slice_mut(&mut values))?;
200236
values.iter_mut().for_each(|n| *n = u16::from_le(*n));
201237
let array = a(values).map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
202238
Store::Array(array)
203239
} else {
204-
let mut values = Box::new([0; 1024]);
240+
let mut values = Box::new([0; BITMAP_LENGTH]);
205241
reader.read_exact(cast_slice_mut(&mut values[..]))?;
206242
values.iter_mut().for_each(|n| *n = u64::from_le(*n));
207-
let bitmap =
208-
b(len, values).map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
243+
let bitmap = b(cardinality, values)
244+
.map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
209245
Store::Bitmap(bitmap)
210246
};
211247

src/bitmap/store/array_store/mod.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,10 @@ impl ArrayStore {
2121
ArrayStore { vec: vec![] }
2222
}
2323

24+
/// Creates an `ArrayStore` with no values whose backing vector is
/// pre-allocated to hold `capacity` elements.
pub fn with_capacity(capacity: usize) -> ArrayStore {
25+
ArrayStore { vec: Vec::with_capacity(capacity) }
26+
}
27+
2428
///
2529
/// Create a new SortedU16Vec from a given vec
2630
/// It is up to the caller to ensure the vec is sorted and deduplicated

src/bitmap/store/mod.rs

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,12 +7,14 @@ use std::ops::{
77
};
88
use std::{slice, vec};
99

10-
use self::bitmap_store::BITMAP_LENGTH;
10+
pub use self::bitmap_store::BITMAP_LENGTH;
1111
use self::Store::{Array, Bitmap};
1212

1313
pub use self::array_store::ArrayStore;
1414
pub use self::bitmap_store::{BitmapIter, BitmapStore};
1515

16+
use crate::bitmap::container::ARRAY_LIMIT;
17+
1618
#[derive(Clone)]
1719
pub enum Store {
1820
Array(ArrayStore),
@@ -31,6 +33,14 @@ impl Store {
3133
Store::Array(ArrayStore::new())
3234
}
3335

36+
/// Creates a store whose representation is chosen from the expected number of values.
///
/// When `capacity` is at most `ARRAY_LIMIT`, an array store pre-allocated for
/// `capacity` values is returned. Otherwise a bitmap store is created with
/// `BitmapStore::new()`; `capacity` is not used on that branch.
pub fn with_capacity(capacity: usize) -> Store {
37+
if capacity <= ARRAY_LIMIT as usize {
38+
Store::Array(ArrayStore::with_capacity(capacity))
39+
} else {
40+
Store::Bitmap(BitmapStore::new())
41+
}
42+
}
43+
3444
pub fn full() -> Store {
3545
Store::Bitmap(BitmapStore::full())
3646
}

tests/bitmapwithruns.bin

46.9 KB
Binary file not shown.

tests/serialization.rs

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ use roaring::RoaringBitmap;
44

55
// Test data from https://github.com/RoaringBitmap/RoaringFormatSpec/tree/master/testdata
66
static BITMAP_WITHOUT_RUNS: &[u8] = include_bytes!("bitmapwithoutruns.bin");
7+
static BITMAP_WITH_RUNS: &[u8] = include_bytes!("bitmapwithruns.bin");
78

89
fn test_data_bitmap() -> RoaringBitmap {
910
(0..100)
@@ -21,10 +22,18 @@ fn serialize_and_deserialize(bitmap: &RoaringBitmap) -> RoaringBitmap {
2122
}
2223

2324
#[test]
24-
fn test_deserialize_from_provided_data() {
25+
fn test_deserialize_without_runs_from_provided_data() {
2526
assert_eq!(RoaringBitmap::deserialize_from(BITMAP_WITHOUT_RUNS).unwrap(), test_data_bitmap());
2627
}
2728

29+
// Deserializes the bytes of `bitmapwithruns.bin` and checks that the result
// equals the bitmap built by `test_data_bitmap()`.
#[test]
30+
fn test_deserialize_with_runs_from_provided_data() {
31+
assert_eq!(
32+
RoaringBitmap::deserialize_from(&mut &BITMAP_WITH_RUNS[..]).unwrap(),
33+
test_data_bitmap()
34+
);
35+
}
36+
2837
#[test]
2938
fn test_serialize_into_provided_data() {
3039
let bitmap = test_data_bitmap();

0 commit comments

Comments
 (0)