@@ -3,15 +3,19 @@ use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt};
3
3
use std:: convert:: { Infallible , TryFrom } ;
4
4
use std:: error:: Error ;
5
5
use std:: io;
6
+ use std:: ops:: RangeInclusive ;
6
7
7
- use super :: container:: Container ;
8
- use crate :: bitmap:: store:: { ArrayStore , BitmapStore , Store } ;
8
+ use crate :: bitmap :: container:: { Container , ARRAY_LIMIT } ;
9
+ use crate :: bitmap:: store:: { ArrayStore , BitmapStore , Store , BITMAP_LENGTH } ;
9
10
use crate :: RoaringBitmap ;
10
11
11
12
// Cookie (full 32-bit value) identifying the serialized format without run containers.
// In this format the container count follows as a separate little-endian u32 and the
// offset header is always read.
const SERIAL_COOKIE_NO_RUNCONTAINER : u32 = 12346 ;
12
13
// Cookie (low 16 bits of the first u32) identifying the format that may contain run
// containers. The high 16 bits of that same u32 hold the container count minus one.
const SERIAL_COOKIE : u16 = 12347 ;
13
- // TODO: Need this once run containers are supported
14
- // const NO_OFFSET_THRESHOLD: u8 = 4;
14
// In the SERIAL_COOKIE format, the offset header is only present when the number of
// containers is at least this value.
+ const NO_OFFSET_THRESHOLD : usize = 4 ;
15
+
16
+ // Sizes, in bytes, of a single per-container entry in each header structure
17
// One description entry: a u16 key followed by a u16 holding the cardinality minus one.
+ const DESCRIPTION_BYTES : usize = 4 ;
18
// One offset entry; the offsets are read and discarded when deserializing into memory.
+ const OFFSET_BYTES : usize = 4 ;
15
19
16
20
impl RoaringBitmap {
17
21
/// Return the size in bytes of the serialized output.
@@ -163,49 +167,81 @@ impl RoaringBitmap {
163
167
B : Fn ( u64 , Box < [ u64 ; 1024 ] > ) -> Result < BitmapStore , BErr > ,
164
168
BErr : Error + Send + Sync + ' static ,
165
169
{
166
- let ( size, has_offsets) = {
170
+ // First read the cookie to determine which version of the format we are reading
171
+ let ( size, has_offsets, has_run_containers) = {
167
172
let cookie = reader. read_u32 :: < LittleEndian > ( ) ?;
168
173
if cookie == SERIAL_COOKIE_NO_RUNCONTAINER {
169
- ( reader. read_u32 :: < LittleEndian > ( ) ? as usize , true )
174
+ ( reader. read_u32 :: < LittleEndian > ( ) ? as usize , true , false )
170
175
} else if ( cookie as u16 ) == SERIAL_COOKIE {
171
- return Err ( io:: Error :: new ( io:: ErrorKind :: Other , "run containers are unsupported" ) ) ;
176
+ let size = ( ( cookie >> 16 ) + 1 ) as usize ;
177
+ ( size, size >= NO_OFFSET_THRESHOLD , true )
172
178
} else {
173
179
return Err ( io:: Error :: new ( io:: ErrorKind :: Other , "unknown cookie value" ) ) ;
174
180
}
175
181
} ;
176
182
183
+ // Read the run container bitmap if necessary
184
+ let run_container_bitmap = if has_run_containers {
185
+ let mut bitmap = vec ! [ 0u8 ; ( size + 7 ) / 8 ] ;
186
+ reader. read_exact ( & mut bitmap) ?;
187
+ Some ( bitmap)
188
+ } else {
189
+ None
190
+ } ;
191
+
177
192
if size > u16:: MAX as usize + 1 {
178
193
return Err ( io:: Error :: new ( io:: ErrorKind :: Other , "size is greater than supported" ) ) ;
179
194
}
180
195
181
- let mut description_bytes = vec ! [ 0u8 ; size * 4 ] ;
196
+ // Read the container descriptions
197
+ let mut description_bytes = vec ! [ 0u8 ; size * DESCRIPTION_BYTES ] ;
182
198
reader. read_exact ( & mut description_bytes) ?;
183
199
let mut description_bytes = & description_bytes[ ..] ;
184
200
185
201
if has_offsets {
186
- let mut offsets = vec ! [ 0u8 ; size * 4 ] ;
202
+ let mut offsets = vec ! [ 0u8 ; size * OFFSET_BYTES ] ;
187
203
reader. read_exact ( & mut offsets) ?;
188
204
drop ( offsets) ; // Not useful when deserializing into memory
189
205
}
190
206
191
207
let mut containers = Vec :: with_capacity ( size) ;
192
208
193
- for _ in 0 ..size {
209
+ // Read each container
210
+ for i in 0 ..size {
194
211
let key = description_bytes. read_u16 :: < LittleEndian > ( ) ?;
195
- let len = u64:: from ( description_bytes. read_u16 :: < LittleEndian > ( ) ?) + 1 ;
212
+ let cardinality = u64:: from ( description_bytes. read_u16 :: < LittleEndian > ( ) ?) + 1 ;
213
+
214
+ // If the run container bitmap is present, check if this container is a run container
215
+ let is_run_container =
216
+ run_container_bitmap. as_ref ( ) . map_or ( false , |bm| bm[ i / 8 ] & ( 1 << ( i % 8 ) ) != 0 ) ;
217
+
218
+ let store = if is_run_container {
219
+ let runs = reader. read_u16 :: < LittleEndian > ( ) ?;
220
+ let mut intervals = vec ! [ [ 0 , 0 ] ; runs as usize ] ;
221
+ reader. read_exact ( cast_slice_mut ( & mut intervals) ) ?;
222
+ intervals. iter_mut ( ) . for_each ( |[ s, len] | {
223
+ * s = u16:: from_le ( * s) ;
224
+ * len = u16:: from_le ( * len) ;
225
+ } ) ;
196
226
197
- let store = if len <= 4096 {
198
- let mut values = vec ! [ 0 ; len as usize ] ;
227
+ let cardinality = intervals. iter ( ) . map ( |[ _, len] | * len as usize ) . sum ( ) ;
228
+ let mut store = Store :: with_capacity ( cardinality) ;
229
+ intervals. into_iter ( ) . for_each ( |[ s, len] | {
230
+ store. insert_range ( RangeInclusive :: new ( s, s + len) ) ;
231
+ } ) ;
232
+ store
233
+ } else if cardinality <= ARRAY_LIMIT {
234
+ let mut values = vec ! [ 0 ; cardinality as usize ] ;
199
235
reader. read_exact ( cast_slice_mut ( & mut values) ) ?;
200
236
values. iter_mut ( ) . for_each ( |n| * n = u16:: from_le ( * n) ) ;
201
237
let array = a ( values) . map_err ( |e| io:: Error :: new ( io:: ErrorKind :: InvalidData , e) ) ?;
202
238
Store :: Array ( array)
203
239
} else {
204
- let mut values = Box :: new ( [ 0 ; 1024 ] ) ;
240
+ let mut values = Box :: new ( [ 0 ; BITMAP_LENGTH ] ) ;
205
241
reader. read_exact ( cast_slice_mut ( & mut values[ ..] ) ) ?;
206
242
values. iter_mut ( ) . for_each ( |n| * n = u64:: from_le ( * n) ) ;
207
- let bitmap =
208
- b ( len , values ) . map_err ( |e| io:: Error :: new ( io:: ErrorKind :: InvalidData , e) ) ?;
243
+ let bitmap = b ( cardinality , values )
244
+ . map_err ( |e| io:: Error :: new ( io:: ErrorKind :: InvalidData , e) ) ?;
209
245
Store :: Bitmap ( bitmap)
210
246
} ;
211
247
0 commit comments