2020Roaring Bitmap.
2121"""
2222
23- from typing import Iterator , Set
24- import struct
23+ from typing import Iterator
24+
25+ try :
26+ from pyroaring import BitMap64
27+ PYROARING_AVAILABLE = True
28+ except ImportError :
29+ PYROARING_AVAILABLE = False
2530
2631
class RoaringBitmap64:
    """
    A set of 64-bit integers with roaring-bitmap style operations.

    The backing container is chosen when an instance is constructed: a
    pyroaring ``BitMap64`` if the module-level ``PYROARING_AVAILABLE`` flag
    is true, otherwise a plain Python ``set``. Instance methods dispatch on
    the container the instance actually holds, so an object keeps behaving
    consistently for its whole lifetime.

    NOTE(review): ``serialize`` delegates to ``BitMap64.serialize`` in one
    backend and writes a hand-rolled "count + values" layout in the other,
    so bytes produced by one backend are not expected to be readable by the
    other -- confirm before exchanging payloads between environments.
    """

    def __init__(self):
        if PYROARING_AVAILABLE:
            self._data = BitMap64()
            self._fallback = False
        else:
            self._data = set()
            self._fallback = True

    def _is_set_backed(self) -> bool:
        """Return True when the values live in a plain ``set``."""
        return isinstance(self._data, (set, frozenset))

    @staticmethod
    def _wrap(data) -> 'RoaringBitmap64':
        """Build a new bitmap that adopts ``data`` as its backing container."""
        result = RoaringBitmap64()
        result._data = data
        return result

    def add(self, value: int) -> None:
        """Add a single value to the bitmap."""
        self._data.add(value)

    def add_range(self, from_: int, to: int) -> None:
        """Add every value in the inclusive range [from_, to] to the bitmap."""
        if self._is_set_backed():
            self._data.update(range(from_, to + 1))
        else:
            # The upper bound is passed as ``to + 1`` because the backend's
            # add_range is treated as end-exclusive.
            self._data.add_range(from_, to + 1)

    def contains(self, value: int) -> bool:
        """Check if the bitmap contains the given value."""
        return value in self._data

    def is_empty(self) -> bool:
        """Check if the bitmap is empty."""
        return len(self._data) == 0

    def cardinality(self) -> int:
        """Return the number of elements in the bitmap."""
        return len(self._data)

    def __iter__(self) -> Iterator[int]:
        """Iterate over all values in the bitmap in sorted order."""
        if self._is_set_backed():
            return iter(sorted(self._data))
        # The BitMap64 backend is relied on to iterate in ascending order.
        return iter(self._data)

    def __len__(self) -> int:
        """Return the number of elements in the bitmap."""
        return len(self._data)

    def __contains__(self, value: int) -> bool:
        """Check if the bitmap contains the given value."""
        return self.contains(value)

    def clear(self) -> None:
        """Clear all values from the bitmap."""
        self._data.clear()

    def to_list(self) -> list:
        """Return a sorted list of all values in the bitmap."""
        return list(self)

    def to_range_list(self) -> list:
        """
        Collapse the bitmap into a list of ``Range(start, end)`` objects.

        Each ``Range`` covers one maximal run of consecutive values, with
        both ends inclusive. An empty bitmap yields an empty list.
        """
        if self.is_empty():
            return []

        values = iter(self)  # ascending order, one pass, no extra copy
        start = end = next(values)
        ranges = []

        for value in values:
            if value == end + 1:
                # Consecutive, extend the range
                end = value
            else:
                # Gap, close current range and start new one
                ranges.append(Range(start, end))
                start = end = value

        # Add the last range
        ranges.append(Range(start, end))
        return ranges

    @staticmethod
    def and_(a: 'RoaringBitmap64', b: 'RoaringBitmap64') -> 'RoaringBitmap64':
        """Return the intersection of two bitmaps."""
        return RoaringBitmap64._wrap(a._data & b._data)

    @staticmethod
    def or_(a: 'RoaringBitmap64', b: 'RoaringBitmap64') -> 'RoaringBitmap64':
        """Return the union of two bitmaps."""
        return RoaringBitmap64._wrap(a._data | b._data)

    @staticmethod
    def remove_all(a: 'RoaringBitmap64', b: 'RoaringBitmap64') -> 'RoaringBitmap64':
        """Return a new bitmap holding the values of ``a`` that are not in ``b``."""
        return RoaringBitmap64._wrap(a._data - b._data)

    def serialize(self) -> bytes:
        """
        Serialize the bitmap to bytes.

        Set-backed instances write an 8-byte big-endian unsigned count
        followed by the sorted values as 8-byte big-endian signed integers;
        ``struct.error`` is raised if a value does not fit. Other instances
        return whatever the backing container's ``serialize()`` produces.
        """
        if not self._is_set_backed():
            return self._data.serialize()

        import struct
        values = sorted(self._data)
        # One pack call instead of repeated ``bytes +=`` (which is quadratic).
        return struct.pack(f'>Q{len(values)}q', len(values), *values)

    @staticmethod
    def deserialize(data: bytes) -> 'RoaringBitmap64':
        """
        Deserialize a bitmap from bytes.

        The expected layout follows the active backend: ``BitMap64``'s own
        format when pyroaring is available, otherwise the count + values
        layout written by the set-based ``serialize``. In the fallback path
        a truncated buffer raises ``struct.error``; trailing bytes are
        ignored.
        """
        result = RoaringBitmap64()
        if PYROARING_AVAILABLE:
            result._data = BitMap64.deserialize(data)
        else:
            import struct
            (count,) = struct.unpack_from('>Q', data, 0)
            # unpack_from validates the buffer length before decoding.
            result._data = set(struct.unpack_from(f'>{count}q', data, 8))
        return result

    def __eq__(self, other: object) -> bool:
        """Two bitmaps are equal when their backing containers compare equal."""
        if not isinstance(other, RoaringBitmap64):
            # Let Python try the reflected comparison; ``==`` still ends up False.
            return NotImplemented
        return self._data == other._data

    def __hash__(self) -> int:
        """
        Hash the current contents.

        The bitmap is mutable, so the hash changes when values are added or
        removed; do not mutate an instance while it is a dict key or set member.
        """
        if self._is_set_backed():
            return hash(frozenset(self._data))
        return hash(tuple(sorted(self._data)))

    def __repr__(self) -> str:
        """Show the values for small bitmaps, otherwise only the element count."""
        count = len(self._data)
        if count <= 10:
            return f"RoaringBitmap64({self.to_list()})"
        return f"RoaringBitmap64({count} elements)"
0 commit comments