2020Roaring Bitmap.
2121"""
2222
23- from typing import Iterator , Set
24- import struct
23+ from typing import Iterator
24+ from pyroaring import BitMap64
2525
2626
2727class RoaringBitmap64 :
2828 """
2929 A 64-bit roaring bitmap implementation.
30+
3031 This class provides efficient storage and operations for sets of 64-bit integers.
31- It uses a set-based implementation for simplicity, which can be replaced with
32- a more efficient roaring bitmap library if needed.
32+ It uses pyroaring.BitMap64 for better performance and memory efficiency.
3333 """
3434
def __init__(self):
    """Create an empty bitmap backed by a fresh ``pyroaring.BitMap64``."""
    backing = BitMap64()
    self._data = backing
3737
def add(self, value: int) -> None:
    """Insert ``value`` into the bitmap by delegating to the backing store."""
    store = self._data
    store.add(value)
4141
def add_range(self, from_: int, to: int) -> None:
    """Insert every value of the inclusive interval ``[from_, to]``.

    The work is delegated to the backing store's ``add_range`` in one call.
    """
    # The public contract is inclusive of ``to``; the backing call is handed
    # ``to + 1`` as its end argument (presumably an exclusive bound -- confirm
    # against the pyroaring documentation).
    end = to + 1
    self._data.add_range(from_, end)
4645
4746 def contains (self , value : int ) -> bool :
4847 """Check if the bitmap contains the given value."""
@@ -58,7 +57,7 @@ def cardinality(self) -> int:
5857
def __iter__(self) -> Iterator[int]:
    """Return an iterator over the stored values.

    Ordering is whatever the backing store's own iteration yields; no extra
    sorting is applied here.
    """
    store = self._data
    return iter(store)
6261
6362 def __len__ (self ) -> int :
6463 """Return the number of elements in the bitmap."""
@@ -74,7 +73,7 @@ def clear(self) -> None:
7473
def to_list(self) -> list:
    """Materialise the bitmap's values into a new list.

    Values appear in the backing store's iteration order.
    """
    return [*self._data]
7877
7978 def to_range_list (self ) -> list :
8079 """
@@ -85,8 +84,9 @@ def to_range_list(self) -> list:
8584 if self .is_empty ():
8685 return []
8786
88- sorted_values = sorted ( self . _data )
87+ # Use pyroaring's efficient iteration
8988 ranges = []
89+ sorted_values = list (self ._data )
9090 start = sorted_values [0 ]
9191 end = start
9292
@@ -127,23 +127,13 @@ def remove_all(a: 'RoaringBitmap64', b: 'RoaringBitmap64') -> 'RoaringBitmap64':
127127
def serialize(self) -> bytes:
    """Return the byte representation produced by the backing store."""
    payload = self._data.serialize()
    return payload
136131
@staticmethod
def deserialize(data: bytes) -> 'RoaringBitmap64':
    """Rebuild a bitmap from bytes.

    ``data`` is handed unchanged to ``BitMap64.deserialize``; the decoded
    store then replaces the empty one created by the constructor.
    """
    bitmap = RoaringBitmap64()
    decoded = BitMap64.deserialize(data)
    bitmap._data = decoded
    return bitmap
148138
149139 def __eq__ (self , other : object ) -> bool :
@@ -152,9 +142,10 @@ def __eq__(self, other: object) -> bool:
152142 return self ._data == other ._data
153143
def __hash__(self) -> int:
    """Hash the bitmap by its contents.

    The values are sorted and frozen into a tuple, so the result depends only
    on which values are present.
    """
    # NOTE(review): the hash is derived from mutable contents, so it changes
    # whenever values are added; avoid mutating an instance that is in use as
    # a dict key or set member.
    ordered = sorted(self._data)
    return hash(tuple(ordered))
156146
def __repr__(self) -> str:
    """Return a debugging representation of the bitmap.

    Bitmaps holding at most 10 values are shown with their full contents;
    larger ones are summarised by their element count only.
    """
    # Ask the backing store for its size first: copying every value into a
    # list just to measure it would defeat the point of a compact bitmap.
    count = len(self._data)
    if count <= 10:
        return f"RoaringBitmap64({list(self._data)})"
    return f"RoaringBitmap64({count} elements)"
0 commit comments