Skip to content

Commit 7eb16ed

Browse files
committed
Use Bytes instead of Array
1 parent da0d148 commit 7eb16ed

2 files changed

Lines changed: 28 additions & 22 deletions

File tree

shard.yml

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,8 @@
11
name: bloom_filter
22
version: 0.1.0
33

4+
crystal: 1.9.2
5+
46
authors:
57
- Potapov Sergey <blake131313@gmail.com>
68

src/bloom_filter/filter.cr

Lines changed: 26 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -2,16 +2,22 @@ module BloomFilter
22
class Filter
33
@bitsize : UInt32
44

5-
getter :hash_num, :bitsize, :bytesize, :bitmap
5+
getter hash_num : UInt8
6+
getter bitsize : UInt32
7+
getter bitmap : Bytes
8+
9+
def bytesize
10+
@bitmap.size
11+
end
612

713
SEED_A = 0xdeadbeef_u32
814
SEED_B = 0x71fefeed_u32
915

1016
MULT_A = 0xb8b34b2d_u32
1117
MULT_B = 0x52c6a2d9_u32
1218

13-
def initialize(@bytesize, hash_num, @bitmap = Array(UInt8).new(bytesize, 0_u8))
14-
@bitsize = bytesize * 8
19+
def initialize(bytesize, hash_num, @bitmap = Bytes.new(bytesize.to_i32, 0_u8))
20+
@bitsize = (bytesize * 8).to_u32
1521
@hash_num = hash_num.to_u8
1622
end
1723

@@ -20,14 +26,11 @@ module BloomFilter
2026
@hash_num = io.read_byte.as UInt8
2127

2228
# TODO: Is it possible to read 4 byte chunks?
23-
@bytesize = 0_u32
24-
@bitmap = Array(UInt8).new
25-
while byte = io.read_byte
26-
@bitmap << byte.to_u8
27-
@bytesize += 1
28-
end
29+
size = IO::ByteFormat::BigEndian.decode(Int32, io)
30+
@bitmap = Bytes.new(size)
31+
io.read_fully(@bitmap).to_u32
2932

30-
@bitsize = bytesize * 8
33+
@bitsize = (size * 8).to_u32
3134
end
3235

3336
def insert(str : String)
@@ -51,47 +54,48 @@ module BloomFilter
5154

5255
def dump(io : IO)
5356
io.write_byte(@hash_num)
57+
IO::ByteFormat::BigEndian.encode(@bitmap.size, io)
5458
# TODO: is it possible to write 4 byte chunks?
5559
@bitmap.each { |byte| io.write_byte(byte) }
5660
io
5761
end
5862

5963
def ==(another : Filter)
60-
@bytesize == another.bytesize && @hash_num == another.hash_num && @bitmap == another.bitmap
64+
self.bytesize == another.bytesize && @hash_num == another.hash_num && @bitmap == another.bitmap
6165
end
6266

6367
# Get a union of two filters.
6468
def |(another : Filter) : Filter
65-
raise(ArgumentError.new("Cannot unite filters of different size")) unless another.bytesize == @bytesize
69+
raise(ArgumentError.new("Cannot unite filters of different size")) unless another.bytesize == self.bytesize
6670
raise(ArgumentError.new("Cannot unite filters with different number of hash functions")) unless another.hash_num == @hash_num
6771

68-
union_bitmap = Array(UInt8).new(bytesize.to_i) do |index|
72+
union_bitmap = Bytes.new(bytesize) do |index|
6973
@bitmap[index] | another.bitmap[index]
7074
end
71-
Filter.new(@bytesize, @hash_num, union_bitmap)
75+
Filter.new(self.bytesize, @hash_num, union_bitmap)
7276
end
7377

7478
# Get intersection of two filters.
7579
def &(another : Filter) : Filter
76-
raise(ArgumentError.new("Cannot unite filters of different size")) unless another.bytesize == @bytesize
80+
raise(ArgumentError.new("Cannot unite filters of different size")) unless another.bytesize == self.bytesize
7781
raise(ArgumentError.new("Cannot unite filters with different number of hash functions")) unless another.hash_num == @hash_num
7882

79-
intersection_bitmap = Array(UInt8).new(bytesize.to_i) do |index|
83+
intersection_bitmap = Bytes.new(bytesize) do |index|
8084
@bitmap[index] & another.bitmap[index]
8185
end
82-
Filter.new(@bytesize, @hash_num, intersection_bitmap)
86+
Filter.new(self.bytesize, @hash_num, intersection_bitmap)
8387
end
8488

8589
@[AlwaysInline]
8690
private def set(index : UInt32)
87-
item_index = index / 8
91+
item_index = index // 8
8892
bit_index = index % 8
8993
@bitmap[item_index] = @bitmap[item_index] | (1 << bit_index)
9094
end
9195

9296
@[AlwaysInline]
9397
private def set?(index : UInt32) : Bool
94-
item_index = index / 8
98+
item_index = index // 8
9599
bit_index = index % 8
96100
@bitmap[item_index] & (1 << bit_index) != 0
97101
end
@@ -136,11 +140,11 @@ module BloomFilter
136140
ha = SEED_A
137141
hb = SEED_B
138142
u = str.to_unsafe
139-
(str.bytesize / 4).times do
143+
(str.bytesize // 4).times do
140144
v = 0_u32
141145
4.times { |i| v |= u[i].to_u32 << (i*8) }
142-
ha = hswap(ha ^ v) * MULT_A
143-
hb = (hswap(hb) ^ v) * MULT_B
146+
ha = hswap(ha ^ v) &* MULT_A
147+
hb = (hswap(hb) ^ v) &* MULT_B
144148
u += 4
145149
end
146150
v = 0_u32

0 commit comments

Comments
 (0)