Skip to content

Commit 9fa09c9

Browse files
Merge pull request #32 from borgbackup/separate-format
HashTableNT: give separate formats in value_format namedtuple
2 parents 9cc7509 + b4f8c7e commit 9fa09c9

File tree

5 files changed

+32
-18
lines changed

5 files changed

+32
-18
lines changed

src/borghash/HashTableNT.pxd

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
cdef class HashTableNT:
22
cdef int key_size
33
cdef object value_type
4+
cdef object value_format
45
cdef object value_struct
56
cdef int value_size
67
cdef object inner

src/borghash/HashTableNT.pyx

Lines changed: 20 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -21,18 +21,23 @@ _NoDefault = object()
2121

2222
cdef class HashTableNT:
2323
def __init__(self, items=None, *,
24-
key_size: int = 0, value_format: str = "", value_type: Any = None,
24+
key_size: int, value_type: Any, value_format: Any,
2525
capacity: int = MIN_CAPACITY) -> None:
26-
if not key_size:
27-
raise ValueError("key_size must be specified and must be > 0.")
28-
if not value_format:
29-
raise ValueError("value_format must be specified and must be non-empty.")
30-
if value_type is None:
31-
raise ValueError("value_type must be specified (a namedtuple type corresponding to value_format).")
26+
if not isinstance(key_size, int) or not key_size > 0:
27+
raise ValueError("key_size must be an integer and > 0.")
28+
if type(value_type) is not type:
29+
raise TypeError("value_type must be a namedtuple type.")
30+
if not isinstance(value_format, tuple):
31+
raise TypeError("value_format must be a namedtuple instance.")
32+
if value_format._fields != value_type._fields:
33+
raise TypeError("value_format's and value_type's element names must correspond.")
34+
if not all(isinstance(fmt, str) and len(fmt) > 0 for fmt in value_format):
35+
raise ValueError("value_format's elements must be str and non-empty.")
3236
self.key_size = key_size
33-
self.value_struct = struct.Struct(value_format)
34-
self.value_size = self.value_struct.size
3537
self.value_type = value_type
38+
self.value_format = value_format
39+
self.value_struct = struct.Struct("".join(value_format))
40+
self.value_size = self.value_struct.size
3641
self.inner = HashTable(key_size=self.key_size, value_size=self.value_size, capacity=capacity)
3742
_fill(self, items)
3843

@@ -159,9 +164,11 @@ cdef class HashTableNT:
159164
meta = {
160165
'key_size': self.key_size,
161166
'value_size': self.value_size,
162-
'value_format': self.value_struct.format,
163167
'value_type_name': self.value_type.__name__,
164168
'value_type_fields': self.value_type._fields,
169+
'value_format_name': self.value_format.__class__.__name__,
170+
'value_format_fields': self.value_format._fields,
171+
'value_format': self.value_format,
165172
'capacity': self.inner.capacity,
166173
'used': self.inner.used, # count of keys / values
167174
}
@@ -201,7 +208,9 @@ cdef class HashTableNT:
201208
raise ValueError(f"Invalid file, file is too short.")
202209
meta = json.loads(meta_bytes.decode("utf-8"))
203210
value_type = namedtuple(meta['value_type_name'], meta['value_type_fields'])
204-
ht = cls(key_size=meta['key_size'], value_format=meta['value_format'], value_type=value_type, capacity=meta['capacity'])
211+
value_format_t = namedtuple(meta['value_format_name'], meta['value_format_fields'])
212+
value_format = value_format_t(*meta['value_format'])
213+
ht = cls(key_size=meta['key_size'], value_format=value_format, value_type=value_type, capacity=meta['capacity'])
205214
count = 0
206215
ksize, vsize = meta['key_size'], meta['value_size']
207216
for i in range(meta['used']):

src/borghash/__main__.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,10 @@ def demo():
1515
1616
count = 50000
1717
value_type = namedtuple("Chunk", ["refcount", "size"])
18+
value_format_t = namedtuple("ChunkFormat", ["refcount", "size"])
19+
value_format = value_format_t(refcount="<I", size="I")
1820
# 256bit (32Byte) key, 2x 32bit (4Byte) values
19-
ht = HashTableNT(key_size=32, value_format="<II", value_type=value_type)
21+
ht = HashTableNT(key_size=32, value_type=value_type, value_format=value_format)
2022
2123
t0 = time()
2224
for i in range(count):

tests/benchmark_test.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,10 @@
1010
from .hashtable_test import H2
1111

1212
VALUE_TYPE = namedtuple("value_type", "value")
13-
VALUE_FMT = "<I"
13+
VALUE_FMT_TYPE = namedtuple("value_format", "value")
14+
VALUE_FMT = VALUE_FMT_TYPE("<I")
1415
KEY_SIZE = len(H2(0))
15-
VALUE_SIZE = len(struct.pack(VALUE_FMT, 0))
16+
VALUE_SIZE = len(struct.pack("".join(VALUE_FMT), 0))
1617
VALUE_BITS = VALUE_SIZE * 8
1718

1819
@pytest.fixture(scope="module")

tests/hashtablent_test.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,9 @@
88
from .hashtable_test import H2
99

1010
key_size = 32 # 32 bytes = 256bits key
11-
value_format = "<III" # 3x little endian 32bit unsigned int
1211
value_type = namedtuple("vt", "v1 v2 v3")
12+
value_format_t = namedtuple("vf", "v1 v2 v3")
13+
value_format = value_format_t(v1="<I", v2="I", v3="I") # 3x little endian 32bit unsigned int
1314

1415
key1, value1 = b"a" * 32, value_type(11, 12, 13)
1516
key2, value2 = b"b" * 32, value_type(21, 22, 23)
@@ -19,7 +20,7 @@
1920

2021
@pytest.fixture
2122
def ntht():
22-
return HashTableNT(key_size=key_size, value_format=value_format, value_type=value_type)
23+
return HashTableNT(key_size=key_size, value_type=value_type, value_format=value_format)
2324

2425

2526
@pytest.fixture
@@ -30,10 +31,10 @@ def ntht12(ntht):
3031

3132

3233
def test_init():
33-
ht = HashTableNT(key_size=32, value_format=value_format, value_type=value_type)
34+
ht = HashTableNT(key_size=32, value_type=value_type, value_format=value_format)
3435
assert len(ht) == 0
3536
items = [(key1, value1), (key2, value2)]
36-
ht = HashTableNT(items, key_size=32, value_format=value_format, value_type=value_type)
37+
ht = HashTableNT(items, key_size=32, value_type=value_type, value_format=value_format)
3738
assert ht[key1] == value1
3839
assert ht[key2] == value2
3940

0 commit comments

Comments
 (0)