|
1 | 1 | import collections
|
2 | 2 | import itertools
|
| 3 | +import math |
3 | 4 | from io import IOBase
|
4 | 5 | import sys
|
5 | 6 | from heapq import heappush, heappop, heapify
|
@@ -65,20 +66,25 @@ def ensure_dir(path: Union[str, Path]) -> Path:
|
65 | 66 |
|
class CodeTable:
    """
    Code table: mapping a symbol to codewords (and vice versa).

    The symbols are the things you want to encode, usually characters in a string
    or byte sequence, but it can be anything hashable.
    The codewords are the corresponding bit sequences, represented as a tuple (bits, value)
    where `bits` is the number of bits and `value` the integer interpretation of these bits.
    """
    # TODO: use something like namedtuple or class with slots for codewords instead of tuples?

    def __init__(self, symbol_code_map: dict):
        """
        Build the symbol -> codeword and codeword -> symbol lookup tables.

        :param symbol_code_map: mapping of each symbol to its `(bits, value)` codeword.
        :raises ValueError: if `bits` is not an int >= 1, if `value` is not
            an int >= 0, or if `value` does not fit in `bits` bits.
        """
        # symbol -> (bits, value)
        self._symbol_map = {}
        # (bits, value) -> symbol
        self._code_map = {}
        for symbol, (bits, value) in symbol_code_map.items():
            if not (
                isinstance(bits, int) and bits >= 1
                and isinstance(value, int) and value >= 0
                # Exact integer test for "value fits in `bits` bits". A float
                # based log2 comparison rounds up for wide codewords
                # (e.g. 2**64 - 1 yields exactly 64.0) and would reject them.
                and value.bit_length() <= bits
            ):
                raise ValueError("Invalid code: {b} bits, value {v}".format(b=bits, v=value))
            self._symbol_map[symbol] = (bits, value)
            self._code_map[(bits, value)] = symbol
        # TODO check if code table is actually a prefix code
|
|
0 commit comments