Skip to content

Commit 4f4558a

Browse files
Print seed diagnostics in random-seed tests
1 parent a9e8364 commit 4f4558a

File tree

1 file changed

+74
-0
lines changed

1 file changed

+74
-0
lines changed
Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
"""Regression tests that exercise the sketch under multiple RNG seeds."""
2+
3+
from __future__ import annotations
4+
5+
import math
6+
import random
7+
import statistics
8+
from typing import Iterable
9+
10+
import pytest
11+
12+
from kll_sketch import KLL
13+
14+
15+
def _exact_quantile(values: Iterable[float], q: float) -> float:
16+
ordered = sorted(values)
17+
if not ordered:
18+
raise ValueError("_exact_quantile requires a non-empty iterable")
19+
# Linear interpolation between neighbouring order statistics to mirror the
20+
# behaviour of NumPy's ``quantile`` default method.
21+
position = q * (len(ordered) - 1)
22+
lower = math.floor(position)
23+
upper = math.ceil(position)
24+
if lower == upper:
25+
return ordered[lower]
26+
weight_upper = position - lower
27+
weight_lower = 1.0 - weight_upper
28+
return ordered[lower] * weight_lower + ordered[upper] * weight_upper
29+
30+
31+
@pytest.mark.parametrize("seed", [3, 17, 221, 1987, 4096])
def test_quantile_accuracy_across_random_seeds(seed: int) -> None:
    """Check the sketch median against the exact median for several seeds.

    The same ``seed`` drives both the sample generator and the sketch. The
    approximate median must lie within 0.12 population standard deviations
    of the exact median (with a floor of 1e-9 on the tolerance).
    """
    generator = random.Random(seed)
    samples = []
    for _ in range(8_000):
        samples.append(generator.gauss(0.0, 1.0))

    sketch = KLL(capacity=256, rng_seed=seed)
    sketch.extend(samples)

    approx = sketch.quantile(0.5)
    exact = _exact_quantile(samples, 0.5)
    # Tolerance scales with the spread of the data actually drawn.
    spread = statistics.pstdev(samples)
    tolerance = max(1e-9, 0.12 * spread)

    # Diagnostics identifying the seed and the values being compared.
    template = "seed={seed}: approx median={approx:.6f}, exact={exact:.6f}, tolerance={tolerance:.6f}"
    print(template.format(seed=seed, approx=approx, exact=exact, tolerance=tolerance))

    error = abs(approx - exact)
    assert error <= tolerance
51+
52+
53+
def test_deterministic_compactions_for_fixed_seed() -> None:
    """Two sketches built with the same seed and input must end up identical.

    Both sketches receive the same values in the same order; afterwards their
    internal levels, their serialized bytes and a few quantiles are compared.
    """
    seed = 123_456
    source = random.Random(seed)
    payload = [source.uniform(-5.0, 5.0) for _ in range(5_000)]

    first = KLL(capacity=200, rng_seed=seed)
    second = KLL(capacity=200, rng_seed=seed)
    # Feed the sketches in lockstep, one value at a time.
    for item in payload:
        first.add(item)
        second.add(item)

    serialized_size = len(first.to_bytes())
    # NOTE: printed under the "levels" label, but this is the total number of
    # items summed over all levels, not the number of levels.
    retained_items = sum(len(level) for level in first._levels)
    template = "deterministic compaction: sketch bytes size={size}, levels={levels}"
    print(template.format(size=serialized_size, levels=retained_items))

    assert first._levels == second._levels
    assert first.to_bytes() == second.to_bytes()
    for q in (0.05, 0.5, 0.95):
        left = first.quantile(q)
        right = second.quantile(q)
        assert math.isclose(left, right, rel_tol=1e-12, abs_tol=1e-12)

0 commit comments

Comments
 (0)