-
Notifications
You must be signed in to change notification settings - Fork 12
Expand file tree
/
Copy pathaggregate.py
More file actions
122 lines (106 loc) · 4.01 KB
/
aggregate.py
File metadata and controls
122 lines (106 loc) · 4.01 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
"""Spec 047 §14 Phase 2 (Q1 spike) — aggregate launch-N.jsonl into a means
+ 95% CI table per (bench, variant), and emit the Q1 decision-matrix deltas
(ReactorDescriptors vs ReactorV2, ReactorDescriptors vs ReactorToday).
Only rows whose status is "ok" are aggregated.

Usage:
    python aggregate.py   # reads launch-*.jsonl in CWD
"""
import glob
import json
import math
import statistics
from collections import defaultdict
# Variants reported for every bench, in display order.
_VARIANTS = ("ReactorToday", "ReactorV2", "ReactorDescriptors")

# Normal-approximation critical value used for the 95% CI half-width.
_Z_95 = 1.96

# Placeholder printed in a table cell when a statistic is undefined (NaN).
_MISSING_CELL = "—"


def _load_ok_rows(pattern="launch-*.jsonl"):
    """Return the parsed JSON rows with status "ok" from files matching *pattern*.

    Files are visited in sorted path order; blank lines are skipped.
    Raises whatever ``json.loads`` raises on a malformed line.
    """
    rows = []
    for path in sorted(glob.glob(pattern)):
        with open(path, "r", encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                row = json.loads(line)
                if row.get("status") == "ok":
                    rows.append(row)
    return rows


def _summarize(rs, key):
    """Return ``(mean, ci_half_width, n)`` for ``r[key]`` over the rows *rs*.

    The mean is NaN when *rs* is empty; the CI half-width is NaN when there
    are fewer than two samples (no sample standard deviation exists).
    Raises ``KeyError`` if a row lacks *key*.
    """
    vals = [r[key] for r in rs]
    if not vals:
        return math.nan, math.nan, 0
    mean = statistics.mean(vals)
    if len(vals) < 2:
        return mean, math.nan, len(vals)
    # 95% CI half-width. Strictly this wants a t critical value (for n=15,
    # dof=14, t ≈ 2.145); 1.96 is used for simplicity, so the interval is
    # somewhat too narrow, increasingly so as n gets small.
    ci_half = _Z_95 * statistics.stdev(vals) / math.sqrt(len(vals))
    return mean, ci_half, len(vals)


def _pct(a, base):
    """Return the percentage change of *a* relative to *base*.

    NaN is returned when either input is NaN or when *base* is zero.
    """
    if base and not math.isnan(base) and not math.isnan(a):
        return (a - base) / base * 100.0
    return math.nan


def _cell(value, spec, suffix=""):
    """Format *value* with format-spec *spec* plus *suffix*, or a dash for NaN."""
    if math.isnan(value):
        return _MISSING_CELL
    return format(value, spec) + suffix


def _q1_band(delta_pct):
    """Map the ns delta vs ReactorV2 (in percent) to its Q1 decision band."""
    if math.isnan(delta_pct):
        return "-"
    magnitude = abs(delta_pct)
    if magnitude <= 5:
        return "<=5%: ship descriptors"
    if magnitude <= 15:
        return "5-15%: judgment call"
    return ">15%: ship hand-coded"


def _variant_means(buckets, bench, variant):
    """Return ``(mean meanNs, mean allocBytes)`` for one (bench, variant) bucket."""
    rs = buckets.get((bench, variant), [])
    return _summarize(rs, "meanNs")[0], _summarize(rs, "allocBytes")[0]


def main():
    """Aggregate ``launch-*.jsonl`` in the CWD and print two Markdown tables.

    The first table lists, per (bench, variant), the sample count and the
    mean and 95% CI half-width of ``meanNs`` and ``allocBytes``.  The second
    lists the percentage deltas of ReactorDescriptors against ReactorV2 and
    ReactorToday, plus the Q1 band derived from the ns delta vs ReactorV2.
    Undefined statistics (no samples, single sample, zero baseline) are
    printed as a dash rather than ``nan``.
    """
    # Group by (benchId, variant).
    buckets = defaultdict(list)
    for r in _load_ok_rows():
        buckets[(r["benchId"], r["variant"])].append(r)

    # The bench IDs come out of a set, whose iteration order is not stable
    # across runs; tie-break on the ID itself so IDs sharing a sort key
    # (e.g. every non-numeric ID) are still listed deterministically.
    benches = sorted(
        {b for (b, _) in buckets},
        key=lambda b: (_bench_key(b), b),
    )

    # ── Per-(bench, variant) summary table. ──
    print("# Per-(bench, variant) means")
    print()
    print("| Bench | Variant | n | Mean ns | 95% CI ±ns | Mean alloc B | 95% CI ±B |")
    print("|---|---|---:|---:|---:|---:|---:|")
    for b in benches:
        for v in _VARIANTS:
            rs = buckets.get((b, v), [])
            mean_ns, ci_ns, n = _summarize(rs, "meanNs")
            mean_b, ci_b, _ = _summarize(rs, "allocBytes")
            print(
                f"| {b} | {v} | {n} "
                f"| {_cell(mean_ns, ',.0f')} | {_cell(ci_ns, ',.0f')} "
                f"| {_cell(mean_b, ',.0f')} | {_cell(ci_b, ',.0f')} |"
            )
        # NOTE(review): spacer row emitted once per bench, after its
        # variants — confirm this nesting against the original layout.
        print("| | | | | | | |")

    # ── Q1 decision-matrix deltas. ──
    print()
    print("# Q1 head-to-head — ReactorDescriptors deltas")
    print()
    print(
        "| Bench | vs ReactorV2 ns | vs ReactorV2 alloc | vs ReactorToday ns | vs ReactorToday alloc | Q1 band |"
    )
    print("|---|---:|---:|---:|---:|---|")
    for b in benches:
        d_ns, d_b = _variant_means(buckets, b, "ReactorDescriptors")
        v_ns, v_b = _variant_means(buckets, b, "ReactorV2")
        t_ns, t_b = _variant_means(buckets, b, "ReactorToday")

        vs_v2_ns = _pct(d_ns, v_ns)
        vs_v2_b = _pct(d_b, v_b)
        vs_t_ns = _pct(d_ns, t_ns)
        vs_t_b = _pct(d_b, t_b)

        # §13 Q1 matrix bands keyed off the ns delta vs V2.
        band = _q1_band(vs_v2_ns)
        print(
            f"| {b} | {_cell(vs_v2_ns, '+.1f', '%')} | {_cell(vs_v2_b, '+.1f', '%')} "
            f"| {_cell(vs_t_ns, '+.1f', '%')} | {_cell(vs_t_b, '+.1f', '%')} | {band} |"
        )
def _bench_key(s):
    """Return the numeric sort rank for a bench ID such as ``M1`` ... ``M13``.

    Leading ``M`` characters are stripped and the remainder is parsed as an
    integer, so IDs order numerically rather than lexically.  IDs whose
    remainder is not an integer all get rank 999 and therefore sort last.
    """
    digits = s.lstrip("M")
    try:
        rank = int(digits)
    except ValueError:
        rank = 999
    return rank
# Run the aggregation only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()