forked from mozilla/neqo
-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathperfcompare.py
More file actions
426 lines (370 loc) · 14.3 KB
/
perfcompare.py
File metadata and controls
426 lines (370 loc) · 14.3 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
#!/usr/bin/env python3
"""Compare QUIC implementations using hyperfine and perf."""
import argparse
import json
import math
import os
import re
import shlex
import shutil
import subprocess
import tempfile
import time
from dataclasses import dataclass
from pathlib import Path
from statistics import mean as avg, variance
from typing import NamedTuple
class ImplConfig(NamedTuple):
    """Command templates and flags describing one QUIC implementation.

    The command strings are templates: ``{host}``, ``{port}``, ``{size}`` and
    ``{tmp}`` are filled in with ``str.format`` and the ``_cc``, ``_pacing``,
    ``_flags`` and ``_disk`` placeholders are substituted by ``mangle``.
    """

    # Client command line, relative to the workspace directory.
    client_cmd: str
    # Server command line, relative to the workspace directory.
    server_cmd: str
    # Text substituted for the ``_disk`` placeholder of the client command
    # during the verification transfer (empty when the client has none).
    disk_flag: str
    # Text substituted for ``_flags`` in the command of the neqo peer this
    # implementation is paired with.
    interop_flag: str
# Known implementations, keyed by the short name used in result file names.
# Insertion order matters: it is the order in which pairs are benchmarked.
# fmt: off
IMPLS = {
    "neqo": ImplConfig(
        client_cmd="build-neqo/neqo/neqo-client _cc _pacing _disk _flags -Q 1 https://{host}:{port}/{size}",
        server_cmd="build-neqo/neqo/neqo-server _cc _pacing _flags -Q 1 {host}:{port}",
        disk_flag="--output-dir .",
        interop_flag="",
    ),
    "msquic": ImplConfig(
        client_cmd="build-msquic/quicinterop -test:D -custom:{host} -port:{port} -urls:https://{host}:{port}/{size}",
        server_cmd="build-msquic/quicinteropserver -root:{tmp} -listen:{host} -port:{port} -file:{tmp}/cert -key:{tmp}/key -noexit",
        disk_flag="",
        interop_flag="-a hq-interop",
    ),
    "google": ImplConfig(
        client_cmd="build-google/quic_client --disable_certificate_verification https://{host}:{port}/{size}",
        server_cmd="build-google/quic_server --generate_dynamic_responses --port {port} --certificate_file {tmp}/cert --key_file {tmp}/key",
        disk_flag="",
        interop_flag="",
    ),
    "quiche": ImplConfig(
        client_cmd="build-quiche/quiche-client _disk --no-verify https://{host}:{port}/{size}",
        server_cmd="build-quiche/quiche-server --root {tmp} --listen {host}:{port} --cert {tmp}/cert --key {tmp}/key",
        disk_flag="--dump-responses .",
        interop_flag="",
    ),
    "s2n": ImplConfig(
        client_cmd="build-s2n/s2n-quic-qns interop client --tls rustls --disable-cert-verification _disk --local-ip {host} https://{host}:{port}/{size}",
        server_cmd="build-s2n/s2n-quic-qns interop server --www-dir {tmp} --certificate {tmp}/cert --private-key {tmp}/key --ip {host} --port {port}",
        disk_flag="--download-dir .",
        interop_flag="-a hq-interop",
    ),
}
# fmt: on
@dataclass
class Cfg:
    """Settings shared by every benchmark step."""

    # Host and port substituted into the client and server command templates.
    host: str
    port: int
    # Size in bytes of the transferred file.
    size: int
    # Passed to hyperfine as ``--min-runs``.
    runs: int
    # Directory holding the ``build-*`` trees and receiving all result files.
    workspace: Path
    # Arguments handed to ``perf`` (split with shlex).
    perf_opt: str
    # cset names used to pin the server and the client respectively.
    server_set: str = "bench/server"
    client_set: str = "bench/client"
def _tag(cmd: str) -> str:
"""Return a short process tag suitable for pkill from a command string."""
return Path(cmd.split()[0]).name[:15]
def is_significant(s1: list[float], s2: list[float]) -> bool:
    """Report whether the means of two samples differ significantly.

    Uses Welch's t-test with a normal approximation (two-sided, 5% level,
    i.e. |t| > 1.96), which is only valid for roughly n >= 30 per sample.

    Args:
        s1: First sample of measurements.
        s2: Second sample of measurements.

    Returns:
        True when the difference of the means exceeds 1.96 standard errors.
        False when either sample has fewer than two values (no variance can
        be estimated) or when the standard error is zero.
    """
    n1, n2 = len(s1), len(s2)
    # variance() raises StatisticsError below two data points; a sample that
    # small cannot support any claim of significance.
    if n1 < 2 or n2 < 2:
        return False
    se = math.sqrt(variance(s1) / n1 + variance(s2) / n2)
    if se <= 0:
        return False
    return abs(avg(s1) - avg(s2)) / se > 1.96
def sh(cmd, **kw):
    """Run *cmd* with ``subprocess.run`` and return the CompletedProcess.

    A string is tokenised with ``shlex.split`` (no shell is involved); any
    other value is passed through as the argument list. ``check`` defaults to
    False unless the caller supplies it; all other keyword arguments are
    forwarded unchanged.
    """
    argv = shlex.split(cmd) if isinstance(cmd, str) else cmd
    if "check" not in kw:
        kw["check"] = False
    return subprocess.run(argv, **kw)
def mangle(cmd, cc, pacing, flags, disk):
    """Fill the ``_cc``/``_pacing``/``_flags``/``_disk`` placeholders of *cmd*.

    Args:
        cmd: Command template.
        cc: Congestion control name; empty to omit the ``--cc`` option.
        pacing: When false, ``--no-pacing`` is inserted.
        flags: Replacement for ``_flags``.
        disk: Replacement for ``_disk``.

    Returns:
        ``(command, suffix)``: the command with runs of whitespace collapsed
        to single spaces, and the result-name suffix encoding *cc* and
        *pacing* (e.g. ``-cubic-nopacing``).
    """
    substitutions = (
        ("_cc", f"--cc {cc}" if cc else ""),
        ("_pacing", "" if pacing else "--no-pacing"),
        ("_flags", flags),
        ("_disk", disk),
    )
    for placeholder, value in substitutions:
        cmd = cmd.replace(placeholder, value)

    suffix = ""
    if cc:
        suffix = f"-{cc}"
    if not pacing:
        suffix += "-nopacing"

    # Empty substitutions leave gaps behind; squeeze them out.
    return re.sub(r"\s+", " ", cmd).strip(), suffix
def kill_port(port: int) -> None:
    """Run ``sudo fuser -k`` for *port* on UDP and then on TCP.

    Output is discarded and the exit status is ignored, so nothing happens
    when no process holds the port.
    """
    quiet = {"stdout": subprocess.DEVNULL, "stderr": subprocess.DEVNULL}
    for target in (f"{port}/udp", f"{port}/tcp"):
        subprocess.run(["sudo", "fuser", "-k", target], **quiet)
def kill_servers() -> None:
    """Send SIGKILL (via ``sudo pkill -9``) to every known server binary.

    One pkill is issued per entry of ``IMPLS``; output is discarded and the
    exit status is ignored.
    """
    for tag in (_tag(impl.server_cmd) for impl in IMPLS.values()):
        subprocess.run(
            ["sudo", "pkill", "-9", tag],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )
def setup(cfg):
    """Clear leftover servers and build the scratch directory for a run.

    The directory contains an ``out`` subdirectory, a self-signed certificate
    (``cert``) with its private key (``key``), and two sparse files named
    after their sizes: ``cfg.size`` and ``cfg.size * 20`` bytes.

    Returns:
        Path of the newly created temporary directory.

    Raises:
        subprocess.CalledProcessError: if ``openssl`` or ``truncate`` fails.
    """
    kill_servers()
    kill_port(cfg.port)

    tmp = Path(tempfile.mkdtemp())
    (tmp / "out").mkdir()

    key_path, cert_path = tmp / "key", tmp / "cert"
    openssl_cmd = ["openssl", "req", "-nodes", "-new", "-x509"]
    openssl_cmd += ["-keyout", str(key_path)]
    openssl_cmd += ["-out", str(cert_path)]
    openssl_cmd += ["-subj", "/CN=DOMAIN"]
    sh(openssl_cmd, check=True, stderr=subprocess.DEVNULL)

    # One file for the benchmark itself, one 20x larger for profiling.
    for multiplier in (1, 20):
        nbytes = cfg.size * multiplier
        sh(["truncate", "-s", str(nbytes), str(tmp / str(nbytes))], check=True)
    return tmp
def verify(cfg, tmp, client, server_cmd, client_cmd):
    """Perform one transfer and check that the download has the full size.

    The server is started from ``tmp / "out"``, the client is run once, and
    the server is then stopped (SIGKILL after a 5 s grace period). The
    working directory is switched to ``cfg.workspace`` afterwards.

    Args:
        cfg: Benchmark configuration (``workspace`` and ``size`` are used).
        tmp: Scratch directory created by ``setup``.
        client: Name of the client implementation.
        server_cmd: Server command, relative to the workspace.
        client_cmd: Client command, relative to the workspace.

    Returns:
        True if ``tmp / "out" / str(cfg.size)`` exists and holds at least
        ``cfg.size`` bytes.
    """
    server_tag = _tag(server_cmd)
    download = tmp / "out" / str(cfg.size)
    full_client = f"{cfg.workspace}/{client_cmd}"

    os.chdir(tmp / "out")
    server = subprocess.Popen(
        shlex.split(f"{cfg.workspace}/{server_cmd}"),
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
    )
    # Give the server a moment to start listening.
    time.sleep(0.2)
    try:
        if client == "google":
            # This client's stdout is captured as the download file.
            with open(download, "w", encoding="utf-8") as sink:
                sh(full_client, stdout=sink, stderr=subprocess.DEVNULL)
        else:
            sh(
                full_client,
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
            )
    finally:
        sh(["sudo", "pkill", server_tag])
        try:
            server.wait(timeout=5)
        except subprocess.TimeoutExpired:
            sh(["sudo", "pkill", "-9", server_tag])
            server.wait(timeout=5)
        os.chdir(cfg.workspace)

    # NOTE(review): the file is never removed before the transfer, so one
    # left behind by an earlier call also satisfies this check - confirm
    # that this is intended.
    if not download.exists():
        return False
    return download.stat().st_size >= cfg.size
def _sudo_nice_env() -> list[str]:
"""Prefix for elevated-priority subprocesses: sudo resets env, so restore
the vars that neqo binaries need to find NSS libraries and certificates."""
# TODO: Remove NSS_DB_PATH once baseline uses nss-rs >= 0.11.0
env_vars = {k: os.environ[k] for k in ("LD_LIBRARY_PATH", "TEST_FIXTURE_DB", "NSS_DB_PATH") if k in os.environ}
env_args = [f"{k}={v}" for k, v in env_vars.items()]
return ["sudo", "nice", "-n", "-20"] + (["env"] + env_args if env_args else [])
def hyperfine(cfg, scmd, ccmd, name, out_dir, md=False):
    """Benchmark one client/server pair with hyperfine.

    Args:
        cfg: Benchmark configuration.
        scmd: Server command, relative to the workspace.
        ccmd: Client command, relative to the workspace.
        name: Command name; also the stem of the exported result files.
        out_dir: Directory receiving ``<name>.json`` (created if missing).
        md: Also export ``<name>.md`` when true.

    Raises:
        subprocess.CalledProcessError: if hyperfine exits with an error.
    """
    server_tag = shlex.quote(_tag(scmd))
    workspace = shlex.quote(str(cfg.workspace))
    out_dir.mkdir(exist_ok=True)

    server_tasks = f"/cpusets/{shlex.quote(cfg.server_set)}/tasks"
    client_tasks = f"/cpusets/{shlex.quote(cfg.client_set)}/tasks"
    # Start the server in the background and move it into its cpuset.
    start_server = f"{workspace}/{scmd} & echo $! >> {server_tasks}; sleep 0.2"
    stop_server = f"pkill -9 {server_tag}"
    # The measured command moves its own shell into the client cpuset first.
    run_client = f"echo $$ >> {client_tasks}; {workspace}/{ccmd}"

    argv = [
        *_sudo_nice_env(),
        "setarch",
        "--addr-no-randomize",
        shutil.which("hyperfine") or "hyperfine",
    ]
    argv += ["--command-name", name]
    argv += ["--time-unit", "millisecond"]
    argv += ["--export-json", str(out_dir / f"{name}.json")]
    argv += ["--output", "null"]
    argv += ["--warmup", "5"]
    argv += ["--min-runs", str(cfg.runs)]
    argv += ["--prepare", start_server]
    argv += ["--conclude", stop_server]
    if md:
        argv += ["--export-markdown", str(out_dir / f"{name}.md")]
    argv.append(run_client)
    sh(argv, check=True)
def perf(cfg, scmd, ccmd, name):
    """Profile one transfer of a file 20x the benchmark size with perf.

    Server and client are each run under ``perf`` inside their cset; the
    profiles are written to ``<workspace>/<name>.server.perf`` and
    ``<workspace>/<name>.client.perf``.

    Args:
        cfg: Benchmark configuration.
        scmd: Server command, relative to the workspace.
        ccmd: Client command, relative to the workspace; its requested
            resource ``/<cfg.size>`` is replaced by ``/<cfg.size * 20>``.
        name: Stem of the profile file names.
    """
    tag, ws = _tag(scmd), cfg.workspace
    # Rewrite only the "/<size>" path segment. A plain str.replace would also
    # rewrite the same digits wherever else they occur in the command (for
    # example a port number equal to the size).
    ccmd = re.sub(rf"/{cfg.size}(?=\s|$)", f"/{cfg.size * 20}", ccmd)

    def perf_cmd(cset, out, exe):
        return (
            [*_sudo_nice_env(), "setarch", "--addr-no-randomize",
             "cset", "proc", f"--set={cset}", "--exec", "perf", "--"]
            + shlex.split(cfg.perf_opt)
            + ["-o", f"{ws}/{out}"]
            + shlex.split(f"{ws}/{exe}")
        )

    proc = subprocess.Popen(
        perf_cmd(cfg.server_set, f"{name}.server.perf", scmd),
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
    )
    try:
        # Give the server a moment to start listening.
        time.sleep(0.2)
        client_cmd = perf_cmd(cfg.client_set, f"{name}.client.perf", ccmd)
        sh(client_cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
    finally:
        # Always stop the server, even if the client step raised, so it does
        # not keep the port busy for the next pair (same pattern as verify).
        sh(["sudo", "pkill", tag])
        try:
            proc.wait(timeout=5)
        except subprocess.TimeoutExpired:
            sh(["sudo", "pkill", "-9", tag])
            proc.wait(timeout=5)
def process(cfg, name, bold):
    """Turn the hyperfine results for *name* into one Markdown table row.

    Reads ``hyperfine/<name>.json`` and ``hyperfine/<name>.md`` from the
    workspace and, when present, ``hyperfine-baseline/<name>.json`` for the
    comparison columns.

    Args:
        cfg: Benchmark configuration (``workspace`` and ``size`` are used).
        name: Result name, e.g. ``neqo-neqo-cubic``.
        bold: Render the name in bold.

    Returns:
        The table row (newline-terminated), or None when a result file is
        missing or the Markdown export contains no data row.

    Raises:
        ValueError: if no "± <value>" can be found in the Markdown export.
    """
    rj = cfg.workspace / "hyperfine" / f"{name}.json"
    rm = cfg.workspace / "hyperfine" / f"{name}.md"
    bj = cfg.workspace / "hyperfine-baseline" / f"{name}.json"
    if not rj.exists() or not rm.exists():
        return None

    res = json.loads(rj.read_text(encoding="utf-8"))["results"][0]
    mean, times = res["mean"], res["times"]
    md = rm.read_text(encoding="utf-8")
    # First table line that is neither the header nor the alignment row.
    match = next(
        (
            x
            for x in md.splitlines()
            if x.startswith("|") and "Command" not in x and ":--" not in x
        ),
        None,
    )
    if not match:
        return None
    parts = match.replace("`", "").split("|")
    b = "**" if bold else ""
    row = f"| {b}{parts[1].strip()}{b} |{'|'.join(parts[2:5])}|"

    m = re.search(r"± *(\S+)", md)
    if not m:
        raise ValueError(f"Could not parse standard deviation from {rm}")
    # The Markdown export is in milliseconds while the JSON mean is in
    # seconds; bring sigma to seconds before combining the two.
    sigma = float(m.group(1)) / 1000
    rate = (cfg.size / 1048576) / mean
    # First-order error propagation for rate = size / t:
    # sigma_rate = rate * sigma_t / t. This also stays finite when sigma
    # is reported as 0.
    rate_sigma = rate * sigma / mean
    row += f" {rate:.1f} ± {rate_sigma:.1f} "

    if bj.exists():
        base = json.loads(bj.read_text(encoding="utf-8"))["results"][0]
        delta = (mean - base["mean"]) * 1000  # seconds -> milliseconds
        pct = (mean - base["mean"]) / base["mean"] * 100
        if is_significant(base["times"], times):
            sym = ":broken_heart:" if delta > 0 else ":green_heart:"
            print(
                f"Performance {'regressed' if delta > 0 else 'improved'}: {base['mean']} -> {mean}"
            )
            row += f"| {sym} **{delta:.1f}** | **{pct:.1f}%** |\n"
        else:
            print(f"No significant change: {base['mean']} -> {mean}")
            row += f"| {delta:.1f} | {pct:.1f}% |\n"
    elif "neqo" in name:
        print("No cached baseline found.")
        row += "| :question: | :question: |\n"
    else:
        row += "| | |\n"
    return row
def run(cfg, tmp):
    """Benchmark every client/server pair that involves neqo or is same-impl.

    For each pair and each congestion-control/pacing variant this verifies
    one transfer, runs hyperfine (plus a baseline run for pairs involving
    neqo), records perf profiles and collects the resulting table row.

    Args:
        cfg: Benchmark configuration.
        tmp: Scratch directory created by ``setup``.

    Returns:
        List of Markdown table rows, in execution order.

    Raises:
        RuntimeError: if a verification transfer fails.
    """
    fields = {"host": cfg.host, "port": cfg.port, "size": cfg.size, "tmp": tmp}

    def fmt(template):
        return template.format(**fields)

    rows = []
    for server, scfg in IMPLS.items():
        for client, ccfg in IMPLS.items():
            involves_neqo = "neqo" in (client, server)
            if client != server and not involves_neqo:
                print(f"Skipping {client} vs. {server}")
                continue
            print(f"*** {client} vs. {server}")

            # Keep a copy of each binary under <workspace>/binaries.
            # NOTE(review): when client == server the condition below picks
            # client_cmd on both iterations, so the server binary is only
            # copied by some other pairing - confirm that this is intended.
            for impl in (client, server):
                if impl == client:
                    template = IMPLS[impl].client_cmd
                else:
                    template = IMPLS[impl].server_cmd
                src = cfg.workspace / template.split()[0]
                if not src.exists():
                    continue
                dst = cfg.workspace / "binaries" / src.name
                if dst.exists():
                    continue
                shutil.copy2(src, dst)
                dst.chmod(0o755)

            if client == "neqo" and server == "neqo":
                variants = [
                    (cc, pacing)
                    for cc in ("newreno", "cubic")
                    for pacing in (True, False)
                ]
            elif involves_neqo:
                variants = [("cubic", True)]
            else:
                variants = [("", False)]

            for cc, pacing in variants:
                # A neqo server takes the client's interop flags; a neqo
                # client takes the server's.
                server_flags = ccfg.interop_flag if server == "neqo" else ""
                client_flags = scfg.interop_flag if client == "neqo" else ""
                client_template = fmt(ccfg.client_cmd)

                scmd, ext = mangle(fmt(scfg.server_cmd), cc, pacing, server_flags, "")
                # The disk-writing client variant is used for verification only.
                ccmd_disk, _ = mangle(
                    client_template, cc, pacing, client_flags, ccfg.disk_flag
                )
                ccmd, _ = mangle(client_template, cc, pacing, client_flags, "")
                name = f"{client}-{server}{ext}"

                if not verify(cfg, tmp, client, scmd, ccmd_disk):
                    raise RuntimeError(f"Transfer failed: {client} vs. {server}")

                if involves_neqo:
                    # Same pair, with the baseline neqo binaries swapped in.
                    hyperfine(
                        cfg,
                        scmd.replace("/neqo/", "/neqo-baseline/"),
                        ccmd.replace("/neqo/", "/neqo-baseline/"),
                        name,
                        cfg.workspace / "hyperfine-baseline",
                    )
                hyperfine(cfg, scmd, ccmd, name, cfg.workspace / "hyperfine", md=True)
                perf(cfg, scmd, ccmd, name)

                # NOTE(review): the second clause can only hold when
                # client == server, so it never changes the result - confirm
                # which rows are meant to be bold.
                bold = client == server or (
                    client == "neqo" and server == "neqo" and cc == "cubic" and pacing
                )
                row = process(cfg, name, bold)
                if row:
                    rows.append(row)
    return rows
def main():
    """Parse the command line, run all comparisons and write the reports.

    Creates the ``binaries``, ``hyperfine`` and ``hyperfine-baseline``
    directories and an empty ``results.txt`` in the workspace if they are
    missing. After the run it writes ``steps.md`` (rows in execution order)
    and ``comparison.md`` (header plus rows sorted by name, ignoring bold
    markers). Servers are killed and the scratch directory is removed even
    when the run fails.

    Returns:
        None, so the process exits with status 0 on success.
    """
    p = argparse.ArgumentParser(description="Compare QUIC implementations")
    p.add_argument("--host", default="127.0.0.1")
    p.add_argument("--port", type=int, default=4433)
    p.add_argument("--size", type=int, default=33554432)
    p.add_argument("--runs", type=int, default=100)
    p.add_argument("--workspace", type=Path, default=Path.cwd())
    p.add_argument("--perf-opt", default="record -F2999 --call-graph fp -g")
    p.add_argument("--server-set", default="bench/server", help="cset name for the server CPU")
    p.add_argument("--client-set", default="bench/client", help="cset name for the client CPU")
    a = p.parse_args()

    cfg = Cfg(
        host=a.host, port=a.port, size=a.size, runs=a.runs,
        # verify() changes the working directory before launching
        # "<workspace>/<binary>", so a relative workspace would stop
        # resolving; pin it to an absolute path up front.
        workspace=a.workspace.resolve(), perf_opt=a.perf_opt,
        server_set=a.server_set, client_set=a.client_set,
    )
    for d in ("binaries", "hyperfine", "hyperfine-baseline"):
        (cfg.workspace / d).mkdir(exist_ok=True)
    (cfg.workspace / "results.txt").touch()

    tmp = setup(cfg)
    try:
        steps = run(cfg, tmp)
    finally:
        kill_servers()
        kill_port(cfg.port)
        shutil.rmtree(tmp, ignore_errors=True)

    (cfg.workspace / "steps.md").write_text("".join(steps), encoding="utf-8")
    header = (
        f"Transfer of {cfg.size} bytes over loopback, min. {cfg.runs} runs. "
        "All unit-less numbers are in milliseconds.\n\n"
        "| Client vs. server (params) | Mean ± σ | Min | Max | MiB/s ± σ | Δ `baseline` | Δ `baseline` |\n"
        "|:---|---:|---:|---:|---:|---:|---:|\n"
    )
    # Strip a leading bold marker so bold rows sort next to their siblings.
    sorted_steps = sorted(steps, key=lambda r: re.sub(r"^\| \*\*", "| ", r))
    (cfg.workspace / "comparison.md").write_text(
        header + "".join(sorted_steps), encoding="utf-8"
    )


if __name__ == "__main__":
    raise SystemExit(main())