Skip to content

Commit c640423

Browse files
elainethale and claude committed
test: document gdxcc vs gams_transfer read/write speed across fixtures
Adds tests/test_backend_timing.py: times read+write for both backends over every in-tree fixture (sub-3 KB up to ~1.9 MB) and records the results. A pytest_terminal_summary hook in conftest renders a size-sorted table plus a clear-winner / switchover note at the end of the run. These are not pass/fail performance gates (timings are machine-dependent and measured as a single min-of-3 burst); the only assertion is that both backends ran. The robust finding: gdxcc wins on small files (transfer's Container overhead is not amortized) while gams.transfer wins on large ones (~2x read, ~4x write at 1.9 MB), with the write switchover around 100-300 KB. Runs by default; skipped only when gams.transfer is unavailable. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
1 parent e16aa44 commit c640423

2 files changed

Lines changed: 135 additions & 0 deletions

File tree

tests/conftest.py

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,63 @@ def clean_up(request):
2121
return not request.config.getoption("--no-clean-up")
2222

2323

24+
# Rows appended by tests/test_backend_timing.py; rendered once after the run by
25+
# pytest_terminal_summary below. Each row: dict(fixture, size_kb, op, gdxcc,
26+
# gams_transfer, ratio) where ratio = gdxcc / gams_transfer (>1 = transfer faster).
27+
# Deliberately one shared, mutable module-level list: the backend_timings fixture
# returns this same object and pytest_terminal_summary reads it after the run.
_BACKEND_TIMINGS = []
28+
29+
30+
@pytest.fixture(scope="session")
def backend_timings():
    """Hand out the module-level timing-row list.

    The very same list object is returned every time, so anything a test
    appends to it is what ``pytest_terminal_summary`` later finds in
    ``_BACKEND_TIMINGS``.
    """
    shared_rows = _BACKEND_TIMINGS
    return shared_rows
33+
34+
35+
def _crossover_note(rows, op):
36+
"""Describe the gdxcc<->gams_transfer winner across sizes for one op.
37+
38+
Rows are sorted by size; returns "clear winner" text if one backend wins at
39+
every size, else the size band where gams_transfer overtakes gdxcc.
40+
"""
41+
op_rows = sorted((r for r in rows if r["op"] == op), key=lambda r: r["size_kb"])
42+
if not op_rows:
43+
return ""
44+
wins = [r["ratio"] >= 1.0 for r in op_rows] # True = transfer faster
45+
if all(wins):
46+
return f"{op}: gams_transfer faster at every size tested."
47+
if not any(wins):
48+
return f"{op}: gdxcc faster at every size tested (transfer overhead never amortized)."
49+
first = next(i for i, w in enumerate(wins) if w)
50+
below = op_rows[first - 1]
51+
above = op_rows[first]
52+
return (
53+
f"{op}: switchover between {below['size_kb']:.1f} KB ({below['fixture']}, gdxcc faster) "
54+
f"and {above['size_kb']:.1f} KB ({above['fixture']}, transfer faster)."
55+
)
56+
57+
58+
def pytest_terminal_summary(terminalreporter, exitstatus, config):
    """Print the collected backend timings as a table at the end of the run.

    Writes nothing when ``_BACKEND_TIMINGS`` is empty. Otherwise it emits a
    section separator, a legend line, a header with an underline, one line per
    row ordered by ``(size_kb, op)``, and finally the non-empty
    ``_crossover_note`` for "read" and then "write".

    ``exitstatus`` and ``config`` are accepted as part of the hook signature
    but are not used here.
    """
    timings = _BACKEND_TIMINGS
    if not timings:
        return

    reporter = terminalreporter
    reporter.write_sep("=", "backend timing (gdxcc vs gams_transfer)")
    reporter.write_line(
        "min seconds over repeated runs; ratio = gdxcc / gams_transfer (>1 = transfer faster)"
    )

    columns = f"{'fixture':32s} {'size_KB':>9s} {'op':>5s} {'gdxcc':>9s} {'xfer':>9s} {'ratio':>7s}"
    reporter.write_line(columns)
    reporter.write_line("-" * len(columns))

    def size_then_op(row):
        return (row["size_kb"], row["op"])

    for row in sorted(timings, key=size_then_op):
        # Long fixture names are cut to the 32-character column width.
        name = row["fixture"][:32]
        reporter.write_line(
            f"{name:32s} {row['size_kb']:9.1f} {row['op']:>5s} "
            f"{row['gdxcc']:9.4f} {row['gams_transfer']:9.4f} {row['ratio']:7.2f}"
        )

    for operation in ("read", "write"):
        summary = _crossover_note(timings, operation)
        if summary:
            reporter.write_line(summary)
79+
80+
2481
@pytest.fixture(scope="session")
def base_dir():
    """Return the directory that contains this module (``__file__``)."""
    here = os.path.dirname(__file__)
    return here

tests/test_backend_timing.py

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
"""Document the read/write speed difference between the gdxcc and gams_transfer
2+
backends across the in-tree fixtures (sub-3 KB up to ~1.9 MB).
3+
4+
These are not pass/fail performance gates -- timings are machine-dependent. Each
5+
test records its measurements; conftest's ``pytest_terminal_summary`` renders a
6+
size-sorted table plus a clear-winner / switchover note at the end of the run.
7+
The only assertion is that both backends actually ran (a backend that errors on a
8+
fixture fails here rather than silently dropping out of the comparison).
9+
10+
Skipped when gams.transfer is unavailable.
11+
"""
12+
13+
import glob
14+
import os
15+
import time
16+
17+
import pytest
18+
19+
import gdxpds
20+
from gdxpds import to_dataframes, to_gdx
21+
22+
# Module-wide mark: every test in this file is skipped when gdxpds reports that
# gams.transfer is unavailable, since there would be nothing to compare against.
pytestmark = pytest.mark.skipif(not gdxpds.HAVE_GAMS_TRANSFER, reason="gams.transfer not available")
23+
24+
# Sorted basenames of every *.gdx file in the data/ directory next to this module;
# used as the parametrize ids/values for the timing test.
FIXTURES = sorted(
    map(
        os.path.basename,
        glob.glob(os.path.join(os.path.dirname(__file__), "data", "*.gdx")),
    )
)
27+
28+
# Repeats per measurement; the minimum is reported (least perturbed by noise).
29+
# Small because this runs in the default suite and the largest fixture is ~1.9 MB.
30+
_REPEATS = 3
31+
32+
33+
def _min_time(fn, repeats=_REPEATS):
34+
best = float("inf")
35+
for _ in range(repeats):
36+
t = time.perf_counter()
37+
fn()
38+
best = min(best, time.perf_counter() - t)
39+
return best
40+
41+
42+
@pytest.mark.parametrize("fixture", FIXTURES)
def test_backend_timing(data_dir, fixture, tmp_path, backend_timings):
    """Time read and write of one fixture with both backends and record the result.

    Appends two rows (``op`` "read" and "write") to ``backend_timings``, each a
    dict with keys ``fixture``, ``size_kb``, ``op``, ``gdxcc``,
    ``gams_transfer`` and ``ratio`` (= gdxcc / gams_transfer). There is no
    timing threshold: the only assertion is that all four measurements are
    positive.
    """
    path = os.path.join(data_dir, fixture)
    size_kb = os.path.getsize(path) / 1024.0

    # Read (eager / bulk path -- the same one to_dataframes uses).
    read_g = _min_time(lambda: to_dataframes(path, backend="gdxcc"))
    read_t = _min_time(lambda: to_dataframes(path, backend="gams_transfer"))

    # Write: read once (untimed) to get DataFrames, then time each backend's write.
    dfs = to_dataframes(path, backend="gdxcc")
    write_g = _min_time(lambda: to_gdx(dfs, str(tmp_path / "g.gdx"), backend="gdxcc"))
    write_t = _min_time(lambda: to_gdx(dfs, str(tmp_path / "t.gdx"), backend="gams_transfer"))

    # Sanity only: both backends ran for both ops (no timing threshold).  Checked
    # *before* the ratios are computed so a zero measurement surfaces as this
    # assertion failing rather than as a ZeroDivisionError in the division below.
    assert min(read_g, read_t, write_g, write_t) > 0

    for op, gdxcc_s, transfer_s in (("read", read_g, read_t), ("write", write_g, write_t)):
        backend_timings.append(
            {
                "fixture": fixture,
                "size_kb": size_kb,
                "op": op,
                "gdxcc": gdxcc_s,
                "gams_transfer": transfer_s,
                "ratio": gdxcc_s / transfer_s,
            }
        )

0 commit comments

Comments
 (0)