Skip to content

Commit e3f845f

Browse files
jlowin and Claude authored
Improve real-world schema crash test: failure dump, cluster analysis, TypeErrors baseline ratchet (#3958)
Co-authored-by: Claude <noreply@anthropic.com>
1 parent 34313ea commit e3f845f

3 files changed

Lines changed: 198 additions & 6 deletions

File tree

Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,107 @@
1+
"""Cluster real-world schema failure records by exception signature.
2+
3+
Usage
4+
-----
5+
Run the crash test with DUMP_SCHEMA_FAILURES to collect per-failure records,
6+
then run this script to cluster them:
7+
8+
# 1. Run the crash test with failure dumping enabled:
9+
RUN_REAL_WORLD_SCHEMA_TEST=1 \\
10+
OPENAPI_DIRECTORY_PATH=/tmp/openapi-directory \\
11+
DUMP_SCHEMA_FAILURES=/tmp/schema_failures \\
12+
uv run pytest tests/utilities/json_schema_type/test_real_world_schemas.py \\
13+
-m integration -n auto --timeout-method=thread -q
14+
15+
# 2. Cluster the failures:
16+
python tests/utilities/json_schema_type/cluster_failures.py
17+
18+
# 3. (Optional) specify a custom dump directory:
19+
DUMP_SCHEMA_FAILURES=/my/path python tests/utilities/json_schema_type/cluster_failures.py
20+
21+
Each JSONL record written by the test has the shape:
22+
{
23+
"provider": "amazonaws.com",
24+
"name": "TagKey",
25+
"bucket": "schema_errors", # type_errors | schema_errors | timeouts | other_errors
26+
"error_type": "SchemaError",
27+
"error_msg": "...",
28+
"schema": "{...}" # JSON-encoded, truncated to 2000 chars
29+
}
30+
31+
Workflow for fixing a cluster
32+
------------------------------
33+
1. Identify the top cluster(s) by count.
34+
2. Grab the example schema from the cluster output.
35+
3. Reproduce in a unit test: add a test to test_json_schema_type.py that calls
36+
json_schema_to_type() with that schema and asserts the correct type is returned.
37+
4. Fix the root cause in src/fastmcp/utilities/json_schema_type.py.
38+
5. Re-run the crash test — confirm the cluster count drops.
39+
6. Ratchet the baseline in tests/utilities/json_schema_type/conftest.py:
40+
- Lower MAX_TYPE_ERRORS / MAX_SCHEMA_ERRORS to the new actual count.
41+
- Add a comment with the date and what was fixed.
42+
7. Commit.
43+
"""
44+
45+
from __future__ import annotations
46+
47+
import collections
48+
import json
49+
import os
50+
import re
51+
import sys
52+
from pathlib import Path
53+
54+
# Directory the crash test dumped per-failure JSONL records into
# (same default as the test itself).
DUMP_DIR = Path(os.environ.get("DUMP_SCHEMA_FAILURES", "/tmp/schema_failures"))

# Matches the volatile parts of an error message — quoted identifiers of 3+
# chars, hex addresses, and bare integers — so messages differing only in
# those details cluster together.
_NORMALIZE_RE = re.compile(r"'[^']{3,}'|\"[^\"]{3,}\"|0x[0-9a-fA-F]+|\b\d+\b")


def normalize(msg: str) -> str:
    """Collapse an error message into a short, stable cluster signature.

    Keeps only the first three lines, replaces volatile tokens (quoted
    strings, hex addresses, integers) with ``<X>``, and caps the result
    at 300 characters.
    """
    leading_lines = msg.splitlines()[:3]
    collapsed = _NORMALIZE_RE.sub("<X>", "\n".join(leading_lines))
    return collapsed[:300]
62+
63+
64+
def main() -> None:
    """Load dumped failure records, then print bucket and cluster summaries.

    Exits with status 1 when the dump directory is missing; returns early
    when the directory exists but contains no failure records.
    """
    if not DUMP_DIR.exists():
        print(f"No dump directory found at {DUMP_DIR}.")
        print("Run the crash test with DUMP_SCHEMA_FAILURES set first.")
        sys.exit(1)

    # One record per non-blank line across every per-provider JSONL file.
    records = [
        json.loads(raw)
        for jsonl_file in DUMP_DIR.glob("*.jsonl")
        for raw in jsonl_file.read_text().splitlines()
        if raw.strip()
    ]

    if not records:
        print(f"No failure records found in {DUMP_DIR}. All schemas passed!")
        return

    print(f"Total failures: {len(records)}")
    print()

    # Coarse breakdown by failure bucket (type_errors, schema_errors, ...).
    bucket_counts: collections.Counter[str] = collections.Counter(
        rec["bucket"] for rec in records
    )
    for k, v in bucket_counts.most_common():
        print(f" {k}: {v}")
    print()

    # Fine-grained clustering keyed on (bucket, exception type, normalized
    # message signature); keep the first record seen as the cluster example.
    cluster_counts: collections.Counter[tuple[str, str, str]] = collections.Counter()
    first_example: dict[tuple[str, str, str], dict] = {}
    for rec in records:
        sig_key = (rec["bucket"], rec["error_type"], normalize(rec["error_msg"]))
        cluster_counts[sig_key] += 1
        first_example.setdefault(sig_key, rec)

    print(f"=== Top clusters (of {len(cluster_counts)} total) ===")
    for (bucket, etype, sig), count in cluster_counts.most_common():
        print(f"\n[{count:>4}x] {bucket} / {etype}")
        print(f" sig: {sig[:150]}")
        ex = first_example[(bucket, etype, sig)]
        print(f" ex : provider={ex['provider']} name={ex['name']}")
        print(f" msg: {ex['error_msg'][:200]}")


if __name__ == "__main__":
    main()

tests/utilities/json_schema_type/conftest.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -69,9 +69,11 @@ def pytest_sessionfinish(session: pytest.Session, exitstatus: int) -> None:
6969
f"Total crashes: {crashes:,} ({crashes / max(totals['schemas'], 1) * 100:.2f}%)"
7070
)
7171

72-
# Snapshot baselines (captured 2026-04-10, openapi-directory@f7207cf0,
73-
# origin/main, with JSON round-trip to strip YAML artifacts).
74-
MAX_TYPE_ERRORS = 420 # was 388 — real json_schema_to_type bugs
72+
# Snapshot baselines (openapi-directory@f7207cf0).
73+
# Ratcheted 2026-04-17: TypeErrors 420→0 (already fixed on main since
74+
# original 2026-04-10 capture). SchemaErrors unchanged — all 279 are
75+
# Pydantic Rust-regex rejections (lookahead, \p{…}, size limits).
76+
MAX_TYPE_ERRORS = 0
7577
MAX_SCHEMA_ERRORS = 300 # was 277 — Pydantic regex rejections (not our code)
7678
MAX_TIMEOUTS = 5 # was 0
7779
MAX_OTHER_ERRORS = 50 # was 0

tests/utilities/json_schema_type/test_real_world_schemas.py

Lines changed: 86 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,49 @@
77
shows progress and can identify which provider caused a hang.
88
99
Marked as an integration test — skipped by default, run with:
10-
uv run pytest tests/utilities/json_schema_type/test_real_world_schemas.py -m integration -v
10+
11+
uv run pytest tests/utilities/json_schema_type/test_real_world_schemas.py -m integration -n auto --timeout-method=thread -q
12+
13+
On a 16-core machine this takes ~3-5 minutes. The CI workflow
14+
(.github/workflows/run-schema-crash-test.yml) runs it on push/PR when
15+
relevant source files change.
16+
17+
## Aggregate baselines
18+
19+
After each run, conftest.py compares aggregate crash counts against baseline
20+
caps in ``conftest.py``. Ratchet those caps DOWN whenever a cluster of errors
21+
is fixed — never up. See ``conftest.py`` for the current caps and comments
22+
on what was fixed.
23+
24+
## Collecting and analysing failures
25+
26+
Set ``DUMP_SCHEMA_FAILURES`` to a directory to write per-failure JSONL records:
27+
28+
RUN_REAL_WORLD_SCHEMA_TEST=1 \\
29+
OPENAPI_DIRECTORY_PATH=/tmp/openapi-directory \\
30+
DUMP_SCHEMA_FAILURES=/tmp/schema_failures \\
31+
uv run pytest tests/utilities/json_schema_type/test_real_world_schemas.py \\
32+
-m integration -n auto --timeout-method=thread -q
33+
34+
Then cluster the results to find root causes:
35+
36+
python tests/utilities/json_schema_type/cluster_failures.py
37+
38+
See ``cluster_failures.py`` for the full fix workflow (reproduce → unit test →
39+
fix → ratchet baseline → commit).
40+
41+
## Cloning the corpus locally
42+
43+
The test auto-clones when ``RUN_REAL_WORLD_SCHEMA_TEST=1``. To pre-clone
44+
manually (~800 MB):
45+
46+
git clone --depth 1 https://github.com/APIs-guru/openapi-directory.git /tmp/openapi-directory
47+
48+
Then point at it:
49+
50+
OPENAPI_DIRECTORY_PATH=/tmp/openapi-directory \\
51+
RUN_REAL_WORLD_SCHEMA_TEST=1 \\
52+
uv run pytest ...
1153
"""
1254

1355
from __future__ import annotations
@@ -223,6 +265,37 @@ def _test_provider(provider: str) -> ProviderResult:
223265
result = ProviderResult()
224266
use_alarm = hasattr(signal, "SIGALRM")
225267

268+
# Optional per-failure dump: when DUMP_SCHEMA_FAILURES is set to a directory,
269+
# write one JSONL record per failure so we can cluster them later.
270+
dump_dir_env = os.environ.get("DUMP_SCHEMA_FAILURES")
271+
dump_path: Path | None = None
272+
if dump_dir_env:
273+
dump_path = Path(dump_dir_env) / f"{provider}.jsonl"
274+
dump_path.parent.mkdir(parents=True, exist_ok=True)
275+
# Truncate any prior content from a previous run of this provider.
276+
dump_path.write_text("")
277+
278+
def _record_failure(
279+
bucket: str, schema_name: str, schema_obj: dict, exc: BaseException
280+
) -> None:
281+
if dump_path is None:
282+
return
283+
# Cap the schema snippet so huge recursive specs don't blow up disk.
284+
try:
285+
schema_repr = json.dumps(schema_obj, default=str)[:2000]
286+
except Exception:
287+
schema_repr = "<unserializable>"
288+
record = {
289+
"provider": provider,
290+
"name": schema_name,
291+
"bucket": bucket,
292+
"error_type": type(exc).__name__,
293+
"error_msg": str(exc)[:500],
294+
"schema": schema_repr,
295+
}
296+
with dump_path.open("a") as fh:
297+
fh.write(json.dumps(record) + "\n")
298+
226299
for spec_file in _spec_files_for_provider(provider):
227300
spec = _load_spec(spec_file)
228301
if spec is None:
@@ -246,16 +319,26 @@ def _test_provider(provider: str) -> ProviderResult:
246319
try:
247320
T = json_schema_to_type(schema)
248321
TypeAdapter(T)
249-
except _SchemaTimeout:
322+
except _SchemaTimeout as e:
323+
if use_alarm:
324+
signal.alarm(0)
250325
result.timeouts += 1
251-
except TypeError:
326+
_record_failure("timeouts", _name, schema, e)
327+
except TypeError as e:
328+
if use_alarm:
329+
signal.alarm(0)
252330
result.type_errors += 1
331+
_record_failure("type_errors", _name, schema, e)
253332
except Exception as e:
333+
if use_alarm:
334+
signal.alarm(0)
254335
err_type = type(e).__name__
255336
if "SchemaError" in err_type or "schema" in str(e).lower()[:50]:
256337
result.schema_errors += 1
338+
_record_failure("schema_errors", _name, schema, e)
257339
else:
258340
result.other_errors += 1
341+
_record_failure("other_errors", _name, schema, e)
259342
finally:
260343
if use_alarm:
261344
signal.alarm(0)

0 commit comments

Comments
 (0)