Skip to content

Commit e47cec6

Browse files
jvalin17 and cursoragent committed
Add TDD strict mode and address eval-report code quality gaps.
Ship tdd_mode strict blocking with last_test_edits tracking, consolidate path protection and finalize render helpers, add pytest.ini for stable mechanical runs, and remove dead imports. Demo features skipped per request. Co-authored-by: Cursor <cursoragent@cursor.com>
1 parent 9d7e214 commit e47cec6

14 files changed

Lines changed: 444 additions & 334 deletions

gate/scripts/verify_gate.py

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -19,7 +19,6 @@ def _default_project_root() -> Path:
1919

2020
from gate.attest import build_attestation, git_head_sha, write_attestation # noqa: E402
2121
from gate.core import ( # noqa: E402
22-
find_gates_config,
2322
load_gates_config,
2423
read_token,
2524
verify_token,

gates.json

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,7 @@
88
"auto": false,
99
"continue": false,
1010
"tdd": true,
11+
"tdd_mode": "remind",
1112
"skill_routing": true,
1213
"model": "auto",
1314
"gate_protect": true,

hooks/finalize_render.py

Lines changed: 73 additions & 157 deletions
Original file line number | Diff line number | Diff line change
@@ -9,17 +9,44 @@
99
from gate.attest import CheckResult
1010

1111

12+
def _utc_date() -> str:
13+
return datetime.now(timezone.utc).strftime("%Y-%m-%d")
14+
15+
16+
def _report_header(skill: str, title: str, slug: str, report_id: str, extra_rows: str = "") -> str:
17+
date = _utc_date()
18+
return f"""<!-- agent-toolkit:{skill} | v1 | {date} | {report_id} -->
19+
<!-- writer: hooks/finalize_report.py — agent did not write this file -->
20+
# {title}
21+
22+
| Field | Value |
23+
|-------|-------|
24+
| Status | completed |
25+
| Writer | hooks/finalize_report.py |
26+
| Skill | {skill} |
27+
| Slug | {slug} |
28+
| Date (UTC) | {date} |
29+
{extra_rows}"""
30+
31+
32+
def _append_summary_and_gate(md: str, summary: str, final_marker: str, pass_line: str = "") -> str:
33+
if summary:
34+
md += f"\n## Summary\n\n{summary}\n"
35+
if pass_line:
36+
md += f"\n{pass_line}\n"
37+
md += f"\n## Final Gate\n\n{final_marker}\n"
38+
return md
39+
40+
1241
def mechanical_table(test: CheckResult, lint: CheckResult) -> str:
1342
test_status = "passed" if test.passed else "FAILED"
1443
lint_status = "passed" if lint.passed else "FAILED"
15-
test_detail = trim_detail(test.detail)
16-
lint_detail = trim_detail(lint.detail)
1744
return f"""## Mechanical Re-run (hook-owned)
1845
1946
| Check | Command | Result | Detail |
2047
|-------|---------|--------|--------|
21-
| tests | `{test.name}` | {test_status} | {test_detail} |
22-
| lint | `{lint.name}` | {lint_status} | {lint_detail} |
48+
| tests | `{test.name}` | {test_status} | {trim_detail(test.detail)} |
49+
| lint | `{lint.name}` | {lint_status} | {trim_detail(lint.detail)} |
2350
"""
2451

2552

@@ -32,56 +59,29 @@ def compose_precommit_markdown(
3259
report_id: str,
3360
) -> str:
3461
slug = findings["slug"]
35-
date = datetime.now(timezone.utc).strftime("%Y-%m-%d")
3662
instr = findings["instructions"]
3763
rules = findings["rules"]
3864
readme = findings["readme"]
3965
tm = findings["tests_meaningful"]
4066
appv = findings["app_verification"]
4167
summary = findings.get("summary", "").strip()
4268

43-
test_status = "passed" if test.passed else "FAILED"
44-
lint_status = "passed" if lint.passed else "FAILED"
45-
4669
final_marker = (
4770
"[x] READY TO COMMIT\n[ ] BLOCKED"
4871
if ready
4972
else f"[ ] READY TO COMMIT\n[x] BLOCKED — {'; '.join(reasons)}"
5073
)
5174

52-
md = f"""<!-- agent-toolkit:precommit | v1 | {date} | {report_id} -->
53-
<!-- writer: hooks/finalize_report.py — agent did not write this file -->
54-
# Pre-commit Report: {slug}
55-
56-
| Field | Value |
57-
|-------|-------|
58-
| Status | completed |
59-
| Writer | hooks/finalize_report.py |
60-
| Skill | precommit |
61-
| Slug | {slug} |
62-
| Date (UTC) | {date} |
63-
64-
## Mechanical Re-run (hook-owned)
65-
66-
| Check | Command | Result | Detail |
67-
|-------|---------|--------|--------|
68-
| tests | `{test.name}` | {test_status} | {trim_detail(test.detail)} |
69-
| lint | `{lint.name}` | {lint_status} | {trim_detail(lint.detail)} |
70-
71-
## Findings (agent-authored)
72-
73-
- Instructions: {instr['addressed']}/{instr['total']} addressed
74-
- Test quality: {tm['result']}{inline(' — ', tm.get('evidence'))}
75-
- Rules: {rules['violations']} violation(s)
76-
- README: {'PASS' if readme['passed'] else 'FAIL'}{inline(' — ', readme.get('details'))}
77-
- App verification: {appv['status']}{inline(' — ', appv.get('notes'))}
78-
"""
79-
80-
if summary:
81-
md += f"\n## Summary\n\n{summary}\n"
82-
83-
md += f"\n## Final Gate\n\n{final_marker}\n"
84-
return md
75+
md = _report_header("precommit", f"Pre-commit Report: {slug}", slug, report_id)
76+
md += f"\n{mechanical_table(test, lint)}\n## Findings (agent-authored)\n\n"
77+
md += (
78+
f"- Instructions: {instr['addressed']}/{instr['total']} addressed\n"
79+
f"- Test quality: {tm['result']}{inline(' — ', tm.get('evidence'))}\n"
80+
f"- Rules: {rules['violations']} violation(s)\n"
81+
f"- README: {'PASS' if readme['passed'] else 'FAIL'}{inline(' — ', readme.get('details'))}\n"
82+
f"- App verification: {appv['status']}{inline(' — ', appv.get('notes'))}\n"
83+
)
84+
return _append_summary_and_gate(md, summary, final_marker)
8585

8686

8787
def compose_evaluate_markdown(
@@ -97,55 +97,26 @@ def compose_evaluate_markdown(
9797
slug = findings["slug"]
9898
topic = findings["topic"]
9999
dims = findings["dimensions"]
100-
date = datetime.now(timezone.utc).strftime("%Y-%m-%d")
101100
grade = grade_letter(score)
102101
summary = findings.get("summary", "").strip()
103102

104-
dim_rows = []
105-
for key, weight in EVAL_DIMENSION_WEIGHTS.items():
106-
label = key.replace("_", " ").title()
107-
dim_score = dims[key]
108-
weighted = round(dim_score * weight)
109-
dim_rows.append(
110-
f"| {label} | {dim_score}% | {int(weight * 100)}% | {weighted} |"
111-
)
112-
dim_table = "\n".join(dim_rows)
113-
114-
if passed:
115-
final_marker = (
116-
f"[x] PASSED — score {score}% ≥ threshold {threshold}%\n"
117-
f"[ ] BLOCKED"
118-
)
119-
else:
120-
final_marker = f"[ ] PASSED\n[x] BLOCKED — {'; '.join(reasons)}"
121-
122-
md = f"""<!-- agent-toolkit:evaluate | v1 | {date} | {report_id} -->
123-
<!-- writer: hooks/finalize_report.py — agent did not write this file -->
124-
# Evaluation: {topic}
125-
# Score: **{score}%** ({grade})
126-
127-
| Field | Value |
128-
|-------|-------|
129-
| Status | completed |
130-
| Writer | hooks/finalize_report.py |
131-
| Skill | evaluate |
132-
| Slug | {slug} |
133-
| Threshold | {threshold}% |
134-
| Date (UTC) | {date} |
135-
136-
| Dimension | Score | Weight | Weighted |
137-
|-----------|-------|--------|----------|
138-
{dim_table}
139-
| **Overall** | | | **{score}%** |
140-
141-
{mechanical_table(test, lint)}
142-
"""
103+
dim_rows = [
104+
f"| {key.replace('_', ' ').title()} | {dims[key]}% | {int(weight * 100)}% | {round(dims[key] * weight)} |"
105+
for key, weight in EVAL_DIMENSION_WEIGHTS.items()
106+
]
143107

144-
if summary:
145-
md += f"\n## Summary\n\n{summary}\n"
108+
final_marker = (
109+
f"[x] PASSED — score {score}% ≥ threshold {threshold}%\n[ ] BLOCKED"
110+
if passed
111+
else f"[ ] PASSED\n[x] BLOCKED — {'; '.join(reasons)}"
112+
)
146113

147-
md += f"\n## Final Gate\n\n{final_marker}\n"
148-
return md
114+
extra = f"| Threshold | {threshold}% |\n"
115+
md = _report_header("evaluate", f"Evaluation: {topic}\n# Score: **{score}%** ({grade})", slug, report_id, extra)
116+
md += f"\n| Dimension | Score | Weight | Weighted |\n|-----------|-------|--------|----------|\n"
117+
md += "\n".join(dim_rows)
118+
md += f"\n| **Overall** | | | **{score}%** |\n\n{mechanical_table(test, lint)}\n"
119+
return _append_summary_and_gate(md, summary, final_marker)
149120

150121

151122
def compose_reviewer_markdown(
@@ -159,9 +130,8 @@ def compose_reviewer_markdown(
159130
slug = findings["slug"]
160131
topic = findings["topic"]
161132
counts = findings["findings"]
162-
date = datetime.now(timezone.utc).strftime("%Y-%m-%d")
163133
summary = findings.get("summary", "").strip()
164-
areas = findings.get("areas_reviewed") or []
134+
areas_line = ", ".join(findings.get("areas_reviewed") or []) or "—"
165135

166136
if passed:
167137
final_marker = "[x] PASSED\n[ ] BLOCKED"
@@ -170,40 +140,14 @@ def compose_reviewer_markdown(
170140
final_marker = f"[ ] PASSED\n[x] BLOCKED — {'; '.join(reasons)}"
171141
pass_line = ""
172142

173-
areas_line = ", ".join(areas) if areas else "—"
174-
175-
md = f"""<!-- agent-toolkit:reviewer | v1 | {date} | {report_id} -->
176-
<!-- writer: hooks/finalize_report.py — agent did not write this file -->
177-
# Reviewer Report: {topic}
178-
179-
| Field | Value |
180-
|-------|-------|
181-
| **Status** | completed |
182-
| Writer | hooks/finalize_report.py |
183-
| Skill | reviewer |
184-
| Slug | {slug} |
185-
| Areas reviewed | {areas_line} |
186-
| Date (UTC) | {date} |
187-
188-
## Findings Summary
189-
190-
| Severity | Count |
191-
|----------|-------|
192-
| High | {counts['high']} |
193-
| Medium | {counts['medium']} |
194-
| Low | {counts['low']} |
195-
196-
{mechanical_table(test, lint)}
197-
"""
198-
199-
if summary:
200-
md += f"\n## Summary\n\n{summary}\n"
201-
202-
if pass_line:
203-
md += f"\n{pass_line}\n"
204-
205-
md += f"\n## Final Gate\n\n{final_marker}\n"
206-
return md
143+
extra = f"| Areas reviewed | {areas_line} |\n"
144+
md = _report_header("reviewer", f"Reviewer Report: {topic}", slug, report_id, extra)
145+
md += (
146+
f"\n## Findings Summary\n\n| Severity | Count |\n|----------|-------|\n"
147+
f"| High | {counts['high']} |\n| Medium | {counts['medium']} |\n| Low | {counts['low']} |\n\n"
148+
f"{mechanical_table(test, lint)}\n"
149+
)
150+
return _append_summary_and_gate(md, summary, final_marker, pass_line)
207151

208152

209153
def compose_assess_markdown(
@@ -217,7 +161,6 @@ def compose_assess_markdown(
217161
slug = findings["slug"]
218162
topic = findings["topic"]
219163
counts = findings["findings"]
220-
date = datetime.now(timezone.utc).strftime("%Y-%m-%d")
221164
summary = findings.get("summary", "").strip()
222165

223166
if passed:
@@ -227,39 +170,12 @@ def compose_assess_markdown(
227170
else:
228171
final_marker = f"[ ] PASSED\n[x] BLOCKED — {'; '.join(reasons)}"
229172
pass_line = ""
230-
fix_now_section = (
231-
f"### [!!] Fix Now\n\n{counts['fix_now']} critical finding(s) remain.\n"
232-
)
233-
234-
md = f"""<!-- agent-toolkit:assess | v1 | {date} | {report_id} -->
235-
<!-- writer: hooks/finalize_report.py — agent did not write this file -->
236-
# Assess Report: {topic}
237-
238-
| Field | Value |
239-
|-------|-------|
240-
| **Status** | completed |
241-
| Writer | hooks/finalize_report.py |
242-
| Skill | assess |
243-
| Slug | {slug} |
244-
| Date (UTC) | {date} |
245-
246-
## Findings Summary
247-
248-
| Bucket | Count |
249-
|--------|-------|
250-
| [!!] Fix now | {counts['fix_now']} |
251-
| [~] Consider | {counts['consider']} |
252-
| [ok] Good as-is | {counts['good']} |
253-
254-
{fix_now_section}
255-
{mechanical_table(test, lint)}
256-
"""
257-
258-
if summary:
259-
md += f"\n## Summary\n\n{summary}\n"
260-
261-
if pass_line:
262-
md += f"\n{pass_line}\n"
173+
fix_now_section = f"### [!!] Fix Now\n\n{counts['fix_now']} critical finding(s) remain.\n"
263174

264-
md += f"\n## Final Gate\n\n{final_marker}\n"
265-
return md
175+
md = _report_header("assess", f"Assess Report: {topic}", slug, report_id)
176+
md += (
177+
f"\n## Findings Summary\n\n| Bucket | Count |\n|--------|-------|\n"
178+
f"| [!!] Fix now | {counts['fix_now']} |\n| [~] Consider | {counts['consider']} |\n"
179+
f"| [ok] Good as-is | {counts['good']} |\n\n{fix_now_section}{mechanical_table(test, lint)}\n"
180+
)
181+
return _append_summary_and_gate(md, summary, final_marker, pass_line)

0 commit comments

Comments (0)