Skip to content

Commit 6dc0d3b

Browse files
committed
fix(hooks): emit permissionDecision "ask" for T2 findings
The PreToolUse hook collapsed T2 (TIER-ASK) findings into the same exit-2 deny path as T3 (TIER-DENY), so commands like git push and gh pr merge got silently blocked instead of surfacing Claude Code's yellow permission prompt. check_tool_input now also returns a verdict field with values allow, ask, or deny. This is a non-breaking superset: the existing safe boolean is unchanged for the 7 callers that read it. _gate_bash, _gate_spawn, and _gate_state_sanitize short-circuit on verdict == ask to emit hookSpecificOutput.permissionDecision = ask and exit 0, letting the harness render its native permission prompt. Mixed TIER-ASK plus non-ask findings still resolve to deny (deny-wins invariant). 12 new tests cover pure-T2, pure-T3, mixed, and safe input cases plus the deny-wins invariant.
1 parent 4380840 commit 6dc0d3b

4 files changed

Lines changed: 236 additions & 4 deletions

File tree

hooks/spellbook_hook.py

Lines changed: 48 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -348,13 +348,50 @@ def _validate_tool_use_id(tool_use_id: str) -> bool:
348348
# Handlers: Security gates (FAIL-CLOSED)
349349
# ---------------------------------------------------------------------------
350350

351+
def _emit_ask_and_exit(findings: list[dict]) -> None:
352+
"""Emit Claude Code's ``permissionDecision: "ask"`` JSON and exit 0.
353+
354+
Used when ``check_tool_input`` returns ``verdict == "ask"`` — every
355+
non-LOW finding is a TIER-ASK (e.g. ``git push``, ``gh pr merge``).
356+
The harness shows a yellow permission prompt the operator can
357+
approve from inside the session; T3 deny still hits the exit-2
358+
branch.
359+
"""
360+
reason = "; ".join(
361+
f.get("message", "")
362+
for f in findings
363+
if str(f.get("rule_id", "")).startswith("TIER-ASK")
364+
)
365+
print(
366+
json.dumps(
367+
{
368+
"hookSpecificOutput": {
369+
"hookEventName": "PreToolUse",
370+
"permissionDecision": "ask",
371+
"permissionDecisionReason": reason,
372+
}
373+
}
374+
)
375+
)
376+
sys.exit(0)
377+
378+
351379
def _gate_bash(data: dict) -> None:
352380
"""Security: validate bash commands. FAIL-CLOSED.
353381
354-
Calls check_tool_input from the security module. If the check finds
355-
dangerous patterns, exits with code 2 and a structured error on stdout.
382+
Calls check_tool_input from the security module. The ``verdict`` field
383+
selects the action:
384+
385+
- ``"allow"``: no findings above LOW; return silently.
386+
- ``"ask"``: only TIER-ASK findings (T2, e.g. ``git push``); emit
387+
``permissionDecision: "ask"`` and exit 0 so the harness surfaces
388+
a permission prompt.
389+
- ``"deny"``: TIER-DENY (T3), CRITICAL bashlex/exfil findings, or
390+
any mix containing a non-ask finding; exit 2 with a structured
391+
error on stdout. Error messages never include blocked content
392+
(anti-reflection).
393+
356394
If the security module cannot be imported, blocks (fail-closed).
357-
Error messages never include blocked content (anti-reflection).
358395
"""
359396
try:
360397
from spellbook.gates.check import check_tool_input
@@ -369,6 +406,8 @@ def _gate_bash(data: dict) -> None:
369406
sys.exit(2)
370407

371408
result = check_tool_input("Bash", tool_input)
409+
if result.get("verdict") == "ask":
410+
_emit_ask_and_exit(result["findings"])
372411
if not result["safe"]:
373412
reasons = "; ".join(f["message"] for f in result["findings"])
374413
print(json.dumps({"error": f"Security check failed: {reasons}"}))
@@ -379,6 +418,7 @@ def _gate_spawn(data: dict) -> None:
379418
"""Security: validate spawn prompts. FAIL-CLOSED.
380419
381420
Normalizes tool_name from MCP prefix to bare name before checking.
421+
See :func:`_gate_bash` for the verdict / exit-code contract.
382422
"""
383423
try:
384424
from spellbook.gates.check import check_tool_input
@@ -393,6 +433,8 @@ def _gate_spawn(data: dict) -> None:
393433
sys.exit(2)
394434

395435
result = check_tool_input("spawn_claude_session", tool_input)
436+
if result.get("verdict") == "ask":
437+
_emit_ask_and_exit(result["findings"])
396438
if not result["safe"]:
397439
reasons = "; ".join(f["message"] for f in result["findings"])
398440
print(json.dumps({"error": f"Security check failed: {reasons}"}))
@@ -403,6 +445,7 @@ def _gate_state_sanitize(data: dict) -> None:
403445
"""Security: validate workflow state. FAIL-CLOSED.
404446
405447
Normalizes tool_name from MCP prefix to bare name before checking.
448+
See :func:`_gate_bash` for the verdict / exit-code contract.
406449
"""
407450
try:
408451
from spellbook.gates.check import check_tool_input
@@ -417,6 +460,8 @@ def _gate_state_sanitize(data: dict) -> None:
417460
sys.exit(2)
418461

419462
result = check_tool_input("workflow_state_save", tool_input)
463+
if result.get("verdict") == "ask":
464+
_emit_ask_and_exit(result["findings"])
420465
if not result["safe"]:
421466
reasons = "; ".join(f["message"] for f in result["findings"])
422467
print(json.dumps({"error": f"Security check failed: {reasons}"}))

spellbook/gates/check.py

Lines changed: 31 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -166,6 +166,12 @@ def check_tool_input(
166166
Returns:
167167
Dict with keys:
168168
safe: bool - True if no findings above LOW severity
169+
verdict: str - "allow" | "ask" | "deny". "ask" iff every
170+
non-LOW finding is a TIER-ASK; "deny" if any non-LOW
171+
finding is not a TIER-ASK; "allow" if no non-LOW
172+
findings. Callers that want the harness ``ask`` UX
173+
(Claude Code's permission prompt) should branch on
174+
this field; legacy callers can keep using ``safe``.
169175
findings: list[dict] - matched patterns
170176
tool_name: str - the tool name checked
171177
"""
@@ -216,13 +222,37 @@ def check_tool_input(
216222
check_patterns(text, INJECTION_RULES, security_mode)
217223
)
218224

225+
safe = all(f.get("severity") == "LOW" for f in findings)
219226
return {
220-
"safe": all(f.get("severity") == "LOW" for f in findings),
227+
"safe": safe,
228+
"verdict": _compute_verdict(findings, safe=safe),
221229
"findings": findings,
222230
"tool_name": tool_name,
223231
}
224232

225233

234+
def _compute_verdict(findings: list[dict], *, safe: bool) -> str:
235+
"""Project ``findings`` to one of ``allow`` / ``ask`` / ``deny``.
236+
237+
- ``allow``: no non-LOW findings (the ``safe`` codepath).
238+
- ``ask``: at least one non-LOW finding, AND every non-LOW finding
239+
is a TIER-ASK (``rule_id`` starts with ``"TIER-ASK"``). This is
240+
the harness ``permissionDecision: "ask"`` codepath — operator
241+
can approve in-session.
242+
- ``deny``: at least one non-LOW finding that is not a TIER-ASK
243+
(TIER-DENY, CRITICAL bashlex/exfil/injection/secret-path, etc.).
244+
Mixed TIER-ASK + non-ask findings resolve to ``deny`` — deny wins.
245+
"""
246+
if safe:
247+
return "allow"
248+
non_low = [f for f in findings if f.get("severity") != "LOW"]
249+
if non_low and all(
250+
str(f.get("rule_id", "")).startswith("TIER-ASK") for f in non_low
251+
):
252+
return "ask"
253+
return "deny"
254+
255+
226256
def _check_read_path(tool_input: dict) -> list[dict]:
227257
"""Check a Read-tool invocation against the secret-path denylist.
228258

tests/test_security/test_check.py

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -190,6 +190,95 @@ def test_findings_is_list(self):
190190
result = check_tool_input("Bash", {"command": "ls"})
191191
assert isinstance(result["findings"], list)
192192

193+
def test_return_has_verdict_key(self):
194+
from spellbook.gates.check import check_tool_input
195+
196+
result = check_tool_input("Bash", {"command": "ls"})
197+
assert "verdict" in result
198+
199+
200+
class TestCheckToolInputVerdict:
201+
"""Tests for the ``verdict`` projection of findings -> allow/ask/deny.
202+
203+
The hook surface uses ``verdict`` to pick between Claude Code's
204+
``permissionDecision: "ask"`` JSON (T2-only) and a hard exit-2 deny
205+
(T3, CRITICAL bashlex/exfil findings, or any mix containing a
206+
non-ask finding).
207+
"""
208+
209+
def test_safe_command_is_allow(self):
210+
from spellbook.gates.check import check_tool_input
211+
212+
result = check_tool_input("Bash", {"command": "ls"})
213+
assert result["safe"] is True
214+
assert result["verdict"] == "allow"
215+
216+
def test_pure_t2_is_ask(self):
217+
"""A pure T2 (TIER-ASK) match resolves to ``verdict == "ask"``."""
218+
from spellbook.gates.check import check_tool_input
219+
220+
# ``git push`` is seeded as T2 in tiers.toml; no other layer fires.
221+
result = check_tool_input("Bash", {"command": "git push"})
222+
assert result["safe"] is False # T2 emits HIGH-severity finding
223+
assert result["verdict"] == "ask"
224+
rule_ids = [f["rule_id"] for f in result["findings"]]
225+
assert "TIER-ASK" in rule_ids
226+
227+
def test_pure_t3_is_deny(self):
228+
"""A pure T3 (TIER-DENY) match resolves to ``verdict == "deny"``."""
229+
from spellbook.gates.check import check_tool_input
230+
231+
result = check_tool_input(
232+
"Bash", {"command": "git push --force origin main"}
233+
)
234+
assert result["safe"] is False
235+
assert result["verdict"] == "deny"
236+
237+
def test_critical_bashlex_finding_is_deny(self):
238+
"""A CRITICAL non-tier finding (bashlex compound + tier match)
239+
resolves to ``verdict == "deny"`` even though TIER-ASK would
240+
otherwise fire — deny wins over ask."""
241+
from spellbook.gates.check import check_tool_input
242+
243+
# ``git push && echo done`` triggers BASH-PARSER-COMPOUND (CRITICAL)
244+
# AND TIER-ASK (T2) — mixed findings must collapse to deny.
245+
result = check_tool_input(
246+
"Bash", {"command": "git push && echo done"}
247+
)
248+
rule_ids = [f["rule_id"] for f in result["findings"]]
249+
assert any(rid.startswith("BASH-PARSER-") for rid in rule_ids)
250+
assert result["verdict"] == "deny"
251+
252+
def test_compute_verdict_mixed_ask_and_deny(self):
253+
"""Direct unit test for ``_compute_verdict``: a synthetic mix of
254+
TIER-ASK and TIER-DENY findings must collapse to ``deny``.
255+
Guards the deny-wins invariant against future finding-source
256+
changes that could otherwise let an ASK leak through."""
257+
from spellbook.gates.check import _compute_verdict
258+
259+
findings = [
260+
{"rule_id": "TIER-ASK", "severity": "HIGH", "message": "ask"},
261+
{"rule_id": "TIER-DENY", "severity": "CRITICAL", "message": "deny"},
262+
]
263+
assert _compute_verdict(findings, safe=False) == "deny"
264+
265+
def test_compute_verdict_pure_ask(self):
266+
from spellbook.gates.check import _compute_verdict
267+
268+
findings = [
269+
{"rule_id": "TIER-ASK", "severity": "HIGH", "message": "ask"},
270+
]
271+
assert _compute_verdict(findings, safe=False) == "ask"
272+
273+
def test_compute_verdict_low_only_is_allow(self):
274+
"""LOW-severity findings keep ``safe = True`` and verdict = allow."""
275+
from spellbook.gates.check import _compute_verdict
276+
277+
findings = [
278+
{"rule_id": "INJ-LOW", "severity": "LOW", "message": "low"},
279+
]
280+
assert _compute_verdict(findings, safe=True) == "allow"
281+
193282

194283
class TestCheckToolInputSecurityModes:
195284
"""Tests for security_mode parameter in check_tool_input."""

tests/test_security/test_hooks.py

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -413,6 +413,74 @@ def test_missing_tool_input_blocks(self):
413413
assert proc.returncode == 2
414414

415415

416+
class TestBashGateAskPrompt:
417+
"""Verify T2 (TIER-ASK) findings translate to Claude Code's
418+
``permissionDecision: "ask"`` JSON on stdout with exit 0,
419+
rather than the hard exit-2 deny path that swallowed them
420+
before the T2-as-ask fix.
421+
422+
Coverage matrix (matches the four cases in the fix brief):
423+
424+
1. pure T2 -> ``permissionDecision: "ask"`` JSON, exit 0
425+
2. pure T3 -> exit 2, no ask JSON (existing behavior preserved)
426+
3. mixed T2 + non-ask CRITICAL -> exit 2 (deny wins)
427+
4. pure T0/T1/safe -> no stdout, exit 0 (existing behavior preserved)
428+
"""
429+
430+
def test_pure_t2_emits_ask_prompt(self):
431+
"""``git push`` matches the T2 record only; the hook must emit
432+
``permissionDecision: "ask"`` and exit 0 so the harness can
433+
surface a yellow permission prompt."""
434+
proc = _run_bash_gate({"command": "git push"})
435+
assert proc.returncode == 0, (
436+
f"expected exit 0 (ask), got {proc.returncode}; "
437+
f"stdout={proc.stdout!r} stderr={proc.stderr!r}"
438+
)
439+
output = json.loads(proc.stdout.strip())
440+
hook_output = output["hookSpecificOutput"]
441+
assert hook_output["hookEventName"] == "PreToolUse"
442+
assert hook_output["permissionDecision"] == "ask"
443+
assert "permissionDecisionReason" in hook_output
444+
assert hook_output["permissionDecisionReason"] # non-empty
445+
446+
def test_pure_t2_gh_pr_merge_emits_ask_prompt(self):
447+
"""Second seeded T2 record: ``gh pr merge``."""
448+
proc = _run_bash_gate({"command": "gh pr merge --squash"})
449+
assert proc.returncode == 0
450+
output = json.loads(proc.stdout.strip())
451+
assert output["hookSpecificOutput"]["permissionDecision"] == "ask"
452+
453+
def test_pure_t3_still_exits_2(self):
454+
"""T3 deny path is unchanged — no ask JSON, exit 2."""
455+
proc = _run_bash_gate(
456+
{"command": "git push --force origin main"}
457+
)
458+
assert proc.returncode == 2
459+
# Output must be the legacy ``{"error": "..."}`` shape, not an
460+
# ``ask`` JSON. Parsing it as JSON should yield an ``error`` key.
461+
output = json.loads(proc.stdout.strip())
462+
assert "error" in output
463+
assert "hookSpecificOutput" not in output
464+
465+
def test_mixed_t2_and_critical_exits_2(self):
466+
"""A T2 match combined with a non-ask CRITICAL finding (here
467+
the bashlex compound-command parser firing on ``&&``) must
468+
collapse to deny — ask never wins over a real block.
469+
"""
470+
proc = _run_bash_gate({"command": "git push && echo done"})
471+
assert proc.returncode == 2
472+
output = json.loads(proc.stdout.strip())
473+
assert "error" in output
474+
assert "hookSpecificOutput" not in output
475+
476+
def test_safe_command_emits_no_stdout(self):
477+
"""T0/T1 / safe commands keep the silent exit-0 contract — no
478+
stdout payload, no ask prompt."""
479+
proc = _run_bash_gate({"command": "ls -la"})
480+
assert proc.returncode == 0
481+
assert proc.stdout.strip() == ""
482+
483+
416484
# #############################################################################
417485
# state-sanitize tests via unified hook
418486
# #############################################################################

0 commit comments

Comments
 (0)