fix(hooks): emit permissionDecision "ask" for T2 findings

elijahr · elijahr · commit 6dc0d3bce014 · 2026-05-07T16:48:05.000-05:00
The PreToolUse hook collapsed T2 (TIER-ASK) findings into the same
exit-2 deny path as T3 (TIER-DENY), so commands like git push and
gh pr merge got silently blocked instead of surfacing Claude Code's
yellow permission prompt.

check_tool_input now also returns a verdict field with values allow,
ask, or deny. This is a non-breaking superset: the existing safe
boolean is unchanged for the 7 callers that read it. _gate_bash,
_gate_spawn, and _gate_state_sanitize short-circuit on verdict == ask
to emit hookSpecificOutput.permissionDecision = ask and exit 0,
letting the harness render its native permission prompt. Mixed
TIER-ASK plus non-ask findings still resolve to deny (deny-wins
invariant).

12 new tests cover pure-T2, pure-T3, mixed, and safe input cases plus
the deny-wins invariant; a 13th asserts the verdict key is present.
diff --git a/hooks/spellbook_hook.py b/hooks/spellbook_hook.py
@@ -348,13 +348,50 @@ def _validate_tool_use_id(tool_use_id: str) -> bool:
 # Handlers: Security gates (FAIL-CLOSED)
 # ---------------------------------------------------------------------------
 
+def _emit_ask_and_exit(findings: list[dict]) -> None:
+    """Emit Claude Code's ``permissionDecision: "ask"`` JSON and exit 0.
+
+    Used when ``check_tool_input`` returns ``verdict == "ask"``, i.e.
+    every non-LOW finding is a TIER-ASK (e.g. ``git push``, ``gh pr merge``).
+    The harness shows a yellow permission prompt the operator can
+    approve from inside the session; T3 deny still hits the exit-2
+    branch in the calling gate. This function never returns.
+    """
+    reason = "; ".join(
+        f.get("message", "")
+        for f in findings
+        if str(f.get("rule_id", "")).startswith("TIER-ASK")
+    )
+    print(
+        json.dumps(
+            {
+                "hookSpecificOutput": {
+                    "hookEventName": "PreToolUse",
+                    "permissionDecision": "ask",
+                    "permissionDecisionReason": reason,
+                }
+            }
+        )
+    )
+    sys.exit(0)
+
+
 def _gate_bash(data: dict) -> None:
     """Security: validate bash commands. FAIL-CLOSED.
 
-    Calls check_tool_input from the security module. If the check finds
-    dangerous patterns, exits with code 2 and a structured error on stdout.
+    Calls check_tool_input from the security module. The ``verdict`` field
+    selects the action:
+
+    - ``"allow"``: no findings above LOW; return silently.
+    - ``"ask"``: only TIER-ASK findings (T2, e.g. ``git push``); emit
+      ``permissionDecision: "ask"`` and exit 0 so the harness surfaces
+      a permission prompt.
+    - ``"deny"``: TIER-DENY (T3), CRITICAL bashlex/exfil findings, or
+      any mix containing a non-ask finding; exit 2 with a structured
+      error on stdout. Error messages never include blocked content
+      (anti-reflection).
+
     If the security module cannot be imported, blocks (fail-closed).
-    Error messages never include blocked content (anti-reflection).
     """
     try:
         from spellbook.gates.check import check_tool_input
@@ -369,6 +406,8 @@ def _gate_bash(data: dict) -> None:
         sys.exit(2)
 
     result = check_tool_input("Bash", tool_input)
+    if result.get("verdict") == "ask":
+        _emit_ask_and_exit(result["findings"])
     if not result["safe"]:
         reasons = "; ".join(f["message"] for f in result["findings"])
         print(json.dumps({"error": f"Security check failed: {reasons}"}))
@@ -379,6 +418,7 @@ def _gate_spawn(data: dict) -> None:
     """Security: validate spawn prompts. FAIL-CLOSED.
 
     Normalizes tool_name from MCP prefix to bare name before checking.
+    See :func:`_gate_bash` for the verdict / exit-code contract.
     """
     try:
         from spellbook.gates.check import check_tool_input
@@ -393,6 +433,8 @@ def _gate_spawn(data: dict) -> None:
         sys.exit(2)
 
     result = check_tool_input("spawn_claude_session", tool_input)
+    if result.get("verdict") == "ask":
+        _emit_ask_and_exit(result["findings"])
     if not result["safe"]:
         reasons = "; ".join(f["message"] for f in result["findings"])
         print(json.dumps({"error": f"Security check failed: {reasons}"}))
@@ -403,6 +445,7 @@ def _gate_state_sanitize(data: dict) -> None:
     """Security: validate workflow state. FAIL-CLOSED.
 
     Normalizes tool_name from MCP prefix to bare name before checking.
+    See :func:`_gate_bash` for the verdict / exit-code contract.
     """
     try:
         from spellbook.gates.check import check_tool_input
@@ -417,6 +460,8 @@ def _gate_state_sanitize(data: dict) -> None:
         sys.exit(2)
 
     result = check_tool_input("workflow_state_save", tool_input)
+    if result.get("verdict") == "ask":
+        _emit_ask_and_exit(result["findings"])
     if not result["safe"]:
         reasons = "; ".join(f["message"] for f in result["findings"])
         print(json.dumps({"error": f"Security check failed: {reasons}"}))
diff --git a/spellbook/gates/check.py b/spellbook/gates/check.py
@@ -166,6 +166,12 @@ def check_tool_input(
     Returns:
         Dict with keys:
             safe: bool - True if no findings above LOW severity
+            verdict: str - "allow" | "ask" | "deny". "ask" iff every
+                non-LOW finding is a TIER-ASK; "deny" if any non-LOW
+                finding is not a TIER-ASK; "allow" if no non-LOW
+                findings. Callers that want the harness ``ask`` UX
+                (Claude Code's permission prompt) should branch on
+                this field; legacy callers can keep using ``safe``.
             findings: list[dict] - matched patterns
             tool_name: str - the tool name checked
     """
@@ -216,13 +222,37 @@ def check_tool_input(
                 check_patterns(text, INJECTION_RULES, security_mode)
             )
 
+    safe = all(f.get("severity") == "LOW" for f in findings)
     return {
-        "safe": all(f.get("severity") == "LOW" for f in findings),
+        "safe": safe,
+        "verdict": _compute_verdict(findings, safe=safe),
         "findings": findings,
         "tool_name": tool_name,
     }
 
 
+def _compute_verdict(findings: list[dict], *, safe: bool) -> str:
+    """Project ``findings`` to one of ``allow`` / ``ask`` / ``deny``.
+
+    - ``allow``: no non-LOW findings (the ``safe`` codepath).
+    - ``ask``: at least one non-LOW finding, AND every non-LOW finding
+      is a TIER-ASK (``rule_id`` starts with ``"TIER-ASK"``). This is
+      the harness ``permissionDecision: "ask"`` codepath — operator
+      can approve in-session.
+    - ``deny``: at least one non-LOW finding that is not a TIER-ASK
+      (TIER-DENY, CRITICAL bashlex/exfil/injection/secret-path, etc.).
+      Mixed TIER-ASK + non-ask findings resolve to ``deny`` — deny wins.
+    """
+    if safe:
+        return "allow"
+    non_low = [f for f in findings if f.get("severity") != "LOW"]
+    if non_low and all(
+        str(f.get("rule_id", "")).startswith("TIER-ASK") for f in non_low
+    ):
+        return "ask"
+    return "deny"
+
+
 def _check_read_path(tool_input: dict) -> list[dict]:
     """Check a Read-tool invocation against the secret-path denylist.
 
diff --git a/tests/test_security/test_check.py b/tests/test_security/test_check.py
@@ -190,6 +190,95 @@ def test_findings_is_list(self):
         result = check_tool_input("Bash", {"command": "ls"})
         assert isinstance(result["findings"], list)
 
+    def test_return_has_verdict_key(self):
+        from spellbook.gates.check import check_tool_input
+
+        result = check_tool_input("Bash", {"command": "ls"})
+        assert "verdict" in result
+
+
+class TestCheckToolInputVerdict:
+    """Tests for the ``verdict`` projection of findings -> allow/ask/deny.
+
+    The hook surface uses ``verdict`` to pick between Claude Code's
+    ``permissionDecision: "ask"`` JSON (T2-only) and a hard exit-2 deny
+    (T3, CRITICAL bashlex/exfil findings, or any mix containing a
+    non-ask finding).
+    """
+
+    def test_safe_command_is_allow(self):
+        from spellbook.gates.check import check_tool_input
+
+        result = check_tool_input("Bash", {"command": "ls"})
+        assert result["safe"] is True
+        assert result["verdict"] == "allow"
+
+    def test_pure_t2_is_ask(self):
+        """A pure T2 (TIER-ASK) match resolves to ``verdict == "ask"``."""
+        from spellbook.gates.check import check_tool_input
+
+        # ``git push`` is seeded as T2 in tiers.toml; no other layer fires.
+        result = check_tool_input("Bash", {"command": "git push"})
+        assert result["safe"] is False  # T2 emits HIGH-severity finding
+        assert result["verdict"] == "ask"
+        rule_ids = [f["rule_id"] for f in result["findings"]]
+        assert "TIER-ASK" in rule_ids
+
+    def test_pure_t3_is_deny(self):
+        """A pure T3 (TIER-DENY) match resolves to ``verdict == "deny"``."""
+        from spellbook.gates.check import check_tool_input
+
+        result = check_tool_input(
+            "Bash", {"command": "git push --force origin main"}
+        )
+        assert result["safe"] is False
+        assert result["verdict"] == "deny"
+
+    def test_critical_bashlex_finding_is_deny(self):
+        """A CRITICAL non-tier finding (bashlex compound + tier match)
+        resolves to ``verdict == "deny"`` even though the TIER-ASK match
+        alone would yield ``ask`` — deny wins over ask."""
+        from spellbook.gates.check import check_tool_input
+
+        # ``git push && echo done`` triggers BASH-PARSER-COMPOUND (CRITICAL)
+        # AND TIER-ASK (T2) — mixed findings must collapse to deny.
+        result = check_tool_input(
+            "Bash", {"command": "git push && echo done"}
+        )
+        rule_ids = [f["rule_id"] for f in result["findings"]]
+        assert any(rid.startswith("BASH-PARSER-") for rid in rule_ids)
+        assert result["verdict"] == "deny"
+
+    def test_compute_verdict_mixed_ask_and_deny(self):
+        """Direct unit test for ``_compute_verdict``: a synthetic mix of
+        TIER-ASK and TIER-DENY findings must collapse to ``deny``.
+        Guards the deny-wins invariant against future finding-source
+        changes that could otherwise let an ASK leak through."""
+        from spellbook.gates.check import _compute_verdict
+
+        findings = [
+            {"rule_id": "TIER-ASK", "severity": "HIGH", "message": "ask"},
+            {"rule_id": "TIER-DENY", "severity": "CRITICAL", "message": "deny"},
+        ]
+        assert _compute_verdict(findings, safe=False) == "deny"
+
+    def test_compute_verdict_pure_ask(self):
+        from spellbook.gates.check import _compute_verdict
+
+        findings = [
+            {"rule_id": "TIER-ASK", "severity": "HIGH", "message": "ask"},
+        ]
+        assert _compute_verdict(findings, safe=False) == "ask"
+
+    def test_compute_verdict_low_only_is_allow(self):
+        """LOW-severity findings keep ``safe = True`` and verdict = allow."""
+        from spellbook.gates.check import _compute_verdict
+
+        findings = [
+            {"rule_id": "INJ-LOW", "severity": "LOW", "message": "low"},
+        ]
+        assert _compute_verdict(findings, safe=True) == "allow"
+
 
 class TestCheckToolInputSecurityModes:
     """Tests for security_mode parameter in check_tool_input."""
diff --git a/tests/test_security/test_hooks.py b/tests/test_security/test_hooks.py
@@ -413,6 +413,74 @@ def test_missing_tool_input_blocks(self):
         assert proc.returncode == 2
 
 
+class TestBashGateAskPrompt:
+    """Verify T2 (TIER-ASK) findings translate to Claude Code's
+    ``permissionDecision: "ask"`` JSON on stdout with exit 0,
+    rather than the hard exit-2 deny path that swallowed them
+    before the T2-as-ask fix.
+
+    Coverage matrix (matches the four cases in the fix brief):
+
+    1. pure T2  -> ``permissionDecision: "ask"`` JSON, exit 0
+    2. pure T3  -> exit 2, no ask JSON (existing behavior preserved)
+    3. mixed T2 + non-ask CRITICAL -> exit 2 (deny wins)
+    4. pure T0/T1/safe -> no stdout, exit 0 (existing behavior preserved)
+    """
+
+    def test_pure_t2_emits_ask_prompt(self):
+        """``git push`` matches the T2 record only; the hook must emit
+        ``permissionDecision: "ask"`` and exit 0 so the harness can
+        surface a yellow permission prompt."""
+        proc = _run_bash_gate({"command": "git push"})
+        assert proc.returncode == 0, (
+            f"expected exit 0 (ask), got {proc.returncode}; "
+            f"stdout={proc.stdout!r} stderr={proc.stderr!r}"
+        )
+        output = json.loads(proc.stdout.strip())
+        hook_output = output["hookSpecificOutput"]
+        assert hook_output["hookEventName"] == "PreToolUse"
+        assert hook_output["permissionDecision"] == "ask"
+        assert "permissionDecisionReason" in hook_output
+        assert hook_output["permissionDecisionReason"]  # non-empty
+
+    def test_pure_t2_gh_pr_merge_emits_ask_prompt(self):
+        """Second seeded T2 record: ``gh pr merge``."""
+        proc = _run_bash_gate({"command": "gh pr merge --squash"})
+        assert proc.returncode == 0
+        output = json.loads(proc.stdout.strip())
+        assert output["hookSpecificOutput"]["permissionDecision"] == "ask"
+
+    def test_pure_t3_still_exits_2(self):
+        """T3 deny path is unchanged — no ask JSON, exit 2."""
+        proc = _run_bash_gate(
+            {"command": "git push --force origin main"}
+        )
+        assert proc.returncode == 2
+        # Output must be the legacy ``{"error": "..."}`` shape, not an
+        # ``ask`` JSON. Parsing it as JSON should yield an ``error`` key.
+        output = json.loads(proc.stdout.strip())
+        assert "error" in output
+        assert "hookSpecificOutput" not in output
+
+    def test_mixed_t2_and_critical_exits_2(self):
+        """A T2 match combined with a non-ask CRITICAL finding (here
+        the bashlex compound-command parser firing on ``&&``) must
+        collapse to deny — ask never wins over a real block.
+        """
+        proc = _run_bash_gate({"command": "git push && echo done"})
+        assert proc.returncode == 2
+        output = json.loads(proc.stdout.strip())
+        assert "error" in output
+        assert "hookSpecificOutput" not in output
+
+    def test_safe_command_emits_no_stdout(self):
+        """T0/T1 / safe commands keep the silent exit-0 contract — no
+        stdout payload, no ask prompt."""
+        proc = _run_bash_gate({"command": "ls -la"})
+        assert proc.returncode == 0
+        assert proc.stdout.strip() == ""
+
+
 # #############################################################################
 # state-sanitize tests via unified hook
 # #############################################################################