Skip to content

Commit 0727fa8

Browse files
committed
fix(plugin): align tool-call payload contract
1 parent 6fe89ee commit 0727fa8

3 files changed

Lines changed: 146 additions & 15 deletions

File tree

src/ouroboros/plugin/firewall.py

Lines changed: 60 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -41,6 +41,7 @@
4141
import re
4242
import shlex
4343
import subprocess
44+
import time
4445
from typing import Literal
4546

4647
from ouroboros.plugin.digest import (
@@ -1107,11 +1108,14 @@ def _run_failed_invocation_observability_hooks() -> None:
11071108
).hexdigest()
11081109
redacted_tool_argv, _ = _redact_argv([command_name] + list(argv))
11091110
tool_args_preview = shlex.join(redacted_tool_argv)
1110-
tool_args_digest = hashlib.sha256(
1111-
json.dumps([command_name] + list(argv), separators=(",", ":")).encode(
1112-
"utf-8", errors="surrogateescape"
1113-
)
1114-
).hexdigest()
1111+
tool_args_digest = (
1112+
"sha256:"
1113+
+ hashlib.sha256(
1114+
json.dumps([command_name] + list(argv), separators=(",", ":")).encode(
1115+
"utf-8", errors="surrogateescape"
1116+
)
1117+
).hexdigest()
1118+
)
11151119
tool_name = f"{namespace}.{command_name}" if namespace else command_name
11161120
command_permissions = tuple(getattr(command, "permissions", ()) or ())
11171121
tool_permissions = command_permissions or tuple(_required_permissions(manifest))
@@ -1173,6 +1177,37 @@ def _run_failed_invocation_observability_hooks() -> None:
11731177
if plugin_home is not None:
11741178
run_kwargs["cwd"] = str(plugin_home)
11751179

1180+
tool_call_started_at = time.perf_counter()
1181+
1182+
def _tool_call_duration_ms() -> int:
1183+
return max(1, int(round((time.perf_counter() - tool_call_started_at) * 1000)))
1184+
1185+
def _output_digest(stdout_bytes: bytes, stderr_bytes: bytes) -> str:
1186+
return "sha256:" + hashlib.sha256(stdout_bytes + stderr_bytes).hexdigest()
1187+
1188+
def _dispatch_failed_after_tool_call(
1189+
*,
1190+
output_digest: str,
1191+
duration_ms: int,
1192+
exit_code: int | None,
1193+
) -> None:
1194+
dispatch_after_tool_call(
1195+
manifest=manifest,
1196+
tool=tool_name,
1197+
status="failed",
1198+
output_digest=output_digest,
1199+
duration_ms=duration_ms,
1200+
correlation_id=correlation_id,
1201+
invocation_id=tool_call_invocation_id,
1202+
event_sink=_emit,
1203+
exit_code=exit_code,
1204+
namespace=namespace,
1205+
command_name=command_name,
1206+
trust_state=trust_state,
1207+
plugin_home=plugin_home,
1208+
subprocess_runner=runner,
1209+
)
1210+
11761211
try:
11771212
completed = runner(cmd_argv, **run_kwargs)
11781213
except FileNotFoundError as exc:
@@ -1191,6 +1226,11 @@ def _run_failed_invocation_observability_hooks() -> None:
11911226
provenance={"correlation_id": correlation_id},
11921227
)
11931228
)
1229+
_dispatch_failed_after_tool_call(
1230+
output_digest=_output_digest(b"", message.encode("utf-8", errors="surrogateescape")),
1231+
duration_ms=_tool_call_duration_ms(),
1232+
exit_code=127,
1233+
)
11941234
_run_failed_invocation_observability_hooks()
11951235
return InvocationResult(
11961236
status="failed",
@@ -1208,6 +1248,7 @@ def _run_failed_invocation_observability_hooks() -> None:
12081248
stderr_bytes = _to_bytes(exc.stderr)
12091249
stdout_hash = hashlib.sha256(stdout_bytes).hexdigest()
12101250
stderr_hash = hashlib.sha256(stderr_bytes).hexdigest()
1251+
output_digest = _output_digest(stdout_bytes, stderr_bytes)
12111252
message = (
12121253
f"entrypoint timed out after "
12131254
f"{DEFAULT_PLUGIN_INVOCATION_TIMEOUT_SECONDS:g}s: {cmd_argv[0]!r}"
@@ -1231,6 +1272,11 @@ def _run_failed_invocation_observability_hooks() -> None:
12311272
},
12321273
)
12331274
)
1275+
_dispatch_failed_after_tool_call(
1276+
output_digest=output_digest,
1277+
duration_ms=_tool_call_duration_ms(),
1278+
exit_code=124,
1279+
)
12341280
_run_failed_invocation_observability_hooks()
12351281
return InvocationResult(
12361282
status="failed",
@@ -1266,6 +1312,11 @@ def _run_failed_invocation_observability_hooks() -> None:
12661312
},
12671313
)
12681314
)
1315+
_dispatch_failed_after_tool_call(
1316+
output_digest=_output_digest(b"", message.encode("utf-8", errors="surrogateescape")),
1317+
duration_ms=_tool_call_duration_ms(),
1318+
exit_code=126,
1319+
)
12691320
_run_failed_invocation_observability_hooks()
12701321
return InvocationResult(
12711322
status="failed",
@@ -1278,7 +1329,8 @@ def _run_failed_invocation_observability_hooks() -> None:
12781329
stderr_bytes = _to_bytes(completed.stderr)
12791330
stdout_hash = hashlib.sha256(stdout_bytes).hexdigest()
12801331
stderr_hash = hashlib.sha256(stderr_bytes).hexdigest()
1281-
output_digest = hashlib.sha256(stdout_bytes + stderr_bytes).hexdigest()
1332+
output_digest = _output_digest(stdout_bytes, stderr_bytes)
1333+
duration_ms = _tool_call_duration_ms()
12821334

12831335
terminal_provenance = {
12841336
"correlation_id": correlation_id,
@@ -1307,7 +1359,7 @@ def _run_failed_invocation_observability_hooks() -> None:
13071359
tool=tool_name,
13081360
status="success",
13091361
output_digest=output_digest,
1310-
duration_ms=0,
1362+
duration_ms=duration_ms,
13111363
correlation_id=correlation_id,
13121364
invocation_id=tool_call_invocation_id,
13131365
event_sink=_emit,
@@ -1347,7 +1399,7 @@ def _run_failed_invocation_observability_hooks() -> None:
13471399
tool=tool_name,
13481400
status="failed",
13491401
output_digest=output_digest,
1350-
duration_ms=0,
1402+
duration_ms=duration_ms,
13511403
correlation_id=correlation_id,
13521404
invocation_id=tool_call_invocation_id,
13531405
event_sink=_emit,

tests/integration/plugin/test_e2e.py

Lines changed: 19 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -677,7 +677,13 @@ def test_path_5_before_tool_call_can_block_production_invocation(tmp_path: Path)
677677
_enable_v04_tool_call_hooks(
678678
plugin_home,
679679
before_policy="fail_closed",
680-
before_script="import sys\nsys.exit(7)\n",
680+
before_script=(
681+
"import json, os, sys\n"
682+
"from pathlib import Path\n"
683+
"payload = json.loads(os.environ['OUROBOROS_PLUGIN_TOOL_CALL_PAYLOAD'])\n"
684+
"Path('before-tool-payload.json').write_text(json.dumps(payload, sort_keys=True))\n"
685+
"sys.exit(7)\n"
686+
),
681687
)
682688
_grant_trust(paths)
683689
_grant_tool_hook_trust(paths)
@@ -702,6 +708,8 @@ def test_path_5_before_tool_call_can_block_production_invocation(tmp_path: Path)
702708
types = [p["event_type"] for p in payloads]
703709
assert "plugin.tool.intercept.blocked" in types
704710
assert "plugin.completed" not in types
711+
observed = json.loads((plugin_home / "before-tool-payload.json").read_text())
712+
assert observed["args_digest"].startswith("sha256:")
705713
failed = next(p for p in payloads if p["event_type"] == "plugin.failed")
706714
assert failed["result"]["status"] == "blocked"
707715
assert failed["provenance"]["reason"] == "tool_call_blocked"
@@ -772,10 +780,12 @@ def test_path_5_after_tool_call_observes_completed_result(tmp_path: Path) -> Non
772780
observed = json.loads((plugin_home / "after-tool-payload.json").read_text())
773781
assert observed["status"] == "success"
774782
assert observed["tool"] == "github-pr.review"
775-
expected_digest = hashlib.sha256(
776-
(result.stdout_bytes or b"") + (result.stderr_bytes or b"")
777-
).hexdigest()
783+
expected_digest = (
784+
"sha256:"
785+
+ hashlib.sha256((result.stdout_bytes or b"") + (result.stderr_bytes or b"")).hexdigest()
786+
)
778787
assert observed["output_digest"] == expected_digest
788+
assert observed["duration_ms"] >= 1
779789
payloads = [unwrap_plugin_event(env) for env in envelopes]
780790
assert "plugin.tool.observe.recorded" in [p["event_type"] for p in payloads]
781791

@@ -814,11 +824,13 @@ def test_path_5_after_tool_call_hashes_combined_failed_output(tmp_path: Path) ->
814824
assert result.stderr_bytes == b"synthetic failure\n"
815825
observed = json.loads((plugin_home / "after-tool-failed-payload.json").read_text())
816826
assert observed["status"] == "failed"
817-
expected_digest = hashlib.sha256(
818-
(result.stdout_bytes or b"") + (result.stderr_bytes or b"")
819-
).hexdigest()
827+
expected_digest = (
828+
"sha256:"
829+
+ hashlib.sha256((result.stdout_bytes or b"") + (result.stderr_bytes or b"")).hexdigest()
830+
)
820831
assert observed["output_digest"] == expected_digest
821832
assert observed["output_digest"] != result.stdout_sha256
833+
assert observed["duration_ms"] >= 1
822834
payloads = [unwrap_plugin_event(env) for env in envelopes]
823835
assert "plugin.tool.observe.recorded" in [p["event_type"] for p in payloads]
824836

tests/unit/plugin/test_firewall.py

Lines changed: 67 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -17,6 +17,7 @@
1717
HOOK_EVENT_TYPES,
1818
HOOK_LIFECYCLE_POLICY_SCOPE,
1919
HOOK_LIFECYCLE_READ_SCOPE,
20+
HOOK_TOOL_OBSERVE_SCOPE,
2021
)
2122
from ouroboros.plugin.manifest import load_manifest
2223
from ouroboros.plugin.trust_store import TrustRecord, TrustStore
@@ -2028,6 +2029,72 @@ def test_entrypoint_missing_emits_failed_127(tmp_path: Path) -> None:
20282029
assert "not found" in result.message.lower()
20292030

20302031

2032+
def test_v04_after_tool_call_observes_launch_failure_paths(tmp_path: Path) -> None:
2033+
payload = json.loads(json.dumps(REFERENCE_MANIFEST))
2034+
payload["schema_version"] = "0.4"
2035+
payload["permissions"].append(
2036+
{
2037+
"scope": HOOK_TOOL_OBSERVE_SCOPE,
2038+
"risk": "read_only",
2039+
"required": True,
2040+
"reason": "Allow tool-call observation.",
2041+
}
2042+
)
2043+
payload["hooks"] = [
2044+
{
2045+
"name": "after_tool_call",
2046+
"entrypoint": {"type": "command", "command": "python -m hook_after_tool"},
2047+
"permissions": [HOOK_TOOL_OBSERVE_SCOPE],
2048+
"failure_policy": "fail_open",
2049+
}
2050+
]
2051+
program = _make_program(tmp_path, payload)
2052+
trust = _grant_trust_scopes(tmp_path, "github:read", HOOK_TOOL_OBSERVE_SCOPE)
2053+
observed_payloads: list[dict] = []
2054+
2055+
def _runner(argv, *args, **kwargs) -> subprocess.CompletedProcess:
2056+
if argv[:3] == ["python", "-m", "hook_after_tool"]:
2057+
observed_payloads.append(
2058+
json.loads(kwargs["env"]["OUROBOROS_PLUGIN_TOOL_CALL_PAYLOAD"])
2059+
)
2060+
return subprocess.CompletedProcess(args=argv, returncode=0, stdout="", stderr="")
2061+
raise FileNotFoundError("synthetic missing entrypoint")
2062+
2063+
events: list[dict] = []
2064+
result = invoke_plugin(
2065+
program,
2066+
command_name="review",
2067+
argv=["url"],
2068+
trust_record=trust,
2069+
event_sink=events.append,
2070+
correlation_id="corr-v04-launch-failure-after-tool",
2071+
subprocess_runner=_runner,
2072+
)
2073+
2074+
assert result.status == "failed"
2075+
assert result.exit_code == 127
2076+
assert observed_payloads == [
2077+
{
2078+
"correlation_id": "corr-v04-launch-failure-after-tool",
2079+
"duration_ms": observed_payloads[0]["duration_ms"],
2080+
"exit_code": 127,
2081+
"invocation_id": observed_payloads[0]["invocation_id"],
2082+
"output_digest": observed_payloads[0]["output_digest"],
2083+
"status": "failed",
2084+
"tool": "github-pr.review",
2085+
}
2086+
]
2087+
assert observed_payloads[0]["duration_ms"] >= 1
2088+
assert observed_payloads[0]["output_digest"].startswith("sha256:")
2089+
assert [event["event_type"] for event in events] == [
2090+
"plugin.invoked",
2091+
"plugin.permission_used",
2092+
"plugin.permission_used",
2093+
"plugin.failed",
2094+
"plugin.tool.observe.recorded",
2095+
]
2096+
2097+
20312098
def test_subprocess_invocation_uses_default_timeout(tmp_path: Path) -> None:
20322099
"""The firewall owns the external plugin process boundary, so the
20332100
subprocess launch must always carry a finite timeout."""

0 commit comments

Comments
 (0)