diff --git a/.github/workflows/behavioral.yml b/.github/workflows/behavioral.yml index 824f584..90f4226 100644 --- a/.github/workflows/behavioral.yml +++ b/.github/workflows/behavioral.yml @@ -139,6 +139,8 @@ jobs: ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} # Lets the harness default to this skill if a test relies on the env. BEHAVIORAL_SKILL: ${{ matrix.skill }} + # Cost cap: sonnet only. The harness also enforces this under CI. + BEHAVIORAL_MODEL: sonnet run: | set -euo pipefail test_file="tests/test_$(echo '${{ matrix.skill }}' | tr '-' '_').py" diff --git a/eval/behavioral/harness.py b/eval/behavioral/harness.py index 60f0ef1..54ada85 100644 --- a/eval/behavioral/harness.py +++ b/eval/behavioral/harness.py @@ -44,6 +44,32 @@ def test_image_generation(): DEFAULT_MODEL = os.environ.get("BEHAVIORAL_MODEL", "sonnet") DEFAULT_EFFORT = os.environ.get("BEHAVIORAL_EFFORT", "high") +# Automated runs are capped at sonnet: a behavioral run makes real cloud calls +# (agent run + LLM judge), so a workflow picking an expensive model can quietly +# run up a large bill. No override -- the cap is non-negotiable in CI. +AUTOMATED_MODEL = "sonnet" +_TRUTHY = {"1", "true", "yes", "on"} + + +def _is_automated_env() -> bool: + """True under CI / an automated workflow (GitHub Actions sets both).""" + return any( + os.environ.get(var, "").strip().lower() in _TRUTHY + for var in ("CI", "GITHUB_ACTIONS") + ) + + +def _enforce_model_policy(model: str | None) -> str | None: + """Coerce non-sonnet models to sonnet in CI; pass through otherwise.""" + if model is None or not _is_automated_env() or "sonnet" in model.lower(): + return model + print( + f"[behavioral] automated run: coercing model '{model}' -> " + f"'{AUTOMATED_MODEL}' to cap token usage.", + flush=True, + ) + return AUTOMATED_MODEL + def _claude_env() -> dict[str, str]: """Environment for `claude` subprocesses. @@ -70,6 +96,7 @@ def check_api_reachable(model: str | None = DEFAULT_MODEL, timeout: int = 60) -> if not claude_bin: return False, "'claude' CLI not found on PATH" + model = _enforce_model_policy(model) cmd = [claude_bin, "-p", "Reply with the single word: ok", "--output-format", "json"] if model: cmd += ["--model", model] @@ -314,7 +341,8 @@ def __init__( skill: str = DEFAULT_SKILL, effort: str | None = DEFAULT_EFFORT, ) -> None: - self.model = model + # Coerce here so the agent run and the LLM judge share the capped model. + self.model = _enforce_model_policy(model) self.skill = skill self.effort = effort self.workspace: Path | None = None