@@ -44,6 +44,32 @@ def test_image_generation():
4444DEFAULT_MODEL = os .environ .get ("BEHAVIORAL_MODEL" , "sonnet" )
4545DEFAULT_EFFORT = os .environ .get ("BEHAVIORAL_EFFORT" , "high" )
4646
47+ # Automated runs are capped at sonnet: a behavioral run makes real cloud calls
48+ # (agent run + LLM judge), so a workflow picking an expensive model can quietly
49+ # run up a large bill. No override -- the cap is non-negotiable in CI.
50+ AUTOMATED_MODEL = "sonnet"
51+ _TRUTHY = {"1" , "true" , "yes" , "on" }
52+
53+
def _is_automated_env() -> bool:
    """Report whether the process runs under CI / an automated workflow.

    Checks the ``CI`` and ``GITHUB_ACTIONS`` environment variables (GitHub
    Actions sets both). A variable counts as set when its value, stripped of
    surrounding whitespace and lowercased, is one of the ``_TRUTHY`` strings.

    Returns:
        ``True`` if either variable holds a truthy value, ``False`` otherwise.
    """
    for name in ("CI", "GITHUB_ACTIONS"):
        flag = os.environ.get(name, "")
        if flag.strip().lower() in _TRUTHY:
            return True
    return False
60+
61+
62+ def _enforce_model_policy (model : str | None ) -> str | None :
63+ """Coerce non-sonnet models to sonnet in CI; pass through otherwise."""
64+ if model is None or not _is_automated_env () or "sonnet" in model .lower ():
65+ return model
66+ print (
67+ f"[behavioral] automated run: coercing model '{ model } ' -> "
68+ f"'{ AUTOMATED_MODEL } ' to cap token usage." ,
69+ flush = True ,
70+ )
71+ return AUTOMATED_MODEL
72+
4773
4874def _claude_env () -> dict [str , str ]:
4975 """Environment for `claude` subprocesses.
@@ -70,6 +96,7 @@ def check_api_reachable(model: str | None = DEFAULT_MODEL, timeout: int = 60) ->
7096 if not claude_bin :
7197 return False , "'claude' CLI not found on PATH"
7298
99+ model = _enforce_model_policy (model )
73100 cmd = [claude_bin , "-p" , "Reply with the single word: ok" , "--output-format" , "json" ]
74101 if model :
75102 cmd += ["--model" , model ]
@@ -314,7 +341,8 @@ def __init__(
314341 skill : str = DEFAULT_SKILL ,
315342 effort : str | None = DEFAULT_EFFORT ,
316343 ) -> None :
317- self .model = model
344+ # Coerce here so the agent run and the LLM judge share the capped model.
345+ self .model = _enforce_model_policy (model )
318346 self .skill = skill
319347 self .effort = effort
320348 self .workspace : Path | None = None
0 commit comments