99from typing import Any
1010
1111from pydantic_ai import Agent
12+ from pydantic_ai .usage import UsageLimits
1213
1314from src .v1 .llm .model_config import (
1415 create_pydantic_ai_model ,
@@ -94,6 +95,45 @@ def _extract_usage_counts(usage: Any) -> tuple[int | None, int | None]:
9495 return normalized_requests , normalized_tool_calls
9596
9697
# Cap on model requests per agent run: the environment variable that
# overrides it, followed by the value used when no valid override is set.
_DEFAULT_REQUEST_LIMIT_ENV = "V2_LLM_REQUEST_LIMIT"
_DEFAULT_REQUEST_LIMIT = 25

# Cap on tool calls per agent run, laid out the same way.
_DEFAULT_TOOL_CALLS_LIMIT_ENV = "V2_LLM_TOOL_CALLS_LIMIT"
_DEFAULT_TOOL_CALLS_LIMIT = 50
102+
103+
def _coerce_positive_int_env(name: str, fallback: int) -> int:
    """Read a positive integer from the environment variable ``name``.

    Args:
        name: Environment variable to read.
        fallback: Value returned when the variable is unset or does not
            parse as an integer. It is returned as-is (not clamped).

    Returns:
        ``fallback`` when the variable is unset or unparsable; otherwise the
        parsed integer, raised to ``1`` when it is zero or negative.

    A warning is logged whenever a value that *is* set gets ignored or
    clamped, so a misconfigured deployment is visible in the logs instead
    of silently running with a different limit than the operator intended.
    """
    # Function-scope import keeps this helper self-contained.
    import logging

    raw = os.getenv(name)
    if raw is None:
        return fallback

    log = logging.getLogger(__name__)
    try:
        value = int(raw.strip())
    except ValueError:
        log.warning(
            "Ignoring non-integer value %r for %s; using default %s",
            raw,
            name,
            fallback,
        )
        return fallback

    if value < 1:
        log.warning("%s=%d is not positive; clamping to 1", name, value)
        return 1
    return value
113+
114+
def _default_usage_limits() -> UsageLimits:
    """Build the default ``UsageLimits`` applied to an agent run.

    Two caps are set: the number of model requests and the number of tool
    calls. Each is read from its environment variable
    (``V2_LLM_REQUEST_LIMIT`` / ``V2_LLM_TOOL_CALLS_LIMIT``) so it can be
    tuned per deployment, with the module-level default used otherwise.

    Rationale: without an explicit cap, the article agent was observed
    looping through more than 100 tool calls (about 12 minutes of wall
    time) on a paper-heavy repo. With a cap, such a runaway loop ends in a
    ``UsageLimitExceeded`` error that the per-stage runner surfaces as a
    single warning, rather than consuming the whole job budget.
    """
    # Resolve the request cap first, then the tool-call cap.
    max_requests = _coerce_positive_int_env(
        _DEFAULT_REQUEST_LIMIT_ENV,
        _DEFAULT_REQUEST_LIMIT,
    )
    max_tool_calls = _coerce_positive_int_env(
        _DEFAULT_TOOL_CALLS_LIMIT_ENV,
        _DEFAULT_TOOL_CALLS_LIMIT,
    )
    return UsageLimits(
        request_limit=max_requests,
        tool_calls_limit=max_tool_calls,
    )
135+
136+
97137def _coerce_output_payload (output : Any ) -> dict [str , Any ]:
98138 """Convert model output into a JSON-object dictionary.
99139
@@ -160,6 +200,7 @@ async def run_json_prompt(
160200 user_prompt : str ,
161201 output_type : Any = None ,
162202 tools : list [Any ] | None = None ,
203+ usage_limits : UsageLimits | None = None ,
163204 ) -> LLMRuntimeResult :
164205 """Execute a prompt and return a structured JSON payload plus metadata.
165206
@@ -170,6 +211,12 @@ async def run_json_prompt(
170211 schema. Defaults to ``dict[str, Any]`` when not provided.
171212 Passing a Pydantic model class constrains the LLM to produce
172213 output that conforms to its schema (first validation pass).
214+ usage_limits: Optional pydantic-ai ``UsageLimits``. Defaults to
215+ ``_default_usage_limits()``: 25 model requests + 50 tool
216+ calls per agent invocation. Observed in profiling: the
217+ article agent would loop 100+ tool calls without a cap and
218+ spend 12 minutes on a single repo; the default keeps any
219+ agent's wall time bounded.
173220 """
174221
175222 resolved_output_type : Any = output_type if output_type is not None else dict [str , Any ]
@@ -199,11 +246,18 @@ async def run_json_prompt(
199246 if model_parameters and "model_settings" in run_parameters :
200247 run_kwargs ["model_settings" ] = model_parameters
201248
249+ effective_limits = usage_limits or _default_usage_limits ()
250+ if "usage_limits" in run_parameters :
251+ run_kwargs ["usage_limits" ] = effective_limits
252+
202253 logger .info (
203- "LLM runtime start (provider=%s, model=%s, tools=%d)" ,
254+ "LLM runtime start (provider=%s, model=%s, tools=%d, "
255+ "request_limit=%s, tool_calls_limit=%s)" ,
204256 provider_name ,
205257 model_name ,
206258 len (tools or []),
259+ effective_limits .request_limit ,
260+ effective_limits .tool_calls_limit ,
207261 )
208262 result = await agent .run (user_prompt , ** run_kwargs )
209263 except LLMRuntimeError :
0 commit comments