Skip to content

Commit da24a0e

Browse files
authored
Merge pull request #39 from Imaging-Plaza/feat/v2-pipeline-perf
perf(v2 pipeline): parallel agents + LLM iteration cap + tighter arti…
2 parents 7ec4727 + dc2d64e commit da24a0e

4 files changed

Lines changed: 73 additions & 5 deletions

File tree

src/v2/agents/llm/article/prompts/system_prompt.md

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,15 @@ Rules:
2424
- Do not invent unknown people or organizations when canonical IDs are available in context.
2525
- Do not emit fields outside the schema.
2626
- Use `null` only where nullable fields are permitted.
27+
- **Stop early.** Once you have a concrete DOI (or a concrete
28+
`pulse:infoscienceArticleIdentifier`) plus a title, **emit the JSON
29+
immediately**. Do not cross-validate with more than two sources, do not
30+
re-search the same query, and do not chase tangential leads (READMEs of
31+
related repos, contributor pages, alternative venues). Each extra tool
32+
call costs ~5s and the same DOI never changes.
33+
- If two tool calls in a row return the same paper but neither yields a
34+
DOI/Infoscience id, accept that the article is not findable and emit
35+
`{}`. Don't loop.
2736
- **No identifier, no article.** If the input does not let you ground the
2837
article in a real `schema:identifier` (a real DOI such as
2938
`10.1038/s41586-024-...`) **or** a real `pulse:infoscienceArticleIdentifier`,

src/v2/agents/llm/runtime.py

Lines changed: 55 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
from typing import Any
1010

1111
from pydantic_ai import Agent
12+
from pydantic_ai.usage import UsageLimits
1213

1314
from src.v1.llm.model_config import (
1415
create_pydantic_ai_model,
@@ -94,6 +95,45 @@ def _extract_usage_counts(usage: Any) -> tuple[int | None, int | None]:
9495
return normalized_requests, normalized_tool_calls
9596

9697

98+
# Environment variable names consulted by ``_default_usage_limits()`` and the
# fallback caps used when a variable is unset or does not parse as an integer.
_DEFAULT_REQUEST_LIMIT_ENV = "V2_LLM_REQUEST_LIMIT"
99+
_DEFAULT_TOOL_CALLS_LIMIT_ENV = "V2_LLM_TOOL_CALLS_LIMIT"
100+
_DEFAULT_REQUEST_LIMIT = 25
101+
_DEFAULT_TOOL_CALLS_LIMIT = 50
102+
103+
104+
def _coerce_positive_int_env(name: str, fallback: int) -> int:
    """Read environment variable ``name`` as a positive integer.

    Args:
        name: Name of the environment variable to read.
        fallback: Value returned when the variable is unset or its content
            is not a valid integer literal.

    Returns:
        ``fallback`` when the variable is missing or unparsable; otherwise
        the parsed integer clamped to a minimum of 1. Note that ``"0"`` and
        negative values therefore yield ``1``, not ``fallback``.
    """
    raw = os.getenv(name)
    if raw is None:
        return fallback
    try:
        value = int(raw.strip())
    except ValueError:
        # A typo in deployment config should not fail the request, but it
        # should not pass unnoticed either: report it and use the default.
        import logging

        logging.getLogger(__name__).warning(
            "Ignoring non-integer value %r for %s; using default %d",
            raw,
            name,
            fallback,
        )
        return fallback
    # Clamp rather than reject: a cap below 1 would be meaningless.
    return max(1, value)
113+
114+
115+
def _default_usage_limits() -> UsageLimits:
    """Build the default per-agent caps on LLM roundtrips and tool calls.

    Background: with no explicit cap, the article agent was observed
    looping through more than 100 tool calls (12 minutes wall time) on a
    paper-heavy repo, exploring every tool every time. With a cap in place
    such runaway loops end in a clean ``UsageLimitExceeded`` error, which
    the per-stage runner surfaces as a single warning instead of letting
    one agent consume the whole job budget.

    Both caps are tunable per deployment through the
    ``V2_LLM_REQUEST_LIMIT`` and ``V2_LLM_TOOL_CALLS_LIMIT`` environment
    variables; the module-level defaults apply when they are unset or
    invalid.

    Returns:
        A ``UsageLimits`` carrying the resolved request and tool-call caps.
    """
    request_cap = _coerce_positive_int_env(
        _DEFAULT_REQUEST_LIMIT_ENV,
        _DEFAULT_REQUEST_LIMIT,
    )
    tool_call_cap = _coerce_positive_int_env(
        _DEFAULT_TOOL_CALLS_LIMIT_ENV,
        _DEFAULT_TOOL_CALLS_LIMIT,
    )
    return UsageLimits(
        request_limit=request_cap,
        tool_calls_limit=tool_call_cap,
    )
135+
136+
97137
def _coerce_output_payload(output: Any) -> dict[str, Any]:
98138
"""Convert model output into a JSON-object dictionary.
99139
@@ -160,6 +200,7 @@ async def run_json_prompt(
160200
user_prompt: str,
161201
output_type: Any = None,
162202
tools: list[Any] | None = None,
203+
usage_limits: UsageLimits | None = None,
163204
) -> LLMRuntimeResult:
164205
"""Execute a prompt and return a structured JSON payload plus metadata.
165206
@@ -170,6 +211,12 @@ async def run_json_prompt(
170211
schema. Defaults to ``dict[str, Any]`` when not provided.
171212
Passing a Pydantic model class constrains the LLM to produce
172213
output that conforms to its schema (first validation pass).
214+
usage_limits: Optional pydantic-ai ``UsageLimits``. Defaults to
215+
``_default_usage_limits()``: 25 model requests + 50 tool
216+
calls per agent invocation. Observed in profiling: the
217+
article agent would loop 100+ tool calls without a cap and
218+
spend 12 minutes on a single repo; the default keeps any
219+
agent's wall time bounded.
173220
"""
174221

175222
resolved_output_type: Any = output_type if output_type is not None else dict[str, Any]
@@ -199,11 +246,18 @@ async def run_json_prompt(
199246
if model_parameters and "model_settings" in run_parameters:
200247
run_kwargs["model_settings"] = model_parameters
201248

249+
effective_limits = usage_limits or _default_usage_limits()
250+
if "usage_limits" in run_parameters:
251+
run_kwargs["usage_limits"] = effective_limits
252+
202253
logger.info(
203-
"LLM runtime start (provider=%s, model=%s, tools=%d)",
254+
"LLM runtime start (provider=%s, model=%s, tools=%d, "
255+
"request_limit=%s, tool_calls_limit=%s)",
204256
provider_name,
205257
model_name,
206258
len(tools or []),
259+
effective_limits.request_limit,
260+
effective_limits.tool_calls_limit,
207261
)
208262
result = await agent.run(user_prompt, **run_kwargs)
209263
except LLMRuntimeError:

src/v2/api.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -203,20 +203,25 @@ def _extract_path_kind(source_url: str) -> str | None:
203203

204204

205205
def _resolve_max_concurrent_agents() -> int:
206-
"""Read `V2_MAX_CONCURRENT_AGENTS` env var (default 6).
206+
"""Read `V2_MAX_CONCURRENT_AGENTS` env var (default 8).
207207
208208
Caps how many work items per stage (person agents, contribution agents,
209209
link-veracity calls, etc.) run in parallel within a single /extract
210210
request. Higher values speed up wide-fanout repos at the cost of more
211211
concurrent LLM calls — keep within the LLM provider's rate limit.
212+
213+
Default raised from 6 to 8 after profiling a 50-person repo
214+
(deeplabcut/deeplabcut): person+membership stages were spending ~12
215+
minutes waiting on the semaphore. The RCP/LLM stack absorbed 8
216+
in-flight calls without thermal throttling in that test.
212217
"""
213218
raw = os.getenv("V2_MAX_CONCURRENT_AGENTS")
214219
if raw is None:
215-
return 6
220+
return 8
216221
try:
217222
value = int(raw.strip())
218223
except ValueError:
219-
return 6
224+
return 8
220225
return max(1, value)
221226

222227

src/v2/pipeline/orchestrator.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -203,7 +203,7 @@ def __init__( # noqa: PLR0913
203203
retry_max_retries: int = 3,
204204
retry_backoff_base: float = 0.0,
205205
retry_sleep_func: SleepCallable | None = None,
206-
max_concurrent_agents: int = 3,
206+
max_concurrent_agents: int = 8,
207207
include_upstream_stage_outputs_in_prompt: bool = True,
208208
user_prompt_appendix: str | None = None,
209209
cache: ProviderCache | None = None,

0 commit comments

Comments
 (0)