Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions src/v2/agents/llm/article/prompts/system_prompt.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,15 @@ Rules:
- Do not invent unknown people or organizations when canonical IDs are available in context.
- Do not emit fields outside the schema.
- Use `null` only where nullable fields are permitted.
- **Stop early.** Once you have a concrete DOI (or a concrete
`pulse:infoscienceArticleIdentifier`) plus a title, **emit the JSON
immediately**. Do not cross-validate with more than two sources, do not
re-search the same query, and do not chase tangential leads (READMEs of
related repos, contributor pages, alternative venues). Each extra tool
call costs ~5s and the same DOI never changes.
- If two tool calls in a row return the same paper but neither yields a
DOI/Infoscience id, accept that the article is not findable and emit
`{}`. Don't loop.
- **No identifier, no article.** If the input does not let you ground the
article in a real `schema:identifier` (a real DOI such as
`10.1038/s41586-024-...`) **or** a real `pulse:infoscienceArticleIdentifier`,
Expand Down
56 changes: 55 additions & 1 deletion src/v2/agents/llm/runtime.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from typing import Any

from pydantic_ai import Agent
from pydantic_ai.usage import UsageLimits

from src.v1.llm.model_config import (
create_pydantic_ai_model,
Expand Down Expand Up @@ -94,6 +95,45 @@ def _extract_usage_counts(usage: Any) -> tuple[int | None, int | None]:
return normalized_requests, normalized_tool_calls


# Names of the environment variables that override the per-agent usage caps.
_DEFAULT_REQUEST_LIMIT_ENV = "V2_LLM_REQUEST_LIMIT"
_DEFAULT_TOOL_CALLS_LIMIT_ENV = "V2_LLM_TOOL_CALLS_LIMIT"
# Caps used when the corresponding variable is unset or is not an integer.
_DEFAULT_REQUEST_LIMIT = 25
_DEFAULT_TOOL_CALLS_LIMIT = 50


def _coerce_positive_int_env(name: str, fallback: int) -> int:
    """Read a positive integer from the environment variable ``name``.

    Args:
        name: Environment variable to read.
        fallback: Value returned when the variable is unset or does not
            parse as an integer.

    Returns:
        ``fallback`` when the variable is missing or unparsable; otherwise
        the parsed integer clamped to a minimum of 1, so a zero or negative
        setting can never produce a non-positive limit.
    """
    raw = os.getenv(name)
    if raw is None:
        return fallback
    try:
        value = int(raw.strip())
    except ValueError:
        # Stay best-effort (never crash on bad config), but make the
        # misconfiguration visible instead of silently ignoring it.
        import logging

        logging.getLogger(__name__).warning(
            "Ignoring invalid integer for %s=%r; using fallback %s",
            name,
            raw,
            fallback,
        )
        return fallback
    return max(1, value)


def _default_usage_limits() -> UsageLimits:
    """Build the default per-agent cap on model requests and tool calls.

    Rationale recorded with the original change: without an explicit cap
    the article agent was observed looping through more than 100 tool
    calls (about 12 minutes of wall time) on a paper-heavy repo. With a
    cap, such runaway loops end in a ``UsageLimitExceeded`` error that the
    per-stage runner reports as a single warning rather than letting one
    agent consume the whole job budget.

    Both limits can be tuned per deployment through the
    ``V2_LLM_REQUEST_LIMIT`` and ``V2_LLM_TOOL_CALLS_LIMIT`` environment
    variables; the module-level defaults apply otherwise.

    Returns:
        A ``UsageLimits`` with ``request_limit`` and ``tool_calls_limit``
        populated.
    """
    max_requests = _coerce_positive_int_env(
        _DEFAULT_REQUEST_LIMIT_ENV,
        _DEFAULT_REQUEST_LIMIT,
    )
    max_tool_calls = _coerce_positive_int_env(
        _DEFAULT_TOOL_CALLS_LIMIT_ENV,
        _DEFAULT_TOOL_CALLS_LIMIT,
    )
    return UsageLimits(
        request_limit=max_requests,
        tool_calls_limit=max_tool_calls,
    )


def _coerce_output_payload(output: Any) -> dict[str, Any]:
"""Convert model output into a JSON-object dictionary.

Expand Down Expand Up @@ -160,6 +200,7 @@ async def run_json_prompt(
user_prompt: str,
output_type: Any = None,
tools: list[Any] | None = None,
usage_limits: UsageLimits | None = None,
) -> LLMRuntimeResult:
"""Execute a prompt and return a structured JSON payload plus metadata.

Expand All @@ -170,6 +211,12 @@ async def run_json_prompt(
schema. Defaults to ``dict[str, Any]`` when not provided.
Passing a Pydantic model class constrains the LLM to produce
output that conforms to its schema (first validation pass).
usage_limits: Optional pydantic-ai ``UsageLimits``. Defaults to
``_default_usage_limits()``: 25 model requests + 50 tool
calls per agent invocation. Observed in profiling: the
article agent would loop 100+ tool calls without a cap and
spend 12 minutes on a single repo; the default keeps any
agent's wall time bounded.
"""

resolved_output_type: Any = output_type if output_type is not None else dict[str, Any]
Expand Down Expand Up @@ -199,11 +246,18 @@ async def run_json_prompt(
if model_parameters and "model_settings" in run_parameters:
run_kwargs["model_settings"] = model_parameters

effective_limits = usage_limits or _default_usage_limits()
if "usage_limits" in run_parameters:
run_kwargs["usage_limits"] = effective_limits

logger.info(
"LLM runtime start (provider=%s, model=%s, tools=%d)",
"LLM runtime start (provider=%s, model=%s, tools=%d, "
"request_limit=%s, tool_calls_limit=%s)",
provider_name,
model_name,
len(tools or []),
effective_limits.request_limit,
effective_limits.tool_calls_limit,
)
result = await agent.run(user_prompt, **run_kwargs)
except LLMRuntimeError:
Expand Down
11 changes: 8 additions & 3 deletions src/v2/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -203,20 +203,25 @@ def _extract_path_kind(source_url: str) -> str | None:


def _resolve_max_concurrent_agents() -> int:
    """Read `V2_MAX_CONCURRENT_AGENTS` env var (default 8).

    Caps how many work items per stage (person agents, contribution agents,
    link-veracity calls, etc.) run in parallel within a single /extract
    request. Higher values speed up wide-fanout repos at the cost of more
    concurrent LLM calls — keep within the LLM provider's rate limit.

    Default raised from 6 to 8 after profiling a 50-person repo
    (deeplabcut/deeplabcut): person+membership stages were spending ~12
    minutes waiting on the semaphore. The RCP/LLM stack absorbed 8
    in-flight calls without thermal throttling in that test.

    Returns:
        The default when the variable is unset or not an integer;
        otherwise the parsed value, clamped to a minimum of 1.
    """
    # Single source of truth for the default, so the unset and unparsable
    # paths cannot drift apart (or away from the docstring) again.
    default = 8
    raw = os.getenv("V2_MAX_CONCURRENT_AGENTS")
    if raw is None:
        return default
    try:
        value = int(raw.strip())
    except ValueError:
        return default
    return max(1, value)


Expand Down
2 changes: 1 addition & 1 deletion src/v2/pipeline/orchestrator.py
Original file line number Diff line number Diff line change
Expand Up @@ -203,7 +203,7 @@ def __init__( # noqa: PLR0913
retry_max_retries: int = 3,
retry_backoff_base: float = 0.0,
retry_sleep_func: SleepCallable | None = None,
max_concurrent_agents: int = 3,
max_concurrent_agents: int = 8,
include_upstream_stage_outputs_in_prompt: bool = True,
user_prompt_appendix: str | None = None,
cache: ProviderCache | None = None,
Expand Down
Loading