feat(v2 runtime): V2_LLM_MODEL_OVERRIDE to swap the model across all profiles

caviri · caviri · commit 2c6e8b19941d · 2026-05-20T05:27:48.000Z
When the default `openai/gpt-oss-120b` on the shared EPFL inference
endpoint becomes degraded (200 OK + empty body, observed 2026-05-19/20),
operators previously had to edit `src/v1/llm/model_config.py` to repoint
all 18 analysis profiles at a working model and ship a patch.

Add a single env override read in `load_model_config()` that rewrites
the `model` field on every loaded profile when set, leaving the JSON
per-profile env overrides (`LLM_ANALYSIS_MODELS`, etc.) untouched.

Documented in `.env.example` with concrete fallback candidates that
were verified working on the endpoint at degradation time
(`Qwen/Qwen3-30B-A3B-Instruct-2507`, `mistralai/Ministral-3-8B-Instruct-2512`).
diff --git a/.env.example b/.env.example
@@ -75,6 +75,38 @@ OPENROUTER_API_KEY=your-openrouter-key-here
 # `excluded_entities` with reason "critic pruning".
 # V2_APPLY_CRITIC_PRUNING=false
 
+# Maximum number of LLM agents that may run concurrently inside any fan-out
+# stage (person/org/article/membership/contribution). Higher values speed up
+# heavy repos (renku-class, ~80 contributors) but push more parallel load
+# onto the model endpoint. Default: 6.
+# V2_MAX_CONCURRENT_AGENTS=6
+
+# Per-link veracity stage (Selenium fetch + LLM verdict on every external
+# URL the pipeline collects). Off by default — it is the slowest stage and
+# rarely changes outputs; enable only when auditing link rot or veracity.
+# V2_LINK_VERACITY_ENABLED=false
+
+# Hybrid refiner stage: after the rule-based pipeline runs, the LLM agent
+# pool refines/repairs each entity (canonical IDs, missing fields). Default
+# `true` for `agent_runtime=llm` and ignored for `rule_based`.
+# V2_HYBRID_REFINER_ENABLED=true
+
+# Top-N bookend contributors to materialise in context_gather (first/last
+# committers by date). Larger values widen the scout brief but inflate the
+# token bill. Default: 50.
+# V2_CONTRIBUTOR_BOOKENDS_TOP_N=50
+
+# Override the LLM model used by every v2 agent profile: rewrites `model`
+# on every default `MODEL_CONFIGS[*]` entry at load time (JSON per-profile
+# overrides such as `LLM_ANALYSIS_MODELS` are left untouched). Useful when
+# the default model on the shared inference endpoint is degraded and you
+# want a temporary fallback without editing `src/v1/llm/model_config.py`.
+# Unset / empty = use the defaults (currently `openai/gpt-oss-120b`).
+# Examples for inference-rcp.epfl.ch when gpt-oss is down:
+#   V2_LLM_MODEL_OVERRIDE=Qwen/Qwen3-30B-A3B-Instruct-2507
+#   V2_LLM_MODEL_OVERRIDE=mistralai/Ministral-3-8B-Instruct-2512
+# V2_LLM_MODEL_OVERRIDE=
+
 # Scout-mode upgrade for the `context_summary` LLM stage. When `true`,
 # the summary agent gets the broad RAG-search toolkit (orcid, ror,
 # infoscience, openalex, zenodo, ethz_research_collection, huggingface,
@@ -121,6 +153,12 @@ OPENROUTER_API_KEY=your-openrouter-key-here
 # `logs/v2_queries/` (relative to the server's working directory).
 # V2_QUERY_LOG_DIR=logs/v2_queries
 
+# Log level for the v2 skill subprocesses (selenium_fetch and the other
+# search_* skills under src/v2/skills/). Independent from LOG_LEVEL so the
+# noisy skill output can stay quiet while the main FastAPI app runs at INFO.
+# Default: WARNING.
+# V2_SKILL_LOG_LEVEL=WARNING
+
 # ---------------------------------------------------------------------------
 # src/index/* (Infoscience + OpenAlex RAG indexing)
 # ---------------------------------------------------------------------------
@@ -134,6 +172,10 @@ OPENROUTER_API_KEY=your-openrouter-key-here
 # `config/index/infoscience.yaml`. RCP and Infoscience auth tokens are
 # read from `RCP_TOKEN` / `INFOSCIENCE_TOKEN` above.
 
+# Set to `false` to disable the V2 Infoscience RAG agent tool. Default is
+# on; the tool degrades gracefully when the Qdrant collection is missing.
+# V2_INFOSCIENCE_RAG_ENABLED=true
+
 # ---------------------------------------------------------------------------
 # src/index/openalex — OpenAlex ingestion + RAG over EPFL/Switzerland
 # ---------------------------------------------------------------------------
@@ -157,6 +199,10 @@ OPENROUTER_API_KEY=your-openrouter-key-here
 # INDEX_OPENALEX_SCOPE_ROR=https://ror.org/02s376052
 # INDEX_OPENALEX_SCOPE_COUNTRY=ch
 
+# Set to `false` to disable the V2 OpenAlex RAG agent tool. Default is on;
+# the tool degrades gracefully when the Qdrant collection is missing.
+# V2_OPENALEX_RAG_ENABLED=true
+
 # ---------------------------------------------------------------------------
 # src/index/huggingface — HuggingFace ingestion + RAG over EPFL/Switzerland
 # ---------------------------------------------------------------------------
@@ -173,6 +219,10 @@ OPENROUTER_API_KEY=your-openrouter-key-here
 # Optional: override the active scope at runtime (epfl | switzerland).
 # INDEX_HUGGINGFACE_SCOPE=epfl
 
+# Set to `false` to disable the V2 HuggingFace RAG agent tool. Default is on;
+# the tool degrades gracefully when the Qdrant collection is missing.
+# V2_HUGGINGFACE_RAG_ENABLED=true
+
 # ---------------------------------------------------------------------------
 # src/index/zenodo — Zenodo ingestion + RAG over EPFL/Switzerland
 # ---------------------------------------------------------------------------
@@ -189,6 +239,10 @@ OPENROUTER_API_KEY=your-openrouter-key-here
 # Optional: override the active scope at runtime (epfl | switzerland).
 # INDEX_ZENODO_SCOPE=epfl
 
+# Set to `false` to disable the V2 Zenodo RAG agent tool. Default is on;
+# the tool degrades gracefully when the Qdrant collection is missing.
+# V2_ZENODO_RAG_ENABLED=true
+
 
 # ---------------------------------------------------------------------------
 # SWISSUbase index module (`src/index/swissubase/`)
@@ -335,5 +389,38 @@ EPFL_GRAPH_PASSWORD=
 # V2_CONCEPT_TAGGING_OPENALEX_RELATED_ENABLED=false
 
 
+# ---------------------------------------------------------------------------
+# Pure-API RAG tools (no local index — direct upstream API calls per query)
+# ---------------------------------------------------------------------------
+# These tools wrap public REST APIs as LLM agent tools. They don't ingest
+# anything locally; each agent invocation issues a fresh HTTP call. Unless
+# noted otherwise they default to on; flip to `false` to remove the tool
+# from the agent's toolkit (useful for offline testing or a flaky upstream).
+
+# ORCID public-API search (people lookup by name / affiliation / ORCID iD).
+# Requires ORCID_CLIENT_ID / ORCID_CLIENT_SECRET below for authenticated
+# access; without them the tool runs against the public anonymous bucket.
+# V2_ORCID_RAG_ENABLED=true
+
+# ROR (Research Organization Registry) search — organization disambiguation
+# by name / acronym / country. Credential-free public API.
+# V2_ROR_RAG_ENABLED=true
+
+# SNSF (Swiss National Science Foundation) grant + person search via the
+# public Data Portal API. Credential-free.
+# V2_SNSF_RAG_ENABLED=true
+
+# ETHZ Research Collection (DSpace) tool. Uses ETHZ_RESEARCH_COLLECTION_TOKEN
+# when set; otherwise falls back to public endpoints.
+# V2_ETHZ_RESEARCH_COLLECTION_RAG_ENABLED=true
+
+# Federated RAG router — single agent-facing tool that fans out to every
+# enabled RAG provider above (Infoscience + OpenAlex + Zenodo + HuggingFace +
+# OAM + ORCID + ROR + SNSF + ETHZ-RC + GitHub + RenkuLab + SWISSUbase) and
+# merges hits. Off by default for individual-tool clarity; uncomment the
+# line below to turn it on for scout-mode brainstorm runs.
+# V2_FEDERATED_RAG_ENABLED=true
+
+
 ORCID_CLIENT_ID=
 ORCID_CLIENT_SECRET=
diff --git a/src/v1/llm/model_config.py b/src/v1/llm/model_config.py
@@ -508,8 +508,16 @@ def load_model_config(analysis_type: str) -> List[Dict[str, Any]]:
             logger.error(f"Invalid JSON in {env_var}: {e}")
             logger.info(f"Falling back to default configuration for {analysis_type}")
 
-    # Return default configuration
-    return MODEL_CONFIGS.get(analysis_type, [])
+    configs = list(MODEL_CONFIGS.get(analysis_type, []))
+
+    # Swap the `model` field across every profile when an override env var is
+    # set. Useful when the default model on a shared inference endpoint is
+    # degraded and a temporary fallback is needed without editing this file.
+    override = os.getenv("V2_LLM_MODEL_OVERRIDE")
+    if override and configs:
+        configs = [{**c, "model": override} for c in configs]
+
+    return configs
 
 
 def create_pydantic_ai_model(config: Dict[str, Any]):