Switching models during usage now works

qchapp · qchapp · commit 023e4324e0d7 · 2026-01-24T13:56:02.000+01:00
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -59,6 +59,7 @@ All notable changes to this project will be documented in this file.
 - **Legacy Method**: Removed `recommend_and_link()` method from `api/pipeline.py` (~180 lines) - only used by outdated tests, replaced by agent-based approach.
 - **State Variables**: Removed 3 Gradio State objects: `last_task_state`, `last_suggestions_state`, `excluded_names`.
 - **Outdated Tests**: Removed `tests/full_test.py` which only tested the removed `recommend_and_link()` method.
+- CLI no longer supports the `ai_agent ui` command.
 
 ### Fixed
 - **Conversation Context**: Agent now properly maintains conversation history, enabling natural understanding of follow-up requests like "show me alternatives".
diff --git a/src/ai_agent/agent/agent.py b/src/ai_agent/agent/agent.py
@@ -8,7 +8,7 @@
 from pydantic_ai.models.openai import OpenAIChatModel
 from pydantic_ai.providers.openai import OpenAIProvider
 
-from ai_agent.generator.prompts import AGENT_SYSTEM_PROMPT
+from ai_agent.generator.prompts import get_agent_system_prompt
 from ai_agent.generator.schema import ToolSelection
 from ai_agent.api.pipeline import RAGImagingPipeline
 from ai_agent.utils.utils import _best_runnable_link
@@ -54,7 +54,7 @@
 
 agent = Agent(
     model=openai_model,
-    system_prompt=AGENT_SYSTEM_PROMPT,
+    system_prompt=get_agent_system_prompt(os.getenv("NUM_CHOICES", "3")),
     deps_type=AgentState,
 )
 
@@ -65,7 +65,9 @@
 async def search_tools(ctx: RunContext[AgentState], query: str, excluded: List[str] | None = None, top_k: int = 12, original_formats: List[str] | None = None):
     # Merge explicit excluded param with state's excluded_tools
     all_excluded = list(set((excluded or []) + ctx.deps.excluded_tools))
-    out = tool_search_tools(SearchToolsInput(query=query, excluded=all_excluded, top_k=top_k, original_formats=original_formats or []))
+    # Use override from context if available
+    effective_top_k = ctx.deps.override_top_k if ctx.deps.override_top_k is not None else top_k
+    out = tool_search_tools(SearchToolsInput(query=query, excluded=all_excluded, top_k=effective_top_k, original_formats=original_formats or []))
     payload = [c.model_dump(mode="python") for c in out.candidates]
     ctx.deps.tool_calls.append({"tool": "search_tools", "query": query, "count": len(payload), "original_formats": original_formats or [], "excluded": all_excluded, "timestamp": datetime.now().isoformat()})
     return payload
@@ -161,6 +163,7 @@ def run_agent(
     image_meta: str | None = None,
     conversation_history: List[str] | None = None,
     model: str | None = None,
+    base_url: str | None = None,
     top_k: int | None = None,
     num_choices: int | None = None,
 ) -> AgentToolSelection:
@@ -172,7 +175,15 @@ def run_agent(
 
     tool_logs: List[ToolRunLog] = []
 
-    deps = AgentState(excluded_tools=excluded or [])
+    # Create AgentState with runtime overrides
+    deps = AgentState(
+        excluded_tools=excluded or [],
+        override_model=model,
+        override_base_url=base_url,
+        override_top_k=top_k,
+        override_num_choices=num_choices,
+    )
+    
     # Provide hidden metadata context lines (non-user-visible) below a delimiter
     hidden_meta = ""
     if original_formats:
@@ -194,7 +205,95 @@ def run_agent(
     else:
         prompt = task + extra_context + hidden_meta
     
-    result = agent.run_sync(prompt, deps=deps, output_type=ToolSelection, usage_limits=UsageLimits(tool_calls_limit=10)).output
+    # Determine which agent instance to use
+    agent_instance = agent  # Default to global agent
+    effective_num_choices = num_choices if num_choices is not None else 3
+    effective_model = model if model else agent_model_config.name
+    effective_top_k = top_k if top_k is not None else 12
+    
+    # When model is provided from UI, base_url comes with it (can be None for OpenAI)
+    # When model is NOT provided, use config defaults
+    if model:
+        # Model selected from dropdown - base_url parameter is authoritative
+        if base_url and "inference.rcp.epfl.ch" in base_url:
+            # EPFL model selected
+            runtime_api_key = os.getenv("EPFL_API_KEY")
+            if not runtime_api_key:
+                raise ValueError("EPFL_API_KEY not found. Cannot use EPFL models without VPN and API key.")
+            effective_base_url = base_url
+            log.info("✓ Using EPFL_API_KEY for EPFL inference server")
+        else:
+            # OpenAI or other model selected (base_url=None means OpenAI)
+            runtime_api_key = os.getenv("OPENAI_API_KEY")
+            if not runtime_api_key:
+                raise ValueError("OPENAI_API_KEY not found. Cannot use OpenAI models.")
+            effective_base_url = base_url  # Will be None for OpenAI
+            log.info("✓ Using OPENAI_API_KEY for OpenAI endpoint")
+    else:
+        # No model override - use config defaults
+        effective_base_url = agent_model_config.base_url
+        if effective_base_url and "inference.rcp.epfl.ch" in effective_base_url:
+            runtime_api_key = os.getenv("EPFL_API_KEY")
+            if not runtime_api_key:
+                raise ValueError("EPFL_API_KEY not found")
+            log.info("✓ Using EPFL_API_KEY from config")
+        else:
+            runtime_api_key = os.getenv("OPENAI_API_KEY")
+            if not runtime_api_key:
+                raise ValueError("OPENAI_API_KEY not found")
+            log.info("✓ Using OPENAI_API_KEY from config")
+    
+    # Log runtime configuration
+    endpoint_display = effective_base_url if effective_base_url else "api.openai.com"
+    log.info(
+        f"🤖 Agent execution - Model: {effective_model}, endpoint: {endpoint_display}, "
+        f"top_k: {effective_top_k}, num_choices: {effective_num_choices}, excluded: {len(excluded or [])}"
+    )
+    
+    # Create dynamic agent:
+    needs_dynamic_agent = (
+        (model and model != agent_model_config.name) or
+        (base_url is not None and base_url != agent_model_config.base_url) or
+        (runtime_api_key != api_key)  # API key mismatch - need new agent!
+    )
+    
+    if needs_dynamic_agent:
+        log.info(f"📦 Creating runtime agent with model={effective_model}, endpoint={effective_base_url or 'api.openai.com'}")
+        
+        runtime_provider = OpenAIProvider(
+            base_url=effective_base_url,
+            api_key=runtime_api_key,
+        )
+        runtime_model = OpenAIChatModel(model_name=effective_model, provider=runtime_provider)
+        agent_instance = Agent(
+            model=runtime_model,
+            system_prompt=get_agent_system_prompt(effective_num_choices),
+            deps_type=AgentState,
+        )
+        # Register tools on the dynamic agent
+        agent_instance.tool(search_tools, retries=2, prepare=cap_prepare)
+        agent_instance.tool(rerank, retries=2, prepare=cap_prepare)
+        agent_instance.tool(repo_info, retries=0, prepare=cap_prepare)
+        agent_instance.tool(resolve_demo_link, retries=2, prepare=cap_prepare)
+    elif num_choices is not None and num_choices != 3:
+        # Model/base_url same but num_choices differs - create agent with updated prompt
+        log.info(f"📦 Creating runtime agent with num_choices={effective_num_choices} (model: {effective_model})")
+        agent_instance = Agent(
+            model=openai_model,
+            system_prompt=get_agent_system_prompt(effective_num_choices),
+            deps_type=AgentState,
+        )
+        # Register tools on the dynamic agent
+        agent_instance.tool(search_tools, retries=2, prepare=cap_prepare)
+        agent_instance.tool(rerank, retries=2, prepare=cap_prepare)
+        agent_instance.tool(repo_info, retries=0, prepare=cap_prepare)
+        agent_instance.tool(resolve_demo_link, retries=2, prepare=cap_prepare)
+    else:
+        log.info(f"♻️  Using global agent (model: {effective_model}, num_choices: {effective_num_choices})")
+    
+    log.debug(f"Prompt length: {len(prompt)} chars, has_image: {image_data_url is not None}")
+    result = agent_instance.run_sync(prompt, deps=deps, output_type=ToolSelection, usage_limits=UsageLimits(tool_calls_limit=10)).output
+    log.info(f"✅ Agent execution complete - choices returned: {len(result.choices)}")
 
     # Convert tool call dicts into ToolRunLog entries
     for tc in deps.tool_calls:
diff --git a/src/ai_agent/agent/utils.py b/src/ai_agent/agent/utils.py
@@ -18,6 +18,12 @@ class AgentState(BaseModel):
     tool_counts: Dict[str, int] = Field(default_factory=dict)
     disabled_tools: Set[str] = Field(default_factory=set)
     excluded_tools: List[str] = Field(default_factory=list)  # Tools to exclude from search
+    
+    # Runtime overrides (session-only, not persisted)
+    override_model: Optional[str] = None
+    override_base_url: Optional[str] = None
+    override_top_k: Optional[int] = None
+    override_num_choices: Optional[int] = None
 
 # Quota decorator + prepare hook -----------------------------------------------
 
diff --git a/src/ai_agent/generator/prompts.py b/src/ai_agent/generator/prompts.py
@@ -27,7 +27,7 @@
 - Also include a one-line context explaining why you need this info (≤ 15 words).
 
 SCORING WHEN CLEAR (no question)
-- Rank up to NUM_CHOICES tools that truly match.
+- Rank up to {num_choices} tools that truly match.
 - Accuracy (0–100) = Task match (40) + Input compatibility (30) + Features (30).
 - Consider format friction (e.g., TIF→NIfTI conversion) in “compatibility” (±5 points).
 - Prefer tools matching the file extension/modality and 2D/3D nature.
@@ -37,19 +37,19 @@
   and include a structured reason and explanation.
 
 OUTPUT (valid JSON):
-{
-  "conversation": {
+{{
+  "conversation": {{
     "status": "needs_clarification" | "complete",
     "question": "string, required if status=needs_clarification",
     "context": "string, explain why you need this information",
     "options": ["option1", "option2", ...]  // optional; 3–5 max if present
-  },
+  }},
   "choices": [
-    {"name": "tool-name", "rank": 1, "accuracy": 95.5, "why": "...", "demo_link": "optional"}
+    {{"name": "tool-name", "rank": 1, "accuracy": 95.5, "why": "...", "demo_link": "optional"}}
   ],
   "reason": "no_suitable_tool | no_modality_match | no_task_match | no_dimension_match",
   "explanation": "string (required if choices is empty)"
-}
+}}
 
 CONSISTENCY RULES
 - If you return choices = [], you MUST set conversation.status = "complete" and include a reason + explanation.
@@ -77,7 +77,7 @@
     + "\n1. If task ambiguous (operation OR target structure missing) -> immediately return clarification JSON (NO tool calls). Treat ultra-generic inputs like 'help', 'help me', 'suggest tools', 'what can you do', or empty/emoji-only as ambiguous. Do NOT guess a modality or claim PNG just from a preview."
     + "\n2. Otherwise: call search_tools(query) ONCE early (pass original_formats param if present; do NOT manufacture or over-weight formats — they are a soft compatibility hint)."
     + "\n3. If you have >=3 plausible candidates and high confidence, you MAY skip rerank; else call rerank(query,candidate_names)."
-    + "\n4. Mandatory repo verification before final output: After search_tools (and optional rerank), take the top K ≤ 3 candidates you plan to return and you MUST call repo_info(url) once for each. Use the repo URL from the candidate payload (field name repo_url; fallback keys: github, url, homepage). If a candidate has no repo URL, drop it rather than guessing. Only after repo_info confirms alignment with the requested task should you call resolve_demo_link(name). Do not return any candidate that wasn’t verified by repo_info. Call `repo_info(url)` **only** with a GitHub repo URL or `owner/repo`. If a candidate lacks that, **drop it** (don’t pass papers, docs, or homepages)."
+    + "\n4. Mandatory repo verification before final output: After search_tools (and optional rerank), take the top K ≤ {num_choices} candidates you plan to return and you MUST call repo_info(url) once for each. Use the repo URL from the candidate payload (field name repo_url; fallback keys: github, url, homepage). If a candidate has no repo URL, drop it rather than guessing. Only after repo_info confirms alignment with the requested task should you call resolve_demo_link(name). Do not return any candidate that wasn't verified by repo_info. Call `repo_info(url)` **only** with a GitHub repo URL or `owner/repo`. If a candidate lacks that, **drop it** (don't pass papers, docs, or homepages)."
     + "\n5. The preview you receive may be PNG even if the original file is TIFF/DICOM/NIfTI, etc. Use provided original_formats hint (if any) for compatibility scoring only; do NOT assume a TIFF implies microscopy (could still be CT exported). Ask for modality if unclear."
     + "\n6. FINAL RESPONSE: ONE JSON object only — no prose, no code fences. Include conversation + choices (rank, accuracy, why) OR clarification question."
     + "\n7. Accuracy scoring: task(40)+compat(30)+features(30); incorporate original formats & 2D/3D nature from metadata; penalize format conversions (−5) if heavy."
@@ -89,4 +89,14 @@
       - repo_info(url="https://github.com/org/repo")   # for each finalist
       - resolve_demo_link(tool_name="ToolName")
       """
-)
+)
+
+
+def get_selector_system_prompt(num_choices: int = 3) -> str:
+    """Generate the system prompt with dynamic num_choices."""
+    return SELECTOR_SYSTEM.format(num_choices=num_choices)
+
+
+def get_agent_system_prompt(num_choices: int = 3) -> str:
+    """Generate the full agent system prompt with dynamic num_choices."""
+    return AGENT_SYSTEM_PROMPT.format(num_choices=num_choices)
diff --git a/src/ai_agent/generator/schema.py b/src/ai_agent/generator/schema.py
@@ -145,6 +145,25 @@ class NoToolReason(str, Enum):
     NO_DIMENSION_MATCH = "no_dimension_match"
     INVALID_FILES = "invalid_files"  
 
+class ConversationStatus(str, Enum):
+    NEEDS_CLARIFICATION = "needs_clarification"
+    COMPLETE = "complete"
+
+class Conversation(BaseModel):
+    status: ConversationStatus
+    question: Optional[str] = None
+    context: Optional[str] = None
+    options: Optional[List[str]] = None
+
+    @model_validator(mode='after')
+    def validate_fields(self) -> 'Conversation':
+        if self.status == ConversationStatus.NEEDS_CLARIFICATION:
+            if not self.question:
+                raise ValueError("Question required when status is needs_clarification")
+            if not self.context:
+                raise ValueError("Context required when status is needs_clarification")
+        return self
+
 class ToolChoice(BaseModel):
     name: str
     rank: int
@@ -197,28 +216,12 @@ def _expl_empty_to_none(cls, v):
         return None if v is None or str(v).strip() == "" else v
 
 
-class ConversationStatus(str, Enum):
-    NEEDS_CLARIFICATION = "needs_clarification"
-    COMPLETE = "complete"
-
-class Conversation(BaseModel):
-    status: ConversationStatus
-    question: Optional[str] = None
-    context: Optional[str] = None
-    options: Optional[List[str]] = None
-
-    @model_validator(mode='after')
-    def validate_fields(self) -> 'Conversation':
-        if self.status == ConversationStatus.NEEDS_CLARIFICATION:
-            if not self.question:
-                raise ValueError("Question required when status is needs_clarification")
-            if not self.context:
-                raise ValueError("Context required when status is needs_clarification")
-        return self
-
-
 __all__ = [
     "CandidateDoc",
     "PlanAndCode",
     "ToolSelection",
+    "Conversation",
+    "ConversationStatus",
+    "ToolChoice",
+    "NoToolReason",
 ]
diff --git a/src/ai_agent/ui/components.py b/src/ai_agent/ui/components.py
@@ -13,6 +13,30 @@
 
 log = logging.getLogger("chat_components")
 
+# Model configurations with their inference servers
+MODEL_CONFIGS = {
+    # OpenAI models (default endpoint)
+    "gpt-4o-mini": {"name": "gpt-4o-mini", "base_url": None, "provider": "OpenAI"},
+    "gpt-4o": {"name": "gpt-4o", "base_url": None, "provider": "OpenAI"},
+    "gpt-4-turbo": {"name": "gpt-4-turbo", "base_url": None, "provider": "OpenAI"},
+    
+    # EPFL inference server models
+    "openai/gpt-oss-120b [EPFL]": {
+        "name": "openai/gpt-oss-120b",
+        "base_url": "https://inference.rcp.epfl.ch/v1",
+        "provider": "EPFL"
+    },
+    "mistralai/Mistral-Small-3.2-24B-Instruct-2506 [EPFL]": {
+        "name": "mistralai/Mistral-Small-3.2-24B-Instruct-2506",
+        "base_url": "https://inference.rcp.epfl.ch/v1",
+        "provider": "EPFL"
+    },
+}
+
+def get_model_config(model_display_name: str) -> Dict[str, str]:
+    """Get model configuration from display name."""
+    return MODEL_CONFIGS.get(model_display_name, {"name": model_display_name, "base_url": None, "provider": "Unknown"})
+
 
 def create_chat_interface(doc_index: Dict[str, SoftwareDoc]):
     """
@@ -115,10 +139,10 @@ def create_chat_interface(doc_index: Dict[str, SoftwareDoc]):
         with gr.Accordion("⚙️ Settings", open=False):
             with gr.Row():
                 model_dropdown = gr.Dropdown(
-                    choices=["gpt-4o-mini", "gpt-4o", "gpt-4-turbo"],
-                    value=os.getenv("OPENAI_MODEL", "gpt-4o-mini"),
+                    choices=list(MODEL_CONFIGS.keys()),
+                    value="gpt-4o-mini",
                     label="Model",
-                    info="OpenAI model for agent reasoning",
+                    info="Select AI model and inference server",
                 )
                 top_k_slider = gr.Slider(
                     minimum=5,
diff --git a/src/ai_agent/ui/handlers.py b/src/ai_agent/ui/handlers.py