Imaging-Plaza
diff --git a/‎config.yaml‎
Lines changed: 8 additions & 8 deletions b/‎config.yaml‎
Lines changed: 8 additions & 8 deletions
diff --git a/‎src/ai_agent/agent/agent.py‎
Lines changed: 1 addition & 28 deletions b/‎src/ai_agent/agent/agent.py‎
Lines changed: 1 addition & 28 deletions
diff --git a/‎src/ai_agent/agent/tools/search_alternative_tool.py‎
Lines changed: 3 additions & 6 deletions b/‎src/ai_agent/agent/tools/search_alternative_tool.py‎
Lines changed: 3 additions & 6 deletions
diff --git a/‎src/ai_agent/agent/tools/search_tool.py‎
Lines changed: 5 additions & 6 deletions b/‎src/ai_agent/agent/tools/search_tool.py‎
Lines changed: 5 additions & 6 deletions
diff --git a/‎src/ai_agent/api/pipeline.py‎
Lines changed: 78 additions & 50 deletions b/‎src/ai_agent/api/pipeline.py‎
Lines changed: 78 additions & 50 deletions
@@ -1,13 +1,13 @@
 # AI Agent Model Configuration
 
 # Default config
-agent_model:
-  name: "gpt-5.1" # "gpt-4o"                     # Model name
-  base_url: null                          # null for default OpenAI endpoint
-  api_key_env: "OPENAI_API_KEY"          # Environment variable containing API key
+# agent_model:
+#   name: "gpt-5.1" # "gpt-4o"                     # Model name
+#   base_url: null                          # null for default OpenAI endpoint
+#   api_key_env: "OPENAI_API_KEY"          # Environment variable containing API key
 
 # Using EPFL's inference server
-# agent_model:
-#   name: "openai/gpt-oss-120b"
-#   base_url: "https://inference.rcp.epfl.ch/v1"
-#   api_key_env: "EPFL_API_KEY"           # Set EPFL_API_KEY in .env
+agent_model:
+  name: "openai/gpt-oss-120b"
+  base_url: "https://inference.rcp.epfl.ch/v1"
+  api_key_env: "EPFL_API_KEY"           # Set EPFL_API_KEY in .env
@@ -20,7 +20,6 @@
     RepoSummaryInput,
     coerce_github_url_or_none,
 )
-from .tools.rerank_tool import tool_rerank, RerankInput
 from .tools.search_tool import tool_search_tools, SearchToolsInput
 from .tools.search_alternative_tool import tool_search_alternative, SearchAlternativeInput
 from .tools.gradio_space_tool import tool_run_example, RunExampleInput
@@ -124,32 +123,6 @@ async def search_tools(
     return [c.model_dump(mode="python") for c in out.candidates]
 
 
-@agent.tool(retries=2, prepare=cap_prepare)
-@limit_tool_calls("rerank", cap=3)
-async def rerank(
-    ctx: RunContext[AgentState],
-    query: str,
-    candidate_names: List[str],
-    top_k: int = 5,
-) -> List[dict]:
-    """
-    Cross-encoder reranker over a small set of candidate tool names.
-    """
-    out = tool_rerank(
-        RerankInput(query=query, candidate_names=candidate_names, top_k=top_k)
-    )
-    ctx.deps.tool_calls.append(
-        {
-            "tool": "rerank",
-            "query": query,
-            "used_model": out.used_model,
-            "count": len(out.reranked),
-            "timestamp": datetime.now().isoformat()
-        }
-    )
-    return list(out.reranked)
-
-
 @agent.tool(retries=2, prepare=cap_prepare)
 @limit_tool_calls("search_alternative", cap=3)
 async def search_alternative(
@@ -159,7 +132,7 @@ async def search_alternative(
     top_k: int = 12,
 ) -> List[dict]:
     """
-    Search with an alternative query formulation.
+    Search with an alternative query formulation (includes automatic reranking).
     """
     # Merge exclusions
     explicit_excluded = excluded or []
 
@@ -30,7 +30,7 @@ class SearchAlternativeOutput(BaseModel):
 
 def tool_search_alternative(inp: SearchAlternativeInput) -> SearchAlternativeOutput:
     """
-    Search with an alternative query formulation.
+    Search with an alternative query formulation, with automatic reranking.
     
     This tool allows the agent to explicitly try a different search approach
     when initial results are not satisfactory.
@@ -67,15 +67,12 @@ def tool_search_alternative(inp: SearchAlternativeInput) -> SearchAlternativeOut
             query + " " + " ".join(f"format:{t}" for t in fmt_tokens)
         ).strip()
 
-    # Call retrieval with the alternative query
-    # Set min_results=0 to prevent automatic retry (agent is already retrying)
-    hits = pipe.retrieve_no_rerank(
+    # Call retrieve(), which reranks automatically (retries now follow the pipeline's min_results/max_retries)
+    hits = pipe.retrieve(
         query,
         image_paths=inp.image_paths or None,
         exclusions=inp.excluded,
         top_k=inp.top_k,
-        min_results=0,  # Disable automatic retry since agent controls this
-        max_retries=0,  # Disable automatic retry
     )
 
     # Convert hits to CandidateDoc objects
 
@@ -18,13 +18,13 @@ class SearchToolsOutput(BaseModel):
 
 def tool_search_tools(inp: SearchToolsInput) -> SearchToolsOutput:
     """
-    Search tools WITHOUT reranker.
+    Search tools with automatic reranking.
 
-    - Uses dense retrieval with similarity-based query expansion.
+    - Uses dense retrieval with dictionary-based query expansion.
+    - Applies CrossEncoder reranking automatically for best results.
     - Softly biases results using file-format hints (format:EXT).
     - Optionally uses `image_paths` so the pipeline can derive additional
       hints (modality / anatomy / dims) directly from the image files.
-    - Includes automatic retry logic if insufficient results are found.
     """
     pipe = get_pipeline()
 
@@ -76,9 +76,8 @@ def tool_search_tools(inp: SearchToolsInput) -> SearchToolsOutput:
             base_query + " " + " ".join(f"format:{t}" for t in fmt_tokens)
         ).strip()
 
-    # 5) Call the vector index with similarity expansion and automatic retry
-    # The pipeline now handles similarity-based expansion internally
-    hits = pipe.retrieve_no_rerank(
+    # 5) Call retrieve(), which includes automatic reranking
+    hits = pipe.retrieve(
         base_query,
         image_paths=inp.image_paths or None,
         exclusions=inp.excluded,
 
@@ -19,9 +19,18 @@
 
 
 class RAGImagingPipeline:
-    def __init__(self, index_dir: Optional[str] = None):
+    def __init__(
+        self,
+        index_dir: Optional[str] = None,
+        min_results: int = 5,
+        max_retries: int = 2,
+    ):
+        """Initialize the pipeline; min_results/max_retries tune retrieve_no_rerank() retries."""
         self.index_dir = Path(index_dir or os.getenv("RAG_INDEX_DIR", "artifacts/rag_index"))
         self.index_dir.mkdir(parents=True, exist_ok=True)
+        
+        self.min_results = min_results
+        self.max_retries = max_retries
 
         self.embedder = LocalBGEEmbedder()
         self.reranker = CrossEncoderReranker()
@@ -115,42 +124,37 @@ def retrieve_no_rerank(
         image_paths: Optional[List[str]] = None,
         top_k: int = 30,
         exclusions: Optional[List[str]] = None,
-        max_retries: int = 2,
-        min_results: int = 5,
     ) -> List[dict]:
         """
         Return raw vector hits WITHOUT applying the CrossEncoder reranker.
 
         Each item: {id, doc, score}. Optional `image_paths` are used to derive
         additional text hints (format / modality / anatomy / dims) that are
         appended to the query before embedding.
+        
+        Uses BGE-M3 semantic embeddings only; CrossEncoder reranking is applied afterwards by retrieve().
         """
 
         def _norm(s: str) -> str:
             return re.sub(r"\s+", " ", (s or "").strip().lower())
 
         excluded_norm = {_norm(x) for x in (exclusions or []) if x}
 
-        # 1) Strip any tags from the query (your existing behavior)
+        # 1) Strip any tags from the query
         clean_q = strip_tags(query)
 
         # 2) Add image-derived hints (format, modality, anatomy, dims, ...)
         image_hints = self._build_image_hint_text(image_paths)
         if image_hints:
-            clean_q = f"{clean_q} {image_hints}".strip() if clean_q else image_hints
-
-        # 3) Apply similarity-based expansion
-        if hasattr(self.index, 'similarity_expander') and self.index.similarity_expander.vocabulary:
-            expanded_q = self.index.similarity_expander.expand_query(clean_q)
-            log.info(f"Similarity-expanded query: {clean_q} → {expanded_q}")
+            final_q = f"{clean_q} {image_hints}".strip()
         else:
-            expanded_q = clean_q
+            final_q = clean_q
+        
+        log.info(f"Retrieval query: {clean_q}" + (f" + metadata: {image_hints[:50]}..." if image_hints else ""))
 
-        log.info(f"Final retrieval query: {expanded_q}")
-
-        # 4) Vector search with automatic retry logic
+        # 4) Vector search
         pool_k = max(50, top_k * 3)
-        hits = self.index.search(expanded_q, k=pool_k, reranker=None)
+        hits = self.index.search(final_q, k=pool_k, reranker=None)
 
         # 5) Apply name-based exclusions if any
         if excluded_norm:
@@ -160,46 +164,39 @@ def _norm(s: str) -> str:
                 if _norm(getattr(h["doc"], "name", "")) not in excluded_norm
             ]
 
-        # 6) Check if results are sufficient, retry with alternatives if not
+        # 6) Check if results are sufficient, retry with broader terms if not
         attempt = 0
-        while len(hits) < min_results and attempt < max_retries:
+        while len(hits) < self.min_results and attempt < self.max_retries:
             attempt += 1
-            log.info(f"Insufficient results ({len(hits)} < {min_results}), attempting retry {attempt}/{max_retries}")
+            log.info(f"Insufficient results ({len(hits)} < {self.min_results}), attempting retry {attempt}/{self.max_retries}")
 
-            # Generate alternative query using similarity expander
-            if hasattr(self.index, 'similarity_expander') and self.index.similarity_expander.vocabulary:
-                alternatives = self.index.similarity_expander.suggest_alternative_queries(
-                    clean_q,
-                    num_alternatives=1
-                )
-                if alternatives:
-                    alt_query = alternatives[0]
-                    log.info(f"Trying alternative query: {alt_query}")
-                    
-                    # Add image hints to alternative
-                    if image_hints:
-                        alt_query = f"{alt_query} {image_hints}".strip()
-                    
-                    # Expand alternative query
-                    expanded_alt = self.index.similarity_expander.expand_query(alt_query)
-                    
-                    # Search with alternative
-                    alt_hits = self.index.search(expanded_alt, k=pool_k, reranker=None)
-                    
-                    # Merge results (avoiding duplicates)
-                    existing_ids = {h["id"] for h in hits}
-                    for h in alt_hits:
-                        if h["id"] not in existing_ids:
-                            if not excluded_norm or _norm(getattr(h["doc"], "name", "")) not in excluded_norm:
-                                hits.append(h)
-                                existing_ids.add(h["id"])
-                    
-                    log.info(f"After retry {attempt}: {len(hits)} total results")
+            # Generate a broader alternative by truncating the query.
+            # Strategy: keep only the first 3 words (requires a query longer than 3 words).
+            words = clean_q.split()
+            if len(words) > 3:
+                alt_task = " ".join(words[:3])
+                log.info(f"Trying broader query: {alt_task}")
+                
+                # Build alternative query with image hints
+                if image_hints:
+                    alt_q = f"{alt_task} {image_hints}".strip()
                 else:
-                    log.warning(f"Could not generate alternative query for retry {attempt}")
-                    break
+                    alt_q = alt_task
+                
+                # Search with alternative
+                alt_hits = self.index.search(alt_q, k=pool_k, reranker=None)
+                
+                # Merge results (avoiding duplicates)
+                existing_ids = {h["id"] for h in hits}
+                for h in alt_hits:
+                    if h["id"] not in existing_ids:
+                        if not excluded_norm or _norm(getattr(h["doc"], "name", "")) not in excluded_norm:
+                            hits.append(h)
+                            existing_ids.add(h["id"])
+                
+                log.info(f"After retry {attempt}: {len(hits)} total results")
             else:
-                log.warning("Similarity expander not available for retry")
+                log.warning(f"Query too short to generate alternative for retry {attempt}")
                 break
 
         # 7) Attach convenience fields expected downstream
@@ -218,6 +215,37 @@ def rerank_only(self, query: str, hits: List[dict], top_k: int = 10) -> List[dic
         # Recreate query with any existing format tokens already embedded in retrieval
         ranked = self._apply_reranker(strip_tags(query), hits, top_k=top_k)
         return ranked
+    
+    def retrieve(
+        self,
+        query: str,
+        image_paths: Optional[List[str]] = None,
+        top_k: int = 10,
+        exclusions: Optional[List[str]] = None,
+    ) -> List[dict]:
+        """
+        Retrieve and automatically rerank results using BGE-M3 + CrossEncoder.
+        
+        This is the main retrieval method that combines:
+        1. Semantic search via BGE-M3 embeddings (no query expansion)
+        2. Precision reranking via CrossEncoder
+        3. Image metadata hints (format, modality, dimensions)
+        
+        Returns top_k results after CrossEncoder reranking.
+        """
+        # Get more candidates than needed for reranking
+        pool_k = max(30, top_k * 3)
+        hits = self.retrieve_no_rerank(
+            query=query,
+            image_paths=image_paths,
+            top_k=pool_k,
+            exclusions=exclusions,
+        )
+        
+        # Apply reranking to get final top_k
+        if hits:
+            return self.rerank_only(query, hits, top_k=top_k)
+        return []
 
     def get_doc(self, name: str) -> Optional[SoftwareDoc]:
         """Lookup a SoftwareDoc by name (case-sensitive match)."""