mifunedev
diff --git a/‎Changelog.md‎
Lines changed: 1 addition & 0 deletions b/‎Changelog.md‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎backend/src/constants/__init__.py‎
Lines changed: 4 additions & 0 deletions b/‎backend/src/constants/__init__.py‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎backend/src/constants/examples/__init__.py‎
Lines changed: 20 additions & 0 deletions b/‎backend/src/constants/examples/__init__.py‎
Lines changed: 20 additions & 0 deletions
diff --git a/‎backend/src/constants/llm.py‎
Lines changed: 2 additions & 0 deletions b/‎backend/src/constants/llm.py‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎backend/src/flows/xml_agent.py‎
Lines changed: 6 additions & 2 deletions b/‎backend/src/flows/xml_agent.py‎
Lines changed: 6 additions & 2 deletions
diff --git a/‎backend/src/repos/base_repo.py‎
Lines changed: 1 addition & 1 deletion b/‎backend/src/repos/base_repo.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎backend/src/repos/thread_repo.py‎
Lines changed: 148 additions & 0 deletions b/‎backend/src/repos/thread_repo.py‎
Lines changed: 148 additions & 0 deletions
diff --git a/‎backend/src/routes/v0/llm.py‎
Lines changed: 1 addition & 1 deletion b/‎backend/src/routes/v0/llm.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎backend/src/routes/v0/thread.py‎
Lines changed: 65 additions & 6 deletions b/‎backend/src/routes/v0/thread.py‎
Lines changed: 65 additions & 6 deletions
diff --git a/‎backend/src/schemas/entities/__init__.py‎
Lines changed: 17 additions & 0 deletions b/‎backend/src/schemas/entities/__init__.py‎
Lines changed: 17 additions & 0 deletions
@@ -22,6 +22,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
   - bug/370-anthropic-streaming (2025-09-16)
 
 ### Changed
+  - feat/530-semantic-search-over-threads (2025-11-30)
   - feat/534-mermaid-diagram (2025-11-30)
   - feat/528-pagination-for-threads (2025-11-30)
   - feat/519-react-window (2025-11-23)
 
@@ -88,3 +88,7 @@ def values(cls) -> list[str]:
 
 # GridSite
 MICROSOFT_TEAMS_WEBHOOK_URL = os.getenv("MICROSOFT_TEAMS_WEBHOOK_URL")
+
+# Thread Search
+# Number of recent messages to store per thread snapshot for semantic search
+THREAD_SNAPSHOT_MESSAGE_COUNT = 20
@@ -411,6 +411,26 @@ class Examples:
             },
         ),
     }
+    # OpenAPI request-body examples for the semantic thread search request
+    # (fields: query, limit, assistant_id). The first example searches across
+    # all assistants (assistant_id is None); the second restricts the search
+    # to a single assistant.
+    THREAD_SEMANTIC_SEARCH_EXAMPLES = {
+        "semantic_search": Example(
+            summary="semantic_search",
+            description="Search threads using natural language",
+            value={
+                "query": "threads about database optimization",
+                "limit": 10,
+                "assistant_id": None,
+            },
+        ),
+        "semantic_search_with_assistant": Example(
+            summary="semantic_search_with_assistant",
+            description="Search threads for a specific assistant",
+            value={
+                "query": "conversations about authentication",
+                "limit": 5,
+                "assistant_id": "assistant-uuid-here",
+            },
+        ),
+    }
 
     LLM_INVOKE_EXAMPLES = {
         "stateless_invoke": Example(
 
@@ -17,6 +17,7 @@ class ChatModels(str, Enum):
         OPENAI_REASONING_03 = "openai:o3"
         OPENAI_REASONING_04_MINI = "openai:o4-mini"
         OPENAI_GPT_4_1_NANO = "openai:gpt-4.1-nano"
+        OPENAI_GPT_4_1_MINI = "openai:gpt-4.1-mini"
         OPENAI_GPT_5_NANO = "openai:gpt-5-nano"
         OPENAI_GPT_5_MINI = "openai:gpt-5-mini"
         OPENAI_GPT_5 = "openai:gpt-5"
@@ -84,6 +85,7 @@ def get_free_models():
     models = []
     if OPENAI_API_KEY:
         models.append(ChatModels.OPENAI_GPT_5_NANO.value)
+        models.append(ChatModels.OPENAI_GPT_4_1_MINI.value)
     if ANTHROPIC_API_KEY:
         models.append(ChatModels.ANTHROPIC_CLAUDE_4_5_HAIKU.value)
     if GOOGLE_API_KEY:
 
@@ -21,6 +21,8 @@
 from langgraph.checkpoint.base import BaseCheckpointSaver
 from langgraph.store.base import BaseStore
 
+from src.utils.format import format_content
+
 
 ###########################################
 ## Parser
@@ -31,8 +33,9 @@ def input_parser(
     xml_lines = ["<thread>"]
     for message in messages:
         if isinstance(message, HumanMessage):
+            content = format_content(message.content)
             xml_lines.append(
-                f'  <event id="{message.id}" type="{message.type}">{message.content}</event>'
+                f'  <event id="{message.id}" type="{message.type}">{content}</event>'
             )
         elif isinstance(message, ToolMessage):
             xml_lines.append(
@@ -45,8 +48,9 @@ def input_parser(
                         f'  <event id="{tool_call["id"]}" type="tool_input" name="{tool_call["name"]}">{json.dumps(tool_call["args"])}</event>'
                     )
             else:
+                content = format_content(message.content)
                 xml_lines.append(
-                    f'  <event id="{message.id}" type="{message.type}">{message.content}</event>'
+                    f'  <event id="{message.id}" type="{message.type}">{content}</event>'
                 )
     xml_lines.append("</thread>")
     return "\n".join(xml_lines) + llm_response_prefix
 
@@ -24,7 +24,7 @@ def _get_namespace(self):
         return (self.user_id, self.entity_type)
 
     async def _set(
-        self, key: str, value: Source | Project | Document, ttl: int | None = None
+        self, key: str, value: Any, ttl: int | None = None
     ) -> bool:
         await self.store.aput(
             namespace=self._get_namespace(),
 
@@ -0,0 +1,148 @@
+import asyncio
+from langgraph.store.base import BaseStore, SearchItem
+from langgraph.store.memory import InMemoryStore
+from langgraph.store.postgres.aio import AsyncPostgresStore
+
+from src.services.db import get_store_in_memory
+from src.schemas.entities import SearchFilter
+from src.constants import THREAD_SNAPSHOT_MESSAGE_COUNT
+from src.repos.base_repo import BaseRepo
+from src.schemas.entities.store import ThreadSnapshot
+from src.utils.logger import logger
+from src.utils.format import format_xml_thread
+from src.utils.messages import from_message_to_dict
+
+
+FIELDS = ["messages", "files"]
+
+class ThreadRepo(BaseRepo):
+    """Store-backed repository for a user's thread snapshots.
+
+    Snapshots are written under the repo namespace with the thread id as
+    key. ``FIELDS`` names the value fields assigned to the store's ``fields``
+    attribute when the store accepts one.
+    """
+
+    # NOTE(review): the default ``store`` is evaluated once, when this method
+    # is defined, so every instance created without an explicit store shares
+    # the same in-memory store. That looks intentional for a fallback store;
+    # confirm before changing it.
+    def __init__(self, user_id: str, store: BaseStore = get_store_in_memory(fields=FIELDS)):
+        self.user_id = user_id
+        self.store: BaseStore = store
+
+        # Add fields to the store (if supported)
+        try:
+            self.store.fields = FIELDS
+        except AttributeError:
+            pass
+        super().__init__(user_id=user_id, store=store, entity_type="threads")
+
+    @staticmethod
+    def _to_snapshot_dict(item: SearchItem) -> dict:
+        """Convert one search hit into a ``ThreadSnapshot`` dump without ``None`` fields."""
+        # update() stores whatever mapping the caller supplied, so "files"
+        # (or "messages") may be absent. Forward only the keys that exist
+        # instead of letting one incomplete snapshot raise KeyError and
+        # discard every result of the search.
+        stored = {
+            name: item.value[name]
+            for name in ("messages", "files")
+            if name in item.value
+        }
+        return ThreadSnapshot(
+            id=item.key,
+            score=item.score,
+            updated_at=item.updated_at,
+            **stored,
+        ).model_dump(exclude_none=True)
+
+    async def search(
+        self,
+        search_filter: SearchFilter,
+    ) -> list[dict]:
+        """Search the stored thread snapshots of this repo's namespace.
+
+        When ``search_filter.query`` is non-empty the query is forwarded to
+        the store and each hit is returned as a ``ThreadSnapshot`` dump.
+        Otherwise the matching items are returned as ``item.dict()`` mappings
+        sorted by ``updated_at`` in descending order.
+
+        Errors whose message mentions a closed connection are retried up to
+        three times with exponential backoff. Any other error, or running out
+        of retries, is logged and an empty list is returned.
+        """
+        max_retries = 3
+        retry_delay = 1  # seconds
+
+        try:
+            for attempt in range(max_retries):
+                try:
+                    async with self.store as store:
+                        if search_filter.query:
+                            queried_threads: list[SearchItem] = await store.asearch(
+                                self._get_namespace(),
+                                limit=search_filter.limit,
+                                filter=search_filter.filter,
+                                query=search_filter.query,
+                            )
+                            return [
+                                self._to_snapshot_dict(thread)
+                                for thread in queried_threads
+                            ]
+                        threads = await store.asearch(
+                            self._get_namespace(),
+                            limit=search_filter.limit,
+                            filter=search_filter.filter,
+                        )
+                        return sorted(
+                            [thread.dict() for thread in threads],
+                            key=lambda x: x.get("updated_at"),
+                            reverse=True,
+                        )
+                except Exception as e:
+                    error_msg = str(e).lower()
+                    if "connection" in error_msg and "closed" in error_msg:
+                        logger.warning(
+                            f"Store connection closed on attempt {attempt + 1}/{max_retries}: {e}"
+                        )
+                        if attempt < max_retries - 1:
+                            # Exponential backoff before the next attempt
+                            await asyncio.sleep(retry_delay * (2**attempt))
+                            continue
+                    # Not retryable (or retries exhausted): hand over to the
+                    # outer handler with the original traceback intact.
+                    raise
+        except Exception as e:
+            logger.error(f"Error searching threads: {e}")
+            return []
+
+    async def update(self, thread_id: str, data: dict):
+        """Store ``data`` as the snapshot of ``thread_id``.
+
+        The ``messages`` entry is converted with ``from_message_to_dict``
+        (tool calls excluded) and trimmed to the most recent
+        ``THREAD_SNAPSHOT_MESSAGE_COUNT`` items before being written. The
+        caller's ``data`` mapping is left untouched.
+
+        Returns ``True`` once the store write has completed.
+        """
+        messages = from_message_to_dict(
+            data.get("messages", []), include_tool_calls=False
+        )
+        # Keep only the tail; slicing already copes with shorter lists.
+        recent_messages = messages[-THREAD_SNAPSHOT_MESSAGE_COUNT:]
+
+        # Build a new mapping rather than overwriting data["messages"], so the
+        # caller's dict still holds its original message objects afterwards.
+        snapshot = {**data, "messages": recent_messages}
+
+        await self.store.aput(
+            namespace=self._get_namespace(), key=thread_id, value=snapshot
+        )
+
+        return True
+
+    async def get(self, thread_id: str) -> dict:
+        """Return whatever the base repo's ``_get`` yields for ``thread_id``."""
+        return await self._get(thread_id)
+
+    async def delete(self, thread_id: str) -> bool:
+        """Delete the stored snapshot; return ``False`` (and log) on failure."""
+        try:
+            await self._delete(thread_id)
+            logger.info(f"Thread {thread_id} deleted successfully")
+            return True
+        except Exception as e:
+            logger.error(f"Error deleting thread: {e}")
+            return False
+
+    async def _upsert_snapshot(self, thread_id: str, messages: list) -> bool:
+        """Create or update a thread snapshot built from recent messages.
+
+        Only the last ``THREAD_SNAPSHOT_MESSAGE_COUNT`` messages are rendered
+        into the snapshot text, while ``message_count`` in the metadata
+        records the length of the list that was passed in.
+
+        Returns ``True`` on success; failures are logged and yield ``False``.
+        """
+        try:
+            recent_messages = messages[-THREAD_SNAPSHOT_MESSAGE_COUNT:]
+
+            # Render the recent messages (tool calls excluded) as the text
+            # stored on the snapshot.
+            page_content = format_xml_thread(recent_messages, include_tool_calls=False)
+
+            # NOTE(review): search() builds ThreadSnapshot from
+            # id/messages/files/score/updated_at, whereas this call passes
+            # thread_id/page_content/metadata -- confirm the model accepts
+            # both shapes.
+            snapshot = ThreadSnapshot(
+                thread_id=thread_id,
+                page_content=page_content,
+                metadata={
+                    "thread_id": thread_id,
+                    "message_count": len(messages),
+                }
+            )
+
+            await self._set(thread_id, snapshot)
+            return True
+
+        except Exception as e:
+            logger.error(f"Failed to upsert thread snapshot for {thread_id}: {e}")
+            return False
+        
@@ -232,7 +232,7 @@ async def list_models():
     return JSONResponse(
         status_code=status.HTTP_200_OK,
         content={
-            "default": ChatModels.OPENAI_GPT_5_NANO.value,
+            "default": ChatModels.OPENAI_GPT_4_1_MINI.value,
             "free": get_free_models(),
             "models": get_all_models(),
         },
 
@@ -3,7 +3,7 @@
 from fastapi.responses import Response
 from langgraph.store.base import BaseStore
 from src.contexts.service import ServiceContext
-from src.schemas.entities import ThreadSearch
+from src.schemas.entities import SearchFilter, ThreadSemanticSearchRequest
 from src.utils.logger import logger
 from src.constants.examples import Examples
 from src.schemas.models import ProtectedUser
@@ -16,27 +16,26 @@
 
 @router.post("/threads/search", name="Query Threads in Checkpointer")
 async def search_threads(
-    thread_search: ThreadSearch = Body(
+    search_filter: SearchFilter = Body(
         openapi_examples=Examples.THREAD_SEARCH_EXAMPLES
     ),
     user: ProtectedUser = Depends(verify_credentials),
     store: AsyncPostgresStore = Depends(get_store),
 ):
     try:
-        filter = thread_search.model_dump(exclude_none=True).get("filter", {})
         async with get_checkpoint_db() as checkpointer:
             service_context = ServiceContext(
                 user_id=user.id, store=store, checkpointer=checkpointer
             )
-            if "thread_id" in filter and not "checkpoint_id" in filter:
+            if "thread_id" in search_filter.filter and not "checkpoint_id" in search_filter.filter:
                 checkpoints = await service_context.checkpoint_service.list_checkpoints(
-                    thread_id=filter["thread_id"]
+                    thread_id=search_filter.filter["thread_id"]
                 )
                 # if not checkpoints:
                 #     raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Checkpoints not found")
                 return {"checkpoints": checkpoints}
 
-            threads = await service_context.thread_service.search(filter=filter)
+            threads = await service_context.thread_service.search(search_filter)
             return {"threads": threads}
     except Exception as e:
         logger.exception(f"Error searching threads: {e}")
@@ -118,3 +117,63 @@ async def update_thread(
         raise HTTPException(
             status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e)
         )
+
+
+@router.post("/threads/search/semantic", name="Semantic Search Over Threads")
+async def semantic_search_threads(
+    request: ThreadSemanticSearchRequest = Body(
+        openapi_examples=Examples.THREAD_SEMANTIC_SEARCH_EXAMPLES
+    ),
+    user: ProtectedUser = Depends(verify_credentials),
+    store: AsyncPostgresStore = Depends(get_store),
+):
+    """Run a semantic search over the user's threads and attach titles.
+
+    Responds with ``{"results": [...]}`` where every entry carries
+    ``thread_id``, ``title``, ``excerpt``, ``score`` and ``updated_at``.
+    Hits without a ``thread_id`` are dropped.
+
+    Raises:
+        HTTPException: 422 when ``query`` is empty or whitespace only;
+            500 (with the error text as detail) for any other failure.
+    """
+    try:
+        # Reject empty / whitespace-only queries up front
+        if not (request.query and request.query.strip()):
+            raise HTTPException(
+                status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
+                detail="query field is required and must not be empty"
+            )
+
+        async with get_checkpoint_db() as checkpointer:
+            service_context = ServiceContext(
+                user_id=user.id, store=store, checkpointer=checkpointer
+            )
+
+            # NOTE(review): the repo is called with query/limit/assistant_id
+            # keywords and its hits are read through "thread_id"/"excerpt"
+            # keys -- confirm this matches the repo's search signature and
+            # the shape of the dicts it returns.
+            hits = await service_context.thread_service.thread_snapshot_repo.search(
+                query=request.query,
+                limit=request.limit,
+                assistant_id=request.assistant_id
+            )
+
+            fallback_title = "Untitled Thread"
+            enriched_results = []
+            for hit in hits:
+                thread_id = hit.get("thread_id")
+                if not thread_id:
+                    continue
+
+                # Look the thread up to obtain its title; keep the fallback
+                # when the thread or its stored value is missing.
+                thread_data = await service_context.thread_service.get(thread_id)
+                stored_value = thread_data.value if thread_data else None
+                if stored_value:
+                    title = stored_value.get("title", fallback_title)
+                else:
+                    title = fallback_title
+
+                enriched_results.append({
+                    "thread_id": thread_id,
+                    "title": title,
+                    "excerpt": hit.get("excerpt", ""),
+                    "score": hit.get("score", 0.0),
+                    "updated_at": hit.get("updated_at"),
+                })
+
+            return {"results": enriched_results}
+
+    except HTTPException:
+        # Let deliberate HTTP errors (such as the 422 above) pass through
+        raise
+    except Exception as e:
+        logger.exception(f"Error performing semantic search: {e}")
+        raise HTTPException(
+            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+            detail=str(e)
+        )
@@ -5,6 +5,7 @@
 from pydantic import BaseModel, Field
 
 from src.schemas.entities.llm import *
+from src.schemas.entities.store import ThreadSnapshot
 from src.constants.examples import (
     ADD_DOCUMENTS_EXAMPLE,
     THREAD_HISTORY_EXAMPLE,
@@ -134,3 +135,19 @@ class SearchFilter(BaseModel):
             "example": {"query": "", "filter": {}, "limit": 20, "offset": 0}
         }
     }
+
+
+class ThreadSemanticSearchRequest(BaseModel):
+    """Request body for semantic search over threads."""
+
+    query: str = Field(..., description="Natural language search query")
+    # The description promises an upper bound of 50; enforce it (and a lower
+    # bound of 1) at validation time so out-of-range values are rejected
+    # before they reach the store.
+    limit: int = Field(
+        default=10,
+        ge=1,
+        le=50,
+        description="Maximum number of results (max 50)",
+    )
+    assistant_id: Optional[str] = Field(default=None, description="Optional assistant ID to filter results")
+
+    model_config = {
+        "json_schema_extra": {
+            "example": {
+                "query": "threads about database optimization",
+                "limit": 10,
+                "assistant_id": None
+            }
+        }
+    }