Skip to content
Merged
Show file tree
Hide file tree
Changes from 18 commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
3119c96
feat/nanobot-integration
LarFii Feb 7, 2026
985b142
fix lint
LarFii Feb 7, 2026
01754d7
fix: resolve react-hooks/exhaustive-deps warnings breaking CI build
LarFii Feb 7, 2026
6d4b29d
fix: remove invalid @typescript-eslint/no-explicit-any eslint-disable…
LarFii Feb 7, 2026
541502c
fix: replace Chinese text in SKILL.md example with English
LarFii Feb 7, 2026
f038f57
feat: nanobot integration with litewrite tools and API endpoints
Feb 7, 2026
893375e
added deepresearch feature, and enabled telegram bot connection
zzhtx258 Feb 8, 2026
7158cd1
fix lint
LarFii Feb 8, 2026
2be6057
Merge pull request #7 from HKUDS/feat/nanobot-deep-research
LarFii Feb 8, 2026
36bfa9c
fix: security hardening for nanobot integration
LarFii Feb 8, 2026
2ca39fc
fix lint
LarFii Feb 8, 2026
0edf7f8
feat: nanobot bingxi local changes - tools, session, media, import, a…
Feb 8, 2026
536d2c3
feat: complete Telegram channel support with message splitting and fi…
LarFii Feb 8, 2026
662aaee
Update telegram.py
LarFii Feb 9, 2026
7c7c714
Merge origin/feat/nanobot-integration into feat/nanobot-bingxi
Feb 9, 2026
ee15111
Merge pull request #8 from HKUDS/feat/nanobot-bingxi
LarFii Feb 9, 2026
88805b5
fix
Feb 9, 2026
08443a5
fix: resolve race conditions between initializingDocs and HTTP handlers
LarFii Feb 9, 2026
62b10ed
fix: skip project validation when list API fails (missing ownerId)
LarFii Feb 9, 2026
0346ab4
fix: guard GET /doc init await & wire directApply through /api/chat/run
LarFii Feb 9, 2026
92d2916
fix: restrict directApply to authenticated internal callers
LarFii Feb 9, 2026
184e7fb
fix: forward X-Internal-Secret when proxying to ai-server /run-sync
LarFii Feb 9, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -70,10 +70,11 @@ jobs:
docker-compose -f docker-compose.prod.yml config >/dev/null
fi

- name: Docker build (web, ws, ai-server, compile)
- name: Docker build (web, ws, ai-server, nanobot, compile)
run: |
set -euo pipefail
docker build -f Dockerfile .
docker build -f Dockerfile.ws .
docker build -f ai-server/Dockerfile ai-server
docker build -f nanobot/Dockerfile nanobot
docker build -f compile-server/Dockerfile compile-server
4 changes: 3 additions & 1 deletion Dockerfile.ws.dev
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,9 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
COPY package.json package-lock.json ./

# Install dependencies (including dev dependencies)
RUN npm ci --legacy-peer-deps
# Use taobao mirror to avoid network issues in China
RUN npm config set registry https://registry.npmmirror.com && \
npm ci --legacy-peer-deps

# Copy tsconfig (required by tsx)
COPY tsconfig.json ./
Expand Down
66 changes: 66 additions & 0 deletions ai-server/api/chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,10 @@ class ChatRequest(BaseModel):
# Mode: "ask" (read-only) or "agent" (full editing)
mode: str = "ask"

# Direct apply mode: when True, file edits are written directly to storage
# instead of creating shadow documents. Used by nanobot/API consumers.
directApply: bool = False
Comment on lines +130 to +132

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Honor directApply in /api/chat/run

The new directApply flag is documented as the switch to bypass shadow documents, but it is never read or forwarded into ChatService.run/ToolContext for the streaming /api/chat/run endpoint. As a result, API consumers that set directApply: true will still get shadow-document edits instead of direct writes, which contradicts the fieldโ€™s contract and breaks the intended integration flow. This should be wired through to ChatService.run (or removed if unsupported) so the flag actually affects edit behavior.

Useful? React with 👍 / 👎.


# Session support
sessionId: Optional[str] = None

Expand Down Expand Up @@ -142,6 +146,17 @@ class ChatRequest(BaseModel):
conversationId: Optional[str] = None


class SyncChatRequest(BaseModel):
    """Synchronous chat request for programmatic invocation (e.g., from nanobot).

    Unlike the streaming ChatRequest used by /run, this model carries only the
    fields the /run-sync endpoint needs; the result comes back as a single
    JSON payload rather than SSE events.
    """

    # Target project the agent operates on.
    projectId: str
    # The user's natural-language instruction for the agent.
    message: str
    # Caller identity; /run-sync substitutes "" downstream when omitted.
    userId: Optional[str] = None
    # "ask" (read-only) or "agent" (full editing); sync callers default to editing.
    mode: str = "agent"
    # Project file paths that /run-sync prepends as [[FILE:...]] markers to the query.
    referencedFiles: Optional[List[str]] = []
    # Prior turns forwarded verbatim to ChatService.run_sync as conversation_history.
    conversationHistory: Optional[List[Dict[str, Any]]] = None


# ============================================================================
# API Endpoints
# ============================================================================
Expand Down Expand Up @@ -275,3 +290,54 @@ async def get_config():
"compressionThreshold": config.context_compression_threshold,
"compressionTarget": config.context_compression_target,
}


@router.post("/run-sync")
async def run_chat_sync(request: SyncChatRequest):
    """
    Blocking (non-streaming) chat endpoint for programmatic callers.

    Where /run emits SSE events, this endpoint awaits the agent's full run
    and answers with a single JSON object. It exists for nanobot and other
    API consumers that drive litewrite's built-in AI agent machine-to-machine.

    Edits are always applied in direct-apply mode: the agent writes changes
    straight to storage (via /api/internal/files/edit) rather than producing
    shadow documents awaiting frontend review.

    Returns:
        JSON with a success flag and the agent's response text; on failure,
        success is False and the error message is included.
    """
    from services.chat_1_5 import ChatService

    chat_service = ChatService(verbose=True)

    # Prepend [[FILE:...]] markers so the agent resolves any referenced files.
    prompt = request.message
    if request.referencedFiles:
        markers = " ".join(f"[[FILE:{path}]]" for path in request.referencedFiles)
        prompt = f"{markers} {request.message}"

    try:
        answer = await chat_service.run_sync(
            project_id=request.projectId,
            user_id=request.userId or "",
            query=prompt,
            mode=request.mode,
            conversation_history=request.conversationHistory,
            direct_apply=True,  # Always direct-apply for sync endpoint
        )
    except Exception as e:
        logger.error(f"[run-sync] Error: {e}", exc_info=True)
        return {
            "success": False,
            "error": str(e),
            "response": "",
        }

    return {
        "success": True,
        "response": answer,
    }
6 changes: 6 additions & 0 deletions ai-server/core/embedding.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,12 @@ async def _embed_one_batch(self, texts: List[str]) -> List[np.ndarray]:
encoding_format="float",
)

if response.data is None:
raise RuntimeError(
f"Embedding API returned empty response (model={self.model}). "
"Check that EMBEDDING_API_BASE supports the configured EMBEDDING_MODEL."
)

return [np.array(item.embedding) for item in response.data]

async def embed(self, text: str) -> np.ndarray:
Expand Down
14 changes: 7 additions & 7 deletions ai-server/services/chat_1_5/agents/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -469,11 +469,11 @@ def _log_llm_request(self, messages: List[Dict[str, Any]]) -> None:

Only logs at DEBUG level - requires CHAT_DEBUG=1.
"""
logger.debug(f"[{self.name}] {'='*60}")
logger.debug(f"[{self.name}] {'=' * 60}")
logger.debug(f"[{self.name}] LLM REQUEST - Turn {self.turn_count}")
logger.debug(f"[{self.name}] Model: {self.config.get_model()}")
logger.debug(f"[{self.name}] Messages: {len(messages)}")
logger.debug(f"[{self.name}] {'-'*60}")
logger.debug(f"[{self.name}] {'-' * 60}")

for i, msg in enumerate(messages):
role = msg.get("role", "unknown").upper()
Expand All @@ -485,7 +485,7 @@ def _log_llm_request(self, messages: List[Dict[str, Any]]) -> None:
logger.debug(f"[{self.name}] {line}")
logger.debug(f"[{self.name}]")

logger.debug(f"[{self.name}] {'='*60}")
logger.debug(f"[{self.name}] {'=' * 60}")

def _log_llm_response(self, content: str, response: Any) -> None:
"""
Expand All @@ -499,18 +499,18 @@ def _log_llm_response(self, content: str, response: Any) -> None:
if usage:
tokens_info = f" (tokens: {usage.prompt_tokens}+{usage.completion_tokens}={usage.total_tokens})"

logger.debug(f"[{self.name}] {'='*60}")
logger.debug(f"[{self.name}] {'=' * 60}")
logger.debug(
f"[{self.name}] LLM RESPONSE - Turn {self.turn_count}{tokens_info}"
)
logger.debug(f"[{self.name}] Content length: {len(content)} chars")
logger.debug(f"[{self.name}] {'-'*60}")
logger.debug(f"[{self.name}] {'-' * 60}")

# Log content line by line
for line in content.split("\n"):
logger.debug(f"[{self.name}] {line}")

logger.debug(f"[{self.name}] {'='*60}")
logger.debug(f"[{self.name}] {'=' * 60}")

async def _execute_tool_calls(
self, tool_calls: List[Dict[str, Any]]
Expand Down Expand Up @@ -727,7 +727,7 @@ async def _compress_execution_context(self) -> None:
self._execution_tokens = new_tokens

logger.info(
f"[{self.name}] Execution context compressed: " f"now {new_tokens} tokens"
f"[{self.name}] Execution context compressed: now {new_tokens} tokens"
)

def _reconstruct_assistant_content(
Expand Down
9 changes: 8 additions & 1 deletion ai-server/services/chat_1_5/service.py
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,7 @@ async def run_sync(
mode: str = "ask",
conversation_history: Optional[List[Dict[str, Any]]] = None,
agent_config: Optional[AgentConfig] = None,
direct_apply: bool = False,
) -> str:
"""
Run the chat service synchronously (non-streaming).
Expand All @@ -264,19 +265,22 @@ async def run_sync(
mode: "ask" or "agent"
conversation_history: Optional previous conversation history
agent_config: Optional agent configuration
direct_apply: If True, file edits bypass shadow documents and write
directly to storage. Used by nanobot/API consumers.

Returns:
Final response text
"""
logger.info(
f"[ChatService 1.5] Run sync: project={project_id}, user={user_id}, mode={mode}"
f"[ChatService 1.5] Run sync: project={project_id}, user={user_id}, mode={mode}, direct_apply={direct_apply}"
)

# Create tool context (no emitter for sync mode)
context = ToolContext(
project_id=project_id,
user_id=user_id,
mode=mode,
direct_apply=direct_apply,
)

# Create main agent
Expand Down Expand Up @@ -396,6 +400,7 @@ async def chat(
user_id: str,
query: str,
mode: str = "ask",
direct_apply: bool = False,
**kwargs,
) -> str:
"""
Expand All @@ -406,6 +411,7 @@ async def chat(
user_id: Current user ID
query: User's input query
mode: "ask" or "agent"
direct_apply: If True, file edits write directly to storage
**kwargs: Additional arguments passed to run_sync

Returns:
Expand All @@ -417,6 +423,7 @@ async def chat(
user_id=user_id,
query=query,
mode=mode,
direct_apply=direct_apply,
**kwargs,
)

Expand Down
4 changes: 2 additions & 2 deletions ai-server/services/tap/service.py
Original file line number Diff line number Diff line change
Expand Up @@ -840,7 +840,7 @@ async def complete(self, request: TAPRequest) -> TAPResponse:
if corrections:
for i, corr in enumerate(corrections):
logger.info(
f" [{i+1}] {corr.get('location', 'unknown')}: '{corr.get('search', '')}' -> '{corr.get('replace', '')}'"
f" [{i + 1}] {corr.get('location', 'unknown')}: '{corr.get('search', '')}' -> '{corr.get('replace', '')}'"
)

# 5) Decide whether to complete
Expand Down Expand Up @@ -918,7 +918,7 @@ async def complete(self, request: TAPRequest) -> TAPResponse:
location = corr.get("location", "prefix")

if not search:
logger.debug(f" Correction [{i+1}]: skipped (empty search)")
logger.debug(f" Correction [{i + 1}]: skipped (empty search)")
continue

if location == "prefix" and search in final_prefix:
Expand Down
4 changes: 2 additions & 2 deletions ai-server/tools/arxiv_rag/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,13 +133,13 @@ def to_context(self, max_chunks: int = 3, max_chunk_length: int = 1500) -> str:
max_chunk_length: Max length per chunk
"""
context_parts = [
f"{'='*60}",
f"{'=' * 60}",
f"Paper: {self.paper.title} ({self.paper.year})",
f"arXiv ID: {self.paper.arxiv_id}",
f"Authors: {', '.join(self.paper.authors[:5])}",
f"URL: {self.paper.url}",
f"Relevance: {self.paper.relevance_score:.3f}",
f"{'='*60}",
f"{'=' * 60}",
"",
"Abstract:",
self.paper.abstract,
Expand Down
33 changes: 22 additions & 11 deletions ai-server/tools/arxiv_rag/tool.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,10 +157,14 @@ async def _process_paper_with_content(
self._log(f" {paper.arxiv_id}: {len(chunks)} chunks")

if chunks:
# Re-rank chunks
relevant_chunks = await self._rerank_chunks_by_embedding(
chunks, query, top_n=3
)
# Re-rank chunks (graceful fallback if embedding unavailable)
try:
relevant_chunks = await self._rerank_chunks_by_embedding(
chunks, query, top_n=3
)
except Exception as e:
logger.warning(f"Chunk rerank failed for {paper.arxiv_id}: {e}")
relevant_chunks = chunks[:3]
paper.chunks = chunks
return RAGResult(paper=paper, relevant_chunks=relevant_chunks)

Expand Down Expand Up @@ -220,11 +224,18 @@ async def search(
"papers": [],
}

# 3) Paper-level re-ranking
papers = await self._rerank_papers_by_embedding(
papers, query, top_n=max_papers
)
self._log(f"Selected top {len(papers)} papers")
# 3) Paper-level re-ranking (graceful fallback if embedding unavailable)
try:
papers = await self._rerank_papers_by_embedding(
papers, query, top_n=max_papers
)
self._log(f"Selected top {len(papers)} papers (reranked)")
except Exception as e:
self._log(f"Embedding rerank failed ({e}), using raw order")
logger.warning(
f"Embedding rerank failed, falling back to raw results: {e}"
)
papers = papers[:max_papers]

# 4) Process each paper (download, chunk, retrieve)
rag_results: List[RAGResult] = []
Expand Down Expand Up @@ -257,13 +268,13 @@ async def search(

# Paper context
paper_context = [
f"{'='*60}",
f"{'=' * 60}",
f"[{paper.arxiv_id}] {paper.title}",
f"Authors: {', '.join(paper.authors[:5])}{'...' if len(paper.authors) > 5 else ''}",
f"Year: {paper.year}",
f"URL: {paper.url}",
f"Relevance: {paper.relevance_score:.3f}",
f"{'='*60}",
f"{'=' * 60}",
"",
"Abstract:",
paper.abstract,
Expand Down
2 changes: 2 additions & 0 deletions ai-server/tools/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,7 @@ class ToolContext:
project_id: str
user_id: Optional[str] = None
mode: str = "ask"
direct_apply: bool = False # When True, file edits bypass shadow documents and write directly to storage
_emitter: Optional[Callable[[str, Dict[str, Any]], None]] = None
_collected_events: List[Dict[str, Any]] = field(default_factory=list)
_is_subagent: bool = False
Expand Down Expand Up @@ -334,6 +335,7 @@ def for_subagent(self) -> "ToolContext":
project_id=self.project_id,
user_id=self.user_id,
mode=self.mode,
direct_apply=self.direct_apply,
_emitter=self._emitter,
_collected_events=[], # SubAgent has its own event collection
_is_subagent=True,
Expand Down
Loading