def _is_internal_request(request: Request) -> bool:
    """Check if the request carries a valid X-Internal-Secret header.

    The header value is compared against ``config.internal_api_secret`` using
    a constant-time comparison.

    Args:
        request: Incoming HTTP request whose headers are inspected.

    Returns:
        True only when a secret is configured, the header is present, and
        both values match exactly. False in every other case (including a
        header that contains non-ASCII characters).
    """
    from core.config import config

    secret = config.internal_api_secret
    if not secret:
        # No secret configured: nobody can be treated as an internal caller.
        return False
    provided = request.headers.get("X-Internal-Secret", "")
    if not provided:
        return False
    # hmac.compare_digest() raises TypeError when given str arguments that
    # contain non-ASCII characters. Comparing encoded bytes keeps the
    # constant-time property and turns a malformed header into a plain
    # rejection instead of an unhandled exception.
    return hmac.compare_digest(secret.encode("utf-8"), provided.encode("utf-8"))
class SyncChatRequest(BaseModel):
    """Synchronous chat request for programmatic invocation (e.g., from nanobot)

    Request body of the POST /run-sync endpoint. Unlike ChatRequest it carries
    no attachments, session, or directApply fields.
    """

    # Target project; forwarded to the chat service as project_id (required).
    projectId: str
    # The user's message; file references are prepended to it to form the query.
    message: str
    # Acting user; the endpoint passes an empty string when this is omitted.
    userId: Optional[str] = None
    # Agent mode ("ask" or "agent"); defaults to "agent" for this endpoint.
    mode: str = "agent"
    # File paths to reference; each is rendered as a [[FILE:<path>]] tag.
    referencedFiles: Optional[List[str]] = []
    # Previous conversation turns, passed through to the chat service unchanged.
    conversationHistory: Optional[List[Dict[str, Any]]] = None
@router.post("/run-sync")
async def run_chat_sync(request: SyncChatRequest, raw_request: Request):
    """
    Blocking (non-streaming) chat endpoint for programmatic callers.

    Where /run streams SSE events, this endpoint awaits the agent's final
    answer and returns it as a single JSON object. It is intended for nanobot
    and other API consumers of the built-in AI agent.

    The agent always runs with direct_apply=True, so file edits are written
    straight to storage rather than into shadow documents that need frontend
    review. For that reason the caller must present a valid
    X-Internal-Secret header.

    Returns:
        A dict with "success" and "response"; on failure it also carries
        "error" and "response" is an empty string.
    """
    # Guard clause: direct writes are only allowed for trusted internal callers.
    if not _is_internal_request(raw_request):
        return {
            "success": False,
            "error": "Unauthorized: X-Internal-Secret required",
            "response": "",
        }

    from services.chat_1_5 import ChatService

    service = ChatService(verbose=True)

    # Prefix the message with one [[FILE:...]] tag per referenced file.
    prompt = request.message
    if request.referencedFiles:
        file_tags = " ".join(f"[[FILE:{path}]]" for path in request.referencedFiles)
        prompt = file_tags + " " + request.message

    try:
        answer = await service.run_sync(
            project_id=request.projectId,
            user_id=request.userId or "",
            query=prompt,
            mode=request.mode,
            conversation_history=request.conversationHistory,
            direct_apply=True,  # Always direct-apply for sync endpoint
        )
    except Exception as e:
        logger.error(f"[run-sync] Error: {e}", exc_info=True)
        return {
            "success": False,
            "error": str(e),
            "response": "",
        }

    return {
        "success": True,
        "response": answer,
    }
""" - logger.debug(f"[{self.name}] {'='*60}") + logger.debug(f"[{self.name}] {'=' * 60}") logger.debug(f"[{self.name}] LLM REQUEST - Turn {self.turn_count}") logger.debug(f"[{self.name}] Model: {self.config.get_model()}") logger.debug(f"[{self.name}] Messages: {len(messages)}") - logger.debug(f"[{self.name}] {'-'*60}") + logger.debug(f"[{self.name}] {'-' * 60}") for i, msg in enumerate(messages): role = msg.get("role", "unknown").upper() @@ -485,7 +485,7 @@ def _log_llm_request(self, messages: List[Dict[str, Any]]) -> None: logger.debug(f"[{self.name}] {line}") logger.debug(f"[{self.name}]") - logger.debug(f"[{self.name}] {'='*60}") + logger.debug(f"[{self.name}] {'=' * 60}") def _log_llm_response(self, content: str, response: Any) -> None: """ @@ -499,18 +499,18 @@ def _log_llm_response(self, content: str, response: Any) -> None: if usage: tokens_info = f" (tokens: {usage.prompt_tokens}+{usage.completion_tokens}={usage.total_tokens})" - logger.debug(f"[{self.name}] {'='*60}") + logger.debug(f"[{self.name}] {'=' * 60}") logger.debug( f"[{self.name}] LLM RESPONSE - Turn {self.turn_count}{tokens_info}" ) logger.debug(f"[{self.name}] Content length: {len(content)} chars") - logger.debug(f"[{self.name}] {'-'*60}") + logger.debug(f"[{self.name}] {'-' * 60}") # Log content line by line for line in content.split("\n"): logger.debug(f"[{self.name}] {line}") - logger.debug(f"[{self.name}] {'='*60}") + logger.debug(f"[{self.name}] {'=' * 60}") async def _execute_tool_calls( self, tool_calls: List[Dict[str, Any]] @@ -727,7 +727,7 @@ async def _compress_execution_context(self) -> None: self._execution_tokens = new_tokens logger.info( - f"[{self.name}] Execution context compressed: " f"now {new_tokens} tokens" + f"[{self.name}] Execution context compressed: now {new_tokens} tokens" ) def _reconstruct_assistant_content( diff --git a/ai-server/services/chat_1_5/service.py b/ai-server/services/chat_1_5/service.py index c3a47f0..39196a5 100644 --- 
a/ai-server/services/chat_1_5/service.py +++ b/ai-server/services/chat_1_5/service.py @@ -89,6 +89,7 @@ async def run( attached_contents: Optional[Dict[str, str]] = None, session_id: Optional[str] = None, agent_config: Optional[AgentConfig] = None, + direct_apply: bool = False, ) -> AsyncGenerator[SSEOutput, None]: """ Run the chat service. @@ -105,12 +106,14 @@ async def run( attached_contents: Content of attached files session_id: Session identifier for history compression updates agent_config: Optional agent configuration + direct_apply: If True, file edits bypass shadow documents and write + directly to storage. Used by nanobot/API consumers. Yields: SSEOutput events for streaming """ logger.info( - f"[ChatService 1.5] Run: project={project_id}, user={user_id}, mode={mode}" + f"[ChatService 1.5] Run: project={project_id}, user={user_id}, mode={mode}, direct_apply={direct_apply}" ) # Build the query with context @@ -125,6 +128,7 @@ async def run( project_id=project_id, user_id=user_id, mode=mode, + direct_apply=direct_apply, ) # Create main agent @@ -253,6 +257,7 @@ async def run_sync( mode: str = "ask", conversation_history: Optional[List[Dict[str, Any]]] = None, agent_config: Optional[AgentConfig] = None, + direct_apply: bool = False, ) -> str: """ Run the chat service synchronously (non-streaming). @@ -264,12 +269,14 @@ async def run_sync( mode: "ask" or "agent" conversation_history: Optional previous conversation history agent_config: Optional agent configuration + direct_apply: If True, file edits bypass shadow documents and write + directly to storage. Used by nanobot/API consumers. 
Returns: Final response text """ logger.info( - f"[ChatService 1.5] Run sync: project={project_id}, user={user_id}, mode={mode}" + f"[ChatService 1.5] Run sync: project={project_id}, user={user_id}, mode={mode}, direct_apply={direct_apply}" ) # Create tool context (no emitter for sync mode) @@ -277,6 +284,7 @@ async def run_sync( project_id=project_id, user_id=user_id, mode=mode, + direct_apply=direct_apply, ) # Create main agent @@ -396,6 +404,7 @@ async def chat( user_id: str, query: str, mode: str = "ask", + direct_apply: bool = False, **kwargs, ) -> str: """ @@ -406,6 +415,7 @@ async def chat( user_id: Current user ID query: User's input query mode: "ask" or "agent" + direct_apply: If True, file edits write directly to storage **kwargs: Additional arguments passed to run_sync Returns: @@ -417,6 +427,7 @@ async def chat( user_id=user_id, query=query, mode=mode, + direct_apply=direct_apply, **kwargs, ) diff --git a/ai-server/services/tap/service.py b/ai-server/services/tap/service.py index 872170b..f302c52 100644 --- a/ai-server/services/tap/service.py +++ b/ai-server/services/tap/service.py @@ -840,7 +840,7 @@ async def complete(self, request: TAPRequest) -> TAPResponse: if corrections: for i, corr in enumerate(corrections): logger.info( - f" [{i+1}] {corr.get('location', 'unknown')}: '{corr.get('search', '')}' -> '{corr.get('replace', '')}'" + f" [{i + 1}] {corr.get('location', 'unknown')}: '{corr.get('search', '')}' -> '{corr.get('replace', '')}'" ) # 5) Decide whether to complete @@ -918,7 +918,7 @@ async def complete(self, request: TAPRequest) -> TAPResponse: location = corr.get("location", "prefix") if not search: - logger.debug(f" Correction [{i+1}]: skipped (empty search)") + logger.debug(f" Correction [{i + 1}]: skipped (empty search)") continue if location == "prefix" and search in final_prefix: diff --git a/ai-server/tools/arxiv_rag/models.py b/ai-server/tools/arxiv_rag/models.py index bc1d319..c5586cd 100644 --- a/ai-server/tools/arxiv_rag/models.py 
+++ b/ai-server/tools/arxiv_rag/models.py @@ -133,13 +133,13 @@ def to_context(self, max_chunks: int = 3, max_chunk_length: int = 1500) -> str: max_chunk_length: Max length per chunk """ context_parts = [ - f"{'='*60}", + f"{'=' * 60}", f"Paper: {self.paper.title} ({self.paper.year})", f"arXiv ID: {self.paper.arxiv_id}", f"Authors: {', '.join(self.paper.authors[:5])}", f"URL: {self.paper.url}", f"Relevance: {self.paper.relevance_score:.3f}", - f"{'='*60}", + f"{'=' * 60}", "", "Abstract:", self.paper.abstract, diff --git a/ai-server/tools/arxiv_rag/tool.py b/ai-server/tools/arxiv_rag/tool.py index 6e4fb7c..e79047e 100644 --- a/ai-server/tools/arxiv_rag/tool.py +++ b/ai-server/tools/arxiv_rag/tool.py @@ -157,10 +157,14 @@ async def _process_paper_with_content( self._log(f" {paper.arxiv_id}: {len(chunks)} chunks") if chunks: - # Re-rank chunks - relevant_chunks = await self._rerank_chunks_by_embedding( - chunks, query, top_n=3 - ) + # Re-rank chunks (graceful fallback if embedding unavailable) + try: + relevant_chunks = await self._rerank_chunks_by_embedding( + chunks, query, top_n=3 + ) + except Exception as e: + logger.warning(f"Chunk rerank failed for {paper.arxiv_id}: {e}") + relevant_chunks = chunks[:3] paper.chunks = chunks return RAGResult(paper=paper, relevant_chunks=relevant_chunks) @@ -220,11 +224,18 @@ async def search( "papers": [], } - # 3) Paper-level re-ranking - papers = await self._rerank_papers_by_embedding( - papers, query, top_n=max_papers - ) - self._log(f"Selected top {len(papers)} papers") + # 3) Paper-level re-ranking (graceful fallback if embedding unavailable) + try: + papers = await self._rerank_papers_by_embedding( + papers, query, top_n=max_papers + ) + self._log(f"Selected top {len(papers)} papers (reranked)") + except Exception as e: + self._log(f"Embedding rerank failed ({e}), using raw order") + logger.warning( + f"Embedding rerank failed, falling back to raw results: {e}" + ) + papers = papers[:max_papers] # 4) Process each paper 
(download, chunk, retrieve) rag_results: List[RAGResult] = [] @@ -257,13 +268,13 @@ async def search( # Paper context paper_context = [ - f"{'='*60}", + f"{'=' * 60}", f"[{paper.arxiv_id}] {paper.title}", f"Authors: {', '.join(paper.authors[:5])}{'...' if len(paper.authors) > 5 else ''}", f"Year: {paper.year}", f"URL: {paper.url}", f"Relevance: {paper.relevance_score:.3f}", - f"{'='*60}", + f"{'=' * 60}", "", "Abstract:", paper.abstract, diff --git a/ai-server/tools/base.py b/ai-server/tools/base.py index 4ffd773..1f0843b 100644 --- a/ai-server/tools/base.py +++ b/ai-server/tools/base.py @@ -173,6 +173,7 @@ class ToolContext: project_id: str user_id: Optional[str] = None mode: str = "ask" + direct_apply: bool = False # When True, file edits bypass shadow documents and write directly to storage _emitter: Optional[Callable[[str, Dict[str, Any]], None]] = None _collected_events: List[Dict[str, Any]] = field(default_factory=list) _is_subagent: bool = False @@ -334,6 +335,7 @@ def for_subagent(self) -> "ToolContext": project_id=self.project_id, user_id=self.user_id, mode=self.mode, + direct_apply=self.direct_apply, _emitter=self._emitter, _collected_events=[], # SubAgent has its own event collection _is_subagent=True, diff --git a/ai-server/tools/edit_file.py b/ai-server/tools/edit_file.py index 9673c2f..a11fc91 100644 --- a/ai-server/tools/edit_file.py +++ b/ai-server/tools/edit_file.py @@ -370,7 +370,7 @@ async def execute( ) for i, eb in enumerate(edit_blocks): logger.debug( - f"[EditFile] Step 1 - Block {i+1}: lines {eb.get('start_line')}-{eb.get('end_line')}, " + f"[EditFile] Step 1 - Block {i + 1}: lines {eb.get('start_line')}-{eb.get('end_line')}, " f"updated_len={len(eb.get('updated', ''))}, desc={eb.get('description', '')[:80]}" ) logger.debug( @@ -379,31 +379,56 @@ async def execute( else "[EditFile] Step 1 - No reasoning provided" ) - # Step 2: Execute all edits via Next.js in one batch + # Step 2: Execute all edits logger.info( - f"[EditFile] === Step 2: 
Executing {len(edit_blocks)} edit(s) via Next.js ===" + f"[EditFile] === Step 2: Executing {len(edit_blocks)} edit(s) " + f"(direct_apply={context.direct_apply}) ===" ) context.emit_status(f"Applying {len(edit_blocks)} edit(s)...", status="working") - # Emit file_locked event - context.emit("file_locked", {"filePath": file_path}) - logger.debug(f"[EditFile] Step 2 - Emitted file_locked event for {file_path}") + if context.direct_apply: + # Direct apply mode: apply edits in memory and write full content + # via /api/internal/files/edit (bypasses shadow documents) + logger.info("[EditFile] Step 2 - Using direct apply mode") - try: - result = await self._execute_all_edits( - file_path=file_path, - edit_blocks=edit_blocks, - context=context, - timeout=timeout, - ) - except Exception as e: - logger.error(f"[EditFile] Step 2 FAILED: {e}") - return ToolResult( - success=False, - text=f"Failed to apply edits: {str(e)}", - error=str(e), + try: + result = await self._execute_direct_apply( + file_path=file_path, + raw_content=raw_content, + edit_blocks=edit_blocks, + context=context, + timeout=timeout, + ) + except Exception as e: + logger.error(f"[EditFile] Step 2 FAILED (direct apply): {e}") + return ToolResult( + success=False, + text=f"Failed to apply edits: {str(e)}", + error=str(e), + ) + else: + # Standard mode: use shadow documents via /api/internal/files/write + # Emit file_locked event + context.emit("file_locked", {"filePath": file_path}) + logger.debug( + f"[EditFile] Step 2 - Emitted file_locked event for {file_path}" ) + try: + result = await self._execute_all_edits( + file_path=file_path, + edit_blocks=edit_blocks, + context=context, + timeout=timeout, + ) + except Exception as e: + logger.error(f"[EditFile] Step 2 FAILED: {e}") + return ToolResult( + success=False, + text=f"Failed to apply edits: {str(e)}", + error=str(e), + ) + # Handle FILE_LOCKED error - return early with clear message if result.get("errorCode") == "FILE_LOCKED": locked_by = 
async def _execute_direct_apply(
    self,
    file_path: str,
    raw_content: str,
    edit_blocks: List[Dict[str, Any]],
    context: ToolContext,
    timeout: float,
) -> Dict[str, Any]:
    """
    Apply edit blocks directly to storage via /api/internal/files/edit.

    This bypasses the shadow document system and writes the full updated
    content directly to storage. Used when context.direct_apply is True
    (e.g., when invoked by nanobot).

    Steps:
    1. Apply edit blocks to raw_content in memory (bottom-to-top)
    2. Write the full updated content via /api/internal/files/edit

    Args:
        file_path: Path of the file being edited, sent as "filePath".
        raw_content: Current file content the line numbers refer to.
        edit_blocks: Dicts with 1-indexed inclusive "start_line"/"end_line"
            and the replacement text in "updated".
        context: Tool context; only project_id is read here.
        timeout: Timeout in seconds for the HTTP request.

    Returns:
        Result dict with success/error. On success, result["data"]["content"]
        holds the full updated content.
    """
    # Apply bottom-to-top so earlier replacements never shift the line numbers
    # of blocks still to be applied. Sorting here (stable, so an already
    # descending list is left untouched) removes the silent dependency on the
    # caller's ordering; a mis-ordered list would otherwise corrupt the file.
    ordered_blocks = sorted(
        edit_blocks, key=lambda block: block["start_line"], reverse=True
    )

    lines = raw_content.split("\n")
    for block in ordered_blocks:
        start = block["start_line"] - 1  # Convert to 0-indexed
        end = block["end_line"]  # Exclusive end for slice
        updated_lines = block["updated"].split("\n")
        lines[start:end] = updated_lines

    new_content = "\n".join(lines)

    logger.info(
        f"[EditFile] _execute_direct_apply: Applied {len(edit_blocks)} blocks in memory, "
        f"content {len(raw_content)} -> {len(new_content)} chars"
    )

    # Write full content via /api/internal/files/edit
    url = f"{config.nextjs_api_url}/api/internal/files/edit"
    payload = {
        "projectId": context.project_id,
        "filePath": file_path,
        "content": new_content,
    }

    logger.info(f"[EditFile] _execute_direct_apply: POST {url}")

    try:
        async with httpx.AsyncClient(timeout=timeout) as client:
            response = await client.post(
                url,
                json=payload,
                headers={
                    "Content-Type": "application/json",
                    "X-Internal-Secret": config.internal_api_secret,
                },
            )

        logger.info(
            f"[EditFile] _execute_direct_apply: status={response.status_code}"
        )

        if response.status_code != 200:
            logger.error(
                f"[EditFile] _execute_direct_apply: HTTP error {response.status_code}: {response.text[:500]}"
            )
            return {"success": False, "error": f"HTTP {response.status_code}"}

        try:
            result = response.json()
        except Exception as json_err:
            logger.error(
                f"[EditFile] _execute_direct_apply: JSON parse error: {json_err}"
            )
            return {"success": False, "error": f"JSON parse error: {json_err}"}

        if not isinstance(result, dict):
            # A non-object JSON body cannot carry success/error fields.
            logger.error(
                f"[EditFile] _execute_direct_apply: unexpected response type {type(result).__name__}"
            )
            return {"success": False, "error": "Unexpected response format"}

        if result.get("success"):
            # Include the updated content in the result for the agent.
            # "data" may be absent or null; replace anything that is not a
            # dict so a write that succeeded is never reported as a failure.
            data = result.get("data")
            if not isinstance(data, dict):
                data = {}
                result["data"] = data
            data["content"] = new_content

        return result

    except httpx.TimeoutException:
        logger.error(f"[EditFile] Timeout in direct apply for {file_path}")
        return {"success": False, "error": f"HTTP timeout after {timeout}s"}
    except httpx.ConnectError as e:
        logger.error(f"[EditFile] Connection error to {url}: {e}")
        return {"success": False, "error": f"Connection error: {e}"}
    except Exception as e:
        logger.error(f"[EditFile] Error in direct apply for {file_path}: {e}")
        return {"success": False, "error": str(e)}
query, top_n=3 - ) + # Re-rank chunks (graceful fallback if embedding unavailable) + try: + relevant_chunks = await self._rerank_chunks_by_embedding( + chunks, query, top_n=3 + ) + except Exception as e: + logger.warning(f"Chunk rerank failed for {page.url[:40]}: {e}") + relevant_chunks = chunks[:3] page.chunks = chunks return WebRAGResult(page=page, relevant_chunks=relevant_chunks) @@ -179,11 +186,18 @@ async def search( "results": [], } - # 2) Result-level re-ranking - results = await self._rerank_results_by_embedding( - results, query, top_n=max_results - ) - self._log(f"Selected top {len(results)} results") + # 2) Result-level re-ranking (graceful fallback if embedding unavailable) + try: + results = await self._rerank_results_by_embedding( + results, query, top_n=max_results + ) + self._log(f"Selected top {len(results)} results (reranked)") + except Exception as e: + self._log(f"Embedding rerank failed ({e}), using raw order") + logger.warning( + f"Embedding rerank failed, falling back to raw results: {e}" + ) + results = results[:max_results] # 3) Process each page (download, chunk, retrieve) rag_results: List[WebRAGResult] = [] @@ -225,11 +239,11 @@ async def search( # Page context page_context = [ - f"{'='*60}", + f"{'=' * 60}", f"{page.title}", f"URL: {page.url}", f"Relevance: {page.relevance_score:.3f}", - f"{'='*60}", + f"{'=' * 60}", "", ] diff --git a/app/api/internal/agent/run/route.ts b/app/api/internal/agent/run/route.ts new file mode 100644 index 0000000..207a733 --- /dev/null +++ b/app/api/internal/agent/run/route.ts @@ -0,0 +1,247 @@ +/** + * Internal API: Run Litewrite AI Agent (Synchronous) + * ==================================================== + * + * Internal endpoint for nanobot to invoke litewrite's built-in AI agent. + * Proxies to ai-server's /api/chat/run-sync endpoint. + * + * The agent runs in direct-apply mode: file edits are written directly + * to storage instead of creating shadow documents. 
+ * + * Session support: when sessionId is provided, the agent uses the session's + * conversation history for context. When not provided, a new session named + * "nanobot" is created. The session is saved after each interaction so it + * appears in the web UI's Conversation History. + * + * This is NOT exposed to the public - protected by INTERNAL_API_SECRET. + */ + +import { NextRequest, NextResponse } from "next/server"; +import { + createSession, + getSessionById, + addUserMessage, + addAssistantMessage, +} from "@/lib/ask-session"; +import type { SessionMessageItem } from "@/types/ask"; + +const AI_SERVER_URL = process.env.AI_SERVER_URL || "http://localhost:6612"; + +// Timeout for agent execution (5 minutes) +const AGENT_TIMEOUT_MS = 300_000; + +// Verify internal API secret +function verifyInternalAuth(request: NextRequest): boolean { + const secret = request.headers.get("X-Internal-Secret"); + const expectedSecret = process.env.INTERNAL_API_SECRET; + + if (!expectedSecret) { + console.warn("[Internal API] INTERNAL_API_SECRET not configured"); + return false; + } + + return secret === expectedSecret; +} + +interface AgentRunRequest { + projectId: string; + userId?: string; + message: string; + mode?: "ask" | "agent"; + referencedFiles?: string[]; + sessionId?: string; +} + +export async function POST(request: NextRequest) { + // Verify authentication + if (!verifyInternalAuth(request)) { + return NextResponse.json( + { success: false, error: "Unauthorized" }, + { status: 401 } + ); + } + + try { + const body = (await request.json()) as AgentRunRequest; + const { projectId, userId, message, mode, referencedFiles, sessionId } = + body; + + if (!projectId || !message) { + return NextResponse.json({ + success: false, + error: "projectId and message are required", + }); + } + + const effectiveUserId = userId || ""; + + console.log( + `[Internal/AgentRun] Invoking agent: project=${projectId}, mode=${mode || "agent"}, message_len=${message.length}, 
sessionId=${sessionId || "(new)"}` + ); + + // --------------------------------------------------------------- + // Session management: load or create session + // --------------------------------------------------------------- + let currentSessionId = sessionId || ""; + let conversationHistory: Array<{ role: string; content: string }> | null = + null; + + if (effectiveUserId) { + if (currentSessionId) { + // Load existing session + const session = await getSessionById( + projectId, + effectiveUserId, + currentSessionId + ); + if (session) { + // Build conversation history from session messages + conversationHistory = session.messages + .map((msg) => { + const textContent = msg.items + ?.filter( + (item: SessionMessageItem) => item.type === "text" + ) + .map( + (item: SessionMessageItem) => + item.type === "text" ? item.content : "" + ) + .join(""); + return { + role: msg.role, + content: textContent || msg.content || "", + }; + }) + .filter((m) => m.content); + + console.log( + `[Internal/AgentRun] Loaded session ${currentSessionId}: ${conversationHistory.length} history messages` + ); + } else { + console.warn( + `[Internal/AgentRun] Session ${currentSessionId} not found, creating new` + ); + currentSessionId = ""; + } + } + + if (!currentSessionId) { + // Create a new session named "nanobot" + const newSession = await createSession( + projectId, + effectiveUserId, + "nanobot" + ); + currentSessionId = newSession.id; + console.log( + `[Internal/AgentRun] Created new session: ${currentSessionId}` + ); + } + + // Save the user message to the session + await addUserMessage( + projectId, + effectiveUserId, + currentSessionId, + message + ); + } + + // --------------------------------------------------------------- + // Build request for ai-server's sync endpoint + // --------------------------------------------------------------- + const aiServerPayload: Record = { + projectId, + userId: effectiveUserId, + message, + mode: mode || "agent", + referencedFiles: 
referencedFiles || [], + }; + + if (conversationHistory && conversationHistory.length > 0) { + aiServerPayload.conversationHistory = conversationHistory; + } + + // Call ai-server's synchronous endpoint + const controller = new AbortController(); + const timeoutId = setTimeout(() => controller.abort(), AGENT_TIMEOUT_MS); + + try { + const response = await fetch(`${AI_SERVER_URL}/api/chat/run-sync`, { + method: "POST", + headers: { + "Content-Type": "application/json", + ...(process.env.INTERNAL_API_SECRET + ? { "X-Internal-Secret": process.env.INTERNAL_API_SECRET } + : {}), + }, + body: JSON.stringify(aiServerPayload), + signal: controller.signal, + }); + + clearTimeout(timeoutId); + + if (!response.ok) { + const errorText = await response.text(); + console.error( + `[Internal/AgentRun] AI server error ${response.status}: ${errorText.slice(0, 500)}` + ); + return NextResponse.json({ + success: false, + error: `AI server returned ${response.status}`, + sessionId: currentSessionId || undefined, + }); + } + + const result = await response.json(); + + console.log( + `[Internal/AgentRun] Agent completed: success=${result.success}, response_len=${result.response?.length || 0}` + ); + + // --------------------------------------------------------------- + // Save assistant response to session + // --------------------------------------------------------------- + if (effectiveUserId && currentSessionId && result.response) { + const assistantItems: SessionMessageItem[] = [ + { type: "text", content: result.response }, + ]; + await addAssistantMessage( + projectId, + effectiveUserId, + currentSessionId, + assistantItems + ); + console.log( + `[Internal/AgentRun] Saved assistant message to session ${currentSessionId}` + ); + } + + return NextResponse.json({ + ...result, + sessionId: currentSessionId || undefined, + }); + } catch (fetchError) { + clearTimeout(timeoutId); + + if (fetchError instanceof Error && fetchError.name === "AbortError") { + console.error( + 
`[Internal/AgentRun] Timeout after ${AGENT_TIMEOUT_MS / 1000}s` + ); + return NextResponse.json({ + success: false, + error: `Agent execution timed out after ${AGENT_TIMEOUT_MS / 1000} seconds`, + sessionId: currentSessionId || undefined, + }); + } + + throw fetchError; + } + } catch (error) { + console.error("[Internal/AgentRun] Error:", error); + return NextResponse.json({ + success: false, + error: error instanceof Error ? error.message : "Unknown error", + }); + } +} diff --git a/app/api/internal/files/create/route.ts b/app/api/internal/files/create/route.ts new file mode 100644 index 0000000..947ad28 --- /dev/null +++ b/app/api/internal/files/create/route.ts @@ -0,0 +1,143 @@ +/** + * Internal API: Create File/Folder + * ================================== + * + * Internal endpoint for nanobot to create a new file or folder in a project. + * Protected by INTERNAL_API_SECRET. + */ + +import { NextRequest, NextResponse } from "next/server"; +import { getStorage, StoragePaths } from "@/lib/storage"; + +function verifyInternalAuth(request: NextRequest): boolean { + const secret = request.headers.get("X-Internal-Secret"); + const expectedSecret = process.env.INTERNAL_API_SECRET; + if (!expectedSecret) { + console.warn("[Internal API] INTERNAL_API_SECRET not configured"); + return false; + } + return secret === expectedSecret; +} + +/** + * Create a file (with optional initial content) or a folder (via a `.keep` placeholder) in a project. + * + * Expects a JSON body `{ projectId, name, type?, parentPath?, content? }`. + * Responds 401 without a valid X-Internal-Secret, 400 on invalid input (including + * path traversal attempts) and 409 on a name conflict; unexpected errors are + * returned as `{ success: false, error }` with the default status code. + */ +export async function POST(request: NextRequest) { + if (!verifyInternalAuth(request)) { + return NextResponse.json( + { success: false, error: "Unauthorized" }, + { status: 401 } + ); + } + + try { + const body = await request.json(); + const { + projectId, + name, + type = "file", + parentPath = "", + content = "", + } = body as { + projectId: string; + name: string; + type?: "file" | "folder"; + parentPath?: string; + content?: string; + }; + + if (!projectId || typeof projectId !== "string") { + return NextResponse.json( + { success: false, error: "Project ID is required" }, + { status: 400 } + ); + } + + if (!name || typeof name !== 
"string" || name.trim().length === 0) { + return NextResponse.json( + { success: false, error: "File name is required" }, + { status: 400 } + ); + } + + // Build the project-relative path + const relativePath = parentPath ? `${parentPath}/${name}` : name; + + // Validate the combined path to prevent path traversal, using the same rule + // as the files/edit route; checking the joined path also covers parentPath. + if (relativePath.includes("..") || relativePath.startsWith("/")) { + return NextResponse.json( + { success: false, error: "Invalid file path" }, + { status: 400 } + ); + } + + const storage = await getStorage(); + const key = StoragePaths.projectFile(projectId, relativePath); + + // Check whether a same-name file already exists + const fileExists = await storage.exists(key); + + // Check whether a same-name folder already exists + const folderPrefix = key.endsWith("/") ? key : `${key}/`; + const folderContents = await storage.list(folderPrefix); + const folderExists = folderContents.length > 0; + + if (type === "folder") { + if (folderExists) { + return NextResponse.json( + { success: false, error: "FOLDER_EXISTS" }, + { status: 409 } + ); + } + if (fileExists) { + return NextResponse.json( + { success: false, error: "FILE_EXISTS_WITH_SAME_NAME" }, + { status: 409 } + ); + } + // Create a .keep file to make the folder visible + await storage.upload(`${key}/.keep`, "", "text/plain"); + } else { + if (fileExists) { + return NextResponse.json( + { success: false, error: "FILE_EXISTS" }, + { status: 409 } + ); + } + if (folderExists) { + return NextResponse.json( + { success: false, error: "FOLDER_EXISTS_WITH_SAME_NAME" }, + { status: 409 } + ); + } + // Clear WS server in-memory doc (if cached) + const wsServerUrl = + process.env.WS_SERVER_URL || + process.env.NEXT_PUBLIC_WS_URL?.replace(/^wss?:\/\//, (m) => + m === "wss://" ? "https://" : "http://" + ) || + "http://localhost:1234"; + if (wsServerUrl) { + try { + const base = wsServerUrl.replace(/\/+$/, ""); + await fetch( + `${base}/clear/${projectId}/${encodeURIComponent(relativePath)}`, + { + method: "POST", + headers: process.env.INTERNAL_API_SECRET + ? 
{ "x-internal-secret": process.env.INTERNAL_API_SECRET } + : undefined, + } + ); + } catch { + // ignore: clearing the WS cache is best-effort + } + } + await storage.upload(key, content, "text/plain"); + } + + console.log( + `[Internal/CreateFile] Created ${type} "${relativePath}" in project ${projectId}` + ); + + return NextResponse.json({ + success: true, + data: { path: relativePath, type }, + }); + } catch (error) { + console.error("[Internal/CreateFile] Error:", error); + return NextResponse.json({ + success: false, + error: error instanceof Error ? error.message : "Unknown error", + }); + } +} diff --git a/app/api/internal/files/delete/route.ts b/app/api/internal/files/delete/route.ts new file mode 100644 index 0000000..f13c62f --- /dev/null +++ b/app/api/internal/files/delete/route.ts @@ -0,0 +1,130 @@ +/** + * Internal API: Delete File/Folder + * ================================== + * + * Internal endpoint for nanobot to delete a file or folder in a project. + * Handles both single files and recursive folder deletion, + * clears Yjs cache for deleted files. + * Protected by INTERNAL_API_SECRET. 
+ */ + +import { NextRequest, NextResponse } from "next/server"; +import { getStorage, StoragePaths } from "@/lib/storage"; + +function verifyInternalAuth(request: NextRequest): boolean { + const secret = request.headers.get("X-Internal-Secret"); + const expectedSecret = process.env.INTERNAL_API_SECRET; + if (!expectedSecret) { + console.warn("[Internal API] INTERNAL_API_SECRET not configured"); + return false; + } + return secret === expectedSecret; +} + +/** + * Delete a single file or, when no object exists at the exact key, every object under `<key>/`. + * + * Expects a JSON body `{ projectId, filePath }`. Responds 401 without a valid + * X-Internal-Secret, 400 on missing or traversal-prone input and 404 when neither a + * file nor a folder is found; unexpected errors are returned as + * `{ success: false, error }` with the default status code. + */ +export async function POST(request: NextRequest) { + if (!verifyInternalAuth(request)) { + return NextResponse.json( + { success: false, error: "Unauthorized" }, + { status: 401 } + ); + } + + try { + const body = await request.json(); + const { projectId, filePath } = body as { + projectId: string; + filePath: string; + }; + + if (!projectId || typeof projectId !== "string") { + return NextResponse.json( + { success: false, error: "Project ID is required" }, + { status: 400 } + ); + } + + if (!filePath || typeof filePath !== "string") { + return NextResponse.json( + { success: false, error: "File path is required" }, + { status: 400 } + ); + } + + // Validate filePath to prevent path traversal (same rule as the files/edit route). + // This matters most here: the folder branch below deletes a whole key prefix. + if (filePath.includes("..") || filePath.startsWith("/")) { + return NextResponse.json( + { success: false, error: "Invalid file path" }, + { status: 400 } + ); + } + + const storage = await getStorage(); + const projectPrefix = StoragePaths.projectPrefix(projectId); + const key = StoragePaths.projectFile(projectId, filePath); + + const wsServerUrl = + process.env.WS_SERVER_URL || + process.env.NEXT_PUBLIC_WS_URL?.replace(/^wss?:\/\//, (m) => + m === "wss://" ? "https://" : "http://" + ) || + "http://localhost:1234"; + + const clearWsDoc = async (relPath: string) => { + if (!wsServerUrl) return; + try { + const base = wsServerUrl.replace(/\/+$/, ""); + await fetch( + `${base}/clear/${projectId}/${encodeURIComponent(relPath)}`, + { + method: "POST", + headers: process.env.INTERNAL_API_SECRET + ? 
{ "x-internal-secret": process.env.INTERNAL_API_SECRET } + : undefined, + } + ); + } catch { + // ignore: clearing the WS cache is best-effort + } + }; + + let deletedCount = 0; + + // Try deleting a single file + const exists = await storage.exists(key); + if (exists) { + await storage.delete(key); + await clearWsDoc(filePath); + deletedCount = 1; + } else { + // Might be a folder; delete everything under the prefix + const folderPrefix = `${key}/`; + const folderFiles = await storage.list(folderPrefix); + + if (folderFiles.length === 0) { + return NextResponse.json( + { success: false, error: "File or folder not found" }, + { status: 404 } + ); + } + + await storage.deletePrefix(folderPrefix); + await Promise.all( + folderFiles.map(async (f) => { + const relPath = f.key + .replace(projectPrefix, "") + .replace(/^\/+/, ""); + if (!relPath) return; + await clearWsDoc(relPath); + }) + ); + deletedCount = folderFiles.length; + } + + console.log( + `[Internal/DeleteFile] Deleted "${filePath}" (${deletedCount} file(s)) from project ${projectId}` + ); + + return NextResponse.json({ + success: true, + data: { path: filePath, deletedCount }, + }); + } catch (error) { + console.error("[Internal/DeleteFile] Error:", error); + return NextResponse.json({ + success: false, + error: error instanceof Error ? error.message : "Unknown error", + }); + } +} diff --git a/app/api/internal/files/edit/route.ts b/app/api/internal/files/edit/route.ts new file mode 100644 index 0000000..8271e69 --- /dev/null +++ b/app/api/internal/files/edit/route.ts @@ -0,0 +1,152 @@ +/** + * Internal API: Edit File (Full Replacement) + * ============================================= + * + * Internal endpoint for nanobot to replace a file's entire content. + * Simpler than the shadow-document based files/write endpoint. + * Writes directly to storage and clears Yjs cache. + * + * This is NOT exposed to the public - protected by INTERNAL_API_SECRET. 
+ */ + +import { NextRequest, NextResponse } from "next/server"; +import { getStorage, StoragePaths } from "@/lib/storage"; + +// Verify internal API secret +function verifyInternalAuth(request: NextRequest): boolean { + const secret = request.headers.get("X-Internal-Secret"); + const expectedSecret = process.env.INTERNAL_API_SECRET; + + if (!expectedSecret) { + console.warn("[Internal API] INTERNAL_API_SECRET not configured"); + return false; + } + + return secret === expectedSecret; +} + +/** + * Push updated content into the WS server's Yjs document. + * + * If the document is loaded in memory (i.e. a browser has it open), the + * ws-server replaces the Y.Text content in-place and the change is + * automatically synced to every connected browser via the Yjs protocol. + * + * If no browser has the document open, the ws-server clears its Redis + * persistence so the next connection loads the fresh content from S3. + * + * Failures (non-2xx response or network error) are logged and swallowed, so + * this function never rejects. + */ +async function replaceYjsContent( + projectId: string, + filePath: string, + content: string +): Promise<void> { + const wsServerUrl = + process.env.WS_SERVER_URL || + process.env.NEXT_PUBLIC_WS_URL?.replace(/^wss?:\/\//, (m) => + m === "wss://" ? "https://" : "http://" + ) || + "http://localhost:1234"; + + try { + const base = wsServerUrl.replace(/\/+$/, ""); + const resp = await fetch( + `${base}/replace/${projectId}/${encodeURIComponent(filePath)}`, + { + method: "POST", + headers: { + "Content-Type": "application/json", + ...(process.env.INTERNAL_API_SECRET + ? 
{ "x-internal-secret": process.env.INTERNAL_API_SECRET } + : {}), + }, + body: JSON.stringify({ content }), + } + ); + + if (!resp.ok) { + const text = await resp.text(); + console.warn( + `[Internal/EditFile] WS /replace returned ${resp.status}: ${text}` + ); + } else { + const result = await resp.json(); + console.log( + `[Internal/EditFile] Yjs document replaced for ${projectId}/${filePath}` + + ` (inMemory=${result.inMemory}, length=${result.contentLength})` + ); + } + } catch (err) { + // Non-fatal: WS server may be unavailable + console.warn( + `[Internal/EditFile] Failed to replace Yjs content for ${filePath}:`, + err + ); + } +} + +/** + * Replace a file's entire content in storage, then push the new text to the WS server. + * + * Expects a JSON body `{ projectId, filePath, content }`. Responds 401 without a valid + * X-Internal-Secret and 400 on missing, non-string or traversal-prone input; unexpected + * errors are returned as `{ success: false, error }` with the default status code. + */ +export async function POST(request: NextRequest) { + // Verify authentication + if (!verifyInternalAuth(request)) { + return NextResponse.json( + { success: false, error: "Unauthorized" }, + { status: 401 } + ); + } + + try { + const body = await request.json(); + const { projectId, filePath, content } = body as { + projectId: string; + filePath: string; + content: string; + }; + + // Require strings so the traversal check below cannot throw on non-string input + if ( + !projectId || + typeof projectId !== "string" || + !filePath || + typeof filePath !== "string" + ) { + return NextResponse.json( + { success: false, error: "projectId and filePath are required" }, + { status: 400 } + ); + } + + if (typeof content !== "string") { + return NextResponse.json( + { success: false, error: "content must be a string" }, + { status: 400 } + ); + } + + // Validate filePath to prevent path traversal + if (filePath.includes("..") || filePath.startsWith("/")) { + return NextResponse.json( + { success: false, error: "Invalid file path" }, + { status: 400 } + ); + } + + const storage = await getStorage(); + const key = StoragePaths.projectFile(projectId, filePath); + + // Write content to storage (full replacement) + await storage.upload(key, content, "text/plain"); + console.log( + `[Internal/EditFile] Written ${content.length} chars to ${projectId}/${filePath}` + ); + + // Push new content into the Yjs document so the editor picks it up + await replaceYjsContent(projectId, filePath, content); + + 
return NextResponse.json({ + success: true, + data: { + filePath, + length: content.length, + }, + }); + } catch (error) { + console.error("[Internal/EditFile] Error:", error); + return NextResponse.json({ + success: false, + error: error instanceof Error ? error.message : "Unknown error", + }); + } +} diff --git a/app/api/internal/files/read/route.ts b/app/api/internal/files/read/route.ts index 99bcbb0..c4540fe 100644 --- a/app/api/internal/files/read/route.ts +++ b/app/api/internal/files/read/route.ts @@ -39,10 +39,18 @@ export async function POST(request: NextRequest) { const { projectId, userId, filePath, startLine, endLine } = body; if (!projectId || !filePath) { - return NextResponse.json({ - success: false, - error: "projectId and filePath are required", - }); + return NextResponse.json( + { success: false, error: "projectId and filePath are required" }, + { status: 400 } + ); + } + + // Validate filePath to prevent path traversal + if (filePath.includes("..") || filePath.startsWith("/")) { + return NextResponse.json( + { success: false, error: "Invalid file path" }, + { status: 400 } + ); } // Get effective content (Yjs + pending edits) diff --git a/app/api/internal/files/rename/route.ts b/app/api/internal/files/rename/route.ts new file mode 100644 index 0000000..21e2c07 --- /dev/null +++ b/app/api/internal/files/rename/route.ts @@ -0,0 +1,172 @@ +/** + * Internal API: Rename/Move File + * ================================ + * + * Internal endpoint for nanobot to rename or move a file/folder in a project. + * Handles both single files and folder trees, clears Yjs cache for source and dest. + * Protected by INTERNAL_API_SECRET. 
+ */ + +import { NextRequest, NextResponse } from "next/server"; +import { getStorage, StoragePaths } from "@/lib/storage"; + +function verifyInternalAuth(request: NextRequest): boolean { + const secret = request.headers.get("X-Internal-Secret"); + const expectedSecret = process.env.INTERNAL_API_SECRET; + if (!expectedSecret) { + console.warn("[Internal API] INTERNAL_API_SECRET not configured"); + return false; + } + return secret === expectedSecret; +} + +/** + * Rename or move a file or folder inside a project. + * + * Expects a JSON body `{ projectId, sourcePath, newName?, targetPath? }`. The destination + * is `${targetPath}/${fileName}`, or just the file name when `targetPath` is omitted; + * `fileName` is `newName` or, failing that, the last segment of `sourcePath`. + * Responds 401 without a valid X-Internal-Secret, 400 on invalid input, 404 when the + * source does not exist and 409 when a single-file move would overwrite an existing file; + * unexpected errors are returned as `{ success: false, error }` with the default status code. + */ +export async function POST(request: NextRequest) { + if (!verifyInternalAuth(request)) { + return NextResponse.json( + { success: false, error: "Unauthorized" }, + { status: 401 } + ); + } + + try { + const body = await request.json(); + const { projectId, sourcePath, newName, targetPath } = body as { + projectId: string; + sourcePath: string; + newName?: string; + targetPath?: string; + }; + + if (!projectId || typeof projectId !== "string") { + return NextResponse.json( + { success: false, error: "Project ID is required" }, + { status: 400 } + ); + } + + if (!sourcePath || typeof sourcePath !== "string") { + return NextResponse.json( + { success: false, error: "Source path is required" }, + { status: 400 } + ); + } + + // Validate every caller-supplied path component to prevent path traversal + // (same rule as the files/edit route); omitted optional parts count as "". + const pathParts = [sourcePath, newName ?? "", targetPath ?? ""]; + if ( + pathParts.some( + (p) => typeof p !== "string" || p.includes("..") || p.startsWith("/") + ) + ) { + return NextResponse.json( + { success: false, error: "Invalid file path" }, + { status: 400 } + ); + } + + const storage = await getStorage(); + const sourceKey = StoragePaths.projectFile(projectId, sourcePath); + + const wsServerUrl = + process.env.WS_SERVER_URL || + process.env.NEXT_PUBLIC_WS_URL?.replace(/^wss?:\/\//, (m) => + m === "wss://" ? "https://" : "http://" + ) || + "http://localhost:1234"; + + const clearWsDoc = async (relPath: string) => { + if (!wsServerUrl) return; + try { + const base = wsServerUrl.replace(/\/+$/, ""); + await fetch( + `${base}/clear/${projectId}/${encodeURIComponent(relPath)}`, + { + method: "POST", + headers: process.env.INTERNAL_API_SECRET + ? 
{ "x-internal-secret": process.env.INTERNAL_API_SECRET } + : undefined, + } + ); + } catch { + // ignore: clearing the WS cache is best-effort + } + }; + + // Determine destination path + const originalFileName = sourcePath.split("/").pop() || sourcePath; + const fileName = newName || originalFileName; + const destPath = targetPath ? `${targetPath}/${fileName}` : fileName; + const destKey = StoragePaths.projectFile(projectId, destPath); + + // no-op: if source and destination are identical + if (sourceKey === destKey) { + return NextResponse.json({ + success: true, + data: { oldPath: sourcePath, newPath: destPath }, + }); + } + + // Check whether the source exists + const sourceExists = await storage.exists(sourceKey); + if (!sourceExists) { + // Might be a folder + const sourcePrefix = sourceKey + "/"; + const files = await storage.list(sourcePrefix); + + if (files.length === 0) { + return NextResponse.json( + { success: false, error: "Source file not found" }, + { status: 404 } + ); + } + + // Move a folder: copy all children, then delete the original + const destPrefix = destKey + "/"; + + if (sourcePrefix === destPrefix) { + return NextResponse.json({ + success: true, + data: { oldPath: sourcePath, newPath: destPath }, + }); + } + + // Refuse to move a folder into its own subtree: the copies would be written + // under sourcePrefix and then removed by deletePrefix(sourcePrefix) below. + if (destPrefix.startsWith(sourcePrefix)) { + return NextResponse.json( + { success: false, error: "Cannot move a folder into itself" }, + { status: 400 } + ); + } + + for (const file of files) { + const relativePath = file.key.replace(sourcePrefix, ""); + const newKey = destPrefix + relativePath; + + const destRelPath = `${destPath}/${relativePath}` + .replace(/\/+/g, "/") + .replace(/^\/+/, ""); + await clearWsDoc(destRelPath); + + await storage.copy(file.key, newKey); + + const sourceRelPath = `${sourcePath}/${relativePath}` + .replace(/\/+/g, "/") + .replace(/^\/+/, ""); + await clearWsDoc(sourceRelPath); + } + await storage.deletePrefix(sourcePrefix); + } else { + // Move a single file + const destExists = await storage.exists(destKey); + if (destExists) { + return NextResponse.json( + { + success: false, + error: + "A file with the same name already exists in the target location", + }, + { status: 409 } + ); + } + + await 
clearWsDoc(destPath); + await storage.copy(sourceKey, destKey); + await storage.delete(sourceKey); + await clearWsDoc(sourcePath); + } + + console.log( + `[Internal/RenameFile] Moved "${sourcePath}" -> "${destPath}" in project ${projectId}` + ); + + return NextResponse.json({ + success: true, + data: { oldPath: sourcePath, newPath: destPath }, + }); + } catch (error) { + console.error("[Internal/RenameFile] Error:", error); + return NextResponse.json({ + success: false, + error: error instanceof Error ? error.message : "Unknown error", + }); + } +} diff --git a/app/api/internal/files/upload/route.ts b/app/api/internal/files/upload/route.ts new file mode 100644 index 0000000..59ed57d --- /dev/null +++ b/app/api/internal/files/upload/route.ts @@ -0,0 +1,161 @@ +/** + * Internal API: Upload Binary File + * ================================== + * + * Internal endpoint for nanobot to upload binary files (images, PDFs, etc.) + * to a Litewrite project. Accepts base64-encoded content. + * + * Protected by INTERNAL_API_SECRET. + */ + +import { NextRequest, NextResponse } from "next/server"; +import { getStorage, StoragePaths } from "@/lib/storage"; + +// Common MIME type mapping (lower-cased extension, including the dot -> MIME type) +const MIME_TYPES: Record<string, string> = { + ".png": "image/png", + ".jpg": "image/jpeg", + ".jpeg": "image/jpeg", + ".gif": "image/gif", + ".svg": "image/svg+xml", + ".bmp": "image/bmp", + ".webp": "image/webp", + ".tiff": "image/tiff", + ".tif": "image/tiff", + ".pdf": "application/pdf", + ".eps": "application/postscript", + ".ps": "application/postscript", + ".tex": "text/x-tex", + ".bib": "text/x-bibtex", + ".sty": "text/x-tex", + ".cls": "text/x-tex", + ".txt": "text/plain", + ".md": "text/markdown", + ".json": "application/json", + ".xml": "application/xml", + ".csv": "text/csv", +}; + +function getMimeType(fileName: string): string { + const ext = fileName.lastIndexOf(".") >= 0 + ? 
fileName.substring(fileName.lastIndexOf(".")).toLowerCase() + : ""; + return MIME_TYPES[ext] || "application/octet-stream"; +} + +function verifyInternalAuth(request: NextRequest): boolean { + const secret = request.headers.get("X-Internal-Secret"); + const expectedSecret = process.env.INTERNAL_API_SECRET; + if (!expectedSecret) { + console.warn("[Internal API] INTERNAL_API_SECRET not configured"); + return false; + } + return secret === expectedSecret; +} + +/** + * Upload a file given either as base64 (`contentBase64`) or as plain text (`content`); + * `contentBase64` takes precedence when it is non-empty. + * + * Expects a JSON body `{ projectId, filePath, contentBase64?, content?, overwrite? }`. + * Responds 401 without a valid X-Internal-Secret, 400 on missing or traversal-prone + * input and 409 when `overwrite` is false and the file already exists; unexpected + * errors are returned as `{ success: false, error }` with the default status code. + */ +export async function POST(request: NextRequest) { + if (!verifyInternalAuth(request)) { + return NextResponse.json( + { success: false, error: "Unauthorized" }, + { status: 401 } + ); + } + + try { + const body = await request.json(); + const { + projectId, + filePath, + contentBase64, + content, + overwrite = true, + } = body as { + projectId: string; + filePath: string; + contentBase64?: string; // Base64-encoded binary content + content?: string; // Plain text content (for text files) + overwrite?: boolean; + }; + + if (!projectId || typeof projectId !== "string") { + return NextResponse.json( + { success: false, error: "Project ID is required" }, + { status: 400 } + ); + } + + if (!filePath || typeof filePath !== "string") { + return NextResponse.json( + { success: false, error: "File path is required" }, + { status: 400 } + ); + } + + // Validate filePath to prevent path traversal (same rule as the files/edit route) + if (filePath.includes("..") || filePath.startsWith("/")) { + return NextResponse.json( + { success: false, error: "Invalid file path" }, + { status: 400 } + ); + } + + if (!contentBase64 && content === undefined) { + return NextResponse.json( + { success: false, error: "Either contentBase64 or content is required" }, + { status: 400 } + ); + } + + const storage = await getStorage(); + const key = StoragePaths.projectFile(projectId, filePath); + + // Check if file already exists (unless overwrite is allowed) + if (!overwrite) { + const exists = await storage.exists(key); + if (exists) { + return NextResponse.json( + { success: false, error: "File already exists" }, + { status: 409 } + ); + } + } + + const fileName = filePath.split("/").pop() || filePath; + const mimeType = getMimeType(fileName); + + if (contentBase64) { + // Binary upload: 
decode base64 to Buffer + const buffer = Buffer.from(contentBase64, "base64"); + await storage.upload(key, buffer, mimeType); + + console.log( + `[Internal/UploadFile] Uploaded binary file "${filePath}" ` + + `(${buffer.length} bytes, ${mimeType}) to project ${projectId}` + ); + + return NextResponse.json({ + success: true, + data: { + path: filePath, + size: buffer.length, + mimeType, + }, + }); + } else { + // Text upload (note: the reported size is a character count, not a byte count) + await storage.upload(key, content || "", mimeType); + + console.log( + `[Internal/UploadFile] Uploaded text file "${filePath}" ` + + `(${(content || "").length} chars, ${mimeType}) to project ${projectId}` + ); + + return NextResponse.json({ + success: true, + data: { + path: filePath, + size: (content || "").length, + mimeType, + }, + }); + } + } catch (error) { + console.error("[Internal/UploadFile] Error:", error); + return NextResponse.json({ + success: false, + error: error instanceof Error ? error.message : "Unknown error", + }); + } +} diff --git a/app/api/internal/projects/compile/route.ts b/app/api/internal/projects/compile/route.ts new file mode 100644 index 0000000..639d05c --- /dev/null +++ b/app/api/internal/projects/compile/route.ts @@ -0,0 +1,327 @@ +/** + * Internal API: Compile Project + * =============================== + * + * Internal endpoint for nanobot to trigger project compilation. + * Returns the compiled PDF as base64 (also saves to storage for web preview). + * + * This is NOT exposed to the public - protected by INTERNAL_API_SECRET. 
+ */ + +import { NextRequest, NextResponse } from "next/server"; +import { prisma } from "@/lib/prisma"; +import { getStorage, StoragePaths } from "@/lib/storage"; +import { merkleService } from "@/lib/storage/merkle"; +import { VALID_COMPILERS, Compiler } from "@/lib/compiler-utils"; + +const COMPILE_SERVER_URL = + process.env.COMPILE_SERVER_URL || "http://localhost:3002"; + +// Text file extensions (sent as UTF-8 strings). +// Note: ".aux", ".toc" and ".out" never reach this set in readProjectFiles, +// because shouldIncludeFile rejects files with those suffixes first. +const TEXT_EXTENSIONS = new Set([ + ".tex", ".bib", ".bbl", ".sty", ".cls", ".txt", ".md", ".bst", + ".json", ".xml", ".cfg", ".def", ".fd", ".aux", ".toc", + ".lof", ".lot", ".idx", ".ind", ".glo", ".gls", ".out", ".blg", +]); + +// Binary file extensions (sent as base64) +const BINARY_EXTENSIONS = new Set([ + ".png", ".jpg", ".jpeg", ".gif", ".pdf", ".eps", ".ps", + ".svg", ".bmp", ".tiff", ".tif", +]); + +// Verify internal API secret: true only when INTERNAL_API_SECRET is configured +// and the X-Internal-Secret request header equals it. +function verifyInternalAuth(request: NextRequest): boolean { + const secret = request.headers.get("X-Internal-Secret"); + const expectedSecret = process.env.INTERNAL_API_SECRET; + + if (!expectedSecret) { + console.warn("[Internal API] INTERNAL_API_SECRET not configured"); + return false; + } + + return secret === expectedSecret; +} + +/** + * Return the lower-cased extension of `filename`, including the leading dot, + * or "" when the name contains no dot. + */ +function getExtension(filename: string): string { + const lastDot = filename.lastIndexOf("."); + return lastDot >= 0 ? filename.substring(lastDot).toLowerCase() : ""; +} + +/** + * Decide whether a file is sent to the compile server: names ending in one of + * the listed build-output suffixes, `project.json` and dotfiles are excluded. + */ +function shouldIncludeFile(filename: string): boolean { + const excludePatterns = [ + ".log", ".aux", ".out", ".toc", ".synctex.gz", ".fls", ".fdb_latexmk", + ]; + if (excludePatterns.some((p) => filename.endsWith(p))) return false; + if (filename === "project.json" || filename.startsWith(".")) return false; + return true; +} + +/** + * Read all project files from storage for compilation. 
+ * + * Both returned maps are keyed by the storage key with the project prefix stripped. + * Text files are decoded as UTF-8 and binary files are base64-encoded; files whose + * extension is in neither set are skipped, and a file that fails to download is + * logged and left out rather than aborting the whole read. + */ +async function readProjectFiles(projectId: string): Promise<{ + textFiles: Record<string, string>; + binaryFiles: Record<string, string>; +}> { + const storage = await getStorage(); + const prefix = StoragePaths.projectPrefix(projectId); + const prefixLen = prefix.length; + + const textFiles: Record<string, string> = {}; + const binaryFiles: Record<string, string> = {}; + + const files = await storage.list(prefix); + + for (const file of files) { + const relativePath = file.key.substring(prefixLen); + if (!relativePath) continue; + + const filename = relativePath.split("/").pop() || ""; + if (!shouldIncludeFile(filename)) continue; + if (file.key.endsWith("/")) continue; + + try { + const content = await storage.download(file.key); + const ext = getExtension(filename); + + if (TEXT_EXTENSIONS.has(ext)) { + textFiles[relativePath] = content.toString("utf8"); + } else if (BINARY_EXTENSIONS.has(ext)) { + binaryFiles[relativePath] = content.toString("base64"); + } + } catch (error) { + console.error( + `[Internal/Compile] Failed to read file: ${file.key}`, + error + ); + } + } + + return { textFiles, binaryFiles }; +} + +export async function POST(request: NextRequest) { + // Verify authentication + if (!verifyInternalAuth(request)) { + return NextResponse.json( + { success: false, error: "Unauthorized" }, + { status: 401 } + ); + } + + try { + const body = await request.json(); + const { projectId, compiler: requestedCompiler, autoSave = true, userId } = body as { + projectId: string; + compiler?: string; + autoSave?: boolean; + userId?: string; + }; + + if (!projectId) { + return NextResponse.json( + { success: false, error: "projectId is required" }, + { status: 400 } + ); + } + + // Look up the project in the database + const project = await prisma.project.findUnique({ + where: { id: projectId }, + }); + + if (!project) { + return NextResponse.json( + { success: false, error: `Project not found: ${projectId}` }, + { status: 404 } + ); + } + + // Read all project files + console.log(`[Internal/Compile] Reading project 
files for: ${projectId}`); + const { textFiles, binaryFiles } = await readProjectFiles(projectId); + + console.log( + `[Internal/Compile] Text files: ${Object.keys(textFiles).length}, ` + + `Binary files: ${Object.keys(binaryFiles).length}` + ); + + const mainFile = project.mainFile || "main.tex"; + + // Check whether the main file exists + if (!textFiles[mainFile]) { + return NextResponse.json({ + success: false, + error: `Main file not found: ${mainFile}`, + }); + } + + // Resolve compiler: request param > project setting > default + let compiler = "pdflatex"; + if ( + requestedCompiler && + VALID_COMPILERS.has(requestedCompiler as Compiler) + ) { + compiler = requestedCompiler; + } else if ( + project.compiler && + VALID_COMPILERS.has(project.compiler as Compiler) + ) { + compiler = project.compiler; + } + + console.log(`[Internal/Compile] Using compiler: ${compiler}`); + + // Call compile server + const compileResponse = await fetch(`${COMPILE_SERVER_URL}/compile`, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ + mainFile, + compiler, + projectFiles: textFiles, + binaryFiles, + }), + cache: "no-store", + }); + + if (!compileResponse.ok) { + let errorDetail = ""; + try { + const errBody = await compileResponse.json(); + errorDetail = errBody.logs || errBody.error || ""; + } catch { + errorDetail = `HTTP ${compileResponse.status}`; + } + + console.error("[Internal/Compile] Compile server error:", errorDetail); + + return NextResponse.json({ + success: false, + error: "Compilation failed", + logs: errorDetail, + }); + } + + const result = await compileResponse.json(); + + if (!result.success || !result.pdfBase64) { + return NextResponse.json({ + success: false, + error: "Compilation did not produce a PDF", + logs: result.logs || "", + }); + } + + // Save PDF to storage (keeps web preview working) + const storage = await getStorage(); + const timestamp = Date.now(); + const pdfFileName = `output-${timestamp}.pdf`; + 
const pdfKey = StoragePaths.compiledFile(projectId, pdfFileName); + const pdfBuffer = Buffer.from(result.pdfBase64, "base64"); + await storage.upload(pdfKey, pdfBuffer, "application/pdf"); + console.log(`[Internal/Compile] PDF saved to storage: ${pdfKey}`); + + // Save SyncTeX file if present + if (result.synctexBase64) { + const synctexFileName = `output-${timestamp}.synctex.gz`; + const synctexKey = StoragePaths.compiledFile(projectId, synctexFileName); + const synctexBuffer = Buffer.from(result.synctexBase64, "base64"); + await storage.upload(synctexKey, synctexBuffer, "application/gzip"); + } + + // Clean up old compiled files + try { + const prefix = StoragePaths.compiledPrefix(projectId); + const oldFiles = await storage.list(prefix); + for (const f of oldFiles) { + const fname = f.key.split("/").pop() || ""; + if ( + !fname.includes(String(timestamp)) && + (fname.endsWith(".pdf") || fname.endsWith(".synctex.gz")) + ) { + await storage.delete(f.key); + } + } + } catch (e) { + console.warn("[Internal/Compile] Failed to clean old files:", e); + } + + console.log( + `[Internal/Compile] Compilation successful, PDF size: ${pdfBuffer.length} bytes` + ); + + // Clear project-level Yjs caches so the web editor shows fresh content + try { + const wsServerUrl = + process.env.WS_SERVER_URL || + process.env.NEXT_PUBLIC_WS_URL?.replace(/^wss?:\/\//, (m) => + m === "wss://" ? "https://" : "http://" + ) || + "http://localhost:1234"; + + const base = wsServerUrl.replace(/\/+$/, ""); + await fetch(`${base}/admin/clear-project/${projectId}`, { + method: "POST", + headers: process.env.INTERNAL_API_SECRET + ? 
{ "x-internal-secret": process.env.INTERNAL_API_SECRET } + : undefined, + }); + console.log( + `[Internal/Compile] Cleared project Yjs caches for ${projectId}` + ); + } catch (e) { + console.warn("[Internal/Compile] Failed to clear project Yjs caches:", e); + } + + // Auto-save version after successful compilation + let versionSaved: { id: string; name: string } | null = null; + if (autoSave) { + try { + const now = new Date(); + const dateStr = now.toISOString().replace("T", " ").slice(0, 19); + const versionName = `Auto-saved (compile) - ${dateStr}`; + const saveUserId = userId || project.ownerId; + + const versionResult = await merkleService.createCommit( + projectId, + versionName, + saveUserId + ); + + versionSaved = { id: versionResult.id, name: versionName }; + console.log( + `[Internal/Compile] Auto-saved version: "${versionName}" (${versionResult.id})` + ); + } catch (versionError) { + // NO_CHANGES_DETECTED is expected if nothing changed since last save + if ( + versionError instanceof Error && + versionError.message === "NO_CHANGES_DETECTED" + ) { + console.log("[Internal/Compile] Auto-save: no changes detected, skipped"); + } else { + // Version save failure should not affect compile result + console.warn("[Internal/Compile] Auto-save version failed:", versionError); + } + } + } + + return NextResponse.json({ + success: true, + data: { + pdfBase64: result.pdfBase64, + pdfFileName, + logs: result.logs || "", + versionSaved, + }, + }); + } catch (error) { + console.error("[Internal/Compile] Error:", error); + return NextResponse.json({ + success: false, + error: error instanceof Error ? 
/**
 * Verify the shared internal API secret sent in the `X-Internal-Secret` header.
 *
 * Returns `false` when `INTERNAL_API_SECRET` is not configured (a warning is
 * logged), when the header is absent or empty, or when the values differ.
 *
 * The comparison does not short-circuit on the first differing byte, so the
 * time taken does not reveal how much of a guessed secret was correct.
 *
 * @param request Incoming request whose headers carry the secret.
 * @returns `true` only when the provided secret equals the configured one.
 */
function verifyInternalAuth(request: NextRequest): boolean {
  const providedSecret = request.headers.get("X-Internal-Secret");
  const expectedSecret = process.env.INTERNAL_API_SECRET;

  if (!expectedSecret) {
    console.warn("[Internal API] INTERNAL_API_SECRET not configured");
    return false;
  }

  if (!providedSecret) {
    return false;
  }

  const encoder = new TextEncoder();
  const provided = encoder.encode(providedSecret);
  const expected = encoder.encode(expectedSecret);

  // Fold the length difference into the accumulator instead of returning early,
  // then always walk the full expected secret. `provided.length` is >= 1 here,
  // so the modulo index is always valid.
  let diff = provided.length ^ expected.length;
  for (let i = 0; i < expected.length; i++) {
    diff |= provided[i % provided.length] ^ expected[i];
  }

  return diff === 0;
}
/**
 * POST handler: create a new project on behalf of nanobot.
 *
 * Request body: `{ name, ownerId, description?, locale?, mainFileContent? }`.
 * Creates the database record (private, active, main file `main.tex`), then
 * writes `main.tex` (caller-supplied content, or the default template for
 * `locale` with the project name as title) and a `project.json` metadata
 * backup through the storage abstraction.
 *
 * Responses:
 * - 401 when the internal secret check fails.
 * - 400 when `name` or `ownerId` is missing or not a string.
 * - `{ success: true, data: { project } }` on success.
 * - `{ success: false, error }` when anything throws.
 */
export async function POST(request: NextRequest) {
  if (!verifyInternalAuth(request)) {
    return NextResponse.json(
      { success: false, error: "Unauthorized" },
      { status: 401 }
    );
  }

  try {
    const body = await request.json();
    const { name, ownerId, description, locale = "en", mainFileContent } = body as {
      name: string;
      ownerId: string;
      description?: string;
      locale?: string;
      mainFileContent?: string;
    };

    if (!name || typeof name !== "string" || name.trim().length === 0) {
      return NextResponse.json(
        { success: false, error: "Project name is required" },
        { status: 400 }
      );
    }

    if (!ownerId || typeof ownerId !== "string") {
      return NextResponse.json(
        { success: false, error: "Owner ID is required" },
        { status: 400 }
      );
    }

    const projectId = uuidv4();
    const projectName = name.trim();

    // Create database record
    const project = await prisma.project.create({
      data: {
        id: projectId,
        name: projectName,
        description: description?.trim() || null,
        mainFile: "main.tex",
        ownerId,
        visibility: "private",
        status: "active",
      },
    });

    // Create files via the storage abstraction
    const storage = await getStorage();

    // Create default main.tex (use provided content or default template).
    // A replacer function is used so that "$&", "$1", "$$" etc. inside the
    // project name are inserted literally instead of being interpreted as
    // String.prototype.replace substitution patterns.
    const mainTexContent =
      mainFileContent ||
      getDefaultTemplate(locale).replace("%TITLE%", () => projectName);
    const mainTexKey = StoragePaths.projectFile(projectId, "main.tex");
    await storage.upload(mainTexKey, mainTexContent, "text/x-tex");

    // Create project.json (metadata backup)
    const meta = {
      id: projectId,
      name: projectName,
      description: description?.trim() || undefined,
      mainFile: "main.tex",
      createdAt: project.createdAt.toISOString(),
      updatedAt: project.updatedAt.toISOString(),
      template: "nanobot",
    };
    const metaKey = StoragePaths.projectFile(projectId, "project.json");
    await storage.upload(metaKey, JSON.stringify(meta, null, 2), "application/json");

    console.log(`[Internal/CreateProject] Created project "${projectName}" (${projectId}) for owner ${ownerId}`);

    return NextResponse.json({
      success: true,
      data: {
        project: {
          id: project.id,
          name: project.name,
          description: project.description,
          mainFile: project.mainFile,
          visibility: project.visibility,
          createdAt: project.createdAt.toISOString(),
          updatedAt: project.updatedAt.toISOString(),
          ownerId: project.ownerId,
        },
      },
    });
  } catch (error) {
    console.error("[Internal/CreateProject] Error:", error);
    return NextResponse.json({
      success: false,
      error: error instanceof Error ? error.message : "Unknown error",
    });
  }
}
/**
 * POST handler: delete a project on behalf of nanobot.
 *
 * Request body: `{ projectId }`.
 * Steps, in order:
 *   1. Best-effort decrement of blob refcounts for every version that has a
 *      `rootTreeHash`.
 *   2. Delete the project row.
 *   3. Best-effort removal of the project, compiled and versions storage
 *      prefixes.
 *
 * Failures in steps 1 and 3 are logged and never fail the request.
 *
 * Responses:
 * - 401 when the internal secret check fails.
 * - 400 when `projectId` is missing or not a string.
 * - 404 when the project does not exist.
 * - `{ success: true, data: { projectId, name } }` on success.
 * - `{ success: false, error }` when anything else throws.
 */
export async function POST(request: NextRequest) {
  if (!verifyInternalAuth(request)) {
    return NextResponse.json(
      { success: false, error: "Unauthorized" },
      { status: 401 }
    );
  }

  try {
    const body = await request.json();
    const { projectId } = body as { projectId: string };

    if (!projectId || typeof projectId !== "string") {
      return NextResponse.json(
        { success: false, error: "Project ID is required" },
        { status: 400 }
      );
    }

    // Verify project exists
    const project = await prisma.project.findUnique({
      where: { id: projectId },
      select: { id: true, name: true },
    });

    if (!project) {
      return NextResponse.json(
        { success: false, error: "Project not found" },
        { status: 404 }
      );
    }

    // Handle blob refcounts for Merkle Tree versions (best effort)
    try {
      const merkleVersions = await prisma.projectVersion.findMany({
        where: {
          projectId,
          rootTreeHash: { not: null },
        },
        select: { rootTreeHash: true },
      });

      for (const version of merkleVersions) {
        if (!version.rootTreeHash) continue;
        try {
          const files = await merkleService.getTreeFiles(version.rootTreeHash);
          for (const file of files) {
            await merkleService.decrementBlobRef(file.hash);
          }
        } catch (e) {
          // Not fatal: keep going with the remaining versions, but leave a
          // trace so leaked refs can be diagnosed.
          console.warn(
            "[Internal/DeleteProject] Failed to decrement blob refs for tree:",
            version.rootTreeHash,
            e
          );
        }
      }
    } catch (e) {
      console.warn(
        "[Internal/DeleteProject] Failed to list Merkle versions:",
        projectId,
        e
      );
    }

    // Delete database record (cascades collaborators, versions, etc.)
    await prisma.project.delete({
      where: { id: projectId },
    });

    // Delete project files from storage. Each prefix is attempted on its own
    // so that one failing prefix does not leave the remaining ones behind.
    try {
      const storage = await getStorage();
      const prefixes = [
        StoragePaths.projectPrefix(projectId),
        StoragePaths.compiledPrefix(projectId),
        StoragePaths.versionsPrefix(projectId),
      ];
      for (const prefix of prefixes) {
        try {
          await storage.deletePrefix(prefix);
        } catch (e) {
          console.warn(
            "[Internal/DeleteProject] Failed to delete storage files:",
            projectId,
            prefix,
            e
          );
        }
      }
    } catch (e) {
      console.warn("[Internal/DeleteProject] Failed to delete storage files:", projectId, e);
    }

    console.log(`[Internal/DeleteProject] Deleted project "${project.name}" (${projectId})`);

    return NextResponse.json({
      success: true,
      data: { projectId, name: project.name },
    });
  } catch (error) {
    console.error("[Internal/DeleteProject] Error:", error);
    return NextResponse.json({
      success: false,
      error: error instanceof Error ? error.message : "Unknown error",
    });
  }
}
/**
 * POST handler: import an arXiv paper's source as a new project.
 *
 * Request body: `{ arxivId, ownerId, name?, description? }` where `arxivId`
 * may be a bare id or an arXiv URL (see `parseArxivId`).
 *
 * Flow:
 *   1. Best-effort fetch of the paper title from the arXiv Atom API.
 *   2. Download the e-print from arxiv.org.
 *   3. Try to unpack it as a tar.gz into a temp dir and upload every
 *      non-hidden file; if that fails, fall back to a single gzipped file,
 *      then to the raw bytes, stored as `main.tex`.
 *   4. Create the project row and a `project.json` metadata backup.
 *
 * On failure after step 2 the temp dir and any uploaded files are removed.
 *
 * Responses:
 * - 401 when the internal secret check fails.
 * - 400 for missing/invalid input or when no source files were found.
 * - 404/500 when the arXiv download fails.
 * - `{ success, data | error }` otherwise.
 */
export async function POST(request: NextRequest) {
  if (!verifyInternalAuth(request)) {
    return NextResponse.json(
      { success: false, error: "Unauthorized" },
      { status: 401 }
    );
  }

  try {
    const body = await request.json();
    const { arxivId: rawArxivId, ownerId, name, description } = body as {
      arxivId: string;
      ownerId: string;
      name?: string;
      description?: string;
    };

    if (!rawArxivId) {
      return NextResponse.json(
        { success: false, error: "arxivId is required" },
        { status: 400 }
      );
    }

    if (!ownerId) {
      return NextResponse.json(
        { success: false, error: "ownerId is required" },
        { status: 400 }
      );
    }

    // Parse arXiv ID
    const arxivId = parseArxivId(rawArxivId);
    if (!arxivId) {
      return NextResponse.json(
        { success: false, error: "Invalid arXiv ID or URL" },
        { status: 400 }
      );
    }

    // Fetch paper metadata (title, etc.)
    let paperTitle = "";
    try {
      const metaResponse = await fetch(
        `https://export.arxiv.org/api/query?id_list=${arxivId}`
      );
      if (metaResponse.ok) {
        const xml = await metaResponse.text();
        // The Atom feed has its own <title> before the first <entry>; anchor on
        // <entry> so the paper's title is captured rather than the feed's.
        const entryMatch = xml.match(
          /<entry\b[^>]*>[\s\S]*?<title\b[^>]*>([^<]+)<\/title>/
        );
        if (entryMatch && entryMatch[1]) {
          paperTitle = entryMatch[1].trim().replace(/\s+/g, " ");
        }
      }
    } catch (e) {
      console.warn("[Internal/ImportArxiv] Failed to fetch arXiv metadata:", e);
    }

    // Download source
    const downloadUrl = `https://arxiv.org/e-print/${arxivId}`;
    const response = await fetch(downloadUrl, {
      headers: {
        "User-Agent": "Litewrite/1.0 (LaTeX Editor; +https://litewrite.io)",
      },
    });

    if (!response.ok) {
      return NextResponse.json(
        {
          success: false,
          error:
            response.status === 404
              ? "No source code found on arXiv for this paper"
              : `arXiv download failed with status ${response.status}`,
        },
        { status: response.status === 404 ? 404 : 500 }
      );
    }

    const arrayBuffer = await response.arrayBuffer();
    const buffer = Buffer.from(arrayBuffer);

    // Create project
    const projectId = uuidv4();
    const tempDir = path.join(os.tmpdir(), `arxiv-${projectId}`);
    const storage = await getStorage();

    await fs.mkdir(tempDir, { recursive: true });

    let extractedFiles: string[] = [];
    let mainFile = "main.tex";

    try {
      // Try extracting as tar.gz
      const tempFile = path.join(tempDir, "archive.tar.gz");
      await fs.writeFile(tempFile, buffer);

      try {
        await tar.extract({ file: tempFile, cwd: tempDir });
        await fs.unlink(tempFile);

        const entries = await getAllFiles(tempDir);
        if (entries.length === 0) {
          throw new Error("No files extracted");
        }

        let hasMainTex = false;
        for (const fullPath of entries) {
          const relativePath = path.relative(tempDir, fullPath);
          // Skip hidden files/directories and anything that looks like a
          // parent-directory reference.
          if (
            relativePath.startsWith(".") ||
            relativePath.includes("/.") ||
            relativePath.includes("..")
          ) {
            continue;
          }
          const content = await fs.readFile(fullPath);
          const storageKey = StoragePaths.projectFile(projectId, relativePath);
          await storage.upload(storageKey, content, getMimeType(relativePath));
          extractedFiles.push(relativePath);

          if (relativePath === "main.tex") {
            hasMainTex = true;
            mainFile = "main.tex";
          }
        }

        if (!hasMainTex) {
          // No root main.tex: fall back to the first root-level .tex file.
          const texFile = extractedFiles.find(
            (f) => f.endsWith(".tex") && !f.includes("/") && !f.includes("\\")
          );
          if (texFile) mainFile = texFile;
        }
      } catch (tarError) {
        console.warn(
          "[Internal/ImportArxiv] Not a tar.gz, trying single file:",
          tarError
        );

        const zlib = await import("zlib");
        const { promisify } = await import("util");
        const gunzip = promisify(zlib.gunzip);

        try {
          // Single gzipped source file
          const decompressed = await gunzip(buffer);
          mainFile = "main.tex";
          const storageKey = StoragePaths.projectFile(projectId, mainFile);
          await storage.upload(storageKey, decompressed, "text/x-latex");
          extractedFiles = [mainFile];
        } catch {
          // Not gzip either: store the raw download as main.tex
          mainFile = "main.tex";
          const storageKey = StoragePaths.projectFile(projectId, mainFile);
          await storage.upload(storageKey, buffer, "text/x-latex");
          extractedFiles = [mainFile];
        }
      }

      // Clean up temp dir
      await fs.rm(tempDir, { recursive: true, force: true });

      if (extractedFiles.length === 0) {
        await storage.deletePrefix(StoragePaths.projectPrefix(projectId));
        return NextResponse.json(
          { success: false, error: "No source files found in arXiv download" },
          { status: 400 }
        );
      }

      // Project name
      const projectName =
        name?.trim() || paperTitle || `arXiv-${arxivId}`;
      const projectDescription =
        description?.trim() ||
        `Imported from arXiv: ${arxivId}${paperTitle ? ` - ${paperTitle}` : ""}`;

      // Create DB record
      const project = await prisma.project.create({
        data: {
          id: projectId,
          name: projectName,
          description: projectDescription || null,
          mainFile,
          ownerId,
          visibility: "private",
          status: "active",
        },
      });

      // Create project.json
      const meta = {
        id: projectId,
        name: projectName,
        description: projectDescription || undefined,
        mainFile,
        createdAt: project.createdAt.toISOString(),
        updatedAt: project.updatedAt.toISOString(),
        template: "arxiv-import",
        arxivId,
        arxivUrl: `https://arxiv.org/abs/${arxivId}`,
      };
      const metaKey = StoragePaths.projectFile(projectId, "project.json");
      await storage.upload(
        metaKey,
        JSON.stringify(meta, null, 2),
        "application/json"
      );

      console.log(
        `[Internal/ImportArxiv] Imported arXiv ${arxivId} as project "${projectName}" (${projectId}) for owner ${ownerId}`
      );

      return NextResponse.json({
        success: true,
        data: {
          project: {
            id: project.id,
            name: project.name,
            description: project.description,
            mainFile: project.mainFile,
            visibility: project.visibility,
            createdAt: project.createdAt.toISOString(),
            updatedAt: project.updatedAt.toISOString(),
          },
          arxivId,
          paperTitle: paperTitle || undefined,
          filesCount: extractedFiles.length,
        },
      });
    } catch (error) {
      // Roll back the temp dir and any files already uploaded, then let the
      // outer handler build the error response.
      await fs.rm(tempDir, { recursive: true, force: true }).catch(() => {});
      try {
        await storage.deletePrefix(StoragePaths.projectPrefix(projectId));
      } catch {}
      throw error;
    }
  } catch (error) {
    console.error("[Internal/ImportArxiv] Error:", error);
    return NextResponse.json({
      success: false,
      error: error instanceof Error ? error.message : "Unknown error",
    });
  }
}
/**
 * Parse an arXiv identifier from a bare id or an arxiv.org URL.
 *
 * Accepted inputs (a trailing version suffix such as `v2` is dropped):
 * - New-style ids: `YYMM.NNNN` / `YYMM.NNNNN`
 * - Legacy ids: `archive/YYMMNNN`, optionally with a subject class
 *   (`math.AG/0601001`)
 * - URLs on `arxiv.org` or one of its subdomains whose path ends in either
 *   id form, with an optional `.pdf` extension
 *
 * @param input Raw user input.
 * @returns The version-less id, or `null` when nothing recognisable is found.
 */
function parseArxivId(input: string): string | null {
  input = input.trim();

  const newStyleId = /^\d{4}\.\d{4,5}$/;
  const legacyId = /^[a-z-]+(?:\.[a-z]{2})?\/\d{7}$/i;

  // Direct id format: YYMM.NNNNN or YYMM.NNNNNvN
  const idMatch = input.match(/^(\d{4}\.\d{4,5})(v\d+)?$/);
  if (idMatch) return idMatch[1];

  // Legacy format: category/YYMMNNN
  const oldIdMatch = input.match(/^([a-z-]+(?:\.[a-z]{2})?\/\d{7})(v\d+)?$/i);
  if (oldIdMatch) return oldIdMatch[1];

  // URL format
  try {
    const url = new URL(input);
    const host = url.hostname.toLowerCase();
    // Exact host or subdomain only; a substring test would also accept
    // look-alike hosts such as "arxiv.org.example.com".
    if (host === "arxiv.org" || host.endsWith(".arxiv.org")) {
      const pathParts = url.pathname.split("/").filter(Boolean);
      if (pathParts.length >= 2) {
        const tail = pathParts[pathParts.length - 1]
          .replace(/\.pdf$/i, "")
          .replace(/v\d+$/, "");

        if (newStyleId.test(tail)) {
          return tail;
        }

        // Legacy ids contain a slash, so they occupy the last TWO path
        // segments (e.g. /abs/hep-th/9901001). Require a leading route
        // segment so "/abs/9901001" is not misread as archive "abs".
        if (pathParts.length >= 3) {
          const legacy = `${pathParts[pathParts.length - 2]}/${tail}`;
          if (legacyId.test(legacy)) {
            return legacy;
          }
        }
      }
    }
  } catch {
    // Not a valid URL
  }

  return null;
}

/**
 * Recursively list all regular files under a directory.
 *
 * Symbolic links and other special entries are skipped: the returned paths are
 * later read and uploaded, and following a link extracted from an archive
 * could expose files outside the extraction directory.
 *
 * A directory that cannot be read is logged and contributes no entries.
 *
 * @param dir Directory to walk.
 * @returns Full paths of every regular file found beneath `dir`.
 */
async function getAllFiles(dir: string): Promise<string[]> {
  const results: string[] = [];
  try {
    const entries = await fs.readdir(dir, { withFileTypes: true });
    for (const entry of entries) {
      const fullPath = path.join(dir, entry.name);
      if (entry.isDirectory()) {
        const subFiles = await getAllFiles(fullPath);
        results.push(...subFiles);
      } else if (entry.isFile()) {
        results.push(fullPath);
      }
    }
  } catch (e) {
    console.warn("[Internal/ImportArxiv] Error reading directory:", dir, e);
  }
  return results;
}
/**
 * POST handler: import a GitHub or GitLab repository as a new project.
 *
 * Request body: `{ url, ownerId, name?, description? }`. `url` may point at
 * the repository root or at a `tree/<branch>/<subPath>` view; with a sub-path
 * only files beneath it are imported.
 *
 * Flow: download the branch ZIP, strip the archive's single top-level
 * directory, upload every non-hidden file, pick the main file (root
 * `main.tex`, else the first root-level `.tex`), then create the project row
 * and a `project.json` metadata backup.
 *
 * If anything fails after uploads have started, the files already written for
 * this project are removed before the error is reported.
 *
 * Responses:
 * - 401 when the internal secret check fails.
 * - 400 for missing/invalid input, an empty archive, or no matching files.
 * - 404/500 when the archive download fails.
 * - `{ success, data | error }` otherwise.
 */
export async function POST(request: NextRequest) {
  if (!verifyInternalAuth(request)) {
    return NextResponse.json(
      { success: false, error: "Unauthorized" },
      { status: 401 }
    );
  }

  try {
    const body = await request.json();
    const { url, ownerId, name, description } = body as {
      url: string;
      ownerId: string;
      name?: string;
      description?: string;
    };

    if (!url) {
      return NextResponse.json(
        { success: false, error: "Repository URL is required" },
        { status: 400 }
      );
    }

    if (!ownerId) {
      return NextResponse.json(
        { success: false, error: "ownerId is required" },
        { status: 400 }
      );
    }

    // Parse URL
    const urlInfo = parseGitUrl(url);
    if (!urlInfo) {
      return NextResponse.json(
        { success: false, error: "Invalid GitHub/GitLab URL" },
        { status: 400 }
      );
    }

    // Decide the provider from the URL's host, not from a substring of the
    // whole URL: a GitHub repository whose owner/repo/path happens to contain
    // "gitlab.com" must still be downloaded from GitHub. parseGitUrl has
    // already accepted this input with the same scheme normalisation, so the
    // URL constructor does not throw here.
    const trimmedUrl = url.trim();
    const sourceHost = new URL(
      trimmedUrl.startsWith("http") ? trimmedUrl : "https://" + trimmedUrl
    ).hostname.toLowerCase();
    const isGitLab = sourceHost.includes("gitlab.com");

    // Build download URL
    let downloadUrl: string;

    if (isGitLab) {
      downloadUrl = `https://gitlab.com/${urlInfo.owner}/${urlInfo.repo}/-/archive/${urlInfo.branch}/${urlInfo.repo}-${urlInfo.branch}.zip`;
    } else {
      downloadUrl = `https://github.com/${urlInfo.owner}/${urlInfo.repo}/archive/refs/heads/${urlInfo.branch}.zip`;
    }

    // Download ZIP
    const response = await fetch(downloadUrl);

    if (!response.ok) {
      return NextResponse.json(
        {
          success: false,
          error:
            response.status === 404
              ? "Repository not found"
              : `Download failed with status ${response.status}`,
        },
        { status: response.status === 404 ? 404 : 500 }
      );
    }

    const arrayBuffer = await response.arrayBuffer();
    const buffer = Buffer.from(arrayBuffer);

    // Parse ZIP
    const zip = new AdmZip(buffer);
    const zipEntries = zip.getEntries();

    if (zipEntries.length === 0) {
      return NextResponse.json(
        { success: false, error: "Repository is empty" },
        { status: 400 }
      );
    }

    // Detect and strip top-level directory
    const topLevelPrefix = detectTopLevelDirectory(zipEntries);

    // Create project
    const projectId = uuidv4();
    const storage = await getStorage();
    const projectName = name?.trim() || urlInfo.repo;
    const projectDescription = description?.trim() || `Imported from ${url}`;

    let mainFile = "main.tex";
    let hasMainTex = false;
    const extractedFiles: string[] = [];

    // If subPath is provided, only extract files under that path
    const subPathPrefix = urlInfo.subPath ? urlInfo.subPath + "/" : "";

    try {
      for (const entry of zipEntries) {
        // Skip directories, macOS resource forks and hidden files/directories
        if (
          entry.isDirectory ||
          entry.entryName.startsWith("__MACOSX") ||
          entry.entryName.includes("/.")
        ) {
          continue;
        }

        let relativePath = entry.entryName;

        // Strip top-level directory
        if (topLevelPrefix && relativePath.startsWith(topLevelPrefix)) {
          relativePath = relativePath.substring(topLevelPrefix.length);
        }

        // If subPath is provided, only extract files under that path
        if (subPathPrefix) {
          if (!relativePath.startsWith(subPathPrefix)) {
            continue;
          }
          relativePath = relativePath.substring(subPathPrefix.length);
        }

        // Drop empty names and anything that could escape the project prefix
        if (!relativePath || relativePath === "" || relativePath.includes("..")) {
          continue;
        }

        const content = entry.getData();
        const storageKey = StoragePaths.projectFile(projectId, relativePath);
        await storage.upload(storageKey, content, getMimeType(relativePath));
        extractedFiles.push(relativePath);

        if (relativePath === "main.tex") {
          hasMainTex = true;
          mainFile = "main.tex";
        }
      }

      if (extractedFiles.length === 0) {
        return NextResponse.json(
          {
            success: false,
            error: urlInfo.subPath
              ? `No files found in path: ${urlInfo.subPath}`
              : "No files found in repository",
          },
          { status: 400 }
        );
      }

      if (!hasMainTex) {
        // No root main.tex: fall back to the first root-level .tex file.
        const texFile = extractedFiles.find(
          (f) => f.endsWith(".tex") && !f.includes("/")
        );
        if (texFile) mainFile = texFile;
      }

      // Create database record
      const project = await prisma.project.create({
        data: {
          id: projectId,
          name: projectName,
          description: projectDescription || null,
          mainFile,
          ownerId,
          visibility: "private",
          status: "active",
        },
      });

      // Create project.json
      const meta = {
        id: projectId,
        name: projectName,
        description: projectDescription || undefined,
        mainFile,
        createdAt: project.createdAt.toISOString(),
        updatedAt: project.updatedAt.toISOString(),
        template: "github-import",
        sourceUrl: url,
      };
      const metaKey = StoragePaths.projectFile(projectId, "project.json");
      await storage.upload(
        metaKey,
        JSON.stringify(meta, null, 2),
        "application/json"
      );

      console.log(
        `[Internal/ImportGithub] Imported ${url} as project "${projectName}" (${projectId}) for owner ${ownerId}`
      );

      return NextResponse.json({
        success: true,
        data: {
          project: {
            id: project.id,
            name: project.name,
            description: project.description,
            mainFile: project.mainFile,
            visibility: project.visibility,
            createdAt: project.createdAt.toISOString(),
            updatedAt: project.updatedAt.toISOString(),
          },
          filesCount: extractedFiles.length,
        },
      });
    } catch (error) {
      // Do not leave a half-imported file set behind (same rollback the
      // arXiv import performs), then let the outer handler report the error.
      try {
        await storage.deletePrefix(StoragePaths.projectPrefix(projectId));
      } catch {}
      throw error;
    }
  } catch (error) {
    console.error("[Internal/ImportGithub] Error:", error);
    return NextResponse.json({
      success: false,
      error: error instanceof Error ? error.message : "Unknown error",
    });
  }
}
/**
 * Parse a GitHub/GitLab repository URL.
 *
 * Recognised shapes (the scheme is optional and defaults to https):
 * - `github.com/<owner>/<repo>[.git]`
 * - `github.com/<owner>/<repo>/tree/<branch>[/<subPath>]`
 * - `gitlab.com/<owner>/<repo>/-/tree/<branch>[/<subPath>]`
 *
 * Only `github.com` and `gitlab.com` (optionally prefixed with `www.`) are
 * accepted; the host is compared exactly so that look-alike hosts such as
 * `github.com.example.org` or `notgithub.com` are rejected.
 *
 * @param url Raw user input.
 * @returns Owner, repo, branch (default `"main"`) and sub-path (default `""`),
 *          or `null` when the input is not a recognised repository URL.
 */
function parseGitUrl(url: string): GitHubUrlInfo | null {
  try {
    url = url.trim();
    // Test for a real scheme; a plain "http" prefix test would leave
    // scheme-less hosts like "httpd.example/..." without one.
    if (!/^https?:\/\//i.test(url)) {
      url = "https://" + url;
    }

    const urlObj = new URL(url);
    const host = urlObj.hostname.toLowerCase().replace(/^www\./, "");

    if (host !== "github.com" && host !== "gitlab.com") {
      return null;
    }

    const pathParts = urlObj.pathname.split("/").filter(Boolean);
    if (pathParts.length < 2) return null;

    const owner = pathParts[0];
    const repo = pathParts[1].replace(/\.git$/, "");
    let branch = "main";
    let subPath = "";

    if (pathParts.length >= 4 && pathParts[2] === "tree") {
      // GitHub style: /<owner>/<repo>/tree/<branch>/<subPath...>
      branch = pathParts[3];
      if (pathParts.length > 4) {
        subPath = pathParts.slice(4).join("/");
      }
    } else if (
      pathParts.length >= 4 &&
      pathParts[2] === "-" &&
      pathParts[3] === "tree"
    ) {
      // GitLab style: /<owner>/<repo>/-/tree/<branch>/<subPath...>
      branch = pathParts[4] || "main";
      if (pathParts.length > 5) {
        subPath = pathParts.slice(5).join("/");
      }
    }

    return { owner, repo, branch, subPath };
  } catch {
    return null;
  }
}
/**
 * POST handler: create a project from a base64-encoded file upload.
 *
 * Request body: `{ fileBase64, fileName, ownerId, name?, description? }`.
 * The file name's extension (case-insensitive) selects the handler:
 * `.zip`, `.tar.gz`/`.tgz`, `.tar`, or one of the single-file TeX types.
 *
 * Each handler is awaited inside the `try` so that a rejection from it is
 * turned into this route's JSON error response; returning the promise
 * un-awaited would let the rejection bypass the `catch` below.
 *
 * Responses:
 * - 401 when the internal secret check fails.
 * - 400 for missing fields or an unsupported extension.
 * - Whatever the selected handler returns.
 * - `{ success: false, error }` when anything throws.
 */
export async function POST(request: NextRequest) {
  if (!verifyInternalAuth(request)) {
    return NextResponse.json(
      { success: false, error: "Unauthorized" },
      { status: 401 }
    );
  }

  try {
    const body = await request.json();
    const { fileBase64, fileName, ownerId, name, description } = body as {
      fileBase64: string;
      fileName: string;
      ownerId: string;
      name?: string;
      description?: string;
    };

    if (!fileBase64) {
      return NextResponse.json(
        { success: false, error: "fileBase64 is required" },
        { status: 400 }
      );
    }

    if (!fileName) {
      return NextResponse.json(
        { success: false, error: "fileName is required" },
        { status: 400 }
      );
    }

    if (!ownerId) {
      return NextResponse.json(
        { success: false, error: "ownerId is required" },
        { status: 400 }
      );
    }

    const fileNameLower = fileName.toLowerCase();
    const buffer = Buffer.from(fileBase64, "base64");

    // Dispatch based on file type. ".tar.gz"/".tgz" must be tested before
    // ".tar" is reached; ".tar.gz" does not end with ".tar", so the order is
    // only for readability.
    if (fileNameLower.endsWith(".zip")) {
      return await handleZipUpload(buffer, fileName, name, description, ownerId);
    } else if (
      fileNameLower.endsWith(".tar.gz") ||
      fileNameLower.endsWith(".tgz")
    ) {
      return await handleTarGzUpload(buffer, fileName, name, description, ownerId);
    } else if (fileNameLower.endsWith(".tar")) {
      return await handleTarUpload(buffer, fileName, name, description, ownerId);
    } else if (SUPPORTED_TEX_TYPES.some((ext) => fileNameLower.endsWith(ext))) {
      return await handleSingleTexUpload(
        buffer,
        fileName,
        name,
        description,
        ownerId
      );
    } else {
      return NextResponse.json(
        {
          success: false,
          error: `Unsupported file format. Supported: ${[...SUPPORTED_ARCHIVE_TYPES, ...SUPPORTED_TEX_TYPES].join(", ")}`,
        },
        { status: 400 }
      );
    }
  } catch (error) {
    console.error("[Internal/ImportUpload] Error:", error);
    return NextResponse.json({
      success: false,
      error: error instanceof Error ? error.message : "Unknown error",
    });
  }
}
/**
 * Handle ZIP file upload.
 *
 * Uploads every file in the archive (minus directories, `__MACOSX` entries and
 * paths containing `..`) under a fresh project id, stripping the archive's
 * single top-level directory when there is one, then delegates to
 * `createProject`.
 *
 * Main-file selection, in order of preference:
 *   1. `main.tex` at the project root
 *   2. the first root-level `.tex` file
 *   3. the first nested `<dir>/main.tex`
 *   4. the default name `main.tex`
 *
 * If an upload fails part-way, the files already written for this project are
 * removed before the error is rethrown (same rollback as the tar.gz handler).
 *
 * @param buffer      Raw ZIP bytes.
 * @param fileName    Original file name; used for the default project name.
 * @param name        Optional project name override.
 * @param description Optional project description.
 * @param ownerId     Owner of the new project.
 * @returns A 400 response when the ZIP has no entries, otherwise whatever
 *          `createProject` returns.
 */
async function handleZipUpload(
  buffer: Buffer,
  fileName: string,
  name: string | undefined,
  description: string | undefined,
  ownerId: string
) {
  const projectName = name?.trim() || fileName.replace(/\.zip$/i, "");

  const zip = new AdmZip(buffer);
  const zipEntries = zip.getEntries();

  if (zipEntries.length === 0) {
    return NextResponse.json(
      { success: false, error: "ZIP file is empty" },
      { status: 400 }
    );
  }

  const topLevelPrefix = detectTopLevelDirectory(zipEntries);
  const projectId = uuidv4();
  const storage = await getStorage();

  const extractedFiles: string[] = [];
  let hasRootMainTex = false;
  let nestedMainTex: string | undefined;

  try {
    for (const entry of zipEntries) {
      if (entry.isDirectory || entry.entryName.startsWith("__MACOSX")) {
        continue;
      }

      let relativePath = entry.entryName;
      if (topLevelPrefix && relativePath.startsWith(topLevelPrefix)) {
        relativePath = relativePath.substring(topLevelPrefix.length);
      }

      // Drop empty names and anything that could escape the project prefix
      if (!relativePath || relativePath === "" || relativePath.includes("..")) {
        continue;
      }

      const content = entry.getData();
      const storageKey = StoragePaths.projectFile(projectId, relativePath);
      await storage.upload(storageKey, content, getMimeType(relativePath));
      extractedFiles.push(relativePath);

      if (relativePath === "main.tex") {
        hasRootMainTex = true;
      } else if (
        nestedMainTex === undefined &&
        relativePath.endsWith("/main.tex")
      ) {
        // Remember it as a last resort only. A nested main.tex must not stop
        // the search for a root-level .tex file, and the main file must never
        // be reported as a root "main.tex" that does not exist.
        nestedMainTex = relativePath;
      }
    }
  } catch (error) {
    try {
      await storage.deletePrefix(StoragePaths.projectPrefix(projectId));
    } catch {}
    throw error;
  }

  let mainFile = "main.tex";
  if (!hasRootMainTex) {
    const rootTexFile = extractedFiles.find(
      (f) => f.endsWith(".tex") && !f.includes("/")
    );
    mainFile = rootTexFile ?? nestedMainTex ?? "main.tex";
  }

  return createProject(
    projectId,
    projectName,
    description,
    mainFile,
    ownerId,
    extractedFiles
  );
}
+ */
+async function handleTarGzUpload(
+  buffer: Buffer,
+  fileName: string,
+  name: string | undefined,
+  description: string | undefined,
+  ownerId: string
+) {
+  const projectName =
+    name?.trim() || fileName.replace(/\.(tar\.gz|tgz)$/i, "");
+  return importTarArchive(
+    buffer,
+    "archive.tar.gz",
+    projectName,
+    description,
+    ownerId
+  );
+}
+
+/**
+ * Handle tar file upload (uncompressed).
+ */
+async function handleTarUpload(
+  buffer: Buffer,
+  fileName: string,
+  name: string | undefined,
+  description: string | undefined,
+  ownerId: string
+) {
+  const projectName = name?.trim() || fileName.replace(/\.tar$/i, "");
+  return importTarArchive(
+    buffer,
+    "archive.tar",
+    projectName,
+    description,
+    ownerId
+  );
+}
+
+/**
+ * Shared implementation for tar and tar.gz uploads.
+ *
+ * Writes the archive into a fresh temp directory, extracts it there, uploads
+ * every extracted file to project storage (stripping a single shared top-level
+ * directory) and creates the project. The temp directory is always removed;
+ * uploaded objects are removed again when any step fails.
+ *
+ * @param buffer - Raw bytes of the archive.
+ * @param archiveName - File name to give the archive inside the temp directory.
+ * @param projectName - Resolved project name.
+ * @param description - Optional project description.
+ * @param ownerId - ID of the user who will own the new project.
+ * @returns The JSON response produced by createProject.
+ */
+async function importTarArchive(
+  buffer: Buffer,
+  archiveName: string,
+  projectName: string,
+  description: string | undefined,
+  ownerId: string
+) {
+  const projectId = uuidv4();
+  const tempDir = path.join(os.tmpdir(), `upload-${projectId}`);
+  const storage = await getStorage();
+
+  await fs.mkdir(tempDir, { recursive: true });
+
+  try {
+    const tempFile = path.join(tempDir, archiveName);
+    await fs.writeFile(tempFile, buffer);
+    await tar.extract({ file: tempFile, cwd: tempDir });
+    // Remove the archive itself so it is not imported as a project file.
+    await fs.unlink(tempFile);
+
+    const entries = await getAllFiles(tempDir);
+    const topLevelPrefix = detectTopLevelFromPaths(entries, tempDir);
+
+    const extractedFiles: string[] = [];
+    for (const fullPath of entries) {
+      let relativePath = path.relative(tempDir, fullPath);
+      if (topLevelPrefix && relativePath.startsWith(topLevelPrefix)) {
+        relativePath = relativePath.substring(topLevelPrefix.length);
+      }
+      if (!relativePath || relativePath.includes("..")) continue;
+
+      // Storage keys use "/" regardless of the host path separator.
+      relativePath = relativePath.split(path.sep).join("/");
+
+      const content = await fs.readFile(fullPath);
+      const storageKey = StoragePaths.projectFile(projectId, relativePath);
+      await storage.upload(storageKey, content, getMimeType(relativePath));
+      extractedFiles.push(relativePath);
+    }
+
+    // Prefer a root-level main.tex, then any other root-level .tex file.
+    let mainFile = "main.tex";
+    if (!extractedFiles.includes("main.tex")) {
+      const texFile = extractedFiles.find(
+        (f) => f.endsWith(".tex") && !f.includes("/") && !f.includes("\\")
+      );
+      if (texFile) mainFile = texFile;
+    }
+
+    // `await` so that a createProject failure reaches the cleanup below.
+    return await createProject(
+      projectId,
+      projectName,
+      description,
+      mainFile,
+      ownerId,
+      extractedFiles
+    );
+  } catch (error) {
+    // Best-effort removal of partially uploaded files.
+    try {
+      await storage.deletePrefix(StoragePaths.projectPrefix(projectId));
+    } catch {}
+    throw error;
+  } finally {
+    await fs.rm(tempDir, { recursive: true, force: true }).catch(() => {});
+  }
+}
+
+/**
+ * Handle a single .tex file upload.
+ *
+ * The file is stored under its base name only, so a caller-supplied name such
+ * as "../x.tex" cannot address a key outside the project. When the upload is
+ * not a .tex file, a minimal main.tex is generated next to it.
+ */
+async function handleSingleTexUpload(
+  buffer: Buffer,
+  fileName: string,
+  name: string | undefined,
+  description: string | undefined,
+  ownerId: string
+) {
+  // Strip any directory part (either separator style) from the upload name.
+  const safeName = path.posix.basename(fileName.replace(/\\/g, "/"));
+  const projectName = name?.trim() || safeName.replace(/\.[^.]+$/, "");
+  const projectId = uuidv4();
+  const storage = await getStorage();
+
+  // Case-insensitive, matching how the caller dispatches on the extension.
+  const isTex = safeName.toLowerCase().endsWith(".tex");
+  const mainFile = isTex ? safeName : "main.tex";
+  const storageKey = StoragePaths.projectFile(projectId, safeName);
+  await storage.upload(storageKey, buffer, getMimeType(safeName));
+
+  const extractedFiles = [safeName];
+
+  if (!isTex) {
+    // Escape LaTeX special characters so an arbitrary project name cannot
+    // break (or inject commands into) the generated document.
+    const texTitle = projectName.replace(/[\\{}$&#^_%~]/g, (ch) => {
+      if (ch === "\\") return "\\textbackslash{}";
+      if (ch === "^") return "\\textasciicircum{}";
+      if (ch === "~") return "\\textasciitilde{}";
+      return `\\${ch}`;
+    });
+    const basicMainTex = `\\documentclass{article}
+\\usepackage[utf8]{inputenc}
+
+\\title{${texTitle}}
+\\author{}
+\\date{\\today}
+
+\\begin{document}
+
+\\maketitle
+
+% Your content here
+
+\\end{document}
+`;
+    const mainKey = StoragePaths.projectFile(projectId, "main.tex");
+    await storage.upload(mainKey, basicMainTex, "text/x-latex");
+    extractedFiles.push("main.tex");
+  }
+
+  return createProject(
+    projectId,
+    projectName,
+    description,
+    mainFile,
+    ownerId,
+    extractedFiles
+  );
+}
+
+/**
+ * Create a project record in the DB and project.json in storage.
+ *
+ * @param projectId - Pre-generated ID; also the storage prefix of the files.
+ * @param projectName - Project name (trimmed before use).
+ * @param description - Optional description; blank becomes null in the DB.
+ * @param mainFile - Path of the main TeX file relative to the project root.
+ * @param ownerId - ID of the owning user.
+ * @param extractedFiles - Paths already uploaded; only their count is reported.
+ * @returns A JSON success response describing the new project.
+ */
+async function createProject(
+  projectId: string,
+  projectName: string,
+  description: string | undefined,
+  mainFile: string,
+  ownerId: string,
+  extractedFiles: string[]
+) {
+  const project = await prisma.project.create({
+    data: {
+      id: projectId,
+      name: projectName.trim(),
+      description: description?.trim() || null,
+      mainFile,
+      ownerId,
+      visibility: "private",
+      status: "active",
+    },
+  });
+
+  // Mirror the DB record into project.json inside the project's storage prefix.
+  const storage = await getStorage();
+  const meta = {
+    id: projectId,
+    name: projectName.trim(),
+    description: description?.trim() || undefined,
+    mainFile,
+    createdAt: project.createdAt.toISOString(),
+    updatedAt: project.updatedAt.toISOString(),
+    template: "uploaded",
+  };
+  const metaKey = StoragePaths.projectFile(projectId, "project.json");
+  await storage.upload(
+    metaKey,
+    JSON.stringify(meta, null, 2),
+    "application/json"
+  );
+
+  console.log(
+    `[Internal/ImportUpload] Created project "${projectName.trim()}" (${projectId}) for owner ${ownerId}, ${extractedFiles.length} files`
+  );
+
+  return NextResponse.json({
+    success: true,
+    data: {
+      project: {
+        id: project.id,
+        name: project.name,
+        description: project.description,
+        mainFile: project.mainFile,
+        visibility: project.visibility,
+        createdAt: project.createdAt.toISOString(),
+        updatedAt: project.updatedAt.toISOString(),
+      },
+      filesCount: extractedFiles.length,
+    },
+  });
+}
+
+/**
+ * Detect top-level directory in ZIP entries.
+ *
+ * Returns the shared leading directory (with trailing "/") when every
+ * non-`__MACOSX` entry lives under it, otherwise the empty string.
+ */
+function detectTopLevelDirectory(entries: AdmZip.IZipEntry[]): string {
+  const paths = entries
+    .map((e) => e.entryName)
+    .filter((p) => p && !p.startsWith("__MACOSX"));
+
+  if (paths.length === 0) return "";
+
+  const firstPath = paths[0];
+  const firstSlash = firstPath.indexOf("/");
+  if (firstSlash === -1) return "";
+
+  const topDir = firstPath.substring(0, firstSlash + 1);
+  // The directory's own entry may appear without the trailing slash.
+  const allInSameDir = paths.every(
+    (p) => p.startsWith(topDir) || p === topDir.slice(0, -1)
+  );
+
+  return allInSameDir ? topDir : "";
+}
+
+/**
+ * Detect the top-level directory from extracted file paths.
+ *
+ * Paths are made relative to `baseDir`; the result includes the trailing
+ * platform separator, or is "" when the files do not share one directory.
+ */
+function detectTopLevelFromPaths(files: string[], baseDir: string): string {
+  const relativePaths = files.map((f) => path.relative(baseDir, f));
+  if (relativePaths.length === 0) return "";
+
+  const firstPath = relativePaths[0];
+  const sep = path.sep;
+  const firstSepIndex = firstPath.indexOf(sep);
+  if (firstSepIndex === -1) return "";
+
+  const topDir = firstPath.substring(0, firstSepIndex + 1);
+  const allInSameDir = relativePaths.every((p) => p.startsWith(topDir));
+
+  return allInSameDir ? topDir : "";
+}
+
+/**
+ * Recursively list all regular files under a directory.
+ *
+ * Symbolic links and other special entries are skipped: the directory holds
+ * the contents of an uploaded archive, and following a link would let an
+ * archive make the importer read files outside the extraction directory.
+ */
+async function getAllFiles(dir: string): Promise<string[]> {
+  const results: string[] = [];
+  const entries = await fs.readdir(dir, { withFileTypes: true });
+
+  for (const entry of entries) {
+    const fullPath = path.join(dir, entry.name);
+    if (entry.isDirectory()) {
+      const subFiles = await getAllFiles(fullPath);
+      results.push(...subFiles);
+    } else if (entry.isFile()) {
+      results.push(fullPath);
+    }
+  }
+
+  return results;
+}
diff --git a/app/api/internal/projects/list/route.ts b/app/api/internal/projects/list/route.ts
new file mode 100644
index 0000000..26e7ae6
--- /dev/null
+++ b/app/api/internal/projects/list/route.ts
@@ -0,0 +1,121 @@
+/**
+ * Internal API: List Projects
+ * ============================
+ *
+ * Internal endpoint for nanobot to list projects.
+ * Supports filtering by owner and searching by name.
+ *
+ * This is NOT exposed to the public - protected by INTERNAL_API_SECRET.
+ */
+
+import { timingSafeEqual } from "node:crypto";
+import { NextRequest, NextResponse } from "next/server";
+import { prisma } from "@/lib/prisma";
+
+/**
+ * Check the X-Internal-Secret header against INTERNAL_API_SECRET.
+ *
+ * Returns false when the secret is not configured or the header is missing.
+ * The comparison is constant-time so response timing does not leak how much
+ * of a guessed secret was correct.
+ */
+function verifyInternalAuth(request: NextRequest): boolean {
+  const secret = request.headers.get("X-Internal-Secret");
+  const expectedSecret = process.env.INTERNAL_API_SECRET;
+
+  if (!expectedSecret) {
+    console.warn("[Internal API] INTERNAL_API_SECRET not configured");
+    return false;
+  }
+  if (!secret) return false;
+
+  // timingSafeEqual throws on unequal lengths, so compare lengths first.
+  const provided = Buffer.from(secret);
+  const expected = Buffer.from(expectedSecret);
+  return provided.length === expected.length && timingSafeEqual(provided, expected);
+}
+
+/**
+ * List the non-trashed projects of one owner, newest first.
+ *
+ * Body: `{ ownerId: string, search?: string, limit?: number }`.
+ * Responds 401 without a valid secret, 400 without an ownerId, 500 on
+ * unexpected errors.
+ */
+export async function POST(request: NextRequest) {
+  // Verify authentication
+  if (!verifyInternalAuth(request)) {
+    return NextResponse.json(
+      { success: false, error: "Unauthorized" },
+      { status: 401 }
+    );
+  }
+
+  try {
+    const body = await request.json();
+    const { ownerId, search, limit = 50 } = body as {
+      ownerId?: string;
+      search?: string;
+      limit?: number;
+    };
+
+    // Security: ownerId is required to prevent cross-tenant data leakage
+    if (!ownerId || typeof ownerId !== "string" || ownerId.trim().length === 0) {
+      return NextResponse.json(
+        {
+          success: false,
+          error: "ownerId is required for security reasons",
+        },
+        { status: 400 }
+      );
+    }
+
+    // Build where clause
+    const where: Record<string, unknown> = {
+      status: { not: "trashed" },
+      ownerId: ownerId.trim(),
+    };
+
+    // Substring search on name or description. Case sensitivity of `contains`
+    // depends on the database provider/collation.
+    if (typeof search === "string" && search) {
+      where.AND = [
+        {
+          OR: [
+            { name: { contains: search } },
+            { description: { contains: search } },
+          ],
+        },
+      ];
+    }
+
+    // Clamp the page size to 1..100; anything non-numeric falls back to 50.
+    const requestedLimit = Number(limit);
+    const take =
+      Number.isFinite(requestedLimit) && requestedLimit >= 1
+        ? Math.min(Math.floor(requestedLimit), 100)
+        : 50;
+
+    // Fetch projects
+    const projects = await prisma.project.findMany({
+      where,
+      orderBy: { updatedAt: "desc" },
+      take,
+      select: {
+        id: true,
+        name: true,
+        description: true,
+        mainFile: true,
+        compiler: true,
+        updatedAt: true,
+        createdAt: true,
+        ownerId: true,
+      },
+    });
+
+    const result = projects.map((p) => ({
+      id: p.id,
+      name: p.name,
+      description: p.description,
+      mainFile: p.mainFile,
+      compiler: p.compiler,
+      updatedAt: p.updatedAt.toISOString(),
+      createdAt: p.createdAt.toISOString(),
+      ownerId: p.ownerId,
+    }));
+
+    console.log(
+      `[Internal/ListProjects] Found ${result.length} projects` +
+        ` for owner ${ownerId}` +
+        (search ? ` matching "${search}"` : "")
+    );
+
+    return NextResponse.json({
+      success: true,
+      data: {
+        projects: result,
+        count: result.length,
+      },
+    });
+  } catch (error) {
+    console.error("[Internal/ListProjects] Error:", error);
+    // Unexpected failure: report it as a server error, not HTTP 200.
+    return NextResponse.json(
+      {
+        success: false,
+        error: error instanceof Error ? error.message : "Unknown error",
+      },
+      { status: 500 }
+    );
+  }
+}
diff --git a/app/api/internal/projects/rename/route.ts b/app/api/internal/projects/rename/route.ts
new file mode 100644
index 0000000..46652c2
--- /dev/null
+++ b/app/api/internal/projects/rename/route.ts
@@ -0,0 +1,132 @@
+/**
+ * Internal API: Rename/Update Project
+ * =====================================
+ *
+ * Internal endpoint for nanobot to rename or update a project's metadata.
+ * Protected by INTERNAL_API_SECRET.
+ */
+
+import { timingSafeEqual } from "node:crypto";
+import { NextRequest, NextResponse } from "next/server";
+import { prisma } from "@/lib/prisma";
+import { getStorage, StoragePaths } from "@/lib/storage";
+import type { ProjectMeta } from "@/types";
+
+/**
+ * Check the X-Internal-Secret header against INTERNAL_API_SECRET using a
+ * constant-time comparison. Returns false when either value is missing.
+ */
+function verifyInternalAuth(request: NextRequest): boolean {
+  const secret = request.headers.get("X-Internal-Secret");
+  const expectedSecret = process.env.INTERNAL_API_SECRET;
+  if (!expectedSecret) {
+    console.warn("[Internal API] INTERNAL_API_SECRET not configured");
+    return false;
+  }
+  if (!secret) return false;
+
+  // timingSafeEqual throws on unequal lengths, so compare lengths first.
+  const provided = Buffer.from(secret);
+  const expected = Buffer.from(expectedSecret);
+  return provided.length === expected.length && timingSafeEqual(provided, expected);
+}
+
+/**
+ * Write project.json for the project. Best-effort: a storage failure is
+ * logged and swallowed so the DB update still succeeds.
+ */
+async function writeProjectMeta(projectId: string, meta: ProjectMeta): Promise<void> {
+  try {
+    const storage = await getStorage();
+    const metaKey = StoragePaths.projectFile(projectId, "project.json");
+    await storage.upload(metaKey, JSON.stringify(meta, null, 2), "application/json");
+  } catch {
+    console.log(`Note: Could not write project.json for ${projectId} (S3 backup)`);
+  }
+}
+
+/**
+ * Update a project's name and/or description and sync project.json.
+ *
+ * Body: `{ projectId: string, name?: string, description?: string }`.
+ * Responds 401/400/404 for auth, validation and lookup failures, 500 on
+ * unexpected errors.
+ * NOTE(review): unlike the list endpoint, no ownerId is checked here — any
+ * holder of the internal secret can rename any project. Confirm that is intended.
+ */
+export async function POST(request: NextRequest) {
+  if (!verifyInternalAuth(request)) {
+    return NextResponse.json(
+      { success: false, error: "Unauthorized" },
+      { status: 401 }
+    );
+  }
+
+  try {
+    const body = await request.json();
+    const { projectId, name, description } = body as {
+      projectId: string;
+      name?: string;
+      description?: string;
+    };
+
+    if (!projectId || typeof projectId !== "string") {
+      return NextResponse.json(
+        { success: false, error: "Project ID is required" },
+        { status: 400 }
+      );
+    }
+
+    // Verify project exists
+    const project = await prisma.project.findUnique({
+      where: { id: projectId },
+      select: { id: true },
+    });
+
+    if (!project) {
+      return NextResponse.json(
+        { success: false, error: "Project not found" },
+        { status: 404 }
+      );
+    }
+
+    // Build update payload
+    const updateData: {
+      name?: string;
+      description?: string | null;
+      updatedAt: Date;
+    } = {
+      updatedAt: new Date(),
+    };
+
+    if (name !== undefined) {
+      if (typeof name !== "string" || name.trim().length === 0) {
+        return NextResponse.json(
+          { success: false, error: "Project name cannot be empty" },
+          { status: 400 }
+        );
+      }
+      updateData.name = name.trim();
+    }
+
+    if (description !== undefined) {
+      // Blank or non-string descriptions clear the field instead of throwing.
+      updateData.description =
+        typeof description === "string" ? description.trim() || null : null;
+    }
+
+    // Update database
+    const updatedProject = await prisma.project.update({
+      where: { id: projectId },
+      data: updateData,
+    });
+
+    // Sync update to project.json in storage
+    const meta: ProjectMeta = {
+      id: projectId,
+      name: updatedProject.name,
+      description: updatedProject.description || undefined,
+      mainFile: updatedProject.mainFile,
+      compiler: updatedProject.compiler || undefined,
+      createdAt: updatedProject.createdAt.toISOString(),
+      updatedAt: updatedProject.updatedAt.toISOString(),
+    };
+    await writeProjectMeta(projectId, meta);
+
+    console.log(`[Internal/RenameProject] Updated project ${projectId}: name="${updatedProject.name}"`);
+
+    return NextResponse.json({
+      success: true,
+      data: {
+        project: {
+          id: updatedProject.id,
+          name: updatedProject.name,
+          description: updatedProject.description,
+          createdAt: updatedProject.createdAt.toISOString(),
+          updatedAt: updatedProject.updatedAt.toISOString(),
+        },
+      },
+    });
+  } catch (error) {
+    console.error("[Internal/RenameProject] Error:", error);
+    // Unexpected failure: report it as a server error, not HTTP 200.
+    return NextResponse.json(
+      {
+        success: false,
+        error: error instanceof Error ? error.message : "Unknown error",
+      },
+      { status: 500 }
+    );
+  }
+}
diff --git a/app/api/internal/projects/versions/create/route.ts b/app/api/internal/projects/versions/create/route.ts
new file mode 100644
index 0000000..df3fafb
--- /dev/null
+++ b/app/api/internal/projects/versions/create/route.ts
@@ -0,0 +1,126 @@
+/**
+ * Internal API: Create Project Version
+ * =======================================
+ *
+ * Internal endpoint for nanobot to save the current project state as a version.
+ * Uses Merkle Tree content-addressed storage.
+ * Protected by INTERNAL_API_SECRET.
+ */
+
+import { timingSafeEqual } from "node:crypto";
+import { NextRequest, NextResponse } from "next/server";
+import { prisma } from "@/lib/prisma";
+import { merkleService } from "@/lib/storage/merkle";
+
+/**
+ * Check the X-Internal-Secret header against INTERNAL_API_SECRET using a
+ * constant-time comparison. Returns false when either value is missing.
+ */
+function verifyInternalAuth(request: NextRequest): boolean {
+  const secret = request.headers.get("X-Internal-Secret");
+  const expectedSecret = process.env.INTERNAL_API_SECRET;
+  if (!expectedSecret) {
+    console.warn("[Internal API] INTERNAL_API_SECRET not configured");
+    return false;
+  }
+  if (!secret) return false;
+
+  // timingSafeEqual throws on unequal lengths, so compare lengths first.
+  const provided = Buffer.from(secret);
+  const expected = Buffer.from(expectedSecret);
+  return provided.length === expected.length && timingSafeEqual(provided, expected);
+}
+
+/**
+ * Save the current project state as a new version.
+ *
+ * Body: `{ projectId: string, userId?: string, name?: string, description?: string }`.
+ * `userId` defaults to the project owner and `name` to "Saved - <UTC timestamp>".
+ * A "NO_CHANGES_DETECTED" error from merkleService is reported as a successful,
+ * skipped save; "NO_FILES_TO_SAVE" as a 400. Other failures respond 500.
+ */
+export async function POST(request: NextRequest) {
+  if (!verifyInternalAuth(request)) {
+    return NextResponse.json(
+      { success: false, error: "Unauthorized" },
+      { status: 401 }
+    );
+  }
+
+  try {
+    const body = await request.json();
+    const { projectId, userId, name, description } = body as {
+      projectId: string;
+      userId?: string;
+      name?: string;
+      description?: string;
+    };
+
+    if (!projectId || typeof projectId !== "string") {
+      return NextResponse.json(
+        { success: false, error: "Project ID is required" },
+        { status: 400 }
+      );
+    }
+
+    // Verify project exists and get owner
+    const project = await prisma.project.findUnique({
+      where: { id: projectId },
+      select: { id: true, name: true, ownerId: true },
+    });
+
+    if (!project) {
+      return NextResponse.json(
+        { success: false, error: "Project not found" },
+        { status: 404 }
+      );
+    }
+
+    // Generate version name if not provided
+    const now = new Date();
+    const dateStr = now.toISOString().replace("T", " ").slice(0, 19);
+    const versionName = name?.trim() || `Saved - ${dateStr}`;
+    const saveUserId = userId || project.ownerId;
+
+    try {
+      const result = await merkleService.createCommit(
+        projectId,
+        versionName,
+        saveUserId,
+        description?.trim()
+      );
+
+      // Fetch created version details
+      const version = await prisma.projectVersion.findUnique({
+        where: { id: result.id },
+        include: {
+          user: { select: { id: true, name: true, email: true } },
+        },
+      });
+
+      // Fail with a clear message instead of a TypeError on a null row.
+      if (!version) {
+        throw new Error(`Created version ${result.id} could not be loaded`);
+      }
+
+      console.log(
+        `[Internal/CreateVersion] Saved version "${versionName}" (${result.id}) for project "${project.name}"`
+      );
+
+      return NextResponse.json({
+        success: true,
+        data: {
+          version: {
+            id: version.id,
+            name: version.name,
+            description: version.description,
+            user: version.user,
+            fileCount: result.fileCount,
+            totalSize: result.totalSize,
+            createdAt: version.createdAt.toISOString(),
+          },
+        },
+      });
+    } catch (error) {
+      if (error instanceof Error) {
+        if (error.message === "NO_CHANGES_DETECTED") {
+          return NextResponse.json({
+            success: true,
+            skipped: true,
+            message: "No changes detected since the last saved version.",
+          });
+        }
+        if (error.message === "NO_FILES_TO_SAVE") {
+          return NextResponse.json(
+            { success: false, error: "No files found in the project to save" },
+            { status: 400 }
+          );
+        }
+      }
+      throw error;
+    }
+  } catch (error) {
+    console.error("[Internal/CreateVersion] Error:", error);
+    // Unexpected failure: report it as a server error, not HTTP 200.
+    return NextResponse.json(
+      {
+        success: false,
+        error: error instanceof Error ? error.message : "Unknown error",
+      },
+      { status: 500 }
+    );
+  }
+}
diff --git a/app/api/internal/projects/versions/list/route.ts b/app/api/internal/projects/versions/list/route.ts
new file mode 100644
index 0000000..73d6b20
--- /dev/null
+++ b/app/api/internal/projects/versions/list/route.ts
@@ -0,0 +1,93 @@
+/**
+ * Internal API: List Project Versions
+ * =====================================
+ *
+ * Internal endpoint for nanobot to list all versions/history of a project.
+ * Protected by INTERNAL_API_SECRET.
+ */
+
+import { timingSafeEqual } from "node:crypto";
+import { NextRequest, NextResponse } from "next/server";
+import { prisma } from "@/lib/prisma";
+
+/**
+ * Check the X-Internal-Secret header against INTERNAL_API_SECRET using a
+ * constant-time comparison. Returns false when either value is missing.
+ */
+function verifyInternalAuth(request: NextRequest): boolean {
+  const secret = request.headers.get("X-Internal-Secret");
+  const expectedSecret = process.env.INTERNAL_API_SECRET;
+  if (!expectedSecret) {
+    console.warn("[Internal API] INTERNAL_API_SECRET not configured");
+    return false;
+  }
+  if (!secret) return false;
+
+  // timingSafeEqual throws on unequal lengths, so compare lengths first.
+  const provided = Buffer.from(secret);
+  const expected = Buffer.from(expectedSecret);
+  return provided.length === expected.length && timingSafeEqual(provided, expected);
+}
+
+/**
+ * List all saved versions of a project, newest first.
+ *
+ * Body: `{ projectId: string }`. Responds 401/400/404 for auth, validation
+ * and lookup failures, 500 on unexpected errors.
+ */
+export async function POST(request: NextRequest) {
+  if (!verifyInternalAuth(request)) {
+    return NextResponse.json(
+      { success: false, error: "Unauthorized" },
+      { status: 401 }
+    );
+  }
+
+  try {
+    const body = await request.json();
+    const { projectId } = body as { projectId: string };
+
+    if (!projectId || typeof projectId !== "string") {
+      return NextResponse.json(
+        { success: false, error: "Project ID is required" },
+        { status: 400 }
+      );
+    }
+
+    // Verify project exists
+    const project = await prisma.project.findUnique({
+      where: { id: projectId },
+      select: { id: true, name: true },
+    });
+
+    if (!project) {
+      return NextResponse.json(
+        { success: false, error: "Project not found" },
+        { status: 404 }
+      );
+    }
+
+    // Fetch version list
+    const versions = await prisma.projectVersion.findMany({
+      where: { projectId },
+      include: {
+        user: { select: { id: true, name: true, email: true } },
+        _count: { select: { snapshots: true } },
+      },
+      orderBy: { createdAt: "desc" },
+    });
+
+    const result = versions.map((v) => ({
+      id: v.id,
+      name: v.name,
+      description: v.description,
+      user: v.user,
+      // Fall back to the snapshot row count when fileCount is unset or 0.
+      fileCount: v.fileCount || v._count.snapshots,
+      totalSize: v.totalSize || 0,
+      createdAt: v.createdAt.toISOString(),
+    }));
+
+    console.log(
+      `[Internal/ListVersions] Found ${result.length} versions for project "${project.name}" (${projectId})`
+    );
+
+    return NextResponse.json({
+      success: true,
+      data: {
+        projectName: project.name,
+        versions: result,
+        count: result.length,
+      },
+    });
+  } catch (error) {
+    console.error("[Internal/ListVersions] Error:", error);
+    // Unexpected failure: report it as a server error, not HTTP 200.
+    return NextResponse.json(
+      {
+        success: false,
+        error: error instanceof Error ? error.message : "Unknown error",
+      },
+      { status: 500 }
+    );
+  }
+}
diff --git a/app/api/internal/projects/versions/restore/route.ts b/app/api/internal/projects/versions/restore/route.ts
new file mode 100644
index 0000000..427b09b
--- /dev/null
+++ b/app/api/internal/projects/versions/restore/route.ts
@@ -0,0 +1,220 @@
+/**
+ * Internal API: Restore Project Version
+ * ========================================
+ *
+ * Internal endpoint for nanobot to restore a project to a specific version.
+ * Handles Merkle Tree, tar.gz, and DB snapshot formats.
+ * Clears Yjs Redis persistence and WS server in-memory rooms.
+ * Protected by INTERNAL_API_SECRET.
+ */
+
+import { timingSafeEqual } from "node:crypto";
+import { NextRequest, NextResponse } from "next/server";
+import { prisma } from "@/lib/prisma";
+import { merkleService } from "@/lib/storage/merkle";
+import { downloadSnapshot } from "@/lib/storage/snapshot";
+import { getStorage, StoragePaths } from "@/lib/storage";
+import { getRedis } from "@/server/redis-client";
+
+export const runtime = "nodejs";
+
+/**
+ * Check the X-Internal-Secret header against INTERNAL_API_SECRET using a
+ * constant-time comparison. Returns false when either value is missing.
+ */
+function verifyInternalAuth(request: NextRequest): boolean {
+  const secret = request.headers.get("X-Internal-Secret");
+  const expectedSecret = process.env.INTERNAL_API_SECRET;
+  if (!expectedSecret) {
+    console.warn("[Internal API] INTERNAL_API_SECRET not configured");
+    return false;
+  }
+  if (!secret) return false;
+
+  // timingSafeEqual throws on unequal lengths, so compare lengths first.
+  const provided = Buffer.from(secret);
+  const expected = Buffer.from(expectedSecret);
+  return provided.length === expected.length && timingSafeEqual(provided, expected);
+}
+
+/**
+ * Delete the persisted Yjs keys (`yjs:<projectId>:*:updates` and `:meta`)
+ * of a project from Redis.
+ *
+ * Best-effort: returns 0 when REDIS_URL is unset, no client is available, or
+ * Redis fails (the failure is logged).
+ *
+ * @returns Number of keys deleted.
+ */
+async function clearYjsPersistenceForProject(projectId: string): Promise<number> {
+  if (!process.env.REDIS_URL) return 0;
+
+  try {
+    const redis = await getRedis();
+    if (!redis) return 0;
+    let deleted = 0;
+
+    const patterns = [`yjs:${projectId}:*:updates`, `yjs:${projectId}:*:meta`];
+    const count = 1000;
+
+    for (const pattern of patterns) {
+      // Walk the keyspace incrementally; SCAN is finished when the cursor is "0" again.
+      let cursor = "0";
+      do {
+        const res = await redis.scan(cursor, "MATCH", pattern, "COUNT", count);
+        cursor = res[0];
+        const keys = res[1] as string[];
+        if (keys.length > 0) {
+          deleted += await redis.del(...keys);
+        }
+      } while (cursor !== "0");
+    }
+
+    return deleted;
+  } catch (error) {
+    console.warn("[Internal/RestoreVersion] Failed to clear Yjs persistence:", error);
+    return 0;
+  }
+}
+
+/**
+ * Ask the WS server to drop its in-memory rooms for a project.
+ *
+ * Best-effort: returns null when WS_SERVER_URL or INTERNAL_API_SECRET is
+ * unset, the server answers with a non-2xx status, or the request fails or
+ * exceeds the 5 s timeout.
+ *
+ * @returns The WS server's JSON reply, or null.
+ */
+async function clearWsInMemoryRoomsForProject(projectId: string): Promise<{
+  clearedDocs?: number;
+  closedClients?: number;
+} | null> {
+  const wsUrl = process.env.WS_SERVER_URL;
+  const secret = process.env.INTERNAL_API_SECRET;
+  if (!wsUrl || !secret) return null;
+
+  try {
+    // Encode the ID so it can never alter the request path.
+    const resp = await fetch(
+      `${wsUrl}/admin/clear-project/${encodeURIComponent(projectId)}`,
+      {
+        method: "POST",
+        headers: { "X-Internal-Secret": secret },
+        signal: AbortSignal.timeout(5000),
+      }
+    );
+    const text = await resp.text();
+    if (!resp.ok) return null;
+    return JSON.parse(text) as { clearedDocs?: number; closedClients?: number };
+  } catch (error) {
+    console.warn("[Internal/RestoreVersion] Failed to clear WS rooms:", error);
+    return null;
+  }
+}
+
+/**
+ * Restore a project's files to a saved version.
+ *
+ * Body: `{ projectId: string, versionId: string }`. Three storage formats are
+ * handled, chosen from the version row: Merkle tree (`rootTreeHash`), tar.gz
+ * snapshot (`snapshotKey`), or per-file rows in the database. Afterwards the
+ * project's Yjs state in Redis and on the WS server is cleared.
+ * Responds 401/400/404 for auth, validation and lookup failures, 500 on
+ * unexpected errors.
+ */
+export async function POST(request: NextRequest) {
+  if (!verifyInternalAuth(request)) {
+    return NextResponse.json(
+      { success: false, error: "Unauthorized" },
+      { status: 401 }
+    );
+  }
+
+  try {
+    const body = await request.json();
+    const { projectId, versionId } = body as {
+      projectId: string;
+      versionId: string;
+    };
+
+    if (!projectId || typeof projectId !== "string") {
+      return NextResponse.json(
+        { success: false, error: "Project ID is required" },
+        { status: 400 }
+      );
+    }
+
+    if (!versionId || typeof versionId !== "string") {
+      return NextResponse.json(
+        { success: false, error: "Version ID is required" },
+        { status: 400 }
+      );
+    }
+
+    // Fetch version info; including projectId ensures the version belongs to it.
+    const version = await prisma.projectVersion.findUnique({
+      where: {
+        id: versionId,
+        projectId,
+      },
+    });
+
+    if (!version) {
+      return NextResponse.json(
+        { success: false, error: "Version not found" },
+        { status: 404 }
+      );
+    }
+
+    let restoredFileCount = 0;
+
+    if (version.rootTreeHash) {
+      // New format: restore via Merkle Tree
+      restoredFileCount = await merkleService.restoreCommit(projectId, versionId);
+    } else if (version.snapshotKey) {
+      // Legacy format: restore from tar.gz. Download first so a failed
+      // download leaves the current files untouched.
+      const snapshotFiles = await downloadSnapshot(projectId, versionId);
+      const storage = await clearProjectFiles(projectId);
+
+      // Restore files from snapshot
+      for (const [filePath, content] of snapshotFiles.entries()) {
+        const key = StoragePaths.projectFile(projectId, filePath);
+        await storage.upload(key, content);
+        restoredFileCount++;
+      }
+    } else {
+      // Oldest format: read from database
+      const snapshots = await prisma.fileSnapshot.findMany({
+        where: { versionId },
+      });
+
+      if (snapshots.length === 0) {
+        return NextResponse.json(
+          { success: false, error: "Version snapshot is empty" },
+          { status: 400 }
+        );
+      }
+
+      const storage = await clearProjectFiles(projectId);
+
+      // Restore files from snapshots
+      for (const snapshot of snapshots) {
+        const key = StoragePaths.projectFile(projectId, snapshot.filePath);
+        await storage.upload(key, Buffer.from(snapshot.content, "utf8"));
+        restoredFileCount++;
+      }
+    }
+
+    // Clear Yjs Redis persistence and WS in-memory rooms
+    const clearedYjsKeys = await clearYjsPersistenceForProject(projectId);
+    const wsCleared = await clearWsInMemoryRoomsForProject(projectId);
+
+    console.log(
+      `[Internal/RestoreVersion] Restored project ${projectId} to version "${version.name}" (${versionId}), ` +
+        `${restoredFileCount} files restored, ${clearedYjsKeys} Yjs keys cleared`
+    );
+
+    return NextResponse.json({
+      success: true,
+      data: {
+        versionId,
+        versionName: version.name,
+        restoredFileCount,
+        clearedYjsKeys,
+        wsCleared,
+      },
+    });
+  } catch (error) {
+    console.error("[Internal/RestoreVersion] Error:", error);
+
+    if (error instanceof Error) {
+      if (error.message === "VERSION_NOT_FOUND") {
+        return NextResponse.json(
+          { success: false, error: "Version not found" },
+          { status: 404 }
+        );
+      }
+    }
+
+    // Unexpected failure: report it as a server error, not HTTP 200.
+    return NextResponse.json(
+      {
+        success: false,
+        error: error instanceof Error ? error.message : "Unknown error",
+      },
+      { status: 500 }
+    );
+  }
+}
+
+/**
+ * Delete the project's current files from storage, keeping dotfiles
+ * (objects whose last path segment starts with ".").
+ *
+ * @returns The storage client, for the caller to upload the restored files.
+ */
+async function clearProjectFiles(projectId: string) {
+  const storage = await getStorage();
+  const currentFiles = await storage.list(StoragePaths.projectPrefix(projectId));
+  for (const file of currentFiles) {
+    const filename = file.key.split("/").pop() || "";
+    if (!filename.startsWith(".")) {
+      await storage.delete(file.key);
+    }
+  }
+  return storage;
+}
diff --git a/components/pdf-viewer/pdf-renderer.tsx b/components/pdf-viewer/pdf-renderer.tsx index 6f9487a..b2d285a 100644 --- a/components/pdf-viewer/pdf-renderer.tsx +++ b/components/pdf-viewer/pdf-renderer.tsx @@ -199,11 +199,18 @@ export const PdfRenderer = forwardRef<PdfRendererHandle, PdfRendererProps>(({ initPdfJs(); + // Capture ref values so the cleanup function uses the snapshot + const canvasRefsValue = canvasRefs.current; + const textLayerRefsValue = textLayerRefs.current; + const annotationLayerRefsValue = annotationLayerRefs.current; + const pageRefsValue = pageRefs.current; + const pageSizesRefValue = pageSizesRef.current; + // Cleanup: release references and clear canvases return () => { mounted = false; // Clear canvas contexts - canvasRefs.current.forEach((canvas) => { + canvasRefsValue.forEach((canvas) => { const ctx = canvas.getContext('2d'); if (ctx) { ctx.clearRect(0, 0, canvas.width, canvas.height); @@ -211,11 +218,11 @@ export const PdfRenderer = forwardRef<PdfRendererHandle, PdfRendererProps>(({ canvas.width = 0; canvas.height = 0; }); - canvasRefs.current.clear(); - textLayerRefs.current.clear(); - annotationLayerRefs.current.clear(); - pageRefs.current.clear(); - pageSizesRef.current.clear(); + canvasRefsValue.clear(); + textLayerRefsValue.clear(); + annotationLayerRefsValue.clear(); + pageRefsValue.clear(); + pageSizesRefValue.clear(); }; }, [pdfUrl, onLoadSuccess]); @@ -572,7 +579,7 @@ export const PdfRenderer = forwardRef<PdfRendererHandle, PdfRendererProps>(({ console.error(`[PdfRenderer] Render page ${pageNumber} error:`, err); return false; } - }, [pdfDoc]); + }, [pdfDoc, t]); // Process render queue const processRenderQueue = useCallback(async () => { diff --git a/components/pdf-viewer/pdf-viewer.tsx b/components/pdf-viewer/pdf-viewer.tsx index 897504f..d9f186e 100644 --- a/components/pdf-viewer/pdf-viewer.tsx +++ b/components/pdf-viewer/pdf-viewer.tsx @@ -149,7 +149,7 @@ export 
function PdfViewer({ setSyncTexStatus(t("pdfViewer.syncTex.locatedToPage", { page: syncTexTarget.page })); setTimeout(() => setSyncTexStatus(null), 3000); } - }, [syncTexTarget]); + }, [syncTexTarget, t]); // Handle PDF clicks (SyncTeX backward sync) const handlePageClick = useCallback(async ( @@ -208,7 +208,7 @@ export function PdfViewer({ setSyncTexStatus(t("syncTex.queryFailed")); setTimeout(() => setSyncTexStatus(null), 3000); } - }, [projectId, onSyncTexJump]); + }, [projectId, onSyncTexJump, t]); // Compile button const getCompileIcon = () => { diff --git a/docker-compose.prod.yml b/docker-compose.prod.yml index 87ef9f9..810a779 100644 --- a/docker-compose.prod.yml +++ b/docker-compose.prod.yml @@ -283,6 +283,37 @@ services: networks: - litewrite-network + # ============================================ + # nanobot AI assistant (Telegram/Feishu bot + Litewrite integration) + # ============================================ + nanobot: + build: + context: ./nanobot + dockerfile: Dockerfile + container_name: litewrite-nanobot + restart: unless-stopped + volumes: + - nanobot-data:/home/nanobot/.nanobot + environment: + # Litewrite integration + - NANOBOT__LITEWRITE__URL=http://web:3000 + - NANOBOT__LITEWRITE__API_SECRET=${INTERNAL_API_SECRET:-} + # Telegram channel + - NANOBOT__CHANNELS__TELEGRAM__ENABLED=${TELEGRAM_ENABLED:-false} + - NANOBOT__CHANNELS__TELEGRAM__TOKEN=${TELEGRAM_BOT_TOKEN:-} + # Feishu channel + - NANOBOT__CHANNELS__FEISHU__ENABLED=${FEISHU_ENABLED:-false} + - NANOBOT__CHANNELS__FEISHU__APP_ID=${FEISHU_APP_ID:-} + - NANOBOT__CHANNELS__FEISHU__APP_SECRET=${FEISHU_APP_SECRET:-} + - NANOBOT__CHANNELS__FEISHU__DEFAULT_LITEWRITE_USER_ID=${NANOBOT_DEFAULT_LITEWRITE_USER_ID:-} + # LLM provider + - NANOBOT__PROVIDERS__OPENROUTER__API_KEY=${OPENROUTER_API_KEY:-} + depends_on: + web: + condition: service_healthy + networks: + - litewrite-network + # ============================================ # Volumes # ============================================ @@ 
-293,6 +324,8 @@ volumes: driver: local redis-data: driver: local + nanobot-data: + driver: local # ============================================ # Network diff --git a/docker-compose.yml b/docker-compose.yml index 6f76c21..362b3f4 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -158,7 +158,7 @@ services: # Embedding config - EMBEDDING_API_BASE=${EMBEDDING_API_BASE:-https://api.openai.com/v1} - EMBEDDING_API_KEY=${EMBEDDING_API_KEY:-} - - EMBEDDING_MODEL=${EMBEDDING_MODEL:-text-embedding-3-small} + - EMBEDDING_MODEL=${EMBEDDING_MODEL:-text-embedding-3-large} # Web Search config - SERPER_API_KEY=${SERPER_API_KEY:-} # WebSocket service URL (internal) @@ -245,6 +245,40 @@ services: networks: - litewrite-network + # ============================================================================ + # nanobot AI assistant (Telegram/Feishu bot + Litewrite integration) + # ============================================================================ + nanobot: + build: + context: ./nanobot + dockerfile: Dockerfile.dev + container_name: litewrite-nanobot + volumes: + - nanobot-data:/root/.nanobot + # Mount source for hot reload during development + - ./nanobot:/app + environment: + # Litewrite integration + - NANOBOT__LITEWRITE__URL=http://web:3000 + - NANOBOT__LITEWRITE__API_SECRET=${INTERNAL_API_SECRET:-dev-internal-secret} + - NANOBOT__LITEWRITE__AI_SERVER_URL=http://ai-server:6612 + # Telegram channel + - NANOBOT__CHANNELS__TELEGRAM__ENABLED=${TELEGRAM_ENABLED:-false} + - NANOBOT__CHANNELS__TELEGRAM__TOKEN=${TELEGRAM_BOT_TOKEN:-} + # Feishu channel + - NANOBOT__CHANNELS__FEISHU__ENABLED=${FEISHU_ENABLED:-false} + - NANOBOT__CHANNELS__FEISHU__APP_ID=${FEISHU_APP_ID:-} + - NANOBOT__CHANNELS__FEISHU__APP_SECRET=${FEISHU_APP_SECRET:-} + - NANOBOT__CHANNELS__FEISHU__DEFAULT_LITEWRITE_USER_ID=${NANOBOT_DEFAULT_LITEWRITE_USER_ID:-} + # LLM provider & model + - NANOBOT__PROVIDERS__OPENROUTER__API_KEY=${OPENROUTER_API_KEY:-} + - 
NANOBOT__AGENTS__DEFAULTS__MODEL=${NANOBOT_LLM_MODEL:-anthropic/claude-sonnet-4-5} + depends_on: + - web + restart: unless-stopped + networks: + - litewrite-network + # MinIO init (create bucket) minio-init: image: minio/mc:latest @@ -273,6 +307,8 @@ volumes: driver: local redis-data: driver: local + nanobot-data: + driver: local # ============================================================================== # Networks diff --git a/env.example.oss b/env.example.oss index 5f4c9d3..d36f58c 100644 --- a/env.example.oss +++ b/env.example.oss @@ -41,6 +41,18 @@ EMBEDDING_API_KEY= # Optional: web search provider used by Deep Research SERPER_API_KEY= +# ------------------------------------------------------------------------------ +# nanobot (Feishu bot integration) +# ------------------------------------------------------------------------------ +# Set FEISHU_ENABLED=true and fill in the Feishu app credentials to enable. +FEISHU_ENABLED=false +FEISHU_APP_ID= +FEISHU_APP_SECRET= +# Your Litewrite user UUID (from the database) for project ownership mapping +NANOBOT_DEFAULT_LITEWRITE_USER_ID= +# Telegram channel +TELEGRAM_ENABLED=false +TELEGRAM_BOT_TOKEN= # ------------------------------------------------------------------------------ # Optional: Redis # ------------------------------------------------------------------------------ diff --git a/lib/hooks/use-auto-compile.ts b/lib/hooks/use-auto-compile.ts index b705214..26f50bc 100644 --- a/lib/hooks/use-auto-compile.ts +++ b/lib/hooks/use-auto-compile.ts @@ -258,7 +258,7 @@ export function useCompile({ isCompilingRef.current = false; clearProgressInterval(); } - }, [projectId, autoSaveVersion, startProgressSimulation, clearProgressInterval, t]); + }, [projectId, autoSaveVersion, startProgressSimulation, clearProgressInterval, t, tHistory]); // Cleanup timer useEffect(() => { diff --git a/nanobot/DEPLOYMENT.md b/nanobot/DEPLOYMENT.md new file mode 100644 index 0000000..f99b2a5 --- /dev/null +++ b/nanobot/DEPLOYMENT.md @@ 
-0,0 +1,332 @@ +# nanobot Deployment Guide + +nanobot is an AI assistant service integrated into Litewrite. It connects to messaging platforms (Feishu/Lark, Telegram) and enables users to manage LaTeX projects through natural language — listing projects, reading/editing files, compiling to PDF, and sending results back. + +## Architecture + +``` +┌──────────┐ WebSocket ┌──────────────┐ Internal API ┌──────────────┐ +│ Feishu │ ◄──────────────► │ │ ─────────────────► │ Litewrite │ +│ User │ │ nanobot │ │ Web (Next) │ +├──────────┤ Polling │ (Python) │ └──────┬───────┘ +│ Telegram │ ◄──────────────► │ │ │ +│ User │ └──────┬───────┘ │ +└──────────┘ │ │ + │ │ + │ LLM API │ Compile + ▼ ▼ + ┌──────────┐ ┌──────────────┐ + │ OpenRouter│ │ Compile │ + │ / LLM │ │ Server │ + └──────────┘ └──────────────┘ +``` + +All services run within the same Docker Compose network. nanobot communicates with Litewrite via Internal API endpoints authenticated by `INTERNAL_API_SECRET`. + +## Prerequisites + +- Litewrite running via `docker compose` (see main README) +- An LLM API key (OpenRouter recommended) +- A Feishu enterprise app (for Feishu bot integration), and/or +- A Telegram bot token (for Telegram bot integration) + +## Configuration + +All nanobot configuration is done through environment variables in the root `.env` file. 
+ +### Required Variables + +| Variable | Description | Example | +|----------|-------------|---------| +| `OPENROUTER_API_KEY` | LLM API key (shared with Litewrite AI features) | `sk-or-v1-xxxx` | +| `INTERNAL_API_SECRET` | Internal API auth (shared across all services) | Auto-generated by setup script | + +### Telegram Bot Variables + +| Variable | Description | Example | +|----------|-------------|---------| +| `TELEGRAM_ENABLED` | Enable/disable Telegram channel | `true` or `false` | +| `TELEGRAM_BOT_TOKEN` | Bot token from @BotFather | `123456:ABC-DEF1234ghIkl-zyx57W2v1u123ew11` | + +### Feishu Bot Variables + +| Variable | Description | Example | +|----------|-------------|---------| +| `FEISHU_ENABLED` | Enable/disable Feishu channel | `true` or `false` | +| `FEISHU_APP_ID` | Feishu app ID | `cli_xxxx` | +| `FEISHU_APP_SECRET` | Feishu app secret | `xxxx` | +| `NANOBOT_DEFAULT_LITEWRITE_USER_ID` | Litewrite user UUID for project operations | `cmlbewues00001hs5wdxvmd7q` | + +### LLM Model Configuration + +The default model is set in `nanobot/nanobot/config/schema.py`: + +```python +model: str = "minimax/minimax-m2.1" +``` + +To change it, modify the default or override via environment variable: +``` +NANOBOT__AGENTS__DEFAULTS__MODEL=anthropic/claude-sonnet-4-20250514 +``` + +Supported models (via OpenRouter): any model available on [openrouter.ai/models](https://openrouter.ai/models). + +## Telegram Bot Setup (Step by Step) + +### 1. Create Bot via @BotFather + +1. Open Telegram and search for [@BotFather](https://t.me/BotFather) +2. Send `/newbot` and follow the prompts to choose a name and username +3. BotFather will return a **bot token** (e.g. `123456:ABC-DEF1234ghIkl-zyx57W2v1u123ew11`) +4. Save this token — you'll need it for the `.env` file + +### 2. Configure Environment Variables + +Add these to your root `.env` file: + +```bash +TELEGRAM_ENABLED=true +TELEGRAM_BOT_TOKEN=your-bot-token-here +``` + +### 3. 
Start nanobot + +```bash +docker compose up -d nanobot +``` + +Verify the bot is running: +```bash +docker logs litewrite-nanobot 2>&1 | grep "Telegram bot" +# Should show: "Telegram bot @your_bot_username connected" +``` + +### 4. Test + +Open a chat with your bot in Telegram and send a message. The bot uses **long polling** so no webhook or public IP is needed. + +### Optional: Restrict Access + +To limit who can use the bot, set an allow list of Telegram user IDs or usernames via the nested env var: + +```bash +NANOBOT__CHANNELS__TELEGRAM__ALLOW_FROM=["123456789","your_username"] +``` + +You can find your Telegram user ID by messaging [@userinfobot](https://t.me/userinfobot). + +--- + +## Feishu App Setup (Step by Step) + +### 1. Create App + +1. Go to [Feishu Open Platform](https://open.feishu.cn/) +2. Click "Create App" → "Enterprise Self-built App" +3. Fill in app name and description +4. Note the **App ID** and **App Secret** + +### 2. Enable Bot + +1. In the app settings, go to "App Features" +2. Enable **Bot** capability + +### 3. Configure Event Subscription + +1. Go to "Event & Callback" → "Event Configuration" +2. **Important**: You must start the nanobot service FIRST (see step 5 below), then come back and set the subscription mode +3. Select "**Use Long Connection to Receive Events**" (WebSocket mode) +4. Add event: `im.message.receive_v1` (Receive messages) + +### 4. Configure Permissions + +Go to "Permissions & Scopes" and add: + +| Permission | Description | +|------------|-------------| +| `im:message` | Access messages | +| `im:message:send_as_bot` | Send messages as bot | +| `im:resource` | Access message resources | +| `im:chat` | Access chat info | + +### 5. First-Time Connection (Chicken-and-Egg Problem) + +Feishu requires an active WebSocket connection before you can save the "Long Connection" event subscription mode. Follow this order: + +1. Set `FEISHU_ENABLED=true`, `FEISHU_APP_ID`, `FEISHU_APP_SECRET` in `.env` +2. 
Start nanobot: `docker compose up -d nanobot` +3. Verify connection: `docker logs litewrite-nanobot` — look for `connected to wss://msg-frontier.feishu.cn` +4. Go back to Feishu console → "Event & Callback" → select "Long Connection" → Save +5. Add the `im.message.receive_v1` event subscription + +### 6. Publish App + +1. Go to "Version Management" +2. Create a new version +3. Submit for review (self-approve for internal apps) +4. Once published, find the bot in Feishu and send it a message + +### 7. Get Your Litewrite User ID + +After registering a Litewrite account, get your user UUID: + +```bash +# Option 1: Check the database +docker compose exec web npx prisma studio +# Open http://localhost:5555, find your user in the User table + +# Option 2: Use the API +curl -s http://localhost:3000/api/auth/session -b <your-cookie> | python3 -m json.tool +``` + +Set it in `.env`: +``` +NANOBOT_DEFAULT_LITEWRITE_USER_ID=your-uuid-here +``` + +Then restart nanobot: +```bash +docker compose up -d nanobot +``` + +## Internal API Endpoints + +nanobot communicates with Litewrite through these internal API endpoints (all use `X-Internal-Secret` header): + +| Endpoint | Method | Description | +|----------|--------|-------------| +| `/api/internal/projects/list` | POST | List/search projects | +| `/api/internal/projects/compile` | POST | Compile project, return PDF as base64 | +| `/api/internal/files/list` | POST | List files in a project | +| `/api/internal/files/read` | POST | Read file content | +| `/api/internal/files/edit` | POST | Replace file content (full-file) | +| `/api/internal/files/write` | POST | Edit file (shadow document, diff-based) | + +## nanobot Agent Tools + +The agent has access to these Litewrite-specific tools: + +| Tool | Description | +|------|-------------| +| `litewrite_list_projects` | Search projects by name | +| `litewrite_list_files` | List files in a project | +| `litewrite_read_file` | Read a file's content | +| `litewrite_edit_file` | Replace a 
file's entire content | +| `litewrite_compile` | Compile to PDF (pdflatex/xelatex/lualatex) | +| `message` | Send text/file messages back to the user | + +## Docker Compose Services + +nanobot runs as a Docker service alongside other Litewrite services: + +```yaml +# docker-compose.yml (dev) +nanobot: + build: + context: ./nanobot + dockerfile: Dockerfile.dev + volumes: + - ./nanobot:/app # Hot reload in dev + - nanobot-data:/root/.nanobot + environment: + - NANOBOT__LITEWRITE__URL=http://web:3000 + - NANOBOT__LITEWRITE__API_SECRET=${INTERNAL_API_SECRET} + - NANOBOT__CHANNELS__TELEGRAM__ENABLED=${TELEGRAM_ENABLED} + - NANOBOT__CHANNELS__TELEGRAM__TOKEN=${TELEGRAM_BOT_TOKEN} + - NANOBOT__CHANNELS__FEISHU__ENABLED=${FEISHU_ENABLED} + - NANOBOT__CHANNELS__FEISHU__APP_ID=${FEISHU_APP_ID} + - NANOBOT__CHANNELS__FEISHU__APP_SECRET=${FEISHU_APP_SECRET} + - NANOBOT__PROVIDERS__OPENROUTER__API_KEY=${OPENROUTER_API_KEY} + depends_on: + - web +``` + +## Common Operations + +```bash +# Check nanobot status +docker compose ps nanobot + +# View nanobot logs +docker compose logs -f nanobot + +# Restart nanobot (after .env changes) +docker compose up -d nanobot + +# Rebuild nanobot (after code changes) +docker compose build nanobot && docker compose up -d nanobot + +# Check Feishu connection +docker logs litewrite-nanobot 2>&1 | grep "connected to wss" + +# clear nanobot chat history (method 1: send /clear in any chat) +# Just type /clear in your Feishu or Telegram chat with the bot — instant! 
+ +# clear nanobot chat history (method 2: docker CLI, clears ALL sessions) +docker exec litewrite-nanobot sh -c 'rm -rf /root/.nanobot/sessions/*.jsonl' +docker restart litewrite-nanobot + +# Test Litewrite Internal API manually +curl -s -X POST http://localhost:3000/api/internal/projects/list \ + -H "Content-Type: application/json" \ + -H "X-Internal-Secret: $(grep INTERNAL_API_SECRET .env | cut -d= -f2)" \ + -d '{}' | python3 -m json.tool +``` + +## Troubleshooting + +### nanobot keeps restarting with "No API key configured" +- Check that `OPENROUTER_API_KEY` is set in `.env` +- Rebuild: `docker compose build nanobot && docker compose up -d nanobot` + +### Feishu bot doesn't receive messages +- Check connection: `docker logs litewrite-nanobot | grep connected` +- If no "connected" log, verify `FEISHU_APP_ID` and `FEISHU_APP_SECRET` +- Ensure the app is published in Feishu console +- Ensure "Long Connection" mode is saved in event subscription settings +- Restart: `docker restart litewrite-nanobot` + +### Compilation fails with Chinese text +- Use `compiler="xelatex"` in the compile command +- The agent should do this automatically when the Skill detects CJK content +- Ensure the compile server has CJK fonts installed (default Docker image includes them) + +### File edits don't appear in the browser +- nanobot's `edit_file` clears the Yjs cache after writing +- If the browser still shows old content, refresh the page +- Check WS server is running: `docker compose ps ws` + +## Project Structure + +``` +nanobot/ +├── main.py # Entry point (gateway) +├── requirements.txt # Python dependencies +├── Dockerfile # Production image +├── Dockerfile.dev # Development image +├── DEPLOYMENT.md # This file +└── nanobot/ # Python package + ├── agent/ # LLM agent loop + tools + │ ├── loop.py # Core agent loop + │ ├── context.py # System prompt builder + │ ├── skills.py # Skill loading + │ └── tools/ + │ ├── litewrite.py # Litewrite API tools + │ ├── message.py # Send messages 
(with file support) + │ ├── filesystem.py # Local file operations + │ ├── shell.py # Shell command execution + │ └── web.py # Web search/fetch + ├── channels/ + │ ├── base.py # Channel interface + │ ├── feishu.py # Feishu/Lark (WebSocket) + │ ├── telegram.py # Telegram (polling) + │ └── manager.py # Channel lifecycle + ├── bus/ # Async message bus + ├── providers/ # LLM provider abstraction + ├── session/ # Conversation history + ├── config/ # Configuration (env vars + JSON) + └── skills/ + └── litewrite/ + └── SKILL.md # Agent instructions for Litewrite +``` diff --git a/nanobot/Dockerfile b/nanobot/Dockerfile new file mode 100644 index 0000000..eca39ef --- /dev/null +++ b/nanobot/Dockerfile @@ -0,0 +1,68 @@ +# ============================================================================== +# Litewrite nanobot Service Dockerfile +# ============================================================================== +# Multi-stage build, production-ready +# +# Build: +# docker build -t litewrite-nanobot . +# +# Run: +# docker run --env-file .env litewrite-nanobot +# ============================================================================== + +# ------------------------------------------------------------------------------ +# Stage 1: Builder +# ------------------------------------------------------------------------------ +FROM python:3.12-slim AS builder + +WORKDIR /build + +# Install build dependencies +RUN apt-get update && apt-get install -y --no-install-recommends \ + build-essential \ + && rm -rf /var/lib/apt/lists/* + +# Copy dependency file +COPY requirements.txt . 
+ +# Create venv and install dependencies +RUN python -m venv /opt/venv +ENV PATH="/opt/venv/bin:$PATH" + +RUN pip install --no-cache-dir --upgrade pip && \ + pip install --no-cache-dir -r requirements.txt + +# ------------------------------------------------------------------------------ +# Stage 2: Runtime +# ------------------------------------------------------------------------------ +FROM python:3.12-slim AS runtime + +LABEL maintainer="Litewrite Team" +LABEL description="Litewrite nanobot Service" + +# Create non-root user with a proper home directory +RUN groupadd -r nanobot && useradd -r -g nanobot -m -d /home/nanobot nanobot + +WORKDIR /app + +# Copy venv from builder stage +COPY --from=builder /opt/venv /opt/venv +ENV PATH="/opt/venv/bin:$PATH" + +# Copy application code +COPY --chown=nanobot:nanobot . . + +# Create data directory at the correct home path +# (Python's Path.home() resolves to /home/nanobot for the nanobot user) +RUN mkdir -p /home/nanobot/.nanobot && chown -R nanobot:nanobot /home/nanobot/.nanobot + +# Environment variables +ENV PYTHONUNBUFFERED=1 +ENV PYTHONDONTWRITEBYTECODE=1 +ENV HOME=/home/nanobot + +# Switch to non-root user +USER nanobot + +# Default command: start gateway +CMD ["python", "main.py"] diff --git a/nanobot/Dockerfile.dev b/nanobot/Dockerfile.dev new file mode 100644 index 0000000..d6fb031 --- /dev/null +++ b/nanobot/Dockerfile.dev @@ -0,0 +1,22 @@ +# ============================================================================== +# Litewrite nanobot Service Dockerfile (Development) +# ============================================================================== +# Hot-reload via volume mounts in docker-compose.yml +# ============================================================================== + +FROM python:3.12-slim + +WORKDIR /app + +# Install dependencies +COPY requirements.txt . 
+RUN pip install --no-cache-dir --upgrade pip && \ + pip install --no-cache-dir -r requirements.txt + +# Copy application code (overridden by volume mount in dev) +COPY . . + +ENV PYTHONUNBUFFERED=1 + +# Start with auto-reload-friendly wrapper +CMD ["python", "main.py"] diff --git a/nanobot/main.py b/nanobot/main.py new file mode 100644 index 0000000..cea2d4a --- /dev/null +++ b/nanobot/main.py @@ -0,0 +1,125 @@ +#!/usr/bin/env python3 +""" +Litewrite nanobot Service +========================= + +AI assistant gateway for Litewrite, providing: +- Feishu bot integration (WebSocket long-connection) +- Litewrite project operations (list/read/edit/compile via Internal API) +- LLM-powered agent with tool use + +Usage: + python main.py + python main.py --verbose + +Environment variables (set via docker-compose): + NANOBOT__LITEWRITE__URL Litewrite API base URL + NANOBOT__LITEWRITE__API_SECRET Internal API secret + NANOBOT__CHANNELS__FEISHU__ENABLED Enable Feishu channel + NANOBOT__CHANNELS__FEISHU__APP_ID Feishu App ID + NANOBOT__CHANNELS__FEISHU__APP_SECRET Feishu App Secret + NANOBOT__PROVIDERS__OPENROUTER__API_KEY LLM API key +""" + +import asyncio +import sys +from pathlib import Path + +# Ensure the nanobot package is importable +sys.path.insert(0, str(Path(__file__).parent)) + +from loguru import logger # noqa: E402 + + +def main(): + """Start the nanobot gateway.""" + from nanobot.config.loader import load_config + from nanobot.bus.queue import MessageBus + from nanobot.providers.litellm_provider import LiteLLMProvider + from nanobot.agent.loop import AgentLoop + from nanobot.channels.manager import ChannelManager + + verbose = "--verbose" in sys.argv or "-v" in sys.argv + + if verbose: + import logging + + logging.basicConfig(level=logging.DEBUG) + + logger.info("Starting nanobot gateway...") + + config = load_config() + + # Create message bus + bus = MessageBus() + + # Create LLM provider + api_key = config.get_api_key() + api_base = config.get_api_base() + + if 
not api_key: + logger.error( + "No API key configured. Set NANOBOT__PROVIDERS__OPENROUTER__API_KEY" + ) + sys.exit(1) + + provider = LiteLLMProvider( + api_key=api_key, + api_base=api_base, + default_model=config.agents.defaults.model, + ) + + # Create agent loop + agent = AgentLoop( + bus=bus, + provider=provider, + workspace=config.workspace_path, + model=config.agents.defaults.model, + max_iterations=config.agents.defaults.max_tool_iterations, + brave_api_key=config.tools.web.search.api_key or None, + exec_config=config.tools.exec, + litewrite_config=config.litewrite, + feishu_config=config.channels.feishu, + ) + + # Create channel manager + channels = ChannelManager(config, bus) + + if channels.enabled_channels: + logger.info( + "Channels enabled: {}", + ", ".join(channels.enabled_channels), + ) + else: + logger.warning("No channels enabled") + + if config.litewrite.api_secret: + logger.info("Litewrite integration: {}", config.litewrite.url) + else: + logger.warning("Litewrite integration not configured (no api_secret)") + + async def run(): + try: + await asyncio.gather( + agent.run(), + channels.start_all(), + ) + except KeyboardInterrupt: + logger.info("Shutting down...") + agent.stop() + await channels.stop_all() + + print("=" * 50) + print("Litewrite nanobot Service") + print("=" * 50) + print(f" LLM Model: {config.agents.defaults.model}") + print(f" Litewrite: {config.litewrite.url}") + enabled = ", ".join(channels.enabled_channels) or "none" + print(f" Channels: {enabled}") + print("=" * 50) + + asyncio.run(run()) + + +if __name__ == "__main__": + main() diff --git a/nanobot/nanobot/__init__.py b/nanobot/nanobot/__init__.py new file mode 100644 index 0000000..ee0445b --- /dev/null +++ b/nanobot/nanobot/__init__.py @@ -0,0 +1,6 @@ +""" +nanobot - A lightweight AI agent framework +""" + +__version__ = "0.1.0" +__logo__ = "🐈" diff --git a/nanobot/nanobot/__main__.py b/nanobot/nanobot/__main__.py new file mode 100644 index 0000000..c7f5620 --- /dev/null +++ 
b/nanobot/nanobot/__main__.py @@ -0,0 +1,8 @@ +""" +Entry point for running nanobot as a module: python -m nanobot +""" + +from nanobot.cli.commands import app + +if __name__ == "__main__": + app() diff --git a/nanobot/nanobot/agent/__init__.py b/nanobot/nanobot/agent/__init__.py new file mode 100644 index 0000000..c3fc97b --- /dev/null +++ b/nanobot/nanobot/agent/__init__.py @@ -0,0 +1,8 @@ +"""Agent core module.""" + +from nanobot.agent.loop import AgentLoop +from nanobot.agent.context import ContextBuilder +from nanobot.agent.memory import MemoryStore +from nanobot.agent.skills import SkillsLoader + +__all__ = ["AgentLoop", "ContextBuilder", "MemoryStore", "SkillsLoader"] diff --git a/nanobot/nanobot/agent/context.py b/nanobot/nanobot/agent/context.py new file mode 100644 index 0000000..671c0c4 --- /dev/null +++ b/nanobot/nanobot/agent/context.py @@ -0,0 +1,273 @@ +"""Context builder for assembling agent prompts.""" + +import base64 +import mimetypes +from pathlib import Path +from typing import Any + +from nanobot.agent.memory import MemoryStore +from nanobot.agent.skills import SkillsLoader + + +class ContextBuilder: + """ + Builds the context (system prompt + messages) for the agent. + + Assembles bootstrap files, memory, skills, and conversation history + into a coherent prompt for the LLM. + """ + + BOOTSTRAP_FILES = ["AGENTS.md", "SOUL.md", "USER.md", "TOOLS.md", "IDENTITY.md"] + + def __init__(self, workspace: Path): + self.workspace = workspace + self.memory = MemoryStore(workspace) + self.skills = SkillsLoader(workspace) + + def build_system_prompt(self, skill_names: list[str] | None = None) -> str: + """ + Build the system prompt from bootstrap files, memory, and skills. + + Args: + skill_names: Optional list of skills to include. + + Returns: + Complete system prompt. 
+ """ + parts = [] + + # Core identity + parts.append(self._get_identity()) + + # Bootstrap files + bootstrap = self._load_bootstrap_files() + if bootstrap: + parts.append(bootstrap) + + # Memory context + memory = self.memory.get_memory_context() + if memory: + parts.append(f"# Memory\n\n{memory}") + + # Skills - progressive loading + # 1. Always-loaded skills: include full content + always_skills = self.skills.get_always_skills() + if always_skills: + always_content = self.skills.load_skills_for_context(always_skills) + if always_content: + parts.append(f"# Active Skills\n\n{always_content}") + + # 2. Available skills: only show summary (agent uses read_file to load) + skills_summary = self.skills.build_skills_summary() + if skills_summary: + parts.append(f"""# Skills + +The following skills extend your capabilities. To use a skill, read its SKILL.md file using the read_file tool. +Skills with available="false" need dependencies installed first - you can try installing them with apt/brew. + +{skills_summary}""") + + return "\n\n---\n\n".join(parts) + + def _get_identity(self) -> str: + """Get the core identity section.""" + from datetime import datetime + + dt = datetime.now() + now = dt.strftime("%Y-%m-%d %H:%M (%A)") + year = dt.strftime("%Y") + workspace_path = str(self.workspace.expanduser().resolve()) + + return f"""# nanobot 🐈 + +You are nanobot, a helpful AI assistant. You have access to tools that allow you to: +- Read, write, and edit files +- Execute shell commands +- Search the web and fetch web pages +- Send messages to users on chat channels +- Spawn subagents for complex background tasks +- Manage Litewrite projects (compile, edit, create, etc.) + +## Current Date & Time +**Today is {now}. The current year is {year}.** +When searching for latest research or information, include the current year ({year}) in your queries. 
+ +## Workspace +Your workspace is at: {workspace_path} +- Memory files: {workspace_path}/memory/MEMORY.md +- Daily notes: {workspace_path}/memory/YYYY-MM-DD.md +- Custom skills: {workspace_path}/skills/{{skill-name}}/SKILL.md + +## CRITICAL RULES — Tool Usage (MUST follow) + +1. **ALWAYS use tools for ANY action.** You MUST call the actual tool function. + NEVER claim or pretend you performed an action (compile, file edit, send, etc.) + without having called the corresponding tool first. + +2. **Compilation**: To compile a LaTeX project, you MUST call `litewrite_compile`. + The tool will automatically send the PDF to the user — do NOT call message afterwards. + NEVER say "编译成功" or "PDF已发送" without having actually called `litewrite_compile`. + +3. **File operations**: To read or edit project files, call `litewrite_agent`. + To list projects, call `litewrite_list_projects`. + +4. **Sending messages**: Only use the `message` tool when you need to send + additional messages or file attachments to the user's chat. + For normal conversation, just respond with text. + +5. **Honesty**: If you cannot complete an action or a tool call fails, tell the + user honestly. NEVER fabricate file paths, tool results, or success messages. + +Always be helpful, accurate, and concise. +When remembering something, write to {workspace_path}/memory/MEMORY.md""" + + def _load_bootstrap_files(self) -> str: + """Load all bootstrap files from workspace.""" + parts = [] + + for filename in self.BOOTSTRAP_FILES: + file_path = self.workspace / filename + if file_path.exists(): + content = file_path.read_text(encoding="utf-8") + parts.append(f"## {filename}\n\n{content}") + + return "\n\n".join(parts) if parts else "" + + def build_messages( + self, + history: list[dict[str, Any]], + current_message: str, + skill_names: list[str] | None = None, + media: list[str] | None = None, + ) -> list[dict[str, Any]]: + """ + Build the complete message list for an LLM call. 
+ + Args: + history: Previous conversation messages. + current_message: The new user message. + skill_names: Optional skills to include. + media: Optional list of local file paths for images/media. + + Returns: + List of messages including system prompt. + """ + messages = [] + + # System prompt + system_prompt = self.build_system_prompt(skill_names) + messages.append({"role": "system", "content": system_prompt}) + + # History + messages.extend(history) + + # Current message (with optional image attachments) + user_content = self._build_user_content(current_message, media) + messages.append({"role": "user", "content": user_content}) + + return messages + + def _build_user_content( + self, text: str, media: list[str] | None + ) -> str | list[dict[str, Any]]: + """Build user message content with optional base64-encoded images. + + Images are encoded as vision content for multi-modal LLMs. + File paths are appended to the text so the LLM can reference them + when using tools (e.g., uploading files to a project). 
+ """ + if not media: + return text + + images = [] + file_details: list[str] = [] + for path in media: + p = Path(path) + if not p.is_file(): + continue + mime, _ = mimetypes.guess_type(path) + size = p.stat().st_size + is_image = mime is not None and mime.startswith("image/") + + if is_image: + b64 = base64.b64encode(p.read_bytes()).decode() + images.append( + { + "type": "image_url", + "image_url": {"url": f"data:{mime};base64,{b64}"}, + } + ) + + # Build a descriptive line for each file + type_label = "image" if is_image else (mime or "file") + size_str = f"{size} bytes" if size < 1024 else f"{size / 1024:.1f} KB" + file_details.append(f" - {path} ({type_label}, {size_str})") + + if not file_details: + return text + + # Append file path info so the LLM can reference files in tool calls + attachment_block = "\n".join(file_details) + augmented_text = ( + f"{text}\n\n" + f"[Attached files — use these local paths with tools like " + f"litewrite_upload_file:\n{attachment_block}]" + ) + + if not images: + return augmented_text + return images + [{"type": "text", "text": augmented_text}] + + def add_tool_result( + self, + messages: list[dict[str, Any]], + tool_call_id: str, + tool_name: str, + result: str, + ) -> list[dict[str, Any]]: + """ + Add a tool result to the message list. + + Args: + messages: Current message list. + tool_call_id: ID of the tool call. + tool_name: Name of the tool. + result: Tool execution result. + + Returns: + Updated message list. + """ + messages.append( + { + "role": "tool", + "tool_call_id": tool_call_id, + "name": tool_name, + "content": result, + } + ) + return messages + + def add_assistant_message( + self, + messages: list[dict[str, Any]], + content: str | None, + tool_calls: list[dict[str, Any]] | None = None, + ) -> list[dict[str, Any]]: + """ + Add an assistant message to the message list. + + Args: + messages: Current message list. + content: Message content. + tool_calls: Optional tool calls. 
+ + Returns: + Updated message list. + """ + msg: dict[str, Any] = {"role": "assistant", "content": content or ""} + + if tool_calls: + msg["tool_calls"] = tool_calls + + messages.append(msg) + return messages diff --git a/nanobot/nanobot/agent/loop.py b/nanobot/nanobot/agent/loop.py new file mode 100644 index 0000000..9127476 --- /dev/null +++ b/nanobot/nanobot/agent/loop.py @@ -0,0 +1,587 @@ +"""Agent loop: the core processing engine.""" + +from __future__ import annotations + +import asyncio +import json +from pathlib import Path +from typing import TYPE_CHECKING + +from loguru import logger + +from nanobot.bus.events import InboundMessage, OutboundMessage +from nanobot.bus.queue import MessageBus +from nanobot.providers.base import LLMProvider +from nanobot.agent.context import ContextBuilder +from nanobot.agent.tools.registry import ToolRegistry +from nanobot.agent.tools.filesystem import ( + ReadFileTool, + WriteFileTool, + EditFileTool, + ListDirTool, +) +from nanobot.agent.tools.shell import ExecTool +from nanobot.agent.tools.web import WebSearchTool, WebFetchTool +from nanobot.agent.tools.message import MessageTool +from nanobot.agent.tools.spawn import SpawnTool +from nanobot.agent.tools.session import ( + SessionInfoTool, + SessionGetHistoryTool, + SessionClearTool, + SessionSummarizeTool, +) +from nanobot.agent.subagent import SubagentManager +from nanobot.session.manager import SessionManager + +if TYPE_CHECKING: + from nanobot.config.schema import ExecToolConfig, FeishuConfig, LitewriteConfig + + +class AgentLoop: + """ + The agent loop is the core processing engine. + + It: + 1. Receives messages from the bus + 2. Builds context with history, memory, skills + 3. Calls the LLM + 4. Executes tool calls + 5. 
Sends responses back + """ + + def __init__( + self, + bus: MessageBus, + provider: LLMProvider, + workspace: Path, + model: str | None = None, + max_iterations: int = 20, + brave_api_key: str | None = None, + exec_config: "ExecToolConfig | None" = None, + litewrite_config: "LitewriteConfig | None" = None, + feishu_config: "FeishuConfig | None" = None, + ): + from nanobot.config.schema import ExecToolConfig + + self.bus = bus + self.provider = provider + self.workspace = workspace + self.model = model or provider.get_default_model() + self.max_iterations = max_iterations + self.brave_api_key = brave_api_key + self.exec_config = exec_config or ExecToolConfig() + self.litewrite_config = litewrite_config + self.feishu_config = feishu_config + + self.context = ContextBuilder(workspace) + self.sessions = SessionManager(workspace) + self.tools = ToolRegistry() + self.subagents = SubagentManager( + provider=provider, + workspace=workspace, + bus=bus, + model=self.model, + brave_api_key=brave_api_key, + exec_config=self.exec_config, + ) + + self._running = False + self._register_default_tools() + + def _register_default_tools(self) -> None: + """Register the default set of tools.""" + # File tools (workspace-sandboxed) + self.tools.register(ReadFileTool(workspace=self.workspace)) + self.tools.register(WriteFileTool(workspace=self.workspace)) + self.tools.register(EditFileTool(workspace=self.workspace)) + self.tools.register(ListDirTool(workspace=self.workspace)) + + # Shell tool + self.tools.register( + ExecTool( + working_dir=str(self.workspace), + timeout=self.exec_config.timeout, + restrict_to_workspace=self.exec_config.restrict_to_workspace, + ) + ) + + # Web tools + # Note: WebSearchTool (Brave) removed — litewrite_deep_research covers + # search via ai-server (arXiv + web). Only keep WebFetchTool for URL fetching. 
+ if self.brave_api_key: + self.tools.register(WebSearchTool(api_key=self.brave_api_key)) + self.tools.register(WebFetchTool()) + + # Message tool + message_tool = MessageTool(send_callback=self.bus.publish_outbound) + self.tools.register(message_tool) + + # Spawn tool (for subagents) + spawn_tool = SpawnTool(manager=self.subagents) + self.tools.register(spawn_tool) + + # Session management tools + self.tools.register(SessionInfoTool(self.sessions)) + self.tools.register(SessionGetHistoryTool(self.sessions)) + self.tools.register(SessionClearTool(self.sessions)) + summarize_tool = SessionSummarizeTool( + self.sessions, provider=self.provider, model=self.model + ) + self.tools.register(summarize_tool) + + # Litewrite tools (registered when Litewrite integration is configured) + if self.litewrite_config and self.litewrite_config.api_secret: + self._register_litewrite_tools() + + def _register_litewrite_tools(self) -> None: + """Register Litewrite integration tools. + + Only *management-level* tools are exposed to the bot. All file + reading / editing / listing is delegated to the ``litewrite_agent`` + tool which internally invokes Litewrite's AI sub-agents. This keeps + the bot in a **Manager** role and prevents it from bypassing the + agent's orchestration layer. 
+ """ + from nanobot.agent.tools.litewrite import ( + LitewriteClient, + # Manager-level tools + LitewriteListProjectsTool, + LitewriteCompileTool, + LitewriteAgentTool, + # Project management + LitewriteCreateProjectTool, + LitewriteDeleteProjectTool, + LitewriteRenameProjectTool, + # Version management + LitewriteListVersionsTool, + LitewriteSaveVersionTool, + LitewriteRestoreVersionTool, + # File management + LitewriteUploadFileTool, + LitewriteCreateFileTool, + # Import tools + LitewriteImportArxivTool, + LitewriteImportGithubTool, + LitewriteImportUploadTool, + ) + from nanobot.agent.tools.deep_research import LitewriteDeepResearchTool + + client = LitewriteClient( + base_url=self.litewrite_config.url, + api_secret=self.litewrite_config.api_secret, + ) + + # Resolve default owner ID from Feishu config + default_owner_id = "" + if self.feishu_config: + default_owner_id = self.feishu_config.default_litewrite_user_id + + # Core tools — litewrite_agent is the primary interface for + # reading, analysing, writing, and editing project files. 
+ self.tools.register(LitewriteListProjectsTool(client, default_owner_id)) + self.tools.register(LitewriteAgentTool(client, default_owner_id)) + self.tools.register(LitewriteCompileTool(client, default_owner_id)) + + # Project management + self.tools.register(LitewriteCreateProjectTool(client, default_owner_id)) + self.tools.register(LitewriteDeleteProjectTool(client)) + self.tools.register(LitewriteRenameProjectTool(client)) + + # Version management + self.tools.register(LitewriteListVersionsTool(client)) + self.tools.register(LitewriteSaveVersionTool(client, default_owner_id)) + self.tools.register(LitewriteRestoreVersionTool(client)) + + # File management + self.tools.register(LitewriteUploadFileTool(client)) + self.tools.register(LitewriteCreateFileTool(client)) + + # Import tools (arXiv, GitHub/GitLab, file upload) + self.tools.register(LitewriteImportArxivTool(client, default_owner_id)) + self.tools.register(LitewriteImportGithubTool(client, default_owner_id)) + self.tools.register(LitewriteImportUploadTool(client, default_owner_id)) + + # Deep Research tool (calls AI server directly) + self.tools.register( + LitewriteDeepResearchTool( + ai_server_url=self.litewrite_config.ai_server_url, + ) + ) + + logger.info(f"Litewrite tools registered (url={self.litewrite_config.url})") + + async def run(self) -> None: + """Run the agent loop, processing messages from the bus.""" + self._running = True + logger.info("Agent loop started") + + while self._running: + try: + # Wait for next message + msg = await asyncio.wait_for(self.bus.consume_inbound(), timeout=1.0) + + # Process it + try: + response = await self._process_message(msg) + if response: + await self.bus.publish_outbound(response) + except Exception as e: + logger.error(f"Error processing message: {e}", exc_info=True) + # Send generic error response (do not leak internal details) + await self.bus.publish_outbound( + OutboundMessage( + channel=msg.channel, + chat_id=msg.chat_id, + content="Sorry, I encountered 
an error processing your message. Please try again.", + ) + ) + except asyncio.TimeoutError: + continue + + def stop(self) -> None: + """Stop the agent loop.""" + self._running = False + logger.info("Agent loop stopping") + + async def _process_message(self, msg: InboundMessage) -> OutboundMessage | None: + """ + Process a single inbound message. + + Args: + msg: The inbound message to process. + + Returns: + The response message, or None if no response needed. + """ + # Handle system messages (subagent announces) + # The chat_id contains the original "channel:chat_id" to route back to + if msg.channel == "system": + return await self._process_system_message(msg) + + logger.info(f"Processing message from {msg.channel}:{msg.sender_id}") + + # Get or create session + session = self.sessions.get_or_create(msg.session_key) + + # ── Slash commands (handled before LLM) ────────────────────────── + if msg.content.strip() == "/clear": + count = len(session.messages) + session.clear() + self.sessions.save(session) + logger.info( + f"Session {msg.session_key} cleared via /clear ({count} messages)" + ) + return OutboundMessage( + channel=msg.channel, + chat_id=msg.chat_id, + content=f"✅ Chat history cleared ({count} messages removed). 
Starting fresh!", + ) + + # Update tool contexts + message_tool = self.tools.get("message") + if isinstance(message_tool, MessageTool): + message_tool.set_context(msg.channel, msg.chat_id) + + spawn_tool = self.tools.get("spawn") + if isinstance(spawn_tool, SpawnTool): + spawn_tool.set_context(msg.channel, msg.chat_id) + + # Set compile tool context so it can auto-send PDFs + compile_tool = self.tools.get("litewrite_compile") + if compile_tool is not None: + from nanobot.agent.tools.litewrite import LitewriteCompileTool + + if isinstance(compile_tool, LitewriteCompileTool): + compile_tool.set_context( + msg.channel, msg.chat_id, self.bus.publish_outbound + ) + + # Set session context on session tools + from nanobot.agent.tools.session import _SessionToolBase + + for tool in self.tools._tools.values(): + if isinstance(tool, _SessionToolBase): + tool.set_session(session) + + # Build initial messages (use get_history for LLM-formatted messages) + messages = self.context.build_messages( + history=session.get_history(), + current_message=msg.content, + media=msg.media if msg.media else None, + ) + + # Agent loop + iteration = 0 + final_content = None + any_tool_called = False + + while iteration < self.max_iterations: + iteration += 1 + + # Call LLM + logger.info( + f"Agent iteration {iteration}/{self.max_iterations} " + f"(tools_available={len(self.tools)}, history_msgs={len(messages)})" + ) + response = await self.provider.chat( + messages=messages, tools=self.tools.get_definitions(), model=self.model + ) + + # Handle tool calls + if response.has_tool_calls: + tool_names = [tc.name for tc in response.tool_calls] + logger.info(f"LLM requested tool calls: {tool_names}") + any_tool_called = True + + # Add assistant message with tool calls + tool_call_dicts = [ + { + "id": tc.id, + "type": "function", + "function": { + "name": tc.name, + "arguments": json.dumps( + tc.arguments + ), # Must be JSON string + }, + } + for tc in response.tool_calls + ] + messages = 
self.context.add_assistant_message( + messages, response.content, tool_call_dicts + ) + + # Execute tools + for tool_call in response.tool_calls: + args_str = json.dumps(tool_call.arguments) + logger.info(f"Executing tool: {tool_call.name}({args_str})") + result = await self.tools.execute( + tool_call.name, tool_call.arguments + ) + result_preview = result[:200] if len(result) > 200 else result + logger.info(f"Tool {tool_call.name} result: {result_preview}") + messages = self.context.add_tool_result( + messages, tool_call.id, tool_call.name, result + ) + else: + # No tool calls — check for suspected hallucination + final_content = response.content or "" + logger.info( + f"LLM returned text (no tool calls) at iteration {iteration}: " + f"{final_content[:120]}..." + ) + + # Hallucination guard: if the model claims it performed an + # action (compile, send PDF, edit file, etc.) on the FIRST + # iteration without ever calling a tool, inject a correction + # prompt and retry. + if not any_tool_called and iteration == 1: + if self._looks_like_hallucinated_action(final_content): + logger.warning( + "Hallucination detected: model claims action without " + "tool calls. Injecting correction prompt." + ) + messages = self.context.add_assistant_message( + messages, final_content + ) + messages.append( + { + "role": "user", + "content": ( + "[System] You did NOT actually perform any action — " + "you MUST call the appropriate tool function to " + "complete the user's request. You cannot claim " + "an action was done without calling a tool. " + "Available tools include: litewrite_agent, " + "litewrite_compile, litewrite_create_file, " + "litewrite_list_projects, litewrite_import_arxiv, " + "etc. Please call the correct tool(s) NOW." + ), + } + ) + final_content = None + continue + + break + + if final_content is None: + final_content = "I've completed processing but have no response to give." 
+ + # Save to session + session.add_message("user", msg.content) + session.add_message("assistant", final_content) + self.sessions.save(session) + + return OutboundMessage( + channel=msg.channel, chat_id=msg.chat_id, content=final_content + ) + + async def _process_system_message( + self, msg: InboundMessage + ) -> OutboundMessage | None: + """ + Process a system message (e.g., subagent announce). + + The chat_id field contains "original_channel:original_chat_id" to route + the response back to the correct destination. + """ + logger.info(f"Processing system message from {msg.sender_id}") + + # Parse origin from chat_id (format: "channel:chat_id") + if ":" in msg.chat_id: + parts = msg.chat_id.split(":", 1) + origin_channel = parts[0] + origin_chat_id = parts[1] + else: + # Fallback + origin_channel = "cli" + origin_chat_id = msg.chat_id + + # Use the origin session for context + session_key = f"{origin_channel}:{origin_chat_id}" + session = self.sessions.get_or_create(session_key) + + # Update tool contexts + message_tool = self.tools.get("message") + if isinstance(message_tool, MessageTool): + message_tool.set_context(origin_channel, origin_chat_id) + + spawn_tool = self.tools.get("spawn") + if isinstance(spawn_tool, SpawnTool): + spawn_tool.set_context(origin_channel, origin_chat_id) + + # Set session context on session tools + from nanobot.agent.tools.session import _SessionToolBase + + for tool in self.tools._tools.values(): + if isinstance(tool, _SessionToolBase): + tool.set_session(session) + + # Build messages with the announce content + messages = self.context.build_messages( + history=session.get_history(), current_message=msg.content + ) + + # Agent loop (limited for announce handling) + iteration = 0 + final_content = None + + while iteration < self.max_iterations: + iteration += 1 + + response = await self.provider.chat( + messages=messages, tools=self.tools.get_definitions(), model=self.model + ) + + if response.has_tool_calls: + tool_call_dicts = [ + 
{ + "id": tc.id, + "type": "function", + "function": { + "name": tc.name, + "arguments": json.dumps(tc.arguments), + }, + } + for tc in response.tool_calls + ] + messages = self.context.add_assistant_message( + messages, response.content, tool_call_dicts + ) + + for tool_call in response.tool_calls: + args_str = json.dumps(tool_call.arguments) + logger.debug( + f"Executing tool: {tool_call.name} with arguments: {args_str}" + ) + result = await self.tools.execute( + tool_call.name, tool_call.arguments + ) + messages = self.context.add_tool_result( + messages, tool_call.id, tool_call.name, result + ) + else: + final_content = response.content + break + + if final_content is None: + final_content = "Background task completed." + + # Save to session (mark as system message in history) + session.add_message("user", f"[System: {msg.sender_id}] {msg.content}") + session.add_message("assistant", final_content) + self.sessions.save(session) + + return OutboundMessage( + channel=origin_channel, chat_id=origin_chat_id, content=final_content + ) + + @staticmethod + def _looks_like_hallucinated_action(text: str) -> bool: + """Detect if the model's text response looks like it performed an + action that should have required a tool call. + + Returns ``True`` when suspicious patterns are found. + """ + if not text: + return False + t = text.lower() + # Patterns that strongly suggest the model is claiming it did something + # that requires a tool call. 
+ action_phrases = [ + # Compilation + "编译成功", + "编译完成", + "pdf已发送", + "pdf 已发送", + "pdf已编译", + "compilation successful", + "pdf sent", + # Project operations + "已创建项目", + "项目已创建", + "project created", + # File operations + "已修改", + "修改完成", + "文件已编辑", + "file edited", + "已创建", + "已保存", + "已完成", + "已新建", + "已添加", + "已写入", + "已生成", + "已导入", + "已上传", + "file created", + "successfully created", + "saved to", + # Agent operations + "已总结", + "已分析", + "已重写", + "已更新", + ] + return any(phrase in t for phrase in action_phrases) + + async def process_direct( + self, content: str, session_key: str = "cli:direct" + ) -> str: + """ + Process a message directly (for CLI usage). + + Args: + content: The message content. + session_key: Session identifier. + + Returns: + The agent's response. + """ + msg = InboundMessage( + channel="cli", sender_id="user", chat_id="direct", content=content + ) + + response = await self._process_message(msg) + return response.content if response else "" diff --git a/nanobot/nanobot/agent/memory.py b/nanobot/nanobot/agent/memory.py new file mode 100644 index 0000000..22297f4 --- /dev/null +++ b/nanobot/nanobot/agent/memory.py @@ -0,0 +1,109 @@ +"""Memory system for persistent agent memory.""" + +from pathlib import Path +from datetime import datetime + +from nanobot.utils.helpers import ensure_dir, today_date + + +class MemoryStore: + """ + Memory system for the agent. + + Supports daily notes (memory/YYYY-MM-DD.md) and long-term memory (MEMORY.md). 
+ """ + + def __init__(self, workspace: Path): + self.workspace = workspace + self.memory_dir = ensure_dir(workspace / "memory") + self.memory_file = self.memory_dir / "MEMORY.md" + + def get_today_file(self) -> Path: + """Get path to today's memory file.""" + return self.memory_dir / f"{today_date()}.md" + + def read_today(self) -> str: + """Read today's memory notes.""" + today_file = self.get_today_file() + if today_file.exists(): + return today_file.read_text(encoding="utf-8") + return "" + + def append_today(self, content: str) -> None: + """Append content to today's memory notes.""" + today_file = self.get_today_file() + + if today_file.exists(): + existing = today_file.read_text(encoding="utf-8") + content = existing + "\n" + content + else: + # Add header for new day + header = f"# {today_date()}\n\n" + content = header + content + + today_file.write_text(content, encoding="utf-8") + + def read_long_term(self) -> str: + """Read long-term memory (MEMORY.md).""" + if self.memory_file.exists(): + return self.memory_file.read_text(encoding="utf-8") + return "" + + def write_long_term(self, content: str) -> None: + """Write to long-term memory (MEMORY.md).""" + self.memory_file.write_text(content, encoding="utf-8") + + def get_recent_memories(self, days: int = 7) -> str: + """ + Get memories from the last N days. + + Args: + days: Number of days to look back. + + Returns: + Combined memory content. 
+ """ + from datetime import timedelta + + memories = [] + today = datetime.now().date() + + for i in range(days): + date = today - timedelta(days=i) + date_str = date.strftime("%Y-%m-%d") + file_path = self.memory_dir / f"{date_str}.md" + + if file_path.exists(): + content = file_path.read_text(encoding="utf-8") + memories.append(content) + + return "\n\n---\n\n".join(memories) + + def list_memory_files(self) -> list[Path]: + """List all memory files sorted by date (newest first).""" + if not self.memory_dir.exists(): + return [] + + files = list(self.memory_dir.glob("????-??-??.md")) + return sorted(files, reverse=True) + + def get_memory_context(self) -> str: + """ + Get memory context for the agent. + + Returns: + Formatted memory context including long-term and recent memories. + """ + parts = [] + + # Long-term memory + long_term = self.read_long_term() + if long_term: + parts.append("## Long-term Memory\n" + long_term) + + # Today's notes + today = self.read_today() + if today: + parts.append("## Today's Notes\n" + today) + + return "\n\n".join(parts) if parts else "" diff --git a/nanobot/nanobot/agent/skills.py b/nanobot/nanobot/agent/skills.py new file mode 100644 index 0000000..4779b9c --- /dev/null +++ b/nanobot/nanobot/agent/skills.py @@ -0,0 +1,246 @@ +"""Skills loader for agent capabilities.""" + +import json +import os +import re +import shutil +from pathlib import Path + +# Default builtin skills directory (relative to this file) +BUILTIN_SKILLS_DIR = Path(__file__).parent.parent / "skills" + + +class SkillsLoader: + """ + Loader for agent skills. + + Skills are markdown files (SKILL.md) that teach the agent how to use + specific tools or perform certain tasks. 
+ """ + + def __init__(self, workspace: Path, builtin_skills_dir: Path | None = None): + self.workspace = workspace + self.workspace_skills = workspace / "skills" + self.builtin_skills = builtin_skills_dir or BUILTIN_SKILLS_DIR + + def list_skills(self, filter_unavailable: bool = True) -> list[dict[str, str]]: + """ + List all available skills. + + Args: + filter_unavailable: If True, filter out skills with unmet requirements. + + Returns: + List of skill info dicts with 'name', 'path', 'source'. + """ + skills = [] + + # Workspace skills (highest priority) + if self.workspace_skills.exists(): + for skill_dir in self.workspace_skills.iterdir(): + if skill_dir.is_dir(): + skill_file = skill_dir / "SKILL.md" + if skill_file.exists(): + skills.append( + { + "name": skill_dir.name, + "path": str(skill_file), + "source": "workspace", + } + ) + + # Built-in skills + if self.builtin_skills and self.builtin_skills.exists(): + for skill_dir in self.builtin_skills.iterdir(): + if skill_dir.is_dir(): + skill_file = skill_dir / "SKILL.md" + if skill_file.exists() and not any( + s["name"] == skill_dir.name for s in skills + ): + skills.append( + { + "name": skill_dir.name, + "path": str(skill_file), + "source": "builtin", + } + ) + + # Filter by requirements + if filter_unavailable: + return [ + s + for s in skills + if self._check_requirements(self._get_skill_meta(s["name"])) + ] + return skills + + def load_skill(self, name: str) -> str | None: + """ + Load a skill by name. + + Args: + name: Skill name (directory name). + + Returns: + Skill content or None if not found. 
+ """ + # Check workspace first + workspace_skill = self.workspace_skills / name / "SKILL.md" + if workspace_skill.exists(): + return workspace_skill.read_text(encoding="utf-8") + + # Check built-in + if self.builtin_skills: + builtin_skill = self.builtin_skills / name / "SKILL.md" + if builtin_skill.exists(): + return builtin_skill.read_text(encoding="utf-8") + + return None + + def load_skills_for_context(self, skill_names: list[str]) -> str: + """ + Load specific skills for inclusion in agent context. + + Args: + skill_names: List of skill names to load. + + Returns: + Formatted skills content. + """ + parts = [] + for name in skill_names: + content = self.load_skill(name) + if content: + content = self._strip_frontmatter(content) + parts.append(f"### Skill: {name}\n\n{content}") + + return "\n\n---\n\n".join(parts) if parts else "" + + def build_skills_summary(self) -> str: + """ + Build a summary of all skills (name, description, path, availability). + + This is used for progressive loading - the agent can read the full + skill content using read_file when needed. + + Returns: + XML-formatted skills summary. 
+ """ + all_skills = self.list_skills(filter_unavailable=False) + if not all_skills: + return "" + + def escape_xml(s: str) -> str: + return s.replace("&", "&").replace("<", "<").replace(">", ">") + + lines = ["<skills>"] + for s in all_skills: + name = escape_xml(s["name"]) + path = s["path"] + desc = escape_xml(self._get_skill_description(s["name"])) + skill_meta = self._get_skill_meta(s["name"]) + available = self._check_requirements(skill_meta) + + lines.append(f' <skill available="{str(available).lower()}">') + lines.append(f" <name>{name}</name>") + lines.append(f" <description>{desc}</description>") + lines.append(f" <location>{path}</location>") + + # Show missing requirements for unavailable skills + if not available: + missing = self._get_missing_requirements(skill_meta) + if missing: + lines.append(f" <requires>{escape_xml(missing)}</requires>") + + lines.append(" </skill>") + lines.append("</skills>") + + return "\n".join(lines) + + def _get_missing_requirements(self, skill_meta: dict) -> str: + """Get a description of missing requirements.""" + missing = [] + requires = skill_meta.get("requires", {}) + for b in requires.get("bins", []): + if not shutil.which(b): + missing.append(f"CLI: {b}") + for env in requires.get("env", []): + if not os.environ.get(env): + missing.append(f"ENV: {env}") + return ", ".join(missing) + + def _get_skill_description(self, name: str) -> str: + """Get the description of a skill from its frontmatter.""" + meta = self.get_skill_metadata(name) + if meta and meta.get("description"): + return meta["description"] + return name # Fallback to skill name + + def _strip_frontmatter(self, content: str) -> str: + """Remove YAML frontmatter from markdown content.""" + if content.startswith("---"): + match = re.match(r"^---\n.*?\n---\n", content, re.DOTALL) + if match: + return content[match.end() :].strip() + return content + + def _parse_nanobot_metadata(self, raw: str) -> dict: + """Parse nanobot metadata JSON from frontmatter.""" + 
try: + data = json.loads(raw) + return data.get("nanobot", {}) if isinstance(data, dict) else {} + except (json.JSONDecodeError, TypeError): + return {} + + def _check_requirements(self, skill_meta: dict) -> bool: + """Check if skill requirements are met (bins, env vars).""" + requires = skill_meta.get("requires", {}) + for b in requires.get("bins", []): + if not shutil.which(b): + return False + for env in requires.get("env", []): + if not os.environ.get(env): + return False + return True + + def _get_skill_meta(self, name: str) -> dict: + """Get nanobot metadata for a skill (cached in frontmatter).""" + meta = self.get_skill_metadata(name) or {} + return self._parse_nanobot_metadata(meta.get("metadata", "")) + + def get_always_skills(self) -> list[str]: + """Get skills marked as always=true that meet requirements.""" + result = [] + for s in self.list_skills(filter_unavailable=True): + meta = self.get_skill_metadata(s["name"]) or {} + skill_meta = self._parse_nanobot_metadata(meta.get("metadata", "")) + if skill_meta.get("always") or meta.get("always"): + result.append(s["name"]) + return result + + def get_skill_metadata(self, name: str) -> dict | None: + """ + Get metadata from a skill's frontmatter. + + Args: + name: Skill name. + + Returns: + Metadata dict or None. 
+ """ + content = self.load_skill(name) + if not content: + return None + + if content.startswith("---"): + match = re.match(r"^---\n(.*?)\n---", content, re.DOTALL) + if match: + # Simple YAML parsing + metadata = {} + for line in match.group(1).split("\n"): + if ":" in line: + key, value = line.split(":", 1) + metadata[key.strip()] = value.strip().strip("\"'") + return metadata + + return None diff --git a/nanobot/nanobot/agent/subagent.py b/nanobot/nanobot/agent/subagent.py new file mode 100644 index 0000000..6691166 --- /dev/null +++ b/nanobot/nanobot/agent/subagent.py @@ -0,0 +1,273 @@ +"""Subagent manager for background task execution.""" + +from __future__ import annotations + +import asyncio +import json +import uuid +from pathlib import Path +from typing import TYPE_CHECKING, Any + +from loguru import logger + +from nanobot.bus.events import InboundMessage +from nanobot.bus.queue import MessageBus +from nanobot.providers.base import LLMProvider +from nanobot.agent.tools.registry import ToolRegistry +from nanobot.agent.tools.filesystem import ReadFileTool, WriteFileTool, ListDirTool +from nanobot.agent.tools.shell import ExecTool +from nanobot.agent.tools.web import WebSearchTool, WebFetchTool + +if TYPE_CHECKING: + from nanobot.config.schema import ExecToolConfig + + +class SubagentManager: + """ + Manages background subagent execution. + + Subagents are lightweight agent instances that run in the background + to handle specific tasks. They share the same LLM provider but have + isolated context and a focused system prompt. 
+ """ + + def __init__( + self, + provider: LLMProvider, + workspace: Path, + bus: MessageBus, + model: str | None = None, + brave_api_key: str | None = None, + exec_config: "ExecToolConfig | None" = None, + ): + from nanobot.config.schema import ExecToolConfig + + self.provider = provider + self.workspace = workspace + self.bus = bus + self.model = model or provider.get_default_model() + self.brave_api_key = brave_api_key + self.exec_config = exec_config or ExecToolConfig() + self._running_tasks: dict[str, asyncio.Task[None]] = {} + self._max_concurrent = ( + 5 # Limit concurrent subagents to prevent resource exhaustion + ) + + async def spawn( + self, + task: str, + label: str | None = None, + origin_channel: str = "cli", + origin_chat_id: str = "direct", + ) -> str: + """ + Spawn a subagent to execute a task in the background. + + Args: + task: The task description for the subagent. + label: Optional human-readable label for the task. + origin_channel: The channel to announce results to. + origin_chat_id: The chat ID to announce results to. + + Returns: + Status message indicating the subagent was started. + """ + # Enforce concurrency limit + if len(self._running_tasks) >= self._max_concurrent: + return ( + f"Error: Too many concurrent subagents ({len(self._running_tasks)}/{self._max_concurrent}). " + "Please wait for existing tasks to complete before spawning new ones." + ) + + task_id = str(uuid.uuid4())[:8] + display_label = label or task[:30] + ("..." if len(task) > 30 else "") + + origin = { + "channel": origin_channel, + "chat_id": origin_chat_id, + } + + # Create background task + bg_task = asyncio.create_task( + self._run_subagent(task_id, task, display_label, origin) + ) + self._running_tasks[task_id] = bg_task + + # Cleanup when done + bg_task.add_done_callback(lambda _: self._running_tasks.pop(task_id, None)) + + logger.info(f"Spawned subagent [{task_id}]: {display_label}") + return f"Subagent [{display_label}] started (id: {task_id}). 
I'll notify you when it completes." + + async def _run_subagent( + self, + task_id: str, + task: str, + label: str, + origin: dict[str, str], + ) -> None: + """Execute the subagent task and announce the result.""" + logger.info(f"Subagent [{task_id}] starting task: {label}") + + try: + # Build subagent tools (no message tool, no spawn tool) + # File tools are workspace-sandboxed + tools = ToolRegistry() + tools.register(ReadFileTool(workspace=self.workspace)) + tools.register(WriteFileTool(workspace=self.workspace)) + tools.register(ListDirTool(workspace=self.workspace)) + tools.register( + ExecTool( + working_dir=str(self.workspace), + timeout=self.exec_config.timeout, + restrict_to_workspace=self.exec_config.restrict_to_workspace, + ) + ) + tools.register(WebSearchTool(api_key=self.brave_api_key)) + tools.register(WebFetchTool()) + + # Build messages with subagent-specific prompt + system_prompt = self._build_subagent_prompt(task) + messages: list[dict[str, Any]] = [ + {"role": "system", "content": system_prompt}, + {"role": "user", "content": task}, + ] + + # Run agent loop (limited iterations) + max_iterations = 15 + iteration = 0 + final_result: str | None = None + + while iteration < max_iterations: + iteration += 1 + + response = await self.provider.chat( + messages=messages, + tools=tools.get_definitions(), + model=self.model, + ) + + if response.has_tool_calls: + # Add assistant message with tool calls + tool_call_dicts = [ + { + "id": tc.id, + "type": "function", + "function": { + "name": tc.name, + "arguments": json.dumps(tc.arguments), + }, + } + for tc in response.tool_calls + ] + messages.append( + { + "role": "assistant", + "content": response.content or "", + "tool_calls": tool_call_dicts, + } + ) + + # Execute tools + for tool_call in response.tool_calls: + logger.debug( + f"Subagent [{task_id}] executing: {tool_call.name}" + ) + result = await tools.execute( + tool_call.name, tool_call.arguments + ) + messages.append( + { + "role": "tool", + 
"tool_call_id": tool_call.id, + "name": tool_call.name, + "content": result, + } + ) + else: + final_result = response.content + break + + if final_result is None: + final_result = "Task completed but no final response was generated." + + logger.info(f"Subagent [{task_id}] completed successfully") + await self._announce_result( + task_id, label, task, final_result, origin, "ok" + ) + + except Exception as e: + error_msg = f"Error: {str(e)}" + logger.error(f"Subagent [{task_id}] failed: {e}") + await self._announce_result( + task_id, label, task, error_msg, origin, "error" + ) + + async def _announce_result( + self, + task_id: str, + label: str, + task: str, + result: str, + origin: dict[str, str], + status: str, + ) -> None: + """Announce the subagent result to the main agent via the message bus.""" + status_text = "completed successfully" if status == "ok" else "failed" + + announce_content = f"""[Subagent '{label}' {status_text}] + +Task: {task} + +Result: +{result} + +Summarize this naturally for the user. Keep it brief (1-2 sentences). Do not mention technical details like "subagent" or task IDs.""" + + # Inject as system message to trigger main agent + msg = InboundMessage( + channel="system", + sender_id="subagent", + chat_id=f"{origin['channel']}:{origin['chat_id']}", + content=announce_content, + ) + + await self.bus.publish_inbound(msg) + logger.debug( + f"Subagent [{task_id}] announced result to {origin['channel']}:{origin['chat_id']}" + ) + + def _build_subagent_prompt(self, task: str) -> str: + """Build a focused system prompt for the subagent.""" + return f"""# Subagent + +You are a subagent spawned by the main agent to complete a specific task. + +## Your Task +{task} + +## Rules +1. Stay focused - complete only the assigned task, nothing else +2. Your final response will be reported back to the main agent +3. Do not initiate conversations or take on side tasks +4. 
Be concise but informative in your findings + +## What You Can Do +- Read and write files in the workspace +- Execute shell commands +- Search the web and fetch web pages +- Complete the task thoroughly + +## What You Cannot Do +- Send messages directly to users (no message tool available) +- Spawn other subagents +- Access the main agent's conversation history + +## Workspace +Your workspace is at: {self.workspace} + +When you have completed the task, provide a clear summary of your findings or actions.""" + + def get_running_count(self) -> int: + """Return the number of currently running subagents.""" + return len(self._running_tasks) diff --git a/nanobot/nanobot/agent/tools/__init__.py b/nanobot/nanobot/agent/tools/__init__.py new file mode 100644 index 0000000..aac5d7d --- /dev/null +++ b/nanobot/nanobot/agent/tools/__init__.py @@ -0,0 +1,6 @@ +"""Agent tools module.""" + +from nanobot.agent.tools.base import Tool +from nanobot.agent.tools.registry import ToolRegistry + +__all__ = ["Tool", "ToolRegistry"] diff --git a/nanobot/nanobot/agent/tools/base.py b/nanobot/nanobot/agent/tools/base.py new file mode 100644 index 0000000..8e2ae3f --- /dev/null +++ b/nanobot/nanobot/agent/tools/base.py @@ -0,0 +1,108 @@ +"""Base class for agent tools.""" + +from abc import ABC, abstractmethod +from typing import Any + + +class Tool(ABC): + """ + Abstract base class for agent tools. + + Tools are capabilities that the agent can use to interact with + the environment, such as reading files, executing commands, etc. 
+ """ + + _TYPE_MAP = { + "string": str, + "integer": int, + "number": (int, float), + "boolean": bool, + "array": list, + "object": dict, + } + + @property + @abstractmethod + def name(self) -> str: + """Tool name used in function calls.""" + pass + + @property + @abstractmethod + def description(self) -> str: + """Description of what the tool does.""" + pass + + @property + @abstractmethod + def parameters(self) -> dict[str, Any]: + """JSON Schema for tool parameters.""" + pass + + @abstractmethod + async def execute(self, **kwargs: Any) -> str: + """ + Execute the tool with given parameters. + + Args: + **kwargs: Tool-specific parameters. + + Returns: + String result of the tool execution. + """ + pass + + def validate_params(self, params: dict[str, Any]) -> list[str]: + """Validate tool parameters against JSON schema. Returns error list (empty if valid).""" + schema = self.parameters or {} + if schema.get("type", "object") != "object": + raise ValueError(f"Schema must be object type, got {schema.get('type')!r}") + return self._validate(params, {**schema, "type": "object"}, "") + + def _validate(self, val: Any, schema: dict[str, Any], path: str) -> list[str]: + t, label = schema.get("type"), path or "parameter" + if t in self._TYPE_MAP and not isinstance(val, self._TYPE_MAP[t]): + return [f"{label} should be {t}"] + + errors = [] + if "enum" in schema and val not in schema["enum"]: + errors.append(f"{label} must be one of {schema['enum']}") + if t in ("integer", "number"): + if "minimum" in schema and val < schema["minimum"]: + errors.append(f"{label} must be >= {schema['minimum']}") + if "maximum" in schema and val > schema["maximum"]: + errors.append(f"{label} must be <= {schema['maximum']}") + if t == "string": + if "minLength" in schema and len(val) < schema["minLength"]: + errors.append(f"{label} must be at least {schema['minLength']} chars") + if "maxLength" in schema and len(val) > schema["maxLength"]: + errors.append(f"{label} must be at most 
{schema['maxLength']} chars") + if t == "object": + props = schema.get("properties", {}) + for k in schema.get("required", []): + if k not in val: + errors.append(f"missing required {path + '.' + k if path else k}") + for k, v in val.items(): + if k in props: + errors.extend( + self._validate(v, props[k], path + "." + k if path else k) + ) + if t == "array" and "items" in schema: + for i, item in enumerate(val): + errors.extend( + self._validate( + item, schema["items"], f"{path}[{i}]" if path else f"[{i}]" + ) + ) + return errors + + def to_schema(self) -> dict[str, Any]: + """Convert tool to OpenAI function schema format.""" + return { + "type": "function", + "function": { + "name": self.name, + "description": self.description, + "parameters": self.parameters, + }, + } diff --git a/nanobot/nanobot/agent/tools/deep_research.py b/nanobot/nanobot/agent/tools/deep_research.py new file mode 100644 index 0000000..fef2723 --- /dev/null +++ b/nanobot/nanobot/agent/tools/deep_research.py @@ -0,0 +1,223 @@ +"""Deep Research tool for nanobot — calls AI server's Deep Research SSE endpoint.""" + +import json +from typing import Any + +import httpx +from loguru import logger + +from nanobot.agent.tools.base import Tool + + +class LitewriteDeepResearchTool(Tool): + """ + Tool to perform deep research via Litewrite's AI server. + + Sends a query to the Deep Research SSE endpoint, consumes the event stream, + and returns the aggregated report with references and BibTeX. + """ + + def __init__(self, ai_server_url: str): + self._ai_server_url = ai_server_url.rstrip("/") + + @property + def name(self) -> str: + return "litewrite_deep_research" + + @property + def description(self) -> str: + return ( + "Perform deep research on a topic. " + "Searches arXiv papers and the web, analyzes knowledge gaps with " + "multi-iteration search, then generates a comprehensive research report " + "with references and BibTeX. " + "This tool may take a few minutes to complete. 
" + "The result includes the full report in Markdown, a references list, " + "and BibTeX entries for academic citations." + ) + + @property + def parameters(self) -> dict[str, Any]: + return { + "type": "object", + "properties": { + "query": { + "type": "string", + "description": "The research question or topic to investigate", + }, + "max_iterations": { + "type": "integer", + "description": ( + "Maximum number of search iterations for knowledge gap " + "analysis (default: 3, range: 1-5)" + ), + "minimum": 1, + "maximum": 5, + }, + "structured": { + "type": "boolean", + "description": ( + "Whether to use structured mode with outline planning " + "and section-by-section generation (default: true)" + ), + }, + }, + "required": ["query"], + } + + async def execute( + self, + query: str, + max_iterations: int = 3, + structured: bool = True, + **kwargs: Any, + ) -> str: + """Execute deep research by consuming the AI server's SSE stream.""" + url = f"{self._ai_server_url}/api/deep-research/stream" + + payload = { + "query": query, + "arxiv_papers": 10, + "web_pages": 10, + "structured": structured, + "max_iterations": max_iterations, + } + + logger.info( + f"Starting deep research: query={query!r}, " + f"max_iterations={max_iterations}, structured={structured}" + ) + + report_chunks: list[str] = [] + # Shared mutable state dict — created ONCE, passed by reference to _process_event + result_state: dict[str, Any] = { + "bibtex": "", + "references_markdown": "", + "arxiv_count": 0, + "web_count": 0, + "error_message": "", + } + + try: + async with httpx.AsyncClient(timeout=httpx.Timeout(300.0)) as client: + async with client.stream( + "POST", + url, + json=payload, + headers={"Content-Type": "application/json"}, + ) as response: + if response.status_code != 200: + body = await response.aread() + return ( + f"Deep research request failed (HTTP {response.status_code}): " + f"{body.decode(errors='replace')}" + ) + + # Parse SSE stream + event_type = "" + + async for line in 
response.aiter_lines(): + # SSE format: "event: <type>\ndata: <json>\n\n" + if line.startswith("event: "): + event_type = line[7:].strip() + continue + + if line.startswith("data: "): + data_buffer = line[6:] + self._process_event( + event_type, + data_buffer, + report_chunks, + result_state, + ) + continue + + if line == "": + event_type = "" + + except httpx.TimeoutException: + return "Error: Deep research timed out after 300 seconds." + except httpx.ConnectError as e: + return ( + f"Error: Could not connect to AI server at {self._ai_server_url}: {e}" + ) + except Exception as e: + logger.error(f"Deep research error: {e}") + return f"Error during deep research: {e}" + + if result_state["error_message"]: + return f"Deep research failed: {result_state['error_message']}" + + report_content = "".join(report_chunks) + + if not report_content.strip(): + return "Deep research completed but produced no report content." + + # Build final output + parts = [report_content.strip()] + + if result_state["references_markdown"]: + parts.append( + f"\n\n---\n\n## References\n\n{result_state['references_markdown']}" + ) + + if result_state["bibtex"]: + parts.append( + f"\n\n---\n\n## BibTeX\n\n```bibtex\n{result_state['bibtex']}\n```" + ) + + arxiv_count = result_state["arxiv_count"] + web_count = result_state["web_count"] + summary = ( + f"\n\n---\nResearch complete: {arxiv_count} arXiv papers, " + f"{web_count} web sources." 
+ ) + parts.append(summary) + + result = "\n".join(parts) + logger.info( + f"Deep research done: {len(result)} chars, " + f"{arxiv_count} papers, {web_count} web sources" + ) + return result + + def _process_event( + self, + event_type: str, + data_str: str, + report_chunks: list[str], + result_state: dict[str, Any], + ) -> None: + """Process a single SSE event and update state accordingly.""" + try: + data = json.loads(data_str) + except json.JSONDecodeError: + return + + event_data = data.get("data", {}) + + if event_type in ("report_chunk", "section_chunk"): + chunk = event_data.get("chunk", "") + if chunk: + report_chunks.append(chunk) + + elif event_type == "done": + result_state["bibtex"] = event_data.get("bibtex", "") + result_state["references_markdown"] = event_data.get( + "references_markdown", "" + ) + result_state["arxiv_count"] = event_data.get("arxiv_count", 0) + result_state["web_count"] = event_data.get("web_count", 0) + + elif event_type == "error": + result_state["error_message"] = data.get("message", "Unknown error") + + elif event_type == "search_result": + logger.debug( + f"Search results: {event_data.get('arxiv_count', 0)} papers, " + f"{event_data.get('web_count', 0)} web pages " + f"(iteration {event_data.get('iteration', '?')})" + ) + + elif event_type == "progress": + logger.debug(f"Research progress: {data.get('message', '')}") diff --git a/nanobot/nanobot/agent/tools/filesystem.py b/nanobot/nanobot/agent/tools/filesystem.py new file mode 100644 index 0000000..95ab1f3 --- /dev/null +++ b/nanobot/nanobot/agent/tools/filesystem.py @@ -0,0 +1,237 @@ +"""File system tools: read, write, edit.""" + +from pathlib import Path +from typing import Any + + +from nanobot.agent.tools.base import Tool + + +def _resolve_and_check( + raw_path: str, workspace: Path | None +) -> tuple[Path | None, str]: + """ + Resolve a path and verify it is within the allowed workspace. + + Returns (resolved_path, error_message). error_message is empty on success. 
+ """ + try: + file_path = Path(raw_path).expanduser().resolve() + except Exception as e: + return None, f"Invalid path: {e}" + + if workspace is not None: + ws = workspace.expanduser().resolve() + # Allow paths under workspace or under ~/.nanobot (data dir) + nanobot_data = Path.home() / ".nanobot" + nanobot_data_resolved = nanobot_data.resolve() + if not ( + file_path == ws + or ws in file_path.parents + or file_path == nanobot_data_resolved + or nanobot_data_resolved in file_path.parents + ): + return None, ( + f"Error: Access denied. Path '{raw_path}' is outside the allowed workspace. " + f"Allowed: {ws} and {nanobot_data_resolved}" + ) + + return file_path, "" + + +class ReadFileTool(Tool): + """Tool to read file contents.""" + + def __init__(self, workspace: Path | None = None): + self._workspace = workspace + + @property + def name(self) -> str: + return "read_file" + + @property + def description(self) -> str: + return "Read the contents of a file at the given path." + + @property + def parameters(self) -> dict[str, Any]: + return { + "type": "object", + "properties": { + "path": {"type": "string", "description": "The file path to read"} + }, + "required": ["path"], + } + + async def execute(self, path: str, **kwargs: Any) -> str: + file_path, err = _resolve_and_check(path, self._workspace) + if err: + return err + assert file_path is not None + try: + if not file_path.exists(): + return f"Error: File not found: {path}" + if not file_path.is_file(): + return f"Error: Not a file: {path}" + + content = file_path.read_text(encoding="utf-8") + return content + except PermissionError: + return f"Error: Permission denied: {path}" + except Exception as e: + return f"Error reading file: {str(e)}" + + +class WriteFileTool(Tool): + """Tool to write content to a file.""" + + def __init__(self, workspace: Path | None = None): + self._workspace = workspace + + @property + def name(self) -> str: + return "write_file" + + @property + def description(self) -> str: + return 
"Write content to a file at the given path. Creates parent directories if needed." + + @property + def parameters(self) -> dict[str, Any]: + return { + "type": "object", + "properties": { + "path": {"type": "string", "description": "The file path to write to"}, + "content": {"type": "string", "description": "The content to write"}, + }, + "required": ["path", "content"], + } + + async def execute(self, path: str, content: str, **kwargs: Any) -> str: + file_path, err = _resolve_and_check(path, self._workspace) + if err: + return err + assert file_path is not None + try: + file_path.parent.mkdir(parents=True, exist_ok=True) + file_path.write_text(content, encoding="utf-8") + return f"Successfully wrote {len(content)} bytes to {path}" + except PermissionError: + return f"Error: Permission denied: {path}" + except Exception as e: + return f"Error writing file: {str(e)}" + + +class EditFileTool(Tool): + """Tool to edit a file by replacing text.""" + + def __init__(self, workspace: Path | None = None): + self._workspace = workspace + + @property + def name(self) -> str: + return "edit_file" + + @property + def description(self) -> str: + return "Edit a file by replacing old_text with new_text. The old_text must exist exactly in the file." 
+ + @property + def parameters(self) -> dict[str, Any]: + return { + "type": "object", + "properties": { + "path": {"type": "string", "description": "The file path to edit"}, + "old_text": { + "type": "string", + "description": "The exact text to find and replace", + }, + "new_text": { + "type": "string", + "description": "The text to replace with", + }, + }, + "required": ["path", "old_text", "new_text"], + } + + async def execute( + self, path: str, old_text: str, new_text: str, **kwargs: Any + ) -> str: + file_path, err = _resolve_and_check(path, self._workspace) + if err: + return err + assert file_path is not None + try: + if not file_path.exists(): + return f"Error: File not found: {path}" + + content = file_path.read_text(encoding="utf-8") + + if old_text not in content: + return ( + "Error: old_text not found in file. Make sure it matches exactly." + ) + + # Count occurrences + count = content.count(old_text) + if count > 1: + return f"Warning: old_text appears {count} times. Please provide more context to make it unique." + + new_content = content.replace(old_text, new_text, 1) + file_path.write_text(new_content, encoding="utf-8") + + return f"Successfully edited {path}" + except PermissionError: + return f"Error: Permission denied: {path}" + except Exception as e: + return f"Error editing file: {str(e)}" + + +class ListDirTool(Tool): + """Tool to list directory contents.""" + + def __init__(self, workspace: Path | None = None): + self._workspace = workspace + + @property + def name(self) -> str: + return "list_dir" + + @property + def description(self) -> str: + return "List the contents of a directory." 
+ + @property + def parameters(self) -> dict[str, Any]: + return { + "type": "object", + "properties": { + "path": {"type": "string", "description": "The directory path to list"} + }, + "required": ["path"], + } + + async def execute(self, path: str, **kwargs: Any) -> str: + file_path, err = _resolve_and_check(path, self._workspace) + if err: + return err + assert file_path is not None + try: + if not file_path.exists(): + return f"Error: Directory not found: {path}" + if not file_path.is_dir(): + return f"Error: Not a directory: {path}" + + items = [] + for item in sorted(file_path.iterdir()): + prefix = "d " if item.is_dir() else "f " + items.append(f"{prefix}{item.name}") + + if not items: + return f"Directory {path} is empty" + + return "\n".join(items) + except PermissionError: + return f"Error: Permission denied: {path}" + except Exception as e: + return f"Error listing directory: {str(e)}" diff --git a/nanobot/nanobot/agent/tools/litewrite.py b/nanobot/nanobot/agent/tools/litewrite.py new file mode 100644 index 0000000..cc8e618 --- /dev/null +++ b/nanobot/nanobot/agent/tools/litewrite.py @@ -0,0 +1,1568 @@ +"""Litewrite integration tools for managing LaTeX projects.""" + +import base64 +import mimetypes +from pathlib import Path +from typing import Any, Callable, Awaitable + +import httpx +from loguru import logger + +from nanobot.agent.tools.base import Tool +from nanobot.bus.events import OutboundMessage + + +# --------------------------------------------------------------------------- +# Litewrite Agent session ID cache (shared across tool instances) +# Maps project_id -> session_id for reusing the same session per project +# --------------------------------------------------------------------------- +_agent_session_cache: dict[str, str] = {} + + +class LitewriteClient: + """HTTP client for Litewrite Internal API.""" + + def __init__(self, base_url: str, api_secret: str): + self.base_url = base_url.rstrip("/") + self.api_secret = api_secret + + async def 
request(self, endpoint: str, data: dict[str, Any]) -> dict[str, Any]: + """Send a POST request to a Litewrite internal API endpoint.""" + url = f"{self.base_url}{endpoint}" + headers = {"X-Internal-Secret": self.api_secret} + + async with httpx.AsyncClient(timeout=120) as client: + resp = await client.post(url, json=data, headers=headers) + return resp.json() + + async def validate_project( + self, project_id: str, owner_id: str = "" + ) -> tuple[bool, str]: + """Check whether *project_id* exists. + + Returns ``(True, "")`` on success, or ``(False, error_message)`` + with a helpful listing of available projects when the ID is invalid. + """ + try: + data: dict[str, Any] = {} + if owner_id: + data["ownerId"] = owner_id + result = await self.request("/api/internal/projects/list", data) + + # If the API call itself failed (e.g. ownerId missing → 400), + # skip validation and let the downstream call proceed. + if not result.get("success"): + logger.warning( + f"Project list API failed: {result.get('error', 'unknown')}; " + "skipping validation" + ) + return True, "" + + projects = result.get("data", {}).get("projects", []) + ids = {p["id"] for p in projects} + if project_id in ids: + return True, "" + available = ", ".join(f"{p['name']} [{p['id']}]" for p in projects) + return False, ( + f"Project '{project_id}' not found. " + f"Available projects: {available or '(none)'}" + ) + except Exception as e: + # If validation itself fails, let the call proceed + logger.warning(f"Project validation failed: {e}") + return True, "" + + +class LitewriteListProjectsTool(Tool): + """Tool to list/search Litewrite projects.""" + + def __init__(self, client: LitewriteClient, default_owner_id: str = ""): + self._client = client + self._default_owner_id = default_owner_id + + @property + def name(self) -> str: + return "litewrite_list_projects" + + @property + def description(self) -> str: + return ( + "List LaTeX projects in Litewrite. 
" + "Use the search parameter to find projects by name (partial match)." + ) + + @property + def parameters(self) -> dict[str, Any]: + return { + "type": "object", + "properties": { + "search": { + "type": "string", + "description": "Search keyword to filter projects by name", + }, + }, + } + + async def execute(self, search: str = "", **kwargs: Any) -> str: + # Security: require default_owner_id to prevent cross-tenant enumeration + if not self._default_owner_id: + return ( + "Error: No default owner ID configured. " + "Cannot list projects without user scope. " + "Please configure NANOBOT_DEFAULT_LITEWRITE_USER_ID." + ) + + data: dict[str, Any] = {"ownerId": self._default_owner_id} + if search: + data["search"] = search + + result = await self._client.request("/api/internal/projects/list", data) + + if not result.get("success"): + return f"Error listing projects: {result.get('error', 'Unknown error')}" + + projects = result.get("data", {}).get("projects", []) + if not projects: + return "No projects found." + + lines = [f"Found {len(projects)} project(s):"] + for p in projects: + lines.append( + f"- [{p['id']}] {p['name']}" + + (f" ({p.get('description', '')})" if p.get("description") else "") + + f" (main: {p.get('mainFile', 'main.tex')})" + ) + return "\n".join(lines) + + +class LitewriteListFilesTool(Tool): + """Tool to list files in a Litewrite project.""" + + def __init__(self, client: LitewriteClient): + self._client = client + + @property + def name(self) -> str: + return "litewrite_list_files" + + @property + def description(self) -> str: + return "List all files in a Litewrite project." 
+ + @property + def parameters(self) -> dict[str, Any]: + return { + "type": "object", + "properties": { + "project_id": { + "type": "string", + "description": "The project ID", + }, + }, + "required": ["project_id"], + } + + async def execute(self, project_id: str, **kwargs: Any) -> str: + result = await self._client.request( + "/api/internal/files/list", + {"projectId": project_id}, + ) + + if not result.get("success"): + return f"Error listing files: {result.get('error', 'Unknown error')}" + + files = result.get("data", {}).get("files", []) + if not files: + return "No files found in this project." + + lines = [f"Files in project ({len(files)}):"] + for f in files: + size_str = f" ({f['size']} bytes)" if f.get("size") else "" + lines.append(f"- [{f['type']}] {f['path']}{size_str}") + return "\n".join(lines) + + +class LitewriteReadFileTool(Tool): + """Tool to read a file from a Litewrite project.""" + + def __init__(self, client: LitewriteClient): + self._client = client + + @property + def name(self) -> str: + return "litewrite_read_file" + + @property + def description(self) -> str: + return "Read the content of a file in a Litewrite project." + + @property + def parameters(self) -> dict[str, Any]: + return { + "type": "object", + "properties": { + "project_id": { + "type": "string", + "description": "The project ID", + }, + "file_path": { + "type": "string", + "description": "The file path within the project (e.g. 
'main.tex')", + }, + }, + "required": ["project_id", "file_path"], + } + + async def execute(self, project_id: str, file_path: str, **kwargs: Any) -> str: + result = await self._client.request( + "/api/internal/files/read", + {"projectId": project_id, "filePath": file_path}, + ) + + if not result.get("success"): + return f"Error reading file: {result.get('error', 'Unknown error')}" + + content = result.get("data", {}).get("content", "") + total_lines = result.get("data", {}).get("totalLines", 0) + return f"File: {file_path} ({total_lines} lines)\n\n{content}" + + +class LitewriteEditFileTool(Tool): + """Tool to edit (replace) a file in a Litewrite project.""" + + def __init__(self, client: LitewriteClient): + self._client = client + + @property + def name(self) -> str: + return "litewrite_edit_file" + + @property + def description(self) -> str: + return ( + "Replace the entire content of a file in a Litewrite project. " + "You must provide the complete new file content." + ) + + @property + def parameters(self) -> dict[str, Any]: + return { + "type": "object", + "properties": { + "project_id": { + "type": "string", + "description": "The project ID", + }, + "file_path": { + "type": "string", + "description": "The file path within the project (e.g. 
'main.tex')", + }, + "content": { + "type": "string", + "description": "The complete new file content", + }, + }, + "required": ["project_id", "file_path", "content"], + } + + async def execute( + self, project_id: str, file_path: str, content: str, **kwargs: Any + ) -> str: + result = await self._client.request( + "/api/internal/files/edit", + {"projectId": project_id, "filePath": file_path, "content": content}, + ) + + if not result.get("success"): + return f"Error editing file: {result.get('error', 'Unknown error')}" + + length = result.get("data", {}).get("length", len(content)) + return f"Successfully updated {file_path} ({length} chars)" + + +class LitewriteAgentTool(Tool): + """Tool to invoke litewrite's built-in AI agent for writing/editing tasks. + + Maintains a per-project session cache so that consecutive calls to the + same project reuse the same conversation session. This allows the + litewrite agent to see prior conversation context and ensures the + session appears in the web UI's Conversation History as "nanobot". + """ + + def __init__(self, client: LitewriteClient, default_owner_id: str = ""): + self._client = client + self._default_owner_id = default_owner_id + + @property + def name(self) -> str: + return "litewrite_agent" + + @property + def description(self) -> str: + return ( + "Invoke Litewrite's built-in AI agent to handle complex writing and editing tasks. " + "The agent understands LaTeX structure and can read files, plan multi-step edits, " + "and apply precise line-based changes. Use this for writing new content, " + "rewriting/restructuring sections, complex multi-file edits, or analyzing documents. " + "Edits are applied directly to the project files." + ) + + @property + def parameters(self) -> dict[str, Any]: + return { + "type": "object", + "properties": { + "project_id": { + "type": "string", + "description": "The project ID", + }, + "message": { + "type": "string", + "description": ( + "The instruction for the agent. 
Be specific about what to write, " + "edit, or analyze. Examples: 'Add an abstract section to main.tex', " + "'Rewrite the introduction to be more concise', " + "'Fix all citation formatting issues'" + ), + }, + "mode": { + "type": "string", + "enum": ["agent", "ask"], + "description": ( + "Agent mode: 'agent' (default) for editing/writing tasks, " + "'ask' for read-only analysis and Q&A about the project" + ), + }, + "referenced_files": { + "type": "array", + "items": {"type": "string"}, + "description": ( + "Optional list of file paths to reference. The agent will " + "prioritize reading these files. E.g. ['main.tex', 'sections/intro.tex']" + ), + }, + }, + "required": ["project_id", "message"], + } + + async def execute( + self, + project_id: str, + message: str, + mode: str = "agent", + referenced_files: list[str] | None = None, + **kwargs: Any, + ) -> str: + # Validate project ID before calling the agent (catches hallucinated IDs) + valid, err = await self._client.validate_project( + project_id, self._default_owner_id + ) + if not valid: + logger.warning(f"litewrite_agent: {err}") + return f"Error: {err}" + + # Look up cached session ID for this project + cached_session_id = _agent_session_cache.get(project_id) + + logger.info( + f"Invoking Litewrite agent: project={project_id}, mode={mode}, " + f"message_len={len(message)}, session={cached_session_id or '(new)'}" + ) + + data: dict[str, Any] = { + "projectId": project_id, + "message": message, + "mode": mode, + } + + if self._default_owner_id: + data["userId"] = self._default_owner_id + + if referenced_files: + data["referencedFiles"] = referenced_files + + # Pass session ID if we have one cached + if cached_session_id: + data["sessionId"] = cached_session_id + + try: + # Use longer timeout for agent execution (5 minutes) + url = f"{self._client.base_url}/api/internal/agent/run" + headers = {"X-Internal-Secret": self._client.api_secret} + + async with httpx.AsyncClient(timeout=300) as client: + resp = await 
client.post(url, json=data, headers=headers) + result = resp.json() + except httpx.TimeoutException: + logger.error(f"Litewrite agent timed out for project {project_id}") + return ( + "Error: Litewrite agent execution timed out after 5 minutes. " + "The task may be too complex. Try breaking it into smaller steps." + ) + except Exception as e: + logger.error(f"Litewrite agent error: {e}") + return f"Error invoking Litewrite agent: {str(e)}" + + # Cache the session ID returned by the server (created or reused) + returned_session_id = result.get("sessionId") + if returned_session_id: + _agent_session_cache[project_id] = returned_session_id + if not cached_session_id: + logger.info( + f"Cached new session for project {project_id}: {returned_session_id}" + ) + + if not result.get("success"): + error = result.get("error", "Unknown error") + logger.warning(f"Litewrite agent failed: {error}") + return f"Litewrite agent error: {error}" + + response = result.get("response", "") + logger.info(f"Litewrite agent completed: response_len={len(response)}") + + return f"Litewrite agent completed:\n\n{response}" + + +class LitewriteCompileTool(Tool): + """Tool to compile a Litewrite project and get the PDF.""" + + def __init__(self, client: LitewriteClient, default_owner_id: str = ""): + self._client = client + self._default_owner_id = default_owner_id + self._send_callback: Callable[[OutboundMessage], Awaitable[None]] | None = None + self._channel: str = "" + self._chat_id: str = "" + + def set_context( + self, + channel: str, + chat_id: str, + send_callback: Callable[[OutboundMessage], Awaitable[None]] | None = None, + ) -> None: + """Set the current message context for auto-sending PDFs.""" + self._channel = channel + self._chat_id = chat_id + if send_callback is not None: + self._send_callback = send_callback + + @property + def name(self) -> str: + return "litewrite_compile" + + @property + def description(self) -> str: + return ( + "Compile a Litewrite LaTeX project to PDF. 
" + "Supported compilers: pdflatex (default), xelatex, lualatex. " + "Use xelatex when the document contains Chinese/Japanese/Korean text or uses fontspec/xeCJK packages. " + "By default, the project is automatically saved as a version after successful compilation. " + "The compiled PDF is automatically sent to the user — you do NOT need to call the message tool afterwards." + ) + + @property + def parameters(self) -> dict[str, Any]: + return { + "type": "object", + "properties": { + "project_id": { + "type": "string", + "description": "The project ID to compile", + }, + "compiler": { + "type": "string", + "enum": ["pdflatex", "xelatex", "lualatex"], + "description": ( + "LaTeX compiler to use. Use 'xelatex' for documents with " + "Chinese/Japanese/Korean text or custom fonts. Default: pdflatex" + ), + }, + "auto_save": { + "type": "boolean", + "description": ( + "Whether to auto-save the project as a version after successful compilation. " + "Default: true" + ), + }, + }, + "required": ["project_id"], + } + + async def execute( + self, + project_id: str, + compiler: str = "pdflatex", + auto_save: bool = True, + **kwargs: Any, + ) -> str: + # Validate project ID before compiling + valid, err = await self._client.validate_project( + project_id, self._default_owner_id + ) + if not valid: + logger.warning(f"litewrite_compile: {err}") + return f"Error: {err}" + + logger.info( + f"Compiling Litewrite project: {project_id} (compiler={compiler}, auto_save={auto_save})" + ) + + data: dict[str, Any] = {"projectId": project_id, "autoSave": auto_save} + if compiler and compiler != "pdflatex": + data["compiler"] = compiler + if self._default_owner_id: + data["userId"] = self._default_owner_id + + result = await self._client.request( + "/api/internal/projects/compile", + data, + ) + + if not result.get("success"): + error = result.get("error", "Unknown error") + logs = result.get("logs", "") + return ( + f"Compilation failed: {error}\n{logs}" + if logs + else f"Compilation 
failed: {error}" + ) + + pdf_base64 = result.get("data", {}).get("pdfBase64", "") + pdf_filename = result.get("data", {}).get("pdfFileName", "output.pdf") + + if not pdf_base64: + return "Compilation succeeded but no PDF was produced." + + # Decode and save PDF to local file + media_dir = Path.home() / ".nanobot" / "media" + media_dir.mkdir(parents=True, exist_ok=True) + pdf_path = media_dir / f"{project_id}_{pdf_filename}" + + pdf_bytes = base64.b64decode(pdf_base64) + pdf_path.write_bytes(pdf_bytes) + + logger.info(f"PDF saved to {pdf_path} ({len(pdf_bytes)} bytes)") + + # Auto-send the PDF to the user + pdf_sent = False + if self._send_callback and self._channel and self._chat_id: + try: + await self._send_callback( + OutboundMessage( + channel=self._channel, + chat_id=self._chat_id, + content="", + media=[str(pdf_path)], + ) + ) + pdf_sent = True + logger.info(f"PDF auto-sent to {self._channel}:{self._chat_id}") + except Exception as e: + logger.error(f"Failed to auto-send PDF: {e}") + + # Build response with version info + lines = ["Compilation successful."] + if pdf_sent: + lines.append("The compiled PDF has been sent to the user.") + else: + lines.append(f"PDF saved to: {pdf_path}") + lines.append( + f'Use the message tool with media=["{pdf_path}"] to send it to the user.' 
+ ) + + version_saved = result.get("data", {}).get("versionSaved") + if version_saved: + lines.append(f'Version auto-saved: "{version_saved.get("name", "")}"') + + return "\n".join(lines) + + +# --------------------------------------------------------------------------- +# Project Management Tools +# --------------------------------------------------------------------------- + + +class LitewriteCreateProjectTool(Tool): + """Tool to create a new Litewrite project.""" + + def __init__(self, client: LitewriteClient, default_owner_id: str = ""): + self._client = client + self._default_owner_id = default_owner_id + + @property + def name(self) -> str: + return "litewrite_create_project" + + @property + def description(self) -> str: + return ( + "Create a new LaTeX project in Litewrite. " + "A default main.tex file will be created automatically. " + "Use locale='zh' for Chinese documents (uses ctex template with xelatex)." + ) + + @property + def parameters(self) -> dict[str, Any]: + return { + "type": "object", + "properties": { + "name": { + "type": "string", + "description": "The project name", + }, + "description": { + "type": "string", + "description": "Optional project description", + }, + "locale": { + "type": "string", + "enum": ["en", "zh"], + "description": ( + "Template locale: 'en' (default, English template) or " + "'zh' (Chinese template with ctex)" + ), + }, + }, + "required": ["name"], + } + + async def execute( + self, + name: str, + description: str = "", + locale: str = "en", + **kwargs: Any, + ) -> str: + data: dict[str, Any] = {"name": name, "locale": locale} + + if description: + data["description"] = description + + if self._default_owner_id: + data["ownerId"] = self._default_owner_id + else: + return "Error: Owner ID is not configured. Cannot create project." 
+ + result = await self._client.request("/api/internal/projects/create", data) + + if not result.get("success"): + return f"Error creating project: {result.get('error', 'Unknown error')}" + + project = result.get("data", {}).get("project", {}) + return ( + f"Project created successfully:\n" + f"- ID: {project.get('id')}\n" + f"- Name: {project.get('name')}\n" + f"- Created: {project.get('createdAt')}" + ) + + +class LitewriteDeleteProjectTool(Tool): + """Tool to delete a Litewrite project.""" + + def __init__(self, client: LitewriteClient): + self._client = client + + @property + def name(self) -> str: + return "litewrite_delete_project" + + @property + def description(self) -> str: + return ( + "Permanently delete a Litewrite project. " + "This removes the project, all its files, compiled artifacts, and version history. " + "This action is IRREVERSIBLE. Always confirm with the user before deleting." + ) + + @property + def parameters(self) -> dict[str, Any]: + return { + "type": "object", + "properties": { + "project_id": { + "type": "string", + "description": "The project ID to delete", + }, + }, + "required": ["project_id"], + } + + async def execute(self, project_id: str, **kwargs: Any) -> str: + result = await self._client.request( + "/api/internal/projects/delete", + {"projectId": project_id}, + ) + + if not result.get("success"): + return f"Error deleting project: {result.get('error', 'Unknown error')}" + + name = result.get("data", {}).get("name", "") + return f"Project '{name}' ({project_id}) has been permanently deleted." + + +class LitewriteRenameProjectTool(Tool): + """Tool to rename or update a Litewrite project's metadata.""" + + def __init__(self, client: LitewriteClient): + self._client = client + + @property + def name(self) -> str: + return "litewrite_rename_project" + + @property + def description(self) -> str: + return ( + "Rename a Litewrite project or update its description. " + "Provide at least one of 'name' or 'description'." 
+ ) + + @property + def parameters(self) -> dict[str, Any]: + return { + "type": "object", + "properties": { + "project_id": { + "type": "string", + "description": "The project ID", + }, + "name": { + "type": "string", + "description": "New project name", + }, + "description": { + "type": "string", + "description": "New project description", + }, + }, + "required": ["project_id"], + } + + async def execute( + self, + project_id: str, + name: str = "", + description: str | None = None, + **kwargs: Any, + ) -> str: + data: dict[str, Any] = {"projectId": project_id} + + if name: + data["name"] = name + if description is not None: + data["description"] = description + + if len(data) <= 1: + return "Error: Provide at least 'name' or 'description' to update." + + result = await self._client.request("/api/internal/projects/rename", data) + + if not result.get("success"): + return f"Error updating project: {result.get('error', 'Unknown error')}" + + project = result.get("data", {}).get("project", {}) + return ( + f"Project updated successfully:\n" + f"- ID: {project.get('id')}\n" + f"- Name: {project.get('name')}\n" + f"- Description: {project.get('description', 'N/A')}" + ) + + +# --------------------------------------------------------------------------- +# Version Management Tools +# --------------------------------------------------------------------------- + + +class LitewriteListVersionsTool(Tool): + """Tool to list all versions/history of a Litewrite project.""" + + def __init__(self, client: LitewriteClient): + self._client = client + + @property + def name(self) -> str: + return "litewrite_list_versions" + + @property + def description(self) -> str: + return ( + "List all saved versions (history) of a Litewrite project. " + "Returns version IDs, names, creation dates, and file counts. " + "Use this to find a version ID for restoring." 
+ ) + + @property + def parameters(self) -> dict[str, Any]: + return { + "type": "object", + "properties": { + "project_id": { + "type": "string", + "description": "The project ID", + }, + }, + "required": ["project_id"], + } + + async def execute(self, project_id: str, **kwargs: Any) -> str: + result = await self._client.request( + "/api/internal/projects/versions/list", + {"projectId": project_id}, + ) + + if not result.get("success"): + return f"Error listing versions: {result.get('error', 'Unknown error')}" + + data = result.get("data", {}) + project_name = data.get("projectName", "") + versions = data.get("versions", []) + + if not versions: + return f"No saved versions found for project '{project_name}'." + + lines = [f"Versions for project '{project_name}' ({len(versions)} total):"] + for v in versions: + user_name = ( + v.get("user", {}).get("name", "Unknown") if v.get("user") else "Unknown" + ) + lines.append( + f'- [{v["id"]}] "{v["name"]}" ' + f"(by {user_name}, {v.get('fileCount', '?')} files, " + f"{v['createdAt']})" + ) + if v.get("description"): + lines.append(f" Description: {v['description']}") + return "\n".join(lines) + + +class LitewriteSaveVersionTool(Tool): + """Tool to manually save the current project state as a version.""" + + def __init__(self, client: LitewriteClient, default_owner_id: str = ""): + self._client = client + self._default_owner_id = default_owner_id + + @property + def name(self) -> str: + return "litewrite_save_version" + + @property + def description(self) -> str: + return ( + "Save the current state of a Litewrite project as a named version. " + "This creates a snapshot that can be restored later. " + "Note: compilation already auto-saves a version by default, " + "so use this only when you want to save without compiling." 
+ ) + + @property + def parameters(self) -> dict[str, Any]: + return { + "type": "object", + "properties": { + "project_id": { + "type": "string", + "description": "The project ID", + }, + "name": { + "type": "string", + "description": "Version name (e.g. 'Before major rewrite'). Auto-generated if not provided.", + }, + "description": { + "type": "string", + "description": "Optional version description", + }, + }, + "required": ["project_id"], + } + + async def execute( + self, + project_id: str, + name: str = "", + description: str = "", + **kwargs: Any, + ) -> str: + data: dict[str, Any] = {"projectId": project_id} + + if name: + data["name"] = name + if description: + data["description"] = description + if self._default_owner_id: + data["userId"] = self._default_owner_id + + result = await self._client.request( + "/api/internal/projects/versions/create", + data, + ) + + if not result.get("success"): + if result.get("skipped"): + return ( + "No changes detected since the last saved version. Nothing to save." + ) + return f"Error saving version: {result.get('error', 'Unknown error')}" + + version = result.get("data", {}).get("version", {}) + return ( + f"Version saved successfully:\n" + f"- ID: {version.get('id')}\n" + f'- Name: "{version.get("name")}"\n' + f"- Files: {version.get('fileCount', '?')}\n" + f"- Created: {version.get('createdAt')}" + ) + + +class LitewriteRestoreVersionTool(Tool): + """Tool to restore a Litewrite project to a specific version.""" + + def __init__(self, client: LitewriteClient): + self._client = client + + @property + def name(self) -> str: + return "litewrite_restore_version" + + @property + def description(self) -> str: + return ( + "Restore a Litewrite project to a specific saved version. " + "This replaces ALL current project files with the version's files. " + "The current state will be lost unless it was saved as a version. " + "Always confirm with the user and suggest saving a version first." 
+ ) + + @property + def parameters(self) -> dict[str, Any]: + return { + "type": "object", + "properties": { + "project_id": { + "type": "string", + "description": "The project ID", + }, + "version_id": { + "type": "string", + "description": "The version ID to restore to (get from litewrite_list_versions)", + }, + }, + "required": ["project_id", "version_id"], + } + + async def execute(self, project_id: str, version_id: str, **kwargs: Any) -> str: + result = await self._client.request( + "/api/internal/projects/versions/restore", + {"projectId": project_id, "versionId": version_id}, + ) + + if not result.get("success"): + return f"Error restoring version: {result.get('error', 'Unknown error')}" + + data = result.get("data", {}) + return ( + f'Project restored to version "{data.get("versionName", "")}" successfully.\n' + f"- Restored files: {data.get('restoredFileCount', 0)}\n" + f"- Yjs cache cleared: {data.get('clearedYjsKeys', 0)} keys" + ) + + +# --------------------------------------------------------------------------- +# File Management Tools +# --------------------------------------------------------------------------- + + +class LitewriteCreateFileTool(Tool): + """Tool to create a new file or folder in a Litewrite project.""" + + def __init__(self, client: LitewriteClient): + self._client = client + + @property + def name(self) -> str: + return "litewrite_create_file" + + @property + def description(self) -> str: + return ( + "Create a new file or folder in a Litewrite project. " + "For files, you can optionally provide initial content. " + "For folders, a placeholder is created to make the folder visible." + ) + + @property + def parameters(self) -> dict[str, Any]: + return { + "type": "object", + "properties": { + "project_id": { + "type": "string", + "description": "The project ID", + }, + "name": { + "type": "string", + "description": "File or folder name (e.g. 
'chapter1.tex', 'images')", + }, + "type": { + "type": "string", + "enum": ["file", "folder"], + "description": "Type to create: 'file' (default) or 'folder'", + }, + "parent_path": { + "type": "string", + "description": "Parent directory path (e.g. 'sections'). Empty for project root.", + }, + "content": { + "type": "string", + "description": "Initial file content (only for type='file')", + }, + }, + "required": ["project_id", "name"], + } + + async def execute( + self, + project_id: str, + name: str, + type: str = "file", + parent_path: str = "", + content: str = "", + **kwargs: Any, + ) -> str: + data: dict[str, Any] = { + "projectId": project_id, + "name": name, + "type": type, + } + + if parent_path: + data["parentPath"] = parent_path + if content and type == "file": + data["content"] = content + + result = await self._client.request("/api/internal/files/create", data) + + if not result.get("success"): + error = result.get("error", "Unknown error") + if error == "FILE_EXISTS": + return f"Error: A file named '{name}' already exists at the specified location." + if error == "FOLDER_EXISTS": + return f"Error: A folder named '{name}' already exists at the specified location." + return f"Error creating {type}: {error}" + + path = result.get("data", {}).get("path", name) + return f"Successfully created {type} '{path}' in project." + + +class LitewriteRenameFileTool(Tool): + """Tool to rename or move a file/folder in a Litewrite project.""" + + def __init__(self, client: LitewriteClient): + self._client = client + + @property + def name(self) -> str: + return "litewrite_rename_file" + + @property + def description(self) -> str: + return ( + "Rename or move a file/folder in a Litewrite project. " + "To rename: provide source_path and new_name. " + "To move: provide source_path and target_path (destination folder). " + "To move and rename: provide all three." 
+ ) + + @property + def parameters(self) -> dict[str, Any]: + return { + "type": "object", + "properties": { + "project_id": { + "type": "string", + "description": "The project ID", + }, + "source_path": { + "type": "string", + "description": "Current file path (e.g. 'main.tex', 'sections/intro.tex')", + }, + "new_name": { + "type": "string", + "description": "New file name (e.g. 'introduction.tex')", + }, + "target_path": { + "type": "string", + "description": "Destination folder path for moving (e.g. 'sections')", + }, + }, + "required": ["project_id", "source_path"], + } + + async def execute( + self, + project_id: str, + source_path: str, + new_name: str = "", + target_path: str = "", + **kwargs: Any, + ) -> str: + data: dict[str, Any] = { + "projectId": project_id, + "sourcePath": source_path, + } + + if new_name: + data["newName"] = new_name + if target_path: + data["targetPath"] = target_path + + if not new_name and not target_path: + return "Error: Provide at least 'new_name' or 'target_path'." + + result = await self._client.request("/api/internal/files/rename", data) + + if not result.get("success"): + return f"Error renaming file: {result.get('error', 'Unknown error')}" + + res_data = result.get("data", {}) + return ( + f"File renamed/moved successfully:\n" + f"- From: {res_data.get('oldPath', source_path)}\n" + f"- To: {res_data.get('newPath', '')}" + ) + + +class LitewriteDeleteFileTool(Tool): + """Tool to delete a file or folder in a Litewrite project.""" + + def __init__(self, client: LitewriteClient): + self._client = client + + @property + def name(self) -> str: + return "litewrite_delete_file" + + @property + def description(self) -> str: + return ( + "Delete a file or folder from a Litewrite project. " + "If the path points to a folder, all files within it are deleted recursively. " + "This action is IRREVERSIBLE. Always confirm with the user before deleting." 
+ ) + + @property + def parameters(self) -> dict[str, Any]: + return { + "type": "object", + "properties": { + "project_id": { + "type": "string", + "description": "The project ID", + }, + "file_path": { + "type": "string", + "description": "Path of the file or folder to delete (e.g. 'old_chapter.tex', 'drafts')", + }, + }, + "required": ["project_id", "file_path"], + } + + async def execute(self, project_id: str, file_path: str, **kwargs: Any) -> str: + result = await self._client.request( + "/api/internal/files/delete", + {"projectId": project_id, "filePath": file_path}, + ) + + if not result.get("success"): + return f"Error deleting file: {result.get('error', 'Unknown error')}" + + deleted_count = result.get("data", {}).get("deletedCount", 0) + return f"Successfully deleted '{file_path}' ({deleted_count} file(s) removed)." + + +class LitewriteUploadFileTool(Tool): + """Tool to upload a local file (image, PDF, etc.) to a Litewrite project.""" + + def __init__(self, client: LitewriteClient): + self._client = client + + @property + def name(self) -> str: + return "litewrite_upload_file" + + @property + def description(self) -> str: + return ( + "Upload a local file to a Litewrite project. " + "Supports images (png, jpg, gif, etc.), PDFs, and any other file type. " + "The file is read from the local path and uploaded to the specified project path. " + "Use this to add images, figures, or other assets to a project. " + "If a file already exists at the target path, it will be overwritten by default." + ) + + @property + def parameters(self) -> dict[str, Any]: + return { + "type": "object", + "properties": { + "project_id": { + "type": "string", + "description": "The project ID", + }, + "local_path": { + "type": "string", + "description": ( + "Local file path to upload (e.g. from an attached file). " + "Use the path provided in the '[Attached files]' section of the user message." 
+ ), + }, + "target_path": { + "type": "string", + "description": ( + "Destination path in the project (e.g. 'figures/diagram.png', 'images/photo.jpg'). " + "Include the file name." + ), + }, + "overwrite": { + "type": "boolean", + "description": "Whether to overwrite if the file already exists. Default: true", + }, + }, + "required": ["project_id", "local_path", "target_path"], + } + + async def execute( + self, + project_id: str, + local_path: str, + target_path: str, + overwrite: bool = True, + **kwargs: Any, + ) -> str: + # Validate local file exists + p = Path(local_path) + if not p.is_file(): + return f"Error: Local file not found: {local_path}" + + # Read file and encode as base64 + try: + file_bytes = p.read_bytes() + except Exception as e: + return f"Error reading local file: {e}" + + mime, _ = mimetypes.guess_type(local_path) + is_text = mime and mime.startswith("text/") + + data: dict[str, Any] = { + "projectId": project_id, + "filePath": target_path, + "overwrite": overwrite, + } + + if is_text: + # Text file: send as plain string + data["content"] = file_bytes.decode("utf-8", errors="replace") + else: + # Binary file: send as base64 + data["contentBase64"] = base64.b64encode(file_bytes).decode() + + logger.info( + f"Uploading {local_path} ({len(file_bytes)} bytes) " + f"-> project {project_id}/{target_path}" + ) + + result = await self._client.request("/api/internal/files/upload", data) + + if not result.get("success"): + return f"Error uploading file: {result.get('error', 'Unknown error')}" + + res_data = result.get("data", {}) + return ( + f"File uploaded successfully:\n" + f"- Project path: {res_data.get('path', target_path)}\n" + f"- Size: {res_data.get('size', len(file_bytes))} bytes\n" + f"- Type: {res_data.get('mimeType', mime or 'unknown')}" + ) + + +# --------------------------------------------------------------------------- +# Import Tools +# --------------------------------------------------------------------------- + + +class 
LitewriteImportArxivTool(Tool): + """Tool to import a project from arXiv.""" + + def __init__(self, client: LitewriteClient, default_owner_id: str = ""): + self._client = client + self._default_owner_id = default_owner_id + + @property + def name(self) -> str: + return "litewrite_import_arxiv" + + @property + def description(self) -> str: + return ( + "Import a LaTeX project from arXiv. Provide an arXiv ID or URL. " + "The paper's source files will be downloaded and created as a new project. " + "Supported formats: arXiv ID (e.g. '2301.07041'), " + "arXiv URL (e.g. 'https://arxiv.org/abs/2301.07041'), " + "or PDF URL (e.g. 'https://arxiv.org/pdf/2301.07041.pdf')." + ) + + @property + def parameters(self) -> dict[str, Any]: + return { + "type": "object", + "properties": { + "arxiv_id": { + "type": "string", + "description": ( + "arXiv paper ID or URL. Examples: '2301.07041', " + "'https://arxiv.org/abs/2301.07041'" + ), + }, + "name": { + "type": "string", + "description": "Optional project name. Defaults to the paper title or arXiv ID.", + }, + "description": { + "type": "string", + "description": "Optional project description.", + }, + }, + "required": ["arxiv_id"], + } + + async def execute( + self, + arxiv_id: str, + name: str = "", + description: str = "", + **kwargs: Any, + ) -> str: + if not self._default_owner_id: + return "Error: Owner ID is not configured. Cannot import project." 
+ + data: dict[str, Any] = { + "arxivId": arxiv_id, + "ownerId": self._default_owner_id, + } + if name: + data["name"] = name + if description: + data["description"] = description + + logger.info(f"Importing arXiv paper: {arxiv_id}") + + try: + result = await self._client.request( + "/api/internal/projects/import/arxiv", data + ) + except Exception as e: + logger.error(f"arXiv import error: {e}") + return f"Error importing from arXiv: {e}" + + if not result.get("success"): + return f"Error importing from arXiv: {result.get('error', 'Unknown error')}" + + project_data = result.get("data", {}) + project = project_data.get("project", {}) + return ( + f"arXiv paper imported successfully:\n" + f"- Project ID: {project.get('id')}\n" + f"- Name: {project.get('name')}\n" + f"- arXiv ID: {project_data.get('arxivId', arxiv_id)}\n" + f"- Paper title: {project_data.get('paperTitle', 'N/A')}\n" + f"- Files: {project_data.get('filesCount', '?')}\n" + f"- Main file: {project.get('mainFile', 'main.tex')}" + ) + + +class LitewriteImportGithubTool(Tool): + """Tool to import a project from GitHub/GitLab.""" + + def __init__(self, client: LitewriteClient, default_owner_id: str = ""): + self._client = client + self._default_owner_id = default_owner_id + + @property + def name(self) -> str: + return "litewrite_import_github" + + @property + def description(self) -> str: + return ( + "Import a LaTeX project from a GitHub or GitLab repository. " + "Provide the repository URL. Supports importing entire repos " + "or specific subdirectories via tree URLs. " + "Examples: 'https://github.com/user/repo', " + "'https://github.com/user/repo/tree/main/latex-source'." + ) + + @property + def parameters(self) -> dict[str, Any]: + return { + "type": "object", + "properties": { + "url": { + "type": "string", + "description": "GitHub or GitLab repository URL.", + }, + "name": { + "type": "string", + "description": "Optional project name. 
Defaults to the repo name.", + }, + "description": { + "type": "string", + "description": "Optional project description.", + }, + }, + "required": ["url"], + } + + async def execute( + self, + url: str, + name: str = "", + description: str = "", + **kwargs: Any, + ) -> str: + if not self._default_owner_id: + return "Error: Owner ID is not configured. Cannot import project." + + data: dict[str, Any] = { + "url": url, + "ownerId": self._default_owner_id, + } + if name: + data["name"] = name + if description: + data["description"] = description + + logger.info(f"Importing from GitHub/GitLab: {url}") + + try: + result = await self._client.request( + "/api/internal/projects/import/github", data + ) + except Exception as e: + logger.error(f"GitHub import error: {e}") + return f"Error importing from GitHub/GitLab: {e}" + + if not result.get("success"): + return f"Error importing from GitHub/GitLab: {result.get('error', 'Unknown error')}" + + project_data = result.get("data", {}) + project = project_data.get("project", {}) + return ( + f"Repository imported successfully:\n" + f"- Project ID: {project.get('id')}\n" + f"- Name: {project.get('name')}\n" + f"- Files: {project_data.get('filesCount', '?')}\n" + f"- Main file: {project.get('mainFile', 'main.tex')}" + ) + + +class LitewriteImportUploadTool(Tool): + """Tool to create a project by uploading a local file (zip/tex/etc.).""" + + def __init__(self, client: LitewriteClient, default_owner_id: str = ""): + self._client = client + self._default_owner_id = default_owner_id + + @property + def name(self) -> str: + return "litewrite_import_upload" + + @property + def description(self) -> str: + return ( + "Create a new Litewrite project by uploading a local file. " + "Supports ZIP archives, tar.gz archives, and individual LaTeX files " + "(.tex, .bib, .cls, .sty). The file is read from a local path " + "(e.g. from an attached file the user sent). " + "A new project will be created with the uploaded content." 
+ ) + + @property + def parameters(self) -> dict[str, Any]: + return { + "type": "object", + "properties": { + "local_path": { + "type": "string", + "description": ( + "Local file path to upload. Use the path from the " + "'[Attached files]' section of the user message." + ), + }, + "name": { + "type": "string", + "description": "Optional project name. Defaults to the file name.", + }, + "description": { + "type": "string", + "description": "Optional project description.", + }, + }, + "required": ["local_path"], + } + + async def execute( + self, + local_path: str, + name: str = "", + description: str = "", + **kwargs: Any, + ) -> str: + if not self._default_owner_id: + return "Error: Owner ID is not configured. Cannot import project." + + p = Path(local_path) + if not p.is_file(): + return f"Error: Local file not found: {local_path}" + + try: + file_bytes = p.read_bytes() + except Exception as e: + return f"Error reading local file: {e}" + + file_b64 = base64.b64encode(file_bytes).decode() + + data: dict[str, Any] = { + "fileBase64": file_b64, + "fileName": p.name, + "ownerId": self._default_owner_id, + } + if name: + data["name"] = name + if description: + data["description"] = description + + logger.info( + f"Uploading {local_path} ({len(file_bytes)} bytes) to create project" + ) + + try: + result = await self._client.request( + "/api/internal/projects/import/upload", data + ) + except Exception as e: + logger.error(f"Upload import error: {e}") + return f"Error uploading file to create project: {e}" + + if not result.get("success"): + return f"Error creating project from upload: {result.get('error', 'Unknown error')}" + + project_data = result.get("data", {}) + project = project_data.get("project", {}) + return ( + f"Project created from uploaded file:\n" + f"- Project ID: {project.get('id')}\n" + f"- Name: {project.get('name')}\n" + f"- Files: {project_data.get('filesCount', '?')}\n" + f"- Main file: {project.get('mainFile', 'main.tex')}" + ) diff --git 
a/nanobot/nanobot/agent/tools/message.py b/nanobot/nanobot/agent/tools/message.py new file mode 100644 index 0000000..f62db11 --- /dev/null +++ b/nanobot/nanobot/agent/tools/message.py @@ -0,0 +1,102 @@ +"""Message tool for sending messages to users.""" + +from typing import Any, Callable, Awaitable + +from nanobot.agent.tools.base import Tool +from nanobot.bus.events import OutboundMessage + + +class MessageTool(Tool): + """Tool to send messages to users on chat channels.""" + + def __init__( + self, + send_callback: Callable[[OutboundMessage], Awaitable[None]] | None = None, + default_channel: str = "", + default_chat_id: str = "", + ): + self._send_callback = send_callback + self._default_channel = default_channel + self._default_chat_id = default_chat_id + + def set_context(self, channel: str, chat_id: str) -> None: + """Set the current message context.""" + self._default_channel = channel + self._default_chat_id = chat_id + + def set_send_callback( + self, callback: Callable[[OutboundMessage], Awaitable[None]] + ) -> None: + """Set the callback for sending messages.""" + self._send_callback = callback + + @property + def name(self) -> str: + return "message" + + @property + def description(self) -> str: + return ( + "Send a message to the user. Use this when you want to communicate something. " + "You can optionally attach files (e.g. PDF) via the media parameter." + ) + + @property + def parameters(self) -> dict[str, Any]: + return { + "type": "object", + "properties": { + "content": { + "type": "string", + "description": "The message content to send", + }, + "channel": { + "type": "string", + "description": "Optional: target channel (telegram, feishu, etc.)", + }, + "chat_id": { + "type": "string", + "description": "Optional: target chat/user ID", + }, + "media": { + "type": "array", + "items": {"type": "string"}, + "description": ( + "Optional: list of local file paths to send as attachments " + "(e.g. 
class ToolRegistry:
    """Registry for agent tools.

    Maps tool names to tool instances so that tools can be registered,
    looked up, described and executed dynamically at runtime.
    """

    def __init__(self):
        # Keyed by ``tool.name``; dict ordering keeps registration order.
        self._tools: dict[str, Tool] = {}

    def register(self, tool: Tool) -> None:
        """Register a tool under ``tool.name``, replacing any existing entry."""
        self._tools[tool.name] = tool

    def unregister(self, name: str) -> None:
        """Unregister a tool by name; unknown names are silently ignored."""
        self._tools.pop(name, None)

    def get(self, name: str) -> Tool | None:
        """Return the tool registered as ``name``, or ``None`` if absent."""
        return self._tools.get(name)

    def has(self, name: str) -> bool:
        """Check if a tool is registered."""
        return name in self._tools

    def get_definitions(self) -> list[dict[str, Any]]:
        """Return ``to_schema()`` of every registered tool, in registration order."""
        return [tool.to_schema() for tool in self._tools.values()]

    async def execute(self, name: str, params: dict[str, Any]) -> str:
        """Execute a tool by name with the given parameters.

        Failures are reported through the returned string rather than by
        raising, so the caller always gets text it can hand back to the model.

        Args:
            name: Tool name.
            params: Tool parameters, passed to the tool as keyword arguments.

        Returns:
            The tool's result, or a message starting with ``"Error"`` when
            the tool is unknown, the parameters fail validation, or the tool
            raises an exception.
        """
        tool = self._tools.get(name)
        # Compare against None explicitly: a registered tool object may be
        # falsy (e.g. it defines __len__) and must still be executed.
        if tool is None:
            return f"Error: Tool '{name}' not found"

        try:
            errors = tool.validate_params(params)
            if errors:
                return f"Error: Invalid parameters for tool '{name}': " + "; ".join(
                    errors
                )
            return await tool.execute(**params)
        except Exception as e:
            # Deliberate catch-all boundary: tool failures become text.
            return f"Error executing {name}: {str(e)}"

    @property
    def tool_names(self) -> list[str]:
        """Get list of registered tool names."""
        return list(self._tools.keys())

    def __len__(self) -> int:
        return len(self._tools)

    def __contains__(self, name: str) -> bool:
        return name in self._tools
class _SessionToolBase(Tool):
    """Shared foundation for the session management tools.

    Keeps the session manager together with a reference to the session the
    tool should act on. That reference starts out as ``None`` and is swapped
    in through ``set_session()``; per the original author, ``AgentLoop``
    does this at the start of every message processing cycle.
    """

    def __init__(self, session_manager: "SessionManager"):
        # No session is bound until set_session() is called.
        self._session: "Session | None" = None
        self._manager = session_manager

    def set_session(self, session: "Session") -> None:
        """Bind the session that subsequent tool calls operate on."""
        self._session = session
class SessionGetHistoryTool(_SessionToolBase):
    """Tool that returns a window of the current session's stored messages."""

    @property
    def name(self) -> str:
        return "session_get_history"

    @property
    def description(self) -> str:
        return (
            "Get past messages in the current session. You can specify how many "
            "messages to retrieve and an offset to scroll through history."
        )

    @property
    def parameters(self) -> dict[str, Any]:
        count_schema = {
            "type": "integer",
            "description": "Number of messages to retrieve (default 20, max 100).",
        }
        offset_schema = {
            "type": "integer",
            "description": (
                "Skip this many recent messages (0 = most recent). "
                "Use to page through older history."
            ),
        }
        return {
            "type": "object",
            "properties": {"count": count_schema, "offset": offset_schema},
        }

    async def execute(self, count: int = 20, offset: int = 0, **kwargs: Any) -> str:
        """Render up to ``count`` messages, skipping the ``offset`` newest ones.

        ``count`` is clamped to 1..100 and ``offset`` to >= 0. Each message
        is shown with its 1-based position, role and timestamp, and its
        content is cut to a 200-character preview.
        """
        session = self._session
        if session is None:
            return "Error: No active session."

        count = max(1, min(count, 100))
        offset = max(0, offset)

        history = session.messages
        total = len(history)
        if not total:
            return "The session has no messages."

        # The window is measured from the newest message backwards.
        stop = total - offset
        if stop <= 0:
            return f"Offset {offset} exceeds total messages ({total})."
        start = max(0, stop - count)

        def _render(position: int, message: dict[str, Any]) -> str:
            role = message.get("role", "?")
            body = message.get("content", "")
            stamp = message.get("timestamp", "")
            # Long messages are shortened so the listing stays an overview.
            suffix = "…" if len(body) > 200 else ""
            shortened = body[:200] + suffix
            return f"\n[{position}] {role} ({stamp}):\n{shortened}"

        header = f"Messages {start + 1}–{stop} of {total}:"
        entries = [
            _render(position, message)
            for position, message in enumerate(history[start:stop], start=start + 1)
        ]
        return "\n".join([header, *entries])
+ + +# --------------------------------------------------------------------------- +# session_summarize +# --------------------------------------------------------------------------- + + +class SessionSummarizeTool(_SessionToolBase): + """Compress session history by replacing old messages with a summary.""" + + def __init__( + self, + session_manager: "SessionManager", + provider: "LLMProvider | None" = None, + model: str | None = None, + ): + super().__init__(session_manager) + self._provider = provider + self._model = model + + def set_provider(self, provider: "LLMProvider", model: str) -> None: + self._provider = provider + self._model = model + + @property + def name(self) -> str: + return "session_summarize" + + @property + def description(self) -> str: + return ( + "Compress the session history by summarizing older messages and " + "keeping only the most recent ones. This reduces token usage while " + "preserving important context. The summary replaces old messages." + ) + + @property + def parameters(self) -> dict[str, Any]: + return { + "type": "object", + "properties": { + "keep_recent": { + "type": "integer", + "description": ( + "Number of most-recent messages to keep verbatim " + "(default 10). Older messages are replaced by a summary." + ), + }, + }, + } + + async def execute(self, keep_recent: int = 10, **kwargs: Any) -> str: + if self._session is None: + return "Error: No active session." + + if self._provider is None: + return "Error: LLM provider not configured for summarization." + + total = len(self._session.messages) + keep_recent = max(2, min(keep_recent, total)) + + if total <= keep_recent: + return ( + f"Only {total} message(s) in session — nothing to summarize. 
" + f"(keep_recent={keep_recent})" + ) + + # Split into old (to summarize) and recent (to keep) + old_messages = self._session.messages[: total - keep_recent] + recent_messages = self._session.messages[total - keep_recent :] + + # Build summarization prompt + conversation_text = "\n".join( + f"[{m.get('role', '?')}] {m.get('content', '')}" for m in old_messages + ) + + summarize_messages = [ + { + "role": "system", + "content": ( + "You are a conversation summarizer. Produce a concise summary " + "of the following conversation. Preserve key facts, decisions, " + "project names, file names, and any important context. " + "The summary will be used as context for future interactions. " + "Write the summary in the same language as the conversation." + ), + }, + { + "role": "user", + "content": f"Summarize this conversation:\n\n{conversation_text}", + }, + ] + + try: + response = await self._provider.chat( + messages=summarize_messages, + model=self._model, + ) + summary_text = response.content or "No summary generated." + except Exception as e: + return f"Error generating summary: {e}" + + # Replace old messages with a single summary message + summary_msg = { + "role": "system", + "content": ( + f"[Session summary — {len(old_messages)} messages compressed]\n\n" + f"{summary_text}" + ), + "timestamp": self._session.messages[0].get("timestamp", ""), + } + + self._session.messages = [summary_msg] + recent_messages + self._manager.save(self._session) + + return ( + f"Session compressed: {len(old_messages)} old messages replaced by a " + f"summary. {len(recent_messages)} recent messages kept. " + f"New total: {len(self._session.messages)} messages." 
class ExecTool(Tool):
    """Tool to execute shell commands.

    Commands run through the system shell with a timeout. Before a command
    is started, ``_guard_command`` applies a best-effort filter (deny/allow
    regexes and, optionally, confinement to the working directory). The
    guard is pattern based and is not a sandbox.
    """

    def __init__(
        self,
        timeout: int = 60,
        working_dir: str | None = None,
        deny_patterns: list[str] | None = None,
        allow_patterns: list[str] | None = None,
        restrict_to_workspace: bool = False,
    ):
        """Configure the tool.

        Args:
            timeout: Seconds to wait for a command before killing it.
            working_dir: Default working directory; with
                ``restrict_to_workspace`` it is also the workspace root.
            deny_patterns: Regexes (matched against the lower-cased command)
                that block a command. Defaults to a built-in list.
            allow_patterns: If non-empty, a command must match at least one.
            restrict_to_workspace: Reject path traversal and absolute paths
                that fall outside the working directory.
        """
        self.timeout = timeout
        self.working_dir = working_dir
        self.deny_patterns = deny_patterns or [
            r"\brm\s+-[rf]{1,2}\b",  # rm -r, rm -rf, rm -fr
            r"\bdel\s+/[fq]\b",  # del /f, del /q
            r"\brmdir\s+/s\b",  # rmdir /s
            r"\b(format|mkfs|diskpart)\b",  # disk operations
            r"\bdd\s+if=",  # dd
            r">\s*/dev/sd",  # write to disk
            r"\b(shutdown|reboot|poweroff)\b",  # system power
            r":\(\)\s*\{.*\};\s*:",  # fork bomb
        ]
        self.allow_patterns = allow_patterns or []
        self.restrict_to_workspace = restrict_to_workspace

    @property
    def name(self) -> str:
        return "exec"

    @property
    def description(self) -> str:
        return "Execute a shell command and return its output. Use with caution."

    @property
    def parameters(self) -> dict[str, Any]:
        return {
            "type": "object",
            "properties": {
                "command": {
                    "type": "string",
                    "description": "The shell command to execute",
                },
                "working_dir": {
                    "type": "string",
                    "description": "Optional working directory for the command",
                },
            },
            "required": ["command"],
        }

    async def execute(
        self, command: str, working_dir: str | None = None, **kwargs: Any
    ) -> str:
        """Run ``command`` in a shell and return its combined output.

        Args:
            command: Shell command line to run.
            working_dir: Directory to run in; falls back to the configured
                ``working_dir`` and then to the process's current directory.

        Returns:
            stdout, then ``STDERR:`` plus stderr if non-blank, then the exit
            code if non-zero; ``"(no output)"`` when nothing was produced.
            Output beyond 10000 characters is truncated. Guard rejections,
            timeouts and launch failures are returned as ``"Error..."``
            strings instead of being raised.
        """
        cwd = working_dir or self.working_dir or os.getcwd()
        guard_error = self._guard_command(command, cwd)
        if guard_error:
            return guard_error

        try:
            process = await asyncio.create_subprocess_shell(
                command,
                stdout=asyncio.subprocess.PIPE,
                stderr=asyncio.subprocess.PIPE,
                cwd=cwd,
            )

            try:
                stdout, stderr = await asyncio.wait_for(
                    process.communicate(), timeout=self.timeout
                )
            except asyncio.TimeoutError:
                try:
                    process.kill()
                except ProcessLookupError:
                    # The process exited between the timeout and the kill.
                    pass
                # Reap the killed shell so it does not linger as a zombie
                # and its transport is closed. Bounded, because the wait
                # must never hang the tool a second time.
                try:
                    await asyncio.wait_for(process.wait(), timeout=5)
                except asyncio.TimeoutError:
                    pass
                return f"Error: Command timed out after {self.timeout} seconds"

            output_parts = []

            if stdout:
                output_parts.append(stdout.decode("utf-8", errors="replace"))

            if stderr:
                stderr_text = stderr.decode("utf-8", errors="replace")
                if stderr_text.strip():
                    output_parts.append(f"STDERR:\n{stderr_text}")

            if process.returncode != 0:
                output_parts.append(f"\nExit code: {process.returncode}")

            result = "\n".join(output_parts) if output_parts else "(no output)"

            # Truncate very long output
            max_len = 10000
            if len(result) > max_len:
                result = (
                    result[:max_len]
                    + f"\n... (truncated, {len(result) - max_len} more chars)"
                )

            return result

        except Exception as e:
            # Boundary: launch/IO failures are reported as text to the caller.
            return f"Error executing command: {str(e)}"

    def _guard_command(self, command: str, cwd: str) -> str | None:
        """Best-effort safety guard for potentially destructive commands.

        Args:
            command: The command line about to be executed.
            cwd: The directory the command would run in.

        Returns:
            An error message if the command is rejected, otherwise ``None``.
        """
        cmd = command.strip()
        lower = cmd.lower()

        for pattern in self.deny_patterns:
            if re.search(pattern, lower):
                return "Error: Command blocked by safety guard (dangerous pattern detected)"

        if self.allow_patterns:
            if not any(re.search(p, lower) for p in self.allow_patterns):
                return "Error: Command blocked by safety guard (not in allowlist)"

        if self.restrict_to_workspace:
            if "..\\" in cmd or "../" in cmd:
                return (
                    "Error: Command blocked by safety guard (path traversal detected)"
                )

            cwd_path = Path(cwd).resolve()

            # The caller may override the working directory per call. When a
            # workspace is configured, the override must stay inside it;
            # otherwise passing working_dir="/" would move the reference
            # point of the checks below and defeat the restriction.
            if self.working_dir:
                workspace = Path(self.working_dir).resolve()
                if cwd_path != workspace and workspace not in cwd_path.parents:
                    return "Error: Command blocked by safety guard (working_dir outside workspace)"

            win_paths = re.findall(r"[A-Za-z]:\\[^\\\"']+", cmd)
            posix_paths = re.findall(r"/[^\s\"']+", cmd)

            for raw in win_paths + posix_paths:
                try:
                    p = Path(raw).resolve()
                except Exception:
                    # Unresolvable fragments are skipped (best-effort guard).
                    continue
                if cwd_path not in p.parents and p != cwd_path:
                    return "Error: Command blocked by safety guard (path outside working dir)"

        return None
+ """ + + def __init__(self, manager: "SubagentManager"): + self._manager = manager + self._origin_channel = "cli" + self._origin_chat_id = "direct" + + def set_context(self, channel: str, chat_id: str) -> None: + """Set the origin context for subagent announcements.""" + self._origin_channel = channel + self._origin_chat_id = chat_id + + @property + def name(self) -> str: + return "spawn" + + @property + def description(self) -> str: + return ( + "Spawn a subagent to handle a task in the background. " + "Use this for complex or time-consuming tasks that can run independently. " + "The subagent will complete the task and report back when done." + ) + + @property + def parameters(self) -> dict[str, Any]: + return { + "type": "object", + "properties": { + "task": { + "type": "string", + "description": "The task for the subagent to complete", + }, + "label": { + "type": "string", + "description": "Optional short label for the task (for display)", + }, + }, + "required": ["task"], + } + + async def execute(self, task: str, label: str | None = None, **kwargs: Any) -> str: + """Spawn a subagent to execute the given task.""" + return await self._manager.spawn( + task=task, + label=label, + origin_channel=self._origin_channel, + origin_chat_id=self._origin_chat_id, + ) diff --git a/nanobot/nanobot/agent/tools/web.py b/nanobot/nanobot/agent/tools/web.py new file mode 100644 index 0000000..f346ddd --- /dev/null +++ b/nanobot/nanobot/agent/tools/web.py @@ -0,0 +1,279 @@ +"""Web tools: web_search and web_fetch.""" + +import html +import json +import os +import re +from typing import Any +from urllib.parse import urlparse + +import httpx + +from nanobot.agent.tools.base import Tool + +# Shared constants +USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 14_7_2) AppleWebKit/537.36" +MAX_REDIRECTS = 5 # Limit redirects to prevent DoS attacks + + +def _strip_tags(text: str) -> str: + """Remove HTML tags and decode entities.""" + text = re.sub(r"<script[\s\S]*?</script>", "", 
text, flags=re.I) + text = re.sub(r"<style[\s\S]*?</style>", "", text, flags=re.I) + text = re.sub(r"<[^>]+>", "", text) + return html.unescape(text).strip() + + +def _normalize(text: str) -> str: + """Normalize whitespace.""" + text = re.sub(r"[ \t]+", " ", text) + return re.sub(r"\n{3,}", "\n\n", text).strip() + + +def _is_private_host(hostname: str) -> bool: + """Check if hostname resolves to a private/internal IP address.""" + import ipaddress + import socket + + # Block known cloud metadata hostnames + BLOCKED_HOSTS = { + "metadata.google.internal", + "metadata", + "169.254.169.254", + } + if hostname.lower() in BLOCKED_HOSTS: + return True + + # Block Docker internal service names commonly used in this project + INTERNAL_HOSTS = { + "web", + "ai-server", + "redis", + "minio", + "compile-server", + "ws-server", + } + if hostname.lower().split(".")[0] in INTERNAL_HOSTS: + return True + + try: + # Resolve hostname to IP and check if it's private + for info in socket.getaddrinfo( + hostname, None, socket.AF_UNSPEC, socket.SOCK_STREAM + ): + addr = info[4][0] + ip = ipaddress.ip_address(addr) + if ( + ip.is_private + or ip.is_loopback + or ip.is_link_local + or ip.is_reserved + or ip.is_multicast + ): + return True + except (socket.gaierror, ValueError): + # If we can't resolve, allow (may be valid external host) + pass + return False + + +def _validate_url(url: str) -> tuple[bool, str]: + """Validate URL: must be http(s) with valid domain, not targeting private networks.""" + try: + p = urlparse(url) + if p.scheme not in ("http", "https"): + return False, f"Only http/https allowed, got '{p.scheme or 'none'}'" + if not p.netloc: + return False, "Missing domain" + + # Extract hostname (strip port) + hostname = p.hostname or "" + if not hostname: + return False, "Missing hostname" + + # Block private/internal addresses (SSRF protection) + if _is_private_host(hostname): + return False, f"Blocked: private or internal address ({hostname})" + + return True, "" + except 
class WebFetchTool(Tool):
    """Fetch and extract content from a URL using Readability."""

    name = "web_fetch"
    description = "Fetch URL and extract readable content (HTML → markdown/text)."
    parameters = {
        "type": "object",
        "properties": {
            "url": {"type": "string", "description": "URL to fetch"},
            "extractMode": {
                "type": "string",
                "enum": ["markdown", "text"],
                "default": "markdown",
            },
            "maxChars": {"type": "integer", "minimum": 100},
        },
        "required": ["url"],
    }

    def __init__(self, max_chars: int = 50000):
        # Default cap on returned text when the call gives no maxChars.
        self.max_chars = max_chars

    async def execute(
        self,
        url: str,
        extractMode: str = "markdown",
        maxChars: int | None = None,
        **kwargs: Any,
    ) -> str:
        """Fetch ``url`` and return a JSON string describing the result.

        Args:
            url: Address to fetch; must pass ``_validate_url``.
            extractMode: ``"markdown"`` converts extracted HTML to markdown;
                any other value strips tags to plain text.
            maxChars: Maximum length of the returned text; falls back to the
                instance default when omitted or 0.

        Returns:
            A JSON object with ``url``, ``finalUrl``, ``status``,
            ``extractor``, ``truncated``, ``length`` and ``text`` on success,
            or with ``error`` and ``url`` (plus ``finalUrl`` for a blocked
            redirect) on failure. Exceptions are reported in the JSON, not
            raised.
        """
        from readability import Document

        max_chars = maxChars or self.max_chars

        # Validate URL before fetching
        is_valid, error_msg = _validate_url(url)
        if not is_valid:
            return json.dumps(
                {"error": f"URL validation failed: {error_msg}", "url": url}
            )

        try:
            # Redirects are followed by hand so every hop is validated
            # BEFORE a request is sent to it. Letting the client follow them
            # and checking only the final URL afterwards would already have
            # contacted the internal host (SSRF via open redirect).
            # NOTE(review): validation and the actual connection resolve DNS
            # separately, so DNS rebinding is not covered here.
            async with httpx.AsyncClient(
                follow_redirects=False, timeout=30.0
            ) as client:
                current_url = url
                hops = 0
                while True:
                    r = await client.get(
                        current_url, headers={"User-Agent": USER_AGENT}
                    )
                    location = r.headers.get("location")
                    if not (r.is_redirect and location):
                        break
                    if hops >= MAX_REDIRECTS:
                        return json.dumps(
                            {
                                "error": f"Exceeded maximum of {MAX_REDIRECTS} redirects",
                                "url": url,
                            }
                        )
                    # Location may be relative; resolve against this hop.
                    next_url = str(r.url.join(location))
                    is_next_valid, next_error = _validate_url(next_url)
                    if not is_next_valid:
                        return json.dumps(
                            {
                                "error": f"Redirect target blocked: {next_error}",
                                "url": url,
                                "finalUrl": next_url,
                            }
                        )
                    current_url = next_url
                    hops += 1
                r.raise_for_status()

            ctype = r.headers.get("content-type", "")

            # JSON
            if "application/json" in ctype:
                text, extractor = json.dumps(r.json(), indent=2), "json"
            # HTML (by content type, or sniffed from the start of the body)
            elif "text/html" in ctype or r.text[:256].lower().startswith(
                ("<!doctype", "<html")
            ):
                doc = Document(r.text)
                content = (
                    self._to_markdown(doc.summary())
                    if extractMode == "markdown"
                    else _strip_tags(doc.summary())
                )
                text = f"# {doc.title()}\n\n{content}" if doc.title() else content
                extractor = "readability"
            else:
                text, extractor = r.text, "raw"

            truncated = len(text) > max_chars
            if truncated:
                text = text[:max_chars]

            return json.dumps(
                {
                    "url": url,
                    "finalUrl": str(r.url),
                    "status": r.status_code,
                    "extractor": extractor,
                    "truncated": truncated,
                    "length": len(text),
                    "text": text,
                }
            )
        except Exception as e:
            # Boundary: network/parse failures are reported to the caller.
            return json.dumps({"error": str(e), "url": url})

    def _to_markdown(self, html: str) -> str:
        """Convert HTML to markdown.

        Links, headings and list items are rewritten first; closing block
        tags and ``<br>``/``<hr>`` become line breaks; remaining tags are
        stripped and whitespace is normalized.
        """
        # Convert links, headings, lists before stripping tags
        text = re.sub(
            r'<a\s+[^>]*href=["\']([^"\']+)["\'][^>]*>([\s\S]*?)</a>',
            lambda m: f"[{_strip_tags(m[2])}]({m[1]})",
            html,
            flags=re.I,
        )
        text = re.sub(
            r"<h([1-6])[^>]*>([\s\S]*?)</h\1>",
            lambda m: f"\n{'#' * int(m[1])} {_strip_tags(m[2])}\n",
            text,
            flags=re.I,
        )
        text = re.sub(
            r"<li[^>]*>([\s\S]*?)</li>",
            lambda m: f"\n- {_strip_tags(m[1])}",
            text,
            flags=re.I,
        )
        text = re.sub(r"</(p|div|section|article)>", "\n\n", text, flags=re.I)
        text = re.sub(r"<(br|hr)\s*/?>", "\n", text, flags=re.I)
        return _normalize(_strip_tags(text))
class MessageBus:
    """Async message bus sitting between chat channels and the agent core.

    It owns two unbounded queues: channels put messages on ``inbound`` for
    the agent, and the agent puts replies on ``outbound``. Outbound messages
    can also be fanned out to per-channel subscriber callbacks by
    ``dispatch_outbound()``.
    """

    def __init__(self):
        self._running = False
        # channel name -> callbacks invoked for that channel's messages
        self._outbound_subscribers: dict[
            str, list[Callable[[OutboundMessage], Awaitable[None]]]
        ] = {}
        self.inbound: asyncio.Queue[InboundMessage] = asyncio.Queue()
        self.outbound: asyncio.Queue[OutboundMessage] = asyncio.Queue()

    async def publish_inbound(self, msg: InboundMessage) -> None:
        """Queue a message coming from a channel for the agent."""
        await self.inbound.put(msg)

    async def consume_inbound(self) -> InboundMessage:
        """Wait for and return the next inbound message."""
        next_message = await self.inbound.get()
        return next_message

    async def publish_outbound(self, msg: OutboundMessage) -> None:
        """Queue a response from the agent for delivery to a channel."""
        await self.outbound.put(msg)

    async def consume_outbound(self) -> OutboundMessage:
        """Wait for and return the next outbound message."""
        next_message = await self.outbound.get()
        return next_message

    def subscribe_outbound(
        self, channel: str, callback: Callable[[OutboundMessage], Awaitable[None]]
    ) -> None:
        """Add ``callback`` to the subscribers of ``channel``."""
        self._outbound_subscribers.setdefault(channel, []).append(callback)

    async def dispatch_outbound(self) -> None:
        """Deliver outbound messages to subscribers until ``stop()`` is called.

        Intended to run as a background task. The queue is polled with a
        one-second timeout so the running flag is re-checked regularly. A
        failing callback is logged and does not stop the remaining ones.
        """
        self._running = True
        while self._running:
            try:
                msg = await asyncio.wait_for(self.outbound.get(), timeout=1.0)
            except asyncio.TimeoutError:
                # Nothing queued within the poll interval; check the flag again.
                continue

            for deliver in self._outbound_subscribers.get(msg.channel, ()):
                try:
                    await deliver(msg)
                except Exception as e:
                    logger.error(f"Error dispatching to {msg.channel}: {e}")

    def stop(self) -> None:
        """Ask the dispatcher loop to exit after its current iteration."""
        self._running = False

    @property
    def inbound_size(self) -> int:
        """Number of pending inbound messages."""
        return self.inbound.qsize()

    @property
    def outbound_size(self) -> int:
        """Number of pending outbound messages."""
        return self.outbound.qsize()
+ """ + self.config = config + self.bus = bus + self._running = False + + @abstractmethod + async def start(self) -> None: + """ + Start the channel and begin listening for messages. + + This should be a long-running async task that: + 1. Connects to the chat platform + 2. Listens for incoming messages + 3. Forwards messages to the bus via _handle_message() + """ + pass + + @abstractmethod + async def stop(self) -> None: + """Stop the channel and clean up resources.""" + pass + + @abstractmethod + async def send(self, msg: OutboundMessage) -> None: + """ + Send a message through this channel. + + Args: + msg: The message to send. + """ + pass + + def is_allowed(self, sender_id: str) -> bool: + """ + Check if a sender is allowed to use this bot. + + Args: + sender_id: The sender's identifier. + + Returns: + True if allowed, False otherwise. + """ + from loguru import logger + + allow_list = getattr(self.config, "allow_from", []) + + # If no allow list configured, allow all (but log a warning once) + if not allow_list: + if not getattr(self, "_warned_open_access", False): + logger.warning( + f"Channel '{self.name}': allow_from is empty — all senders are allowed. " + "Set allow_from to restrict access." + ) + self._warned_open_access = True + return True + + sender_str = str(sender_id) + # Check full sender ID first + if sender_str in allow_list: + return True + # For composite IDs like "user_id|username", check individual parts + # Only the numeric user_id part is trusted (first segment) + if "|" in sender_str: + primary_id = sender_str.split("|", 1)[0] + if primary_id and primary_id in allow_list: + return True + return False + + async def _handle_message( + self, + sender_id: str, + chat_id: str, + content: str, + media: list[str] | None = None, + metadata: dict[str, Any] | None = None, + ) -> None: + """ + Handle an incoming message from the chat platform. + + This method checks permissions and forwards to the bus. + + Args: + sender_id: The sender's identifier. 
+ chat_id: The chat/channel identifier. + content: Message text content. + media: Optional list of media URLs. + metadata: Optional channel-specific metadata. + """ + if not self.is_allowed(sender_id): + return + + msg = InboundMessage( + channel=self.name, + sender_id=str(sender_id), + chat_id=str(chat_id), + content=content, + media=media or [], + metadata=metadata or {}, + ) + + await self.bus.publish_inbound(msg) + + @property + def is_running(self) -> bool: + """Check if the channel is running.""" + return self._running diff --git a/nanobot/nanobot/channels/feishu.py b/nanobot/nanobot/channels/feishu.py new file mode 100644 index 0000000..0f2600c --- /dev/null +++ b/nanobot/nanobot/channels/feishu.py @@ -0,0 +1,545 @@ +"""Feishu/Lark channel implementation using lark-oapi SDK.""" + +import asyncio +import io +import json +from pathlib import Path +from typing import Any + +from loguru import logger + +from nanobot.bus.events import OutboundMessage +from nanobot.bus.queue import MessageBus +from nanobot.channels.base import BaseChannel +from nanobot.config.schema import FeishuConfig +from nanobot.media.manager import MediaManager + +try: + import lark_oapi as lark + from lark_oapi import ws, EventDispatcherHandler + from lark_oapi.api.im.v1 import ( + CreateMessageRequest, + CreateMessageRequestBody, + CreateFileRequest, + CreateFileRequestBody, + GetMessageResourceRequest, + CreateMessageReactionRequest, + CreateMessageReactionRequestBody, + Emoji, + ) + + FEISHU_AVAILABLE = True +except ImportError: + FEISHU_AVAILABLE = False + + +class FeishuChannel(BaseChannel): + """ + Feishu/Lark channel using WebSocket long-connection. + + Uses lark-oapi SDK's ws.Client for receiving messages (no public IP needed). + Uses lark-oapi API client for sending messages and uploading files. 
+ """ + + name = "feishu" + + def __init__(self, config: FeishuConfig, bus: MessageBus): + super().__init__(config, bus) + self.config: FeishuConfig = config + self._ws_client: Any = None + self._lark_client: Any = None + self._loop: asyncio.AbstractEventLoop | None = None + self._media = MediaManager() + + async def start(self) -> None: + """Start the Feishu bot with WebSocket long-connection.""" + if not FEISHU_AVAILABLE: + logger.error("lark-oapi not installed. Run: pip install lark-oapi") + return + + if not self.config.app_id or not self.config.app_secret: + logger.error("Feishu app_id or app_secret not configured") + return + + self._running = True + self._loop = asyncio.get_running_loop() + + # Create lark API client for sending messages + self._lark_client = ( + lark.Client.builder() + .app_id(self.config.app_id) + .app_secret(self.config.app_secret) + .log_level(lark.LogLevel.DEBUG) + .build() + ) + + logger.info("Starting Feishu bot (WebSocket mode)...") + + # Run blocking WebSocket client in a separate thread + await asyncio.to_thread(self._run_ws_client) + + def _run_ws_client(self) -> None: + """Run the blocking WebSocket client (called in a thread).""" + # Build event handler + handler = ( + EventDispatcherHandler.builder("", "") + .register_p2_im_message_receive_v1(self._on_message_sync) + .build() + ) + + # Create and start WebSocket client + self._ws_client = ws.Client( + self.config.app_id, + self.config.app_secret, + event_handler=handler, + log_level=lark.LogLevel.DEBUG, + ) + + logger.info("Feishu WebSocket client connecting...") + self._ws_client.start() + + def _on_message_sync(self, data: Any) -> None: + """ + Handle incoming message from Feishu SDK (called from SDK thread). + Bridges to async via run_coroutine_threadsafe. 
+ """ + if self._loop is None: + return + + asyncio.run_coroutine_threadsafe(self._handle_feishu_message(data), self._loop) + + @staticmethod + def _extract_post_text(content_json: dict[str, Any]) -> tuple[str, list[str]]: + """Extract plain text and image keys from a Feishu 'post' (rich text) message. + + Feishu event payloads use the **flat** format for ``message.content``:: + + {"title": "...", "content": [[{"tag": "text", "text": "..."}], ...]} + + The Send-API uses a **wrapped** format:: + + {"post": {"zh_cn": {"title": "...", "content": [...]}}} + + This method handles both. + + Returns: + (text, image_keys) — extracted plain text and list of Feishu image keys. + """ + # Determine which format we have + if "content" in content_json and isinstance(content_json["content"], list): + # Flat format (from event payload): {"title": "...", "content": [...]} + lang_data = content_json + elif "post" in content_json: + # Wrapped format (from Send API): {"post": {"zh_cn": {...}}} + post_data = content_json["post"] + lang_data = None + for key in ("zh_cn", "en_us"): + if key in post_data: + lang_data = post_data[key] + break + if lang_data is None: + for v in post_data.values(): + if isinstance(v, dict): + lang_data = v + break + if not lang_data: + return "", [] + else: + return "", [] + + title = lang_data.get("title", "") + paragraphs = lang_data.get("content", []) + + text_parts: list[str] = [] + image_keys: list[str] = [] + + if title: + text_parts.append(title) + + for paragraph in paragraphs: + if not isinstance(paragraph, list): + continue + para_text: list[str] = [] + for block in paragraph: + if not isinstance(block, dict): + continue + tag = block.get("tag", "") + if tag == "text": + para_text.append(block.get("text", "")) + elif tag == "a": + para_text.append(block.get("text", "")) + elif tag == "at": + user_name = block.get("user_name") or block.get("user_id", "") + if user_name: + para_text.append(f"@{user_name}") + elif tag == "img": + image_key = 
block.get("image_key", "") + if image_key: + image_keys.append(image_key) + para_text.append("[Image]") + elif tag == "media": + para_text.append("[Media]") + if para_text: + text_parts.append("".join(para_text)) + + return "\n".join(text_parts), image_keys + + async def _download_feishu_image( + self, message_id: str, image_key: str + ) -> str | None: + """Download an image from Feishu using its image_key. + + Returns the local file path on success, or None on failure. + """ + if not self._lark_client: + logger.warning("Feishu client not initialized, cannot download image") + return None + + try: + request = ( + GetMessageResourceRequest.builder() + .message_id(message_id) + .file_key(image_key) + .type("image") + .build() + ) + + response = await asyncio.to_thread( + self._lark_client.im.v1.message_resource.get, request + ) + + if not response.success(): + logger.error( + f"Failed to download Feishu image {image_key}: " + f"code={response.code}, msg={response.msg}" + ) + return None + + # Save via MediaManager + raw_bytes = response.file.read() + file_path = self._media.save( + data=raw_bytes, + channel="feishu", + original_name=f"{image_key[:20]}.png", + extension=".png", + ) + + logger.info(f"Downloaded Feishu image: {image_key[:20]}... -> {file_path}") + return file_path + + except Exception as e: + logger.error(f"Error downloading Feishu image {image_key}: {e}") + return None + + async def _download_feishu_file( + self, message_id: str, file_key: str, file_name: str = "" + ) -> str | None: + """Download a file from Feishu using its file_key. + + Returns the local file path on success, or None on failure. 
+ """ + if not self._lark_client: + logger.warning("Feishu client not initialized, cannot download file") + return None + + try: + request = ( + GetMessageResourceRequest.builder() + .message_id(message_id) + .file_key(file_key) + .type("file") + .build() + ) + + response = await asyncio.to_thread( + self._lark_client.im.v1.message_resource.get, request + ) + + if not response.success(): + logger.error( + f"Failed to download Feishu file {file_key}: " + f"code={response.code}, msg={response.msg}" + ) + return None + + # Save via MediaManager + raw_bytes = response.file.read() + original_name = file_name or f"{file_key[:20]}.bin" + extension = Path(original_name).suffix if original_name else ".bin" + + file_path = self._media.save( + data=raw_bytes, + channel="feishu", + original_name=original_name, + extension=extension or ".bin", + ) + + logger.info(f"Downloaded Feishu file: {original_name} -> {file_path}") + return file_path + + except Exception as e: + logger.error(f"Error downloading Feishu file {file_key}: {e}") + return None + + async def _add_reaction(self, message_id: str, emoji_type: str = "DONE") -> None: + """Add an emoji reaction (thumbs-up / done) to a Feishu message. + + This is fire-and-forget; failures are logged but never raised. 
+ """ + if not self._lark_client or not message_id: + return + + try: + body = ( + CreateMessageReactionRequestBody.builder() + .reaction_type(Emoji.builder().emoji_type(emoji_type).build()) + .build() + ) + + request = ( + CreateMessageReactionRequest.builder() + .message_id(message_id) + .request_body(body) + .build() + ) + + response = await asyncio.to_thread( + self._lark_client.im.v1.message_reaction.create, request + ) + + if not response.success(): + logger.debug( + f"Failed to add reaction to {message_id}: " + f"code={response.code}, msg={response.msg}" + ) + else: + logger.debug(f"Added reaction '{emoji_type}' to message {message_id}") + + except Exception as e: + logger.debug(f"Error adding reaction: {e}") + + async def _handle_feishu_message(self, data: Any) -> None: + """Process a Feishu message event and forward to the message bus.""" + try: + # Extract message data from the event + event = data.event + message = event.message + sender = event.sender + + # Get sender open_id + sender_id = sender.sender_id.open_id if sender.sender_id else "" + chat_id = message.chat_id or "" + + msg_type = message.message_type + message_id = message.message_id or "" + + # Parse message content based on type + content = "" + media_paths: list[str] = [] + image_keys: list[str] = [] + metadata: dict[str, Any] = { + "message_id": message_id, + "chat_type": message.chat_type, + "msg_type": msg_type, + } + + try: + content_json = json.loads(message.content) + + if msg_type == "text": + # Text messages: {"text": "..."} + content = content_json.get("text", "") + + elif msg_type == "post": + # Rich text (post) messages: extract text and image keys + content, image_keys = self._extract_post_text(content_json) + + elif msg_type == "image": + # Standalone image messages: {"image_key": "..."} + img_key = content_json.get("image_key", "") + if img_key: + image_keys.append(img_key) + content = "[User sent an image]" + + elif msg_type == "file": + # File messages: {"file_key": "...", 
"file_name": "...", "size": "..."} + file_key = content_json.get("file_key", "") + file_name = content_json.get("file_name", "file") + if file_key and message_id: + local_path = await self._download_feishu_file( + message_id, file_key, file_name + ) + if local_path: + media_paths.append(local_path) + content = f"[User sent a file: {file_name}]" + + else: + logger.debug(f"Ignoring unsupported message type: {msg_type}") + return + + except (json.JSONDecodeError, TypeError): + content = message.content or "" + + if not content: + return + + # Download images from Feishu + if image_keys and message_id: + for img_key in image_keys: + local_path = await self._download_feishu_image(message_id, img_key) + if local_path: + media_paths.append(local_path) + + logger.info( + f"Feishu {msg_type} from {sender_id}: {content[:100]}..." + + (f" ({len(media_paths)} media files)" if media_paths else "") + ) + + # Acknowledge receipt with an emoji reaction (fire-and-forget) + if message_id: + asyncio.create_task(self._add_reaction(message_id, "DONE")) + + # Forward to the message bus + await self._handle_message( + sender_id=sender_id, + chat_id=chat_id, + content=content, + media=media_paths if media_paths else None, + metadata=metadata, + ) + + except Exception as e: + logger.error(f"Error handling Feishu message: {e}") + + async def stop(self) -> None: + """Stop the Feishu bot.""" + self._running = False + # The ws.Client doesn't have a clean stop mechanism; + # it will terminate when the process exits. 
+ logger.info("Feishu channel stopped") + + async def send(self, msg: OutboundMessage) -> None: + """Send a message through Feishu.""" + if not self._lark_client: + logger.warning("Feishu client not initialized") + return + + try: + # Send text content + if msg.content: + await self._send_text(msg.chat_id, msg.content) + + # Send file attachments + if msg.media: + for file_path in msg.media: + await self._send_file(msg.chat_id, file_path) + + except Exception as e: + logger.error(f"Error sending Feishu message: {e}") + + async def _send_text(self, chat_id: str, content: str) -> None: + """Send a text message to a Feishu chat.""" + body = ( + CreateMessageRequestBody.builder() + .receive_id(chat_id) + .msg_type("text") + .content(json.dumps({"text": content})) + .build() + ) + + request = ( + CreateMessageRequest.builder() + .receive_id_type("chat_id") + .request_body(body) + .build() + ) + + # Run sync API call in thread to avoid blocking the event loop + response = await asyncio.to_thread( + self._lark_client.im.v1.message.create, request + ) + + if not response.success(): + logger.error( + f"Failed to send Feishu text: code={response.code}, msg={response.msg}" + ) + + async def _send_file(self, chat_id: str, file_path: str) -> None: + """Upload a file to Feishu and send it as a file message.""" + path = Path(file_path) + if not path.exists(): + logger.error(f"File not found: {file_path}") + return + + # Determine file type for Feishu API + suffix = path.suffix.lower() + if suffix == ".pdf": + file_type = "pdf" + elif suffix in (".png", ".jpg", ".jpeg", ".gif"): + file_type = "image" + else: + file_type = "stream" + + try: + # Step 1: Upload file to Feishu + # Read file bytes into memory first, then wrap in BytesIO. + # This avoids leaking file handles (the old `open(path, "rb")` + # was never closed, causing FD exhaustion on repeated compiles). 
+ file_bytes = path.read_bytes() + file_stream = io.BytesIO(file_bytes) + + upload_body = ( + CreateFileRequestBody.builder() + .file_type(file_type) + .file_name(path.name) + .file(file_stream) + .build() + ) + + upload_request = ( + CreateFileRequest.builder().request_body(upload_body).build() + ) + + upload_response = await asyncio.to_thread( + self._lark_client.im.v1.file.create, upload_request + ) + + if not upload_response.success(): + logger.error( + f"Failed to upload file to Feishu: code={upload_response.code}, " + f"msg={upload_response.msg}" + ) + return + + file_key = upload_response.data.file_key + + # Step 2: Send file message + msg_body = ( + CreateMessageRequestBody.builder() + .receive_id(chat_id) + .msg_type("file") + .content(json.dumps({"file_key": file_key})) + .build() + ) + + msg_request = ( + CreateMessageRequest.builder() + .receive_id_type("chat_id") + .request_body(msg_body) + .build() + ) + + msg_response = await asyncio.to_thread( + self._lark_client.im.v1.message.create, msg_request + ) + + if not msg_response.success(): + logger.error( + f"Failed to send file message: code={msg_response.code}, " + f"msg={msg_response.msg}" + ) + else: + logger.info(f"Sent file {path.name} to Feishu chat {chat_id}") + + except Exception as e: + logger.error(f"Error sending file to Feishu: {e}") diff --git a/nanobot/nanobot/channels/manager.py b/nanobot/nanobot/channels/manager.py new file mode 100644 index 0000000..70a6d2a --- /dev/null +++ b/nanobot/nanobot/channels/manager.py @@ -0,0 +1,150 @@ +"""Channel manager for coordinating chat channels.""" + +import asyncio +from typing import Any + +from loguru import logger + +from nanobot.bus.queue import MessageBus +from nanobot.channels.base import BaseChannel +from nanobot.config.schema import Config + + +class ChannelManager: + """ + Manages chat channels and coordinates message routing. + + Responsibilities: + - Initialize enabled channels (Telegram, WhatsApp, etc.) 
+ - Start/stop channels + - Route outbound messages + """ + + def __init__(self, config: Config, bus: MessageBus): + self.config = config + self.bus = bus + self.channels: dict[str, BaseChannel] = {} + self._dispatch_task: asyncio.Task | None = None + + self._init_channels() + + def _init_channels(self) -> None: + """Initialize channels based on config.""" + + # Telegram channel + if self.config.channels.telegram.enabled: + try: + from nanobot.channels.telegram import TelegramChannel + + self.channels["telegram"] = TelegramChannel( + self.config.channels.telegram, + self.bus, + groq_api_key=self.config.providers.groq.api_key, + ) + logger.info("Telegram channel enabled") + except ImportError as e: + logger.warning(f"Telegram channel not available: {e}") + + # WhatsApp channel + if self.config.channels.whatsapp.enabled: + try: + from nanobot.channels.whatsapp import WhatsAppChannel + + self.channels["whatsapp"] = WhatsAppChannel( + self.config.channels.whatsapp, self.bus + ) + logger.info("WhatsApp channel enabled") + except ImportError as e: + logger.warning(f"WhatsApp channel not available: {e}") + + # Feishu channel + if self.config.channels.feishu.enabled: + try: + from nanobot.channels.feishu import FeishuChannel + + self.channels["feishu"] = FeishuChannel( + self.config.channels.feishu, self.bus + ) + logger.info("Feishu channel enabled") + except ImportError as e: + logger.warning(f"Feishu channel not available: {e}") + + async def start_all(self) -> None: + """Start all enabled channels and the outbound dispatcher.""" + if not self.channels: + logger.warning("No channels enabled") + return + + # Start outbound dispatcher + self._dispatch_task = asyncio.create_task(self._dispatch_outbound()) + + # Start all enabled channels + tasks = [] + for name, channel in self.channels.items(): + logger.info(f"Starting {name} channel...") + tasks.append(asyncio.create_task(channel.start())) + + # Wait for all to complete (they should run forever) + # Check results for 
startup exceptions to avoid silent failures + results = await asyncio.gather(*tasks, return_exceptions=True) + for name, result in zip(self.channels.keys(), results): + if isinstance(result, Exception): + logger.error(f"Channel '{name}' failed to start: {result}") + + async def stop_all(self) -> None: + """Stop all channels and the dispatcher.""" + logger.info("Stopping all channels...") + + # Stop dispatcher + if self._dispatch_task: + self._dispatch_task.cancel() + try: + await self._dispatch_task + except asyncio.CancelledError: + pass + + # Stop all channels + for name, channel in self.channels.items(): + try: + await channel.stop() + logger.info(f"Stopped {name} channel") + except Exception as e: + logger.error(f"Error stopping {name}: {e}") + + async def _dispatch_outbound(self) -> None: + """Dispatch outbound messages to the appropriate channel.""" + logger.info("Outbound dispatcher started") + + while True: + try: + msg = await asyncio.wait_for(self.bus.consume_outbound(), timeout=1.0) + + channel = self.channels.get(msg.channel) + if channel: + try: + await channel.send(msg) + except Exception as e: + logger.error(f"Error sending to {msg.channel}: {e}") + else: + logger.warning(f"Unknown channel: {msg.channel}") + + except asyncio.TimeoutError: + continue + except asyncio.CancelledError: + break + + def get_channel(self, name: str) -> BaseChannel | None: + """Get a channel by name.""" + return self.channels.get(name) + + def get_status(self) -> dict[str, Any]: + """Get status of all channels.""" + return { + name: {"enabled": True, "running": channel.is_running} + for name, channel in self.channels.items() + } + + @property + def enabled_channels(self) -> list[str]: + """Get list of enabled channel names.""" + return list(self.channels.keys()) diff --git a/nanobot/nanobot/channels/telegram.py b/nanobot/nanobot/channels/telegram.py new file mode 100644 index 0000000..0c7bf82 --- /dev/null +++ b/nanobot/nanobot/channels/telegram.py @@ -0,0 +1,413 @@ 
+"""Telegram channel implementation using python-telegram-bot.""" + +import asyncio +import re + +from loguru import logger +from telegram import Update +from telegram.ext import Application, MessageHandler, filters, ContextTypes + +from nanobot.bus.events import OutboundMessage +from nanobot.bus.queue import MessageBus +from nanobot.channels.base import BaseChannel +from nanobot.config.schema import TelegramConfig + + +def _markdown_to_telegram_html(text: str) -> str: + """ + Convert markdown to Telegram-safe HTML. + """ + if not text: + return "" + + # 1. Extract and protect code blocks (preserve content from other processing) + code_blocks: list[str] = [] + + def save_code_block(m: re.Match) -> str: + code_blocks.append(m.group(1)) + return f"\x00CB{len(code_blocks) - 1}\x00" + + text = re.sub(r"```[\w]*\n?([\s\S]*?)```", save_code_block, text) + + # 2. Extract and protect inline code + inline_codes: list[str] = [] + + def save_inline_code(m: re.Match) -> str: + inline_codes.append(m.group(1)) + return f"\x00IC{len(inline_codes) - 1}\x00" + + text = re.sub(r"`([^`]+)`", save_inline_code, text) + + # 3. Headers # Title -> just the title text + text = re.sub(r"^#{1,6}\s+(.+)$", r"\1", text, flags=re.MULTILINE) + + # 4. Blockquotes > text -> just the text (before HTML escaping) + text = re.sub(r"^>\s*(.*)$", r"\1", text, flags=re.MULTILINE) + + # 5. Escape HTML special characters + text = text.replace("&", "&").replace("<", "<").replace(">", ">") + + # 6. Links [text](url) - must be before bold/italic to handle nested cases + text = re.sub(r"\[([^\]]+)\]\(([^)]+)\)", r'<a href="\2">\1</a>', text) + + # 7. Bold **text** or __text__ + text = re.sub(r"\*\*(.+?)\*\*", r"<b>\1</b>", text) + text = re.sub(r"__(.+?)__", r"<b>\1</b>", text) + + # 8. Italic _text_ (avoid matching inside words like some_var_name) + text = re.sub(r"(?<![a-zA-Z0-9])_([^_]+)_(?![a-zA-Z0-9])", r"<i>\1</i>", text) + + # 9. 
Strikethrough ~~text~~ + text = re.sub(r"~~(.+?)~~", r"<s>\1</s>", text) + + # 10. Bullet lists - item -> • item + text = re.sub(r"^[-*]\s+", "• ", text, flags=re.MULTILINE) + + # 11. Restore inline code with HTML tags + for i, code in enumerate(inline_codes): + # Escape HTML in code content + escaped = code.replace("&", "&").replace("<", "<").replace(">", ">") + text = text.replace(f"\x00IC{i}\x00", f"<code>{escaped}</code>") + + # 12. Restore code blocks with HTML tags + for i, code in enumerate(code_blocks): + # Escape HTML in code content + escaped = code.replace("&", "&").replace("<", "<").replace(">", ">") + text = text.replace(f"\x00CB{i}\x00", f"<pre><code>{escaped}</code></pre>") + + return text + + +class TelegramChannel(BaseChannel): + """ + Telegram channel using long polling. + + Simple and reliable - no webhook/public IP needed. + """ + + name = "telegram" + + def __init__(self, config: TelegramConfig, bus: MessageBus, groq_api_key: str = ""): + super().__init__(config, bus) + self.config: TelegramConfig = config + self.groq_api_key = groq_api_key + self._app: Application | None = None + self._chat_ids: dict[str, int] = {} # Map sender_id to chat_id for replies + + async def start(self) -> None: + """Start the Telegram bot with long polling.""" + if not self.config.token: + logger.error("Telegram bot token not configured") + return + + self._running = True + + # Build the application + self._app = Application.builder().token(self.config.token).build() + + # Add message handler for text, photos, voice, documents + self._app.add_handler( + MessageHandler( + ( + filters.TEXT + | filters.PHOTO + | filters.VOICE + | filters.AUDIO + | filters.Document.ALL + ) + & ~filters.COMMAND, + self._on_message, + ) + ) + + # Add command handlers + from telegram.ext import CommandHandler + + self._app.add_handler(CommandHandler("start", self._on_start)) + self._app.add_handler(CommandHandler("clear", self._on_clear)) + + logger.info("Starting Telegram bot (polling 
mode)...") + + # Initialize and start polling + await self._app.initialize() + await self._app.start() + + # Get bot info + bot_info = await self._app.bot.get_me() + logger.info(f"Telegram bot @{bot_info.username} connected") + + # Start polling (this runs until stopped) + await self._app.updater.start_polling( + allowed_updates=["message"], + drop_pending_updates=True, # Ignore old messages on startup + ) + + # Keep running until stopped + while self._running: + await asyncio.sleep(1) + + async def stop(self) -> None: + """Stop the Telegram bot.""" + self._running = False + + if self._app: + logger.info("Stopping Telegram bot...") + await self._app.updater.stop() + await self._app.stop() + await self._app.shutdown() + self._app = None + + async def send(self, msg: OutboundMessage) -> None: + """Send a message through Telegram.""" + if not self._app: + logger.warning("Telegram bot not running") + return + + try: + chat_id = int(msg.chat_id) + except ValueError: + logger.error(f"Invalid chat_id: {msg.chat_id}") + return + + # Send text content (with Telegram 4096 char limit handling) + if msg.content: + await self._send_text(chat_id, msg.content) + + # Send file attachments + if msg.media: + for file_path in msg.media: + await self._send_file(chat_id, file_path) + + async def _send_text(self, chat_id: int, content: str) -> None: + """Send text message, splitting if it exceeds Telegram's 4096 char limit.""" + html_content = _markdown_to_telegram_html(content) + + # Telegram message limit is 4096 characters + MAX_LEN = 4096 + if len(html_content) <= MAX_LEN: + await self._send_text_chunk(chat_id, html_content, content) + return + + # Split long messages — try to split on newlines + chunks = self._split_text(html_content, MAX_LEN) + for chunk in chunks: + await self._send_text_chunk(chat_id, chunk, chunk) + + async def _send_text_chunk( + self, chat_id: int, html_content: str, fallback_content: str + ) -> None: + """Send a single text chunk, falling back to plain text 
if HTML fails.""" + try: + await self._app.bot.send_message( + chat_id=chat_id, text=html_content, parse_mode="HTML" + ) + except Exception as e: + logger.warning(f"HTML parse failed, falling back to plain text: {e}") + try: + await self._app.bot.send_message(chat_id=chat_id, text=fallback_content) + except Exception as e2: + logger.error(f"Error sending Telegram message: {e2}") + + async def _send_file(self, chat_id: int, file_path: str) -> None: + """Send a file as a Telegram document.""" + from pathlib import Path + + path = Path(file_path) + if not path.exists(): + logger.error(f"File not found: {file_path}") + return + + try: + with open(path, "rb") as f: + await self._app.bot.send_document( + chat_id=chat_id, + document=f, + filename=path.name, + ) + logger.info(f"Sent file to Telegram: {path.name}") + except Exception as e: + logger.error(f"Error sending file to Telegram: {e}") + + @staticmethod + def _split_text(text: str, max_len: int) -> list[str]: + """Split text into chunks of max_len, preferring newline boundaries.""" + chunks: list[str] = [] + while len(text) > max_len: + # Try to split at the last newline before max_len + split_at = text.rfind("\n", 0, max_len) + if split_at == -1 or split_at < max_len // 2: + # No good newline found, split at max_len + split_at = max_len + chunks.append(text[:split_at]) + text = text[split_at:].lstrip("\n") + if text: + chunks.append(text) + return chunks + + async def _on_start( + self, update: Update, context: ContextTypes.DEFAULT_TYPE + ) -> None: + """Handle /start command.""" + if not update.message or not update.effective_user: + return + + user = update.effective_user + await update.message.reply_text( + f"👋 Hi {user.first_name}! I'm nanobot.\n\n" + "Send me a message and I'll respond!" 
+ ) + + async def _on_clear( + self, update: Update, context: ContextTypes.DEFAULT_TYPE + ) -> None: + """Handle /clear command by forwarding it to the agent loop.""" + if not update.message or not update.effective_user: + return + + user = update.effective_user + chat_id = update.message.chat_id + sender_id = str(user.id) + if user.username: + sender_id = f"{sender_id}|{user.username}" + + # Store chat_id for replies + self._chat_ids[sender_id] = chat_id + + # Forward "/clear" as a regular message to the agent loop + await self._handle_message( + sender_id=sender_id, + chat_id=str(chat_id), + content="/clear", + media=[], + metadata={ + "message_id": update.message.message_id, + "user_id": user.id, + "username": user.username, + "first_name": user.first_name, + "is_group": update.message.chat.type != "private", + }, + ) + + async def _on_message( + self, update: Update, context: ContextTypes.DEFAULT_TYPE + ) -> None: + """Handle incoming messages (text, photos, voice, documents).""" + if not update.message or not update.effective_user: + return + + message = update.message + user = update.effective_user + chat_id = message.chat_id + + # Use stable numeric ID, but keep username for allowlist compatibility + sender_id = str(user.id) + if user.username: + sender_id = f"{sender_id}|{user.username}" + + # Store chat_id for replies + self._chat_ids[sender_id] = chat_id + + # Build content from text and/or media + content_parts = [] + media_paths = [] + + # Text content + if message.text: + content_parts.append(message.text) + if message.caption: + content_parts.append(message.caption) + + # Handle media files + media_file = None + media_type = None + + if message.photo: + media_file = message.photo[-1] # Largest photo + media_type = "image" + elif message.voice: + media_file = message.voice + media_type = "voice" + elif message.audio: + media_file = message.audio + media_type = "audio" + elif message.document: + media_file = message.document + media_type = "file" + + # 
Download media if present + if media_file and self._app: + try: + file = await self._app.bot.get_file(media_file.file_id) + ext = self._get_extension( + media_type, getattr(media_file, "mime_type", None) + ) + + # Save to workspace/media/ + from pathlib import Path + + media_dir = Path.home() / ".nanobot" / "media" + media_dir.mkdir(parents=True, exist_ok=True) + + file_path = media_dir / f"{media_file.file_id[:16]}{ext}" + await file.download_to_drive(str(file_path)) + + media_paths.append(str(file_path)) + + # Handle voice transcription + if media_type == "voice" or media_type == "audio": + from nanobot.providers.transcription import ( + GroqTranscriptionProvider, + ) + + transcriber = GroqTranscriptionProvider(api_key=self.groq_api_key) + transcription = await transcriber.transcribe(file_path) + if transcription: + logger.info( + f"Transcribed {media_type}: {transcription[:50]}..." + ) + content_parts.append(f"[transcription: {transcription}]") + else: + content_parts.append(f"[{media_type}: {file_path}]") + else: + content_parts.append(f"[{media_type}: {file_path}]") + + logger.debug(f"Downloaded {media_type} to {file_path}") + except Exception as e: + logger.error(f"Failed to download media: {e}") + content_parts.append(f"[{media_type}: download failed]") + + content = "\n".join(content_parts) if content_parts else "[empty message]" + + logger.debug(f"Telegram message from {sender_id}: {content[:50]}...") + + # Forward to the message bus + await self._handle_message( + sender_id=sender_id, + chat_id=str(chat_id), + content=content, + media=media_paths, + metadata={ + "message_id": message.message_id, + "user_id": user.id, + "username": user.username, + "first_name": user.first_name, + "is_group": message.chat.type != "private", + }, + ) + + def _get_extension(self, media_type: str, mime_type: str | None) -> str: + """Get file extension based on media type.""" + if mime_type: + ext_map = { + "image/jpeg": ".jpg", + "image/png": ".png", + "image/gif": ".gif", + 
"audio/ogg": ".ogg", + "audio/mpeg": ".mp3", + "audio/mp4": ".m4a", + } + if mime_type in ext_map: + return ext_map[mime_type] + + type_map = {"image": ".jpg", "voice": ".ogg", "audio": ".mp3", "file": ""} + return type_map.get(media_type, "") diff --git a/nanobot/nanobot/channels/whatsapp.py b/nanobot/nanobot/channels/whatsapp.py new file mode 100644 index 0000000..7c0009b --- /dev/null +++ b/nanobot/nanobot/channels/whatsapp.py @@ -0,0 +1,140 @@ +"""WhatsApp channel implementation using Node.js bridge.""" + +import asyncio +import json + +from loguru import logger + +from nanobot.bus.events import OutboundMessage +from nanobot.bus.queue import MessageBus +from nanobot.channels.base import BaseChannel +from nanobot.config.schema import WhatsAppConfig + + +class WhatsAppChannel(BaseChannel): + """ + WhatsApp channel that connects to a Node.js bridge. + + The bridge uses @whiskeysockets/baileys to handle the WhatsApp Web protocol. + Communication between Python and Node.js is via WebSocket. 
+ """ + + name = "whatsapp" + + def __init__(self, config: WhatsAppConfig, bus: MessageBus): + super().__init__(config, bus) + self.config: WhatsAppConfig = config + self._ws = None + self._connected = False + + async def start(self) -> None: + """Start the WhatsApp channel by connecting to the bridge.""" + import websockets + + bridge_url = self.config.bridge_url + + logger.info(f"Connecting to WhatsApp bridge at {bridge_url}...") + + self._running = True + + while self._running: + try: + async with websockets.connect(bridge_url) as ws: + self._ws = ws + self._connected = True + logger.info("Connected to WhatsApp bridge") + + # Listen for messages + async for message in ws: + try: + await self._handle_bridge_message(message) + except Exception as e: + logger.error(f"Error handling bridge message: {e}") + + except asyncio.CancelledError: + break + except Exception as e: + self._connected = False + self._ws = None + logger.warning(f"WhatsApp bridge connection error: {e}") + + if self._running: + logger.info("Reconnecting in 5 seconds...") + await asyncio.sleep(5) + + async def stop(self) -> None: + """Stop the WhatsApp channel.""" + self._running = False + self._connected = False + + if self._ws: + await self._ws.close() + self._ws = None + + async def send(self, msg: OutboundMessage) -> None: + """Send a message through WhatsApp.""" + if not self._ws or not self._connected: + logger.warning("WhatsApp bridge not connected") + return + + try: + payload = {"type": "send", "to": msg.chat_id, "text": msg.content} + await self._ws.send(json.dumps(payload)) + except Exception as e: + logger.error(f"Error sending WhatsApp message: {e}") + + async def _handle_bridge_message(self, raw: str) -> None: + """Handle a message from the bridge.""" + try: + data = json.loads(raw) + except json.JSONDecodeError: + logger.warning(f"Invalid JSON from bridge: {raw[:100]}") + return + + msg_type = data.get("type") + + if msg_type == "message": + # Incoming message from WhatsApp + sender = 
data.get("sender", "") + content = data.get("content", "") + + # sender is typically: <phone>@s.whatsapp.net + # Extract just the phone number as chat_id + chat_id = sender.split("@")[0] if "@" in sender else sender + + # Handle voice transcription if it's a voice message + if content == "[Voice Message]": + logger.info( + f"Voice message received from {chat_id}, but direct download from bridge is not yet supported." + ) + content = ( + "[Voice Message: Transcription not available for WhatsApp yet]" + ) + + await self._handle_message( + sender_id=chat_id, + chat_id=sender, # Use full JID for replies + content=content, + metadata={ + "message_id": data.get("id"), + "timestamp": data.get("timestamp"), + "is_group": data.get("isGroup", False), + }, + ) + + elif msg_type == "status": + # Connection status update + status = data.get("status") + logger.info(f"WhatsApp status: {status}") + + if status == "connected": + self._connected = True + elif status == "disconnected": + self._connected = False + + elif msg_type == "qr": + # QR code for authentication + logger.info("Scan QR code in the bridge terminal to connect WhatsApp") + + elif msg_type == "error": + logger.error(f"WhatsApp bridge error: {data.get('error')}") diff --git a/nanobot/nanobot/cli/__init__.py b/nanobot/nanobot/cli/__init__.py new file mode 100644 index 0000000..b023cad --- /dev/null +++ b/nanobot/nanobot/cli/__init__.py @@ -0,0 +1 @@ +"""CLI module for nanobot.""" diff --git a/nanobot/nanobot/cli/commands.py b/nanobot/nanobot/cli/commands.py new file mode 100644 index 0000000..c5bad8e --- /dev/null +++ b/nanobot/nanobot/cli/commands.py @@ -0,0 +1,691 @@ +"""CLI commands for nanobot.""" + +import asyncio +from pathlib import Path + +import typer +from rich.console import Console +from rich.table import Table + +from nanobot import __version__, __logo__ + +app = typer.Typer( + name="nanobot", + help=f"{__logo__} nanobot - Personal AI Assistant", + no_args_is_help=True, +) + +console = Console() + + +def 
@app.command()
def onboard():
    """Initialize nanobot configuration and workspace."""
    # The docstring above is what typer shows as the command help, so it is
    # kept short; the details live in comments.
    from nanobot.config.loader import get_config_path, save_config
    from nanobot.config.schema import Config
    from nanobot.utils.helpers import get_workspace_path

    target = get_config_path()

    # Never clobber an existing config without asking first.
    if target.exists():
        console.print(f"[yellow]Config already exists at {target}[/yellow]")
        wants_overwrite = typer.confirm("Overwrite?")
        if not wants_overwrite:
            raise typer.Exit()

    # Persist a config made of schema defaults only.
    save_config(Config())
    console.print(f"[green]✓[/green] Created config at {target}")

    # Resolve the workspace and drop the bootstrap files into it.
    ws_dir = get_workspace_path()
    console.print(f"[green]✓[/green] Created workspace at {ws_dir}")
    _create_workspace_templates(ws_dir)

    # Closing hints, printed one call per entry in this order.
    closing_lines = (
        f"\n{__logo__} nanobot is ready!",
        "\nNext steps:",
        " 1. Add your API key to [cyan]~/.nanobot/config.json[/cyan]",
        " Get one at: https://openrouter.ai/keys",
        ' 2. Chat: [cyan]nanobot agent -m "Hello!"[/cyan]',
        "\n[dim]Want Telegram/WhatsApp? See: https://github.com/HKUDS/nanobot#-chat-apps[/dim]",
    )
    for line in closing_lines:
        console.print(line)
+ +## Guidelines + +- Always explain what you're doing before taking actions +- Ask for clarification when the request is ambiguous +- Use tools to help accomplish tasks +- Remember important information in your memory files +""", + "SOUL.md": """# Soul + +I am nanobot, a lightweight AI assistant. + +## Personality + +- Helpful and friendly +- Concise and to the point +- Curious and eager to learn + +## Values + +- Accuracy over speed +- User privacy and safety +- Transparency in actions +""", + "USER.md": """# User + +Information about the user goes here. + +## Preferences + +- Communication style: (casual/formal) +- Timezone: (your timezone) +- Language: (your preferred language) +""", + } + + for filename, content in templates.items(): + file_path = workspace / filename + if not file_path.exists(): + file_path.write_text(content) + console.print(f" [dim]Created {filename}[/dim]") + + # Create memory directory and MEMORY.md + memory_dir = workspace / "memory" + memory_dir.mkdir(exist_ok=True) + memory_file = memory_dir / "MEMORY.md" + if not memory_file.exists(): + memory_file.write_text("""# Long-term Memory + +This file stores important information that should persist across sessions. 
@app.command()
def gateway(
    port: int = typer.Option(18790, "--port", "-p", help="Gateway port"),
    verbose: bool = typer.Option(False, "--verbose", "-v", help="Verbose output"),
):
    """Start the nanobot gateway."""
    # Wires together the message bus, LLM provider, agent loop, cron service,
    # heartbeat service and channel manager, then runs them until interrupted.
    # NOTE(review): within this function `port` is only echoed in the startup
    # banner; nothing visible here binds a socket to it.
    from nanobot.config.loader import load_config, get_data_dir
    from nanobot.bus.queue import MessageBus
    from nanobot.providers.litellm_provider import LiteLLMProvider
    from nanobot.agent.loop import AgentLoop
    from nanobot.channels.manager import ChannelManager
    from nanobot.cron.service import CronService
    from nanobot.cron.types import CronJob
    from nanobot.heartbeat.service import HeartbeatService

    if verbose:
        import logging

        logging.basicConfig(level=logging.DEBUG)

    console.print(f"{__logo__} Starting nanobot gateway on port {port}...")

    config = load_config()

    # Create components
    bus = MessageBus()

    # Create provider (supports OpenRouter, Anthropic, OpenAI, Bedrock)
    api_key = config.get_api_key()
    api_base = config.get_api_base()
    model = config.agents.defaults.model
    # Bedrock models are allowed to run without an API key in the config.
    is_bedrock = model.startswith("bedrock/")

    if not api_key and not is_bedrock:
        console.print("[red]Error: No API key configured.[/red]")
        console.print(
            "Set one in ~/.nanobot/config.json under providers.openrouter.apiKey"
        )
        raise typer.Exit(1)

    provider = LiteLLMProvider(
        api_key=api_key, api_base=api_base, default_model=config.agents.defaults.model
    )

    # Create agent
    agent = AgentLoop(
        bus=bus,
        provider=provider,
        workspace=config.workspace_path,
        model=config.agents.defaults.model,
        max_iterations=config.agents.defaults.max_tool_iterations,
        brave_api_key=config.tools.web.search.api_key or None,
        exec_config=config.tools.exec,
        litewrite_config=config.litewrite,
        feishu_config=config.channels.feishu,
    )

    # Create cron service
    async def on_cron_job(job: CronJob) -> str | None:
        """Execute a cron job through the agent."""
        response = await agent.process_direct(
            job.payload.message, session_key=f"cron:{job.id}"
        )
        # Optionally deliver to channel
        if job.payload.deliver and job.payload.to:
            from nanobot.bus.events import OutboundMessage

            await bus.publish_outbound(
                OutboundMessage(
                    channel=job.payload.channel or "whatsapp",
                    chat_id=job.payload.to,
                    content=response or "",
                )
            )
        return response

    cron_store_path = get_data_dir() / "cron" / "jobs.json"
    cron = CronService(cron_store_path, on_job=on_cron_job)

    # Create heartbeat service
    async def on_heartbeat(prompt: str) -> str:
        """Execute heartbeat through the agent."""
        return await agent.process_direct(prompt, session_key="heartbeat")

    heartbeat = HeartbeatService(
        workspace=config.workspace_path,
        on_heartbeat=on_heartbeat,
        interval_s=30 * 60,  # 30 minutes
        enabled=True,
    )

    # Create channel manager
    channels = ChannelManager(config, bus)

    if channels.enabled_channels:
        console.print(
            f"[green]✓[/green] Channels enabled: {', '.join(channels.enabled_channels)}"
        )
    else:
        console.print("[yellow]Warning: No channels enabled[/yellow]")

    cron_status = cron.status()
    if cron_status["jobs"] > 0:
        console.print(f"[green]✓[/green] Cron: {cron_status['jobs']} scheduled jobs")

    console.print("[green]✓[/green] Heartbeat: every 30m")

    async def run():
        try:
            await cron.start()
            await heartbeat.start()
            await asyncio.gather(
                agent.run(),
                channels.start_all(),
            )
        finally:
            # Ctrl+C reaches this coroutine as task cancellation, not as
            # KeyboardInterrupt, so an `except KeyboardInterrupt` here would
            # normally never fire. `finally` runs the cleanup on every exit:
            # cancellation, error, or normal return.
            console.print("\nShutting down...")
            heartbeat.stop()
            cron.stop()
            agent.stop()
            await channels.stop_all()

    asyncio.run(run())
def _get_bridge_dir() -> Path:
    """Get the bridge directory, setting it up if needed.

    Returns ``~/.nanobot/bridge`` directly when it already contains a build
    (``dist/index.js``). Otherwise copies the bridge sources there and runs
    ``npm install`` and ``npm run build``.

    Raises:
        typer.Exit: With code 1 when npm is missing, the bridge sources cannot
            be found, or the install/build step fails.
    """
    import shutil
    import subprocess

    # User's bridge location
    user_bridge = Path.home() / ".nanobot" / "bridge"

    # Check if already built
    if (user_bridge / "dist" / "index.js").exists():
        return user_bridge

    # Check for npm and keep the resolved path, so the executable that was
    # checked is the one that runs. On Windows npm is a `.cmd` shim, which a
    # bare "npm" argv without a shell does not resolve.
    npm = shutil.which("npm")
    if not npm:
        console.print("[red]npm not found. Please install Node.js >= 18.[/red]")
        raise typer.Exit(1)

    # Find source bridge: first check package data, then source dir
    pkg_bridge = Path(__file__).parent.parent / "bridge"  # nanobot/bridge (installed)
    src_bridge = (
        Path(__file__).parent.parent.parent / "bridge"
    )  # repo root/bridge (dev)

    source = None
    if (pkg_bridge / "package.json").exists():
        source = pkg_bridge
    elif (src_bridge / "package.json").exists():
        source = src_bridge

    if not source:
        console.print("[red]Bridge source not found.[/red]")
        console.print("Try reinstalling: pip install --force-reinstall nanobot")
        raise typer.Exit(1)

    console.print(f"{__logo__} Setting up bridge...")

    # Copy to user directory (a stale, unbuilt copy is replaced wholesale)
    user_bridge.parent.mkdir(parents=True, exist_ok=True)
    if user_bridge.exists():
        shutil.rmtree(user_bridge)
    shutil.copytree(
        source, user_bridge, ignore=shutil.ignore_patterns("node_modules", "dist")
    )

    # Install and build
    try:
        console.print(" Installing dependencies...")
        subprocess.run(
            [npm, "install"], cwd=user_bridge, check=True, capture_output=True
        )

        console.print(" Building...")
        subprocess.run(
            [npm, "run", "build"], cwd=user_bridge, check=True, capture_output=True
        )

        console.print("[green]✓[/green] Bridge ready\n")
    except subprocess.CalledProcessError as e:
        console.print(f"[red]Build failed: {e}[/red]")
        if e.stderr:
            # npm output is not guaranteed to be UTF-8; reporting the failure
            # must not itself fail on undecodable bytes.
            console.print(f"[dim]{e.stderr.decode(errors='replace')[:500]}[/dim]")
        raise typer.Exit(1)

    return user_bridge
@cron_app.command("add")
def cron_add(
    name: str = typer.Option(..., "--name", "-n", help="Job name"),
    message: str = typer.Option(..., "--message", "-m", help="Message for agent"),
    every: int = typer.Option(None, "--every", "-e", help="Run every N seconds"),
    cron_expr: str = typer.Option(
        None, "--cron", "-c", help="Cron expression (e.g. '0 9 * * *')"
    ),
    at: str = typer.Option(None, "--at", help="Run once at time (ISO format)"),
    deliver: bool = typer.Option(
        False, "--deliver", "-d", help="Deliver response to channel"
    ),
    to: str = typer.Option(None, "--to", help="Recipient for delivery"),
    channel: str = typer.Option(
        None, "--channel", help="Channel for delivery (e.g. 'telegram', 'whatsapp')"
    ),
):
    """Add a scheduled job."""
    # The schedule comes from the first option given, checked in this order:
    # --every, --cron, --at. Exits with code 1 when none is given or when the
    # given value is unusable.
    from nanobot.config.loader import get_data_dir
    from nanobot.cron.service import CronService
    from nanobot.cron.types import CronSchedule

    # Determine schedule type
    if every is not None:
        # A non-positive interval would be stored but could never be
        # scheduled, so reject it up front.
        if every <= 0:
            console.print(
                "[red]Error: --every must be a positive number of seconds[/red]"
            )
            raise typer.Exit(1)
        schedule = CronSchedule(kind="every", every_ms=every * 1000)
    elif cron_expr:
        schedule = CronSchedule(kind="cron", expr=cron_expr)
    elif at:
        import datetime

        try:
            dt = datetime.datetime.fromisoformat(at)
        except ValueError:
            # Report malformed timestamps as a CLI error, not a traceback.
            console.print(
                f"[red]Error: Invalid --at value '{at}' (expected ISO format)[/red]"
            )
            raise typer.Exit(1)
        schedule = CronSchedule(kind="at", at_ms=int(dt.timestamp() * 1000))
    else:
        console.print("[red]Error: Must specify --every, --cron, or --at[/red]")
        raise typer.Exit(1)

    store_path = get_data_dir() / "cron" / "jobs.json"
    service = CronService(store_path)

    job = service.add_job(
        name=name,
        schedule=schedule,
        message=message,
        deliver=deliver,
        to=to,
        channel=channel,
    )

    console.print(f"[green]✓[/green] Added job '{job.name}' ({job.id})")
@cron_app.command("run")
def cron_run(
    job_id: str = typer.Argument(..., help="Job ID to run"),
    force: bool = typer.Option(False, "--force", "-f", help="Run even if disabled"),
):
    """Manually run a job."""
    from nanobot.config.loader import get_data_dir
    from nanobot.cron.service import CronService

    # Operate on the same on-disk job store the other cron commands use.
    jobs_file = get_data_dir() / "cron" / "jobs.json"
    svc = CronService(jobs_file)

    async def _execute():
        return await svc.run_job(job_id, force=force)

    succeeded = asyncio.run(_execute())

    # A falsy result from run_job is reported as a failure.
    if not succeeded:
        console.print(f"[red]Failed to run job {job_id}[/red]")
        return
    console.print("[green]✓[/green] Job executed")
def load_config(config_path: Path | None = None) -> Config:
    """
    Load configuration from file and/or environment variables.

    Intended priority: environment variables > config file > defaults.
    Environment variables use the NANOBOT__ prefix with __ as nested delimiter,
    e.g. NANOBOT__PROVIDERS__OPENROUTER__API_KEY.

    NOTE(review): the file contents are passed to ``Config`` as constructor
    keyword arguments. pydantic-settings documents init arguments as taking
    precedence over environment variables, so for keys present in the file
    the effective order may be file > env. Confirm which order is wanted.

    If the file exists but cannot be parsed or validated, a warning is printed
    and a ``Config()`` built without file values is returned instead.

    Args:
        config_path: Optional path to config file. Uses default if not provided.

    Returns:
        Loaded configuration object.
    """
    path = config_path or get_config_path()

    if path.exists():
        try:
            with open(path) as f:
                data = json.load(f)
            # The file uses camelCase keys; convert them to the snake_case
            # field names of the schema before building the model.
            # NOTE(review): whether env vars override these values depends on
            # pydantic-settings source priority (see docstring) - confirm.
            file_config = convert_keys(data)
            return Config(**file_config)
        except (json.JSONDecodeError, ValueError) as e:
            # json.JSONDecodeError is itself a ValueError subclass. Errors
            # outside ValueError (e.g. OSError from open) still propagate.
            print(f"Warning: Failed to load config from {path}: {e}")
            print("Using default configuration + environment variables.")

    # No usable config file: Pydantic Settings auto-reads NANOBOT__* env vars
    return Config()
def convert_keys(data: Any) -> Any:
    """Convert camelCase keys to snake_case for Pydantic.

    Recurses through nested dicts and lists. Only dict keys are rewritten;
    values that are neither dict nor list are returned unchanged.
    """
    if isinstance(data, dict):
        return {camel_to_snake(k): convert_keys(v) for k, v in data.items()}
    if isinstance(data, list):
        return [convert_keys(item) for item in data]
    return data


def convert_to_camel(data: Any) -> Any:
    """Convert snake_case keys to camelCase.

    Mirror image of ``convert_keys``: recurses through dicts and lists and
    rewrites dict keys only.
    """
    if isinstance(data, dict):
        return {snake_to_camel(k): convert_to_camel(v) for k, v in data.items()}
    if isinstance(data, list):
        return [convert_to_camel(item) for item in data]
    return data


def camel_to_snake(name: str) -> str:
    """Convert camelCase to snake_case.

    Every uppercase character except the first character of ``name`` starts
    a new underscore-separated word.
    """
    result = []
    for i, char in enumerate(name):
        if char.isupper() and i > 0:
            result.append("_")
        result.append(char.lower())
    return "".join(result)


def snake_to_camel(name: str) -> str:
    """Convert snake_case to camelCase.

    The first component is kept as-is and each later component gets only its
    first character upper-cased.
    """
    head, *rest = name.split("_")
    # str.capitalize() rather than str.title(): title() also upper-cases any
    # letter that follows a digit ("v2beta" -> "V2Beta"), which
    # camel_to_snake would then split into an extra word.
    return head + "".join(part.capitalize() for part in rest)
list[str] = Field(default_factory=list) # Allowed user IDs or usernames + + +class FeishuConfig(BaseModel): + """Feishu/Lark channel configuration.""" + + enabled: bool = False + app_id: str = "" + app_secret: str = "" + allow_from: list[str] = Field(default_factory=list) # Allowed Feishu open_ids + default_litewrite_user_id: str = "" # Litewrite user ID for project operations + + +class ChannelsConfig(BaseModel): + """Configuration for chat channels.""" + + whatsapp: WhatsAppConfig = Field(default_factory=WhatsAppConfig) + telegram: TelegramConfig = Field(default_factory=TelegramConfig) + feishu: FeishuConfig = Field(default_factory=FeishuConfig) + + +class AgentDefaults(BaseModel): + """Default agent configuration.""" + + workspace: str = "~/.nanobot/workspace" + model: str = "anthropic/claude-opus-4-5" + max_tokens: int = 8192 + temperature: float = 0.7 + max_tool_iterations: int = 20 + + +class AgentsConfig(BaseModel): + """Agent configuration.""" + + defaults: AgentDefaults = Field(default_factory=AgentDefaults) + + +class ProviderConfig(BaseModel): + """LLM provider configuration.""" + + api_key: str = "" + api_base: str | None = None + + +class ProvidersConfig(BaseModel): + """Configuration for LLM providers.""" + + anthropic: ProviderConfig = Field(default_factory=ProviderConfig) + openai: ProviderConfig = Field(default_factory=ProviderConfig) + openrouter: ProviderConfig = Field(default_factory=ProviderConfig) + groq: ProviderConfig = Field(default_factory=ProviderConfig) + zhipu: ProviderConfig = Field(default_factory=ProviderConfig) + vllm: ProviderConfig = Field(default_factory=ProviderConfig) + gemini: ProviderConfig = Field(default_factory=ProviderConfig) + + +class GatewayConfig(BaseModel): + """Gateway/server configuration.""" + + host: str = "0.0.0.0" + port: int = 18790 + + +class WebSearchConfig(BaseModel): + """Web search tool configuration.""" + + api_key: str = "" # Brave Search API key + max_results: int = 5 + + +class 
WebToolsConfig(BaseModel): + """Web tools configuration.""" + + search: WebSearchConfig = Field(default_factory=WebSearchConfig) + + +class ExecToolConfig(BaseModel): + """Shell exec tool configuration.""" + + timeout: int = 60 + restrict_to_workspace: bool = ( + True # Block commands accessing paths outside workspace + ) + + +class ToolsConfig(BaseModel): + """Tools configuration.""" + + web: WebToolsConfig = Field(default_factory=WebToolsConfig) + exec: ExecToolConfig = Field(default_factory=ExecToolConfig) + + +class LitewriteConfig(BaseModel): + """Litewrite integration configuration.""" + + url: str = "http://web:3000" # Litewrite API base URL (Docker network) + api_secret: str = "" # INTERNAL_API_SECRET for authentication + ai_server_url: str = "http://ai-server:6612" # AI server URL (Docker network) + + +class Config(BaseSettings): + """Root configuration for nanobot.""" + + agents: AgentsConfig = Field(default_factory=AgentsConfig) + channels: ChannelsConfig = Field(default_factory=ChannelsConfig) + providers: ProvidersConfig = Field(default_factory=ProvidersConfig) + gateway: GatewayConfig = Field(default_factory=GatewayConfig) + tools: ToolsConfig = Field(default_factory=ToolsConfig) + litewrite: LitewriteConfig = Field(default_factory=LitewriteConfig) + + @property + def workspace_path(self) -> Path: + """Get expanded workspace path.""" + return Path(self.agents.defaults.workspace).expanduser() + + def get_api_key(self) -> str | None: + """Get API key in priority order: OpenRouter > Anthropic > OpenAI > Gemini > Zhipu > Groq > vLLM.""" + return ( + self.providers.openrouter.api_key + or self.providers.anthropic.api_key + or self.providers.openai.api_key + or self.providers.gemini.api_key + or self.providers.zhipu.api_key + or self.providers.groq.api_key + or self.providers.vllm.api_key + or None + ) + + def get_api_base(self) -> str | None: + """Get API base URL if using OpenRouter, Zhipu or vLLM.""" + if self.providers.openrouter.api_key: + return 
self.providers.openrouter.api_base or "https://openrouter.ai/api/v1" + if self.providers.zhipu.api_key: + return self.providers.zhipu.api_base + if self.providers.vllm.api_base: + return self.providers.vllm.api_base + return None + + class Config: + env_prefix = "NANOBOT__" + env_nested_delimiter = "__" diff --git a/nanobot/nanobot/cron/__init__.py b/nanobot/nanobot/cron/__init__.py new file mode 100644 index 0000000..a9d4cad --- /dev/null +++ b/nanobot/nanobot/cron/__init__.py @@ -0,0 +1,6 @@ +"""Cron service for scheduled agent tasks.""" + +from nanobot.cron.service import CronService +from nanobot.cron.types import CronJob, CronSchedule + +__all__ = ["CronService", "CronJob", "CronSchedule"] diff --git a/nanobot/nanobot/cron/service.py b/nanobot/nanobot/cron/service.py new file mode 100644 index 0000000..90f7bf6 --- /dev/null +++ b/nanobot/nanobot/cron/service.py @@ -0,0 +1,362 @@ +"""Cron service for scheduling agent tasks.""" + +import asyncio +import json +import time +import uuid +from pathlib import Path +from typing import Any, Callable, Coroutine + +from loguru import logger + +from nanobot.cron.types import ( + CronJob, + CronJobState, + CronPayload, + CronSchedule, + CronStore, +) + + +def _now_ms() -> int: + return int(time.time() * 1000) + + +def _compute_next_run(schedule: CronSchedule, now_ms: int) -> int | None: + """Compute next run time in ms.""" + if schedule.kind == "at": + return schedule.at_ms if schedule.at_ms and schedule.at_ms > now_ms else None + + if schedule.kind == "every": + if not schedule.every_ms or schedule.every_ms <= 0: + return None + # Next interval from now + return now_ms + schedule.every_ms + + if schedule.kind == "cron" and schedule.expr: + try: + from croniter import croniter + + cron = croniter(schedule.expr, time.time()) + next_time = cron.get_next() + return int(next_time * 1000) + except Exception: + return None + + return None + + +class CronService: + """Service for managing and executing scheduled jobs.""" + + def 
__init__( + self, + store_path: Path, + on_job: Callable[[CronJob], Coroutine[Any, Any, str | None]] | None = None, + ): + self.store_path = store_path + self.on_job = on_job # Callback to execute job, returns response text + self._store: CronStore | None = None + self._timer_task: asyncio.Task | None = None + self._running = False + + def _load_store(self) -> CronStore: + """Load jobs from disk.""" + if self._store: + return self._store + + if self.store_path.exists(): + try: + data = json.loads(self.store_path.read_text()) + jobs = [] + for j in data.get("jobs", []): + jobs.append( + CronJob( + id=j["id"], + name=j["name"], + enabled=j.get("enabled", True), + schedule=CronSchedule( + kind=j["schedule"]["kind"], + at_ms=j["schedule"].get("atMs"), + every_ms=j["schedule"].get("everyMs"), + expr=j["schedule"].get("expr"), + tz=j["schedule"].get("tz"), + ), + payload=CronPayload( + kind=j["payload"].get("kind", "agent_turn"), + message=j["payload"].get("message", ""), + deliver=j["payload"].get("deliver", False), + channel=j["payload"].get("channel"), + to=j["payload"].get("to"), + ), + state=CronJobState( + next_run_at_ms=j.get("state", {}).get("nextRunAtMs"), + last_run_at_ms=j.get("state", {}).get("lastRunAtMs"), + last_status=j.get("state", {}).get("lastStatus"), + last_error=j.get("state", {}).get("lastError"), + ), + created_at_ms=j.get("createdAtMs", 0), + updated_at_ms=j.get("updatedAtMs", 0), + delete_after_run=j.get("deleteAfterRun", False), + ) + ) + self._store = CronStore(jobs=jobs) + except Exception as e: + logger.warning(f"Failed to load cron store: {e}") + self._store = CronStore() + else: + self._store = CronStore() + + return self._store + + def _save_store(self) -> None: + """Save jobs to disk.""" + if not self._store: + return + + self.store_path.parent.mkdir(parents=True, exist_ok=True) + + data = { + "version": self._store.version, + "jobs": [ + { + "id": j.id, + "name": j.name, + "enabled": j.enabled, + "schedule": { + "kind": j.schedule.kind, 
+ "atMs": j.schedule.at_ms, + "everyMs": j.schedule.every_ms, + "expr": j.schedule.expr, + "tz": j.schedule.tz, + }, + "payload": { + "kind": j.payload.kind, + "message": j.payload.message, + "deliver": j.payload.deliver, + "channel": j.payload.channel, + "to": j.payload.to, + }, + "state": { + "nextRunAtMs": j.state.next_run_at_ms, + "lastRunAtMs": j.state.last_run_at_ms, + "lastStatus": j.state.last_status, + "lastError": j.state.last_error, + }, + "createdAtMs": j.created_at_ms, + "updatedAtMs": j.updated_at_ms, + "deleteAfterRun": j.delete_after_run, + } + for j in self._store.jobs + ], + } + + self.store_path.write_text(json.dumps(data, indent=2)) + + async def start(self) -> None: + """Start the cron service.""" + self._running = True + self._load_store() + self._recompute_next_runs() + self._save_store() + self._arm_timer() + logger.info( + f"Cron service started with {len(self._store.jobs if self._store else [])} jobs" + ) + + def stop(self) -> None: + """Stop the cron service.""" + self._running = False + if self._timer_task: + self._timer_task.cancel() + self._timer_task = None + + def _recompute_next_runs(self) -> None: + """Recompute next run times for all enabled jobs.""" + if not self._store: + return + now = _now_ms() + for job in self._store.jobs: + if job.enabled: + job.state.next_run_at_ms = _compute_next_run(job.schedule, now) + + def _get_next_wake_ms(self) -> int | None: + """Get the earliest next run time across all jobs.""" + if not self._store: + return None + times = [ + j.state.next_run_at_ms + for j in self._store.jobs + if j.enabled and j.state.next_run_at_ms + ] + return min(times) if times else None + + def _arm_timer(self) -> None: + """Schedule the next timer tick.""" + if self._timer_task: + self._timer_task.cancel() + + next_wake = self._get_next_wake_ms() + if not next_wake or not self._running: + return + + delay_ms = max(0, next_wake - _now_ms()) + delay_s = delay_ms / 1000 + + async def tick(): + await asyncio.sleep(delay_s) + 
if self._running: + await self._on_timer() + + self._timer_task = asyncio.create_task(tick()) + + async def _on_timer(self) -> None: + """Handle timer tick - run due jobs.""" + if not self._store: + return + + now = _now_ms() + due_jobs = [ + j + for j in self._store.jobs + if j.enabled and j.state.next_run_at_ms and now >= j.state.next_run_at_ms + ] + + for job in due_jobs: + await self._execute_job(job) + + self._save_store() + self._arm_timer() + + async def _execute_job(self, job: CronJob) -> None: + """Execute a single job.""" + start_ms = _now_ms() + logger.info(f"Cron: executing job '{job.name}' ({job.id})") + + try: + if self.on_job: + await self.on_job(job) + + job.state.last_status = "ok" + job.state.last_error = None + logger.info(f"Cron: job '{job.name}' completed") + + except Exception as e: + job.state.last_status = "error" + job.state.last_error = str(e) + logger.error(f"Cron: job '{job.name}' failed: {e}") + + job.state.last_run_at_ms = start_ms + job.updated_at_ms = _now_ms() + + # Handle one-shot jobs + if job.schedule.kind == "at": + if job.delete_after_run: + self._store.jobs = [j for j in self._store.jobs if j.id != job.id] + else: + job.enabled = False + job.state.next_run_at_ms = None + else: + # Compute next run + job.state.next_run_at_ms = _compute_next_run(job.schedule, _now_ms()) + + # ========== Public API ========== + + def list_jobs(self, include_disabled: bool = False) -> list[CronJob]: + """List all jobs.""" + store = self._load_store() + jobs = store.jobs if include_disabled else [j for j in store.jobs if j.enabled] + return sorted(jobs, key=lambda j: j.state.next_run_at_ms or float("inf")) + + def add_job( + self, + name: str, + schedule: CronSchedule, + message: str, + deliver: bool = False, + channel: str | None = None, + to: str | None = None, + delete_after_run: bool = False, + ) -> CronJob: + """Add a new job.""" + store = self._load_store() + now = _now_ms() + + job = CronJob( + id=str(uuid.uuid4())[:8], + name=name, + 
enabled=True, + schedule=schedule, + payload=CronPayload( + kind="agent_turn", + message=message, + deliver=deliver, + channel=channel, + to=to, + ), + state=CronJobState(next_run_at_ms=_compute_next_run(schedule, now)), + created_at_ms=now, + updated_at_ms=now, + delete_after_run=delete_after_run, + ) + + store.jobs.append(job) + self._save_store() + self._arm_timer() + + logger.info(f"Cron: added job '{name}' ({job.id})") + return job + + def remove_job(self, job_id: str) -> bool: + """Remove a job by ID.""" + store = self._load_store() + before = len(store.jobs) + store.jobs = [j for j in store.jobs if j.id != job_id] + removed = len(store.jobs) < before + + if removed: + self._save_store() + self._arm_timer() + logger.info(f"Cron: removed job {job_id}") + + return removed + + def enable_job(self, job_id: str, enabled: bool = True) -> CronJob | None: + """Enable or disable a job.""" + store = self._load_store() + for job in store.jobs: + if job.id == job_id: + job.enabled = enabled + job.updated_at_ms = _now_ms() + if enabled: + job.state.next_run_at_ms = _compute_next_run( + job.schedule, _now_ms() + ) + else: + job.state.next_run_at_ms = None + self._save_store() + self._arm_timer() + return job + return None + + async def run_job(self, job_id: str, force: bool = False) -> bool: + """Manually run a job.""" + store = self._load_store() + for job in store.jobs: + if job.id == job_id: + if not force and not job.enabled: + return False + await self._execute_job(job) + self._save_store() + self._arm_timer() + return True + return False + + def status(self) -> dict: + """Get service status.""" + store = self._load_store() + return { + "enabled": self._running, + "jobs": len(store.jobs), + "next_wake_at_ms": self._get_next_wake_ms(), + } diff --git a/nanobot/nanobot/cron/types.py b/nanobot/nanobot/cron/types.py new file mode 100644 index 0000000..d814189 --- /dev/null +++ b/nanobot/nanobot/cron/types.py @@ -0,0 +1,64 @@ +"""Cron types.""" + +from dataclasses import 
dataclass, field +from typing import Literal + + +@dataclass +class CronSchedule: + """Schedule definition for a cron job.""" + + kind: Literal["at", "every", "cron"] + # For "at": timestamp in ms + at_ms: int | None = None + # For "every": interval in ms + every_ms: int | None = None + # For "cron": cron expression (e.g. "0 9 * * *") + expr: str | None = None + # Timezone for cron expressions + tz: str | None = None + + +@dataclass +class CronPayload: + """What to do when the job runs.""" + + kind: Literal["system_event", "agent_turn"] = "agent_turn" + message: str = "" + # Deliver response to channel + deliver: bool = False + channel: str | None = None # e.g. "whatsapp" + to: str | None = None # e.g. phone number + + +@dataclass +class CronJobState: + """Runtime state of a job.""" + + next_run_at_ms: int | None = None + last_run_at_ms: int | None = None + last_status: Literal["ok", "error", "skipped"] | None = None + last_error: str | None = None + + +@dataclass +class CronJob: + """A scheduled job.""" + + id: str + name: str + enabled: bool = True + schedule: CronSchedule = field(default_factory=lambda: CronSchedule(kind="every")) + payload: CronPayload = field(default_factory=CronPayload) + state: CronJobState = field(default_factory=CronJobState) + created_at_ms: int = 0 + updated_at_ms: int = 0 + delete_after_run: bool = False + + +@dataclass +class CronStore: + """Persistent store for cron jobs.""" + + version: int = 1 + jobs: list[CronJob] = field(default_factory=list) diff --git a/nanobot/nanobot/heartbeat/__init__.py b/nanobot/nanobot/heartbeat/__init__.py new file mode 100644 index 0000000..2ecd879 --- /dev/null +++ b/nanobot/nanobot/heartbeat/__init__.py @@ -0,0 +1,5 @@ +"""Heartbeat service for periodic agent wake-ups.""" + +from nanobot.heartbeat.service import HeartbeatService + +__all__ = ["HeartbeatService"] diff --git a/nanobot/nanobot/heartbeat/service.py b/nanobot/nanobot/heartbeat/service.py new file mode 100644 index 0000000..ff2f065 --- 
/dev/null +++ b/nanobot/nanobot/heartbeat/service.py @@ -0,0 +1,137 @@ +"""Heartbeat service - periodic agent wake-up to check for tasks.""" + +import asyncio +from pathlib import Path +from typing import Any, Callable, Coroutine + +from loguru import logger + +# Default interval: 30 minutes +DEFAULT_HEARTBEAT_INTERVAL_S = 30 * 60 + +# The prompt sent to agent during heartbeat +HEARTBEAT_PROMPT = """Read HEARTBEAT.md in your workspace (if it exists). +Follow any instructions or tasks listed there. +If nothing needs attention, reply with just: HEARTBEAT_OK""" + +# Token that indicates "nothing to do" +HEARTBEAT_OK_TOKEN = "HEARTBEAT_OK" + + +def _is_heartbeat_empty(content: str | None) -> bool: + """Check if HEARTBEAT.md has no actionable content.""" + if not content: + return True + + # Lines to skip: empty, headers, HTML comments, empty checkboxes + skip_patterns = {"- [ ]", "* [ ]", "- [x]", "* [x]"} + + for line in content.split("\n"): + line = line.strip() + if ( + not line + or line.startswith("#") + or line.startswith("<!--") + or line in skip_patterns + ): + continue + return False # Found actionable content + + return True + + +class HeartbeatService: + """ + Periodic heartbeat service that wakes the agent to check for tasks. + + The agent reads HEARTBEAT.md from the workspace and executes any + tasks listed there. If nothing needs attention, it replies HEARTBEAT_OK. 
+ """ + + def __init__( + self, + workspace: Path, + on_heartbeat: Callable[[str], Coroutine[Any, Any, str]] | None = None, + interval_s: int = DEFAULT_HEARTBEAT_INTERVAL_S, + enabled: bool = True, + ): + self.workspace = workspace + self.on_heartbeat = on_heartbeat + self.interval_s = interval_s + self.enabled = enabled + self._running = False + self._task: asyncio.Task | None = None + + @property + def heartbeat_file(self) -> Path: + return self.workspace / "HEARTBEAT.md" + + def _read_heartbeat_file(self) -> str | None: + """Read HEARTBEAT.md content.""" + if self.heartbeat_file.exists(): + try: + return self.heartbeat_file.read_text() + except Exception: + return None + return None + + async def start(self) -> None: + """Start the heartbeat service.""" + if not self.enabled: + logger.info("Heartbeat disabled") + return + + self._running = True + self._task = asyncio.create_task(self._run_loop()) + logger.info(f"Heartbeat started (every {self.interval_s}s)") + + def stop(self) -> None: + """Stop the heartbeat service.""" + self._running = False + if self._task: + self._task.cancel() + self._task = None + + async def _run_loop(self) -> None: + """Main heartbeat loop.""" + while self._running: + try: + await asyncio.sleep(self.interval_s) + if self._running: + await self._tick() + except asyncio.CancelledError: + break + except Exception as e: + logger.error(f"Heartbeat error: {e}") + + async def _tick(self) -> None: + """Execute a single heartbeat tick.""" + content = self._read_heartbeat_file() + + # Skip if HEARTBEAT.md is empty or doesn't exist + if _is_heartbeat_empty(content): + logger.debug("Heartbeat: no tasks (HEARTBEAT.md empty)") + return + + logger.info("Heartbeat: checking for tasks...") + + if self.on_heartbeat: + try: + response = await self.on_heartbeat(HEARTBEAT_PROMPT) + + # Check if agent said "nothing to do" + if HEARTBEAT_OK_TOKEN.replace("_", "") in response.upper().replace( + "_", "" + ): + logger.info("Heartbeat: OK (no action needed)") + 
else: + logger.info("Heartbeat: completed task") + + except Exception as e: + logger.error(f"Heartbeat execution failed: {e}") + + async def trigger_now(self) -> str | None: + """Manually trigger a heartbeat.""" + if self.on_heartbeat: + return await self.on_heartbeat(HEARTBEAT_PROMPT) + return None diff --git a/nanobot/nanobot/media/__init__.py b/nanobot/nanobot/media/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/nanobot/nanobot/media/manager.py b/nanobot/nanobot/media/manager.py new file mode 100644 index 0000000..5239d3f --- /dev/null +++ b/nanobot/nanobot/media/manager.py @@ -0,0 +1,170 @@ +"""Unified media manager for downloading, storing, and managing media files. + +All channels (Feishu, Telegram, WhatsApp, etc.) should use this module to +persist user-sent media. Files are saved under ``~/.nanobot/media/`` with a +standardised naming scheme:: + + {channel}_{timestamp}_{sanitised_original_name}.{ext} + +The manager also handles housekeeping (e.g. cleaning up old files). +""" + +from __future__ import annotations + +import re +import time +from datetime import datetime, timedelta +from pathlib import Path +from typing import Any + +from loguru import logger + + +# Where media files are stored +_DEFAULT_MEDIA_DIR = Path.home() / ".nanobot" / "media" + +# Default max age before a file is eligible for cleanup (7 days) +_DEFAULT_MAX_AGE_DAYS = 7 + +# Regex to sanitise filenames +_UNSAFE_CHARS = re.compile(r"[^\w.\-]") + + +def _sanitise(name: str, max_len: int = 40) -> str: + """Return a filesystem-safe version of *name*, truncated to *max_len*.""" + safe = _UNSAFE_CHARS.sub("_", name) + return safe[:max_len] if len(safe) > max_len else safe + + +class MediaManager: + """Centralised media file management. 
+ + Usage:: + + mgr = MediaManager() + + # Save bytes that were already downloaded + path = mgr.save(data=raw_bytes, channel="feishu", + original_name="image.png") + + # Clean up files older than 7 days + mgr.cleanup() + """ + + def __init__( + self, media_dir: Path | None = None, max_age_days: int = _DEFAULT_MAX_AGE_DAYS + ): + self.media_dir = media_dir or _DEFAULT_MEDIA_DIR + self.media_dir.mkdir(parents=True, exist_ok=True) + self.max_age_days = max_age_days + + # ------------------------------------------------------------------ + # Core API + # ------------------------------------------------------------------ + + def save( + self, + data: bytes, + channel: str, + original_name: str = "file", + extension: str | None = None, + ) -> str: + """Persist *data* and return the absolute local file path. + + Parameters + ---------- + data: + Raw file bytes. + channel: + Source channel name (``feishu``, ``telegram``, …). + original_name: + The original file name (used for the saved filename). + extension: + Explicit extension (e.g. ``.png``). If *None* the extension + is inferred from *original_name*. + """ + if extension is None: + suffix = Path(original_name).suffix + extension = suffix if suffix else ".bin" + if not extension.startswith("."): + extension = f".{extension}" + + ts = int(time.time() * 1000) + safe_name = _sanitise(Path(original_name).stem) + filename = f"{channel}_{ts}_{safe_name}{extension}" + filepath = self.media_dir / filename + + filepath.write_bytes(data) + logger.info(f"MediaManager: saved {len(data)} bytes -> {filepath}") + return str(filepath) + + def save_file( + self, + source_path: str | Path, + channel: str, + original_name: str | None = None, + ) -> str: + """Copy an existing file into the managed media directory. + + Returns the new absolute path. 
+ """ + src = Path(source_path) + if not src.is_file(): + raise FileNotFoundError(f"Source file not found: {source_path}") + + data = src.read_bytes() + name = original_name or src.name + return self.save(data=data, channel=channel, original_name=name) + + def get_path(self, filename: str) -> Path: + """Return the full path for a managed filename.""" + return self.media_dir / filename + + def list_files(self, channel: str | None = None) -> list[dict[str, Any]]: + """List managed media files, optionally filtered by *channel*. + + Returns a list of dicts with ``path``, ``size``, ``modified`` + and ``channel`` keys. + """ + results: list[dict[str, Any]] = [] + for f in sorted(self.media_dir.iterdir()): + if f.is_dir(): + continue + parts = f.name.split("_", 2) + file_channel = parts[0] if len(parts) >= 2 else "unknown" + if channel and file_channel != channel: + continue + results.append( + { + "path": str(f), + "name": f.name, + "size": f.stat().st_size, + "modified": datetime.fromtimestamp(f.stat().st_mtime).isoformat(), + "channel": file_channel, + } + ) + return results + + def cleanup(self, max_age_days: int | None = None) -> int: + """Remove media files older than *max_age_days*. + + Returns the number of files removed. 
+ """ + age = max_age_days if max_age_days is not None else self.max_age_days + cutoff = datetime.now() - timedelta(days=age) + removed = 0 + + for f in self.media_dir.iterdir(): + if f.is_dir(): + continue + try: + mtime = datetime.fromtimestamp(f.stat().st_mtime) + if mtime < cutoff: + f.unlink() + removed += 1 + except Exception as e: + logger.warning(f"MediaManager: failed to remove {f}: {e}") + + if removed: + logger.info(f"MediaManager: cleaned up {removed} old file(s)") + return removed diff --git a/nanobot/nanobot/providers/__init__.py b/nanobot/nanobot/providers/__init__.py new file mode 100644 index 0000000..ceff8fa --- /dev/null +++ b/nanobot/nanobot/providers/__init__.py @@ -0,0 +1,6 @@ +"""LLM provider abstraction module.""" + +from nanobot.providers.base import LLMProvider, LLMResponse +from nanobot.providers.litellm_provider import LiteLLMProvider + +__all__ = ["LLMProvider", "LLMResponse", "LiteLLMProvider"] diff --git a/nanobot/nanobot/providers/base.py b/nanobot/nanobot/providers/base.py new file mode 100644 index 0000000..a30e4ef --- /dev/null +++ b/nanobot/nanobot/providers/base.py @@ -0,0 +1,71 @@ +"""Base LLM provider interface.""" + +from abc import ABC, abstractmethod +from dataclasses import dataclass, field +from typing import Any + + +@dataclass +class ToolCallRequest: + """A tool call request from the LLM.""" + + id: str + name: str + arguments: dict[str, Any] + + +@dataclass +class LLMResponse: + """Response from an LLM provider.""" + + content: str | None + tool_calls: list[ToolCallRequest] = field(default_factory=list) + finish_reason: str = "stop" + usage: dict[str, int] = field(default_factory=dict) + + @property + def has_tool_calls(self) -> bool: + """Check if response contains tool calls.""" + return len(self.tool_calls) > 0 + + +class LLMProvider(ABC): + """ + Abstract base class for LLM providers. + + Implementations should handle the specifics of each provider's API + while maintaining a consistent interface. 
+ """ + + def __init__(self, api_key: str | None = None, api_base: str | None = None): + self.api_key = api_key + self.api_base = api_base + + @abstractmethod + async def chat( + self, + messages: list[dict[str, Any]], + tools: list[dict[str, Any]] | None = None, + model: str | None = None, + max_tokens: int = 4096, + temperature: float = 0.7, + ) -> LLMResponse: + """ + Send a chat completion request. + + Args: + messages: List of message dicts with 'role' and 'content'. + tools: Optional list of tool definitions. + model: Model identifier (provider-specific). + max_tokens: Maximum tokens in response. + temperature: Sampling temperature. + + Returns: + LLMResponse with content and/or tool calls. + """ + pass + + @abstractmethod + def get_default_model(self) -> str: + """Get the default model for this provider.""" + pass diff --git a/nanobot/nanobot/providers/litellm_provider.py b/nanobot/nanobot/providers/litellm_provider.py new file mode 100644 index 0000000..4084f24 --- /dev/null +++ b/nanobot/nanobot/providers/litellm_provider.py @@ -0,0 +1,179 @@ +"""LiteLLM provider implementation for multi-provider support.""" + +import os +from typing import Any + +import litellm +from litellm import acompletion + +from nanobot.providers.base import LLMProvider, LLMResponse, ToolCallRequest + + +class LiteLLMProvider(LLMProvider): + """ + LLM provider using LiteLLM for multi-provider support. + + Supports OpenRouter, Anthropic, OpenAI, Gemini, and many other providers through + a unified interface. + """ + + def __init__( + self, + api_key: str | None = None, + api_base: str | None = None, + default_model: str = "anthropic/claude-opus-4-5", + ): + super().__init__(api_key, api_base) + self.default_model = default_model + + # Detect OpenRouter by api_key prefix or explicit api_base + self.is_openrouter = (api_key and api_key.startswith("sk-or-")) or ( + api_base and "openrouter" in api_base + ) + + # Track if using custom endpoint (vLLM, etc.) 
+ self.is_vllm = bool(api_base) and not self.is_openrouter + + # Configure LiteLLM based on provider + if api_key: + if self.is_openrouter: + # OpenRouter mode - set key + os.environ["OPENROUTER_API_KEY"] = api_key + elif self.is_vllm: + # vLLM/custom endpoint - uses OpenAI-compatible API + os.environ["OPENAI_API_KEY"] = api_key + elif "anthropic" in default_model: + os.environ.setdefault("ANTHROPIC_API_KEY", api_key) + elif "openai" in default_model or "gpt" in default_model: + os.environ.setdefault("OPENAI_API_KEY", api_key) + elif "gemini" in default_model.lower(): + os.environ.setdefault("GEMINI_API_KEY", api_key) + elif ( + "zhipu" in default_model + or "glm" in default_model + or "zai" in default_model + ): + os.environ.setdefault("ZHIPUAI_API_KEY", api_key) + elif "groq" in default_model: + os.environ.setdefault("GROQ_API_KEY", api_key) + + if api_base: + litellm.api_base = api_base + + # Disable LiteLLM logging noise + litellm.suppress_debug_info = True + + async def chat( + self, + messages: list[dict[str, Any]], + tools: list[dict[str, Any]] | None = None, + model: str | None = None, + max_tokens: int = 4096, + temperature: float = 0.7, + ) -> LLMResponse: + """ + Send a chat completion request via LiteLLM. + + Args: + messages: List of message dicts with 'role' and 'content'. + tools: Optional list of tool definitions in OpenAI format. + model: Model identifier (e.g., 'anthropic/claude-sonnet-4-5'). + max_tokens: Maximum tokens in response. + temperature: Sampling temperature. + + Returns: + LLMResponse with content and/or tool calls. 
+ """ + model = model or self.default_model + + # For OpenRouter, prefix model name if not already prefixed + if self.is_openrouter and not model.startswith("openrouter/"): + model = f"openrouter/{model}" + + # For Zhipu/Z.ai, ensure prefix is present + # Handle cases like "glm-4.7-flash" -> "zai/glm-4.7-flash" + if ("glm" in model.lower() or "zhipu" in model.lower()) and not ( + model.startswith("zhipu/") + or model.startswith("zai/") + or model.startswith("openrouter/") + ): + model = f"zai/{model}" + + # For vLLM, use hosted_vllm/ prefix per LiteLLM docs + # Convert openai/ prefix to hosted_vllm/ if user specified it + if self.is_vllm: + model = f"hosted_vllm/{model}" + + # For Gemini, ensure gemini/ prefix if not already present + if "gemini" in model.lower() and not model.startswith("gemini/"): + model = f"gemini/{model}" + + kwargs: dict[str, Any] = { + "model": model, + "messages": messages, + "max_tokens": max_tokens, + "temperature": temperature, + } + + # Pass api_base directly for custom endpoints (vLLM, etc.) 
+ if self.api_base: + kwargs["api_base"] = self.api_base + + if tools: + kwargs["tools"] = tools + kwargs["tool_choice"] = "auto" + + try: + response = await acompletion(**kwargs) + return self._parse_response(response) + except Exception as e: + # Return error as content for graceful handling + return LLMResponse( + content=f"Error calling LLM: {str(e)}", + finish_reason="error", + ) + + def _parse_response(self, response: Any) -> LLMResponse: + """Parse LiteLLM response into our standard format.""" + choice = response.choices[0] + message = choice.message + + tool_calls = [] + if hasattr(message, "tool_calls") and message.tool_calls: + for tc in message.tool_calls: + # Parse arguments from JSON string if needed + args = tc.function.arguments + if isinstance(args, str): + import json + + try: + args = json.loads(args) + except json.JSONDecodeError: + args = {"raw": args} + + tool_calls.append( + ToolCallRequest( + id=tc.id, + name=tc.function.name, + arguments=args, + ) + ) + + usage = {} + if hasattr(response, "usage") and response.usage: + usage = { + "prompt_tokens": response.usage.prompt_tokens, + "completion_tokens": response.usage.completion_tokens, + "total_tokens": response.usage.total_tokens, + } + + return LLMResponse( + content=message.content, + tool_calls=tool_calls, + finish_reason=choice.finish_reason or "stop", + usage=usage, + ) + + def get_default_model(self) -> str: + """Get the default model.""" + return self.default_model diff --git a/nanobot/nanobot/providers/transcription.py b/nanobot/nanobot/providers/transcription.py new file mode 100644 index 0000000..24b365a --- /dev/null +++ b/nanobot/nanobot/providers/transcription.py @@ -0,0 +1,61 @@ +"""Voice transcription provider using Groq.""" + +import os +from pathlib import Path + +import httpx +from loguru import logger + + +class GroqTranscriptionProvider: + """ + Voice transcription provider using Groq's Whisper API. + + Groq offers extremely fast transcription with a generous free tier. 
+ """ + + def __init__(self, api_key: str | None = None): + self.api_key = api_key or os.environ.get("GROQ_API_KEY") + self.api_url = "https://api.groq.com/openai/v1/audio/transcriptions" + + async def transcribe(self, file_path: str | Path) -> str: + """ + Transcribe an audio file using Groq. + + Args: + file_path: Path to the audio file. + + Returns: + Transcribed text. + """ + if not self.api_key: + logger.warning("Groq API key not configured for transcription") + return "" + + path = Path(file_path) + if not path.exists(): + logger.error(f"Audio file not found: {file_path}") + return "" + + try: + async with httpx.AsyncClient() as client: + with open(path, "rb") as f: + files = { + "file": (path.name, f), + "model": (None, "whisper-large-v3"), + } + headers = { + "Authorization": f"Bearer {self.api_key}", + } + + response = await client.post( + self.api_url, headers=headers, files=files, timeout=60.0 + ) + + response.raise_for_status() + data = response.json() + return data.get("text", "") + + except Exception as e: + logger.error(f"Groq transcription error: {e}") + return "" diff --git a/nanobot/nanobot/session/__init__.py b/nanobot/nanobot/session/__init__.py new file mode 100644 index 0000000..3faf424 --- /dev/null +++ b/nanobot/nanobot/session/__init__.py @@ -0,0 +1,5 @@ +"""Session management module.""" + +from nanobot.session.manager import SessionManager, Session + +__all__ = ["SessionManager", "Session"] diff --git a/nanobot/nanobot/session/manager.py b/nanobot/nanobot/session/manager.py new file mode 100644 index 0000000..8fb321f --- /dev/null +++ b/nanobot/nanobot/session/manager.py @@ -0,0 +1,212 @@ +"""Session management for conversation history.""" + +import json +from pathlib import Path +from dataclasses import dataclass, field +from datetime import datetime +from typing import Any + +from loguru import logger + +from nanobot.utils.helpers import ensure_dir, safe_filename + + +@dataclass +class Session: + """ + A conversation session. 
+ + Stores messages in JSONL format for easy reading and persistence. + """ + + key: str # channel:chat_id + messages: list[dict[str, Any]] = field(default_factory=list) + created_at: datetime = field(default_factory=datetime.now) + updated_at: datetime = field(default_factory=datetime.now) + metadata: dict[str, Any] = field(default_factory=dict) + + def add_message(self, role: str, content: str, **kwargs: Any) -> None: + """Add a message to the session.""" + msg = { + "role": role, + "content": content, + "timestamp": datetime.now().isoformat(), + **kwargs, + } + self.messages.append(msg) + self.updated_at = datetime.now() + + def get_history(self, max_messages: int = 50) -> list[dict[str, Any]]: + """ + Get message history for LLM context. + + Args: + max_messages: Maximum messages to return. + + Returns: + List of messages in LLM format. + """ + # Get recent messages + recent = ( + self.messages[-max_messages:] + if len(self.messages) > max_messages + else self.messages + ) + + # Convert to LLM format (just role and content) + return [{"role": m["role"], "content": m["content"]} for m in recent] + + def clear(self) -> None: + """Clear all messages in the session.""" + self.messages = [] + self.updated_at = datetime.now() + + +class SessionManager: + """ + Manages conversation sessions. + + Sessions are stored as JSONL files in the sessions directory. + """ + + def __init__(self, workspace: Path): + self.workspace = workspace + self.sessions_dir = ensure_dir(Path.home() / ".nanobot" / "sessions") + self._cache: dict[str, Session] = {} + + def _get_session_path(self, key: str) -> Path: + """Get the file path for a session.""" + safe_key = safe_filename(key.replace(":", "_")) + return self.sessions_dir / f"{safe_key}.jsonl" + + def get_or_create(self, key: str) -> Session: + """ + Get an existing session or create a new one. + + Args: + key: Session key (usually channel:chat_id). + + Returns: + The session. 
+ """ + # Check cache + if key in self._cache: + return self._cache[key] + + # Try to load from disk + session = self._load(key) + if session is None: + session = Session(key=key) + + self._cache[key] = session + return session + + def _load(self, key: str) -> Session | None: + """Load a session from disk.""" + path = self._get_session_path(key) + + if not path.exists(): + return None + + try: + messages = [] + metadata = {} + created_at = None + + with open(path) as f: + for line in f: + line = line.strip() + if not line: + continue + + data = json.loads(line) + + if data.get("_type") == "metadata": + metadata = data.get("metadata", {}) + created_at = ( + datetime.fromisoformat(data["created_at"]) + if data.get("created_at") + else None + ) + else: + messages.append(data) + + return Session( + key=key, + messages=messages, + created_at=created_at or datetime.now(), + metadata=metadata, + ) + except Exception as e: + logger.warning(f"Failed to load session {key}: {e}") + return None + + def save(self, session: Session) -> None: + """Save a session to disk.""" + path = self._get_session_path(session.key) + + with open(path, "w") as f: + # Write metadata first + metadata_line = { + "_type": "metadata", + "created_at": session.created_at.isoformat(), + "updated_at": session.updated_at.isoformat(), + "metadata": session.metadata, + } + f.write(json.dumps(metadata_line) + "\n") + + # Write messages + for msg in session.messages: + f.write(json.dumps(msg) + "\n") + + self._cache[session.key] = session + + def delete(self, key: str) -> bool: + """ + Delete a session. + + Args: + key: Session key. + + Returns: + True if deleted, False if not found. + """ + # Remove from cache + self._cache.pop(key, None) + + # Remove file + path = self._get_session_path(key) + if path.exists(): + path.unlink() + return True + return False + + def list_sessions(self) -> list[dict[str, Any]]: + """ + List all sessions. + + Returns: + List of session info dicts. 
+ """ + sessions = [] + + for path in self.sessions_dir.glob("*.jsonl"): + try: + # Read just the metadata line + with open(path) as f: + first_line = f.readline().strip() + if first_line: + data = json.loads(first_line) + if data.get("_type") == "metadata": + sessions.append( + { + "key": path.stem.replace("_", ":"), + "created_at": data.get("created_at"), + "updated_at": data.get("updated_at"), + "path": str(path), + } + ) + except Exception: + continue + + return sorted(sessions, key=lambda x: x.get("updated_at", ""), reverse=True) diff --git a/nanobot/nanobot/skills/README.md b/nanobot/nanobot/skills/README.md new file mode 100644 index 0000000..fad21be --- /dev/null +++ b/nanobot/nanobot/skills/README.md @@ -0,0 +1,24 @@ +# nanobot Skills + +This directory contains built-in skills that extend nanobot's capabilities. + +## Skill Format + +Each skill is a directory containing a `SKILL.md` file with: +- YAML frontmatter (name, description, metadata) +- Markdown instructions for the agent + +## Attribution + +These skills are adapted from [OpenClaw](https://github.com/openclaw/openclaw)'s skill system. +The skill format and metadata structure follow OpenClaw's conventions to maintain compatibility. + +## Available Skills + +| Skill | Description | +|-------|-------------| +| `github` | Interact with GitHub using the `gh` CLI | +| `weather` | Get weather info using wttr.in and Open-Meteo | +| `summarize` | Summarize URLs, files, and YouTube videos | +| `tmux` | Remote-control tmux sessions | +| `skill-creator` | Create new skills | diff --git a/nanobot/nanobot/skills/github/SKILL.md b/nanobot/nanobot/skills/github/SKILL.md new file mode 100644 index 0000000..57d8127 --- /dev/null +++ b/nanobot/nanobot/skills/github/SKILL.md @@ -0,0 +1,48 @@ +--- +name: github +description: "Interact with GitHub using the `gh` CLI. Use `gh issue`, `gh pr`, `gh run`, and `gh api` for issues, PRs, CI runs, and advanced queries." 
+metadata: {"nanobot":{"emoji":"🐙","requires":{"bins":["gh"]},"install":[{"id":"brew","kind":"brew","formula":"gh","bins":["gh"],"label":"Install GitHub CLI (brew)"},{"id":"apt","kind":"apt","package":"gh","bins":["gh"],"label":"Install GitHub CLI (apt)"}]}} +--- + +# GitHub Skill + +Use the `gh` CLI to interact with GitHub. Always specify `--repo owner/repo` when not in a git directory, or use URLs directly. + +## Pull Requests + +Check CI status on a PR: +```bash +gh pr checks 55 --repo owner/repo +``` + +List recent workflow runs: +```bash +gh run list --repo owner/repo --limit 10 +``` + +View a run and see which steps failed: +```bash +gh run view <run-id> --repo owner/repo +``` + +View logs for failed steps only: +```bash +gh run view <run-id> --repo owner/repo --log-failed +``` + +## API for Advanced Queries + +The `gh api` command is useful for accessing data not available through other subcommands. + +Get PR with specific fields: +```bash +gh api repos/owner/repo/pulls/55 --jq '.title, .state, .user.login' +``` + +## JSON Output + +Most commands support `--json` for structured output. You can use `--jq` to filter: + +```bash +gh issue list --repo owner/repo --json number,title --jq '.[] | "\(.number): \(.title)"' +``` diff --git a/nanobot/nanobot/skills/litewrite/SKILL.md b/nanobot/nanobot/skills/litewrite/SKILL.md new file mode 100644 index 0000000..d2b24af --- /dev/null +++ b/nanobot/nanobot/skills/litewrite/SKILL.md @@ -0,0 +1,231 @@ +--- +name: litewrite +description: "Manage LaTeX projects in Litewrite - use the built-in AI agent for all writing/editing/analysis, manage projects and versions, compile to PDF, and upload binary assets." +metadata: {"nanobot":{"always":true}} +--- + +# Litewrite Skill + +You have access to **Litewrite**, a LaTeX writing platform with a built-in AI agent. Use the `litewrite_*` tools to manage LaTeX projects. + +## Architecture + +You act as a **Manager**. 
Every user message is potentially an action request — think about what tools to call before responding. + +- **Modifications** (any add/delete/modify intent): call `litewrite_agent` with `mode="agent"`, then **summarise what the agent did** in your own words. +- **Pure questions** (reading, analysis, Q&A with NO change intent): call `litewrite_agent` with `mode="ask"`, then **answer the user based on what the agent returned**. +- **When in doubt, default to mode="agent"** — it is better to use agent mode unnecessarily than to miss a modification the user wanted. +- **Never** read or edit project files yourself. Always delegate to the Litewrite Agent and compose your reply from its output. + +## Available Tools + +### Content Operations (via Agent) +- `litewrite_agent` — **The primary tool for ALL content work**. Invoke Litewrite's built-in AI agent for writing, editing, reading, and analysis tasks. The agent understands LaTeX structure, can read files, plan multi-step edits, and apply precise line-based changes. + +### Project Management +- `litewrite_list_projects` — Search and list projects by name +- `litewrite_create_project` — Create a new project (auto-generates main.tex) +- `litewrite_rename_project` — Rename a project or update its description +- `litewrite_delete_project` — **Permanently** delete a project (IRREVERSIBLE — always confirm with user) + +### Compilation +- `litewrite_compile` — Compile the project to PDF. The PDF is **automatically sent** to the user. Auto-saves a version after successful compilation. + +### Version History +- `litewrite_list_versions` — List all saved versions of a project +- `litewrite_save_version` — Save the current project state as a named version +- `litewrite_restore_version` — Restore a project to a specific saved version (DESTRUCTIVE — suggest saving current state first) + +### File Management +- `litewrite_create_file` — Create a new file or folder in a project. Supports any file type (.tex, .md, .bib, etc.) 
with optional initial content. +- `litewrite_upload_file` — Upload a local file (image, PDF, etc.) to a project. Use for binary assets like figures, diagrams, photos. + +### Project Import +- `litewrite_import_arxiv` — Import a project from arXiv by paper ID or URL. Downloads the source files and creates a new project. +- `litewrite_import_github` — Import a project from a GitHub or GitLab repository URL. Supports subdirectory imports via tree URLs. +- `litewrite_import_upload` — Create a new project by uploading a local file (ZIP, tar.gz, .tex, etc.). Use when the user sends a file attachment. + +## Tool Selection Strategy + +### Deciding between mode="agent" and mode="ask" + +**Default to mode="agent"** whenever the user's intent involves ANY change to project files. If in doubt, use agent mode. + +#### Use mode="agent" when the user wants to: +- **Add** anything: new sections, paragraphs, text, references, figures, packages, etc. +- **Delete** anything: remove sections, sentences, paragraphs, files, etc. +- **Modify** anything: rewrite, rephrase, restructure, improve, fix, update, translate, shorten, expand, etc. +- **Fix** anything: typos, grammar, formatting, citations, compilation errors, etc. +- **Move** or reorganise: reorder sections, split/merge files, etc. + +Key signals that mean agent mode: "add", "delete", "remove", "modify", "change", "update", "rewrite", "fix", "improve", "shorten", "expand", "translate", "move", "replace", "insert", "append", "restructure", "refactor", "polish", "proofread", "revise", "correct", "adjust", "把...改成", "添加", "删除", "修改", "修正", "优化", "润色", "翻译", "重写", "缩短", "扩展", "补充". + +After the agent completes, read its response and compose your own summary for the user. Do NOT just echo the raw agent output. + +#### Use mode="ask" ONLY when the user is purely asking a question with NO intent to change anything: +- "What sections does my paper have?" +- "Summarise the introduction" +- "Are there any citation issues?" 
(if they just want to KNOW, not fix) +- "What packages are used?" +- "How long is the paper?" + +If the user asks a question that implies action (e.g. "Are there citation issues? Fix them." or "What's wrong with the introduction?"), use mode="agent" — the user likely wants the problem fixed, not just reported. + +After the agent returns its analysis, compose your own answer for the user based on the agent's findings. + +### Compilation — when to call `litewrite_compile` +Think carefully about the user's intent: +- If the user explicitly asks for a PDF, or says "compile", "build", "generate PDF" — always compile. +- If the user asks for edits AND their message implies they want to see the result (e.g. "help me fix X and send me the PDF", "update Y and compile"), compile AFTER the agent finishes editing. +- If the user only asks for edits without mentioning PDF, do NOT compile automatically — just report what was changed. + +### Creating new files in a project — IMPORTANT +When the user wants to create a new file (e.g. .md, .txt, .bib) inside an existing project: + +**Option A: File with known content (preferred for non-.tex files like .md)** +If you already know the content to write, or you can compose it yourself: +1. Call `litewrite_create_file(project_id, name="filename.md", content="... full content here ...")` — create the file WITH content in a single call. +This is the most reliable method. The `content` parameter writes directly to storage. + +**Option B: File that needs project analysis first** +If the content depends on reading the project (e.g. "summarise this paper into a .md file"): +1. First call `litewrite_agent(mode="ask")` to gather the information you need (e.g. "Summarise this paper comprehensively"). +2. Read the agent's response and compose the file content yourself. +3. Call `litewrite_create_file(project_id, name="filename.md", content="... your composed content ...")` — create the file with the full content. 
+ +**Option C: New .tex file that needs the agent to write LaTeX** +For LaTeX files that need the agent's LaTeX expertise: +1. Call `litewrite_create_file(project_id, name="chapter.tex")` to create an empty file. +2. Call `litewrite_agent(mode="agent")` to write LaTeX content into it. + +**NEVER** claim you created a file without calling `litewrite_create_file`. You MUST call the tool. + +### Management tools +- `litewrite_list_projects`: Always call first to find the project ID when the user refers to a project by name. +- `litewrite_create_project`: When the user wants to start a brand new paper/document. +- `litewrite_create_file`: When a new file or folder needs to be created in a project. You MUST call this tool — never pretend you created a file. +- `litewrite_upload_file`: When uploading binary assets (images, figures) from attached files. +- `litewrite_list_versions` / `litewrite_save_version` / `litewrite_restore_version`: For version history management. + +### Import tools +- `litewrite_import_arxiv`: User provides an arXiv link or ID. +- `litewrite_import_github`: User provides a GitHub/GitLab repository URL. +- `litewrite_import_upload`: User sends a file attachment (ZIP, tar.gz, .tex) and wants a new project created from it. + +## Typical Workflows + +### Workflow 1: Edit and compile +1. `litewrite_list_projects(search="...")` → find project ID +2. `litewrite_agent(project_id, message="Rewrite the introduction to ...", mode="agent")` → agent handles all reading and editing +3. `litewrite_compile(project_id)` → compile PDF (auto-sent to user) +4. Report the result to the user + +### Workflow 2: Create a new project +1. `litewrite_create_project(name="My Paper", locale="en")` → get project ID +2. `litewrite_agent(project_id, message="Write a complete paper about ...", mode="agent")` → agent writes content +3. 
`litewrite_compile(project_id)` → compile PDF (auto-sent to user) + +### Workflow 3: Upload an image and use it +User sends an image and says: "Put this image in the RAGAnything project as figures/architecture.png" + +1. `litewrite_list_projects(search="RAGAnything")` → find project ID +2. `litewrite_upload_file(project_id, local_path="/path/from/attached/files", target_path="figures/architecture.png")` → upload the image +3. `litewrite_agent(project_id, message="Add \\includegraphics for figures/architecture.png in the appropriate section", mode="agent")` → agent updates LaTeX +4. `litewrite_compile(project_id)` → compile PDF (auto-sent to user) + +### Workflow 4: Analyse a document +User: "What sections does my paper have?" + +1. `litewrite_list_projects(search=...)` → find project ID +2. `litewrite_agent(project_id, message="List all sections and subsections with a brief summary of each", mode="ask")` → agent analyses without editing +3. Report the analysis to the user + +### Workflow 5: Create a new file and write content into it +User: "Create a .md file in the project and write a summary of the paper into it" + +1. `litewrite_list_projects(search="...")` → find project ID +2. `litewrite_agent(project_id, message="Provide a comprehensive summary of this paper", mode="ask")` → agent reads the paper and returns a summary +3. Compose the summary into well-formatted markdown content yourself +4. `litewrite_create_file(project_id, name="paper_summary.md", content="# Paper Summary\n\n...")` → create the file WITH the full content +5. Report the result to the user + +NOTE: Do NOT create an empty file then ask `litewrite_agent` to write non-LaTeX content into it — the agent's file editing is optimised for LaTeX and may truncate content for .md files. Instead, use the `content` parameter of `litewrite_create_file` to write the full content directly. + +### Workflow 6: Import from arXiv +User: "Import this paper: https://arxiv.org/abs/2301.07041" + +1. 
`litewrite_import_arxiv(arxiv_id="https://arxiv.org/abs/2301.07041")` → creates project from arXiv source +2. Report the result (project name, ID, files count) +3. Optionally `litewrite_compile(project_id)` if the user wants to see the PDF + +### Workflow 7: Import from GitHub +User: "Import this repo: https://github.com/user/latex-paper" + +1. `litewrite_import_github(url="https://github.com/user/latex-paper")` → creates project from repo +2. Report the result + +### Workflow 8: Import from uploaded file +User sends a ZIP file and says: "Create a project from this file" + +1. `litewrite_import_upload(local_path="/path/from/attached/files")` → creates project from file +2. Report the result + +### Workflow 9: Restore a previous version +1. `litewrite_list_projects(search="...")` → find project ID +2. `litewrite_list_versions(project_id)` → see all saved versions +3. `litewrite_save_version(project_id, name="Before restore")` → save current state first +4. `litewrite_restore_version(project_id, version_id)` → restore to chosen version + +### Deep Research (MANDATORY workflow) + +When the user asks to research/survey/investigate a topic, you MUST follow ALL these steps: + +1. **Research**: Use `litewrite_deep_research(query="...")` — this returns a Markdown report with citations and BibTeX +2. **Create project**: Use `litewrite_create_project(name="<topic> Survey", main_file_content="<LaTeX version of the report>")` — convert the Markdown report to a proper LaTeX document with `\documentclass`, `\begin{document}`, sections, `\bibliography`, etc. +3. **Compile**: Use `litewrite_compile(project_id, compiler="xelatex")` to build the PDF +4. **Send PDF**: Use `message(content="...", media=[pdf_path])` to send the compiled PDF to the user + +**NEVER skip steps 2-4.** The user expects a compiled PDF, not raw Markdown text. +**NEVER just send the Markdown report as a text message.** Always compile it into a PDF first. 
+ +## CRITICAL Rules + +### Response Pattern +Every time you receive a user message, follow this pattern: +1. **Understand intent** — Is it a modification, a question, a compile request, an import, or something else? +2. **Call the right tool(s)** — Modifications use agent mode, questions use ask mode, compile when needed. Creating files uses `litewrite_create_file`. +3. **Compose your reply** — Read the tool output and write your own coherent, helpful response. Never dump raw tool output. + +**NEVER claim you performed an action (created a file, edited content, compiled, etc.) without having actually called the corresponding tool. If you haven't called the tool, you haven't done the action.** + +### Agent Mode Rules +- ALL content modifications go through `litewrite_agent(mode="agent")`. NEVER edit files yourself. +- ALL content questions go through `litewrite_agent(mode="ask")`. NEVER read files yourself. +- Be specific in your instructions: instead of "improve the paper", say "rewrite the introduction to emphasise the novelty of our approach". +- The agent reads files automatically — you do NOT need to read them first. +- The agent applies edits directly. Changes take effect immediately. +- For complex tasks, the agent may take 30–60 seconds. + +### LaTeX Compilation +- **ALWAYS use `litewrite_compile`** to compile LaTeX documents. NEVER use `exec` to run `pdflatex`, `xelatex`, or `lualatex` directly - the compiler is NOT installed locally. +- **ALWAYS create a Litewrite project first** (with `litewrite_create_project`) before compiling. Do NOT write `.tex` files locally with `write_file`. + +### Compiler Selection +- **pdflatex** (default): Standard LaTeX compiler. Works for most English-only documents. +- **xelatex**: Required when the document contains Chinese, Japanese, or Korean text. Also needed for `fontspec`, `xeCJK`, or custom Unicode fonts. +- **lualatex**: Alternative Unicode-aware compiler. 
+- **Rule of thumb**: If the document contains Chinese/CJK content, you MUST use `compiler="xelatex"`. + +### Compilation & PDF Delivery +- `litewrite_compile` automatically sends the compiled PDF to the user. Do NOT call the message tool to send the PDF again. +- `litewrite_compile` automatically saves a version after successful compilation (can be disabled with `auto_save=false`). +- When the user says things like "compile", "give me the PDF", "send me the latest version", you MUST call `litewrite_compile`. + +### Destructive Operations +- **Always confirm with the user** before using `litewrite_delete_project` or `litewrite_restore_version`. +- For `litewrite_restore_version`, **always save the current state first** using `litewrite_save_version`. + +### Handling Attached Files +- When the user sends files (images, documents), their **local paths** appear in the `[Attached files]` section of the message. +- Use these paths with `litewrite_upload_file` to upload binary files to a project, or `litewrite_import_upload` to create a new project from an archive. +- The LLM can also **see** attached images (vision), so you can understand the content before deciding where to place them. diff --git a/nanobot/nanobot/skills/skill-creator/SKILL.md b/nanobot/nanobot/skills/skill-creator/SKILL.md new file mode 100644 index 0000000..9b5eb6f --- /dev/null +++ b/nanobot/nanobot/skills/skill-creator/SKILL.md @@ -0,0 +1,371 @@ +--- +name: skill-creator +description: Create or update AgentSkills. Use when designing, structuring, or packaging skills with scripts, references, and assets. +--- + +# Skill Creator + +This skill provides guidance for creating effective skills. + +## About Skills + +Skills are modular, self-contained packages that extend the agent's capabilities by providing +specialized knowledge, workflows, and tools. 
Think of them as "onboarding guides" for specific +domains or tasks—they transform the agent from a general-purpose agent into a specialized agent +equipped with procedural knowledge that no model can fully possess. + +### What Skills Provide + +1. Specialized workflows - Multi-step procedures for specific domains +2. Tool integrations - Instructions for working with specific file formats or APIs +3. Domain expertise - Company-specific knowledge, schemas, business logic +4. Bundled resources - Scripts, references, and assets for complex and repetitive tasks + +## Core Principles + +### Concise is Key + +The context window is a public good. Skills share the context window with everything else the agent needs: system prompt, conversation history, other Skills' metadata, and the actual user request. + +**Default assumption: the agent is already very smart.** Only add context the agent doesn't already have. Challenge each piece of information: "Does the agent really need this explanation?" and "Does this paragraph justify its token cost?" + +Prefer concise examples over verbose explanations. + +### Set Appropriate Degrees of Freedom + +Match the level of specificity to the task's fragility and variability: + +**High freedom (text-based instructions)**: Use when multiple approaches are valid, decisions depend on context, or heuristics guide the approach. + +**Medium freedom (pseudocode or scripts with parameters)**: Use when a preferred pattern exists, some variation is acceptable, or configuration affects behavior. + +**Low freedom (specific scripts, few parameters)**: Use when operations are fragile and error-prone, consistency is critical, or a specific sequence must be followed. + +Think of the agent as exploring a path: a narrow bridge with cliffs needs specific guardrails (low freedom), while an open field allows many routes (high freedom). 
+ +### Anatomy of a Skill + +Every skill consists of a required SKILL.md file and optional bundled resources: + +``` +skill-name/ +├── SKILL.md (required) +│ ├── YAML frontmatter metadata (required) +│ │ ├── name: (required) +│ │ └── description: (required) +│ └── Markdown instructions (required) +└── Bundled Resources (optional) + ├── scripts/ - Executable code (Python/Bash/etc.) + ├── references/ - Documentation intended to be loaded into context as needed + └── assets/ - Files used in output (templates, icons, fonts, etc.) +``` + +#### SKILL.md (required) + +Every SKILL.md consists of: + +- **Frontmatter** (YAML): Contains `name` and `description` fields. These are the only fields that the agent reads to determine when the skill gets used, thus it is very important to be clear and comprehensive in describing what the skill is, and when it should be used. +- **Body** (Markdown): Instructions and guidance for using the skill. Only loaded AFTER the skill triggers (if at all). + +#### Bundled Resources (optional) + +##### Scripts (`scripts/`) + +Executable code (Python/Bash/etc.) for tasks that require deterministic reliability or are repeatedly rewritten. + +- **When to include**: When the same code is being rewritten repeatedly or deterministic reliability is needed +- **Example**: `scripts/rotate_pdf.py` for PDF rotation tasks +- **Benefits**: Token efficient, deterministic, may be executed without loading into context +- **Note**: Scripts may still need to be read by the agent for patching or environment-specific adjustments + +##### References (`references/`) + +Documentation and reference material intended to be loaded as needed into context to inform the agent's process and thinking. 
+ +- **When to include**: For documentation that the agent should reference while working +- **Examples**: `references/finance.md` for financial schemas, `references/mnda.md` for company NDA template, `references/policies.md` for company policies, `references/api_docs.md` for API specifications +- **Use cases**: Database schemas, API documentation, domain knowledge, company policies, detailed workflow guides +- **Benefits**: Keeps SKILL.md lean, loaded only when the agent determines it's needed +- **Best practice**: If files are large (>10k words), include grep search patterns in SKILL.md +- **Avoid duplication**: Information should live in either SKILL.md or references files, not both. Prefer references files for detailed information unless it's truly core to the skill—this keeps SKILL.md lean while making information discoverable without hogging the context window. Keep only essential procedural instructions and workflow guidance in SKILL.md; move detailed reference material, schemas, and examples to references files. + +##### Assets (`assets/`) + +Files not intended to be loaded into context, but rather used within the output the agent produces. + +- **When to include**: When the skill needs files that will be used in the final output +- **Examples**: `assets/logo.png` for brand assets, `assets/slides.pptx` for PowerPoint templates, `assets/frontend-template/` for HTML/React boilerplate, `assets/font.ttf` for typography +- **Use cases**: Templates, images, icons, boilerplate code, fonts, sample documents that get copied or modified +- **Benefits**: Separates output resources from documentation, enables the agent to use files without loading them into context + +#### What to Not Include in a Skill + +A skill should only contain essential files that directly support its functionality. Do NOT create extraneous documentation or auxiliary files, including: + +- README.md +- INSTALLATION_GUIDE.md +- QUICK_REFERENCE.md +- CHANGELOG.md +- etc. 
+ +The skill should only contain the information needed for an AI agent to do the job at hand. It should not contain auxiliary context about the process that went into creating it, setup and testing procedures, user-facing documentation, etc. Creating additional documentation files just adds clutter and confusion. + +### Progressive Disclosure Design Principle + +Skills use a three-level loading system to manage context efficiently: + +1. **Metadata (name + description)** - Always in context (~100 words) +2. **SKILL.md body** - When skill triggers (<5k words) +3. **Bundled resources** - As needed by the agent (Unlimited because scripts can be executed without reading into context window) + +#### Progressive Disclosure Patterns + +Keep SKILL.md body to the essentials and under 500 lines to minimize context bloat. Split content into separate files when approaching this limit. When splitting out content into other files, it is very important to reference them from SKILL.md and describe clearly when to read them, to ensure the reader of the skill knows they exist and when to use them. + +**Key principle:** When a skill supports multiple variations, frameworks, or options, keep only the core workflow and selection guidance in SKILL.md. Move variant-specific details (patterns, examples, configuration) into separate reference files. + +**Pattern 1: High-level guide with references** + +```markdown +# PDF Processing + +## Quick start + +Extract text with pdfplumber: +[code example] + +## Advanced features + +- **Form filling**: See [FORMS.md](FORMS.md) for complete guide +- **API reference**: See [REFERENCE.md](REFERENCE.md) for all methods +- **Examples**: See [EXAMPLES.md](EXAMPLES.md) for common patterns +``` + +The agent loads FORMS.md, REFERENCE.md, or EXAMPLES.md only when needed. 
+ +**Pattern 2: Domain-specific organization** + +For Skills with multiple domains, organize content by domain to avoid loading irrelevant context: + +``` +bigquery-skill/ +├── SKILL.md (overview and navigation) +└── reference/ + ├── finance.md (revenue, billing metrics) + ├── sales.md (opportunities, pipeline) + ├── product.md (API usage, features) + └── marketing.md (campaigns, attribution) +``` + +When a user asks about sales metrics, the agent only reads sales.md. + +Similarly, for skills supporting multiple frameworks or variants, organize by variant: + +``` +cloud-deploy/ +├── SKILL.md (workflow + provider selection) +└── references/ + ├── aws.md (AWS deployment patterns) + ├── gcp.md (GCP deployment patterns) + └── azure.md (Azure deployment patterns) +``` + +When the user chooses AWS, the agent only reads aws.md. + +**Pattern 3: Conditional details** + +Show basic content, link to advanced content: + +```markdown +# DOCX Processing + +## Creating documents + +Use docx-js for new documents. See [DOCX-JS.md](DOCX-JS.md). + +## Editing documents + +For simple edits, modify the XML directly. + +**For tracked changes**: See [REDLINING.md](REDLINING.md) +**For OOXML details**: See [OOXML.md](OOXML.md) +``` + +The agent reads REDLINING.md or OOXML.md only when the user needs those features. + +**Important guidelines:** + +- **Avoid deeply nested references** - Keep references one level deep from SKILL.md. All reference files should link directly from SKILL.md. +- **Structure longer reference files** - For files longer than 100 lines, include a table of contents at the top so the agent can see the full scope when previewing. + +## Skill Creation Process + +Skill creation involves these steps: + +1. Understand the skill with concrete examples +2. Plan reusable skill contents (scripts, references, assets) +3. Initialize the skill (run init_skill.py) +4. Edit the skill (implement resources and write SKILL.md) +5. Package the skill (run package_skill.py) +6. 
Iterate based on real usage + +Follow these steps in order, skipping only if there is a clear reason why they are not applicable. + +### Skill Naming + +- Use lowercase letters, digits, and hyphens only; normalize user-provided titles to hyphen-case (e.g., "Plan Mode" -> `plan-mode`). +- When generating names, generate a name under 64 characters (letters, digits, hyphens). +- Prefer short, verb-led phrases that describe the action. +- Namespace by tool when it improves clarity or triggering (e.g., `gh-address-comments`, `linear-address-issue`). +- Name the skill folder exactly after the skill name. + +### Step 1: Understanding the Skill with Concrete Examples + +Skip this step only when the skill's usage patterns are already clearly understood. It remains valuable even when working with an existing skill. + +To create an effective skill, clearly understand concrete examples of how the skill will be used. This understanding can come from either direct user examples or generated examples that are validated with user feedback. + +For example, when building an image-editor skill, relevant questions include: + +- "What functionality should the image-editor skill support? Editing, rotating, anything else?" +- "Can you give some examples of how this skill would be used?" +- "I can imagine users asking for things like 'Remove the red-eye from this image' or 'Rotate this image'. Are there other ways you imagine this skill being used?" +- "What would a user say that should trigger this skill?" + +To avoid overwhelming users, avoid asking too many questions in a single message. Start with the most important questions and follow up as needed for better effectiveness. + +Conclude this step when there is a clear sense of the functionality the skill should support. + +### Step 2: Planning the Reusable Skill Contents + +To turn concrete examples into an effective skill, analyze each example by: + +1. Considering how to execute on the example from scratch +2. 
Identifying what scripts, references, and assets would be helpful when executing these workflows repeatedly + +Example: When building a `pdf-editor` skill to handle queries like "Help me rotate this PDF," the analysis shows: + +1. Rotating a PDF requires re-writing the same code each time +2. A `scripts/rotate_pdf.py` script would be helpful to store in the skill + +Example: When designing a `frontend-webapp-builder` skill for queries like "Build me a todo app" or "Build me a dashboard to track my steps," the analysis shows: + +1. Writing a frontend webapp requires the same boilerplate HTML/React each time +2. An `assets/hello-world/` template containing the boilerplate HTML/React project files would be helpful to store in the skill + +Example: When building a `big-query` skill to handle queries like "How many users have logged in today?" the analysis shows: + +1. Querying BigQuery requires re-discovering the table schemas and relationships each time +2. A `references/schema.md` file documenting the table schemas would be helpful to store in the skill + +To establish the skill's contents, analyze each concrete example to create a list of the reusable resources to include: scripts, references, and assets. + +### Step 3: Initializing the Skill + +At this point, it is time to actually create the skill. + +Skip this step only if the skill being developed already exists, and iteration or packaging is needed. In this case, continue to the next step. + +When creating a new skill from scratch, always run the `init_skill.py` script. The script conveniently generates a new template skill directory that automatically includes everything a skill requires, making the skill creation process much more efficient and reliable. 
+ +Usage: + +```bash +scripts/init_skill.py <skill-name> --path <output-directory> [--resources scripts,references,assets] [--examples] +``` + +Examples: + +```bash +scripts/init_skill.py my-skill --path skills/public +scripts/init_skill.py my-skill --path skills/public --resources scripts,references +scripts/init_skill.py my-skill --path skills/public --resources scripts --examples +``` + +The script: + +- Creates the skill directory at the specified path +- Generates a SKILL.md template with proper frontmatter and TODO placeholders +- Optionally creates resource directories based on `--resources` +- Optionally adds example files when `--examples` is set + +After initialization, customize the SKILL.md and add resources as needed. If you used `--examples`, replace or delete placeholder files. + +### Step 4: Edit the Skill + +When editing the (newly-generated or existing) skill, remember that the skill is being created for another instance of the agent to use. Include information that would be beneficial and non-obvious to the agent. Consider what procedural knowledge, domain-specific details, or reusable assets would help another agent instance execute these tasks more effectively. + +#### Learn Proven Design Patterns + +Consult these helpful guides based on your skill's needs: + +- **Multi-step processes**: See references/workflows.md for sequential workflows and conditional logic +- **Specific output formats or quality standards**: See references/output-patterns.md for template and example patterns + +These files contain established best practices for effective skill design. + +#### Start with Reusable Skill Contents + +To begin implementation, start with the reusable resources identified above: `scripts/`, `references/`, and `assets/` files. Note that this step may require user input. 
For example, when implementing a `brand-guidelines` skill, the user may need to provide brand assets or templates to store in `assets/`, or documentation to store in `references/`. + +Added scripts must be tested by actually running them to ensure there are no bugs and that the output matches what is expected. If there are many similar scripts, only a representative sample needs to be tested to ensure confidence that they all work while balancing time to completion. + +If you used `--examples`, delete any placeholder files that are not needed for the skill. Only create resource directories that are actually required. + +#### Update SKILL.md + +**Writing Guidelines:** Always use imperative/infinitive form. + +##### Frontmatter + +Write the YAML frontmatter with `name` and `description`: + +- `name`: The skill name +- `description`: This is the primary triggering mechanism for your skill, and helps the agent understand when to use the skill. + - Include both what the Skill does and specific triggers/contexts for when to use it. + - Include all "when to use" information here - Not in the body. The body is only loaded after triggering, so "When to Use This Skill" sections in the body are not helpful to the agent. + - Example description for a `docx` skill: "Comprehensive document creation, editing, and analysis with support for tracked changes, comments, formatting preservation, and text extraction. Use when the agent needs to work with professional documents (.docx files) for: (1) Creating new documents, (2) Modifying or editing content, (3) Working with tracked changes, (4) Adding comments, or any other document tasks" + +Do not include any other fields in YAML frontmatter. + +##### Body + +Write instructions for using the skill and its bundled resources. + +### Step 5: Packaging a Skill + +Once development of the skill is complete, it must be packaged into a distributable .skill file that gets shared with the user. 
The packaging process automatically validates the skill first to ensure it meets all requirements: + +```bash +scripts/package_skill.py <path/to/skill-folder> +``` + +Optional output directory specification: + +```bash +scripts/package_skill.py <path/to/skill-folder> ./dist +``` + +The packaging script will: + +1. **Validate** the skill automatically, checking: + + - YAML frontmatter format and required fields + - Skill naming conventions and directory structure + - Description completeness and quality + - File organization and resource references + +2. **Package** the skill if validation passes, creating a .skill file named after the skill (e.g., `my-skill.skill`) that includes all files and maintains the proper directory structure for distribution. The .skill file is a zip file with a .skill extension. + +If validation fails, the script will report the errors and exit without creating a package. Fix any validation errors and run the packaging command again. + +### Step 6: Iterate + +After testing the skill, users may request improvements. Often this happens right after using the skill, with fresh context of how the skill performed. + +**Iteration workflow:** + +1. Use the skill on real tasks +2. Notice struggles or inefficiencies +3. Identify how SKILL.md or bundled resources should be updated +4. Implement changes and test again diff --git a/nanobot/nanobot/skills/summarize/SKILL.md b/nanobot/nanobot/skills/summarize/SKILL.md new file mode 100644 index 0000000..766ab5d --- /dev/null +++ b/nanobot/nanobot/skills/summarize/SKILL.md @@ -0,0 +1,67 @@ +--- +name: summarize +description: Summarize or extract text/transcripts from URLs, podcasts, and local files (great fallback for “transcribe this YouTube/video”). 
+homepage: https://summarize.sh +metadata: {"nanobot":{"emoji":"🧾","requires":{"bins":["summarize"]},"install":[{"id":"brew","kind":"brew","formula":"steipete/tap/summarize","bins":["summarize"],"label":"Install summarize (brew)"}]}} +--- + +# Summarize + +Fast CLI to summarize URLs, local files, and YouTube links. + +## When to use (trigger phrases) + +Use this skill immediately when the user asks any of: +- “use summarize.sh” +- “what’s this link/video about?” +- “summarize this URL/article” +- “transcribe this YouTube/video” (best-effort transcript extraction; no `yt-dlp` needed) + +## Quick start + +```bash +summarize "https://example.com" --model google/gemini-3-flash-preview +summarize "/path/to/file.pdf" --model google/gemini-3-flash-preview +summarize "https://youtu.be/dQw4w9WgXcQ" --youtube auto +``` + +## YouTube: summary vs transcript + +Best-effort transcript (URLs only): + +```bash +summarize "https://youtu.be/dQw4w9WgXcQ" --youtube auto --extract-only +``` + +If the user asked for a transcript but it’s huge, return a tight summary first, then ask which section/time range to expand. + +## Model + keys + +Set the API key for your chosen provider: +- OpenAI: `OPENAI_API_KEY` +- Anthropic: `ANTHROPIC_API_KEY` +- xAI: `XAI_API_KEY` +- Google: `GEMINI_API_KEY` (aliases: `GOOGLE_GENERATIVE_AI_API_KEY`, `GOOGLE_API_KEY`) + +Default model is `google/gemini-3-flash-preview` if none is set. 
+ +## Useful flags + +- `--length short|medium|long|xl|xxl|<chars>` +- `--max-output-tokens <count>` +- `--extract-only` (URLs only) +- `--json` (machine readable) +- `--firecrawl auto|off|always` (fallback extraction) +- `--youtube auto` (Apify fallback if `APIFY_API_TOKEN` set) + +## Config + +Optional config file: `~/.summarize/config.json` + +```json +{ "model": "openai/gpt-5.2" } +``` + +Optional services: +- `FIRECRAWL_API_KEY` for blocked sites +- `APIFY_API_TOKEN` for YouTube fallback diff --git a/nanobot/nanobot/skills/tmux/SKILL.md b/nanobot/nanobot/skills/tmux/SKILL.md new file mode 100644 index 0000000..f2a3144 --- /dev/null +++ b/nanobot/nanobot/skills/tmux/SKILL.md @@ -0,0 +1,121 @@ +--- +name: tmux +description: Remote-control tmux sessions for interactive CLIs by sending keystrokes and scraping pane output. +metadata: {"nanobot":{"emoji":"🧵","os":["darwin","linux"],"requires":{"bins":["tmux"]}}} +--- + +# tmux Skill + +Use tmux only when you need an interactive TTY. Prefer exec background mode for long-running, non-interactive tasks. + +## Quickstart (isolated socket, exec tool) + +```bash +SOCKET_DIR="${NANOBOT_TMUX_SOCKET_DIR:-${TMPDIR:-/tmp}/nanobot-tmux-sockets}" +mkdir -p "$SOCKET_DIR" +SOCKET="$SOCKET_DIR/nanobot.sock" +SESSION=nanobot-python + +tmux -S "$SOCKET" new -d -s "$SESSION" -n shell +tmux -S "$SOCKET" send-keys -t "$SESSION":0.0 -- 'PYTHON_BASIC_REPL=1 python3 -q' Enter +tmux -S "$SOCKET" capture-pane -p -J -t "$SESSION":0.0 -S -200 +``` + +After starting a session, always print monitor commands: + +``` +To monitor: + tmux -S "$SOCKET" attach -t "$SESSION" + tmux -S "$SOCKET" capture-pane -p -J -t "$SESSION":0.0 -S -200 +``` + +## Socket convention + +- Use `NANOBOT_TMUX_SOCKET_DIR` environment variable. +- Default socket path: `"$NANOBOT_TMUX_SOCKET_DIR/nanobot.sock"`. + +## Targeting panes and naming + +- Target format: `session:window.pane` (defaults to `:0.0`). +- Keep names short; avoid spaces. 
+- Inspect: `tmux -S "$SOCKET" list-sessions`, `tmux -S "$SOCKET" list-panes -a`. + +## Finding sessions + +- List sessions on your socket: `{baseDir}/scripts/find-sessions.sh -S "$SOCKET"`. +- Scan all sockets: `{baseDir}/scripts/find-sessions.sh --all` (uses `NANOBOT_TMUX_SOCKET_DIR`). + +## Sending input safely + +- Prefer literal sends: `tmux -S "$SOCKET" send-keys -t target -l -- "$cmd"`. +- Control keys: `tmux -S "$SOCKET" send-keys -t target C-c`. + +## Watching output + +- Capture recent history: `tmux -S "$SOCKET" capture-pane -p -J -t target -S -200`. +- Wait for prompts: `{baseDir}/scripts/wait-for-text.sh -t session:0.0 -p 'pattern'`. +- Attaching is OK; detach with `Ctrl+b d`. + +## Spawning processes + +- For python REPLs, set `PYTHON_BASIC_REPL=1` (non-basic REPL breaks send-keys flows). + +## Windows / WSL + +- tmux is supported on macOS/Linux. On Windows, use WSL and install tmux inside WSL. +- This skill is gated to `darwin`/`linux` and requires `tmux` on PATH. + +## Orchestrating Coding Agents (Codex, Claude Code) + +tmux excels at running multiple coding agents in parallel: + +```bash +SOCKET="${TMPDIR:-/tmp}/codex-army.sock" + +# Create multiple sessions +for i in 1 2 3 4 5; do + tmux -S "$SOCKET" new-session -d -s "agent-$i" +done + +# Launch agents in different workdirs +tmux -S "$SOCKET" send-keys -t agent-1 "cd /tmp/project1 && codex --yolo 'Fix bug X'" Enter +tmux -S "$SOCKET" send-keys -t agent-2 "cd /tmp/project2 && codex --yolo 'Fix bug Y'" Enter + +# Poll for completion (check if prompt returned) +for sess in agent-1 agent-2; do + if tmux -S "$SOCKET" capture-pane -p -t "$sess" -S -3 | grep -q "❯"; then + echo "$sess: DONE" + else + echo "$sess: Running..." 
+ fi +done + +# Get full output from completed session +tmux -S "$SOCKET" capture-pane -p -t agent-1 -S -500 +``` + +**Tips:** +- Use separate git worktrees for parallel fixes (no branch conflicts) +- `pnpm install` first before running codex in fresh clones +- Check for shell prompt (`❯` or `$`) to detect completion +- Codex needs `--yolo` or `--full-auto` for non-interactive fixes + +## Cleanup + +- Kill a session: `tmux -S "$SOCKET" kill-session -t "$SESSION"`. +- Kill all sessions on a socket: `tmux -S "$SOCKET" list-sessions -F '#{session_name}' | xargs -r -n1 tmux -S "$SOCKET" kill-session -t`. +- Remove everything on the private socket: `tmux -S "$SOCKET" kill-server`. + +## Helper: wait-for-text.sh + +`{baseDir}/scripts/wait-for-text.sh` polls a pane for a regex (or fixed string) with a timeout. + +```bash +{baseDir}/scripts/wait-for-text.sh -t session:0.0 -p 'pattern' [-F] [-T 20] [-i 0.5] [-l 2000] +``` + +- `-t`/`--target` pane target (required) +- `-p`/`--pattern` regex to match (required); add `-F` for fixed string +- `-T` timeout seconds (integer, default 15) +- `-i` poll interval seconds (default 0.5) +- `-l` history lines to search (integer, default 1000) diff --git a/nanobot/nanobot/skills/tmux/scripts/find-sessions.sh b/nanobot/nanobot/skills/tmux/scripts/find-sessions.sh new file mode 100755 index 0000000..00552c6 --- /dev/null +++ b/nanobot/nanobot/skills/tmux/scripts/find-sessions.sh @@ -0,0 +1,112 @@ +#!/usr/bin/env bash +set -euo pipefail + +usage() { + cat <<'USAGE' +Usage: find-sessions.sh [-L socket-name|-S socket-path|-A] [-q pattern] + +List tmux sessions on a socket (default tmux socket if none provided). 
+ +Options: + -L, --socket tmux socket name (passed to tmux -L) + -S, --socket-path tmux socket path (passed to tmux -S) + -A, --all scan all sockets under NANOBOT_TMUX_SOCKET_DIR + -q, --query case-insensitive substring to filter session names + -h, --help show this help +USAGE +} + +socket_name="" +socket_path="" +query="" +scan_all=false +socket_dir="${NANOBOT_TMUX_SOCKET_DIR:-${TMPDIR:-/tmp}/nanobot-tmux-sockets}" + +while [[ $# -gt 0 ]]; do + case "$1" in + -L|--socket) socket_name="${2-}"; shift 2 ;; + -S|--socket-path) socket_path="${2-}"; shift 2 ;; + -A|--all) scan_all=true; shift ;; + -q|--query) query="${2-}"; shift 2 ;; + -h|--help) usage; exit 0 ;; + *) echo "Unknown option: $1" >&2; usage; exit 1 ;; + esac +done + +if [[ "$scan_all" == true && ( -n "$socket_name" || -n "$socket_path" ) ]]; then + echo "Cannot combine --all with -L or -S" >&2 + exit 1 +fi + +if [[ -n "$socket_name" && -n "$socket_path" ]]; then + echo "Use either -L or -S, not both" >&2 + exit 1 +fi + +if ! command -v tmux >/dev/null 2>&1; then + echo "tmux not found in PATH" >&2 + exit 1 +fi + +list_sessions() { + local label="$1"; shift + local tmux_cmd=(tmux "$@") + + if ! sessions="$("${tmux_cmd[@]}" list-sessions -F '#{session_name}\t#{session_attached}\t#{session_created_string}' 2>/dev/null)"; then + echo "No tmux server found on $label" >&2 + return 1 + fi + + if [[ -n "$query" ]]; then + sessions="$(printf '%s\n' "$sessions" | grep -i -- "$query" || true)" + fi + + if [[ -z "$sessions" ]]; then + echo "No sessions found on $label" + return 0 + fi + + echo "Sessions on $label:" + printf '%s\n' "$sessions" | while IFS=$'\t' read -r name attached created; do + attached_label=$([[ "$attached" == "1" ]] && echo "attached" || echo "detached") + printf ' - %s (%s, started %s)\n' "$name" "$attached_label" "$created" + done +} + +if [[ "$scan_all" == true ]]; then + if [[ ! 
-d "$socket_dir" ]]; then + echo "Socket directory not found: $socket_dir" >&2 + exit 1 + fi + + shopt -s nullglob + sockets=("$socket_dir"/*) + shopt -u nullglob + + if [[ "${#sockets[@]}" -eq 0 ]]; then + echo "No sockets found under $socket_dir" >&2 + exit 1 + fi + + exit_code=0 + for sock in "${sockets[@]}"; do + if [[ ! -S "$sock" ]]; then + continue + fi + list_sessions "socket path '$sock'" -S "$sock" || exit_code=$? + done + exit "$exit_code" +fi + +tmux_cmd=(tmux) +socket_label="default socket" + +if [[ -n "$socket_name" ]]; then + tmux_cmd+=(-L "$socket_name") + socket_label="socket name '$socket_name'" +elif [[ -n "$socket_path" ]]; then + tmux_cmd+=(-S "$socket_path") + socket_label="socket path '$socket_path'" +fi + +list_sessions "$socket_label" "${tmux_cmd[@]:1}" diff --git a/nanobot/nanobot/skills/tmux/scripts/wait-for-text.sh b/nanobot/nanobot/skills/tmux/scripts/wait-for-text.sh new file mode 100755 index 0000000..56354be --- /dev/null +++ b/nanobot/nanobot/skills/tmux/scripts/wait-for-text.sh @@ -0,0 +1,83 @@ +#!/usr/bin/env bash +set -euo pipefail + +usage() { + cat <<'USAGE' +Usage: wait-for-text.sh -t target -p pattern [options] + +Poll a tmux pane for text and exit when found. 
+ +Options: + -t, --target tmux target (session:window.pane), required + -p, --pattern regex pattern to look for, required + -F, --fixed treat pattern as a fixed string (grep -F) + -T, --timeout seconds to wait (integer, default: 15) + -i, --interval poll interval in seconds (default: 0.5) + -l, --lines number of history lines to inspect (integer, default: 1000) + -h, --help show this help +USAGE +} + +target="" +pattern="" +grep_flag="-E" +timeout=15 +interval=0.5 +lines=1000 + +while [[ $# -gt 0 ]]; do + case "$1" in + -t|--target) target="${2-}"; shift 2 ;; + -p|--pattern) pattern="${2-}"; shift 2 ;; + -F|--fixed) grep_flag="-F"; shift ;; + -T|--timeout) timeout="${2-}"; shift 2 ;; + -i|--interval) interval="${2-}"; shift 2 ;; + -l|--lines) lines="${2-}"; shift 2 ;; + -h|--help) usage; exit 0 ;; + *) echo "Unknown option: $1" >&2; usage; exit 1 ;; + esac +done + +if [[ -z "$target" || -z "$pattern" ]]; then + echo "target and pattern are required" >&2 + usage + exit 1 +fi + +if ! [[ "$timeout" =~ ^[0-9]+$ ]]; then + echo "timeout must be an integer number of seconds" >&2 + exit 1 +fi + +if ! [[ "$lines" =~ ^[0-9]+$ ]]; then + echo "lines must be an integer" >&2 + exit 1 +fi + +if ! 
command -v tmux >/dev/null 2>&1; then + echo "tmux not found in PATH" >&2 + exit 1 +fi + +# End time in epoch seconds (integer, good enough for polling) +start_epoch=$(date +%s) +deadline=$((start_epoch + timeout)) + +while true; do + # -J joins wrapped lines, -S uses negative index to read last N lines + pane_text="$(tmux capture-pane -p -J -t "$target" -S "-${lines}" 2>/dev/null || true)" + + if printf '%s\n' "$pane_text" | grep $grep_flag -- "$pattern" >/dev/null 2>&1; then + exit 0 + fi + + now=$(date +%s) + if (( now >= deadline )); then + echo "Timed out after ${timeout}s waiting for pattern: $pattern" >&2 + echo "Last ${lines} lines from $target:" >&2 + printf '%s\n' "$pane_text" >&2 + exit 1 + fi + + sleep "$interval" +done diff --git a/nanobot/nanobot/skills/weather/SKILL.md b/nanobot/nanobot/skills/weather/SKILL.md new file mode 100644 index 0000000..8073de1 --- /dev/null +++ b/nanobot/nanobot/skills/weather/SKILL.md @@ -0,0 +1,49 @@ +--- +name: weather +description: Get current weather and forecasts (no API key required). +homepage: https://wttr.in/:help +metadata: {"nanobot":{"emoji":"🌤️","requires":{"bins":["curl"]}}} +--- + +# Weather + +Two free services, no API keys needed. 
+ +## wttr.in (primary) + +Quick one-liner: +```bash +curl -s "wttr.in/London?format=3" +# Output: London: ⛅️ +8°C +``` + +Compact format: +```bash +curl -s "wttr.in/London?format=%l:+%c+%t+%h+%w" +# Output: London: ⛅️ +8°C 71% ↙5km/h +``` + +Full forecast: +```bash +curl -s "wttr.in/London?T" +``` + +Format codes: `%c` condition · `%t` temp · `%h` humidity · `%w` wind · `%l` location · `%m` moon + +Tips: +- URL-encode spaces: `wttr.in/New+York` +- Airport codes: `wttr.in/JFK` +- Units: `?m` (metric) `?u` (USCS) +- Today only: `?1` · Current only: `?0` +- PNG: `curl -s "wttr.in/Berlin.png" -o /tmp/weather.png` + +## Open-Meteo (fallback, JSON) + +Free, no key, good for programmatic use: +```bash +curl -s "https://api.open-meteo.com/v1/forecast?latitude=51.5&longitude=-0.12&current_weather=true" +``` + +Find coordinates for a city, then query. Returns JSON with temp, windspeed, weathercode. + +Docs: https://open-meteo.com/en/docs diff --git a/nanobot/nanobot/utils/__init__.py b/nanobot/nanobot/utils/__init__.py new file mode 100644 index 0000000..7444987 --- /dev/null +++ b/nanobot/nanobot/utils/__init__.py @@ -0,0 +1,5 @@ +"""Utility functions for nanobot.""" + +from nanobot.utils.helpers import ensure_dir, get_workspace_path, get_data_path + +__all__ = ["ensure_dir", "get_workspace_path", "get_data_path"] diff --git a/nanobot/nanobot/utils/helpers.py b/nanobot/nanobot/utils/helpers.py new file mode 100644 index 0000000..7271735 --- /dev/null +++ b/nanobot/nanobot/utils/helpers.py @@ -0,0 +1,91 @@ +"""Utility functions for nanobot.""" + +from pathlib import Path +from datetime import datetime + + +def ensure_dir(path: Path) -> Path: + """Ensure a directory exists, creating it if necessary.""" + path.mkdir(parents=True, exist_ok=True) + return path + + +def get_data_path() -> Path: + """Get the nanobot data directory (~/.nanobot).""" + return ensure_dir(Path.home() / ".nanobot") + + +def get_workspace_path(workspace: str | None = None) -> Path: + """ + Get the workspace 
path. + + Args: + workspace: Optional workspace path. Defaults to ~/.nanobot/workspace. + + Returns: + Expanded and ensured workspace path. + """ + if workspace: + path = Path(workspace).expanduser() + else: + path = Path.home() / ".nanobot" / "workspace" + return ensure_dir(path) + + +def get_sessions_path() -> Path: + """Get the sessions storage directory.""" + return ensure_dir(get_data_path() / "sessions") + + +def get_memory_path(workspace: Path | None = None) -> Path: + """Get the memory directory within the workspace.""" + ws = workspace or get_workspace_path() + return ensure_dir(ws / "memory") + + +def get_skills_path(workspace: Path | None = None) -> Path: + """Get the skills directory within the workspace.""" + ws = workspace or get_workspace_path() + return ensure_dir(ws / "skills") + + +def today_date() -> str: + """Get today's date in YYYY-MM-DD format.""" + return datetime.now().strftime("%Y-%m-%d") + + +def timestamp() -> str: + """Get current timestamp in ISO format.""" + return datetime.now().isoformat() + + +def truncate_string(s: str, max_len: int = 100, suffix: str = "...") -> str: + """Truncate a string to max length, adding suffix if truncated.""" + if len(s) <= max_len: + return s + return s[: max_len - len(suffix)] + suffix + + +def safe_filename(name: str) -> str: + """Convert a string to a safe filename.""" + # Replace unsafe characters + unsafe = '<>:"/\\|?*' + for char in unsafe: + name = name.replace(char, "_") + return name.strip() + + +def parse_session_key(key: str) -> tuple[str, str]: + """ + Parse a session key into channel and chat_id. 
+ + Args: + key: Session key in format "channel:chat_id" + + Returns: + Tuple of (channel, chat_id) + """ + parts = key.split(":", 1) + if len(parts) != 2: + raise ValueError(f"Invalid session key: {key}") + return parts[0], parts[1] diff --git a/nanobot/requirements.txt b/nanobot/requirements.txt new file mode 100644 index 0000000..fcb968a --- /dev/null +++ b/nanobot/requirements.txt @@ -0,0 +1,50 @@ +# ============================================================================== +# Litewrite nanobot Service Dependencies +# ============================================================================== +# Install: pip install -r requirements.txt +# ============================================================================== + + +# ------------------------------------------------------------------------------ +# Cron +# ------------------------------------------------------------------------------ +croniter>=2.0.0 +httpx>=0.25.0 +lark-oapi>=1.0.0 + +# ------------------------------------------------------------------------------ +# LLM +# ------------------------------------------------------------------------------ +litellm>=1.0.0 + +# ------------------------------------------------------------------------------ +# Logging / Display +# ------------------------------------------------------------------------------ +loguru>=0.7.0 + +# ------------------------------------------------------------------------------ +# Config / Validation +# ------------------------------------------------------------------------------ +pydantic>=2.0.0 +pydantic-settings>=2.0.0 + +# ------------------------------------------------------------------------------ +# Channels +# ------------------------------------------------------------------------------ +python-telegram-bot>=21.0 + +# ------------------------------------------------------------------------------ +# HTML parsing +# ------------------------------------------------------------------------------ +readability-lxml>=0.8.0 
+rich>=13.0.0 +# ------------------------------------------------------------------------------ +# CLI +# ------------------------------------------------------------------------------ +typer>=0.9.0 +websocket-client>=1.6.0 + +# ------------------------------------------------------------------------------ +# Networking +# ------------------------------------------------------------------------------ +websockets>=12.0 diff --git a/scripts/scan-non-i18n-chinese.ts b/scripts/scan-non-i18n-chinese.ts index ed3d8e8..b12b3ef 100644 --- a/scripts/scan-non-i18n-chinese.ts +++ b/scripts/scan-non-i18n-chinese.ts @@ -34,6 +34,7 @@ const SKIP_DIRS = new Set([ ".git", ".next", ".history", + ".ruff_cache", "node_modules", "dist", "build", @@ -43,6 +44,8 @@ const SKIP_DIRS = new Set([ "projects", // User-facing template content may legitimately contain CJK text. "templates", + // Nanobot is a Python service that legitimately uses Chinese for NLP pattern matching. + "nanobot", ]); // Skip local environment files (should not be committed). diff --git a/scripts/setup.sh b/scripts/setup.sh new file mode 100755 index 0000000..8b62797 --- /dev/null +++ b/scripts/setup.sh @@ -0,0 +1,281 @@ +#!/usr/bin/env bash +# ============================================================================== +# Litewrite + nanobot One-Click Setup Script +# ============================================================================== +# +# This script sets up the complete Litewrite environment from scratch, +# including the nanobot Feishu bot integration. +# +# Usage: +# ./scripts/setup.sh # Interactive setup +# ./scripts/setup.sh --defaults # Use defaults (skip prompts, fill .env later) +# +# Prerequisites: +# - Docker Desktop (or Docker Engine + Compose plugin) +# - Internet connection (for pulling images and building) +# +# ============================================================================== + +set -euo pipefail + +ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." 
&& pwd)" +cd "$ROOT_DIR" + +# Colors +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +BLUE='\033[0;34m' +CYAN='\033[0;36m' +NC='\033[0m' # No Color + +info() { echo -e "${BLUE}[INFO]${NC} $*"; } +ok() { echo -e "${GREEN}[OK]${NC} $*"; } +warn() { echo -e "${YELLOW}[WARN]${NC} $*"; } +err() { echo -e "${RED}[ERROR]${NC} $*" >&2; } + +banner() { + echo + echo -e "${CYAN}================================================================${NC}" + echo -e "${CYAN} Litewrite + nanobot Setup${NC}" + echo -e "${CYAN}================================================================${NC}" + echo +} + +# -------------------------------------------------------------------------- +# 1. Check prerequisites +# -------------------------------------------------------------------------- +check_prerequisites() { + info "Checking prerequisites..." + + if ! command -v docker >/dev/null 2>&1; then + err "Docker is not installed. Please install Docker Desktop first." + err " https://docs.docker.com/get-docker/" + exit 1 + fi + + if ! docker compose version >/dev/null 2>&1; then + err "'docker compose' is not available." + err "Please install Docker Desktop (or Docker Engine + Compose plugin)." + exit 1 + fi + + if ! docker info >/dev/null 2>&1; then + err "Docker daemon is not running. Please start Docker Desktop first." + exit 1 + fi + + ok "Docker and Docker Compose are available." +} + +# -------------------------------------------------------------------------- +# 2. Generate secrets +# -------------------------------------------------------------------------- +generate_secret() { + python3 -c "import secrets; print(secrets.token_hex(32))" 2>/dev/null \ + || openssl rand -hex 32 2>/dev/null \ + || head -c 32 /dev/urandom | xxd -p 2>/dev/null \ + || echo "change-me-$(date +%s)" +} + +# -------------------------------------------------------------------------- +# 3. 
Create .env file +# -------------------------------------------------------------------------- +setup_env() { + local USE_DEFAULTS="${1:-false}" + + if [ -f ".env" ]; then + warn ".env file already exists." + if [ "$USE_DEFAULTS" = "false" ]; then + read -rp " Overwrite? (y/N): " answer + if [[ ! "$answer" =~ ^[Yy]$ ]]; then + info "Keeping existing .env file." + return + fi + else + info "Keeping existing .env file (--defaults mode)." + return + fi + fi + + info "Creating .env configuration file..." + + local NEXTAUTH_SECRET + local INTERNAL_API_SECRET + NEXTAUTH_SECRET="$(generate_secret)" + INTERNAL_API_SECRET="$(generate_secret)" + + local OPENROUTER_API_KEY="" + local FEISHU_ENABLED="false" + local FEISHU_APP_ID="" + local FEISHU_APP_SECRET="" + + if [ "$USE_DEFAULTS" = "false" ]; then + echo + echo -e "${CYAN}--- AI Configuration ---${NC}" + read -rp " OpenRouter API Key (required for AI features, press Enter to skip): " OPENROUTER_API_KEY + + echo + echo -e "${CYAN}--- nanobot Feishu Bot (Optional) ---${NC}" + read -rp " Enable Feishu bot? 
(y/N): " enable_feishu + if [[ "$enable_feishu" =~ ^[Yy]$ ]]; then + FEISHU_ENABLED="true" + read -rp " Feishu App ID: " FEISHU_APP_ID + read -rp " Feishu App Secret: " FEISHU_APP_SECRET + fi + fi + + cat > .env <<ENVEOF +# ============================================================================== +# Litewrite Environment Configuration +# Generated by scripts/setup.sh on $(date -u +"%Y-%m-%dT%H:%M:%SZ") +# ============================================================================== + +# ------------------------------------------------------------------------------ +# NextAuth (session encryption) +# ------------------------------------------------------------------------------ +NEXTAUTH_SECRET=${NEXTAUTH_SECRET} +NEXTAUTH_URL=http://localhost:3000 +NEXT_PUBLIC_SITE_URL=http://localhost:3000 + +# ------------------------------------------------------------------------------ +# Internal service auth (Next.js <-> ai-server / ws-server / nanobot) +# ------------------------------------------------------------------------------ +INTERNAL_API_SECRET=${INTERNAL_API_SECRET} + +# ------------------------------------------------------------------------------ +# WebSocket (client connection URL) +# ------------------------------------------------------------------------------ +NEXT_PUBLIC_WS_URL=ws://localhost:1234 + +# ------------------------------------------------------------------------------ +# AI providers (OpenRouter) +# ------------------------------------------------------------------------------ +OPENROUTER_API_KEY=${OPENROUTER_API_KEY} + +# ------------------------------------------------------------------------------ +# nanobot (Feishu bot integration) +# ------------------------------------------------------------------------------ +FEISHU_ENABLED=${FEISHU_ENABLED} +FEISHU_APP_ID=${FEISHU_APP_ID} +FEISHU_APP_SECRET=${FEISHU_APP_SECRET} +# Your Litewrite user UUID (will be set after first user registration) +NANOBOT_DEFAULT_LITEWRITE_USER_ID= +ENVEOF + + ok 
".env file created." + + if [ -z "$OPENROUTER_API_KEY" ]; then + warn "OPENROUTER_API_KEY is not set. AI features and nanobot will not work." + warn "Edit .env and add your API key before starting." + fi +} + +# -------------------------------------------------------------------------- +# 4. Build and start services +# -------------------------------------------------------------------------- +build_and_start() { + info "Building Docker images (first run takes 10-20 minutes)..." + echo + + docker compose build + + echo + ok "All images built successfully." + + info "Starting services..." + docker compose up -d + + echo + ok "All services started." +} + +# -------------------------------------------------------------------------- +# 5. Wait for services to be healthy +# -------------------------------------------------------------------------- +wait_for_services() { + info "Waiting for services to be ready..." + + local max_wait=120 + local elapsed=0 + + # Wait for web + while [ $elapsed -lt $max_wait ]; do + if curl -sf http://localhost:3000/api/auth/session >/dev/null 2>&1; then + break + fi + sleep 3 + elapsed=$((elapsed + 3)) + printf "." + done + echo + + if [ $elapsed -ge $max_wait ]; then + warn "Web service did not become healthy within ${max_wait}s." + warn "Check logs: docker compose logs web" + else + ok "Web service is ready." + fi +} + +# -------------------------------------------------------------------------- +# 6. 
Print summary +# -------------------------------------------------------------------------- +print_summary() { + echo + echo -e "${CYAN}================================================================${NC}" + echo -e "${CYAN} Setup Complete!${NC}" + echo -e "${CYAN}================================================================${NC}" + echo + echo -e " ${GREEN}Litewrite App${NC} http://localhost:3000" + echo -e " ${GREEN}WebSocket${NC} ws://localhost:1234" + echo -e " ${GREEN}AI Server${NC} http://localhost:6612/health" + echo -e " ${GREEN}Compile Server${NC} http://localhost:3002/health" + echo -e " ${GREEN}MinIO Console${NC} http://localhost:9001 (minioadmin/minioadmin)" + echo + + if grep -q "FEISHU_ENABLED=true" .env 2>/dev/null; then + echo -e " ${GREEN}nanobot (Feishu)${NC} Running (check: docker logs litewrite-nanobot)" + else + echo -e " ${YELLOW}nanobot (Feishu)${NC} Disabled (set FEISHU_ENABLED=true in .env)" + fi + + echo + echo -e "${BLUE}Next steps:${NC}" + echo " 1. Open http://localhost:3000 and register an account" + echo " 2. Create a LaTeX project and start writing" + echo + echo " To enable nanobot Feishu bot:" + echo " 1. Set FEISHU_ENABLED=true, FEISHU_APP_ID, FEISHU_APP_SECRET in .env" + echo " 2. Set NANOBOT_DEFAULT_LITEWRITE_USER_ID to your user UUID" + echo " 3. Run: docker compose up -d nanobot" + echo + echo " See nanobot/DEPLOYMENT.md for detailed nanobot configuration guide." 
+    echo
+    echo -e "${BLUE}Useful commands:${NC}"
+    echo " docker compose ps # Check service status"
+    echo " docker compose logs -f web # Follow web logs"
+    echo " docker compose logs -f nanobot # Follow nanobot logs"
+    echo " docker compose down # Stop all services"
+    echo " docker compose up -d --build # Rebuild and restart"
+    echo
+}
+
+# --------------------------------------------------------------------------
+# Main: honor an optional --defaults flag, then run every setup step in order
+# --------------------------------------------------------------------------
+main() {
+    local USE_DEFAULTS="false"
+    if [ "${1:-}" = "--defaults" ]; then
+        USE_DEFAULTS="true"
+    fi
+
+    banner
+    check_prerequisites
+    setup_env "$USE_DEFAULTS"
+    build_and_start
+    wait_for_services
+    print_summary
+}
+
+main "$@"
diff --git a/scripts/up-dev.sh b/scripts/up-dev.sh
index 456651f..b5e5df0 100755
--- a/scripts/up-dev.sh
+++ b/scripts/up-dev.sh
@@ -43,6 +43,7 @@ echo " - WS (Yjs): ws://localhost:1234"
 echo " - AI Server: http://localhost:6612/health"
 echo " - Compile Server: http://localhost:3002/health"
 echo " - MinIO Console: http://localhost:9001 (minioadmin/minioadmin)"
+echo " - nanobot: docker logs litewrite-nanobot (Feishu bot)"
 echo
 echo "Tip: run in background:"
 echo " ./scripts/up-dev.sh -d"
diff --git a/server/ws-server.ts b/server/ws-server.ts
index 0556897..efb54df 100644
--- a/server/ws-server.ts
+++ b/server/ws-server.ts
@@ -123,63 +123,69 @@ async function getYDoc(docName: string): Promise<DocData> {
 
   // Start initialization
   const initPromise = (async () => {
-    const doc = new Y.Doc();
-    const awareness = new awarenessProtocol.Awareness(doc);
+    try {
+      await Promise.resolve(); // Yield first: an async body is synchronous until its first await, so without this the finally could run BEFORE initializingDocs.set() below.
+      const doc = new Y.Doc();
+      const awareness = new awarenessProtocol.Awareness(doc);
 
-    // Parse room name to get projectId and fileId
-    const parsed = parseRoomName(docName);
-    let persistenceCleanup: (() => void) | undefined;
+      // Parse room name to get projectId and fileId
+      const parsed = parseRoomName(docName);
+      let persistenceCleanup: (() => void) | undefined;
 
-    if (parsed) {
-      const { projectId, fileId } = parsed;
+      if (parsed) {
+        const { projectId, fileId } = parsed;
 
-      // Try restoring document from Redis
-      const restored = await restoreDocument(doc, projectId, fileId);
+        // Try restoring document from Redis
+        const restored = await restoreDocument(doc, projectId, fileId);
 
-      if (restored) {
-        console.log(`📥 Document restored from persistence: ${docName}`);
-      }
+        if (restored) {
+          console.log(`📥 Document restored from persistence: ${docName}`);
+        }
 
-      // Bind persistence to auto-save subsequent updates
-      persistenceCleanup = bindDocumentToPersistence(doc, projectId, fileId);
-      console.log(`💾 Persistence bound: ${docName}`);
-    } else {
-      console.log(`⚠️ Failed to parse room name; skipping persistence: ${docName}`);
-    }
+        // Bind persistence to auto-save subsequent updates
+        persistenceCleanup = bindDocumentToPersistence(doc, projectId, fileId);
+        console.log(`💾 Persistence bound: ${docName}`);
+      } else {
+        console.log(`⚠️ Failed to parse room name; skipping persistence: ${docName}`);
+      }
 
-    // NOTE: We no longer auto-update line numbers for pending edits.
-    // startLine/endLine only keep initial values for chat display.
-    // Editor inline diff uses RelativePosition for precise real-time positions.
+      // NOTE: We no longer auto-update line numbers for pending edits.
+      // startLine/endLine only keep initial values for chat display.
+      // Editor inline diff uses RelativePosition for precise real-time positions.
 
-    // Listen for awareness changes
-    awareness.on("update", ({ added, updated, removed }: { added: number[]; updated: number[]; removed: number[] }) => {
-      const changedClients = added.concat(updated).concat(removed);
-      const docData = docs.get(docName);
-      if (docData) {
-        const encoder = encoding.createEncoder();
-        encoding.writeVarUint(encoder, messageAwareness);
-        encoding.writeVarUint8Array(encoder, awarenessProtocol.encodeAwarenessUpdate(awareness, changedClients));
-        const message = encoding.toUint8Array(encoder);
+      // Listen for awareness changes and relay them to every open client of this room
+      awareness.on("update", ({ added, updated, removed }: { added: number[]; updated: number[]; removed: number[] }) => {
+        const changedClients = added.concat(updated).concat(removed);
+        const docData = docs.get(docName);
+        if (docData) {
+          const encoder = encoding.createEncoder();
+          encoding.writeVarUint(encoder, messageAwareness);
+          encoding.writeVarUint8Array(encoder, awarenessProtocol.encodeAwarenessUpdate(awareness, changedClients));
+          const message = encoding.toUint8Array(encoder);
 
-        docData.clients.forEach((client) => {
-          if (client.readyState === WebSocket.OPEN) {
-            client.send(message);
-          }
-        });
-      }
-    });
+          docData.clients.forEach((client) => {
+            if (client.readyState === WebSocket.OPEN) {
+              client.send(message);
+            }
+          });
+        }
+      });
 
-    const docData: DocData = {
-      doc,
-      awareness,
-      clients: new Set(),
-      persistenceCleanup,
-    };
+      const docData: DocData = {
+        doc,
+        awareness,
+        clients: new Set(),
+        persistenceCleanup,
+      };
 
-    docs.set(docName, docData);
-    initializingDocs.delete(docName);
+      docs.set(docName, docData);
 
-    return docData;
+      return docData;
+    } finally {
+      // Always clean up initializingDocs, whether the init succeeded or failed; otherwise a rejected
+      // promise would stay in the map forever and every later getYDoc() call for this room would fail.
+      initializingDocs.delete(docName);
+    }
   })();
 
   initializingDocs.set(docName, initPromise);
@@ -232,15 +238,29 @@ const server = http.createServer(async (req, res) => {
     }
 
     const prefix = `${projectId}-`;
-    const docNames = Array.from(docs.keys()).filter(
-      (name) => name === projectId || name.startsWith(prefix) || name.startsWith(`ws/${prefix}`)
-    );
+    const matchesProject = (name: string) =>
+      name === projectId || name.startsWith(prefix) || name.startsWith(`ws/${prefix}`);
+
+    // Collect from both docs AND initializingDocs so we don't miss in-flight inits
+    // that would otherwise resurrect stale content after clear finishes.
+    const docNames = new Set([
+      ...Array.from(docs.keys()).filter(matchesProject),
+      ...Array.from(initializingDocs.keys()).filter(matchesProject),
+    ]);
 
     let closedClients = 0;
     let clearedDocs = 0;
 
     for (const docName of docNames) {
-      const docData = docs.get(docName);
+      // If initialization is in-flight, await it so we can tear it down properly.
+      let docData = docs.get(docName);
+      if (!docData && initializingDocs.has(docName)) {
+        try {
+          docData = await initializingDocs.get(docName)!;
+        } catch {
+          // Init failed — nothing to clean up
+        }
+      }
       if (!docData) continue;
 
       // Close all websocket clients to force reconnect
@@ -312,11 +332,20 @@ const server = http.createServer(async (req, res) => {
       return;
     }
 
-    const docData = docs.get(roomName);
-    // Cancel in-progress initialization (avoid resurrecting old content after init finishes)
-    initializingDocs.delete(roomName);
+    // Resolve the document — if initialization is in-flight, await it so we
+    // can properly tear it down. Simply deleting from initializingDocs does NOT
+    // cancel the running promise; the promise would still call docs.set() and
+    // resurrect the stale content after /clear finishes.
+    let docData = docs.get(roomName);
+    if (!docData && initializingDocs.has(roomName)) {
+      try {
+        docData = await initializingDocs.get(roomName)!;
+      } catch {
+        // Init failed — nothing to clean up from it
+      }
+    }
 
-    // Clear in-memory doc (if present)
+    // Clear in-memory doc (if present — including freshly-awaited init result)
     if (docData) {
       // Critical: unbind persistence writer first to avoid re-writing updates back to Redis after clearing (race)
       if (docData.persistenceCleanup) {
@@ -349,6 +378,96 @@ const server = http.createServer(async (req, res) => {
     return;
   }
 
+  // POST /replace/:projectId/:fileId - replace document content with new text
+  // Used by: /api/internal/files/edit to push bot-written content into Yjs
+  // so that connected browsers receive the update via sync protocol instead
+  // of losing changes when the browser re-syncs its stale local state.
+  const replaceMatch = url.match(/^\/replace\/([^\/]+)\/(.+)$/);
+  if (req.method === "POST" && replaceMatch) {
+    const [, projectId, fileId] = replaceMatch;
+    const decodedFileId = decodeURIComponent(fileId);
+    const roomName = `${projectId}-${decodedFileId}`;
+
+    // Internal auth: refuse to run unconfigured (500), reject a bad secret (401)
+    if (!process.env.INTERNAL_API_SECRET) {
+      res.writeHead(500, { "Content-Type": "application/json" });
+      res.end(JSON.stringify({ error: "INTERNAL_API_SECRET not configured" }));
+      return;
+    }
+    if (!requireInternalSecret(req)) {
+      res.writeHead(401, { "Content-Type": "application/json" });
+      res.end(JSON.stringify({ error: "Invalid internal secret" }));
+      return;
+    }
+
+    // Read the body as raw chunks and decode once at "end": per-chunk toString() corrupts multi-byte UTF-8 split across chunks.
+    const chunks: Buffer[] = [];
+    req.on("data", (chunk: Buffer) => { chunks.push(chunk); });
+    req.on("end", async () => {
+      try {
+        const { content } = JSON.parse(Buffer.concat(chunks).toString("utf8")) as { content: string };
+        if (typeof content !== "string") {
+          res.writeHead(400, { "Content-Type": "application/json" });
+          res.end(JSON.stringify({ error: "content must be a string" }));
+          return;
+        }
+
+        // Resolve the target document.
+        // We must check initializingDocs as well: if a client just connected,
+        // getYDoc() may be restoring stale state from Redis. Awaiting the
+        // init promise lets us replace content on the fully-initialized doc
+        // so the new content wins over whatever was restored.
+        let docData = docs.get(roomName);
+
+        if (!docData && initializingDocs.has(roomName)) {
+          // Document initialization is in-flight — wait for it to finish
+          // so we can replace its (potentially stale) content in-place.
+          console.log(
+            `🔄 /replace: Awaiting in-flight init for ${projectId}/${decodedFileId}`
+          );
+          docData = await initializingDocs.get(roomName)!;
+        }
+
+        if (docData) {
+          // Document is in memory — replace Y.Text content in-place
+          // This will automatically sync to all connected browsers via the
+          // Yjs update broadcast, and persist to Redis via the bound persistence.
+          const ytext = docData.doc.getText("content");
+          docData.doc.transact(() => {
+            ytext.delete(0, ytext.length);
+            ytext.insert(0, content);
+          });
+          console.log(
+            `🔄 /replace: Updated in-memory doc ${projectId}/${decodedFileId}, ` +
+            `${content.length} chars, clients=${docData.clients.size}`
+          );
+        } else {
+          // Document is NOT in memory (no active connections, no in-flight init).
+          // Clear any stale Redis state so the next connection loads from S3
+          // (which was already updated by the caller).
+          await clearPersistence(projectId, decodedFileId);
+          console.log(
+            `🔄 /replace: No in-memory doc for ${projectId}/${decodedFileId}, ` +
+            `cleared Redis persistence`
+          );
+        }
+
+        res.writeHead(200, { "Content-Type": "application/json" });
+        res.end(JSON.stringify({
+          success: true,
+          roomName,
+          inMemory: !!docData,
+          contentLength: content.length,
+        }));
+      } catch (err) {
+        console.error("❌ Failed to replace document:", err);
+        res.writeHead(500, { "Content-Type": "application/json" });
+        res.end(JSON.stringify({ success: false, error: "Failed to replace document" }));
+      }
+    });
+    return;
+  }
+
   // GET /doc/:projectId/:fileId - get document content (used by TAP completion)
   const docMatch = url.match(/^\/doc\/([^\/]+)\/(.+)$/);
   if (req.method === "GET" && docMatch) {
@@ -356,7 +475,18 @@ const server = http.createServer(async (req, res) => {
     // Decode URL-encoded filename (e.g. main.tex)
     const decodedFileId = decodeURIComponent(fileId);
     const roomName = `${projectId}-${decodedFileId}`;
-    const docData = docs.get(roomName);
+    // Also await in-flight initialization so we don't 404 while a doc is loading.
+    let docData = docs.get(roomName);
+    if (!docData && initializingDocs.has(roomName)) {
+      try {
+        docData = await initializingDocs.get(roomName)!;
+      } catch (err) {
+        console.error(`❌ GET /doc: init failed for ${roomName}:`, err);
+        res.writeHead(500, { "Content-Type": "application/json" });
+        res.end(JSON.stringify({ error: "Document initialization failed" }));
+        return;
+      }
+    }
 
     if (!docData) {
       res.writeHead(404, { "Content-Type": "application/json" });
@@ -404,10 +534,14 @@ const server = http.createServer(async (req, res) => {
       body += chunk.toString();
     });
 
-    req.on("end", () => {
+    req.on("end", async () => {
       try {
         const { content, blocks } = JSON.parse(body);
-        const docData = docs.get(roomName);
+        // Also await in-flight initialization so updates aren't silently dropped.
+        let docData = docs.get(roomName);
+        if (!docData && initializingDocs.has(roomName)) {
+          docData = await initializingDocs.get(roomName)!;
+        }
 
         if (!docData) {
           console.log(`[POST] ❌ Document not in memory: ${roomName}, rooms:`, Array.from(docs.keys()));
@@ -885,7 +1019,27 @@ wss.on("connection", async (ws: WebSocket, req) => {
   }
 
   // Regular Yjs doc room (async init with Redis restore)
-  const docData = await getYDoc(docName);
+  let docData: DocData; // declared outside the try so it stays in scope for the setup below
+  try {
+    docData = await getYDoc(docName);
+  } catch (err) {
+    console.error(`❌ Failed to initialize doc for connection: ${docName}`, err);
+    try { ws.close(1011, "internal_error"); } catch { /* best-effort close; errors are ignored */ }
+    return;
+  }
+
+  // If the socket closed while we were waiting for getYDoc(), skip setup
+  // to avoid adding a zombie client that can never be removed (the 'close'
+  // event already fired before we registered its handler).
+  if (ws.readyState !== WebSocket.OPEN) {
+    console.log(`⚠️ WebSocket closed during init, skipping setup: ${docName}`);
+    // Use delayed cleanup (same as normal close path) to avoid racing with
+    // concurrent connections that also awaited the same init promise and
+    // haven't added themselves to clients yet.
+    setTimeout(() => cleanupDoc(docName), 30_000);
+    return;
+  }
+
   const { doc, awareness } = docData;
 
   docData.clients.add(ws);