diff --git a/Changelog.md b/Changelog.md index f078d4ed..9e0d7867 100644 --- a/Changelog.md +++ b/Changelog.md @@ -19,6 +19,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - bug/370-anthropic-streaming (2025-09-16) ### Changed + - feat/464-update-input-support-multiple-keys (2025-11-11) - feat/490-can-read-deepagents-agent-files (2025-11-10) - feat/485-user-env (2025-11-08) - feat/482-teams-webhook (2025-11-05) diff --git a/backend/src/constants/llm.py b/backend/src/constants/llm.py index e0d17c1c..2380f5b6 100644 --- a/backend/src/constants/llm.py +++ b/backend/src/constants/llm.py @@ -1,58 +1,97 @@ -from enum import Enum import os +from enum import Enum +from src.services.llm import llm_service from src.constants import ( - OPENAI_API_KEY, - ANTHROPIC_API_KEY, - OLLAMA_BASE_URL, - GROQ_API_KEY, - GOOGLE_API_KEY, - XAI_API_KEY, + OPENAI_API_KEY, + ANTHROPIC_API_KEY, + OLLAMA_BASE_URL, + GROQ_API_KEY, + GOOGLE_API_KEY, + XAI_API_KEY, ) +from src.utils.logger import logger class ChatModels(str, Enum): - if OPENAI_API_KEY: - OPENAI_REASONING_03 = "openai:o3" - OPENAI_REASONING_04_MINI = "openai:o4-mini" - OPENAI_GPT_5_NANO = "openai:gpt-5-nano" - OPENAI_GPT_5_MINI = "openai:gpt-5-mini" - OPENAI_GPT_5 = "openai:gpt-5" - # OPENAI_GPT_5_CODEX = "openai:gpt-5-codex" - if ANTHROPIC_API_KEY: - ANTHROPIC_CLAUDE_3_7_SONNET = "anthropic:claude-3-7-sonnet-latest" - ANTHROPIC_CLAUDE_4_SONNET = "anthropic:claude-sonnet-4" - ANTHROPIC_CLAUDE_4_OPUS = "anthropic:claude-opus-4-1" - ANTHROPIC_CLAUDE_4_5_HAIKU = "anthropic:claude-haiku-4-5" - ANTHROPIC_CLAUDE_4_5_SONNET = "anthropic:claude-sonnet-4-5" - if XAI_API_KEY: - XAI_GROK_4 = "xai:grok-4" - XAI_GROK_4_FAST = "xai:grok-4-fast" - XAI_GROK_4_FAST_NON_REASONING = "xai:grok-4-fast-non-reasoning" - XAI_GROK_CODE_FAST_1 = "xai:grok-code-fast-1" - if GOOGLE_API_KEY: - GOOGLE_GEMINI_2_5_FLASH_LITE = "google_genai:gemini-2.5-flash-lite" - GOOGLE_GEMINI_2_5_FLASH = "google_genai:gemini-2.5-flash" - GOOGLE_GEMINI_2_5_PRO = "google_genai:gemini-2.5-pro" - if GROQ_API_KEY: - GROQ_OPENAI_GPT_OSS_120B = "groq:openai/gpt-oss-120b" - GROQ_LLAMA_3_3_70B_VERSATILE = "groq:llama-3.3-70b-versatile" - if OLLAMA_BASE_URL: - OLLAMA_QWEN3 = "ollama:qwen3" - + if OPENAI_API_KEY: + OPENAI_REASONING_03 = "openai:o3" + OPENAI_REASONING_04_MINI = "openai:o4-mini" + OPENAI_GPT_5_NANO = "openai:gpt-5-nano" + OPENAI_GPT_5_MINI = "openai:gpt-5-mini" + OPENAI_GPT_5 = "openai:gpt-5" + # OPENAI_GPT_5_CODEX = "openai:gpt-5-codex" + if ANTHROPIC_API_KEY: + ANTHROPIC_CLAUDE_3_7_SONNET = "anthropic:claude-3-7-sonnet-latest" + ANTHROPIC_CLAUDE_4_SONNET = "anthropic:claude-sonnet-4" + ANTHROPIC_CLAUDE_4_OPUS = "anthropic:claude-opus-4-1" + ANTHROPIC_CLAUDE_4_5_HAIKU = "anthropic:claude-haiku-4-5" + ANTHROPIC_CLAUDE_4_5_SONNET = "anthropic:claude-sonnet-4-5" + if XAI_API_KEY: + XAI_GROK_4 = "xai:grok-4" + XAI_GROK_4_FAST = "xai:grok-4-fast" + XAI_GROK_4_FAST_NON_REASONING = "xai:grok-4-fast-non-reasoning" + XAI_GROK_CODE_FAST_1 = "xai:grok-code-fast-1" + if GOOGLE_API_KEY: + GOOGLE_GEMINI_2_5_FLASH_LITE = "google_genai:gemini-2.5-flash-lite" + GOOGLE_GEMINI_2_5_FLASH = "google_genai:gemini-2.5-flash" + GOOGLE_GEMINI_2_5_PRO = "google_genai:gemini-2.5-pro" + if GROQ_API_KEY: + GROQ_OPENAI_GPT_OSS_120B = "groq:openai/gpt-oss-120b" + GROQ_LLAMA_3_3_70B_VERSATILE = "groq:llama-3.3-70b-versatile" + + +def get_ollama_models(): + models = [] + try: + import requests + response = requests.get(f"{OLLAMA_BASE_URL.rstrip('/')}/api/tags", timeout=3) + if response.ok: + 
data = response.json() + tags = data.get("models", []) if isinstance(data, dict) else [] + for tag in tags: + model_name = tag.get("name") + if model_name: + models.append(f"ollama:{model_name}") + except Exception as e: + logger.error(f"Error getting Ollama models: {e}") + pass + return models + +def get_all_models(): + models = [] + if OPENAI_API_KEY: + models.extend(llm_service.model_by_provider(provider="openai")) + if ANTHROPIC_API_KEY: + models.extend(llm_service.model_by_provider(provider="anthropic")) + if GOOGLE_API_KEY: + models.extend(llm_service.model_by_provider(provider="google")) + if GROQ_API_KEY: + models.extend(llm_service.model_by_provider(provider="groq")) + if XAI_API_KEY: + models.extend(llm_service.model_by_provider(provider="xai")) + if OLLAMA_BASE_URL: + models.extend(get_ollama_models()) + return sorted(models) + def get_free_models(): - return [ - ChatModels.ANTHROPIC_CLAUDE_4_5_HAIKU.value, - ChatModels.OPENAI_GPT_5_NANO.value, - ChatModels.GOOGLE_GEMINI_2_5_FLASH_LITE.value, - ChatModels.GROQ_OPENAI_GPT_OSS_120B.value, - ChatModels.GROQ_LLAMA_3_3_70B_VERSATILE.value, - ] + models = [] + if OPENAI_API_KEY: + models.append(ChatModels.OPENAI_GPT_5_NANO.value) + if ANTHROPIC_API_KEY: + models.append(ChatModels.ANTHROPIC_CLAUDE_4_5_HAIKU.value) + if GOOGLE_API_KEY: + models.append(ChatModels.GOOGLE_GEMINI_2_5_FLASH_LITE.value) + if GROQ_API_KEY: + models.append(ChatModels.GROQ_OPENAI_GPT_OSS_120B.value) + if OLLAMA_BASE_URL: + models.extend(get_ollama_models()) + return sorted(models) def get_system_prompt(): - path = "src/static/prompts/md" - with open(os.path.join(path, "default.md"), "r") as file: - return file.read() + path = "src/static/prompts/md" + with open(os.path.join(path, "default.md"), "r") as file: + return file.read() DEFAULT_SYSTEM_PROMPT = get_system_prompt() diff --git a/backend/src/flows/__init__.py b/backend/src/flows/__init__.py index 221c6be3..94d6ddbb 100644 --- a/backend/src/flows/__init__.py +++ b/backend/src/flows/__init__.py @@ -12,14 +12,16 @@ from deepagents import SubAgent, create_deep_agent +from src.schemas.models.auth import ProtectedUser from src.services.memory import memory_service from src.services.tool import tool_service from src.tools.memory import MEMORY_TOOLS -from src.schemas.entities import LLMRequest, LLMStreamRequest +from src.schemas.entities import LLMRequest from src.utils.logger import logger from src.utils.format import init_system_prompt from src.schemas.contexts import ContextSchema from src.schemas.entities.a2a import A2AServers +from src.utils.middleware import add_ai_message_metadata, pii_middleware async def add_memories_to_system(): @@ -50,7 +52,7 @@ def graph_builder( subagents: list[SubAgent] = [], prompt: str = "You are a helpful assistant.", model: str = "openai:gpt-5-nano", - context_schema: Type[Any] | None = None, + context_schema: Type[ContextSchema] | None = None, checkpointer: BaseCheckpointSaver | None = None, store: BaseStore | None = None, graph_id: Literal[ @@ -74,6 +76,7 @@ def graph_builder( system_prompt=prompt, checkpointer=checkpointer, context_schema=context_schema, + middleware=[add_ai_message_metadata] + pii_middleware(), store=store, ) return deep_agent @@ -96,7 +99,7 @@ async def init_tools( return tools -async def init_subagents(params: LLMRequest | LLMStreamRequest) -> list[SubAgent]: +async def init_subagents(params: LLMRequest) -> list[SubAgent]: result = [] for subagent in params.subagents: subagent_dict = { @@ -120,22 +123,28 @@ async def init_memories(system_prompt: str, tools: 
list[BaseTool]): return tools + MEMORY_TOOLS, prompt -def init_config(params: LLMRequest | LLMStreamRequest): - if params.metadata: - return RunnableConfig( - configurable=params.metadata.model_dump(), - max_concurrency=10, - recursion_limit=100, - ) - else: - return None +def init_config( + params: LLMRequest, + user: ProtectedUser | None = None, + max_concurrency: int = 4, + recursion_limit: int = 100, +) -> RunnableConfig: + return RunnableConfig( + configurable={ + "user_id": user.id if user else None, + "thread_id": params.metadata.thread_id or str(uuid4()), + "assistant_id": params.metadata.assistant_id or None, + }, + max_concurrency=max_concurrency, + recursion_limit=recursion_limit, + metadata={**params.metadata.model_dump()}, + ) ################################################################################ ### Construct Agent ################################################################################ async def construct_agent( - # params: LLMRequest | LLMStreamRequest, system_prompt: str, tools: list[BaseTool], model: BaseChatModel, @@ -218,10 +227,4 @@ def astream( ) -> AsyncGenerator[BaseMessage, None]: return self.graph.astream( messages, config=config, stream_mode=stream_mode, context=context - ) - - async def aget_state(self, config: RunnableConfig = None): - # if config is None: - # config = self.config - state = await self.graph.aget_state(config) - return state + ) \ No newline at end of file diff --git a/backend/src/routes/v0/assistant.py b/backend/src/routes/v0/assistant.py index b1e2b5fd..bb091ea8 100644 --- a/backend/src/routes/v0/assistant.py +++ b/backend/src/routes/v0/assistant.py @@ -10,7 +10,6 @@ from src.utils.auth import verify_credentials from src.utils.logger import logger from src.services.assistant import ( - assistant_service, AssistantSearch, Assistant, ASSISTANT_EXAMPLES, diff --git a/backend/src/routes/v0/llm.py b/backend/src/routes/v0/llm.py index b67bb665..76e4ec55 100644 --- a/backend/src/routes/v0/llm.py +++ b/backend/src/routes/v0/llm.py @@ -12,11 +12,11 @@ Form, UploadFile, ) -from langchain_core.runnables import RunnableConfig -from langchain_mcp_adapters.client import MultiServerMCPClient +from langgraph.store.base import BaseStore from langmem.prompts.types import ( OptimizerInput, ) +from src.services.llm import llm_service from src.schemas.entities.a2a import A2AServers from src.services.presidio import PresidioException, process_presidio from src.services.prompt.optimize import PromptOptimizer, PromptOptimizerRequest @@ -30,11 +30,11 @@ from src.schemas.entities import LLMRequest from src.utils.stream import stream_generator from src.utils.llm import audio_to_text -from src.flows import construct_agent +from src.flows import construct_agent, init_config from src.services.assistant import Assistant from src.services.db import get_store, get_checkpoint_db from src.utils.rate_limit import limiter -from src.constants.llm import ChatModels, get_free_models +from src.constants.llm import ChatModels, get_all_models, get_free_models from src.tools import default_tools llm_router = APIRouter(tags=["LLM"], prefix="/llm") @@ -59,6 +59,7 @@ async def llm_invoke( user: ProtectedUser = Depends(get_optional_user), store=Depends(get_store), ) -> dict[str, Any] | Any: + params.input.to_langchain_messages() params.metadata.thread_id = params.metadata.thread_id or str(uuid4()) if user: service_context = ServiceContext(user_id=user.id, store=store) @@ -67,15 +68,13 @@ async def llm_invoke( params.metadata.assistant_id ) params = 
assistant.to_llm_request( - messages=params.messages, + input=params.input, model=params.model, metadata=params.metadata, ) async with get_checkpoint_db() as checkpointer: agent = await construct_agent(params, checkpointer, service_context.store) - response = await agent.invoke( - {"messages": params.to_langchain_messages()}, - ) + response = await agent.invoke(params.input) return response @@ -92,29 +91,24 @@ async def llm_stream( request: Request, params: LLMRequest = Body(openapi_examples=Examples.LLM_STREAM_EXAMPLES), user: ProtectedUser = Depends(get_optional_user), - store=Depends(get_store), + store: BaseStore = Depends(get_store), ) -> StreamingResponse: """ Streams LLM output as server-sent events (SSE). """ try: + # Convert API messages to LangChain message objects + params.input.to_langchain_messages() + # Initialize thread id params.metadata.thread_id = params.metadata.thread_id or str(uuid4()) - config = RunnableConfig( - configurable={ - "user_id": user.id if user else None, - "thread_id": params.metadata.thread_id or str(uuid4()), - "assistant_id": params.metadata.assistant_id or None, - }, - max_concurrency=4, - recursion_limit=100, - metadata={**params.metadata.model_dump()}, - ) + # Initialize config + config = init_config(params, user) service_context = ServiceContext(config=config, store=store) params = await process_presidio(params, service_context.presidio_service) ### Collect all tools tool_map = {t.name: t for t in default_tools()} # O(n) index TOOLS = ( - A2AServers(a2a=params.a2a).fetch_agent_cards_as_tools(params.metadata.thread_id) + A2AServers(a2a=params.a2a).fetch_agent_cards_as_tools(config["configurable"].get("thread_id")) + await service_context.tool_service.mcp_tools(params.mcp) + [tool_map[name] for name in (params.tools or ()) if name in tool_map] ) @@ -125,21 +119,21 @@ async def llm_stream( if items: structured_tool = items[0] tool_metadata = {structured_tool.name: structured_tool.metadata} - config['metadata'] = {**tool_metadata, **config['metadata']} + config["metadata"] = {**tool_metadata, **config["metadata"]} TOOLS.append(structured_tool) - if params.metadata.assistant_id: + if config["configurable"].get("assistant_id"): assistant: Assistant = await service_context.assistant_service.get( - params.metadata.assistant_id, + config["configurable"].get("assistant_id"), ) params = assistant.to_llm_request( - messages=params.messages, + input=params.input, model=params.model, - metadata=params.metadata, + metadata=config["metadata"], ) return StreamingResponse( stream_generator( - params.to_langchain_messages(), + params.input, params.model, params.system, TOOLS, @@ -160,7 +154,7 @@ async def llm_stream( ) except Exception as e: logger.exception(f"Error in llm_stream: {e}") - raise HTTPException(status_code=500, detail=str(e)) + raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e)) ################################################################################ @@ -193,11 +187,11 @@ async def transcribe( return JSONResponse( content={"transcript": transcript.model_dump()}, media_type="application/json", - status_code=200, + status_code=status.HTTP_200_OK, ) except Exception as e: logger.exception(str(e)) - raise HTTPException(status_code=500, detail=str(e)) + raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e)) ################################################################################ @@ -227,12 +221,25 @@ async def optimize_prompt( name="List Models", ) async def list_models(): - 
chat_models = sorted({model.value for model in ChatModels}) return JSONResponse( - status_code=200, + status_code=status.HTTP_200_OK, content={ - "default": ChatModels.ANTHROPIC_CLAUDE_4_5_HAIKU.value, - "free": sorted(get_free_models()), - "models": chat_models, + "default": ChatModels.XAI_GROK_4_FAST.value, + "free": get_free_models(), + "models": get_all_models(), } ) + +################################################################################ +### Reset Models +################################################################################ +@llm_router.get( + "/models/reset", + name="Reset Models", +) +async def reset_models(): + llm_service._reset_cache() + return JSONResponse( + status_code=status.HTTP_200_OK, + content={"message": "Models reset successfully"} + ) \ No newline at end of file diff --git a/backend/src/schemas/contexts/__init__.py b/backend/src/schemas/contexts/__init__.py index b8b795de..bbdc39f0 100644 --- a/backend/src/schemas/contexts/__init__.py +++ b/backend/src/schemas/contexts/__init__.py @@ -3,4 +3,5 @@ @dataclass class ContextSchema: - user_id: str + model: str = None + user_id: str = None \ No newline at end of file diff --git a/backend/src/schemas/entities/__init__.py b/backend/src/schemas/entities/__init__.py index c9d8f616..90bf83b2 100644 --- a/backend/src/schemas/entities/__init__.py +++ b/backend/src/schemas/entities/__init__.py @@ -1,20 +1,10 @@ from enum import Enum from uuid import uuid4 -from typing import Optional, List, Any, Literal - -from pydantic import BaseModel, Field, ConfigDict -from langgraph.types import StreamMode -from langchain_core.messages import ( - AnyMessage, - BaseMessage, - HumanMessage, - AIMessage, - SystemMessage, - ToolMessage, -) +from typing import Optional, List, Any + +from pydantic import BaseModel, Field -from src.schemas.models.assistant import Assistant -from src.constants.llm import DEFAULT_SYSTEM_PROMPT, ChatModels +from src.schemas.entities.llm import * from src.constants.examples import ( ADD_DOCUMENTS_EXAMPLE, THREAD_HISTORY_EXAMPLE, @@ -31,22 +21,6 @@ class InvokeTool(BaseModel): ) -class Configurable(BaseModel): - thread_id: str - - -class StreamInput(BaseModel): - messages: list - configurable: Configurable - - -class ChatInput(BaseModel): - system: Optional[str] = Field(default="You are a helpful assistant.") - query: str = Field(default="What is the capital of France?") - images: Optional[List[str]] = Field(default=[]) - model: Optional[str] = Field(default="openai:gpt-4o-mini") - - class ArcadeConfig(BaseModel): tools: Optional[List[str]] = Field(default_factory=list) toolkits: Optional[List[str]] = Field(default_factory=list) @@ -62,7 +36,7 @@ class Thread(BaseModel): thread_id: str = Field(...) checkpoint_ns: Optional[str] = Field(default="") checkpoint_id: Optional[str] = Field(default=None) - messages: list[AnyMessage] = Field(default_factory=list) + messages: list[BaseMessage] = Field(default_factory=list) v: Optional[int] = Field(default=1) ts: Optional[str] = Field(default=None) @@ -83,7 +57,7 @@ class Threads(BaseModel): class Answer(BaseModel): thread_id: str = Field(...) - answer: AnyMessage = Field(...) + answer: BaseMessage = Field(...) 
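
For reference, the new List Models and Reset Models handlers above can be exercised from any HTTP client; the listing is now built from the provider catalog gathered by `get_all_models()` and cached for 24 hours by the `llm_service` singleton introduced further down in `src/services/llm.py`. A minimal sketch, assuming a local backend, that the router's `/llm` prefix is mounted at the application root, and that the list route sits at `/models` next to the `/models/reset` route shown above:

```python
import requests

BASE_URL = "http://localhost:8000"  # assumed host/port; adjust for your deployment and any version prefix

# List models: the handler returns the default model, the free tier, and the full catalog
models = requests.get(f"{BASE_URL}/llm/models", timeout=10).json()
print(models["default"])
print(f"{len(models['models'])} models total, {len(models['free'])} free")

# Clear the 24h cache so the next listing re-fetches the provider catalog
reset = requests.get(f"{BASE_URL}/llm/models/reset", timeout=10).json()
print(reset["message"])  # "Models reset successfully"
```
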
model_config = { "json_schema_extra": { @@ -139,89 +113,9 @@ class SearchKwargs(dict): filter: str = None -class StreamContext(BaseModel): - msg: AnyMessage | None = None - metadata: dict = {} - event: str = "" - - -class Config(BaseModel): - model_config = ConfigDict(extra="allow") # ✅ allow arbitrary extra fields - - user_id: Optional[str] = Field( - default=None, description="The user id", examples=[str(uuid4())] - ) - thread_id: Optional[str] = Field( - default=None, description="The thread id", examples=[str(uuid4())] - ) - checkpoint_id: Optional[str] = Field( - default=None, description="The checkpoint id", examples=[str(uuid4())] - ) - assistant_id: Optional[str] = Field( - default=None, description="The assistant id", examples=[str(uuid4())] - ) - graph_id: Optional[Literal["react", "deepagent"]] = Field( - default=None, description="The graph id", examples=["react", "deepagent"] - ) - - class ThreadSearch(BaseModel): limit: int = Field(default=100, description="The limit of threads to search") offset: int = Field(default=0, description="The offset of threads to search") filter: Optional[Config] = Field( default_factory=Config, description="The filter of threads to search" - ) - - -class PresidioRequest(BaseModel): - analyze: Optional[bool] = Field( - default=False, description="Whether to analyze the text" - ) - anonymize: Optional[bool] = Field( - default=False, description="Whether to anonymize the text" - ) - - -class LLMRequest(BaseModel): - model: Optional[ChatModels] = Field(default=ChatModels.OPENAI_GPT_5_NANO.value) - system: Optional[str] = Field( - default=DEFAULT_SYSTEM_PROMPT, - exclude=True, # this is the default but should never be shown to client - ) - tools: Optional[List[str]] = Field(default_factory=list) - a2a: Optional[dict[str, dict]] = Field(default_factory=dict) - mcp: Optional[dict[str, dict]] = Field(default_factory=dict) - subagents: Optional[List[Assistant]] = Field(default_factory=list) - presidio: Optional[PresidioRequest] = Field(default_factory=PresidioRequest) - - metadata: Optional[Config] = Field( - default={}, description="LangGraph configuration" - ) - - class ChatMessage(BaseModel): - role: Literal["user", "assistant", "system", "tool"] = Field(examples=["user"]) - content: str | List[Any] = Field(examples=["Weather in Dallas?"]) - - messages: List[ChatMessage] - - def to_langchain_messages(self) -> List[BaseMessage]: - # Convert API messages to LangChain message objects - converted: List[BaseMessage] = [] - for message in self.messages: - role = message.role - content = message.content - if role == "user": - converted.append(HumanMessage(content=content)) - elif role == "assistant": - converted.append(AIMessage(content=content)) - elif role == "system": - converted.append(SystemMessage(content=content)) - elif role == "tool": - converted.append(ToolMessage(content=content)) - else: - raise ValueError(f"Unsupported role: {role}") - return converted - - -class LLMStreamRequest(LLMRequest): - stream_mode: StreamMode | list[StreamMode] = "values" + ) \ No newline at end of file diff --git a/backend/src/schemas/entities/llm.py b/backend/src/schemas/entities/llm.py new file mode 100644 index 00000000..de3e14f9 --- /dev/null +++ b/backend/src/schemas/entities/llm.py @@ -0,0 +1,125 @@ +from uuid import uuid4 +from datetime import datetime +from typing import List, Any, Literal, Optional +from pydantic import BaseModel, Field, ConfigDict, computed_field, field_serializer +from langchain_core.messages import BaseMessage, HumanMessage, AIMessage, 
SystemMessage, ToolMessage + +from src.constants.llm import DEFAULT_SYSTEM_PROMPT +from src.utils.format import slugify + + +class PresidioRequest(BaseModel): + analyze: Optional[bool] = Field( + default=False, description="Whether to analyze the text" + ) + anonymize: Optional[bool] = Field( + default=False, description="Whether to anonymize the text" + ) + +class Config(BaseModel): + model_config = ConfigDict(extra="allow") # ✅ allow arbitrary extra fields + + user_id: Optional[str] = Field( + default=None, description="The user id", examples=[str(uuid4())] + ) + thread_id: Optional[str] = Field( + default=None, description="The thread id", examples=[str(uuid4())] + ) + checkpoint_id: Optional[str] = Field( + default=None, description="The checkpoint id", examples=[str(uuid4())] + ) + assistant_id: Optional[str] = Field( + default=None, description="The assistant id", examples=[str(uuid4())] + ) + graph_id: Optional[Literal["react", "deepagent"]] = Field( + default=None, description="The graph id", examples=["react", "deepagent"] + ) + +class LLMInput(BaseModel): + model_config = ConfigDict(extra="allow") # Allow additional properties + + class ChatMessage(BaseModel): + role: Literal["user", "assistant", "system", "tool"] = Field(examples=["user"]) + content: str | List[Any] = Field(examples=["Weather in Dallas?"]) + + messages: List[ChatMessage] + + def to_langchain_messages(self) -> "LLMInput": + # Convert API messages to LangChain message objects + converted: List[BaseMessage] = [] + for message in self.messages: + role = message.role + content = message.content + if role == "user": + converted.append(HumanMessage(content=content)) + elif role == "assistant": + converted.append(AIMessage(content=content)) + elif role == "system": + converted.append(SystemMessage(content=content)) + elif role == "tool": + converted.append(ToolMessage(content=content)) + else: + raise ValueError(f"Unsupported role: {role}") + self.messages = converted + +class AssistantSearch(BaseModel): + limit: int = 200 + offset: int = 0 + sort: str = "updated_at" + sort_order: str = "desc" + filter: dict = {} + +class Assistant(BaseModel): + id: Optional[str] = None + name: str + description: str = Field(default="Helpful AI Assistant.") + model: Optional[str] = None + prompt: str = Field(default="You are a helpful assistant.") + tools: list[str] + subagents: Optional[list[dict]] = [] + mcp: Optional[dict] = {} + a2a: Optional[dict] = {} + metadata: dict = {} + updated_at: Optional[datetime] = None + created_at: Optional[datetime] = None + + @computed_field + @property + def slug(self) -> str: + return slugify(self.name) + + @field_serializer("created_at", "updated_at") + def serialize_dt(self, dt: Optional[datetime], _): + return dt.isoformat() if dt else None + + def to_llm_request( + self, + input: LLMInput, + model: str = None, + metadata: "Config" = None, + ) -> "LLMRequest": + from src.schemas.entities import Config + from src.schemas.entities import LLMRequest + + if metadata and isinstance(metadata, Config): + metadata = metadata.model_dump() + return LLMRequest( + model=model or self.model, + tools=self.tools, + a2a=self.a2a, + mcp=self.mcp, + subagents=self.subagents, + metadata=metadata or self.metadata, + input=input, + ) + +class LLMRequest(BaseModel): + input: LLMInput + model: Optional[str] = Field(default="openai:gpt-5-nano") + system: Optional[str] = Field(default=DEFAULT_SYSTEM_PROMPT, exclude=True) + tools: Optional[List[str]] = Field(default_factory=list) + a2a: Optional[dict[str, dict]] = 
Field(default_factory=dict) + mcp: Optional[dict[str, dict]] = Field(default_factory=dict) + subagents: Optional[List[Assistant]] = Field(default_factory=list) + presidio: Optional[PresidioRequest] = Field(default_factory=PresidioRequest) + metadata: Optional[Config] = Field(default={}, description="LangGraph configuration") \ No newline at end of file diff --git a/backend/src/schemas/entities/presidio.py b/backend/src/schemas/entities/presidio.py new file mode 100644 index 00000000..e69de29b diff --git a/backend/src/schemas/models/assistant.py b/backend/src/schemas/models/assistant.py deleted file mode 100644 index bb811d0e..00000000 --- a/backend/src/schemas/models/assistant.py +++ /dev/null @@ -1,62 +0,0 @@ -from typing import Optional, TYPE_CHECKING -from langchain_core.messages import BaseMessage -from src.utils.format import slugify -from pydantic import BaseModel, computed_field, field_serializer, Field -from datetime import datetime - -if TYPE_CHECKING: - from src.schemas.entities import Config - from src.schemas.entities import LLMRequest - - -class AssistantSearch(BaseModel): - limit: int = 200 - offset: int = 0 - sort: str = "updated_at" - sort_order: str = "desc" - filter: dict = {} - - -class Assistant(BaseModel): - id: Optional[str] = None - name: str - description: str = Field(default="Helpful AI Assistant.") - model: Optional[str] = None - prompt: str = Field(default="You are a helpful assistant.") - tools: list[str] - subagents: Optional[list[dict]] = [] - mcp: Optional[dict] = {} - a2a: Optional[dict] = {} - metadata: dict = {} - updated_at: Optional[datetime] = None - created_at: Optional[datetime] = None - - @computed_field - @property - def slug(self) -> str: - return slugify(self.name) - - @field_serializer("created_at", "updated_at") - def serialize_dt(self, dt: Optional[datetime], _): - return dt.isoformat() if dt else None - - def to_llm_request( - self, - messages: list[BaseMessage], - model: str = None, - metadata: "Config" = None, - ) -> "LLMRequest": - from src.schemas.entities import Config - from src.schemas.entities import LLMRequest - - if metadata and isinstance(metadata, Config): - metadata = metadata.model_dump() - return LLMRequest( - model=model or self.model, - tools=self.tools, - a2a=self.a2a, - mcp=self.mcp, - subagents=self.subagents, - metadata=metadata or self.metadata, - messages=messages, - ) diff --git a/backend/src/services/assistant.py b/backend/src/services/assistant.py index 7d6867cb..e97cb35a 100644 --- a/backend/src/services/assistant.py +++ b/backend/src/services/assistant.py @@ -2,21 +2,24 @@ from typing import Any from langgraph.store.memory import InMemoryStore from langgraph.store.base import BaseStore -from src.utils.logger import logger -from src.schemas.models.assistant import * from langgraph.store.base import SearchItem + +from src.schemas.entities.llm import * +from src.utils.logger import logger from src.constants.examples import Examples from src.services.db import get_store_in_memory -STORE_KEY = "assistants" class AssistantService: def __init__(self, user_id: str = None, store: BaseStore = get_store_in_memory()): self.user_id = user_id self.store: BaseStore = store + + def _get_store_key(self): + return "assistants" def _get_namespace(self): - return (self.user_id, "assistants") + return (self.user_id, self._get_store_key()) async def update(self, assistant_id: str, data: dict): try: @@ -25,7 +28,7 @@ async def update(self, assistant_id: str, data: dict): ) return True except Exception as e: - logger.exception(f"Error 
updating {STORE_KEY} {assistant_id}: {e}") + logger.exception(f"Error updating {self._get_store_key()} {assistant_id}: {e}") return False return True @@ -41,7 +44,7 @@ async def delete(self, key: str) -> bool: await self.store.adelete(self._get_namespace(), key) return True except Exception as e: - logger.exception(f"Error deleting {STORE_KEY} {key}: {e}") + logger.exception(f"Error deleting {self._get_store_key()} {key}: {e}") return False async def search( @@ -56,7 +59,7 @@ async def search( assistants = await self._postgres_search(limit) return self._format_assistant(assistants) except Exception as e: - logger.error(f"Error searching {STORE_KEY}: {e}") + logger.error(f"Error searching {self._get_store_key()}: {e}") return [] ########################################################################### diff --git a/backend/src/services/llm.py b/backend/src/services/llm.py new file mode 100644 index 00000000..07b398dc --- /dev/null +++ b/backend/src/services/llm.py @@ -0,0 +1,62 @@ +import time +import requests +from src.utils.llm import filter_tool_call_models +from src.utils.logger import logger + + +class LLMService: + def __init__(self, ttl_seconds: int = 60 * 60 * 24): # 24 hours + self._models_cache = None + self._cache_time = 0.0 + self._ttl = ttl_seconds + + def _reset_cache(self): + """Force-clear the model cache so the next call re-fetches.""" + self._models_cache = None + self._cache_time = 0.0 + + def _fetch_models(self): + """Fetch models from API with 24h caching.""" + now = time.time() + + # Serve from cache if still fresh + if self._models_cache is not None and (now - self._cache_time) < self._ttl: + return self._models_cache + + try: + response = requests.get("https://models.dev/api.json", timeout=3) + response.raise_for_status() # Raises exception for 4xx/5xx status codes + self._models_cache = response.json() or {} + self._cache_time = now + logger.info(f"Models fetched successfully and cached for {self._ttl} seconds") + except (requests.RequestException, ValueError) as e: + logger.warning(f"Failed to fetch models: {e}") + # Keep old cache if present; otherwise empty dict + if self._models_cache is None: + self._models_cache = {} + + return self._models_cache + + def model_by_provider(self, provider: str): + """Get tool-calling models for a given provider.""" + all_models = self._fetch_models() + + provider_data = all_models.get(provider, {}) or {} + provider_models = provider_data.get("models", []) or [] + + if not provider_models: + logger.info(f"No models found for provider: {provider}") + return [] + + # Filter for tool-calling models + tool_models = filter_tool_call_models(provider_models) + logger.info(f"Found {len(tool_models)} tool calling models for {provider}") + + # Normalize provider name + normalized_provider = "google_genai" if provider == "google" else provider + + return [f"{normalized_provider}:{model}" for model in tool_models] + + +# Make sure this is a singleton used by your app (e.g., FastAPI dependency) +llm_service = LLMService() diff --git a/backend/src/services/presidio.py b/backend/src/services/presidio.py index e95eba9d..72bf2bb5 100644 --- a/backend/src/services/presidio.py +++ b/backend/src/services/presidio.py @@ -99,7 +99,7 @@ async def process_presidio(params: dict, presidio_service: PresidioService): if not isinstance(params, LLMRequest): params = LLMRequest(**params) - query = format_content(params.messages[-1].content) + query = format_content(params.input.messages[-1].content) if params.presidio and params.presidio.analyze: if not 
PRESIDIO_ANALYZE_HOST: raise PresidioException( @@ -130,7 +130,7 @@ async def process_presidio(params: dict, presidio_service: PresidioService): message="Error anonymizing the query. Please review the results and try again.", results=None, ) - params.messages[-1].content = [ + params.input.messages[-1].content = [ {"type": "text", "text": anonymized_query["text"]} ] return params diff --git a/backend/src/services/prompt/optimize.py b/backend/src/services/prompt/optimize.py index 88fa9505..4b0b4368 100644 --- a/backend/src/services/prompt/optimize.py +++ b/backend/src/services/prompt/optimize.py @@ -63,7 +63,7 @@ class PromptOptimizerRequest(BaseModel): trajectories: List[dict] = Field(default=DEFAULT_TRAJECTORIES) prompt: str | Prompt = Field(default=DEFAULT_PROMPTS[1]) - model: ChatModels = Field(default=ChatModels.OPENAI_GPT_5_NANO) + model: str = Field(default=ChatModels.OPENAI_GPT_5_NANO) kind: Optional[str] = Field(default="gradient") config: Optional[dict] = Field( default={"min_reflection_steps": 1, "max_reflection_steps": 3} diff --git a/backend/src/utils/auth.py b/backend/src/utils/auth.py index 56136714..82be3931 100644 --- a/backend/src/utils/auth.py +++ b/backend/src/utils/auth.py @@ -8,7 +8,7 @@ from src.constants.llm import get_free_models from src.repos.user_repo import UserRepo from src.constants import JWT_SECRET_KEY, JWT_ALGORITHM, JWT_TOKEN_EXPIRE_MINUTES -from src.schemas.entities import LLMRequest, LLMStreamRequest +from src.schemas.entities import LLMRequest from src.schemas.models import User from src.services.db import get_async_db from src.utils.logger import logger @@ -45,7 +45,7 @@ def is_authorized_model(model: str) -> bool: async def get_optional_user( request: Request, - params: LLMRequest | LLMStreamRequest, + params: LLMRequest, credentials: Optional[HTTPAuthorizationCredentials] = Depends(security), db: AsyncSession = Depends(get_async_db), ) -> Optional[User]: diff --git a/backend/src/utils/llm.py b/backend/src/utils/llm.py index 69e4ec19..7a253c9e 100644 --- a/backend/src/utils/llm.py +++ b/backend/src/utils/llm.py @@ -46,3 +46,29 @@ def audio_to_text( return translation except Exception as e: raise e + + +def filter_models(models: dict, **props): + """ + Filter model dict by internal flags/properties. 
+ + Example: + filter_models(models, tool_call=True) + filter_models(models, attachment=True, reasoning=False) + """ + filtered = {} + + for name, data in models.items(): + # each model entry looks like {"id": "...", "attachment": True, ...} + if all(data.get(k) == v for k, v in props.items()): + filtered[name] = data + + return list(filtered.keys()) + +def filter_tool_call_models(provider_models: dict[str, dict]) -> list[str]: + """Return all model IDs for this provider that support tool calling.""" + return [ + model_id + for model_id, meta in provider_models.items() + if meta.get("tool_call") + ] \ No newline at end of file diff --git a/backend/src/utils/middleware.py b/backend/src/utils/middleware.py new file mode 100644 index 00000000..8e0fcb85 --- /dev/null +++ b/backend/src/utils/middleware.py @@ -0,0 +1,38 @@ +from langchain.agents.middleware import after_model +from langchain.agents import AgentState +from langchain_core.messages import AIMessage +from langgraph.runtime import Runtime +from src.schemas.contexts import ContextSchema +from langchain.agents.middleware import PIIMiddleware + +@after_model +def add_ai_message_metadata(state: AgentState, runtime: Runtime[ContextSchema]) -> dict | None: + """Attach AI message metadata to final response.""" + if state["messages"]: + last_msg = state["messages"][-1] + if isinstance(last_msg, AIMessage) and not last_msg.tool_calls: + last_msg.model = runtime.context.model + return None + +def pii_middleware() -> dict | None: + return [ + # Redact email addresses + # PIIMiddleware( + # "email", + # strategy="redact", + # apply_to_input=True, + # ), + # Mask credit card numbers + PIIMiddleware( + "credit_card", + strategy="mask", + apply_to_input=True, + ), + # Block API keys - raise error if detected + PIIMiddleware( + "api_key", + detector=r"sk-[A-Za-z0-9]+", + strategy="block", + apply_to_input=True, + ), + ] \ No newline at end of file diff --git a/backend/src/utils/stream.py b/backend/src/utils/stream.py index ea927c56..83ead630 100644 --- a/backend/src/utils/stream.py +++ b/backend/src/utils/stream.py @@ -1,23 +1,23 @@ +from langchain.agents.middleware import PIIDetectionError +import ujson from langchain_core.language_models import BaseChatModel from langchain_core.runnables import RunnableConfig from langchain_core.tools import BaseTool import ujson -from langgraph.store.base import BaseStore from typing import List from langgraph.types import StreamMode from deepagents import SubAgent +from src.schemas.contexts import ContextSchema from src.contexts.service import ServiceContext -from src.schemas.entities import LLMRequest -from src.schemas.models.auth import ProtectedUser +from src.schemas.entities import LLMInput from src.constants import APP_LOG_LEVEL from src.flows import construct_agent from src.services.db import get_checkpoint_db from src.utils.messages import from_message_to_dict from langchain_core.messages import ( - AIMessage, AIMessageChunk, - BaseMessage, + HumanMessage, ToolMessage, ) from src.utils.logger import log_to_file, logger @@ -165,7 +165,7 @@ def handle_multi_mode(chunk: dict): async def stream_generator( - messages: list[BaseMessage], + input: LLMInput, model: BaseChatModel, system_prompt: str, tools: list[BaseTool], @@ -176,7 +176,6 @@ async def stream_generator( files_map = {} async with get_checkpoint_db() as checkpointer: try: - service_context.config["configurable"]["user_id"] = service_context.user_id agent = await construct_agent( system_prompt=system_prompt, model=model, @@ -186,10 +185,12 @@ async 
def stream_generator( checkpointer=checkpointer, store=service_context.store ) + input.messages[-1].model = agent.model async for chunk in agent.astream( - {"messages": messages}, - stream_mode=["messages", "values"], + {"messages": input.messages}, + stream_mode=["messages", "values"], config=config, + context=ContextSchema(model=agent.model) ): # Serialize and yield each chunk as SSE stream_chunk = handle_multi_mode(chunk) @@ -202,42 +203,41 @@ async def stream_generator( log_to_file(str(data), agent.model) and APP_LOG_LEVEL == "DEBUG" logger.debug(f"data: {str(data)}") yield f"data: {data}\n\n" + except PIIDetectionError as e: + # Yield error as SSE if streaming fails + logger.warning(f"Sensitive data detected in the query: {e}") + # raise HTTPException(status_code=500, detail=str(e)) + error_msg = ujson.dumps(("error", str(e))) + yield f"data: {error_msg}\n\n" except Exception as e: # Yield error as SSE if streaming fails - logger.exception("Error in event_generator: %s", e) + logger.exception("Error in stream_generator: %s", e) # raise HTTPException(status_code=500, detail=str(e)) error_msg = ujson.dumps(("error", str(e))) yield f"data: {error_msg}\n\n" finally: if service_context.user_id and checkpointer: - final_state = await agent.aget_state(config) - messages = final_state.values.get("messages") - last_message = messages[-1] if messages else None - if isinstance(last_message, AIMessage): - last_message.model = agent.model - new_config = await agent.graph.aupdate_state( - config=final_state.config, - values={"messages": messages}, - ) - configurable = new_config.get("configurable") - thread_id = configurable.get("thread_id") - checkpoint_id = configurable.get("checkpoint_id") - - if service_context.config["configurable"].get("assistant_id"): - service_context.thread_service.assistant_id = ( - config["configurable"].get("assistant_id") - ) - - await service_context.thread_service.update( - thread_id=thread_id, - data={ - "thread_id": thread_id, - "checkpoint_id": checkpoint_id, - "messages": [last_message.model_dump()], - "files": files_map, - "updated_at": get_time(), - }, - ) + final_state = await agent.graph.aget_state(config) + configurable = final_state.config.get("configurable", {}) + messages = final_state.values.get('messages', []) + + # Get the last HumanMessage + last_human_message = None + for message in reversed(messages): + if isinstance(message, HumanMessage): + last_human_message = message + break + + await service_context.thread_service.update( + thread_id=configurable.get("thread_id"), + data={ + "thread_id": configurable.get("thread_id"), + "checkpoint_id": configurable.get("checkpoint_id"), + "messages": [last_human_message.model_dump()] if last_human_message else [], + "files": files_map, + "updated_at": get_time(), + } + ) # Log the update for debugging - logger.info(f"final_state Updated: {str(new_config)}") + logger.info(f"checkpoint: {ujson.dumps(configurable)}") \ No newline at end of file diff --git a/docker-compose.yml b/docker-compose.yml index 2d1b32a8..6ecb1be7 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -88,11 +88,11 @@ services: - ollama:/root/.ollama ports: - "11434:11434" - command: > - sh -c " - ollama pull llama3.2-vision && - tail -f /dev/null - " + # command: > + # sh -c " + # ollama pull qwen3-vl gemma3 && + # tail -f /dev/null + # " ############################################## ## Exec Server diff --git a/frontend/src/components/inputs/ChatInput.tsx b/frontend/src/components/inputs/ChatInput.tsx index 81a80949..83d34fd5 100644 
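
The wire format produced by `stream_generator` above is plain server-sent events: each LangGraph chunk is serialized with `ujson` onto a single `data:` line, and failures (including `PIIDetectionError`) are emitted as an `("error", message)` tuple instead of terminating the response. A minimal sketch of that framing, using illustrative payloads rather than the exact LangGraph chunk shapes:

```python
import ujson

def sse_event(payload) -> str:
    """Frame one chunk the way stream_generator does: one JSON document per
    'data:' line, terminated by a blank line."""
    return f"data: {ujson.dumps(payload)}\n\n"

# A streamed message chunk and an error event, as the SSE client would receive them
print(sse_event(["messages", {"content": "Hel"}]), end="")
print(sse_event(("error", "Sensitive data detected in the query")), end="")

def iter_sse(raw: str):
    """Client-side counterpart: split on blank lines, strip the 'data: ' prefix, JSON-decode."""
    for block in raw.strip().split("\n\n"):
        if block.startswith("data: "):
            yield ujson.loads(block[len("data: "):])
```
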
--- a/frontend/src/components/inputs/ChatInput.tsx +++ b/frontend/src/components/inputs/ChatInput.tsx @@ -17,6 +17,7 @@ export default function ChatInput({ const textareaRef = useRef(null); const [isRecording, setIsRecording] = useState(false); const { loading } = useAppContext(); + const { isLikelyMobile } = useAppHook(); const { query, abortQuery, @@ -32,8 +33,6 @@ export default function ChatInput({ handleSubmit, } = useChatContext(); - const { isMobile } = useAppHook(); - // Initialize the recorder controls using the hook const recorderControls = useVoiceVisualizer(); @@ -89,7 +88,8 @@ export default function ChatInput({ query.length > 0 ) { e.preventDefault(); - if (!loading && !isMobile()) handleSubmit(query, images); + if (!loading && !isLikelyMobile()) + handleSubmit(query, images); } }} /> diff --git a/frontend/src/components/lists/ChatMessages.tsx b/frontend/src/components/lists/ChatMessages.tsx index e44ce0e1..6239d067 100644 --- a/frontend/src/components/lists/ChatMessages.tsx +++ b/frontend/src/components/lists/ChatMessages.tsx @@ -12,6 +12,7 @@ import SearchEngineTool from "../tools/SearchEngine"; import ChartRenderWidget from "../tools/ChartRenderWidget"; import CopyTextButton from "../buttons/CopyTextButton"; import FileViewer from "../viewers/FileViewer"; +import { latestHumanMessage } from "@/lib/utils/message"; const MAX_LENGTH = 1000; @@ -62,9 +63,11 @@ function ToolAction({ export function Message({ message, isLatest = false, + messages, }: { message: any; isLatest?: boolean; + messages: any[]; }) { const ICON_SIZE = 4; const [isEditing, setIsEditing] = useState(false); @@ -256,14 +259,14 @@ export function Message({
{isLatest && streamingRate?.rate && ( - {streamingRate.rate} tok/s + {streamingRate.rate} tok/s • {streamingRate.count} tokens )}
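
The message footer above now reports the cumulative token count alongside the rolling rate. The arithmetic behind those two figures is just tokens observed over elapsed wall time; a small illustrative sketch (not the frontend implementation, which lives in the streaming hook):

```python
import time

class StreamingRate:
    """Track tokens seen so far and the average tokens/second since the first chunk."""

    def __init__(self) -> None:
        self.count = 0
        self.started: float | None = None

    def update(self, new_tokens: int) -> None:
        if self.started is None:
            self.started = time.monotonic()
        self.count += new_tokens

    @property
    def rate(self) -> float:
        elapsed = time.monotonic() - (self.started or time.monotonic())
        return self.count / elapsed if elapsed > 0 else 0.0

meter = StreamingRate()
meter.update(12)
time.sleep(0.1)
meter.update(30)
print(f"{meter.rate:.1f} tok/s • {meter.count} tokens")
```
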
@@ -315,6 +318,7 @@ const ChatMessages = ({ messages }: { messages: any[] }) => { key={message.id} message={message} isLatest={index === messages.length - 1} + messages={messages} /> )) ) : ( diff --git a/frontend/src/components/lists/SelectModel.tsx b/frontend/src/components/lists/SelectModel.tsx index 325795eb..200be15f 100644 --- a/frontend/src/components/lists/SelectModel.tsx +++ b/frontend/src/components/lists/SelectModel.tsx @@ -8,13 +8,13 @@ import { import { SiAnthropic, SiOpenai, SiOllama, SiGoogle } from "react-icons/si"; import GroqIcon from "@/components/icons/GroqIcon"; import XAIIcon from "../icons/XAIIcon"; -import useModel from "@/hooks/useModel"; import { getAuthToken } from "@/lib/utils/auth"; +import { MainToolTip } from "@/components/tooltips/MainToolTip"; +import { truncateFrom } from "@/lib/utils/format"; +import { useChatContext } from "@/context/ChatContext"; function SelectModel({ onModelSelected }: { onModelSelected?: () => void }) { - const { model, setModel, useModelsEffect, models } = useModel(); - - useModelsEffect(); + const { model, setModel, models } = useChatContext(); const handleModelChange = (value: string) => { setModel(value); @@ -51,13 +51,15 @@ function SelectModel({ onModelSelected }: { onModelSelected?: () => void }) { return modelValue.split(":")[1] || modelValue; }; + const getTruncatedLabel = (label: string) => { + if (label.length <= 20) return label; + return truncateFrom(label, "end", "...", 30); + }; + const authToken = getAuthToken?.(); return ( - @@ -65,12 +67,25 @@ function SelectModel({ onModelSelected }: { onModelSelected?: () => void }) { {models.models.map((modelValue: string) => { const disabled = !authToken && !models.free.includes(modelValue as string); + const fullLabel = getModelLabel(modelValue); + const truncatedLabel = getTruncatedLabel(fullLabel); + const needsTooltip = fullLabel.length > 20; + return ( -
- {getModelIcon(modelValue)} - {getModelLabel(modelValue)} -
+ {needsTooltip ? ( + +
+ {getModelIcon(modelValue)} + {truncatedLabel} +
+
+ ) : ( +
+ {getModelIcon(modelValue)} + {truncatedLabel} +
+ )}
); })} diff --git a/frontend/src/context/ChatContext.tsx b/frontend/src/context/ChatContext.tsx index 18e67bf3..55c95383 100644 --- a/frontend/src/context/ChatContext.tsx +++ b/frontend/src/context/ChatContext.tsx @@ -3,6 +3,7 @@ import useConfigHook from "@/hooks/useConfigHook"; import useImageHook from "@/hooks/useImageHook"; import useChat from "@/hooks/useChat"; import useThread from "@/hooks/useThread"; +import useModel from "@/hooks/useModel"; export const ChatContext = createContext({}); export default function ChatProvider({ @@ -10,10 +11,12 @@ export default function ChatProvider({ }: { children: React.ReactNode; }) { + const modelsHooks = useModel(); const chatHooks = useChat(); const imageHooks = useImageHook(); const configHooks = useConfigHook(); const threadHooks = useThread(); + return ( {children} diff --git a/frontend/src/hooks/useAgent.ts b/frontend/src/hooks/useAgent.ts index df1a241a..586cadb9 100644 --- a/frontend/src/hooks/useAgent.ts +++ b/frontend/src/hooks/useAgent.ts @@ -1,8 +1,8 @@ import agentService, { Agent } from "@/lib/services/agentService"; import { useEffect, useState } from "react"; import ToolConfig from "@/lib/config/tool"; -import useModel from "./useModel"; import { DropdownMenuCheckboxItemProps } from "@radix-ui/react-dropdown-menu"; +import useModel from "./useModel"; type Checked = DropdownMenuCheckboxItemProps["checked"]; @@ -31,7 +31,9 @@ export const INIT_AGENT_STATE: AgentState = { }; export function useAgent() { - const { model } = useModel(); + const { model, useModelsEffect } = useModel(); + useModelsEffect(); + const [agent, setAgent] = useState(INIT_AGENT_STATE.agent); const [agents, setAgents] = useState([]); const [webSearchCheck, setWebSearchCheck] = useState(() => { @@ -56,13 +58,14 @@ export function useAgent() { useEffect(() => { setAgent({ ...agent, + model: model ?? "", presidio: { analyze: piiAnalyzeCheck ? true : false, anonymize: piiAnonymizeCheck ? 
true : false, redact: false, }, }); - }, [piiAnalyzeCheck, piiAnonymizeCheck]); + }, [piiAnalyzeCheck, piiAnonymizeCheck, model]); const setAgentSystemMessage = (system: string) => { setAgent({ ...agent, prompt: system }); diff --git a/frontend/src/hooks/useAppHook.ts b/frontend/src/hooks/useAppHook.ts index cf235208..91eb1334 100644 --- a/frontend/src/hooks/useAppHook.ts +++ b/frontend/src/hooks/useAppHook.ts @@ -91,9 +91,22 @@ export default function useAppHook() { return null; }; + function isLikelyMobile() { + const uaData = (navigator as any).userAgentData; + if (uaData && uaData.mobile) return true; // #1 Most factual + + if ((navigator as any).connection?.type === "cellular") return true; // #2 Good indicator + + const coarse = window.matchMedia("(pointer: coarse)").matches; + const touch = navigator.maxTouchPoints > 1; + + return coarse && touch; // #3 + #4 combined = best factual proxy + } + return { appVersion, isMobile, + isLikelyMobile, useFetchAppVersionEffect, loading, setLoading, diff --git a/frontend/src/hooks/useChat.ts b/frontend/src/hooks/useChat.ts index a520d89d..924b52a5 100644 --- a/frontend/src/hooks/useChat.ts +++ b/frontend/src/hooks/useChat.ts @@ -122,7 +122,7 @@ export default function useChat(): ChatContextType { metadata.current_time = new Date().toISOString(); const source = streamThread({ system: agent.prompt, - messages: formatedMessages, + input: { messages: formatedMessages }, model: agent.model, metadata: metadata, tools: agent.tools, diff --git a/frontend/src/lib/services/threadService.ts b/frontend/src/lib/services/threadService.ts index 9c7e9c79..e3ad3e0e 100644 --- a/frontend/src/lib/services/threadService.ts +++ b/frontend/src/lib/services/threadService.ts @@ -84,21 +84,28 @@ export const alterSystemPrompt = async (payload: ThreadPayload) => { }; type MessageContent = string | Array<{ type: string; [key: string]: any }>; - +type Messages = { role: string; content: MessageContent; [key: string]: any }[]; +type Input = { messages: Messages }; +type Metadata = { thread_id?: string; checkpoint_id?: string; [key: string]: any }; +type A2A = { [key: string]: any }; +type MCP = { [key: string]: any }; +type Tools = string[]; +type Subagents = Agent[]; +type Presidio = { + analyze?: boolean; + anonymize?: boolean; + redact?: boolean; +}; interface StreamThreadPayload { system?: string; - messages: { role: string; content: MessageContent; [key: string]: any }[]; + input: Input; model: string; - metadata: { thread_id?: string; checkpoint_id?: string; [key: string]: any }; - a2a?: object; - mcp?: object; - tools?: string[]; - subagents?: Agent[]; - presidio?: { - analyze?: boolean; - anonymize?: boolean; - redact?: boolean; - }; + metadata: Metadata; + a2a?: A2A; + mcp?: MCP; + tools?: Tools; + subagents?: Subagents; + presidio?: Presidio; } export const streamThread = (payload: StreamThreadPayload): SSE => { diff --git a/frontend/src/lib/utils/message.ts b/frontend/src/lib/utils/message.ts new file mode 100644 index 00000000..3b1d6c68 --- /dev/null +++ b/frontend/src/lib/utils/message.ts @@ -0,0 +1,14 @@ + + +export function latestHumanMessage(messages: any[] | undefined | null) { + if (!Array.isArray(messages) || messages.length === 0) { + return null; + } + for (let i = messages.length - 1; i >= 0; i--) { + const msg = messages[i]; + if (msg && msg.type === "human") { + return msg; + } + } + return null; +} diff --git a/frontend/src/pages/chat/ChatPanel.tsx b/frontend/src/pages/chat/ChatPanel.tsx index 66cae8cf..00987660 100644 --- 
a/frontend/src/pages/chat/ChatPanel.tsx +++ b/frontend/src/pages/chat/ChatPanel.tsx @@ -10,6 +10,7 @@ import { ResizableHandle, } from "@/components/ui/resizable"; import FileEditorPanel from "@/components/panels/FileEditorPanel"; +import { useAppContext } from "@/context/AppContext"; interface ChatPanelProps { agent?: Agent; @@ -19,6 +20,7 @@ interface ChatPanelProps { } function ChatPanel({ agent, chatNav, showAgentMenu = true }: ChatPanelProps) { + const { appVersion } = useAppContext(); const { messages, viewMode, filesMap } = useChatContext(); if (agent && messages.length === 0) { @@ -31,7 +33,7 @@ function ChatPanel({ agent, chatNav, showAgentMenu = true }: ChatPanelProps) { diff --git a/frontend/src/pages/chat/chat-v2.tsx b/frontend/src/pages/chat/chat-v2.tsx index 291a4465..50a4d3b5 100644 --- a/frontend/src/pages/chat/chat-v2.tsx +++ b/frontend/src/pages/chat/chat-v2.tsx @@ -15,8 +15,10 @@ export function ChatV2Page() { useListThreadsEffect, useListCheckpointsEffect, metadata, + useModelsEffect, } = useChatContext(); + useModelsEffect(); useEffectGetAgents(); useEffectUpdateAssistantId();
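
Taken together, the backend schema change and the `useChat` update above mean the stream request body now nests messages under `input` instead of sending them at the top level. A Python equivalent of what `streamThread` sends, assuming a local backend and that the streaming handler is mounted at `/llm/stream` (the exact route path is not visible in this diff):

```python
import json
import requests

BASE_URL = "http://localhost:8000"  # assumed host/port for a local backend

payload = {
    # Messages now live under "input", matching the new LLMInput schema
    "input": {"messages": [{"role": "user", "content": "Weather in Dallas?"}]},
    "model": "openai:gpt-5-nano",
    "metadata": {"thread_id": None},  # the server fills in a UUID when omitted
    "tools": [],
    "presidio": {"analyze": False, "anonymize": False},
}

# Read the SSE stream line by line and decode each "data:" frame
with requests.post(f"{BASE_URL}/llm/stream", json=payload, stream=True, timeout=120) as resp:
    for line in resp.iter_lines(decode_unicode=True):
        if line and line.startswith("data: "):
            print(json.loads(line[len("data: "):]))
```
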