Skip to content

Commit 6e058ee

Browse files
authored
feat: upload document feature (#19)
<img width="1512" height="908" alt="Screenshot 2026-03-02 at 7 54 53 PM" src="https://github.com/user-attachments/assets/8d3e1fcd-291f-4c99-8d3c-9b46148d6405" />
1 parent c48519e commit 6e058ee

File tree

7 files changed

+376
-60
lines changed

7 files changed

+376
-60
lines changed

app/agents/faq.py

Lines changed: 54 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
import numpy as np
1616
import yaml
1717
from jinja2 import Environment, FileSystemLoader
18-
from langchain_core.messages import AIMessage, SystemMessage
18+
from langchain_core.messages import AIMessage, HumanMessage, SystemMessage
1919
from langchain_openai import ChatOpenAI
2020
from langgraph.prebuilt import ToolNode
2121
from langsmith import traceable
@@ -103,24 +103,36 @@ async def __call__(self, state: ConversationState) -> ConversationState:
103103
logger.debug("[FAQ] __call__ invoked (tool-calling pattern)")
104104

105105
try:
106-
messages = list(state.get("messages", []))
107-
raw_last = [m.content for m in messages if m.type == "human"]
108-
user_message = raw_last[-1] if raw_last else ""
109-
if isinstance(user_message, list):
110-
text_parts = [
111-
p.get("text", "") for p in user_message
112-
if isinstance(p, dict) and p.get("type") == "text"
113-
]
114-
user_message = " ".join(text_parts).strip() or ""
115-
else:
116-
user_message = (user_message or "").strip() if isinstance(user_message, str) else ""
106+
raw_messages = list(state.get("messages", []))
107+
messages = self._sanitize_messages(raw_messages)
117108

109+
user_message = next(
110+
(m.content for m in reversed(messages) if isinstance(m, HumanMessage)),
111+
"",
112+
)
113+
114+
# Respuesta predefinida para la burbuja "Preguntas frecuentes"
118115
if _is_preguntas_frecuentes_intent(user_message):
119116
logger.info("[FAQ] Respuesta predefinida para 'Preguntas frecuentes'")
120117
response = RESPUESTA_PREGUNTAS_FRECUENTES
121118
else:
119+
# Inyectar contenido del documento adjunto si existe
120+
doc_context = state.get("document_context")
121+
if doc_context:
122+
doc_filename = state.get("document_filename", "documento")
123+
cap = 12_000
124+
snippet = doc_context[:cap] + ("..." if len(doc_context) > cap else "")
125+
for i in range(len(messages) - 1, -1, -1):
126+
if isinstance(messages[i], HumanMessage):
127+
prev = messages[i].content or ""
128+
messages[i] = HumanMessage(
129+
content=f"{prev}\n\n[Documento adjunto: {doc_filename}]\n{snippet}"
130+
)
131+
break
132+
122133
if not any(isinstance(m, SystemMessage) for m in messages):
123134
messages = [self.system_message] + messages
135+
124136
response = await self._tool_calling_loop(messages)
125137

126138
conv_id = state.get("conversation_id")
@@ -129,7 +141,7 @@ async def __call__(self, state: ConversationState) -> ConversationState:
129141
conv_id = await conversation_service.get_or_create_conversation(
130142
session, conv_id
131143
)
132-
await conversation_service.save_message(session, conv_id, "user", user_message)
144+
await conversation_service.save_message(session, conv_id, "user", user_message or "")
133145
await conversation_service.save_message(session, conv_id, "assistant", response)
134146
await session.commit()
135147
except Exception as db_err:
@@ -169,6 +181,35 @@ async def __call__(self, state: ConversationState) -> ConversationState:
169181
# Internal helpers
170182
# ------------------------------------------------------------------
171183

184+
@staticmethod
def _sanitize_messages(messages: list) -> list:
    """Ensure every message has ``content`` as a plain string.

    The LangGraph checkpointer (``add_messages``) may replay old
    ``HumanMessage`` objects whose ``content`` is a list of
    multimodal parts from before the WS-layer extraction fix.
    OpenAI rejects these, so we flatten them here.

    Args:
        messages: Heterogeneous list of LangChain message objects.

    Returns:
        A new list where every message whose ``content`` was a list of
        parts has been rebuilt with a flat string ``content``; all other
        messages are passed through unchanged.
    """
    clean: list = []
    for m in messages:
        content = getattr(m, "content", None)
        if not isinstance(content, list):
            # Already a plain string (or None) — keep the original object.
            clean.append(m)
            continue

        # Collect textual fragments. LangChain content lists may hold
        # bare strings as well as {"type": "text", ...} dicts; the old
        # code silently dropped the bare-string form.
        text_parts: list[str] = []
        for p in content:
            if isinstance(p, str):
                text_parts.append(p.strip())
            elif isinstance(p, dict) and p.get("type") == "text":
                text_parts.append(p.get("text", "").strip())

        # Drop empty fragments so the join never yields doubled spaces.
        flat = " ".join(t for t in text_parts if t)

        # Rebuild with the concrete class matching the original role.
        if getattr(m, "type", None) == "ai":
            clean.append(AIMessage(content=flat))
        elif isinstance(m, SystemMessage):
            clean.append(SystemMessage(content=flat))
        else:
            clean.append(HumanMessage(content=flat))
    return clean
212+
172213
@traceable(run_type="chain")
173214
async def _tool_calling_loop(
174215
self,

app/agents/supervisor.py

Lines changed: 50 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
ROUTABLE_AGENTS,
2222
)
2323
from ..graph.state import ConversationState
24+
from ..graph.document_extractor import extract_text_from_message
2425

2526
logger = logging.getLogger(__name__)
2627

@@ -57,15 +58,21 @@ async def route_message(self, state: ConversationState) -> ConversationState:
5758
"""Analiza el mensaje del usuario y decide el routing."""
5859

5960
messages = state.get("messages", [])
60-
chat_history = [m.content for m in messages if m.type == "human"]
61-
user_message = chat_history[-1].strip()
62-
63-
logger.debug("=" * 60)
64-
logger.debug("[SUPERVISOR] route_message called")
65-
logger.debug(f"[SUPERVISOR] User message: {user_message!r}")
66-
logger.debug(f"[SUPERVISOR] Total messages in state: {len(messages)}")
67-
for i, m in enumerate(messages):
68-
logger.debug(f"[SUPERVISOR] msg[{i}] type={m.type} content={m.content[:100]!r}...")
61+
human_messages = [m for m in messages if m.type == "human"]
62+
63+
user_message = (
64+
extract_text_from_message(human_messages[-1]).strip()
65+
if human_messages
66+
else ""
67+
)
68+
69+
has_doc = bool(state.get("document_context"))
70+
logger.info(
71+
"[SUPERVISOR] route_message: user=%r has_doc=%s doc_file=%r",
72+
user_message[:80],
73+
has_doc,
74+
state.get("document_filename"),
75+
)
6976

7077
# 1. Estado: si hay wizard activo, mantenerlo sin llamar al LLM
7178
wizard_state_obj = state.get("wizard_state")
@@ -82,7 +89,11 @@ async def route_message(self, state: ConversationState) -> ConversationState:
8289
return self._route_to(state, "wizard")
8390

8491
# 2. Routing basado 100% en LLM usando contexto conversacional completo
85-
intention = await self._route_by_descriptions(user_message, messages)
92+
intention = await self._route_by_descriptions(
93+
user_message,
94+
messages,
95+
state=state,
96+
)
8697

8798
state["supervisor_decision"] = intention
8899
state["current_agent"] = intention
@@ -106,7 +117,13 @@ def decide_next_agent(self, state: ConversationState) -> str:
106117
# ------------------------------------------------------------------
107118

108119
@traceable(run_type="llm")
109-
async def _route_by_descriptions(self, message: str, messages: list) -> str:
120+
async def _route_by_descriptions(
121+
self,
122+
message: str,
123+
messages: list,
124+
*,
125+
state: ConversationState | None = None,
126+
) -> str:
110127
"""Usa el LLM para elegir el agente cuya descripción mejor
111128
coincide con la intención del usuario."""
112129

@@ -116,17 +133,31 @@ async def _route_by_descriptions(self, message: str, messages: list) -> str:
116133
f'- "{name}": {description}'
117134
for name, description in ROUTABLE_AGENTS
118135
)
119-
valid_names = ", ".join(
120-
f'"{name}"' for name, _ in ROUTABLE_AGENTS
121-
)
136+
valid_names = ", ".join(f'"{name}"' for name, _ in ROUTABLE_AGENTS)
122137

123-
# Contexto conversacional completo (últimos turnos user/assistant)
124-
context = ""
138+
# Contexto conversacional (últimos turnos + documento si existe)
139+
context_lines = []
125140
if messages:
126-
context = "\n".join(
127-
f"- {'Usuario' if msg.type == 'human' else 'Asistente'}: {msg.content}"
128-
for msg in messages[-6:]
141+
for msg in messages[-6:]:
142+
role = "Usuario" if msg.type == "human" else "Asistente"
143+
text = extract_text_from_message(msg)
144+
context_lines.append(f"- {role}: {text}")
145+
146+
if state and state.get("document_context"):
147+
filename = state.get("document_filename", "documento")
148+
doc_len = len(state.get("document_context") or "")
149+
context_lines.append(
150+
f"- [Sistema]: El usuario ha adjuntado el documento: {filename!r}"
129151
)
152+
logger.info(
153+
"[SUPERVISOR] Documento inyectado en contexto de routing: file=%r, %d chars",
154+
filename,
155+
doc_len,
156+
)
157+
else:
158+
logger.info("[SUPERVISOR] No hay document_context en state, no se inyecta documento.")
159+
160+
context = "\n".join(context_lines)
130161

131162
system_prompt = _prompts.get_template("supervisor_system.j2").render()
132163
prompt = _prompts.get_template("supervisor_route.j2").render(

app/api/v1/agui_ws.py

Lines changed: 51 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,45 @@ def _agui_event(event_type: str, **fields) -> str:
2525
return json.dumps({"type": event_type, **fields})
2626

2727

28+
def _extract_text_and_attachment(
29+
raw_message,
30+
) -> tuple[str, dict | None]:
31+
"""Split a raw WS message into (text, attachment_dict|None).
32+
33+
``raw_message`` can be:
34+
- ``str`` – plain text, no attachment.
35+
- ``list`` – multimodal parts from the frontend
36+
(``{type:"text", text:"…"}``, ``{type:"file", filename:"…", data:"…", media_type:"…"}``).
37+
"""
38+
if isinstance(raw_message, str):
39+
return raw_message.strip(), None
40+
41+
if not isinstance(raw_message, list) or not raw_message:
42+
return "", None
43+
44+
text_parts: list[str] = []
45+
attachment: dict | None = None
46+
47+
for part in raw_message:
48+
if not isinstance(part, dict):
49+
continue
50+
ptype = part.get("type", "")
51+
if ptype == "text":
52+
text_parts.append(part.get("text", ""))
53+
elif ptype in ("file", "document") and attachment is None:
54+
attachment = {
55+
"filename": part.get("filename") or part.get("name") or "document",
56+
"data": part.get("data") or part.get("source") or "",
57+
"media_type": part.get("media_type", "application/octet-stream"),
58+
}
59+
60+
text = " ".join(t.strip() for t in text_parts).strip()
61+
if not text and attachment:
62+
text = f"[Documento adjunto: {attachment['filename']}]"
63+
64+
return text, attachment
65+
66+
2867
@router.websocket("/ws")
2968
async def agui_websocket(
3069
websocket: WebSocket,
@@ -46,8 +85,17 @@ async def agui_websocket(
4685
)
4786
continue
4887

49-
user_message = data.get("message", "").strip()
50-
if not user_message:
88+
raw_message = data.get("message")
89+
user_message, attachment = _extract_text_and_attachment(raw_message)
90+
91+
logger.info(
92+
"[AG-UI] Incoming message type=%s, text_len=%d, has_attachment=%s",
93+
type(raw_message).__name__,
94+
len(user_message),
95+
attachment is not None,
96+
)
97+
98+
if not user_message and not attachment:
5199
await websocket.send_text(
52100
_agui_event("RUN_ERROR", message="Empty message", code="BAD_REQUEST")
53101
)
@@ -66,6 +114,7 @@ async def agui_websocket(
66114
message=user_message,
67115
conversation_id=conversation_id,
68116
wizard_state=wizard_state,
117+
attachment=attachment,
69118
)
70119

71120
response_text = result.get("response", "")

0 commit comments

Comments
 (0)