Skip to content

Commit 2eafa50

Browse files
Merge branch 'main' into devops
2 parents 7e210d1 + e5e4c03 commit 2eafa50

File tree

7 files changed

+669
-129
lines changed

7 files changed

+669
-129
lines changed

DocsManager/app/schemas/chatMessage.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,4 +8,5 @@ class UserMessageIn(BaseModel):
88

99

1010
class AssistantMessageOut(BaseModel):
11+
session_id: UUID
1112
message: str
Lines changed: 179 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,10 @@
11
"""Nodo 4: Parafraseo - Saves message, retrieves chat history, and paraphrases user input."""
22

3+
import json
34
import logging
45

56
from app.agents.state import AgentState
6-
from langchain_core.messages import SystemMessage
7+
from langchain_core.messages import HumanMessage, SystemMessage
78
from langchain_openai import ChatOpenAI
89

910
logger = logging.getLogger(__name__)
@@ -16,42 +17,198 @@ def parafraseo(state: AgentState) -> AgentState:
1617
Parafraseo node - Saves message to DB, retrieves chat history, and paraphrases user input.
1718
1819
This node:
19-
1. Saves the user's message to the chat session in PostgreSQL
20-
2. Retrieves all chat messages for the session (including the newly saved message)
21-
3. Paraphrases the user input using chat history to improve clarity
22-
4. Prepares text for retrieval step
20+
1. Receives a validated user message (after guard_inicial validation)
21+
2. Saves the user's message to the chat session in PostgreSQL (endpoint 1 - placeholder)
22+
3. Retrieves the last 10 messages of the conversation (endpoint 2 - placeholder)
23+
4. Uses the last message to understand user's intentions and the remaining 9 (older) messages as context
24+
5. Sends to LLM with instructions to return 3 differently phrased statements that encapsulate
25+
the user's intentions according to the last message and chat history
2326
2427
Args:
25-
state: Agent state containing prompt, chat_session_id, and user_id
28+
state: Agent state containing validated user message, chat_session_id, and user_id
2629
2730
Returns:
28-
Updated state with chat_messages, paraphrased_text set
31+
Updated state with chat_messages, paraphrased_text, and paraphrased_statements set
2932
"""
3033
updated_state = state.copy()
3134

32-
# TODO: Implement endpoint call to save message and retrieve chat history
35+
# Get the validated user message from state
36+
messages = state.get("messages", [])
37+
if not messages:
38+
logger.error("No messages found in state")
39+
updated_state["error_message"] = "No user message found in state"
40+
return updated_state
41+
42+
# Get the last message (the validated user message)
43+
last_user_message = messages[-1]
44+
user_message_content = last_user_message.content if hasattr(last_user_message, 'content') else str(last_user_message)
45+
46+
# TODO: Endpoint 1 - Save message to PostgreSQL database according to chat session
47+
# This should:
48+
# 1. Call an endpoint (not yet developed) that:
49+
# - Saves the current user message to the chat session in PostgreSQL
50+
# - Uses chat_session_id and user_id from state
51+
# - Returns success/failure status
52+
# 2. Handle errors appropriately (session not found, permission denied, etc.)
53+
logger.info("Endpoint 1 (save message to DB) not yet implemented - skipping")
54+
55+
# TODO: Endpoint 2 - Retrieve last 10 messages of the conversation
3356
# This should:
3457
# 1. Call an endpoint (not yet developed) that:
35-
# - Saves the current user message to the chat session
36-
# - Retrieves all chat messages for the session (including the newly saved message)
37-
# - Returns the updated chat_messages list
58+
# - Retrieves the last 10 messages for the chat session
59+
# - Returns a list of message dictionaries with structure: [{"sender": "user|assistant|system", "message": "...", "created_at": "..."}, ...]
60+
# - Messages must be ordered from oldest to newest: the slicing below treats chat_messages[-1] as the most recent message
3861
# 2. Update state with chat_messages from the endpoint response
3962
# 3. Handle errors appropriately (session not found, permission denied, etc.)
4063

41-
# Placeholder: For now, we'll use empty chat history
64+
# Placeholder: For now, we'll simulate chat history with just the current message
4265
# Once the endpoint is implemented, replace this with the actual endpoint call
43-
updated_state["chat_messages"] = None
44-
logger.warning("Chat history retrieval endpoint not yet implemented - using empty history")
66+
chat_messages = [
67+
{"sender": "user", "message": user_message_content, "created_at": "2025-01-01T00:00:00"}
68+
]
69+
updated_state["chat_messages"] = chat_messages
70+
logger.warning("Endpoint 2 (retrieve chat history) not yet implemented - using current message only")
71+
72+
# Process chat history: last message (intentions) + 9 older messages (context)
73+
# The last message is the most recent one (for understanding intentions)
74+
# The remaining 9 messages are older (for context)
75+
if len(chat_messages) >= 10:
76+
# We have 10+ messages: use last one for intentions, previous 9 for context
77+
context_messages = chat_messages[-10:-1] # 9 older messages
78+
intention_message = chat_messages[-1] # Last message (most recent)
79+
elif len(chat_messages) > 1:
80+
# We have 2-9 messages: use last one for intentions, all previous for context
81+
context_messages = chat_messages[:-1] # All older messages
82+
intention_message = chat_messages[-1] # Last message
83+
else:
84+
# We have only 1 message: use it for intentions, no context
85+
context_messages = []
86+
intention_message = chat_messages[0] if chat_messages else {"sender": "user", "message": user_message_content}
87+
88+
# Format chat history for LLM prompt
89+
# Each message should clearly indicate if it was sent by the user or the agent
90+
def format_message_with_sender(msg: dict) -> str:
    """Format a chat message as ``"<Sender>: <message>"`` for the LLM prompt.

    Args:
        msg: Message record read via the ``"sender"`` and ``"message"`` keys.
            Either key may be missing or hold ``None``.

    Returns:
        The message text prefixed with a capitalized sender label, e.g.
        ``"User: hello"``. A missing, ``None`` or empty sender is labelled
        ``"Unknown"``; a missing or ``None`` message becomes an empty string.
    """
    # `dict.get` only applies its default when the key is absent, so a record
    # carrying an explicit None must be normalized before calling str methods.
    sender = str(msg.get("sender") or "unknown").lower()

    message = msg.get("message")
    if message is None:
        message = ""

    # "user" / "assistant" / "system" capitalize to exactly the labels the
    # prompt expects, so one call covers the known senders and any other value.
    return f"{sender.capitalize()}: {message}"
106+
107+
context_text = ""
108+
if context_messages:
109+
context_lines = []
110+
for msg in context_messages:
111+
context_lines.append(format_message_with_sender(msg))
112+
context_text = "\n".join(context_lines)
113+
114+
# Format the intention message with sender information
115+
intention_sender = intention_message.get("sender", "user").lower()
116+
intention_message_text = intention_message.get("message", user_message_content)
117+
118+
# Format intention message with sender label
119+
if intention_sender == "user":
120+
intention_label = "User"
121+
elif intention_sender == "assistant":
122+
intention_label = "Assistant"
123+
elif intention_sender == "system":
124+
intention_label = "System"
125+
else:
126+
intention_label = intention_sender.capitalize()
45127

46-
# Paraphrase the last message using history
47-
system_instruction = """You are an expert at paraphrasing user questions to be standalone and clear, given the conversation history.
48-
Reformulate the last user message to be a self-contained query that includes necessary context from previous messages.
49-
Do not answer the question, just rewrite it."""
128+
intention_text = f"{intention_label}: {intention_message_text}"
129+
130+
# Create LLM prompt with instructions
131+
system_instruction = """You are an expert at understanding user intentions and paraphrasing them in different ways.
132+
133+
Given a user's last message and their conversation history, your task is to return exactly 3 differently phrased statements that encapsulate the user's intentions.
134+
135+
The last message represents what the user wants to know or do right now. The conversation history provides context about what they've been discussing.
136+
137+
Requirements:
138+
- Return exactly 3 different phrasings
139+
- Each phrasing should capture the user's core intention from their last message
140+
- Use the conversation history to understand context and references (like "it", "that", "the previous thing")
141+
- Each phrasing should be a complete, standalone statement that makes sense without the full conversation
142+
- The phrasings should be diverse - use different words, sentence structures, and perspectives
143+
- Format your response as a JSON array of exactly 3 strings: ["statement 1", "statement 2", "statement 3"]
144+
- Do not include any explanation, just the JSON array
145+
146+
Example format:
147+
["What are the main features of the product?", "Can you explain the key characteristics of this product?", "I'd like to know what this product offers."]"""
148+
149+
# Build the prompt with context and intention
150+
# Both context and intention messages now clearly show sender (User/Assistant/System)
151+
if context_text:
152+
user_prompt = f"""Conversation History (older messages for context):
153+
{context_text}
154+
155+
Last Message (current intention):
156+
{intention_text}
157+
158+
Return 3 differently phrased statements that encapsulate the user's intention from their last message, using the conversation history for context."""
159+
else:
160+
user_prompt = f"""Message (current intention):
161+
{intention_text}
162+
163+
Return 3 differently phrased statements that encapsulate the user's intention from their message."""
50164

51-
# Use messages from state (will include chat history once endpoint is implemented)
52-
messages = [SystemMessage(content=system_instruction)] + state.get("messages", [])
165+
# Call LLM with the prompt
166+
messages_for_llm = [
167+
SystemMessage(content=system_instruction),
168+
HumanMessage(content=user_prompt)
169+
]
53170

54-
response = llm.invoke(messages)
55-
updated_state["paraphrased_text"] = response.content
171+
try:
172+
response = llm.invoke(messages_for_llm)
173+
response_content = response.content.strip()
174+
175+
# Parse the JSON response to extract the 3 statements
176+
try:
177+
# Try to parse as JSON array
178+
paraphrased_statements = json.loads(response_content)
179+
if isinstance(paraphrased_statements, list) and len(paraphrased_statements) >= 3:
180+
# Take the first 3 statements
181+
paraphrased_statements = paraphrased_statements[:3]
182+
elif isinstance(paraphrased_statements, list):
183+
# If fewer than 3, pad by repeating the last statement (or intention_text when the list is empty)
184+
while len(paraphrased_statements) < 3:
185+
paraphrased_statements.append(paraphrased_statements[-1] if paraphrased_statements else intention_text)
186+
else:
187+
# If not a list, create a list with the response
188+
paraphrased_statements = [response_content, response_content, response_content]
189+
except json.JSONDecodeError:
190+
# If JSON parsing fails, try to extract statements from text
191+
logger.warning("LLM response is not valid JSON, attempting to parse as text")
192+
# Try to split by newlines or common separators
193+
lines = [line.strip() for line in response_content.split('\n') if line.strip()]
194+
if len(lines) >= 3:
195+
paraphrased_statements = lines[:3]
196+
else:
197+
# Fallback: use the response as-is and duplicate if needed
198+
paraphrased_statements = [response_content] * 3
199+
200+
# Store the results
201+
updated_state["paraphrased_statements"] = paraphrased_statements
202+
# Store the first statement in paraphrased_text for backward compatibility
203+
updated_state["paraphrased_text"] = paraphrased_statements[0] if paraphrased_statements else intention_text
204+
205+
logger.info(f"Generated {len(paraphrased_statements)} paraphrased statements")
206+
207+
except Exception as e:
208+
logger.error(f"Error calling LLM for paraphrasing: {e}")
209+
# Fallback: use the original message
210+
updated_state["paraphrased_statements"] = [intention_text, intention_text, intention_text]
211+
updated_state["paraphrased_text"] = intention_text
212+
updated_state["error_message"] = f"Error in paraphrasing: {str(e)}"
56213

57214
return updated_state

0 commit comments

Comments
 (0)