Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 10 additions & 7 deletions RAGManager/app/agents/graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,18 @@ def create_agent_graph() -> StateGraph:
Create and configure the LangGraph agent graph.

The graph implements the following flow:
1. START -> agent_host (Nodo 1)
2. agent_host -> guard (Nodo 2)
3. guard -> [conditional] -> fallback (Nodo 3) or END
4. fallback -> parafraseo (Nodo 4)
1. START -> agent_host (Nodo 1) - Prepares state, no DB operations
2. agent_host -> guard (Nodo 2) - Validates for malicious content
3. guard -> [conditional]:
- malicious -> fallback -> END (stops processing, no DB save)
- continue -> parafraseo (Nodo 4)
4. parafraseo -> Saves message to DB, retrieves chat history, paraphrases
5. parafraseo -> retriever (Nodo 5)
6. retriever -> context_builder (Nodo 6)
7. context_builder -> generator (Nodo 7)
8. generator -> fallback (Nodo 8)
9. fallback -> [conditional] -> END (with final_response) or END (with error)
7. context_builder -> guard (validates response)
8. guard -> [conditional]:
- malicious -> fallback -> END
- continue -> END (success)

Returns:
Configured StateGraph instance ready for execution
Expand Down
129 changes: 17 additions & 112 deletions RAGManager/app/agents/nodes/agent_host.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,8 @@
"""Nodo 1: Agent Host - Entry point that saves initial context."""
"""Nodo 1: Agent Host - Entry point that prepares initial state."""

import logging
from uuid import UUID, uuid4

from app.agents.state import AgentState
from app.core.config import settings
from app.core.database_connection import SessionLocal
from app.models.chat import ChatMessage, ChatSession

logger = logging.getLogger(__name__)

Expand All @@ -17,128 +13,37 @@ def agent_host(state: AgentState) -> AgentState:

This node:
1. Receives the initial prompt and optional chat_session_id
2. Creates or retrieves chat session from PostgreSQL
3. Saves the user's message to the chat session
4. Retrieves all chat messages for the session
5. Prepares state for validation
2. Extracts the prompt from messages
3. Prepares state for validation (no DB operations yet)

Note: Chat history retrieval and message saving is deferred to parafraseo
node to ensure malicious messages are not saved to the database.

Args:
state: Agent state containing the user prompt and optional chat_session_id

Returns:
Updated state with chat_session_id, chat_messages, and initial_context set
Updated state with prompt and initial_context set (no DB operations)
"""
updated_state = state.copy()

prompt = state["messages"][-1]
chat_session_id = state.get("chat_session_id")
user_id = state.get("user_id")
# Extract prompt from messages
messages = state.get("messages", [])
last_message = messages[-1] if messages else None
prompt = last_message.content if last_message else ""

# Validate user_id is provided
user_id = state.get("user_id")
if not user_id:
logger.error("user_id is required in state but was not provided")
updated_state["chat_session_id"] = None
updated_state["chat_messages"] = None
updated_state["initial_context"] = prompt
updated_state["error_message"] = "user_id is required"
return updated_state

db = None
try:
db = SessionLocal()
# Get or create chat session
chat_session = None
if chat_session_id:
try:
session_uuid = UUID(chat_session_id)
# Validate ownership: query with both id and user_id filters
chat_session = db.query(ChatSession).filter(
ChatSession.id == session_uuid,
ChatSession.user_id == user_id
).first()
if not chat_session:
# Log security violation - attempted cross-session access
logger.warning(
f"Chat session {chat_session_id} not found or access denied for user {user_id}"
)
# Don't create new session automatically - this prevents session hijacking
raise PermissionError("Chat session not found or access denied")
except (ValueError, TypeError):
logger.warning(f"Invalid chat_session_id format: {chat_session_id}, creating new session")
except PermissionError:
# Re-raise permission errors
raise

# Create new session if needed
if not chat_session:
chat_session = ChatSession(id=uuid4(), user_id=user_id)
db.add(chat_session)
db.flush()

# Create new message with user's prompt
new_message = ChatMessage(
session_id=chat_session.id,
sender="user",
message=prompt,
)
db.add(new_message)
db.flush()

# Query messages for the session with bounded limit (most recent first, then reverse for chronological order)
messages = (
db.query(ChatMessage)
.filter(ChatMessage.session_id == chat_session.id)
.order_by(ChatMessage.created_at.desc())
.limit(settings.chat_message_limit)
.all()
)
# Reverse to get chronological order
messages = list(reversed(messages))

# Convert messages to dictionaries
chat_messages = [
{
"id": msg.id,
"session_id": str(msg.session_id),
"sender": msg.sender,
"message": msg.message,
"created_at": msg.created_at.isoformat() if msg.created_at else None,
}
for msg in messages
]

# Commit the transaction
db.commit()

# Update state
updated_state["chat_session_id"] = str(chat_session.id)
updated_state["chat_messages"] = chat_messages
updated_state["initial_context"] = prompt

logger.info(f"Chat session {chat_session.id} updated with {len(chat_messages)} messages")
# Set prompt and initial context (no DB operations)
updated_state["prompt"] = prompt
updated_state["initial_context"] = prompt
updated_state["chat_messages"] = None # Will be set in parafraseo after validation

except PermissionError as e:
# Rollback on permission error
if db is not None:
db.rollback()
logger.warning(f"Permission denied in agent_host: {e}")
# Set error state for permission violations
updated_state["chat_session_id"] = None
updated_state["chat_messages"] = None
updated_state["initial_context"] = prompt
updated_state["error_message"] = "Chat session not found or access denied"
except Exception as e:
# Rollback on error
if db is not None:
db.rollback()
logger.error(f"Error in agent_host: {e}", exc_info=True)
# Set error state but don't fail completely
updated_state["chat_session_id"] = None
updated_state["chat_messages"] = None
updated_state["initial_context"] = prompt
updated_state["error_message"] = str(e)
finally:
if db is not None:
db.close()
logger.debug("Agent host prepared state for validation (no DB operations)")

return updated_state
44 changes: 29 additions & 15 deletions RAGManager/app/agents/nodes/parafraseo.py
Original file line number Diff line number Diff line change
@@ -1,43 +1,57 @@
"""Nodo 4: Parafraseo - Paraphrases user input."""
"""Nodo 4: Parafraseo - Saves message, retrieves chat history, and paraphrases user input."""

import logging

from app.agents.state import AgentState
from langchain_core.messages import SystemMessage
from langchain_openai import ChatOpenAI

logger = logging.getLogger(__name__)

llm = ChatOpenAI(model="gpt-5-nano")


def parafraseo(state: AgentState) -> AgentState:
"""
Parafraseo node - Paraphrases the user input.
Parafraseo node - Saves message to DB, retrieves chat history, and paraphrases user input.

This node:
1. Takes the adjusted text from Fallback Inicial
2. Paraphrases it to improve clarity or adjust format
3. Prepares text for retrieval step
1. Saves the user's message to the chat session in PostgreSQL
2. Retrieves all chat messages for the session (including the newly saved message)
3. Paraphrases the user input using chat history to improve clarity
4. Prepares text for retrieval step

Args:
state: Agent state containing adjusted_text
state: Agent state containing prompt, chat_session_id, and user_id

Returns:
Updated state with paraphrased_text set
Updated state with chat_messages, paraphrased_text set
Comment on lines +16 to +28
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Documentation claims functionality not yet implemented.

The docstring states that this node "Saves the user's message to the chat session in PostgreSQL" and "Retrieves all chat messages for the session" (lines 19-20), but the implementation shows this is not yet developed (see TODO at lines 32-44). The function currently uses empty chat history with a warning log.

Update the docstring to reflect the current implementation status:

     """
-    Parafraseo node - Saves message to DB, retrieves chat history, and paraphrases user input.
+    Parafraseo node - Paraphrases user input (chat history integration pending).
 
     This node:
-    1. Saves the user's message to the chat session in PostgreSQL
-    2. Retrieves all chat messages for the session (including the newly saved message)
-    3. Paraphrases the user input using chat history to improve clarity
-    4. Prepares text for retrieval step
+    1. TODO: Save the user's message to the chat session (endpoint not yet implemented)
+    2. TODO: Retrieve all chat messages for the session (endpoint not yet implemented)
+    3. Paraphrases the user input (currently without chat history)
+    4. Prepares text for retrieval step
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
Parafraseo node - Saves message to DB, retrieves chat history, and paraphrases user input.
This node:
1. Takes the adjusted text from Fallback Inicial
2. Paraphrases it to improve clarity or adjust format
3. Prepares text for retrieval step
1. Saves the user's message to the chat session in PostgreSQL
2. Retrieves all chat messages for the session (including the newly saved message)
3. Paraphrases the user input using chat history to improve clarity
4. Prepares text for retrieval step
Args:
state: Agent state containing adjusted_text
state: Agent state containing prompt, chat_session_id, and user_id
Returns:
Updated state with paraphrased_text set
Updated state with chat_messages, paraphrased_text set
Parafraseo node - Paraphrases user input (chat history integration pending).
This node:
1. TODO: Save the user's message to the chat session (endpoint not yet implemented)
2. TODO: Retrieve all chat messages for the session (endpoint not yet implemented)
3. Paraphrases the user input (currently without chat history)
4. Prepares text for retrieval step
Args:
state: Agent state containing prompt, chat_session_id, and user_id
Returns:
Updated state with chat_messages, paraphrased_text set
🤖 Prompt for AI Agents
In RAGManager/app/agents/nodes/parafraseo.py around lines 16 to 28, the
docstring inaccurately states that the node saves messages to PostgreSQL and
retrieves session chat history, but the implementation currently leaves those
steps unimplemented (see TODO and a warning about using empty chat history).
Update the docstring to accurately reflect current behavior: state that saving
to DB and retrieving chat history are not implemented yet, that the node logs a
warning and proceeds with an empty chat history, and optionally note the TODO
for future implementation and where to resume work (e.g., DB save and retrieval
functions).

"""
# TODO: Implement paraphrasing logic
updated_state = state.copy()

# TODO: Implement endpoint call to save message and retrieve chat history
# This should:
# 1. Use an LLM or paraphrasing model to rephrase the text
# 2. Improve clarity, adjust tone, or format as needed
# 3. Set paraphrased_text with the result

# Paraphrase the last message using history
# 1. Call an endpoint (not yet developed) that:
# - Saves the current user message to the chat session
# - Retrieves all chat messages for the session (including the newly saved message)
# - Returns the updated chat_messages list
# 2. Update state with chat_messages from the endpoint response
# 3. Handle errors appropriately (session not found, permission denied, etc.)

# Placeholder: For now, we'll use empty chat history
# Once the endpoint is implemented, replace this with the actual endpoint call
updated_state["chat_messages"] = None
logger.warning("Chat history retrieval endpoint not yet implemented - using empty history")
Comment on lines +32 to +44
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🧹 Nitpick | 🔵 Trivial

LGTM! Clear TODO with implementation plan.

The TODO block provides clear guidance on what needs to be implemented, including the endpoint responsibilities and error handling considerations. The placeholder implementation with a warning log is appropriate for the current state.

Once the endpoint is developed, this integration will complete the security fix described in the PR objectives. Would you like me to help draft the endpoint integration code when ready?


# Paraphrase the last message using history
system_instruction = """You are an expert at paraphrasing user questions to be standalone and clear, given the conversation history.
Reformulate the last user message to be a self-contained query that includes necessary context from previous messages.
Do not answer the question, just rewrite it."""

messages = [SystemMessage(content=system_instruction)] + state["messages"]
# Use messages from state (will include chat history once endpoint is implemented)
messages = [SystemMessage(content=system_instruction)] + state.get("messages", [])

response = llm.invoke(messages)
updated_state = state.copy() # Create a copy of the state to update
updated_state["paraphrased_text"] = response.content
Comment on lines +51 to 55
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Inconsistent state access - should use chat_messages instead of messages.

Line 52 uses state.get("messages", []) to build the paraphrasing context, but line 43 sets updated_state["chat_messages"] = None. Once the endpoint is implemented, the paraphrasing should use the retrieved chat history from chat_messages, not the raw messages from the initial state.

Consider updating the code to use chat_messages for consistency:

-    # Use messages from state (will include chat history once endpoint is implemented)
-    messages = [SystemMessage(content=system_instruction)] + state.get("messages", [])
+    # Use chat_messages from state (will include chat history once endpoint is implemented)
+    chat_msgs = state.get("chat_messages") or state.get("messages", [])
+    messages = [SystemMessage(content=system_instruction)] + chat_msgs

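This ensures the paraphrasing will automatically use the retrieved chat history once chat_messages is populated by the endpoint. The suggestion also drops the second `updated_state = state.copy()` before the paraphrased_text assignment, because re-copying the state at that point would discard the chat_messages value set earlier in the function.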

📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
# Use messages from state (will include chat history once endpoint is implemented)
messages = [SystemMessage(content=system_instruction)] + state.get("messages", [])
response = llm.invoke(messages)
updated_state = state.copy() # Create a copy of the state to update
updated_state["paraphrased_text"] = response.content
# Use chat_messages from state (will include chat history once endpoint is implemented)
chat_msgs = state.get("chat_messages") or state.get("messages", [])
messages = [SystemMessage(content=system_instruction)] + chat_msgs
response = llm.invoke(messages)
updated_state["paraphrased_text"] = response.content
🤖 Prompt for AI Agents
In RAGManager/app/agents/nodes/parafraseo.py around lines 51–55, the code builds
messages using state.get("messages", []) but earlier sets
updated_state["chat_messages"] = None, so prefer chat_messages when composing
the messages list: use state.get("chat_messages") or state.get("messages", [])
so that a None or missing chat_messages falls back to messages (a bare
state.get("chat_messages", []) returns None when the key is present with a None
value, which breaks the list concatenation). Leave the
updated_state["paraphrased_text"] assignment unchanged so the paraphrase will
include retrieved chat history once chat_messages is populated.


return updated_state