From 01cceba628ab8eb0348d30fa95f086596b82f8eb Mon Sep 17 00:00:00 2001 From: JPAmorin Date: Sun, 14 Dec 2025 20:15:02 -0300 Subject: [PATCH] Fixed flaw in prompt processing logic. --- RAGManager/app/agents/graph.py | 17 +-- RAGManager/app/agents/nodes/agent_host.py | 129 +++------------------- RAGManager/app/agents/nodes/parafraseo.py | 44 +++++--- 3 files changed, 56 insertions(+), 134 deletions(-) diff --git a/RAGManager/app/agents/graph.py b/RAGManager/app/agents/graph.py index 04eb1d8..9c869bd 100644 --- a/RAGManager/app/agents/graph.py +++ b/RAGManager/app/agents/graph.py @@ -18,15 +18,18 @@ def create_agent_graph() -> StateGraph: Create and configure the LangGraph agent graph. The graph implements the following flow: - 1. START -> agent_host (Nodo 1) - 2. agent_host -> guard (Nodo 2) - 3. guard -> [conditional] -> fallback (Nodo 3) or END - 4. fallback -> parafraseo (Nodo 4) + 1. START -> agent_host (Nodo 1) - Prepares state, no DB operations + 2. agent_host -> guard (Nodo 2) - Validates for malicious content + 3. guard -> [conditional]: + - malicious -> fallback -> END (stops processing, no DB save) + - continue -> parafraseo (Nodo 4) + 4. parafraseo -> Saves message to DB, retrieves chat history, paraphrases 5. parafraseo -> retriever (Nodo 5) 6. retriever -> context_builder (Nodo 6) - 7. context_builder -> generator (Nodo 7) - 8. generator -> fallback (Nodo 8) - 9. fallback -> [conditional] -> END (with final_response) or END (with error) + 7. context_builder -> guard (validates response) + 8. 
guard -> [conditional]: + - malicious -> fallback -> END + - continue -> END (success) Returns: Configured StateGraph instance ready for execution diff --git a/RAGManager/app/agents/nodes/agent_host.py b/RAGManager/app/agents/nodes/agent_host.py index 231e726..b619573 100644 --- a/RAGManager/app/agents/nodes/agent_host.py +++ b/RAGManager/app/agents/nodes/agent_host.py @@ -1,12 +1,8 @@ -"""Nodo 1: Agent Host - Entry point that saves initial context.""" +"""Nodo 1: Agent Host - Entry point that prepares initial state.""" import logging -from uuid import UUID, uuid4 from app.agents.state import AgentState -from app.core.config import settings -from app.core.database_connection import SessionLocal -from app.models.chat import ChatMessage, ChatSession logger = logging.getLogger(__name__) @@ -17,128 +13,37 @@ def agent_host(state: AgentState) -> AgentState: This node: 1. Receives the initial prompt and optional chat_session_id - 2. Creates or retrieves chat session from PostgreSQL - 3. Saves the user's message to the chat session - 4. Retrieves all chat messages for the session - 5. Prepares state for validation + 2. Extracts the prompt from messages + 3. Prepares state for validation (no DB operations yet) + + Note: Chat history retrieval and message saving are deferred to parafraseo + node to ensure malicious messages are not saved to the database. 
Args: state: Agent state containing the user prompt and optional chat_session_id Returns: - Updated state with chat_session_id, chat_messages, and initial_context set + Updated state with prompt and initial_context set (no DB operations) """ updated_state = state.copy() - prompt = state["messages"][-1] - chat_session_id = state.get("chat_session_id") - user_id = state.get("user_id") + # Extract prompt from messages + messages = state.get("messages", []) + last_message = messages[-1] if messages else None + prompt = last_message.content if last_message else "" # Validate user_id is provided + user_id = state.get("user_id") if not user_id: logger.error("user_id is required in state but was not provided") - updated_state["chat_session_id"] = None - updated_state["chat_messages"] = None - updated_state["initial_context"] = prompt updated_state["error_message"] = "user_id is required" return updated_state - db = None - try: - db = SessionLocal() - # Get or create chat session - chat_session = None - if chat_session_id: - try: - session_uuid = UUID(chat_session_id) - # Validate ownership: query with both id and user_id filters - chat_session = db.query(ChatSession).filter( - ChatSession.id == session_uuid, - ChatSession.user_id == user_id - ).first() - if not chat_session: - # Log security violation - attempted cross-session access - logger.warning( - f"Chat session {chat_session_id} not found or access denied for user {user_id}" - ) - # Don't create new session automatically - this prevents session hijacking - raise PermissionError("Chat session not found or access denied") - except (ValueError, TypeError): - logger.warning(f"Invalid chat_session_id format: {chat_session_id}, creating new session") - except PermissionError: - # Re-raise permission errors - raise - - # Create new session if needed - if not chat_session: - chat_session = ChatSession(id=uuid4(), user_id=user_id) - db.add(chat_session) - db.flush() - - # Create new message with user's prompt - new_message = 
ChatMessage( - session_id=chat_session.id, - sender="user", - message=prompt, - ) - db.add(new_message) - db.flush() - - # Query messages for the session with bounded limit (most recent first, then reverse for chronological order) - messages = ( - db.query(ChatMessage) - .filter(ChatMessage.session_id == chat_session.id) - .order_by(ChatMessage.created_at.desc()) - .limit(settings.chat_message_limit) - .all() - ) - # Reverse to get chronological order - messages = list(reversed(messages)) - - # Convert messages to dictionaries - chat_messages = [ - { - "id": msg.id, - "session_id": str(msg.session_id), - "sender": msg.sender, - "message": msg.message, - "created_at": msg.created_at.isoformat() if msg.created_at else None, - } - for msg in messages - ] - - # Commit the transaction - db.commit() - - # Update state - updated_state["chat_session_id"] = str(chat_session.id) - updated_state["chat_messages"] = chat_messages - updated_state["initial_context"] = prompt - - logger.info(f"Chat session {chat_session.id} updated with {len(chat_messages)} messages") + # Set prompt and initial context (no DB operations) + updated_state["prompt"] = prompt + updated_state["initial_context"] = prompt + updated_state["chat_messages"] = None # Will be set in parafraseo after validation - except PermissionError as e: - # Rollback on permission error - if db is not None: - db.rollback() - logger.warning(f"Permission denied in agent_host: {e}") - # Set error state for permission violations - updated_state["chat_session_id"] = None - updated_state["chat_messages"] = None - updated_state["initial_context"] = prompt - updated_state["error_message"] = "Chat session not found or access denied" - except Exception as e: - # Rollback on error - if db is not None: - db.rollback() - logger.error(f"Error in agent_host: {e}", exc_info=True) - # Set error state but don't fail completely - updated_state["chat_session_id"] = None - updated_state["chat_messages"] = None - 
updated_state["initial_context"] = prompt - updated_state["error_message"] = str(e) - finally: - if db is not None: - db.close() + logger.debug("Agent host prepared state for validation (no DB operations)") return updated_state diff --git a/RAGManager/app/agents/nodes/parafraseo.py b/RAGManager/app/agents/nodes/parafraseo.py index d7f9308..b3cd257 100644 --- a/RAGManager/app/agents/nodes/parafraseo.py +++ b/RAGManager/app/agents/nodes/parafraseo.py @@ -1,43 +1,57 @@ -"""Nodo 4: Parafraseo - Paraphrases user input.""" +"""Nodo 4: Parafraseo - Saves message, retrieves chat history, and paraphrases user input.""" + +import logging from app.agents.state import AgentState from langchain_core.messages import SystemMessage from langchain_openai import ChatOpenAI +logger = logging.getLogger(__name__) + llm = ChatOpenAI(model="gpt-5-nano") def parafraseo(state: AgentState) -> AgentState: """ - Parafraseo node - Paraphrases the user input. + Parafraseo node - Saves message to DB, retrieves chat history, and paraphrases user input. This node: - 1. Takes the adjusted text from Fallback Inicial - 2. Paraphrases it to improve clarity or adjust format - 3. Prepares text for retrieval step + 1. Saves the user's message to the chat session in PostgreSQL + 2. Retrieves all chat messages for the session (including the newly saved message) + 3. Paraphrases the user input using chat history to improve clarity + 4. Prepares text for retrieval step Args: - state: Agent state containing adjusted_text + state: Agent state containing prompt, chat_session_id, and user_id Returns: - Updated state with paraphrased_text set + Updated state with chat_messages, paraphrased_text set """ - # TODO: Implement paraphrasing logic + updated_state = state.copy() + + # TODO: Implement endpoint call to save message and retrieve chat history # This should: - # 1. Use an LLM or paraphrasing model to rephrase the text - # 2. Improve clarity, adjust tone, or format as needed - # 3. 
Set paraphrased_text with the result - - # Paraphrase the last message using history + # 1. Call an endpoint (not yet developed) that: + # - Saves the current user message to the chat session + # - Retrieves all chat messages for the session (including the newly saved message) + # - Returns the updated chat_messages list + # 2. Update state with chat_messages from the endpoint response + # 3. Handle errors appropriately (session not found, permission denied, etc.) + + # Placeholder: For now, we'll use empty chat history + # Once the endpoint is implemented, replace this with the actual endpoint call + updated_state["chat_messages"] = None + logger.warning("Chat history retrieval endpoint not yet implemented - using empty history") + # Paraphrase the last message using history system_instruction = """You are an expert at paraphrasing user questions to be standalone and clear, given the conversation history. Reformulate the last user message to be a self-contained query that includes necessary context from previous messages. Do not answer the question, just rewrite it.""" - messages = [SystemMessage(content=system_instruction)] + state["messages"] + # Use messages from state (will include chat history once endpoint is implemented) + messages = [SystemMessage(content=system_instruction)] + state.get("messages", []) response = llm.invoke(messages) - updated_state = state.copy() # Create a copy of the state to update updated_state["paraphrased_text"] = response.content return updated_state