From 01cceba628ab8eb0348d30fa95f086596b82f8eb Mon Sep 17 00:00:00 2001
From: JPAmorin <juanpabloamorinjusto@gmail.com>
Date: Sun, 14 Dec 2025 20:15:02 -0300
Subject: [PATCH] Fixed flaw in prompt processing logic.

---
 RAGManager/app/agents/graph.py            |  17 +--
 RAGManager/app/agents/nodes/agent_host.py | 129 +++-------------------
 RAGManager/app/agents/nodes/parafraseo.py |  44 +++++---
 3 files changed, 56 insertions(+), 134 deletions(-)

diff --git a/RAGManager/app/agents/graph.py b/RAGManager/app/agents/graph.py
index 04eb1d8..9c869bd 100644
--- a/RAGManager/app/agents/graph.py
+++ b/RAGManager/app/agents/graph.py
@@ -18,15 +18,18 @@ def create_agent_graph() -> StateGraph:
     Create and configure the LangGraph agent graph.
 
     The graph implements the following flow:
-    1. START -> agent_host (Nodo 1)
-    2. agent_host -> guard (Nodo 2)
-    3. guard -> [conditional] -> fallback (Nodo 3) or END
-    4. fallback -> parafraseo (Nodo 4)
+    1. START -> agent_host (Nodo 1) - Prepares state, no DB operations
+    2. agent_host -> guard (Nodo 2) - Validates for malicious content
+    3. guard -> [conditional]:
+       - malicious -> fallback -> END (stops processing, no DB save)
+       - continue -> parafraseo (Nodo 4)
+    4. parafraseo -> Saves message to DB, retrieves chat history, paraphrases
     5. parafraseo -> retriever (Nodo 5)
     6. retriever -> context_builder (Nodo 6)
-    7. context_builder -> generator (Nodo 7)
-    8. generator -> fallback (Nodo 8)
-    9. fallback -> [conditional] -> END (with final_response) or END (with error)
+    7. context_builder -> guard (validates response)
+    8. guard -> [conditional]:
+       - malicious -> fallback -> END
+       - continue -> END (success)
 
     Returns:
         Configured StateGraph instance ready for execution
diff --git a/RAGManager/app/agents/nodes/agent_host.py b/RAGManager/app/agents/nodes/agent_host.py
index 231e726..b619573 100644
--- a/RAGManager/app/agents/nodes/agent_host.py
+++ b/RAGManager/app/agents/nodes/agent_host.py
@@ -1,12 +1,8 @@
-"""Nodo 1: Agent Host - Entry point that saves initial context."""
+"""Nodo 1: Agent Host - Entry point that prepares initial state."""
 
 import logging
-from uuid import UUID, uuid4
 
 from app.agents.state import AgentState
-from app.core.config import settings
-from app.core.database_connection import SessionLocal
-from app.models.chat import ChatMessage, ChatSession
 
 logger = logging.getLogger(__name__)
 
@@ -17,128 +13,37 @@ def agent_host(state: AgentState) -> AgentState:
 
     This node:
     1. Receives the initial prompt and optional chat_session_id
-    2. Creates or retrieves chat session from PostgreSQL
-    3. Saves the user's message to the chat session
-    4. Retrieves all chat messages for the session
-    5. Prepares state for validation
+    2. Extracts the prompt from messages
+    3. Prepares state for validation (no DB operations yet)
+    
+    Note: Chat history retrieval and message saving are deferred to the parafraseo
+    node to ensure malicious messages are not saved to the database.
 
     Args:
         state: Agent state containing the user prompt and optional chat_session_id
 
     Returns:
-        Updated state with chat_session_id, chat_messages, and initial_context set
+        Updated state with prompt and initial_context set (no DB operations)
     """
     updated_state = state.copy()
     
-    prompt = state["messages"][-1]
-    chat_session_id = state.get("chat_session_id")
-    user_id = state.get("user_id")
+    # Extract prompt from messages
+    messages = state.get("messages", [])
+    last_message = messages[-1] if messages else None
+    prompt = last_message.content if last_message else ""
 
     # Validate user_id is provided
+    user_id = state.get("user_id")
     if not user_id:
         logger.error("user_id is required in state but was not provided")
-        updated_state["chat_session_id"] = None
-        updated_state["chat_messages"] = None
-        updated_state["initial_context"] = prompt
         updated_state["error_message"] = "user_id is required"
         return updated_state
 
-    db = None
-    try:
-        db = SessionLocal()
-        # Get or create chat session
-        chat_session = None
-        if chat_session_id:
-            try:
-                session_uuid = UUID(chat_session_id)
-                # Validate ownership: query with both id and user_id filters
-                chat_session = db.query(ChatSession).filter(
-                    ChatSession.id == session_uuid,
-                    ChatSession.user_id == user_id
-                ).first()
-                if not chat_session:
-                    # Log security violation - attempted cross-session access
-                    logger.warning(
-                        f"Chat session {chat_session_id} not found or access denied for user {user_id}"
-                    )
-                    # Don't create new session automatically - this prevents session hijacking
-                    raise PermissionError("Chat session not found or access denied")
-            except (ValueError, TypeError):
-                logger.warning(f"Invalid chat_session_id format: {chat_session_id}, creating new session")
-            except PermissionError:
-                # Re-raise permission errors
-                raise
-        
-        # Create new session if needed
-        if not chat_session:
-            chat_session = ChatSession(id=uuid4(), user_id=user_id)
-            db.add(chat_session)
-            db.flush()
-
-        # Create new message with user's prompt
-        new_message = ChatMessage(
-            session_id=chat_session.id,
-            sender="user",
-            message=prompt,
-        )
-        db.add(new_message)
-        db.flush()
-
-        # Query messages for the session with bounded limit (most recent first, then reverse for chronological order)
-        messages = (
-            db.query(ChatMessage)
-            .filter(ChatMessage.session_id == chat_session.id)
-            .order_by(ChatMessage.created_at.desc())
-            .limit(settings.chat_message_limit)
-            .all()
-        )
-        # Reverse to get chronological order
-        messages = list(reversed(messages))
-
-        # Convert messages to dictionaries
-        chat_messages = [
-            {
-                "id": msg.id,
-                "session_id": str(msg.session_id),
-                "sender": msg.sender,
-                "message": msg.message,
-                "created_at": msg.created_at.isoformat() if msg.created_at else None,
-            }
-            for msg in messages
-        ]
-
-        # Commit the transaction
-        db.commit()
-
-        # Update state
-        updated_state["chat_session_id"] = str(chat_session.id)
-        updated_state["chat_messages"] = chat_messages
-        updated_state["initial_context"] = prompt
-
-        logger.info(f"Chat session {chat_session.id} updated with {len(chat_messages)} messages")
+    # Set prompt and initial context (no DB operations)
+    updated_state["prompt"] = prompt
+    updated_state["initial_context"] = prompt
+    updated_state["chat_messages"] = None  # Will be set in parafraseo after validation
 
-    except PermissionError as e:
-        # Rollback on permission error
-        if db is not None:
-            db.rollback()
-        logger.warning(f"Permission denied in agent_host: {e}")
-        # Set error state for permission violations
-        updated_state["chat_session_id"] = None
-        updated_state["chat_messages"] = None
-        updated_state["initial_context"] = prompt
-        updated_state["error_message"] = "Chat session not found or access denied"
-    except Exception as e:
-        # Rollback on error
-        if db is not None:
-            db.rollback()
-        logger.error(f"Error in agent_host: {e}", exc_info=True)
-        # Set error state but don't fail completely
-        updated_state["chat_session_id"] = None
-        updated_state["chat_messages"] = None
-        updated_state["initial_context"] = prompt
-        updated_state["error_message"] = str(e)
-    finally:
-        if db is not None:
-            db.close()
+    logger.debug("Agent host prepared state for validation (no DB operations)")
 
     return updated_state
diff --git a/RAGManager/app/agents/nodes/parafraseo.py b/RAGManager/app/agents/nodes/parafraseo.py
index d7f9308..b3cd257 100644
--- a/RAGManager/app/agents/nodes/parafraseo.py
+++ b/RAGManager/app/agents/nodes/parafraseo.py
@@ -1,43 +1,57 @@
-"""Nodo 4: Parafraseo - Paraphrases user input."""
+"""Nodo 4: Parafraseo - Saves message, retrieves chat history, and paraphrases user input."""
+
+import logging
 
 from app.agents.state import AgentState
 from langchain_core.messages import SystemMessage
 from langchain_openai import ChatOpenAI
 
+logger = logging.getLogger(__name__)
+
 llm = ChatOpenAI(model="gpt-5-nano")
 
 
 def parafraseo(state: AgentState) -> AgentState:
     """
-    Parafraseo node - Paraphrases the user input.
+    Parafraseo node - Saves message to DB, retrieves chat history, and paraphrases user input.
 
     This node:
-    1. Takes the adjusted text from Fallback Inicial
-    2. Paraphrases it to improve clarity or adjust format
-    3. Prepares text for retrieval step
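+    1. (Planned, not yet implemented) Saves the user's message to the chat session in PostgreSQL
+    2. (Planned, not yet implemented) Retrieves all chat messages for the session (including the newly saved message)
+    3. Paraphrases the last user message, using the messages currently in state, to improve clarity
+    4. Stores the result in paraphrased_text to prepare text for the retrieval step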
 
     Args:
-        state: Agent state containing adjusted_text
+        state: Agent state containing prompt, chat_session_id, and user_id
 
     Returns:
-        Updated state with paraphrased_text set
+        Updated state with chat_messages and paraphrased_text set
     """
-    # TODO: Implement paraphrasing logic
+    updated_state = state.copy()
+    
+    # TODO: Implement endpoint call to save message and retrieve chat history
     # This should:
-    # 1. Use an LLM or paraphrasing model to rephrase the text
-    # 2. Improve clarity, adjust tone, or format as needed
-    # 3. Set paraphrased_text with the result
-
-    # Paraphrase the last message using history
+    # 1. Call an endpoint (not yet developed) that:
+    #    - Saves the current user message to the chat session
+    #    - Retrieves all chat messages for the session (including the newly saved message)
+    #    - Returns the updated chat_messages list
+    # 2. Update state with chat_messages from the endpoint response
+    # 3. Handle errors appropriately (session not found, permission denied, etc.)
+    
+    # Placeholder: until the endpoint exists, chat_messages is left as None (no history loaded).
+    # Once the endpoint is implemented, replace this with the actual endpoint call.
+    updated_state["chat_messages"] = None
+    logger.warning("Chat history retrieval endpoint not yet implemented - using empty history")
     
+    # Paraphrase the last message using history
     system_instruction = """You are an expert at paraphrasing user questions to be standalone and clear, given the conversation history.
 Reformulate the last user message to be a self-contained query that includes necessary context from previous messages.
 Do not answer the question, just rewrite it."""
 
-    messages = [SystemMessage(content=system_instruction)] + state["messages"]
+    # Use messages from state (will include chat history once endpoint is implemented)
+    messages = [SystemMessage(content=system_instruction)] + state.get("messages", [])
     
     response = llm.invoke(messages)
-    updated_state = state.copy()  # Create a copy of the state to update
     updated_state["paraphrased_text"] = response.content
 
     return updated_state