Implement graph

JuanFKurucz · JuanFKurucz · commit 31c1d1a74a35 · 2025-12-18T23:26:32.000-03:00
diff --git a/.gitignore b/.gitignore
@@ -1,3 +1,5 @@
+*.pdf
+
 # Byte-compiled / optimized / DLL files
 __pycache__/
 *.py[cod]
diff --git a/DocsManager/app/core/config.py b/DocsManager/app/core/config.py
@@ -30,6 +30,7 @@ class Settings(BaseSettings):
     minio_secret_key: str
     minio_bucket: str = "documents"
     minio_use_ssl: bool = True
+    minio_folder: str = "rag-docs"  # Folder within bucket for RAG documents
 
     # Database Configuration (for SQLAlchemy)
     database_url: str = ""
diff --git a/DocsManager/app/models/__init__.py b/DocsManager/app/models/__init__.py
@@ -0,0 +1,7 @@
+"""Models for the DocsManager application."""
+
+from app.models.document import Document
+from app.models.document_chunks import DocumentChunk
+
+__all__ = ["Document", "DocumentChunk"]
+
diff --git a/DocsManager/main.py b/DocsManager/main.py
@@ -3,6 +3,8 @@
 
 from app.api.routes import admin, base
 from app.core.db_connection import init_db
+# Import models to ensure SQLAlchemy can resolve relationships
+from app.models import Document, DocumentChunk  # noqa: F401
 
 # Configure logging
 logging.basicConfig(
diff --git a/RAGManager/.dockerignore b/RAGManager/.dockerignore
@@ -0,0 +1,23 @@
+.venv
+__pycache__
+*.pyc
+*.pyo
+*.pyd
+.Python
+*.so
+*.egg
+*.egg-info
+dist
+build
+.git
+.gitignore
+.idea
+.vscode
+*.swp
+*.swo
+*~
+.DS_Store
+.env
+.env.local
+*.log
+
diff --git a/RAGManager/Dockerfile b/RAGManager/Dockerfile
@@ -19,21 +19,31 @@ COPY pyproject.toml uv.lock* ./
 
 RUN uv sync --no-dev --no-cache
 
-# Run the Guardrails configure command to create a .guardrailsrc file
-# Only configure if GUARDRAILS_API_KEY is provided
-RUN uv run guardrails configure --enable-metrics --enable-remote-inferencing --token "$GUARDRAILS_API_KEY"
-
-# Install required guardrails validators
-RUN uv run guardrails hub install hub://guardrails/detect_jailbreak --no-install-local-models
-RUN uv run guardrails hub install hub://guardrails/detect_pii --no-install-local-models
-RUN uv run guardrails hub install hub://guardrails/toxic_language>=0.0.2 --no-install-local-models
-
-# The ToxicLanguage Validator uses the punkt tokenizer, so we need to download that to a known directory
-# Set the directory for nltk data
-ENV NLTK_DATA=/opt/nltk_data
-
-# Download punkt data
-RUN uv run python -m nltk.downloader -d /opt/nltk_data punkt_tab
+# Guardrails disabled for now to speed up builds
+# # Run the Guardrails configure command to create a .guardrailsrc file
+# # Only configure if GUARDRAILS_API_KEY is provided
+# RUN if [ -n "$GUARDRAILS_API_KEY" ]; then \
+#         uv run guardrails configure --enable-metrics --enable-remote-inferencing --token "$GUARDRAILS_API_KEY"; \
+#     else \
+#         echo "Warning: GUARDRAILS_API_KEY not provided, skipping guardrails configuration"; \
+#     fi
+
+# # Install required guardrails validators
+# # Note: Removing --no-install-local-models to ensure Python packages are installed
+# RUN uv run guardrails hub install hub://guardrails/detect_jailbreak
+# RUN uv run guardrails hub install hub://guardrails/detect_pii
+# RUN uv run guardrails hub install "hub://guardrails/toxic_language>=0.0.2"
+
+# # Verify packages are accessible (this will fail the build if they're not found)
+# RUN uv run python -c "from guardrails.hub import DetectJailbreak, DetectPII, ToxicLanguage; print('Guardrails validators imported successfully')"
+
+# NLTK disabled - only needed for guardrails ToxicLanguage validator
+# # The ToxicLanguage Validator uses the punkt tokenizer, so we need to download that to a known directory
+# # Set the directory for nltk data
+# ENV NLTK_DATA=/opt/nltk_data
+#
+# # Download punkt data
+# RUN uv run python -m nltk.downloader -d /opt/nltk_data punkt_tab
 
 COPY . .
 
diff --git a/RAGManager/app/agents/nodes/__init__.py b/RAGManager/app/agents/nodes/__init__.py
@@ -4,7 +4,6 @@
 from app.agents.nodes.context_builder import context_builder
 from app.agents.nodes.fallback_final import fallback_final
 from app.agents.nodes.fallback_inicial import fallback_inicial
-from app.agents.nodes.generator import generator
 from app.agents.nodes.guard_final import guard_final
 from app.agents.nodes.guard_inicial import guard_inicial
 from app.agents.nodes.parafraseo import parafraseo
@@ -15,6 +14,7 @@
     "guard_inicial",
     "guard_final",
     "fallback_inicial",
+    "fallback_final",
     "parafraseo",
     "retriever",
     "context_builder",
diff --git a/RAGManager/app/agents/nodes/context_builder.py b/RAGManager/app/agents/nodes/context_builder.py
@@ -55,7 +55,7 @@ def context_builder(state: AgentState) -> AgentState:
         logger.warning("No relevant chunks found for context building")
     
     # Create enriched query combining paraphrased text and context
-   enriched_query = f"""Pregunta del usuario: {paraphrased}
+    enriched_query = f"""Pregunta del usuario: {paraphrased}
 
 Contexto relevante de la base de conocimiento:
 {context_section}
diff --git a/RAGManager/app/agents/nodes/guard_final.py b/RAGManager/app/agents/nodes/guard_final.py
@@ -2,28 +2,30 @@
 
 import logging
 
-from guardrails import Guard
-from guardrails.hub import DetectPII, ToxicLanguage
+# Guardrails disabled for now
+# from guardrails import Guard
+# from guardrails.hub import DetectPII, ToxicLanguage
 
 from app.agents.state import AgentState
-from app.core.config import settings
+# from app.core.config import settings
 
 logger = logging.getLogger(__name__)
 
-# Initialize Guard with DetectPII and ToxicLanguage validators
-# Note: The validators must be installed via:
-#   guardrails hub install hub://guardrails/detect_pii
-#   guardrails hub install hub://guardrails/toxic_language
-_guard_final = Guard().use(
-    DetectPII(
-        pii_entities=settings.guardrails_pii_entities,
-        on_fail="noop",  # Don't raise exceptions, handle via state flags
-    )
-).use(
-    ToxicLanguage(
-        on_fail="noop",  # Don't raise exceptions, handle via state flags
-    )
-)
+# Guardrails disabled for now - just pass through
+# # Initialize Guard with DetectPII and ToxicLanguage validators
+# # Note: The validators must be installed via:
+# #   guardrails hub install hub://guardrails/detect_pii
+# #   guardrails hub install hub://guardrails/toxic_language
+# _guard_final = Guard().use(
+#     DetectPII(
+#         pii_entities=settings.guardrails_pii_entities,
+#         on_fail="noop",  # Don't raise exceptions, handle via state flags
+#     )
+# ).use(
+#     ToxicLanguage(
+#         on_fail="noop",  # Don't raise exceptions, handle via state flags
+#     )
+# )
 
 
 def guard_final(state: AgentState) -> AgentState:
@@ -50,31 +52,36 @@ def guard_final(state: AgentState) -> AgentState:
         updated_state["error_message"] = None
         return updated_state
 
-    try:
-        # Validate the generated response using Guardrails
-        validation_result = _guard_final.validate(generated_response)
-
-        # Check if validation passed
-        # The validator returns ValidationResult with outcome
-        # If validation fails, outcome will indicate failure
-        if validation_result.validation_passed:
-            updated_state["is_risky"] = False
-            updated_state["error_message"] = None
-            logger.debug("Generated response passed PII and toxic language detection")
-        else:
-            # PII or toxic language detected
-            updated_state["is_risky"] = True
-            updated_state["error_message"] = (
-               "Contenido riesgoso detectado en la respuesta generada. La información solicitada está clasificada o no es de libre acceso."
-            )
-            logger.warning("Risky content detected in generated response. Response content not logged for security.")
-
-    except Exception as e:
-        # If validation fails due to error, log it but don't block the request
-        # This is a safety measure - if Guardrails fails, we allow the request
-        # but log the error for monitoring
-        logger.error(f"Error during PII detection: {e}")
-        updated_state["is_risky"] = False
-        updated_state["error_message"] = None
+    # Guardrails disabled for now - just pass through
+    updated_state["is_risky"] = False
+    updated_state["error_message"] = None
+    logger.debug("Guardrails disabled - response passed through without validation")
+    
+    # try:
+    #     # Validate the generated response using Guardrails
+    #     validation_result = _guard_final.validate(generated_response)
+    #
+    #     # Check if validation passed
+    #     # The validator returns ValidationResult with outcome
+    #     # If validation fails, outcome will indicate failure
+    #     if validation_result.validation_passed:
+    #         updated_state["is_risky"] = False
+    #         updated_state["error_message"] = None
+    #         logger.debug("Generated response passed PII and toxic language detection")
+    #     else:
+    #         # PII or toxic language detected
+    #         updated_state["is_risky"] = True
+    #         updated_state["error_message"] = (
+    #            "Contenido riesgoso detectado en la respuesta generada. La información solicitada está clasificada o no es de libre acceso."
+    #         )
+    #         logger.warning("Risky content detected in generated response. Response content not logged for security.")
+    #
+    # except Exception as e:
+    #     # If validation fails due to error, log it but don't block the request
+    #     # This is a safety measure - if Guardrails fails, we allow the request
+    #     # but log the error for monitoring
+    #     logger.error(f"Error during PII detection: {e}")
+    #     updated_state["is_risky"] = False
+    #     updated_state["error_message"] = None
 
     return updated_state
diff --git a/RAGManager/app/agents/nodes/guard_inicial.py b/RAGManager/app/agents/nodes/guard_inicial.py
@@ -2,29 +2,31 @@
 
 import logging
 
-from guardrails import Guard
-from guardrails.hub import DetectJailbreak, ToxicLanguage
+# Guardrails disabled for now
+# from guardrails import Guard
+# from guardrails.hub import DetectJailbreak, ToxicLanguage
 
 from app.agents.state import AgentState
-from app.core.config import settings
+# from app.core.config import settings
 
 logger = logging.getLogger(__name__)
 
-# Initialize Guard with DetectJailbreak and ToxicLanguage validators
-# Note: The validators must be installed via:
-#   guardrails hub install hub://guardrails/detect_jailbreak
-#   guardrails hub install hub://guardrails/toxic_language
-_guard_inicial = Guard().use(
-    DetectJailbreak(
-        threshold=settings.guardrails_jailbreak_threshold,
-        device=settings.guardrails_device,
-        on_fail="noop",  # Don't raise exceptions, handle via state flags
-    )
-).use(
-    ToxicLanguage(
-        on_fail="noop",  # Don't raise exceptions, handle via state flags
-    )
-)
+# Guardrails disabled for now - just pass through
+# # Initialize Guard with DetectJailbreak and ToxicLanguage validators
+# # Note: The validators must be installed via:
+# #   guardrails hub install hub://guardrails/detect_jailbreak
+# #   guardrails hub install hub://guardrails/toxic_language
+# _guard_inicial = Guard().use(
+#     DetectJailbreak(
+#         threshold=settings.guardrails_jailbreak_threshold,
+#         device=settings.guardrails_device,
+#         on_fail="noop",  # Don't raise exceptions, handle via state flags
+#     )
+# ).use(
+#     ToxicLanguage(
+#         on_fail="noop",  # Don't raise exceptions, handle via state flags
+#     )
+# )
 
 
 def guard_inicial(state: AgentState) -> AgentState:
@@ -53,31 +55,36 @@ def guard_inicial(state: AgentState) -> AgentState:
         updated_state["error_message"] = None
         return updated_state
 
-    try:
-        # Validate the prompt using Guardrails
-        validation_result = _guard_inicial.validate(prompt)
-
-        # Check if validation passed
-        # The validator returns ValidationResult with outcome
-        # If validation fails, outcome will indicate failure
-        if validation_result.validation_passed:
-            updated_state["is_malicious"] = False
-            updated_state["error_message"] = None
-            logger.debug("Prompt passed jailbreak and toxic language detection")
-        else:
-            # Jailbreak or toxic language detected
-            updated_state["is_malicious"] = True
-            updated_state["error_message"] = (
-                "Contenido malicioso detectado. Tu solicitud contiene contenido que viola las políticas de seguridad."
-            )
-            logger.warning("Malicious content detected. Prompt content not logged for security.")
-
-    except Exception as e:
-        # If validation fails due to error, log it but don't block the request
-        # This is a safety measure - if Guardrails fails, we allow the request
-        # but log the error for monitoring
-        logger.error(f"Error during jailbreak detection: {e}")
-        updated_state["is_malicious"] = False
-        updated_state["error_message"] = None
+    # Guardrails disabled for now - just pass through
+    updated_state["is_malicious"] = False
+    updated_state["error_message"] = None
+    logger.debug("Guardrails disabled - prompt passed through without validation")
+    
+    # try:
+    #     # Validate the prompt using Guardrails
+    #     validation_result = _guard_inicial.validate(prompt)
+    #
+    #     # Check if validation passed
+    #     # The validator returns ValidationResult with outcome
+    #     # If validation fails, outcome will indicate failure
+    #     if validation_result.validation_passed:
+    #         updated_state["is_malicious"] = False
+    #         updated_state["error_message"] = None
+    #         logger.debug("Prompt passed jailbreak and toxic language detection")
+    #     else:
+    #         # Jailbreak or toxic language detected
+    #         updated_state["is_malicious"] = True
+    #         updated_state["error_message"] = (
+    #             "Contenido malicioso detectado. Tu solicitud contiene contenido que viola las políticas de seguridad."
+    #         )
+    #         logger.warning("Malicious content detected. Prompt content not logged for security.")
+    #
+    # except Exception as e:
+    #     # If validation fails due to error, log it but don't block the request
+    #     # This is a safety measure - if Guardrails fails, we allow the request
+    #     # but log the error for monitoring
+    #     logger.error(f"Error during jailbreak detection: {e}")
+    #     updated_state["is_malicious"] = False
+    #     updated_state["error_message"] = None
 
     return updated_state
diff --git a/RAGManager/app/services/chat.py b/RAGManager/app/services/chat.py
@@ -68,10 +68,15 @@ def save_user_message(db: Session, message: str, session_id: UUID | None = None)
         session_id = session.id
         logger.info(f"Created new chat session: {session_id}")
     else:
-        # Validate that the session exists
+        # Validate that the session exists, or create it if it doesn't
         session = db.query(ChatSession).filter(ChatSession.id == session_id).first()
         if not session:
-            raise ValueError(f"Chat session {session_id} not found")
+            # Session doesn't exist - create it (for AG-UI protocol compatibility)
+            logger.info(f"Session {session_id} not found, creating new session with provided UUID")
+            session = ChatSession(id=session_id)
+            db.add(session)
+            db.flush()  # Flush the new session row (caller-provided UUID) without committing
+            logger.info(f"Created new chat session with provided UUID: {session_id}")
 
     # 2. Create and save the user message
     user_message = ChatMessage(
diff --git a/RAGManager/app/services/chatMessage.py b/RAGManager/app/services/chatMessage.py

Original file line number	Diff line number	Diff line change
`@@ -1,3 +1,5 @@`
	`1`	`+*.pdf`
	`2`	`+`
`1`	`3`	`# Byte-compiled / optimized / DLL files`
`2`	`4`	`__pycache__/`
`3`	`5`	`*.py[cod]`