Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 36 additions & 17 deletions RAGManager/app/agents/graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,18 @@
from app.agents.nodes import (
agent_host,
context_builder,
fallback,
guard,
fallback_final,
fallback_inicial,
generator,
guard_final,
guard_inicial,
parafraseo,
retriever,
)
from app.agents.routing import (
route_after_guard_final,
route_after_guard_inicial,
)
from app.agents.state import AgentState
from app.agents.routing import route_after_guard

Expand Down Expand Up @@ -39,29 +46,35 @@ def create_agent_graph() -> StateGraph:

# Add nodes
workflow.add_node("agent_host", agent_host)
workflow.add_node("guard", guard)
workflow.add_node("fallback", fallback)
workflow.add_node("guard_inicial", guard_inicial)
workflow.add_node("fallback_inicial", fallback_inicial)
workflow.add_node("parafraseo", parafraseo)
workflow.add_node("retriever", retriever)
workflow.add_node("context_builder", context_builder)
workflow.add_node("generator", generator)
workflow.add_node("guard_final", guard_final)
workflow.add_node("fallback_final", fallback_final)

# Define edges
# Start -> agent_host
workflow.add_edge(START, "agent_host")

# agent_host -> guard
workflow.add_edge("agent_host", "guard")
# agent_host -> guard_inicial
workflow.add_edge("agent_host", "guard_inicial")

# guard -> conditional routing
# guard_inicial -> conditional routing
workflow.add_conditional_edges(
"guard",
route_after_guard,
"guard_inicial",
route_after_guard_inicial,
{
"malicious": "fallback", # go to fallback if malicious
"continue": "parafraseo", # Continue to parafraseo if valid
"malicious": "fallback_inicial", # Exception path: malicious content detected
"continue": "parafraseo", # Normal path: continue processing
},
)

# fallback_inicial -> END (stop flow with error message)
workflow.add_edge("fallback_inicial", END)

Comment thread
coderabbitai[bot] marked this conversation as resolved.
# parafraseo -> retriever
workflow.add_edge("parafraseo", "retriever")

Expand All @@ -71,15 +84,21 @@ def create_agent_graph() -> StateGraph:
# context_builder -> generator
# NOTE: the old single "guard" node was split into guard_inicial/guard_final;
# an edge to the now-nonexistent "guard" node would fail at graph compile time.
workflow.add_edge("context_builder", "generator")

# guard -> conditional routing
# generator -> guard_final
workflow.add_edge("generator", "guard_final")

# guard_final -> conditional routing
workflow.add_conditional_edges(
"guard",
route_after_guard,
"guard_final",
route_after_guard_final,
{
"malicious": "fallback", # go to fallback if malicious
"continue": END, # if there's no error ends
"risky": "fallback_final", # Exception path: risky content detected
"continue": END, # Normal path: end successfully
},
)
workflow.add_edge("fallback", END)

# fallback_final -> END (stop flow with error message)
workflow.add_edge("fallback_final", END)

Comment thread
coderabbitai[bot] marked this conversation as resolved.
# Compile the graph
return workflow.compile()
12 changes: 8 additions & 4 deletions RAGManager/app/agents/nodes/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,19 @@

from app.agents.nodes.agent_host import agent_host
from app.agents.nodes.context_builder import context_builder
from app.agents.nodes.fallback import fallback
from app.agents.nodes.guard import guard
from app.agents.nodes.fallback_final import fallback_final
from app.agents.nodes.fallback_inicial import fallback_inicial
from app.agents.nodes.generator import generator
from app.agents.nodes.guard_final import guard_final
from app.agents.nodes.guard_inicial import guard_inicial
from app.agents.nodes.parafraseo import parafraseo
from app.agents.nodes.retriever import retriever

__all__ = [
"agent_host",
"guard",
"fallback",
"guard_inicial",
"guard_final",
"fallback_inicial",
"parafraseo",
"retriever",
"context_builder",
Expand Down
30 changes: 30 additions & 0 deletions RAGManager/app/agents/nodes/fallback_final.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
"""Nodo 8: Fallback Final - Stops processing when risky content is detected."""

import logging

from app.agents.state import AgentState

logger = logging.getLogger(__name__)


def fallback_final(state: AgentState) -> AgentState:
    """
    Fallback Final node - Stops processing when risky content is detected.

    This node:
    1. Ensures error_message explains why the response was blocked, preserving
       any more specific message already set by guard_final
    2. Stops the flow by routing to END

    Args:
        state: Agent state containing the response flagged as risky

    Returns:
        Updated state with error_message set, ready to route to END
    """
    updated_state = state.copy()

    # Preserve the more specific message set upstream (guard_final sets a
    # PII-specific error_message); only fall back to the generic one if absent.
    if not updated_state.get("error_message"):
        updated_state["error_message"] = "The information requested is classified or not free to know."
    logger.warning("Risky content detected. Stopping processing. Response content not logged for security.")

    return updated_state
30 changes: 30 additions & 0 deletions RAGManager/app/agents/nodes/fallback_inicial.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
"""Nodo 3: Fallback Inicial - Stops processing when malicious content is detected."""

import logging

from app.agents.state import AgentState

logger = logging.getLogger(__name__)


def fallback_inicial(state: AgentState) -> AgentState:
    """
    Fallback Inicial node - Stops processing when malicious content is detected.

    This node:
    1. Ensures error_message explains why the request was blocked, preserving
       the more specific message already set by guard_inicial
    2. Stops the flow by routing to END

    Args:
        state: Agent state containing the prompt flagged as malicious

    Returns:
        Updated state with error_message set, ready to route to END
    """
    updated_state = state.copy()

    # Preserve the more specific message from guard_inicial (e.g. the
    # jailbreak-detection message); only set the generic one when none exists,
    # so detection context is not overwritten.
    if not updated_state.get("error_message"):
        updated_state["error_message"] = "The user's intentions break the chatbot's rules."
    logger.warning("Malicious content detected. Stopping processing. Prompt content not logged for security.")

    return updated_state
74 changes: 74 additions & 0 deletions RAGManager/app/agents/nodes/guard_final.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
"""Nodo Guard Final - Validates generated response for PII (risky information detection)."""

import logging

from guardrails import Guard
from guardrails.hub import DetectPII

from app.agents.state import AgentState
from app.core.config import settings

logger = logging.getLogger(__name__)

# Initialize Guard with DetectPII validator
# Note: The validator must be installed via: guardrails hub install hub://guardrails/detect_pii
_guard_final = Guard().use(
DetectPII(
pii_entities=settings.guardrails_pii_entities,
on_fail="noop", # Don't raise exceptions, handle via state flags
)
)


def guard_final(state: AgentState) -> AgentState:
    """
    Guard final node - Validates generated response for PII using Guardrails DetectPII.

    This node:
    1. Validates the generated_response using the Guardrails DetectPII validator
    2. Sets is_risky flag if PII is detected
    3. Sets error_message if risky content is detected
    4. Fails CLOSED on validator errors: if PII detection itself fails we cannot
       prove the response is safe, so the response is blocked rather than risking
       a PII leak (unlike guard_inicial, where failing open on input validation
       is the lesser risk)

    Args:
        state: Agent state containing the generated_response

    Returns:
        Updated state with is_risky and error_message set
    """
    updated_state = state.copy()
    generated_response = state.get("generated_response", "")

    if not generated_response:
        # Empty response is considered safe: there is nothing to leak.
        updated_state["is_risky"] = False
        updated_state["error_message"] = None
        return updated_state

    try:
        # Validate the generated response using Guardrails
        validation_result = _guard_final.validate(generated_response)

        # validation_passed is False when the DetectPII validator flagged content
        if validation_result.validation_passed:
            updated_state["is_risky"] = False
            updated_state["error_message"] = None
            logger.debug("Generated response passed PII detection")
        else:
            # PII detected
            updated_state["is_risky"] = True
            updated_state["error_message"] = (
                "PII detected in generated response. The information requested is classified or not free to know."
            )
            logger.warning("PII detected in generated response. Response content not logged for security.")

    except Exception as e:
        # SECURITY: fail closed. A validator error means the response could not
        # be checked for PII, so we must not return it to the user.
        logger.error(f"Error during PII detection: {e}")
        updated_state["is_risky"] = True
        updated_state["error_message"] = (
            "Unable to validate response safety. Please try again later."
        )

    return updated_state
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
"""Nodo 2: Guard - Validates for malicious content."""
"""Nodo 2: Guard Inicial - Validates for malicious content (jailbreak detection)."""

import logging

Expand All @@ -12,7 +12,7 @@

# Initialize Guard with DetectJailbreak validator
# Note: The validator must be installed via: guardrails hub install hub://guardrails/detect_jailbreak
_guard = Guard().use(
_guard_inicial = Guard().use(
DetectJailbreak(
threshold=settings.guardrails_jailbreak_threshold,
device=settings.guardrails_device,
Expand All @@ -21,9 +21,9 @@
)


def guard(state: AgentState) -> AgentState:
def guard_inicial(state: AgentState) -> AgentState:
"""
Guard node - Validates user input for malicious content using Guardrails DetectJailbreak.
Guard inicial node - Validates user input for jailbreak attempts using Guardrails DetectJailbreak.

This node:
1. Validates the prompt using Guardrails DetectJailbreak validator
Expand All @@ -49,7 +49,7 @@ def guard(state: AgentState) -> AgentState:

try:
# Validate the prompt using Guardrails
validation_result = _guard.validate(prompt)
validation_result = _guard_inicial.validate(prompt)

# Check if validation passed
# The validator returns ValidationResult with outcome
Expand All @@ -64,7 +64,7 @@ def guard(state: AgentState) -> AgentState:
updated_state["error_message"] = (
"Jailbreak attempt detected. Your request contains content that violates security policies."
)
logger.warning("Jailbreak attempt detected in prompt (len=%d)", len(prompt))
logger.warning("Jailbreak attempt detected. Prompt content not logged for security.")

except Exception as e:
# If validation fails due to error, log it but don't block the request
Expand Down
21 changes: 19 additions & 2 deletions RAGManager/app/agents/routing.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@
from app.agents.state import AgentState


def route_after_guard(state: AgentState) -> str:
def route_after_guard_inicial(state: AgentState) -> str:
"""
Route after Guard node (Nodo 2) validation.
Route after Guard Inicial node validation.

Determines the next step based on whether the prompt was flagged as malicious.

Expand All @@ -18,3 +18,20 @@ def route_after_guard(state: AgentState) -> str:
if state.get("is_malicious", False):
return "malicious"
return "continue"


def route_after_guard_final(state: AgentState) -> str:
    """
    Route after Guard Final node validation.

    Inspects the is_risky flag (set by guard_final) to decide whether the flow
    proceeds to the final fallback or ends normally.

    Args:
        state: Current agent state

    Returns:
        "risky" if the response is risky, "continue" otherwise
    """
    return "risky" if state.get("is_risky", False) else "continue"
13 changes: 13 additions & 0 deletions RAGManager/app/core/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,19 @@ class Settings(BaseSettings):
default="cpu",
description="Device for model inference.",
)
guardrails_pii_entities: list[str] = Field(
    default=[
        "EMAIL_ADDRESS",
        "PHONE_NUMBER",
        "CREDIT_CARD",
        # Presidio/DetectPII recognize "US_SSN"; the bare "SSN" entity name
        # is invalid and would silently disable SSN detection.
        "US_SSN",
        "US_PASSPORT",
        "US_DRIVER_LICENSE",
        "IBAN_CODE",
        "IP_ADDRESS",
    ],
    description="List of PII entity types to detect using DetectPII validator.",
)
Comment on lines +50 to +62
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🔴 Critical

🧩 Analysis chain

🌐 Web query:

guardrails-ai DetectPII validator version 0.6.2 valid entity types

💡 Result:

Here are the valid PII entity types supported by the Guardrails DetectPII/GuardrailsPII validator (as exposed in the validator hub / repo documentation). [1][2]

  • CREDIT_CARD
  • CRYPTO
  • DATE_TIME
  • EMAIL_ADDRESS
  • IBAN_CODE
  • IP_ADDRESS
  • NRP
  • LOCATION
  • PERSON
  • PHONE_NUMBER
  • MEDICAL_LICENSE
  • URL
  • US_BANK_NUMBER
  • US_DRIVER_LICENSE
  • US_ITIN
  • US_PASSPORT
  • US_SSN
  • UK_NHS
  • ES_NIF
  • ES_NIE
  • IT_FISCAL_CODE
  • IT_DRIVER_LICENSE
  • IT_VAT_CODE
  • IT_PASSPORT
  • IT_IDENTITY_CARD
  • PL_PESEL
  • SG_NRIC_FIN
  • SG_UEN
  • AU_ABN
  • AU_ACN
  • AU_TFN
  • AU_MEDICARE
  • IN_PAN
  • IN_AADHAAR
  • IN_VEHICLE_REGISTRATION
  • IN_VOTER
  • IN_PASSPORT
  • FI_PERSONAL_IDENTITY_CODE

Sources: Guardrails PII validator docs and DetectPII README.
[1] Guardrails PII validator (guardrails-ai/guardrails_pii).
[2] Guardrails DetectPII (guardrails-ai/detect_pii).


🌐 Web query:

presidio-analyzer 2.2.360 supported PII entity types

💡 Result:

Here are the PII entity types that Presidio Analyzer supports (as listed in the Presidio docs). Global entities, then country/region-specific entities:

Global:
CREDIT_CARD, CRYPTO, DATE_TIME, EMAIL_ADDRESS, IBAN_CODE, IP_ADDRESS, NRP, LOCATION, PERSON, PHONE_NUMBER, MEDICAL_LICENSE, URL. [1]

USA:
US_BANK_NUMBER, US_DRIVER_LICENSE, US_ITIN, US_PASSPORT, US_SSN. [1]

UK:
UK_NHS, UK_NINO. [1]

Spain:
ES_NIF, ES_NIE. [1]

Italy:
IT_FISCAL_CODE, IT_DRIVER_LICENSE, IT_VAT_CODE, IT_PASSPORT, IT_IDENTITY_CARD. [1]

Poland:
PL_PESEL. [1]

Singapore:
SG_NRIC_FIN, SG_UEN. [1]

Australia:
AU_ABN, AU_ACN, AU_TFN, AU_MEDICARE. [1]

India:
IN_PAN, IN_AADHAAR, IN_VEHICLE_REGISTRATION, IN_VOTER, IN_PASSPORT, IN_GSTIN. [1]

Finland:
FI_PERSONAL_IDENTITY_CODE. [1]

Korea:
KR_RRN. [1]

Thai:
TH_TNIN. [1]

For how to retrieve the runtime list programmatically (AnalyzerEngine.get_supported_entities), see the Analyzer API reference. [2]

Sources:
[1] Presidio — Supported entities documentation. [2] Presidio Analyzer Python API (get_supported_entities).


Change SSN to US_SSN in the default PII entity types list.

The entity type SSN is not recognized by guardrails-ai DetectPII validator and presidio-analyzer. The correct entity type is US_SSN. All other current entity types in the list (EMAIL_ADDRESS, PHONE_NUMBER, CREDIT_CARD, US_PASSPORT, US_DRIVER_LICENSE, IBAN_CODE, IP_ADDRESS) are valid.

Additionally, consider including PERSON, LOCATION, DATE_TIME, US_BANK_NUMBER, MEDICAL_LICENSE, and CRYPTO in the default list, as these are all valid and commonly used PII entity types supported by presidio-analyzer.

🤖 Prompt for AI Agents
In RAGManager/app/core/config.py around lines 50 to 62, the default PII entity
list contains "SSN" which is not recognized by the DetectPII validator; replace
"SSN" with "US_SSN" and update the default list to optionally include additional
commonly used presidio-supported entities such as "PERSON", "LOCATION",
"DATE_TIME", "US_BANK_NUMBER", "MEDICAL_LICENSE", and "CRYPTO" if you want
broader detection; ensure the list items remain string literals and the Field
description remains unchanged.

model_config = SettingsConfigDict(
env_file=".env",
env_file_encoding="utf-8",
Expand Down
4 changes: 3 additions & 1 deletion RAGManager/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,9 @@ dependencies = [
"pydantic-settings>=2.0.0",
"typing-extensions>=4.15.0",
"uvicorn>=0.38.0",
"guardrails-ai>=0.5.10",
"guardrails-ai>=0.6.2",
"presidio-analyzer>=2.2.360",
"presidio-anonymizer>=2.2.360",
]

[project.optional-dependencies]
Expand Down