@@ -465,6 +465,133 @@ def validate_resource_mime_type(
465465 )
466466 raise ContentTypeError (mime_type , allowed_types )
467467
def detect_malicious_patterns(
    self,
    content: str,
    content_type: str = "content",
    user_email: Optional[str] = None,
    ip_address: Optional[str] = None,
) -> None:
    """Detect malicious patterns in content (US-3).

    Scans ``content`` against every regex in ``settings.content_blocked_patterns``
    (case-insensitive, with ``.`` matching newlines) and acts on the first match.
    Behavior depends on ``settings.content_pattern_validation_mode``:
        - lenient: logs the detection and returns without raising
        - any other mode (strict, moderate): logs the detection and raises
          ContentPatternError

    The scan is skipped entirely when
    ``settings.content_pattern_detection_enabled`` is false.

    Args:
        content: Content to scan for malicious patterns
        content_type: Type of content (e.g., "Resource content", "Prompt template")
        user_email: Optional user email for audit logging (sanitized)
        ip_address: Optional IP address for audit logging (sanitized)

    Raises:
        ContentPatternError: If a blocked pattern matches and the validation
            mode is not "lenient", or if the regex engine reports a timeout.

    Examples:
        >>> service = ContentSecurityService()
        >>> service.detect_malicious_patterns("Hello world")  # OK
        >>> try:
        ...     service.detect_malicious_patterns("<script>alert('XSS')</script>")
        ... except ContentPatternError as e:
        ...     print(f"Blocked: {e.violation_type}")
        Blocked: xss
    """
    if not settings.content_pattern_detection_enabled:
        logger.debug("Pattern detection disabled via CONTENT_PATTERN_DETECTION_ENABLED")
        return

    validation_mode = settings.content_pattern_validation_mode
    search_flags = re.IGNORECASE | re.DOTALL

    for pattern in settings.content_blocked_patterns:
        try:
            # The standard-library ``re`` functions accept no ``timeout``
            # argument on any Python version; passing one raises TypeError and
            # would break every scan. ReDoS mitigation therefore relies on the
            # blocked patterns themselves being safe.
            # NOTE(review): the original comment says pattern complexity is
            # validated in config.py - confirm that check exists.
            match = re.search(pattern, content, search_flags)
        except TimeoutError:
            # Stdlib ``re`` never raises this. The handler is retained so the
            # fail-closed path (CWE-400) keeps working if the engine is ever
            # replaced by one that enforces a matching timeout.
            sanitized = _sanitize_pii_for_logging(user_email, ip_address)
            logger.error(
                "Pattern matching timeout - possible ReDoS",
                extra={
                    "pattern_length": len(pattern),
                    "content_type": content_type,
                    **sanitized,
                },
            )
            raise ContentPatternError(
                pattern_matched="[timeout]",
                content_type=content_type,
                violation_type="redos_timeout",
            )

        if not match:
            continue

        # Determine violation type from the pattern and the matched text
        violation_type = self._classify_violation(pattern, match.group(0))

        # Log with sanitized PII
        sanitized = _sanitize_pii_for_logging(user_email, ip_address)
        logger.warning(
            "Malicious pattern detected",
            extra={
                "content_type": content_type,
                "violation_type": violation_type,
                "pattern_length": len(pattern),  # Don't log full pattern for security
                "validation_mode": validation_mode,
                **sanitized,
            },
        )

        # Lenient mode: the content is allowed. Scanning stops at the first
        # match, so later patterns are neither checked nor logged.
        if validation_mode == "lenient":
            logger.info(f"Lenient mode: allowing {content_type} with {violation_type} pattern")
            return

        # Every other mode (strict, moderate) rejects the content
        raise ContentPatternError(
            pattern_matched=match.group(0)[:50],  # Truncate for security
            content_type=content_type,
            content_snippet=content[max(0, match.start() - 20):match.end() + 20],
            violation_type=violation_type,
        )
566+
def _classify_violation(self, pattern: str, matched_text: str) -> str:
    """Classify violation type based on pattern and matched text.

    Categories are tested in a fixed order and the first hit wins:
    template injection, SQL injection, command injection, then XSS.

    Args:
        pattern: The regex pattern that matched
        matched_text: The actual text that was matched

    Returns:
        Violation type string (xss, command_injection, sql_injection, template_injection, unknown)
    """
    # Standard
    import re

    matched_lower = matched_text.lower()

    # Template injection: expression/statement delimiters
    if any(marker in matched_text for marker in ("{{", "{%", "${")):
        return "template_injection"

    # SQL injection: keywords must be whole words so that identifiers such as
    # "dropdown" or "updated" are not mistaken for SQL; a trailing "--" is the
    # SQL comment terminator.
    if re.search(r"\b(?:select|union|insert|delete|drop|update)\b", matched_lower) or matched_text.strip().endswith("--"):
        return "sql_injection"

    # Command injection: shell chaining operators and command substitution
    if any(token in matched_lower for token in ("rm -rf", "&&", "||", "`", "$(")):
        return "command_injection"

    # XSS: script/iframe markup, javascript: URLs, or an inline event handler.
    # The handler is recognised either from the configured pattern source or
    # from the matched text itself, so equivalent spellings of the pattern
    # (e.g. "on[a-z]+\s*=") are classified the same way.
    if (
        "<script" in matched_lower
        or "javascript:" in matched_lower
        or "<iframe" in matched_lower
        or r"on\w+\s*=" in pattern
        or re.search(r"\bon\w+\s*=", matched_lower)
    ):
        return "xss"

    return "unknown"
594+
468595 def validate_prompt_template (
469596 self ,
470597 template : str ,
@@ -487,6 +614,7 @@ def validate_prompt_template(
487614
488615 Raises:
489616 TemplateValidationError: If template validation fails
617+ ContentPatternError: If malicious patterns detected (US-3)
490618
491619 Examples:
492620 Valid template:
@@ -511,6 +639,15 @@ def validate_prompt_template(
511639 return
512640
513641 template_name = name or "unnamed"
642+
643+ # Step 0: Check for malicious patterns (US-3) BEFORE template validation
644+ # This makes the ContentPatternError handlers in prompt_service.py reachable
645+ self .detect_malicious_patterns (
646+ content = template ,
647+ content_type = "Prompt template" ,
648+ user_email = user_email ,
649+ ip_address = ip_address ,
650+ )
514651
515652 # Step 1: Check for balanced braces
516653 if not self ._check_balanced_braces (template ):
@@ -527,7 +664,8 @@ def validate_prompt_template(
527664 raise TemplateValidationError (template_name , "Template contains dangerous pattern that could lead to code injection" , pattern = pattern )
528665
529666 # Step 3: Validate Jinja2 syntax by attempting to parse and analyze
530- # This catches both syntax errors AND undefined filters/tests
667+ # Note: meta.find_undeclared_variables() only finds undefined variables,
668+ # it does NOT validate filters or raise exceptions for them
531669 try :
532670 # Third-Party
533671 from jinja2 import Environment , meta
@@ -536,13 +674,23 @@ def validate_prompt_template(
536674 # Templates are never rendered with this Environment, so autoescape is not needed
537675 env = Environment () # nosec B701
538676 ast = env .parse (template )
539- # This call validates that all filters and tests exist
540- # It raises TemplateAssertionError for nonexistent filters
677+ # Find undeclared variables (does not validate filters)
541678 meta .find_undeclared_variables (ast )
542679 except Exception as e :
543680 sanitized = _sanitize_pii_for_logging (user_email , ip_address )
544- logger .warning ("Template Jinja2 syntax validation failed" , extra = {"template_name" : template_name , "error" : str (e ), ** sanitized })
545- raise TemplateValidationError (template_name , f"Invalid Jinja2 syntax: { str (e )} " )
681+ logger .warning (
682+ "Template Jinja2 syntax validation failed" ,
683+ extra = {
684+ "template_name" : template_name ,
685+ "error_type" : type (e ).__name__ , # Log error type, not message
686+ ** sanitized
687+ }
688+ )
689+ # Generic message - don't leak template fragments (CWE-209 fix)
690+ raise TemplateValidationError (
691+ template_name ,
692+ "Invalid Jinja2 syntax - template contains parsing errors"
693+ )
546694
547695 logger .debug (f"Template validation passed for: { template_name } " )
548696
0 commit comments