feat: Implement response caching enhancement (Task 2)

anhmtk · anhmtk · commit 55e08866b285 · 2026-01-07T12:56:14.000+07:00
Cache validation results to reduce redundant LLM calls. Expected savings: 20-30% cost reduction.
diff --git a/backend/api/routers/chat_router.py b/backend/api/routers/chat_router.py
@@ -2698,19 +2698,53 @@ async def _handle_validation_with_fallback(
     context_quality = context.get("context_quality", None)
     avg_similarity = context.get("avg_similarity_score", None)
     
-    # Run validation with context quality info
-    # Tier 3.5: Pass context quality, is_philosophical, and is_religion_roleplay to ValidatorChain
-    # CRITICAL: Pass context dict to enable foundational knowledge detection in CitationRequired
-    validation_result = chain.run(
-        raw_response, 
-        ctx_docs,
-        context_quality=context_quality,
-        avg_similarity=avg_similarity,
-        is_philosophical=is_philosophical,
-        is_religion_roleplay=is_religion_roleplay,
-        user_question=chat_request.message,  # Pass user question for FactualHallucinationValidator
-        context=context  # Pass context dict for foundational knowledge detection
-    )
+    # Task 2: Response Caching Enhancement - Cache validation results
+    # Check cache before running expensive validation chain
+    try:
+        from backend.utils.cache_utils import get_cache_key, get_from_cache, set_to_cache
+        
+        # Generate cache key from query and context
+        cache_key = get_cache_key("validation", chat_request.message, context)
+        
+        # Check cache
+        cached_validation = get_from_cache(cache_key)
+        if cached_validation is not None:
+            logger.info(f"✅ Validation cache HIT for query: {chat_request.message[:50]}...")
+            validation_result = cached_validation
+        else:
+            # Cache miss - run validation
+            logger.debug(f"⏳ Validation cache MISS, running validation for: {chat_request.message[:50]}...")
+            
+            # Run validation with context quality info
+            # Tier 3.5: Pass context quality, is_philosophical, and is_religion_roleplay to ValidatorChain
+            # CRITICAL: Pass context dict to enable foundational knowledge detection in CitationRequired
+            validation_result = chain.run(
+                raw_response, 
+                ctx_docs,
+                context_quality=context_quality,
+                avg_similarity=avg_similarity,
+                is_philosophical=is_philosophical,
+                is_religion_roleplay=is_religion_roleplay,
+                user_question=chat_request.message,  # Pass user question for FactualHallucinationValidator
+                context=context  # Pass context dict for foundational knowledge detection
+            )
+            
+            # Cache result (TTL: 1 hour)
+            set_to_cache(cache_key, validation_result, ttl=3600)
+            logger.debug(f"💾 Cached validation result (TTL: 3600s)")
+    except Exception as cache_error:
+        # If caching fails, just run validation normally
+        logger.warning(f"⚠️ Cache error, running validation without cache: {cache_error}")
+        validation_result = chain.run(
+            raw_response, 
+            ctx_docs,
+            context_quality=context_quality,
+            avg_similarity=avg_similarity,
+            is_philosophical=is_philosophical,
+            is_religion_roleplay=is_religion_roleplay,
+            user_question=chat_request.message,
+            context=context
+        )
     
     # Tier 3.5: If context quality is low, inject warning into prompt for next iteration
     # For now, we'll handle this in the prompt building phase
diff --git a/backend/utils/cache_decorators.py b/backend/utils/cache_decorators.py
@@ -0,0 +1,109 @@
+"""
+Cache Decorators for StillMe
+
+Provides decorators for caching expensive operations like validation.
+"""
+
+import logging
+from typing import Callable, Any, Optional
+from functools import wraps
+
+from backend.utils.cache_utils import get_cache_key, get_from_cache, set_to_cache
+
+logger = logging.getLogger(__name__)
+
+
+def cache_validation_result(ttl: int = 3600):
+    """
+    Decorator to cache validation results
+    
+    Caches the decorated coroutine's result under the key produced by
+    get_cache_key("validation", query, context). On a cache hit the wrapped
+    function is not called.
+    
+    ``query`` and ``context`` are resolved by parameter name, so they are
+    found whether the caller passes them positionally or as keywords. When
+    no query can be resolved, caching is skipped: otherwise every such call
+    would share a single cache entry and receive another call's result.
+    
+    A result of None is never served from cache, because None is what the
+    cache lookup returns for a miss.
+    
+    NOTE(review): the cache key is built from query and context only. If the
+    decorated function also takes the response being validated, two different
+    responses to the same query/context will share one cached result -
+    confirm this is intended before applying the decorator.
+    
+    Args:
+        ttl: Time to live in seconds (default: 1 hour)
+    
+    Usage:
+        @cache_validation_result(ttl=3600)
+        async def validate_response(response: str, query: str, context: Dict):
+            # Validation logic
+            return validation_result
+    """
+    import inspect
+    
+    def decorator(func: Callable) -> Callable:
+        try:
+            signature = inspect.signature(func)
+        except (TypeError, ValueError):
+            # No introspectable signature: only keyword lookups are possible
+            signature = None
+        
+        def _resolve(name: str, args: tuple, kwargs: dict, default: Any) -> Any:
+            """Return the argument called `name`, however the caller passed it."""
+            if name in kwargs:
+                return kwargs[name]
+            if signature is None:
+                return default
+            try:
+                bound = signature.bind_partial(*args, **kwargs)
+            except TypeError:
+                # Arguments do not fit the signature; let func raise on its own
+                return default
+            return bound.arguments.get(name, default)
+        
+        @wraps(func)
+        async def wrapper(*args, **kwargs):
+            # Extract query and context from positional or keyword arguments
+            query = _resolve('query', args, kwargs, '')
+            context = _resolve('context', args, kwargs, {})
+            
+            # Without a query there is nothing to key on; do not cache
+            if not query:
+                logger.debug("Validation cache skipped: no query argument found")
+                return await func(*args, **kwargs)
+            
+            # Generate cache key
+            cache_key = get_cache_key("validation", query, context)
+            
+            # Check cache
+            cached_result = get_from_cache(cache_key)
+            if cached_result is not None:
+                logger.info(f"✅ Validation cache HIT for query: {query[:50]}...")
+                return cached_result
+            
+            # Execute function
+            logger.debug(f"⏳ Validation cache MISS, executing validation for: {query[:50]}...")
+            result = await func(*args, **kwargs)
+            
+            # Cache result
+            set_to_cache(cache_key, result, ttl=ttl)
+            logger.debug(f"💾 Cached validation result (TTL: {ttl}s)")
+            
+            return result
+        
+        return wrapper
+    return decorator
+
+
+def cache_expensive_operation(prefix: str, ttl: int = 3600, key_func: Optional[Callable] = None):
+    """
+    Generic caching decorator for expensive async operations
+    
+    The cache key comes from key_func(*args, **kwargs) when key_func is
+    given. Otherwise it is derived from the first positional argument, or
+    from the ``query`` keyword argument when there are no positional
+    arguments. If neither is present the call is not cached at all.
+    
+    Args:
+        prefix: Cache key prefix
+        ttl: Time to live in seconds
+        key_func: Optional function to generate cache key from args/kwargs
+    
+    Usage:
+        @cache_expensive_operation("llm_response", ttl=1800)
+        async def generate_response(query: str):
+            # Expensive operation
+            return result
+    """
+    def decorator(func: Callable) -> Callable:
+        @wraps(func)
+        async def wrapper(*args, **kwargs):
+            # Work out which key this call maps to
+            if key_func:
+                key = key_func(*args, **kwargs)
+            elif args:
+                key = get_cache_key(prefix, str(args[0]))
+            elif 'query' in kwargs:
+                key = get_cache_key(prefix, kwargs['query'])
+            else:
+                # Nothing to build a key from: run the operation uncached
+                return await func(*args, **kwargs)
+            
+            hit = get_from_cache(key)
+            if hit is None:
+                # Miss: run the operation and remember its result
+                fresh = await func(*args, **kwargs)
+                set_to_cache(key, fresh, ttl=ttl)
+                return fresh
+            
+            logger.debug(f"Cache HIT: {key[:50]}...")
+            return hit
+        
+        return wrapper
+    return decorator
+
diff --git a/backend/utils/cache_utils.py b/backend/utils/cache_utils.py
@@ -0,0 +1,212 @@
+"""
+Cache Utilities for StillMe
+
+Provides utilities for caching validation results and other expensive operations.
+Uses Redis if available, falls back to in-memory cache.
+"""
+
+import logging
+import hashlib
+import json
+from typing import Dict, Any, Optional
+from functools import wraps
+
+logger = logging.getLogger(__name__)
+
+# Optional Redis backend: if the project's Redis cache module cannot be
+# imported, REDIS_AVAILABLE is False and only the in-memory cache is used.
+try:
+    from backend.services.redis_cache import get_cache_service
+    REDIS_AVAILABLE = True
+except ImportError:
+    REDIS_AVAILABLE = False
+    get_cache_service = None
+
+# In-memory fallback cache, local to this process.
+# Maps cache key -> {"value": <cached value>, "expires_at": <time.time() deadline>}
+_in_memory_cache: Dict[str, Dict[str, Any]] = {}
+
+
+def hash_query(query: str) -> str:
+    """
+    Hash a user query after normalizing it
+    
+    The query is lowercased and stripped of surrounding whitespace before
+    hashing, so queries differing only in case or padding share a hash.
+    
+    Args:
+        query: User query text
+    
+    Returns:
+        First 16 hex chars of the MD5 digest, or "empty" for a falsy query
+    """
+    if query:
+        canonical = query.lower().strip()
+        digest = hashlib.md5(canonical.encode('utf-8')).hexdigest()
+        return digest[:16]
+    
+    return "empty"
+
+
+def hash_context(context: Dict[str, Any]) -> str:
+    """
+    Generate hash for context (document IDs + similarities)
+    
+    Only the "knowledge_docs" entry of the context is read. Document IDs and
+    rounded similarities are each sorted, so the hash does not depend on the
+    order of the documents.
+    
+    Malformed input degrades instead of raising: entries that are not dicts
+    are ignored, and a similarity that cannot be rounded (for example None)
+    is treated as 0.0.
+    
+    Args:
+        context: RAG context dictionary
+    
+    Returns:
+        MD5 hash (first 16 chars), "no_context" for a falsy context, or
+        "no_docs" when the context has no knowledge docs
+    """
+    if not context:
+        return "no_context"
+    
+    # Extract document IDs and similarities
+    docs = context.get("knowledge_docs", [])
+    if not docs:
+        return "no_docs"
+    
+    # A hashing helper must not break the request over one bad document
+    doc_entries = [doc for doc in docs if isinstance(doc, dict)]
+    
+    def _rounded_similarity(doc: Dict[str, Any]) -> Any:
+        """Round a doc's similarity to 3 places, or 0.0 if it is not a number."""
+        try:
+            return round(doc.get("similarity", 0.0), 3)
+        except TypeError:
+            return 0.0
+    
+    # Sort for consistent hashing
+    doc_ids = sorted(str(doc["id"]) for doc in doc_entries if doc.get("id"))
+    similarities = sorted(_rounded_similarity(doc) for doc in doc_entries)
+    
+    # Create hash from IDs and similarities
+    context_str = json.dumps({"ids": doc_ids, "sims": similarities}, sort_keys=True)
+    return hashlib.md5(context_str.encode('utf-8')).hexdigest()[:16]
+
+
+def get_cache_key(prefix: str, query: str, context: Optional[Dict[str, Any]] = None) -> str:
+    """
+    Build a cache key of the form "<prefix>:<query hash>:<context hash>"
+    
+    Args:
+        prefix: Cache key prefix (e.g., "validation")
+        query: User query
+        context: Optional RAG context
+    
+    Returns:
+        Cache key string; the context part is "no_context" when the context
+        is missing or empty
+    """
+    query_part = hash_query(query)
+    
+    if context:
+        context_part = hash_context(context)
+    else:
+        context_part = "no_context"
+    
+    return f"{prefix}:{query_part}:{context_part}"
+
+
+def get_from_cache(cache_key: str) -> Optional[Any]:
+    """
+    Get value from cache (Redis or in-memory)
+    
+    Redis is consulted first when its module was importable. Any Redis error
+    is logged at debug level and the in-memory cache is consulted instead.
+    An in-memory entry that has passed its expiry time is removed when read.
+    
+    Args:
+        cache_key: Cache key
+    
+    Returns:
+        Cached value, or None if not found or expired
+    """
+    import time
+    
+    # Try Redis first
+    if REDIS_AVAILABLE:
+        try:
+            cache_service = get_cache_service()
+            if cache_service:
+                cached = cache_service.get(cache_key)
+                # Compare with None so stored falsy values (0, "", [], {})
+                # count as hits, as they already do on the in-memory path.
+                # NOTE(review): assumes cache_service.get() returns None on
+                # a miss - confirm against the Redis cache service.
+                if cached is not None:
+                    logger.debug(f"Cache HIT (Redis): {cache_key[:50]}...")
+                    return cached
+        except Exception as e:
+            logger.debug(f"Redis cache error (falling back to memory): {e}")
+    
+    # Fallback to in-memory
+    cached_data = _in_memory_cache.get(cache_key)
+    if cached_data is not None:
+        # Check TTL (simple implementation)
+        if time.time() < cached_data.get("expires_at", 0):
+            logger.debug(f"Cache HIT (Memory): {cache_key[:50]}...")
+            return cached_data.get("value")
+        # Expired: pop() does not raise if the key was already removed
+        _in_memory_cache.pop(cache_key, None)
+    
+    logger.debug(f"Cache MISS: {cache_key[:50]}...")
+    return None
+
+
+def set_to_cache(cache_key: str, value: Any, ttl: int = 3600) -> None:
+    """
+    Set value to cache (Redis or in-memory)
+    
+    The value is written to Redis when its module was importable and a cache
+    service is returned. Otherwise, or if Redis raises, it is stored in the
+    in-memory cache with an expiry time of now + ttl.
+    
+    Each in-memory write first drops entries that have already expired.
+    Without this, keys that are written once and never read again would
+    stay in memory for the life of the process.
+    
+    Args:
+        cache_key: Cache key
+        value: Value to cache
+        ttl: Time to live in seconds (default: 1 hour)
+    """
+    # Try Redis first
+    if REDIS_AVAILABLE:
+        try:
+            cache_service = get_cache_service()
+            if cache_service:
+                cache_service.set(cache_key, value, ttl=ttl)
+                logger.debug(f"Cached (Redis): {cache_key[:50]}... (TTL: {ttl}s)")
+                return
+        except Exception as e:
+            logger.debug(f"Redis cache error (falling back to memory): {e}")
+    
+    # Fallback to in-memory
+    import time
+    now = time.time()
+    
+    # Purge expired entries; iterate over a snapshot so the dict can be
+    # modified while scanning
+    for stale_key, entry in list(_in_memory_cache.items()):
+        if entry.get("expires_at", 0) <= now:
+            _in_memory_cache.pop(stale_key, None)
+    
+    _in_memory_cache[cache_key] = {
+        "value": value,
+        "expires_at": now + ttl
+    }
+    logger.debug(f"Cached (Memory): {cache_key[:50]}... (TTL: {ttl}s)")
+
+
+def clear_cache(cache_key: Optional[str] = None) -> None:
+    """
+    Clear cache (specific key or all)
+    
+    The key (or everything) is removed from Redis, when available, and from
+    the in-memory cache. Redis failures are logged at debug level and do not
+    prevent the in-memory cache from being cleared.
+    
+    Only None means "clear everything". An empty-string key is treated as a
+    specific key, so it cannot wipe the whole cache by accident.
+    
+    Args:
+        cache_key: Specific key to clear, or None to clear all
+    """
+    if cache_key is not None:
+        # Clear specific key
+        if REDIS_AVAILABLE:
+            try:
+                cache_service = get_cache_service()
+                if cache_service:
+                    cache_service.delete(cache_key)
+            except Exception as e:
+                logger.debug(f"Redis cache error while deleting key: {e}")
+        
+        # pop() does not raise if the key is absent
+        _in_memory_cache.pop(cache_key, None)
+    else:
+        # Clear all
+        if REDIS_AVAILABLE:
+            try:
+                cache_service = get_cache_service()
+                if cache_service:
+                    cache_service.clear()
+            except Exception as e:
+                logger.debug(f"Redis cache error while clearing cache: {e}")
+        
+        _in_memory_cache.clear()
+
+
+def get_cache_stats() -> Dict[str, Any]:
+    """
+    Report the current state of the caches
+    
+    Returns:
+        Dictionary with "redis_available", "in_memory_size" and
+        "in_memory_keys" (at most the first 10 keys). When Redis is
+        available its own stats are merged in; if fetching them fails,
+        "redis_error" holds the error text instead.
+    """
+    stats = {
+        "redis_available": REDIS_AVAILABLE,
+        "in_memory_size": len(_in_memory_cache),
+        "in_memory_keys": list(_in_memory_cache.keys())[:10]  # First 10 keys
+    }
+    
+    # Nothing more to report without a Redis backend
+    if not REDIS_AVAILABLE:
+        return stats
+    
+    try:
+        service = get_cache_service()
+        if service:
+            stats.update(service.get_stats())
+    except Exception as err:
+        stats["redis_error"] = str(err)
+    
+    return stats
+
diff --git a/backend/utils/validation_cache.py b/backend/utils/validation_cache.py