feat: add real-time streaming security analysis (Loop 30)

Jenkins · Jenkins · commit 718d83075471 · 2026-02-01T12:56:18.000Z
Incremental regex-based security analysis during SSE streaming:

- Add StreamingSecurityMonitor that runs lightweight pattern checks
  every N tokens (configurable, default 50) during SSE streaming
- Detect injection patterns, PII, and data leakage mid-stream, before
  the response completes, as an early-warning layer
- Fire alerts for critical findings mid-stream via the alert engine
  (don't wait for stream completion)
- Tag all streaming findings with "detection": "streaming" metadata
  so consumers can distinguish from full post-stream analysis
- Add StreamingAnalysisConfig (streaming_analysis.enabled,
  streaming_analysis.token_interval) to ProxyConfig
- Expose RegexSecurityAnalyzer pattern methods publicly for direct
  synchronous use by the streaming monitor
- Full post-stream analysis still runs after completion (unchanged)
- 13 new tests covering: mid-stream injection/PII/leakage detection,
  streaming metadata tagging, delta-only analysis, interval behavior,
  findings draining, multi-finding detection, edge cases
- Update config.example.yaml with streaming_analysis section
diff --git a/config.example.yaml b/config.example.yaml
@@ -204,6 +204,21 @@ grpc:
   # Address and port to bind the gRPC server to.
   listen_addr: "0.0.0.0:50051"
 
+# ---------------------------------------------------------------------------
+# Streaming security analysis — real-time analysis during SSE streaming
+# ---------------------------------------------------------------------------
+
+streaming_analysis:
+  # Enable incremental regex-based security checks during SSE streaming.
+  # When enabled, the proxy runs lightweight pattern matching every N tokens
+  # while the stream is still in progress. Findings are tagged with
+  # "detection": "streaming" metadata and critical issues trigger alerts
+  # mid-stream rather than waiting for stream completion.
+  enabled: false
+  # Number of completion tokens between successive incremental analysis checks.
+  # Lower values detect threats sooner at the cost of more frequent regex passes.
+  token_interval: 50
+
 # ---------------------------------------------------------------------------
 # Health check endpoint
 # ---------------------------------------------------------------------------
diff --git a/crates/llmtrace-core/src/lib.rs b/crates/llmtrace-core/src/lib.rs
@@ -847,6 +847,9 @@ pub struct ProxyConfig {
     /// Anomaly detection configuration.
     #[serde(default)]
     pub anomaly_detection: AnomalyDetectionConfig,
+    /// Streaming security analysis configuration.
+    #[serde(default)]
+    pub streaming_analysis: StreamingAnalysisConfig,
 }
 
 impl Default for ProxyConfig {
@@ -879,6 +882,7 @@ impl Default for ProxyConfig {
             auth: AuthConfig::default(),
             grpc: GrpcConfig::default(),
             anomaly_detection: AnomalyDetectionConfig::default(),
+            streaming_analysis: StreamingAnalysisConfig::default(),
         }
     }
 }
@@ -1237,6 +1241,43 @@ impl Default for AnomalyDetectionConfig {
     }
 }
 
+/// Configuration for real-time streaming security analysis.
+///
+/// When enabled, the proxy runs lightweight regex-based security pattern checks
+/// incrementally during SSE streaming — every N tokens — producing interim
+/// `SecurityFinding`s before the stream completes. This provides an early
+/// warning layer; the full security analysis still runs after stream completion.
+///
+/// # Example (YAML)
+///
+/// ```yaml
+/// streaming_analysis:
+///   enabled: true
+///   token_interval: 50
+/// ```
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct StreamingAnalysisConfig {
+    /// Enable incremental security analysis during SSE streaming.
+    #[serde(default)]
+    pub enabled: bool,
+    /// Number of completion tokens between successive incremental analysis checks.
+    #[serde(default = "default_streaming_token_interval")]
+    pub token_interval: u32,
+}
+
+fn default_streaming_token_interval() -> u32 {
+    50
+}
+
+impl Default for StreamingAnalysisConfig {
+    fn default() -> Self {
+        Self {
+            enabled: false,
+            token_interval: default_streaming_token_interval(),
+        }
+    }
+}
+
 /// Security analysis configuration for ML-based prompt injection detection.
 ///
 /// Controls whether ML-based detection is enabled alongside regex-based analysis,
@@ -2197,6 +2238,7 @@ mod tests {
             auth: AuthConfig::default(),
             grpc: GrpcConfig::default(),
             anomaly_detection: AnomalyDetectionConfig::default(),
+            streaming_analysis: StreamingAnalysisConfig::default(),
         };
 
         let serialized = serde_json::to_string(&config).unwrap();
diff --git a/crates/llmtrace-proxy/src/lib.rs b/crates/llmtrace-proxy/src/lib.rs
@@ -27,3 +27,4 @@ pub use cost::CostEstimator;
 pub use cost_caps::CostTracker;
 pub use grpc::run_grpc_server;
 pub use proxy::{health_handler, proxy_handler, AppState};
+pub use streaming::StreamingSecurityMonitor;
diff --git a/crates/llmtrace-proxy/src/proxy.rs b/crates/llmtrace-proxy/src/proxy.rs
@@ -7,7 +7,7 @@
 use crate::circuit_breaker::CircuitBreaker;
 use crate::cost::CostEstimator;
 use crate::provider::{self, ParsedResponse};
-use crate::streaming::StreamingAccumulator;
+use crate::streaming::{StreamingAccumulator, StreamingSecurityMonitor};
 use axum::body::Body;
 use axum::extract::State;
 use axum::http::{HeaderMap, Request, Response, StatusCode};
@@ -360,6 +360,13 @@ pub async fn proxy_handler(
         } else {
             None
         };
+        // Initialise the streaming security monitor (only for SSE streams
+        // when streaming analysis is enabled).
+        let mut streaming_monitor = if is_streaming {
+            StreamingSecurityMonitor::new(&state_bg.config.streaming_analysis)
+        } else {
+            None
+        };
         let mut raw_collected = Vec::new();
         let mut ttft_ms: Option<u64> = None;
 
@@ -373,6 +380,26 @@ pub async fn proxy_handler(
                             let elapsed = Utc::now().signed_duration_since(start_time);
                             ttft_ms = Some(elapsed.num_milliseconds().max(0) as u64);
                         }
+
+                        // --- Real-time streaming security analysis ---
+                        if let Some(ref mut monitor) = streaming_monitor {
+                            if monitor.should_analyze(acc.completion_token_count) {
+                                let new_findings = monitor
+                                    .analyze_incremental(&acc.content, acc.completion_token_count);
+                                // Fire mid-stream alerts for critical findings
+                                if !new_findings.is_empty() {
+                                    info!(
+                                        %trace_id,
+                                        count = new_findings.len(),
+                                        tokens = acc.completion_token_count,
+                                        "Streaming security findings detected mid-stream"
+                                    );
+                                    if let Some(ref engine) = state_bg.alert_engine {
+                                        engine.check_and_alert(trace_id, tenant_id, &new_findings);
+                                    }
+                                }
+                            }
+                        }
                     }
                     raw_collected.extend_from_slice(&bytes);
                     if body_sender.send(Ok(bytes)).await.is_err() {
@@ -390,6 +417,29 @@ pub async fn proxy_handler(
         // body_sender is dropped here, closing the stream to the client.
         drop(body_sender);
 
+        // Run one final streaming analysis on any remaining content that
+        // didn't cross a token-interval boundary.
+        if let (Some(ref acc), Some(ref mut monitor)) = (&sse_accumulator, &mut streaming_monitor) {
+            let final_findings =
+                monitor.analyze_incremental(&acc.content, acc.completion_token_count);
+            if !final_findings.is_empty() {
+                info!(
+                    %trace_id,
+                    count = final_findings.len(),
+                    "Streaming security findings in final flush"
+                );
+                if let Some(ref engine) = state_bg.alert_engine {
+                    engine.check_and_alert(trace_id, tenant_id, &final_findings);
+                }
+            }
+        }
+
+        // Collect streaming security findings for attachment to the trace span.
+        let streaming_findings: Vec<SecurityFinding> = streaming_monitor
+            .as_mut()
+            .map(|m| m.take_findings())
+            .unwrap_or_default();
+
         // Build the captured interaction with streaming metrics if applicable
         let (response_text, prompt_tokens, completion_tokens, total_tokens) =
             if let Some(acc) = sse_accumulator {
@@ -454,6 +504,11 @@ pub async fn proxy_handler(
         // --- Security analysis first, so findings can be persisted with the trace ---
         let mut security_findings = run_security_analysis(&state_bg, &captured).await;
 
+        // Merge in any findings detected during streaming (early warning layer).
+        // These have already been alerted on mid-stream; now we persist them
+        // alongside the full post-stream analysis findings.
+        security_findings.extend(streaming_findings);
+
         // --- Anomaly detection (async, non-blocking) ---
         if let Some(ref detector) = state_bg.anomaly_detector {
             let anomaly_findings = detector
diff --git a/crates/llmtrace-proxy/src/streaming.rs b/crates/llmtrace-proxy/src/streaming.rs
diff --git a/crates/llmtrace-security/src/lib.rs b/crates/llmtrace-security/src/lib.rs