# Source: llmtrace/examples/config-production.yaml at main · techlab-innov/llmtrace · GitHub

# Production Configuration
# Full-featured setup with ClickHouse, PostgreSQL, Redis, ML detection,
# enforcement, cost controls, alerting, and boundary defense.

# Proxy listener and upstream settings.
# 0.0.0.0 is the wildcard address, so the listener is bound on all interfaces
# at port 8080.
listen_addr: "0.0.0.0:8080"
upstream_url: "https://api.openai.com"
# Values with the _ms suffix are milliseconds: 30000 ms = 30 s, 5000 ms = 5 s.
timeout_ms: 30000
connection_timeout_ms: 5000
max_connections: 1000
# NOTE(review): TLS is disabled while listening on all interfaces; presumably
# TLS is terminated by a load balancer or ingress in front of this service —
# confirm before deploying.
enable_tls: false
enable_security_analysis: true
enable_trace_storage: true
enable_streaming: true
# 52428800 bytes = 50 MiB (50 * 1024 * 1024).
max_request_size_bytes: 52428800
# 5 s and 10 s respectively.
security_analysis_timeout_ms: 5000
trace_storage_timeout_ms: 10000

# Storage backends: ClickHouse, PostgreSQL and Redis under the "production"
# profile.
storage:
  profile: "production"
  clickhouse_url: "http://clickhouse:8123"
  clickhouse_database: "llmtrace"
  # NOTE(review): this URL embeds a literal password ("password"). Treat it as
  # a placeholder and supply the real credential from outside version control
  # before deploying.
  postgres_url: "postgres://llmtrace:password@postgres:5432/llmtrace"
  # NOTE(review): the Redis URL carries no credentials — confirm the instance
  # is reachable only from a trusted network.
  redis_url: "redis://redis:6379"
  # NOTE(review): presumably applies schema migrations automatically on
  # startup — confirm this is wanted in production.
  auto_migrate: true

# Logging: "info" level with "json" output format.
logging:
  level: "info"
  format: "json"

# ML-based prompt injection detection (requires --features ml)
# Turns on the ML detector together with the jailbreak, InjecGuard and PIGuard
# checks; each has its own threshold below.
security_analysis:
  ml_enabled: true
  ml_model: "protectai/deberta-v3-base-prompt-injection-v2"
  ml_threshold: 0.8
  # NOTE(review): the cache path is under /root, which presumably means the
  # process runs as root — confirm, or point this at a directory owned by the
  # service user.
  ml_cache_dir: "/root/.cache/huggingface/hub"
  ml_preload: true
  # 600 s = 10 minutes.
  ml_download_timeout_seconds: 600
  jailbreak_enabled: true
  jailbreak_threshold: 0.7
  operating_point: "balanced"
  over_defence: false
  # InjecGuard and PIGuard use the same threshold (0.85).
  injecguard_enabled: true
  injecguard_model: "leolee99/InjecGuard"
  injecguard_threshold: 0.85
  piguard_enabled: true
  piguard_model: "leolee99/PIGuard"
  piguard_threshold: 0.85

# Pre-request enforcement (block prompt injection before it reaches the LLM)
# NOTE(review): the heading above describes blocking, but mode is set to
# "flag" — presumably matching requests are flagged and still forwarded.
# Confirm which behaviour is intended for production.
enforcement:
  mode: "flag"
  analysis_depth: "fast"
  # Only findings of at least "high" severity and 0.8 confidence qualify.
  min_severity: "high"
  min_confidence: 0.8
  # 2000 ms = 2 s.
  timeout_ms: 2000

# Boundary token defense (structural prevention for indirect injection)
boundary_defense:
  enabled: true
  # NOTE(review): shadow_mode is off, so presumably the defense is actively
  # applied rather than only observed — confirm.
  shadow_mode: false
  # Only the "tool" role is listed for wrapping.
  wrap_roles: ["tool"]
  delimiter: "llmtrace-boundary"
  # NOTE(review): with randomize_nonce false the delimiter presumably carries
  # no per-request random component and is therefore predictable — confirm
  # this is acceptable for production.
  randomize_nonce: false
  inject_system_reminder: true

# Rate limiting
# burst_size (200) is twice requests_per_second (100); the window is 60 s.
rate_limiting:
  enabled: true
  requests_per_second: 100
  burst_size: 200
  window_seconds: 60

# Circuit breaker
circuit_breaker:
  enabled: true
  failure_threshold: 10
  # 30000 ms = 30 s.
  recovery_timeout_ms: 30000
  half_open_max_calls: 3

# Cost estimation
# Enabled; no further options are set in this file.
cost_estimation:
  enabled: true

# Cost caps
# Default budget caps per time window, in USD. In both windows the soft limit
# is 80% of the hard limit.
cost_caps:
  enabled: true
  default_budget_caps:
    # Window names are quoted to match the string style used throughout this
    # file; the parsed values are unchanged.
    - window: "hourly"
      hard_limit_usd: 50.0
      soft_limit_usd: 40.0
    - window: "daily"
      hard_limit_usd: 500.0
      soft_limit_usd: 400.0

# Alerting
alerts:
  enabled: true
  channels:
    # Channel type is quoted to match the string style used throughout this
    # file; the parsed value is unchanged.
    - type: "slack"
      # Placeholder webhook URL — replace YOUR/SLACK/WEBHOOK with the real
      # webhook path before deploying.
      url: "https://hooks.slack.com/services/YOUR/SLACK/WEBHOOK"
      # NOTE(review): severity is capitalised here ("Medium") but lowercase
      # under enforcement ("high") — confirm the expected casing for each key.
      min_severity: "Medium"
      min_security_score: 50
  # 300 s = 5 minutes.
  cooldown_seconds: 300

# Output safety
# Enabled, with the toxicity check and block-on-critical both turned off.
output_safety:
  enabled: true
  toxicity_enabled: false
  block_on_critical: false

# Streaming analysis
streaming_analysis:
  enabled: true
  # NOTE(review): presumably analysis runs every 50 streamed tokens — confirm.
  token_interval: 50
  output_enabled: false

# Graceful shutdown
# Shutdown timeout is 30 seconds.
shutdown:
  timeout_seconds: 30

# Health check
# Served at /health; interval 10 s, timeout 5000 ms = 5 s, 3 retries.
health_check:
  enabled: true
  path: "/health"
  interval_seconds: 10
  timeout_ms: 5000
  retries: 3