-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathconfig-production.yaml
More file actions
126 lines (111 loc) · 2.8 KB
/
config-production.yaml
File metadata and controls
126 lines (111 loc) · 2.8 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
# Production Configuration
# Full-featured setup with ClickHouse, PostgreSQL, Redis, ML detection,
# enforcement, cost controls, alerting, and boundary defense.
# --- Endpoints ---
listen_addr: "0.0.0.0:8080"
upstream_url: "https://api.openai.com"
enable_tls: false

# --- Feature switches ---
enable_security_analysis: true
enable_trace_storage: true
enable_streaming: true

# --- Timeouts (milliseconds) ---
timeout_ms: 30000  # 30 s
connection_timeout_ms: 5000  # 5 s
security_analysis_timeout_ms: 5000  # 5 s
trace_storage_timeout_ms: 10000  # 10 s

# --- Capacity limits ---
max_connections: 1000
max_request_size_bytes: 52428800  # 50 MiB (50 * 1024 * 1024)
# Storage backends (ClickHouse, PostgreSQL, Redis) for the production profile.
storage:
  profile: "production"
  clickhouse_url: "http://clickhouse:8123"
  clickhouse_database: "llmtrace"
  # NOTE(review): credentials are embedded in this URL; replace the
  # placeholder password and prefer injecting it from a secret store or
  # environment rather than committing it - confirm what the loader supports.
  postgres_url: "postgres://llmtrace:password@postgres:5432/llmtrace"
  redis_url: "redis://redis:6379"
  auto_migrate: true
# Log output: level and format.
logging:
  level: "info"
  format: "json"
# ML-based prompt injection detection (requires --features ml)
security_analysis:
  # Primary ML classifier
  ml_enabled: true
  ml_model: "protectai/deberta-v3-base-prompt-injection-v2"
  ml_threshold: 0.8
  ml_cache_dir: "/root/.cache/huggingface/hub"
  ml_preload: true
  ml_download_timeout_seconds: 600  # 10 min

  # Jailbreak detection
  jailbreak_enabled: true
  jailbreak_threshold: 0.7

  operating_point: "balanced"
  over_defence: false

  # InjecGuard model
  injecguard_enabled: true
  injecguard_model: "leolee99/InjecGuard"
  injecguard_threshold: 0.85

  # PIGuard model
  piguard_enabled: true
  piguard_model: "leolee99/PIGuard"
  piguard_threshold: 0.85
# Pre-request enforcement (block prompt injection before it reaches the LLM)
enforcement:
  # NOTE(review): the section header describes blocking, but the mode is set
  # to "flag" - confirm this is the intended production behaviour.
  mode: "flag"
  analysis_depth: "fast"
  min_severity: "high"
  min_confidence: 0.8
  # Enforcement-specific timeout; distinct from the top-level timeout_ms.
  timeout_ms: 2000  # 2 s
# Boundary token defense (structural prevention for indirect injection)
boundary_defense:
  enabled: true
  shadow_mode: false
  wrap_roles: ["tool"]
  delimiter: "llmtrace-boundary"
  randomize_nonce: false
  inject_system_reminder: true
# Rate limiting
rate_limiting:
  enabled: true
  requests_per_second: 100
  burst_size: 200
  window_seconds: 60
# Circuit breaker
circuit_breaker:
  enabled: true
  failure_threshold: 10
  recovery_timeout_ms: 30000  # 30 s
  half_open_max_calls: 3
# Cost estimation
cost_estimation:
  enabled: true
# Cost caps
cost_caps:
  enabled: true
  # One entry per budget window; each has a soft and a hard limit in USD.
  default_budget_caps:
    - window: hourly
      hard_limit_usd: 50.0
      soft_limit_usd: 40.0
    - window: daily
      hard_limit_usd: 500.0
      soft_limit_usd: 400.0
# Alerting
alerts:
  enabled: true
  channels:
    # NOTE(review): min_severity and min_security_score are assumed to be
    # per-channel thresholds - verify against the alert config schema.
    - type: slack
      # Placeholder webhook; replace with the real Slack webhook URL.
      url: "https://hooks.slack.com/services/YOUR/SLACK/WEBHOOK"
      min_severity: "Medium"
      min_security_score: 50
  # NOTE(review): assumed to be an alerts-level setting rather than a
  # per-channel one - verify against the alert config schema.
  cooldown_seconds: 300  # 5 min
# Output safety
output_safety:
  enabled: true
  toxicity_enabled: false
  block_on_critical: false
# Streaming analysis
streaming_analysis:
  enabled: true
  token_interval: 50
  output_enabled: false
# Graceful shutdown
shutdown:
  timeout_seconds: 30
# Health check
health_check:
  enabled: true
  path: "/health"
  interval_seconds: 10
  # Health-check-specific timeout; distinct from the top-level timeout_ms.
  timeout_ms: 5000  # 5 s
  retries: 3