-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathbudget.yaml
More file actions
59 lines (52 loc) · 1.89 KB
/
budget.yaml
File metadata and controls
59 lines (52 loc) · 1.89 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
# CrashLens Policy Configuration
# Enforce cost controls and usage policies for LLM logs
# Descriptive header for this policy file: a display name, a version
# string, and a one-line summary of what the policy is for.
metadata:
  name: "Budget Control Policy"
  # Kept quoted so the value stays the string "1.0" instead of a float.
  version: "1.0"
  description: "Prevent cost overruns and enforce model usage policies"
# Policy rules. Every entry has an id, a description, a `match` mapping of
# field conditions, an action, a severity, and a suggestion string.
# NOTE(review): presumably all conditions listed under one `match` must
# hold together for the rule to trigger - confirm with the policy engine.
rules:
# Targets model "gpt-4" combined with a retry_count condition of ">2";
# the configured action is "fail" at severity "high".
- id: "no-gpt4-in-retries"
  description: "Prevent expensive GPT-4 usage in retry scenarios"
  match:
    model: "gpt-4"
    # Comparison is written as a quoted string expression.
    retry_count: ">2"
  action: "fail"
  severity: "high"
  suggestion: "Use GPT-3.5-turbo for retries or reduce fallback steps to save costs"
# Targets entries whose `usage.total_tokens` satisfies ">10000"; the
# configured action is "warn" at severity "medium".
- id: "token-limit-exceeded"
  description: "Warn when token usage is very high"
  match:
    # NOTE(review): the dotted key presumably addresses a nested field
    # (usage -> total_tokens) - confirm how the consumer resolves it.
    usage.total_tokens: ">10000"
  action: "warn"
  severity: "medium"
  suggestion: "Consider breaking down large prompts or using more efficient models"
# Targets any of three listed models together with a
# `usage.prompt_tokens` condition of ">5000"; the configured action is
# "fail" at severity "high".
- id: "expensive-model-overuse"
  description: "Block overuse of expensive models"
  match:
    # A list value here presumably means "any of these models" -
    # TODO confirm against the policy engine.
    model: ["gpt-4", "gpt-4-32k", "claude-3-opus"]
    usage.prompt_tokens: ">5000"
  action: "fail"
  severity: "high"
  suggestion: "Use GPT-3.5-turbo for large prompts or implement prompt compression"
# Targets models matching the pattern "gpt-4.*" when `environment` is
# "development"; the configured action is "block" at severity "critical".
- id: "development-model-restriction"
  description: "Restrict expensive models in development environment"
  match:
    # The "regex:" prefix marks the remainder as a pattern rather than a
    # literal model name. NOTE(review): whether the pattern is anchored
    # to the whole model string is not visible here - confirm.
    model: "regex:gpt-4.*"
    environment: "development"
  action: "block"
  severity: "critical"
  suggestion: "Use GPT-3.5-turbo in development. GPT-4 is reserved for production"
# Targets entries whose retry_count satisfies ">=5", for any model; the
# configured action is "fail" at severity "high".
- id: "excessive-retries"
  description: "Prevent retry loops that waste tokens"
  match:
    retry_count: ">=5"
  action: "fail"
  severity: "high"
  suggestion: "Implement exponential backoff or circuit breaker pattern"
# Targets any model outside a three-entry allow-list; the configured
# action is "block" at severity "critical".
# NOTE(review): "gpt-4-32k" and "claude-3-opus" are named by the
# expensive-model-overuse rule but are absent from this allow-list, so
# this rule would presumably catch them first - confirm that is intended.
- id: "unauthorized-model"
  description: "Block usage of unauthorized models"
  match:
    # The whole "not in:[...]" expression is one string; it stays
    # double-quoted because the list inside uses single quotes.
    model: "not in:['gpt-3.5-turbo', 'gpt-4', 'claude-3-haiku']"
  action: "block"
  severity: "critical"
  suggestion: "Only approved models are allowed. Contact admin for model authorization"