# crashlens/policies/block-gpt4-on-summary.yaml at main · Crashlens/crashlens · GitHub
#
# CrashLens Policy: Block GPT-4 on Summary Tasks
# Optimized for Langfuse users - prevents expensive model usage for simple summarization
# yaml-language-server: $schema=../crashlens/config/policy-schema.json
#
# Usage:
#   crashlens guard logs.jsonl --policy-file policies/block-gpt4-on-summary.yaml --fail-on-violations
#   crashlens scan logs.jsonl --policy-template model-overkill-detection
#
# Related templates:
#   - model-overkill-detection
#   - budget-protection
#   - token-efficiency

# Version marker for this policy file.
# NOTE(review): presumably the policy format version validated by the schema
# referenced in the header comment — confirm.
version: 1

# Rules in this policy, listed in file order.
rules:
  # High-severity rule with action `fail`, matched on the model name only.
  # NOTE(review): the only match condition is the model list, so nothing here
  # limits the rule to summarization — as written it appears to apply to every
  # request using one of these models. Confirm whether a task/prompt condition
  # is missing, or whether the description should be broadened.
  # NOTE(review): the suggestion recommends gpt-4o-mini; presumably `in:` is an
  # exact-match list, so that model would not be caught here — verify.
  - id: block_gpt4_summary_tasks
    description: "Block GPT-4 usage for simple summarization tasks"
    match:
      model: "in:[gpt-4,gpt-4-turbo,gpt-4o]"
    action: fail
    severity: high
    suggestion: "Use gpt-3.5-turbo or gpt-4o-mini for summarization tasks - they're 10x cheaper and equally effective"

  # Medium-severity warning: the match lists the same model set as
  # block_gpt4_summary_tasks plus a bound of fewer than 30 prompt tokens.
  # NOTE(review): presumably both match conditions must hold (AND) — confirm
  # against the policy engine.
  # NOTE(review): block_gpt4_summary_tasks matches the same model list with
  # action `fail`, so requests caught here look like they are also caught
  # there — confirm the overlap is intended.
  - id: warn_gpt4_short_prompts
    description: "Warn when GPT-4 is used for very short prompts"
    match:
      model: "in:[gpt-4,gpt-4-turbo,gpt-4o]"
      usage.prompt_tokens: "<30"
    action: warn
    severity: medium
    suggestion: "Consider gpt-3.5-turbo for short prompts to reduce costs by ~90%"

  # Low-severity warning for any request with a small prompt. The only match
  # condition is the prompt token count: there is no model filter and no
  # inspection of prompt text, so the description states that instead of
  # claiming keyword/content detection.
  # The id is kept unchanged because it may be referenced outside this file.
  # NOTE(review): with no model condition this also appears to warn on
  # requests already using a cheaper model — confirm that is intended.
  - id: detect_summary_keywords
    description: "Flag short prompts (under 100 prompt tokens) as potential summary tasks"
    match:
      usage.prompt_tokens: "<100"
    action: warn
    severity: low
    suggestion: "Short prompt detected - ensure you're using cost-effective models"

# NOTE(review): presumably settings that apply across all rules rather than to
# a single one — the consumer of this section is not visible in this file;
# confirm the key names against the policy schema.
global:
  # NOTE(review): looks like a per-rule cap on violations — confirm whether it
  # limits reporting or evaluation.
  max_violations_per_rule: 25
  enable_cost_estimation: true

# Per-request cost thresholds in US dollars (see the inline notes on each key).
# NOTE(review): how these thresholds relate to the rule severities above is
# not visible in this file — confirm against the policy schema.
cost_thresholds:
  warning_threshold: 0.02  # $0.02 per request
  critical_threshold: 0.10  # $0.10 per request