agent-governance-toolkit/packages/agent-sre/examples/quickstart.py at main · imran-siddique/agent-governance-toolkit · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
"""
Agent-SRE Quickstart — Monitor an AI agent in about 100 lines.

Run:
    pip install agent-sre
    python examples/quickstart.py
"""

import random

from agent_sre import SLO, ErrorBudget
from agent_sre.slo.indicators import TaskSuccessRate, CostPerTask, HallucinationRate
from agent_sre.slo.dashboard import SLODashboard
from agent_sre.cost.guard import CostGuard
from agent_sre.incidents.detector import IncidentDetector, Signal, SignalType

# ── 1. Define what "reliable" means for your agent ──────────────────────

# All three indicators are evaluated over the same window.
_WINDOW = "24h"

success_rate = TaskSuccessRate(target=0.95, window=_WINDOW)
cost_per_task = CostPerTask(target_usd=0.50, window=_WINDOW)
hallucination = HallucinationRate(target=0.05, window=_WINDOW)

# Build the error budget on its own line so the SLO definition stays compact.
_budget = ErrorBudget(total=0.05, burn_rate_critical=10.0)

slo = SLO(
    name="my-assistant",
    description="Production assistant agent reliability targets",
    indicators=[success_rate, cost_per_task, hallucination],
    error_budget=_budget,
)

# ── 2. Set cost guardrails ──────────────────────────────────────────────

# Limits at three scopes: a single task, one agent per day, the org per month.
guard = CostGuard(
    org_monthly_budget=1000.0,
    per_agent_daily_limit=50.0,
    per_task_limit=2.00,
)

# ── 3. Wire up incident detection ──────────────────────────────────────

detector = IncidentDetector(correlation_window_seconds=60)

# ── 4. Simulate agent work ─────────────────────────────────────────────

print("Agent-SRE Quickstart", "=" * 60, "", sep="\n")

# The simulated rates are deliberately worse than the targets defined above
# (93% success vs. a 0.95 target, 8% hallucination vs. a 0.05 target), so the
# report at the end has something to flag.
for task_index in range(100):
    # Draw one synthetic outcome; the draw order is kept fixed so a seeded
    # run produces the same sequence as before.
    task_ok = random.random() < 0.93
    task_cost = random.uniform(0.05, 0.80)
    task_hallucinated = random.random() < 0.08

    # Feed each indicator its measurement for this task
    success_rate.record_task(success=task_ok)
    cost_per_task.record_cost(cost_usd=task_cost)
    hallucination.record_evaluation(hallucinated=task_hallucinated)

    # Ask the guard first; spend is booked only for tasks it allows.
    # The second element of the result is not used here.
    approved, _reason = guard.check_task("my-assistant", estimated_cost=task_cost)
    if approved:
        guard.record_cost("my-assistant", f"task-{task_index}", task_cost)

    # A task counts as a good event only if it succeeded without hallucinating
    slo.record_event(good=task_ok and not task_hallucinated)

# ── 5. Check results ───────────────────────────────────────────────────

# Overall SLO status plus the state of its error budget
status = slo.evaluate()
print(f"SLO Status:          {status.value}")
print(f"Error Budget Left:   {slo.error_budget.remaining_percent:.1f}%")
print(f"Burn Rate (1h):      {slo.error_budget.burn_rate(3600):.1f}x")
print()

print("Indicators:")
for ind in slo.indicators:
    val = ind.current_value()
    comp = ind.compliance()
    # Presumably either value can be None when an indicator has no data yet
    # (TODO confirm against the indicator API). Test against None explicitly
    # so a real 0.0 is not mistaken for "no data", and fall back to "n/a" so
    # the numeric format specs below never receive None (which would raise
    # TypeError).
    # NOTE(review): compliance is compared against the indicator's own
    # target value — confirm both are on the same scale for every indicator.
    label = "✅" if comp is not None and comp >= ind.target else "❌"
    val_text = f"{val:.3f}" if val is not None else "n/a"
    comp_text = f"{comp:.1%}" if comp is not None else "n/a"
    print(f"  {label} {ind.name}: {val_text} (target: {ind.target:.3f}, compliance: {comp_text})")

print()

# Cost summary
# The first line prints guard.org_spent_month against guard.org_monthly_budget,
# so it is labelled as a monthly organization figure rather than "today".
print(f"Org Spend (Month):   ${guard.org_spent_month:.2f} / ${guard.org_monthly_budget:.2f}")
# The per-agent line reads the daily fields of the agent's budget record.
budget_info = guard.get_budget("my-assistant")
print(f"Agent Budget Left:   ${budget_info.remaining_today_usd:.2f} / ${budget_info.daily_limit_usd:.2f}")
print()

# Hand an "error budget exhausted" signal to the detector once the budget
# reports itself as exhausted
if slo.error_budget.is_exhausted:
    incident = detector.ingest_signal(
        Signal(
            signal_type=SignalType.ERROR_BUDGET_EXHAUSTED,
            source="my-assistant",
            message="Error budget fully consumed — freeze deployments",
        )
    )
    # Only report when the detector returned something truthy
    if incident:
        print(f"🚨 Incident Created: {incident.title}")
        print(f"   Severity: {incident.severity.value}")

# List every burn-rate alert the error budget reports as firing
for firing in slo.error_budget.firing_alerts():
    print(f"⚠️  Burn Rate Alert: {firing.name} burn rate = {firing.rate:.1f}x")

# Closing banner
print()
print("─" * 60)
print(
    "This is what Agent-SRE does: SLOs, cost budgets, and incidents",
    "for AI agents — not infrastructure.",
    sep="\n",
)