Skip to content

Commit c50f4ca

Browse files
author
Jenkins
committed
feat: add multi-channel alerting with Slack and PagerDuty support (Loop 34)
1 parent 96ebf10 commit c50f4ca

5 files changed

Lines changed: 1139 additions & 498 deletions

File tree

config.example.yaml

Lines changed: 33 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -141,18 +141,42 @@ circuit_breaker:
141141

142142
alerts:
143143
# Enable the alert engine. When enabled, security findings that exceed the
144-
# configured thresholds will trigger an HTTP POST to the webhook URL.
144+
# configured thresholds will trigger notifications to the configured channels.
145145
enabled: false
146-
# Webhook URL to POST alert payloads to (Slack incoming webhooks, generic, etc.).
147-
# Slack-format payloads are sent automatically when the URL contains hooks.slack.com.
148-
webhook_url: ""
149-
# Minimum severity level to trigger an alert: Info, Low, Medium, High, Critical.
150-
min_severity: "High"
151-
# Minimum confidence-based score (0-100) to trigger an alert.
152-
min_security_score: 70
153-
# Cooldown in seconds between repeated alerts for the same finding type.
146+
147+
# ---------- Legacy mode (backward compatible) ----------
148+
# If `channels` is empty or omitted, these top-level fields are used:
149+
# webhook_url: "https://hooks.slack.com/services/T00/B00/xxx"
150+
# min_severity: "High"
151+
# min_security_score: 70
152+
153+
# ---------- Multi-channel mode ----------
154+
# When `channels` is non-empty, each channel has its own type, destination (URL or routing key), and severity filter.
155+
# Supported types: webhook, slack, pagerduty (email: coming soon)
156+
channels: []
157+
# - type: slack
158+
# url: "https://hooks.slack.com/services/T00/B00/xxx"
159+
# min_severity: "Medium" # Slack gets Medium and above
160+
# min_security_score: 50
161+
#
162+
# - type: pagerduty
163+
# routing_key: "your-pagerduty-routing-key"
164+
# min_severity: "Critical" # PagerDuty only for Critical
165+
# min_security_score: 90
166+
#
167+
# - type: webhook
168+
# url: "https://your-server.com/alerts"
169+
# min_severity: "High"
170+
# min_security_score: 70
171+
172+
# Global cooldown in seconds between repeated alerts for the same finding type.
154173
cooldown_seconds: 300
155174

175+
# Optional: alert escalation (re-send at higher severity if unacknowledged).
176+
# escalation:
177+
# enabled: false
178+
# escalate_after_seconds: 600
179+
156180
# ---------------------------------------------------------------------------
157181
# Cost caps — per-agent budget & token enforcement
158182
# ---------------------------------------------------------------------------

crates/llmtrace-core/src/lib.rs

Lines changed: 126 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1492,26 +1492,87 @@ impl Default for GrpcConfig {
14921492

14931493
/// Alert engine configuration for alert notifications (webhook, Slack, PagerDuty).
14941494
///
1495-
/// When enabled, the alert engine sends HTTP POST requests to a webhook URL
1496-
/// whenever security findings exceed the configured severity and confidence
1497-
/// thresholds. A per-finding-type cooldown prevents alert spam.
1495+
/// Supports both a legacy single-webhook mode (via `webhook_url`) and a
1496+
/// multi-channel mode (via `channels`). When `channels` is non-empty it
1497+
/// takes precedence; otherwise the legacy `webhook_url` is wrapped in a
1498+
/// single webhook channel for backward compatibility.
14981499
#[derive(Debug, Clone, Serialize, Deserialize)]
14991500
pub struct AlertConfig {
15001501
/// Enable the alert engine.
15011502
#[serde(default)]
15021503
pub enabled: bool,
1503-
/// Webhook URL to POST alert payloads to.
1504+
/// **Legacy** — Webhook URL to POST alert payloads to.
1505+
/// Ignored when `channels` is non-empty.
15041506
#[serde(default)]
15051507
pub webhook_url: String,
1506-
/// Minimum severity level to trigger an alert (e.g., `"High"`, `"Critical"`).
1508+
/// **Legacy** — Minimum severity level (e.g. `"High"`).
1509+
/// Used as the global default when `channels` is empty.
15071510
#[serde(default = "default_alert_min_severity")]
15081511
pub min_severity: String,
1509-
/// Minimum confidence-based score (0–100) to trigger an alert.
1512+
/// **Legacy** — Minimum confidence-based score (0–100).
15101513
#[serde(default = "default_alert_min_security_score")]
15111514
pub min_security_score: u8,
15121515
/// Cooldown in seconds between repeated alerts for the same finding type.
15131516
#[serde(default = "default_alert_cooldown_seconds")]
15141517
pub cooldown_seconds: u64,
1518+
/// Multi-channel alert destinations.
1519+
/// When non-empty, each channel has its own type, URL, and min_severity.
1520+
#[serde(default)]
1521+
pub channels: Vec<AlertChannelConfig>,
1522+
/// Optional escalation configuration.
1523+
#[serde(default)]
1524+
pub escalation: Option<AlertEscalationConfig>,
1525+
}
1526+
1527+
/// Configuration for a single alert channel.
1528+
#[derive(Debug, Clone, Serialize, Deserialize)]
1529+
pub struct AlertChannelConfig {
1530+
/// Channel type: `"webhook"`, `"slack"`, or `"pagerduty"` (`"email"` is planned but not yet supported).
1531+
#[serde(rename = "type")]
1532+
pub channel_type: String,
1533+
/// Webhook / Slack incoming-webhook URL.
1534+
#[serde(default)]
1535+
pub url: Option<String>,
1536+
/// Alias accepted for `url` (convenience for webhook channels).
1537+
#[serde(default)]
1538+
pub webhook_url: Option<String>,
1539+
/// PagerDuty Events API v2 routing key.
1540+
#[serde(default)]
1541+
pub routing_key: Option<String>,
1542+
/// Minimum severity to send to this channel (default: `"High"`).
1543+
#[serde(default = "default_alert_min_severity")]
1544+
pub min_severity: String,
1545+
/// Minimum confidence-based score (0–100) to send to this channel.
1546+
#[serde(default = "default_alert_min_security_score")]
1547+
pub min_security_score: u8,
1548+
}
1549+
1550+
impl AlertChannelConfig {
1551+
/// Resolve the effective URL (prefers `url`, falls back to `webhook_url`).
1552+
pub fn effective_url(&self) -> Option<&str> {
1553+
self.url
1554+
.as_deref()
1555+
.or(self.webhook_url.as_deref())
1556+
.filter(|s| !s.is_empty())
1557+
}
1558+
}
1559+
1560+
/// Optional alert escalation configuration.
1561+
///
1562+
/// If no acknowledgement is received within `escalate_after_seconds`, the
1563+
/// alert is re-sent to the next higher-severity channel.
1564+
#[derive(Debug, Clone, Serialize, Deserialize)]
1565+
pub struct AlertEscalationConfig {
1566+
/// Enable escalation.
1567+
#[serde(default)]
1568+
pub enabled: bool,
1569+
/// Seconds to wait before escalating an unacknowledged alert.
1570+
#[serde(default = "default_escalation_seconds")]
1571+
pub escalate_after_seconds: u64,
1572+
}
1573+
1574+
fn default_escalation_seconds() -> u64 {
1575+
600
15151576
}
15161577

15171578
fn default_alert_min_severity() -> String {
@@ -1534,6 +1595,8 @@ impl Default for AlertConfig {
15341595
min_severity: default_alert_min_severity(),
15351596
min_security_score: default_alert_min_security_score(),
15361597
cooldown_seconds: default_alert_cooldown_seconds(),
1598+
channels: Vec::new(),
1599+
escalation: None,
15371600
}
15381601
}
15391602
}
@@ -2475,6 +2538,8 @@ mod tests {
24752538
assert_eq!(config.min_severity, "High");
24762539
assert_eq!(config.min_security_score, 70);
24772540
assert_eq!(config.cooldown_seconds, 300);
2541+
assert!(config.channels.is_empty());
2542+
assert!(config.escalation.is_none());
24782543
}
24792544

24802545
#[test]
@@ -2485,6 +2550,8 @@ mod tests {
24852550
min_severity: "Critical".to_string(),
24862551
min_security_score: 90,
24872552
cooldown_seconds: 600,
2553+
channels: Vec::new(),
2554+
escalation: None,
24882555
};
24892556
let serialized = serde_json::to_string(&config).unwrap();
24902557
let deserialized: AlertConfig = serde_json::from_str(&serialized).unwrap();
@@ -2504,6 +2571,58 @@ mod tests {
25042571
assert_eq!(config.min_severity, "High");
25052572
assert_eq!(config.min_security_score, 70);
25062573
assert_eq!(config.cooldown_seconds, 300);
2574+
assert!(config.channels.is_empty());
2575+
}
2576+
2577+
#[test]
2578+
fn test_alert_config_multi_channel() {
2579+
let json = r#"{
2580+
"enabled": true,
2581+
"cooldown_seconds": 120,
2582+
"channels": [
2583+
{"type": "slack", "url": "https://hooks.slack.com/services/T/B/x", "min_severity": "Medium"},
2584+
{"type": "pagerduty", "routing_key": "abc123", "min_severity": "Critical"},
2585+
{"type": "webhook", "url": "https://example.com/hook", "min_severity": "High"}
2586+
]
2587+
}"#;
2588+
let config: AlertConfig = serde_json::from_str(json).unwrap();
2589+
assert!(config.enabled);
2590+
assert_eq!(config.channels.len(), 3);
2591+
assert_eq!(config.channels[0].channel_type, "slack");
2592+
assert_eq!(config.channels[0].min_severity, "Medium");
2593+
assert_eq!(config.channels[1].channel_type, "pagerduty");
2594+
assert_eq!(config.channels[1].routing_key.as_deref(), Some("abc123"));
2595+
assert_eq!(config.channels[2].channel_type, "webhook");
2596+
}
2597+
2598+
#[test]
2599+
fn test_alert_channel_config_effective_url() {
2600+
// Prefers `url` over `webhook_url`
2601+
let cfg = AlertChannelConfig {
2602+
channel_type: "webhook".to_string(),
2603+
url: Some("https://primary.com".to_string()),
2604+
webhook_url: Some("https://fallback.com".to_string()),
2605+
routing_key: None,
2606+
min_severity: "High".to_string(),
2607+
min_security_score: 70,
2608+
};
2609+
assert_eq!(cfg.effective_url(), Some("https://primary.com"));
2610+
2611+
// Falls back to `webhook_url`
2612+
let cfg2 = AlertChannelConfig {
2613+
url: None,
2614+
webhook_url: Some("https://fallback.com".to_string()),
2615+
..cfg.clone()
2616+
};
2617+
assert_eq!(cfg2.effective_url(), Some("https://fallback.com"));
2618+
2619+
// Empty strings treated as None
2620+
let cfg3 = AlertChannelConfig {
2621+
url: Some(String::new()),
2622+
webhook_url: None,
2623+
..cfg
2624+
};
2625+
assert!(cfg3.effective_url().is_none());
25072626
}
25082627

25092628
#[test]
@@ -2512,6 +2631,7 @@ mod tests {
25122631
assert!(!config.alerts.enabled);
25132632
assert!(config.alerts.webhook_url.is_empty());
25142633
assert_eq!(config.alerts.min_severity, "High");
2634+
assert!(config.alerts.channels.is_empty());
25152635
}
25162636

25172637
#[test]

crates/llmtrace-proxy/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ hyper = { version = "1", features = ["full"] }
4343
reqwest = { version = "0.12", default-features = false, features = ["json", "stream", "rustls-tls"] }
4444
serde_yaml = "0.9"
4545
dashmap = "6"
46+
thiserror = "2"
4647
bytes = "1"
4748
http = "1"
4849
http-body-util = "0.1"

0 commit comments

Comments
 (0)