-
Notifications
You must be signed in to change notification settings - Fork 45
/
Copy pathpromtail.yaml
31 lines (30 loc) · 1.51 KB
/
promtail.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
# Deploy a promtail instance that is monitored by kube-prometheus-stack.
#
# Everything under `values` is handed to the promtail chart; the keys above it
# (`enabled`, `project`) belong to the parent chart.
# NOTE(review): indentation was reconstructed; `prometheusRule` is nested under
# `serviceMonitor` as in the upstream promtail chart values - confirm.
promtail:
  enabled: true
  project: infra-logging
  values:
    serviceMonitor:
      enabled: true
      # Presumably the label the kube-prometheus-stack Prometheus selects
      # ServiceMonitors on - verify against its serviceMonitorSelector.
      labels:
        k8s.adfinis.com/prometheus: kube-prometheus
      prometheusRule:
        enabled: true
        # Same selector label as above, applied to the PrometheusRule object.
        additionalLabels:
          k8s.adfinis.com/prometheus: kube-prometheus
        rules:
          # Fires when more than 10% of requests in a
          # (namespace, job, route, instance) group end with a 5xx or
          # "failed" status code, sustained for 5 minutes.
          - alert: PromtailRequestErrors
            expr: >-
              100 * sum(rate(promtail_request_duration_seconds_count{status_code=~"5..|failed"}[1m]))
              by (namespace, job, route, instance)
              / sum(rate(promtail_request_duration_seconds_count[1m]))
              by (namespace, job, route, instance)
              > 10
            for: 5m
            labels:
              severity: critical
            annotations:
              summary: 'Promtail request errors (instance {{ $labels.instance }})'
              # The single leading space on the VALUE/LABELS lines is part of
              # the rendered text.
              description: |-
                The {{ $labels.job }} {{ $labels.route }} is experiencing {{ printf "%.2f" $value }}% errors.
                 VALUE = {{ $value }}
                 LABELS = {{ $labels }}

          # Fires when the 99th percentile request duration of a
          # (namespace, job, route, instance) group stays above 1s for
          # 5 minutes.
          - alert: PromtailRequestLatency
            # Aggregate by the same labels as PromtailRequestErrors plus `le`
            # (required by histogram_quantile). Grouping by `le` alone drops
            # the instance/job/route labels that the annotations below
            # reference, leaving them empty in the fired alert.
            expr: >-
              histogram_quantile(0.99,
              sum(rate(promtail_request_duration_seconds_bucket[5m]))
              by (namespace, job, route, instance, le))
              > 1
            for: 5m
            labels:
              severity: critical
            annotations:
              summary: 'Promtail request latency (instance {{ $labels.instance }})'
              # The single leading space on the VALUE/LABELS lines is part of
              # the rendered text.
              description: |-
                The {{ $labels.job }} {{ $labels.route }} is experiencing {{ printf "%.2f" $value }}s 99th percentile latency.
                 VALUE = {{ $value }}
                 LABELS = {{ $labels }}