Skip to content

Commit e33868e

Browse files
committed
feat(kps): slo cleanup
Signed-off-by: Nicolas Lamirault <[email protected]>
1 parent 3a52aab commit e33868e

File tree

1 file changed

+20
-20
lines changed
  • gitops/argocd/charts/monitoring/kube-prometheus-stack

1 file changed

+20
-20
lines changed

gitops/argocd/charts/monitoring/kube-prometheus-stack/values.yaml

Lines changed: 20 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -631,9 +631,9 @@ pyrra-service-levels:
631631
latency:
632632
- name: prometheus-operator-reconcile-errors
633633
service: prometheus-operator
634-
dashboard: https://logs.prod.oina.ws/....
635-
runbook: https://notion.so/....
636-
message: Prometheus Operator .....
634+
dashboard: http://grafana.192.168.0.61.nip.io
635+
runbook: https://notion.so
636+
message: Prometheus Operator reconciliation has latency
637637
team: "sre"
638638
extraLabels: {}
639639
metric: prometheus_operator_reconcile_duration_seconds_bucket{job="kube-prometheus-stack-operator", namespace="monitoring", le="0.1"}
@@ -650,9 +650,9 @@ pyrra-service-levels:
650650
ratio:
651651
- name: prometheus-operator-http-errors
652652
service: prometheus-operator
653-
dashboard: https://logs.prod.oina.ws/....
654-
runbook: https://notion.so/....
655-
message: Prometheus Operator .....
653+
dashboard: http://grafana.192.168.0.61.nip.io
654+
runbook: https://notion.so
655+
message: Prometheus Operator API has errors
656656
team: "sre"
657657
extraLabels: {}
658658
metric: prometheus_operator_kubernetes_client_http_requests_total{job="kube-prometheus-stack-operator", namespace="monitoring", status_code=~"5.."}
@@ -667,9 +667,9 @@ pyrra-service-levels:
667667
disabled: false
668668
- name: prometheus-operator-reconcile-errors
669669
service: prometheus-operator
670-
dashboard: https://logs.prod.oina.ws/....
671-
runbook: https://notion.so/....
672-
message: Prometheus Operator .....
670+
dashboard: http://grafana.192.168.0.61.nip.io
671+
runbook: https://notion.so
672+
message: Prometheus Operator reconciliation has errors
673673
team: "sre"
674674
extraLabels: {}
675675
metric: prometheus_operator_reconcile_errors_total{job="kube-prometheus-stack-operator",namespace="monitoring"}
@@ -684,8 +684,8 @@ pyrra-service-levels:
684684
disabled: false
685685
- name: prometheus-notifications-errors
686686
service: prometheus
687-
dashboard: https://logs.prod.oina.ws/....
688-
runbook: https://notion.so/....
687+
dashboard: http://grafana.192.168.0.61.nip.io
688+
runbook: https://notion.so
689689
message: ""
690690
team: "sre"
691691
extraLabels: {}
@@ -701,8 +701,8 @@ pyrra-service-levels:
701701
disabled: false
702702
- name: prometheus-query-errors
703703
service: prometheus
704-
dashboard: https://logs.prod.oina.ws/....
705-
runbook: https://notion.so/....
704+
dashboard: http://grafana.192.168.0.61.nip.io
705+
runbook: https://notion.so
706706
message: "95% of Prometheus requests return a good HTTP code"
707707
team: "sre"
708708
extraLabels: {}
@@ -719,8 +719,8 @@ pyrra-service-levels:
719719
disabled: false
720720
- name: prometheus-rule-evaluation-failures
721721
service: prometheus
722-
dashboard: https://logs.prod.oina.ws/....
723-
runbook: https://notion.so/....
722+
dashboard: http://grafana.192.168.0.61.nip.io
723+
runbook: https://notion.so
724724
message: ""
725725
team: "sre"
726726
extraLabels: {}
@@ -736,8 +736,8 @@ pyrra-service-levels:
736736
disabled: false
737737
- name: prometheus-sd-kubernetes-errors
738738
service: prometheus
739-
dashboard: https://logs.prod.oina.ws/....
740-
runbook: https://notion.so/....
739+
dashboard: http://grafana.192.168.0.61.nip.io
740+
runbook: https://notion.so
741741
message: "Prometheus has errors with Kubernetes Service Discovery"
742742
team: "sre"
743743
extraLabels: {}
@@ -753,16 +753,16 @@ pyrra-service-levels:
753753
disabled: false
754754
- name: alertmanager-notification-errors
755755
service: alertmanager
756-
dashboard: https://logs.prod.oina.ws/....
757-
runbook: https://notion.so/....
756+
dashboard: http://grafana.192.168.0.61.nip.io
757+
runbook: https://notion.so
758758
message:
759759
team: "sre"
760760
extraLabels: {}
761761
metric: alertmanager_notifications_failed_total{job="kube-prometheus-stack-alertmanager", namespace="monitoring", code=~"^5..$"}
762762
metricTotal: alertmanager_notifications_failed_total{job="kube-prometheus-stack-alertmanager", namespace="monitoring", code!~"^4..$"}
763763
groupBy: []
764764
target: "99"
765-
window: 28d
765+
window: 1d
766766
alerting:
767767
name: SLOAlertmanagerNotificationsAvailabilityErrorBudgetBurning
768768
absent: true

0 commit comments

Comments
 (0)