You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
Copy file name to clipboard. Expand all lines: .nais/prod/klass-api-alerts.yaml
+31 -5. Lines changed: 31 additions & 5 deletions
Original file line number
Diff line number
Diff line change
@@ -9,8 +9,34 @@ spec:
9
9
groups:
10
10
- name: dapla-metadata
11
11
rules:
12
+
- alert: HTTP error responses
13
+
expr: 100 * sum(rate(nginx_ingress_controller_request_duration_seconds_count{ingress="klass-nais-ingress-eaceac17",status!~"[4-5].*"}[10m])) by (ingress) / sum(rate(nginx_ingress_controller_request_duration_seconds_count{ingress="klass-nais-ingress-eaceac17"}[10m])) by (ingress) < 75.0
14
+
for: 3m
15
+
annotations:
16
+
title: "HTTP error responses"
17
+
consequence: "A high number of HTTP requests are receiving error responses. This may indicate a problem with the application."
18
+
action: "Investigate whether this indicates an app failure or user errors."
19
+
labels:
20
+
service: klass-api
21
+
namespace: dapla-metadata
22
+
severity: critical
23
+
environment: prod
24
+
25
+
- alert: High heap memory usage
26
+
expr: (100 * sum by (instance) (jvm_memory_used_bytes{application="klass", area="heap"})) / (sum by (instance) (jvm_memory_max_bytes{application="klass", area="heap"}) ) > 70.0
27
+
for: 3m
28
+
annotations:
29
+
title: "High heap memory usage"
30
+
consequence: "If this increase continues then the app could run out of memory and either lock or crash."
31
+
action: "Immediate: Restart the app from the Nais console\nShort term: Investigate the cause of high heap usage and either fix the bug or increase the available heap."
32
+
labels:
33
+
service: klass-api
34
+
namespace: dapla-metadata
35
+
severity: critical
36
+
environment: prod
37
+
12
38
- alert: High number of errors
13
-
expr: (100 * sum by (app, namespace) (rate(logback_events_total{app="klass-api",level="error"}[3m])) / sum by (app, namespace) (rate(logback_events_total{app="klass-api"}[3m]))) > 10
39
+
expr: (100 * sum by (app, namespace) (rate(logback_events_total{application="klass",level="error"}[3m])) / sum by (app, namespace) (rate(logback_events_total{application="klass"}[3m]))) > 1
Copy file name to clipboard. Expand all lines: .nais/test/klass-api-alerts.yaml
+16 -3. Lines changed: 16 additions & 3 deletions
Original file line number
Diff line number
Diff line change
@@ -9,13 +9,26 @@ spec:
9
9
groups:
10
10
- name: dapla-metadata
11
11
rules:
12
+
- alert: HTTP error responses
13
+
expr: 100 * sum(rate(nginx_ingress_controller_request_duration_seconds_count{ingress="klass-nais-ingress-eaceac17",status!~"[4-5].*"}[10m])) by (ingress) / sum(rate(nginx_ingress_controller_request_duration_seconds_count{ingress="klass-nais-ingress-eaceac17"}[10m])) by (ingress) < 75.0
14
+
for: 3m
15
+
annotations:
16
+
title: "HTTP error responses"
17
+
consequence: "A high number of HTTP requests are receiving error responses. This may indicate a problem with the application."
18
+
action: "Investigate whether this indicates an app failure or user errors."
19
+
labels:
20
+
service: klass-api
21
+
namespace: dapla-metadata
22
+
severity: critical
23
+
environment: test
24
+
12
25
- alert: High heap memory usage
13
26
expr: (100 * sum by (instance) (jvm_memory_used_bytes{application="klass", area="heap"})) / (sum by (instance) (jvm_memory_max_bytes{application="klass", area="heap"}) ) > 70.0
14
27
for: 3m
15
28
annotations:
16
29
title: "High heap memory usage"
17
30
consequence: "If this increase continues then the app could run out of memory and either lock or crash."
18
-
action: "Immediate: Restart the app from the Nais console\nShort term: Investigate the cause of high heap usage and either fix the bug or "
31
+
action: "Immediate: Restart the app from the Nais console\nShort term: Investigate the cause of high heap usage and either fix the bug or increase the available heap."
0 commit comments