Skip to content

Commit e5452c6

Browse files
committed
prometheus.latency.rules.yml: Mark cluster wide manager metrics with labels
(cherry picked from commit e9005b2)
1 parent 4fe3630 commit e5452c6

File tree

2 files changed

+24
-0
lines changed

2 files changed

+24
-0
lines changed

docs/source/procedures/datadog/datadog.rules.yml

+12
Original file line number | Diff line number | Diff line change
@@ -481,3 +481,15 @@ groups:
481481
labels:
482482
by: "cluster"
483483
dd: "1"
484+
- record: scylla_manager_healthcheck_cql_status_ag
485+
expr: sum(scylla_manager_healthcheck_cql_status) by(cluster)
486+
labels:
487+
by: "cluster"
488+
status: "1"
489+
dd: "1"
490+
- record: scylla_manager_healthcheck_cql_status_ag
491+
expr: count(scylla_manager_healthcheck_cql_status==0) by(cluster)
492+
labels:
493+
by: "cluster"
494+
status: "0"
495+
dd: "1"

prometheus/prom_rules/prometheus.latency.rules.yml

+12
Original file line number | Diff line number | Diff line change
@@ -37,12 +37,24 @@ groups:
3737
expr: errors:local_failed + errors:operation_unavailable
3838
- record: manager:repair_done_ts
3939
expr: timestamp(sum(changes(scylla_manager_scheduler_run_total{status="DONE",type="repair"}[2m])) by (cluster) > 0) or on(cluster) manager:repair_done_ts
40+
labels:
41+
dd: "1"
42+
by: "cluster"
4043
- record: manager:backup_done_ts
4144
expr: timestamp(sum(changes(scylla_manager_scheduler_run_total{status="DONE",type="backup"}[2m])) by (cluster) > 0) or on(cluster) manager:backup_done_ts
45+
labels:
46+
dd: "1"
47+
by: "cluster"
4248
- record: manager:repair_fail_ts
4349
expr: timestamp(sum(changes(scylla_manager_scheduler_run_total{status="ERROR",type="repair"}[2m])) by (cluster) > 0) or on(cluster) manager:repair_fail_ts
50+
labels:
51+
dd: "1"
52+
by: "cluster"
4453
- record: manager:backup_fail_ts
4554
expr: timestamp(sum(changes(scylla_manager_scheduler_run_total{status="ERROR",type="backup"}[2m])) by (cluster) > 0) or on(cluster) manager:backup_fail_ts
55+
labels:
56+
dd: "1"
57+
by: "cluster"
4658
- record: manager:repair_progress
4759
expr: (max(scylla_manager_scheduler_run_indicator{type="repair"}) by (cluster) >bool 0)*((max(scylla_manager_repair_token_ranges_total) by(cluster)<= 0)*0 or on(cluster) (sum(scylla_manager_repair_token_ranges_success>=0) by (cluster) + sum(scylla_manager_repair_token_ranges_error>=0) by (cluster))/sum(scylla_manager_repair_token_ranges_total>=0) by (cluster))
4860
- record: manager:backup_progress

0 commit comments

Comments
 (0)