Skip to content

Commit c0a0c87

Browse files
chore: Bootstrap alarms (#75)
1 parent e1b84b9 commit c0a0c87

File tree

3 files changed

+128
-2
lines changed

3 files changed

+128
-2
lines changed

bootstrap/main.tf

+24-1
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ module "ogmios_v1_feature" {
2323
api_key_salt = var.api_key_salt
2424
dcu_per_frame = var.dcu_per_frame
2525
dns_zone = var.dns_zone
26+
resources = var.operator_resources
2627
}
2728

2829
module "ogmios_v1_proxy" {
@@ -76,8 +77,26 @@ module "ogmios_instances" {
7677
ogmios_image = each.value.ogmios_image
7778
node_private_dns = each.value.node_private_dns
7879
ogmios_version = each.value.ogmios_version
79-
tolerations = each.value.tolerations
8080
replicas = each.value.replicas
81+
tolerations = coalesce(each.value.tolerations, [
82+
{
83+
effect = "NoSchedule"
84+
key = "demeter.run/compute-profile"
85+
operator = "Exists"
86+
},
87+
{
88+
effect = "NoSchedule"
89+
key = "demeter.run/compute-arch"
90+
operator = "Equal"
91+
value = "arm64"
92+
},
93+
{
94+
effect = "NoSchedule"
95+
key = "demeter.run/availability-sla"
96+
operator = "Equal"
97+
value = "consistent"
98+
}
99+
])
81100
}
82101

83102
module "ogmios_services" {
@@ -90,4 +109,8 @@ module "ogmios_services" {
90109
network = each.value.network
91110
}
92111

112+
module "ogmios_monitoring" {
113+
source = "./monitoring"
93114

115+
o11y_datasource_uid = var.o11y_datasource_uid
116+
}

bootstrap/monitoring/main.tf

+100
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
terraform {
2+
required_providers {
3+
grafana = {
4+
source = "grafana/grafana"
5+
version = ">= 1.28.2"
6+
}
7+
}
8+
}
9+
10+
variable "o11y_datasource_uid" {
11+
type = string
12+
}
13+
14+
resource "grafana_folder" "folder" {
15+
title = "Ogmios"
16+
}
17+
18+
resource "grafana_rule_group" "instance_is_down" {
19+
name = "Ogmios is down"
20+
folder_uid = grafana_folder.folder.uid
21+
interval_seconds = 60
22+
org_id = 1
23+
24+
rule {
25+
name = "Ogmios is down"
26+
condition = "B"
27+
for = "5m"
28+
no_data_state = "OK"
29+
exec_err_state = "OK"
30+
annotations = {
31+
description = "We are not receiving more metrics from a particular Ogmios instance.",
32+
summary = "{{ range $k, $v := $values -}}\n{{ if (match \"A[0-9]+\" $k) -}}\nPod: {{ $v.Labels.pod }}\n{{ end }}\n{{ end }}"
33+
}
34+
35+
data {
36+
ref_id = "A"
37+
datasource_uid = var.o11y_datasource_uid
38+
39+
relative_time_range {
40+
from = 3600
41+
to = 0
42+
}
43+
44+
model = jsonencode({
45+
editorMode = "code",
46+
expr = "count(avg_over_time(ogmios_connected[10m] offset 1h)) by (pod) unless count(avg_over_time(ogmios_connected[10m])) by (pod)",
47+
hide = false,
48+
intervalMs = 1000,
49+
legendFormat = "__auto",
50+
maxDataPoints = 43200,
51+
range = true,
52+
refId = "A"
53+
})
54+
}
55+
56+
data {
57+
ref_id = "B"
58+
datasource_uid = "-100"
59+
60+
relative_time_range {
61+
from = 3600
62+
to = 0
63+
}
64+
65+
model = jsonencode({
66+
conditions = [
67+
{
68+
evaluator = {
69+
params = [0]
70+
type = "gt"
71+
},
72+
operator = {
73+
type = "and"
74+
},
75+
query = {
76+
params : [
77+
"A"
78+
]
79+
},
80+
reducer = {
81+
params = [],
82+
type = "count_non_null"
83+
},
84+
type = "query"
85+
}
86+
],
87+
datasource = {
88+
type = "__expr__",
89+
uid = "-100"
90+
},
91+
expression = "A",
92+
hide = false,
93+
intervalMs = 1000,
94+
maxDataPoints = 43200,
95+
refId = "B",
96+
type = "classic_conditions"
97+
})
98+
}
99+
}
100+
}

bootstrap/variables.tf

+4-1
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,10 @@ variable "versions" {
3232
default = ["5", "6"]
3333
}
3434

35+
variable "o11y_datasource_uid" {
36+
type = string
37+
}
38+
3539
// operator settings
3640

3741
variable "operator_image_tag" {
@@ -145,7 +149,6 @@ variable "instances" {
145149
ogmios_image = string
146150
node_private_dns = string
147151
ogmios_version = string
148-
compute_arch = string
149152
replicas = number
150153
resources = optional(object({
151154
limits = object({

0 commit comments

Comments
 (0)