Skip to content

Commit 3e0cb65

Browse files
committed
add rule for loadbalancer
1 parent 3f0ff32 commit 3e0cb65

File tree

2 files changed

+62
-12
lines changed

2 files changed

+62
-12
lines changed
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
---
22
apiVersion: v1
33
name: prometheus-openstack-exporter
4-
version: 0.4.4
4+
version: 0.4.5
55
appVersion: v1.7.0

charts/prometheus-openstack-exporter/templates/prometheusrule.yaml

+61-11
Original file line numberDiff line numberDiff line change
@@ -197,22 +197,22 @@ spec:
197197
as quickly as possible.
198198
199199
- alert: NovaInstanceInError
200-
for: 15m
200+
for: 10m
201201
expr: |
202202
openstack_nova_server_status{status="ERROR"}
203203
labels:
204-
severity: warning
204+
severity: critical
205205
annotations:
206206
summary: "[`{{`{{$labels.id}}`}}`] Instance in ERROR state"
207207
description: >
208-
The instance `{{`{{$labels.id}}`}}` has been in ERROR state for over 15 minutes. It must
208+
The instance `{{`{{$labels.id}}`}}` has been in ERROR state for over 10 minutes. It must
209209
be cleaned up or removed in order to provide a consistent customer experience.
210210
211211
- alert: NovaInstanceInBuilding
212212
for: 15m
213213
expr: 'openstack_nova_server_status == 1'
214214
labels:
215-
severity: warning
215+
severity: critical
216216
annotations:
217217
summary: "[`{{`{{$labels.id}}`}}`] Instance in BUILD state"
218218
description: >
@@ -222,7 +222,7 @@ spec:
222222
for: 15m
223223
expr: 'openstack_nova_server_status == 10'
224224
labels:
225-
severity: warning
225+
severity: critical
226226
annotations:
227227
summary: "[`{{`{{$labels.id}}`}}`] Instance in RESIZE state"
228228
description: >
@@ -232,7 +232,7 @@ spec:
232232
for: 15m
233233
expr: 'openstack_nova_server_status == 13'
234234
labels:
235-
severity: warning
235+
severity: critical
236236
annotations:
237237
summary: "[`{{`{{$labels.id}}`}}`] Instance in UNKNOWN state"
238238
description: >
@@ -242,21 +242,21 @@ spec:
242242
for: 15m
243243
expr: 'openstack_nova_server_status == 14'
244244
labels:
245-
severity: warning
245+
severity: critical
246246
annotations:
247247
summary: "[`{{`{{$labels.id}}`}}`] Instance in VERIFY_RESIZE state"
248248
description: >
249249
The instance `{{`{{$labels.id}}`}}` has been in VERIFY_RESIZE state for over 15 minutes.
250250
251251
- alert: NovaInstanceInMIGRATING
252-
for: 15m
252+
for: 30m
253253
expr: 'openstack_nova_server_status == 15'
254254
labels:
255-
severity: warning
255+
severity: critical
256256
annotations:
257257
summary: "[`{{`{{$labels.id}}`}}`] Instance in MIGRATING state"
258258
description: >
259-
The instance `{{`{{$labels.id}}`}}` has been in MIGRATING state for over 15 minutes.
259+
The instance `{{`{{$labels.id}}`}}` has been in MIGRATING state for over 30 minutes.
260260
261261
- alert: NovaFailureRisk
262262
for: 6h
@@ -272,7 +272,7 @@ spec:
272272
failures occur. Please ensure that adequate amount of infrastructure is assigned to this
273273
deployment to prevent this.
274274
275-
- alert: NovaCapacity
275+
- alert: NovaCapacityNearFull
276276
for: 6h
277277
expr: |
278278
sum (
@@ -286,9 +286,59 @@ spec:
286286
) * 100 > 75
287287
labels:
288288
severity: warning
289+
annotations:
290+
summary: "[nova] near full Capacity risk"
291+
description: >
292+
The cloud capacity is currently at `{{`{{$value}}`}}` which means there is a risk of running
293+
out of capacity due to the timeline required to add new nodes. Please ensure that adequate
294+
amount of infrastructure is assigned to this deployment to prevent this.
295+
296+
- alert: NovaCapacityFull
297+
for: 6h
298+
expr: |
299+
sum (
300+
openstack_nova_memory_used_bytes
301+
+ on(hostname) group_left(adminState)
302+
(0 * openstack_nova_agent_state{exported_service="nova-compute",adminState="enabled"})
303+
) / sum (
304+
openstack_nova_memory_available_bytes
305+
+ on(hostname) group_left(adminState)
306+
(0 * openstack_nova_agent_state{exported_service="nova-compute",adminState="enabled"})
307+
) * 100 > 85
308+
labels:
309+
severity: critical
289310
annotations:
290311
summary: "[nova] Capacity risk"
291312
description: >
292313
The cloud capacity is currently at `{{`{{$value}}`}}` which means there is a risk of running
293314
out of capacity due to the timeline required to add new nodes. Please ensure that adequate
294315
amount of infrastructure is assigned to this deployment to prevent this.
316+
317+
- name: octavia
318+
rules:
319+
- alert: LoadbalancerDown
320+
for: 5m
321+
expr: 'openstack_loadbalancer_up != 1'
322+
labels:
323+
severity: critical
324+
annotations:
325+
summary: "OpenStack loadbalancer service down"
326+
description: "OpenStack loadbalancer service down"
327+
328+
- alert: LoadbalancerNotActive
329+
for: 5m
330+
expr: openstack_loadbalancer_loadbalancer_status{provisioning_status!="ACTIVE"}
331+
labels:
332+
severity: critical
333+
annotations:
334+
summary: "OpenStack loadbalancer `{{`{{$labels.name}}`}}` provisioning status is not ACTIVE"
335+
description: "OpenStack loadbalancer `{{`{{$labels.name}}`}}` provisioning status is not ACTIVE"
336+
337+
- alert: LoadbalancerPoolNotActive
338+
for: 5m
339+
expr: openstack_loadbalancer_pool_status{provisioning_status!="ACTIVE"}
340+
labels:
341+
severity: critical
342+
annotations:
343+
summary: "OpenStack loadbalancer pool `{{`{{$labels.name}}`}}` provisioning status is not ACTIVE"
344+
description: "OpenStack loadbalancer pool `{{`{{$labels.name}}`}}` provisioning status is not ACTIVE"

0 commit comments

Comments
 (0)