@@ -197,22 +197,22 @@ spec:
197
197
as quickly as possible.
198
198
199
199
- alert : NovaInstanceInError
200
- for : 15m
200
+ for : 10m
201
201
expr : |
202
202
openstack_nova_server_status{status="ERROR"}
203
203
labels :
204
- severity : warning
204
+ severity : critical
205
205
annotations :
206
206
summary : " [`{{`{{$labels.id}}`}}`] Instance in ERROR state"
207
207
description : >
208
- The instance `{{`{{$labels.id}}`}}` has been in ERROR state for over 15 minutes. It must
208
+ The instance `{{`{{$labels.id}}`}}` has been in ERROR state for over 10 minutes. It must
209
209
be cleaned up or removed in order to provide a consistent customer experience.
210
210
211
211
- alert : NovaInstanceInBuilding
212
212
for : 15m
213
213
expr : ' openstack_nova_server_status == 1'
214
214
labels :
215
- severity : warning
215
+ severity : critical
216
216
annotations :
217
217
summary : " [`{{`{{$labels.id}}`}}`] Instance in BUILD state"
218
218
description : >
@@ -222,7 +222,7 @@ spec:
222
222
for : 15m
223
223
expr : ' openstack_nova_server_status == 10'
224
224
labels :
225
- severity : warning
225
+ severity : critical
226
226
annotations :
227
227
summary : " [`{{`{{$labels.id}}`}}`] Instance in RESIZE state"
228
228
description : >
@@ -232,7 +232,7 @@ spec:
232
232
for : 15m
233
233
expr : ' openstack_nova_server_status == 13'
234
234
labels :
235
- severity : warning
235
+ severity : critical
236
236
annotations :
237
237
summary : " [`{{`{{$labels.id}}`}}`] Instance in UNKNOWN state"
238
238
description : >
@@ -242,21 +242,21 @@ spec:
242
242
for : 15m
243
243
expr : ' openstack_nova_server_status == 14'
244
244
labels :
245
- severity : warning
245
+ severity : critical
246
246
annotations :
247
247
summary : " [`{{`{{$labels.id}}`}}`] Instance in VERIFY_RESIZE state"
248
248
description : >
249
249
The instance `{{`{{$labels.id}}`}}` has been in VERIFY_RESIZE state for over 15 minutes.
250
250
251
251
- alert : NovaInstanceInMIGRATING
252
- for : 15m
252
+ for : 30m
253
253
expr : ' openstack_nova_server_status == 15'
254
254
labels :
255
- severity : warning
255
+ severity : critical
256
256
annotations :
257
257
summary : " [`{{`{{$labels.id}}`}}`] Instance in MIGRATING state"
258
258
description : >
259
- The instance `{{`{{$labels.id}}`}}` has been in MIGRATING state for over 15 minutes.
259
+ The instance `{{`{{$labels.id}}`}}` has been in MIGRATING state for over 30 minutes.
260
260
261
261
- alert : NovaFailureRisk
262
262
for : 6h
@@ -272,7 +272,7 @@ spec:
272
272
failures occur. Please ensure that adequate amount of infrastructure is assigned to this
273
273
deployment to prevent this.
274
274
275
- - alert : NovaCapacity
275
+ - alert : NovaCapacityNearFull
276
276
for : 6h
277
277
expr : |
278
278
sum (
@@ -286,9 +286,59 @@ spec:
286
286
) * 100 > 75
287
287
labels :
288
288
severity : warning
289
+ annotations :
290
+ summary : " [nova] near full Capacity risk"
291
+ description : >
292
+ The cloud capacity is currently at `{{`{{$value}}`}}` which means there is a risk of running
293
+ out of capacity due to the timeline required to add new nodes. Please ensure that adequate
294
+ amount of infrastructure is assigned to this deployment to prevent this.
295
+
296
+ - alert : NovaCapacityFull
297
+ for : 6h
298
+ expr : |
299
+ sum (
300
+ openstack_nova_memory_used_bytes
301
+ + on(hostname) group_left(adminState)
302
+ (0 * openstack_nova_agent_state{exported_service="nova-compute",adminState="enabled"})
303
+ ) / sum (
304
+ openstack_nova_memory_available_bytes
305
+ + on(hostname) group_left(adminState)
306
+ (0 * openstack_nova_agent_state{exported_service="nova-compute",adminState="enabled"})
307
+ ) * 100 > 85
308
+ labels :
309
+ severity : critical
289
310
annotations :
290
311
summary : " [nova] Capacity risk"
291
312
description : >
292
313
The cloud capacity is currently at `{{`{{$value}}`}}` which means there is a risk of running
293
314
out of capacity due to the timeline required to add new nodes. Please ensure that adequate
294
315
amount of infrastructure is assigned to this deployment to prevent this.
316
+
317
+ - name : octavia
318
+ rules :
319
+ - alert : LoadbalancerDown
320
+ for : 5m
321
+ expr : ' openstack_loadbalancer_up != 1'
322
+ labels :
323
+ severity : critical
324
+ annotations :
325
+ summary : " OpenStack loadbalancer service down"
326
+ description : " OpenStack loadbalancer service down"
327
+
328
+ - alert : LoadbalancerNotActive
329
+ for : 5m
330
+ expr : openstack_loadbalancer_loadbalancer_status{provisioning_status!="ACTIVE"}
331
+ labels :
332
+ severity : critical
333
+ annotations :
334
+ summary : " OpenStack loadbalancer `{{`{{$labels.name}}`}}` provisioning status is not ACTIVE"
335
+ description : " OpenStack loadbalancer `{{`{{$labels.name}}`}}` provisioning status is not ACTIVE"
336
+
337
+ - alert : LoadbalancerPoolNotActive
338
+ for : 5m
339
+ expr : openstack_loadbalancer_pool_status{provisioning_status!="ACTIVE"}
340
+ labels :
341
+ severity : critical
342
+ annotations :
343
+ summary : " OpenStack loadbalancer pool `{{`{{$labels.name}}`}}` provisioning status is not ACTIVE"
344
+ description : " OpenStack loadbalancer pool `{{`{{$labels.name}}`}}` provisioning status is not ACTIVE"
0 commit comments