@@ -5,31 +5,31 @@ evaluation_interval: 1m
55
66tests :
77 # #
8- # Test 1: No alert when origin request rate is below the 80% threshold (20 req/s)
8+ # Test 1: No alert when origin request rate is below the 80% threshold (10 req/s)
99 # #
10- - name : No alert when traffic is below 80% of load-tested capacity
10+ - name : No alert when traffic is below 80% of rightsized capacity
1111 interval : 1m
1212 input_series :
1313 - series : ' fastly_rt_origin_fetches_total{service_name="staging data.gov.uk"}'
14- values : ' 0+1200x30 ' # 1200 counter increment per minute = 20 req/s — below 44.6 threshold
14+ values : ' 0+600x30 ' # counter increases by 600 per minute = 10 req/s — below the 20.0 req/s threshold
1515
1616 alert_rule_test :
1717 - alertname : DataGovUkHighTrafficRate
1818 eval_time : 30m
1919 exp_alerts : []
2020
2121 # #
22- # Test 2: Alert fires when origin request rate is sustained above 80% threshold (50 req/s)
22+ # Test 2: Alert fires when origin request rate is sustained above 80% threshold (22 req/s)
2323 #
24- # With rate([5m]) and a constant 3000 /min (50 req/s) increment:
24+ # With rate([5m]) and a constant increment of 1320/min (22 req/s):
2525 # - Condition first becomes TRUE at t=5m (full 5-minute window populated)
2626 # - With for: 5m the alert FIRES at t=10m
2727 # #
28- - name : Alert fires when traffic exceeds 80% of load-tested capacity for 5+ minutes
28+ - name : Alert fires when traffic exceeds 80% of rightsized capacity for 5+ minutes
2929 interval : 1m
3030 input_series :
3131 - series : ' fastly_rt_origin_fetches_total{service_name="staging data.gov.uk"}'
32- values : ' 0+3000x30 ' # 3000 counter increment per minute = 50 req/s — exceeds 44.6 threshold
32+ values : ' 0+1320x30 ' # counter increases by 1320 per minute = 22 req/s — exceeds the 20.0 req/s threshold
3333
3434 alert_rule_test :
3535 - alertname : DataGovUkHighTrafficRate
@@ -43,13 +43,13 @@ tests:
4343 severity : warning
4444 destination : slack-datagovuk-technical
4545 exp_annotations :
46- summary : data.gov.uk origin request rate exceeds 80% of load-tested capacity
46+ summary : data.gov.uk origin request rate exceeds 80% of rightsized capacity
4747 description : >-
4848 The request rate to the data.gov.uk origin has exceeded 80% of the
49- peak capacity established by load testing (55.8 req/s at 100 virtual users).
49+ rightsized capacity target (25 req/s, validated by load testing at 45 virtual users).
5050
51- Current rate: 50 req/s
52- Threshold (80% of 55.8 req/s): 44.6 req/s
51+ Current rate: 22 req/s
52+ Threshold (80% of 25 req/s): 20.0 req/s
5353
54- Consider scaling the find and CKAN pods before traffic reaches 100% capacity.
54+ Consider scaling the find and CKAN pods before traffic reaches 100% capacity (25 req/s).
5555 runbook_url : https://docs.publishing.service.gov.uk/manual/alerts/data-gov-uk-high-traffic-alert.html
0 commit comments