@@ -213,6 +213,7 @@ resource "azurerm_monitor_metric_alert" "cosmos_biz_db_normalized_ru_exceeded" {
213213 window_size = " PT5M"
214214 frequency = " PT5M"
215215 auto_mitigate = false
216+ enabled = false # TODO disabled TMP
216217
217218
218219 # Metric info
@@ -247,10 +248,16 @@ resource "azurerm_monitor_metric_alert" "cosmos_biz_db_normalized_ru_exceeded" {
247248 tags = module. tag_config . tags
248249}
249250
251+
252+ # In general, for a production workload, if you see between 1% and 5% of requests with 429s,
253+ # and your end-to-end latency is acceptable, this is a healthy sign that the RU/s are being fully utilized.
254+ # In this case, the normalized RU consumption metric reaching 100% only means that in a given second,
255+ # at least one partition key range used all its provisioned throughput.
256+ # This is acceptable because the overall rate of 429s is still low. No further action is required.
250257resource "azurerm_monitor_metric_alert" "cosmos_biz_db_provisioned_throughput_exceeded" { # https://github.com/pagopa/terraform-azurerm-v3/blob/58f14dc120e10bd3515bcc34e0685e74d1d11047/cosmosdb_account/main.tf#L205
251258 count = var. env_short == " p" ? 1 : 0
252259
253- name = " [${ var . domain != null ? " ${ var . domain } | " : " " } ${ module . bizevents_datastore_cosmosdb_account . name } ] 409 Throttling Errors Exceeded"
260+ name = " [${ var . domain != null ? " ${ var . domain } | " : " " } ${ module . bizevents_datastore_cosmosdb_account . name } ] 429 Throttling Errors Exceeded"
254261 resource_group_name = azurerm_resource_group. bizevents_rg . name
255262 scopes = [module . bizevents_datastore_cosmosdb_account . id ]
256263 description = " A collection throughput (RU/s) exceed provisioned throughput, and it's raising 429 errors. Please, consider to increase RU. Runbook: not needed."
@@ -267,7 +274,7 @@ resource "azurerm_monitor_metric_alert" "cosmos_biz_db_provisioned_throughput_ex
267274 metric_name = " TotalRequestUnits"
268275 aggregation = " Total"
269276 operator = " GreaterThan"
270- threshold = 1
277+ threshold = 100 # https://learn.microsoft.com/en-us/azure/cosmos-db/monitor-normalized-request-units?utm_source=chatgpt.com#what-to-expect-and-do-when-normalized-rus-is-higher
271278 skip_metric_validation = false
272279
273280 dimension {
0 commit comments