diff --git a/CHANGELOG.md b/CHANGELOG.md index 19f98b9..a34555c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,16 @@ to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). ## [Unreleased] +## [0.4.0] - 2025-10-13 + +[Compare with previous version](https://github.com/sparkfabrik/terraform-google-services-monitoring/compare/0.3.0...0.4.0) + +### Changed + +- Rename tf file from `cloud-sql.tf` to `cloud_sql.tf`. +- Rename tf file from `kyverno_log_alert.tf` to `kyverno.tf`. +- Add cert-manager missing issuer log alert. + ## [0.3.0] - 2025-10-07 [Compare with previous version](https://github.com/sparkfabrik/terraform-google-services-monitoring/compare/0.2.0...0.3.0) diff --git a/README.md b/README.md index 18b825f..e2738e8 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,9 @@ Supported services: - Kyverno - Error logs for admission-controller, background-controller, cleanup-controller, reports-controller - - Metric threshold (optional) + +- cert-manager + - Error logs for cert-manager controller when an Issuer or ClusterIssuer is missing ## Providers @@ -33,6 +35,7 @@ Supported services: | Name | Description | Type | Default | Required | |------|-------------|------|---------|:--------:| +| [cert\_manager](#input\_cert\_manager) | Configuration for cert-manager missing issuer log alert. Allows customization of project, cluster, namespace, notification channels, alert documentation, enablement, extra filters, auto-close timing, and notification rate limiting. |
object({
enabled = optional(bool, true)
cluster_name = string
project_id = optional(string, null)
namespace = optional(string, "cert-manager")
notification_enabled = optional(bool, true)
notification_channels = optional(list(string), [])
logmatch_notification_rate_limit = optional(string, "300s")
alert_documentation = optional(string, null)
auto_close_seconds = optional(number, 3600)
filter_extra = optional(string, "")
})
| n/a | yes | | [cloud\_sql](#input\_cloud\_sql) | Configuration for Cloud SQL monitoring alerts. Supports customization of project, auto-close timing, notification channels, and per-instance alert thresholds for CPU, memory, and disk utilization. |
object({
project_id = optional(string, null)
auto_close = optional(string, "86400s") # default 24h
notification_enabled = optional(bool, true)
notification_channels = optional(list(string), [])
instances = optional(map(object({
cpu_utilization = optional(list(object({
severity = optional(string, "WARNING"),
threshold = optional(number, 0.90)
alignment_period = optional(string, "120s")
duration = optional(string, "300s")
})), [
{
threshold = 0.85,
duration = "1200s",
},
{
severity = "CRITICAL",
threshold = 1,
duration = "300s",
alignment_period = "60s",
}
])
memory_utilization = optional(list(object({
severity = optional(string, "WARNING"),
threshold = optional(number, 0.90)
alignment_period = optional(string, "300s")
duration = optional(string, "300s")
})), [
{
severity = "WARNING",
},
{
severity = "CRITICAL",
threshold = 0.95,
}
])
disk_utilization = optional(list(object({
severity = optional(string, "WARNING"),
threshold = optional(number, 0.85)
alignment_period = optional(string, "300s")
duration = optional(string, "600s")
})), [
{
severity = "WARNING",
},
{
severity = "CRITICAL",
threshold = 0.95,
}
])
})), {})
})
| n/a | yes | | [kyverno](#input\_kyverno) | Configuration for Kyverno monitoring alerts. Allows customization of cluster name, project, notification channels, alert documentation, metric thresholds, auto-close timing, enablement, extra filters, and namespace. |
object({
enabled = optional(bool, true)
cluster_name = string
project_id = optional(string, null)
notification_enabled = optional(bool, true)
notification_channels = optional(list(string), [])
# Rate limit for notifications, e.g. "300s" for 5 minutes, used only for log match alerts
logmatch_notification_rate_limit = optional(string, "300s")
alert_documentation = optional(string, null)
auto_close_seconds = optional(number, 3600)
filter_extra = optional(string, "")
namespace = optional(string, "kyverno")
})
| n/a | yes | | [notification\_channels](#input\_notification\_channels) | List of notification channel IDs to notify when an alert is triggered | `list(string)` | `[]` | no | @@ -50,6 +53,7 @@ Supported services: | Name | Type | |------|------| +| [google_monitoring_alert_policy.cert_manager_logmatch_alert](https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/monitoring_alert_policy) | resource | | [google_monitoring_alert_policy.cloud_sql_cpu_utilization](https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/monitoring_alert_policy) | resource | | [google_monitoring_alert_policy.cloud_sql_disk_utilization](https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/monitoring_alert_policy) | resource | | [google_monitoring_alert_policy.cloud_sql_memory_utilization](https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/monitoring_alert_policy) | resource | diff --git a/cert_manager.tf b/cert_manager.tf new file mode 100644 index 0000000..a27df28 --- /dev/null +++ b/cert_manager.tf @@ -0,0 +1,70 @@ +locals { + cert_manager_project_id = var.cert_manager.project_id != null ? var.cert_manager.project_id : var.project_id + cert_manager_alert_documentation = ( + var.cert_manager.alert_documentation != null + ? var.cert_manager.alert_documentation + : <<-EOT + cert-manager is reporting that an Issuer or ClusterIssuer resource referenced by a Certificate cannot be found. This may indicate that the Issuer/ClusterIssuer has been deleted or is otherwise unavailable. + EOT + ) + cert_manager_notification_channels = var.cert_manager.notification_enabled ? (length(var.cert_manager.notification_channels) > 0 ? 
var.cert_manager.notification_channels : var.notification_channels) : []
+
+  cert_manager_log_filter = <<-EOT
+    (
+      (
+        resource.type="k8s_container"
+        AND resource.labels.project_id="${local.cert_manager_project_id}"
+        AND resource.labels.cluster_name="${var.cert_manager.cluster_name}"
+        AND resource.labels.namespace_name="${var.cert_manager.namespace}"
+      )
+      OR (
+        log_id("events")
+        AND resource.labels.project_id="${local.cert_manager_project_id}"
+        AND resource.labels.cluster_name="${var.cert_manager.cluster_name}"
+        AND (
+          jsonPayload.involvedObject.namespace="${var.cert_manager.namespace}"
+          OR jsonPayload.metadata.namespace="${var.cert_manager.namespace}"
+        )
+      )
+    )
+    AND (
+      textPayload=~"Referenced \"(Issuer|ClusterIssuer)\" not found"
+      OR jsonPayload.message=~"Referenced \"(Issuer|ClusterIssuer)\" not found"
+      OR jsonPayload.note=~"Referenced \"(Issuer|ClusterIssuer)\" not found"
+    )
+    ${trimspace(var.cert_manager.filter_extra)}
+  EOT
+}
+
+resource "google_monitoring_alert_policy" "cert_manager_logmatch_alert" {
+  count = (
+    var.cert_manager.enabled
+    && var.cert_manager.cluster_name != null
+    && try(trimspace(var.cert_manager.cluster_name), "") != "" # try() keeps a null cluster_name from failing inside trimspace(); null/blank => no policy
+  ) ? 1 : 0
+
+  display_name = "cert-manager missing Issuer/ClusterIssuer (cluster=${var.cert_manager.cluster_name}, namespace=${var.cert_manager.namespace})"
+  combiner     = "OR"
+  enabled      = var.cert_manager.enabled
+
+  conditions {
+    display_name = "Log match: cert-manager Issuer/ClusterIssuer not found"
+    condition_matched_log {
+      filter = local.cert_manager_log_filter
+    }
+  }
+
+  documentation {
+    content   = local.cert_manager_alert_documentation
+    mime_type = "text/markdown"
+  }
+
+  notification_channels = local.cert_manager_notification_channels
+
+  alert_strategy {
+    auto_close = "${var.cert_manager.auto_close_seconds}s" # number of seconds rendered as a duration string, e.g. "3600s"
+    notification_rate_limit {
+      period = var.cert_manager.logmatch_notification_rate_limit
+    }
+  }
+}
diff --git a/cloud-sql.tf b/cloud_sql.tf
similarity index 100%
rename from cloud-sql.tf
rename to cloud_sql.tf
diff --git a/examples/main.tf b/examples/main.tf
index b91edef..92ffa3b 100644
--- a/examples/main.tf
+++ b/examples/main.tf
@@ -49,13 +49,17 @@ module "example" {
   project_id = var.project_id
   cloud_sql  = local.cloud_sql
   kyverno = {
-    cluster_name           = "test-cluster"
-    enabled                = true
-    use_metric_threshold   = true
-    metric_threshold_count = 5
-    notification_channels  = []
+    cluster_name          = "test-cluster"
+    enabled               = true
+    notification_channels = []
     # Optional filter for log entries, exclude known non-actionable messages
     # e.g., "-textPayload:\"stale GroupVersion discovery: metrics.k8s.io/v1beta1\""
     filter_extra = "-textPayload:\"stale GroupVersion discovery: metrics.k8s.io/v1beta1\""
   }
+  cert_manager = {
+    cluster_name          = "test-cluster"
+    namespace             = "cert-manager"
+    enabled               = true
+    notification_channels = []
+  }
 }
diff --git a/examples/variables.tf b/examples/variables.tf
index 2a09651..c06bf12 100644
--- a/examples/variables.tf
+++ b/examples/variables.tf
@@ -13,16 +13,32 @@ variable "notification_channels" {
 variable "kyverno" {
   description = "Configuration for Kyverno monitoring alerts. 
Allows customization of cluster name, project, notification channels, alert documentation, metric thresholds, auto-close timing, enablement, extra filters, and namespace." type = object({ - enabled = optional(bool, true) - project_id = optional(string, null) - cluster_name = string - namespace = optional(string, "kyverno") - notification_enabled = optional(bool, true) - notification_channels = optional(list(string), []) - alert_documentation = optional(string, null) - metric_threshold_count = optional(number, 2) - metric_lookback_minutes = optional(number, 1) - auto_close_seconds = optional(number, 3600) - filter_extra = optional(string, "") + enabled = optional(bool, true) + cluster_name = string + project_id = optional(string, null) + notification_enabled = optional(bool, true) + notification_channels = optional(list(string), []) + # Rate limit for notifications, e.g. "300s" for 5 minutes, used only for log match alerts + logmatch_notification_rate_limit = optional(string, "300s") + alert_documentation = optional(string, null) + auto_close_seconds = optional(number, 3600) + filter_extra = optional(string, "") + namespace = optional(string, "kyverno") + }) +} + +variable "cert_manager" { + description = "Configuration for cert-manager missing issuer log alert. Allows customization of project, cluster, namespace, notification channels, alert documentation, enablement, extra filters, auto-close timing, and notification rate limiting." 
+  type = object({
+    enabled                          = optional(bool, true)
+    cluster_name                     = string
+    project_id                       = optional(string, null)
+    namespace                        = optional(string, "cert-manager")
+    notification_enabled             = optional(bool, true)
+    notification_channels            = optional(list(string), [])
+    logmatch_notification_rate_limit = optional(string, "300s")
+    alert_documentation              = optional(string, null)
+    auto_close_seconds               = optional(number, 3600)
+    filter_extra                     = optional(string, "")
   })
 }
diff --git a/kyverno_log_alert.tf b/kyverno.tf
similarity index 97%
rename from kyverno_log_alert.tf
rename to kyverno.tf
index 048a1b4..45f3d5c 100644
--- a/kyverno_log_alert.tf
+++ b/kyverno.tf
@@ -21,6 +21,7 @@ resource "google_monitoring_alert_policy" "kyverno_logmatch_alert" {
   count = (
     var.kyverno.enabled
-    && trimspace(var.kyverno.cluster_name) != ""
+    && var.kyverno.cluster_name != null
+    && try(trimspace(var.kyverno.cluster_name), "") != "" # try() keeps a null cluster_name from failing inside trimspace(); null/blank => no policy
   ) ? 1 : 0
 
   display_name = "Kyverno controllers ERROR logs (namespace=${var.kyverno.namespace})"
diff --git a/variables.tf b/variables.tf
index 14a0392..49c0f99 100644
--- a/variables.tf
+++ b/variables.tf
@@ -82,3 +82,19 @@ variable "kyverno" {
     namespace                        = optional(string, "kyverno")
   })
 }
+
+variable "cert_manager" {
+  description = "Configuration for cert-manager missing issuer log alert. Allows customization of project, cluster, namespace, notification channels, alert documentation, enablement, extra filters, auto-close timing, and notification rate limiting."
+  type = object({
+    enabled                          = optional(bool, true)
+    cluster_name                     = string
+    project_id                       = optional(string, null)
+    namespace                        = optional(string, "cert-manager")
+    notification_enabled             = optional(bool, true)
+    notification_channels            = optional(list(string), [])
+    logmatch_notification_rate_limit = optional(string, "300s")
+    alert_documentation              = optional(string, null)
+    auto_close_seconds               = optional(number, 3600)
+    filter_extra                     = optional(string, "")
+  })
+}