diff --git a/CHANGELOG.md b/CHANGELOG.md index 19f98b9..a34555c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,16 @@ to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). ## [Unreleased] +## [0.4.0] - 2025-10-13 + +[Compare with previous version](https://github.com/sparkfabrik/terraform-google-services-monitoring/compare/0.3.0...0.4.0) + +### changed + +- Rename tf file from `cloud-sql.tf` to `cloud_sql.tf`. +- Rename tf file from `kyverno_log_alert.tf` to `kyverno.tf`. +- Add cert-manager missing issuer log alert. + ## [0.3.0] - 2025-10-07 [Compare with previous version](https://github.com/sparkfabrik/terraform-google-services-monitoring/compare/0.2.0...0.3.0) diff --git a/README.md b/README.md index 18b825f..e2738e8 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,9 @@ Supported services: - Kyverno - Error logs for admission-controller, background-controller, cleanup-controller, reports-controller - - Metric threshold (optional) + +- cert-manager + - Error logs for cert-manager controller when an Issuer or ClusterIssuer is missing ## Providers @@ -33,6 +35,7 @@ Supported services: | Name | Description | Type | Default | Required | |------|-------------|------|---------|:--------:| +| [cert\_manager](#input\_cert\_manager) | Configuration for cert-manager missing issuer log alert. Allows customization of project, cluster, namespace, notification channels, alert documentation, enablement, extra filters, auto-close timing, and notification rate limiting. |
object({
enabled = optional(bool, true)
cluster_name = string
project_id = optional(string, null)
namespace = optional(string, "cert-manager")
notification_enabled = optional(bool, true)
notification_channels = optional(list(string), [])
logmatch_notification_rate_limit = optional(string, "300s")
alert_documentation = optional(string, null)
auto_close_seconds = optional(number, 3600)
filter_extra = optional(string, "")
}) | n/a | yes |
| [cloud\_sql](#input\_cloud\_sql) | Configuration for Cloud SQL monitoring alerts. Supports customization of project, auto-close timing, notification channels, and per-instance alert thresholds for CPU, memory, and disk utilization. | object({
project_id = optional(string, null)
auto_close = optional(string, "86400s") # default 24h
notification_enabled = optional(bool, true)
notification_channels = optional(list(string), [])
instances = optional(map(object({
cpu_utilization = optional(list(object({
severity = optional(string, "WARNING"),
threshold = optional(number, 0.90)
alignment_period = optional(string, "120s")
duration = optional(string, "300s")
})), [
{
threshold = 0.85,
duration = "1200s",
},
{
severity = "CRITICAL",
threshold = 1,
duration = "300s",
alignment_period = "60s",
}
])
memory_utilization = optional(list(object({
severity = optional(string, "WARNING"),
threshold = optional(number, 0.90)
alignment_period = optional(string, "300s")
duration = optional(string, "300s")
})), [
{
severity = "WARNING",
},
{
severity = "CRITICAL",
threshold = 0.95,
}
])
disk_utilization = optional(list(object({
severity = optional(string, "WARNING"),
threshold = optional(number, 0.85)
alignment_period = optional(string, "300s")
duration = optional(string, "600s")
})), [
{
severity = "WARNING",
},
{
severity = "CRITICAL",
threshold = 0.95,
}
])
})), {})
}) | n/a | yes |
| [kyverno](#input\_kyverno) | Configuration for Kyverno monitoring alerts. Allows customization of cluster name, project, notification channels, alert documentation, metric thresholds, auto-close timing, enablement, extra filters, and namespace. | object({
enabled = optional(bool, true)
cluster_name = string
project_id = optional(string, null)
notification_enabled = optional(bool, true)
notification_channels = optional(list(string), [])
# Rate limit for notifications, e.g. "300s" for 5 minutes, used only for log match alerts
logmatch_notification_rate_limit = optional(string, "300s")
alert_documentation = optional(string, null)
auto_close_seconds = optional(number, 3600)
filter_extra = optional(string, "")
namespace = optional(string, "kyverno")
}) | n/a | yes |
| [notification\_channels](#input\_notification\_channels) | List of notification channel IDs to notify when an alert is triggered | `list(string)` | `[]` | no |
@@ -50,6 +53,7 @@ Supported services:
| Name | Type |
|------|------|
+| [google_monitoring_alert_policy.cert_manager_logmatch_alert](https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/monitoring_alert_policy) | resource |
| [google_monitoring_alert_policy.cloud_sql_cpu_utilization](https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/monitoring_alert_policy) | resource |
| [google_monitoring_alert_policy.cloud_sql_disk_utilization](https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/monitoring_alert_policy) | resource |
| [google_monitoring_alert_policy.cloud_sql_memory_utilization](https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/monitoring_alert_policy) | resource |
diff --git a/cert_manager.tf b/cert_manager.tf
new file mode 100644
index 0000000..a27df28
--- /dev/null
+++ b/cert_manager.tf
@@ -0,0 +1,70 @@
+# Derived values for the cert-manager "missing Issuer/ClusterIssuer" log alert.
+locals {
+  # Alert-specific project override; falls back to the module-wide project.
+  cert_manager_project_id = var.cert_manager.project_id != null ? var.cert_manager.project_id : var.project_id
+
+  # Null-safe cluster name for use inside string templates. Interpolating a
+  # null value into a template is an evaluation error, which would fail the
+  # plan before the alert policy's `count` guard could skip creation.
+  cert_manager_cluster_name = var.cert_manager.cluster_name == null ? "" : var.cert_manager.cluster_name
+
+  # Caller-supplied documentation wins; otherwise use the built-in text.
+  cert_manager_alert_documentation = (
+    var.cert_manager.alert_documentation != null
+    ? var.cert_manager.alert_documentation
+    : <<-EOT
+    cert-manager is reporting that an Issuer or ClusterIssuer resource referenced by a Certificate cannot be found. This may indicate that the Issuer/ClusterIssuer has been deleted or is otherwise unavailable.
+    EOT
+  )
+
+  # Notifications disabled -> no channels. Otherwise prefer the alert-specific
+  # channels and fall back to the module-wide list when none are given.
+  cert_manager_notification_channels = var.cert_manager.notification_enabled ? (length(var.cert_manager.notification_channels) > 0 ? var.cert_manager.notification_channels : var.notification_channels) : []
+
+  # Log filter used by the alert condition:
+  #   scope - k8s_container entries from the configured namespace, OR entries
+  #           of the "events" log whose involvedObject/metadata namespace
+  #           matches, both restricted to the configured project and cluster;
+  #   match - the 'Referenced "Issuer|ClusterIssuer" not found' message in
+  #           textPayload, jsonPayload.message or jsonPayload.note.
+  # `filter_extra` is appended verbatim (after trimming) as the last line.
+  cert_manager_log_filter = <<-EOT
+    (
+    (
+    resource.type="k8s_container"
+    AND resource.labels.project_id="${local.cert_manager_project_id}"
+    AND resource.labels.cluster_name="${local.cert_manager_cluster_name}"
+    AND resource.labels.namespace_name="${var.cert_manager.namespace}"
+    )
+    OR (
+    log_id("events")
+    AND resource.labels.project_id="${local.cert_manager_project_id}"
+    AND resource.labels.cluster_name="${local.cert_manager_cluster_name}"
+    AND (
+    jsonPayload.involvedObject.namespace="${var.cert_manager.namespace}"
+    OR jsonPayload.metadata.namespace="${var.cert_manager.namespace}"
+    )
+    )
+    )
+    AND (
+    textPayload=~"Referenced \"(Issuer|ClusterIssuer)\" not found"
+    OR jsonPayload.message=~"Referenced \"(Issuer|ClusterIssuer)\" not found"
+    OR jsonPayload.note=~"Referenced \"(Issuer|ClusterIssuer)\" not found"
+    )
+    ${trimspace(var.cert_manager.filter_extra)}
+    EOT
+}
+
+# Log-match alert policy raised when cert-manager logs or events report that a
+# referenced Issuer or ClusterIssuer was not found.
+resource "google_monitoring_alert_policy" "cert_manager_logmatch_alert" {
+  # Create the policy only when the alert is enabled and a non-blank cluster
+  # name is set. The null test has to guard the trimspace() call itself:
+  # trimspace(null) is an error, so a separate `!= null` operand placed after
+  # it in the && chain cannot protect it.
+  count = (
+    var.cert_manager.enabled
+    && (var.cert_manager.cluster_name == null ? false : trimspace(var.cert_manager.cluster_name) != "")
+  ) ? 1 : 0
+
+  display_name = "cert-manager missing Issuer/ClusterIssuer (cluster=${var.cert_manager.cluster_name}, namespace=${var.cert_manager.namespace})"
+  combiner     = "OR"
+  # Always true when an instance exists, because count is 0 when disabled.
+  enabled = var.cert_manager.enabled
+
+  conditions {
+    display_name = "Log match: cert-manager Issuer/ClusterIssuer not found"
+    condition_matched_log {
+      filter = local.cert_manager_log_filter
+    }
+  }
+
+  documentation {
+    content   = local.cert_manager_alert_documentation
+    mime_type = "text/markdown"
+  }
+
+  notification_channels = local.cert_manager_notification_channels
+
+  alert_strategy {
+    # auto_close_seconds is a number; the API field takes a duration string.
+    auto_close = "${var.cert_manager.auto_close_seconds}s"
+    notification_rate_limit {
+      period = var.cert_manager.logmatch_notification_rate_limit
+    }
+  }
+}
diff --git a/cloud-sql.tf b/cloud_sql.tf
similarity index 100%
rename from cloud-sql.tf
rename to cloud_sql.tf
diff --git a/examples/main.tf b/examples/main.tf
index b91edef..92ffa3b 100644
--- a/examples/main.tf
+++ b/examples/main.tf
@@ -49,13 +49,17 @@ module "example" {
project_id = var.project_id
cloud_sql = local.cloud_sql
kyverno = {
- cluster_name = "test-cluster"
- enabled = true
- use_metric_threshold = true
- metric_threshold_count = 5
- notification_channels = []
+ cluster_name = "test-cluster"
+ enabled = true
+ notification_channels = []
# Optional filter for log entries, exclude known non-actionable messages
# e.g., "-textPayload:\"stale GroupVersion discovery: metrics.k8s.io/v1beta1\""
filter_extra = "-textPayload:\"stale GroupVersion discovery: metrics.k8s.io/v1beta1\""
}
+ cert_manager = {
+ cluster_name = "test-cluster"
+ namespace = "cert-manager"
+ enabled = true
+ notification_channels = []
+ }
}
diff --git a/examples/variables.tf b/examples/variables.tf
index 2a09651..c06bf12 100644
--- a/examples/variables.tf
+++ b/examples/variables.tf
@@ -13,16 +13,32 @@ variable "notification_channels" {
+# Kyverno alert settings object. `cluster_name` is the only attribute without a
+# default; every other attribute is declared with optional(...).
+# NOTE(review): the description still mentions "metric thresholds", but this
+# change removes metric_threshold_count / metric_lookback_minutes from the
+# type - confirm whether the description should be updated as well.
variable "kyverno" {
description = "Configuration for Kyverno monitoring alerts. Allows customization of cluster name, project, notification channels, alert documentation, metric thresholds, auto-close timing, enablement, extra filters, and namespace."
type = object({
- enabled = optional(bool, true)
- project_id = optional(string, null)
- cluster_name = string
- namespace = optional(string, "kyverno")
- notification_enabled = optional(bool, true)
- notification_channels = optional(list(string), [])
- alert_documentation = optional(string, null)
- metric_threshold_count = optional(number, 2)
- metric_lookback_minutes = optional(number, 1)
- auto_close_seconds = optional(number, 3600)
- filter_extra = optional(string, "")
+ enabled = optional(bool, true)
+ cluster_name = string
+ project_id = optional(string, null)
+ notification_enabled = optional(bool, true)
+ notification_channels = optional(list(string), [])
+ # Rate limit for notifications, e.g. "300s" for 5 minutes, used only for log match alerts
+ logmatch_notification_rate_limit = optional(string, "300s")
+ alert_documentation = optional(string, null)
+ auto_close_seconds = optional(number, 3600)
+ filter_extra = optional(string, "")
+ namespace = optional(string, "kyverno")
+ })
+}
+
+# Example-level declaration of the cert-manager alert settings object.
+# `cluster_name` is the only attribute without a default.
+# NOTE(review): the visible part of examples/main.tf passes a literal object to
+# the module rather than var.cert_manager - confirm this variable is consumed.
+variable "cert_manager" {
+ description = "Configuration for cert-manager missing issuer log alert. Allows customization of project, cluster, namespace, notification channels, alert documentation, enablement, extra filters, auto-close timing, and notification rate limiting."
+ type = object({
+ enabled = optional(bool, true)
+ cluster_name = string
+ project_id = optional(string, null)
+ namespace = optional(string, "cert-manager")
+ notification_enabled = optional(bool, true)
+ notification_channels = optional(list(string), [])
+ logmatch_notification_rate_limit = optional(string, "300s")
+ alert_documentation = optional(string, null)
+ auto_close_seconds = optional(number, 3600)
+ filter_extra = optional(string, "")
})
}
diff --git a/kyverno_log_alert.tf b/kyverno.tf
similarity index 97%
rename from kyverno_log_alert.tf
rename to kyverno.tf
index 048a1b4..45f3d5c 100644
--- a/kyverno_log_alert.tf
+++ b/kyverno.tf
@@ -21,6 +21,7 @@ resource "google_monitoring_alert_policy" "kyverno_logmatch_alert" {
count = (
var.kyverno.enabled
&& trimspace(var.kyverno.cluster_name) != ""
+ && var.kyverno.cluster_name != null
) ? 1 : 0
display_name = "Kyverno controllers ERROR logs (namespace=${var.kyverno.namespace})"
diff --git a/variables.tf b/variables.tf
index 14a0392..49c0f99 100644
--- a/variables.tf
+++ b/variables.tf
@@ -82,3 +82,19 @@ variable "kyverno" {
namespace = optional(string, "kyverno")
})
}
+
+# Settings for the cert-manager "missing Issuer/ClusterIssuer" log alert.
+# `cluster_name` is the only attribute without a default. In cert_manager.tf a
+# null `project_id` falls back to var.project_id, and an empty
+# `notification_channels` list falls back to var.notification_channels.
+variable "cert_manager" {
+ description = "Configuration for cert-manager missing issuer log alert. Allows customization of project, cluster, namespace, notification channels, alert documentation, enablement, extra filters, auto-close timing, and notification rate limiting."
+ type = object({
+ enabled = optional(bool, true)
+ cluster_name = string
+ project_id = optional(string, null)
+ namespace = optional(string, "cert-manager")
+ notification_enabled = optional(bool, true)
+ notification_channels = optional(list(string), [])
+ logmatch_notification_rate_limit = optional(string, "300s")
+ alert_documentation = optional(string, null)
+ auto_close_seconds = optional(number, 3600)
+ filter_extra = optional(string, "")
+ })
+}