diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index 26a27c955..3c51ec3d2 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -13,6 +13,8 @@ aliases: ## tip +* FEATURE: [vmoperator](https://docs.victoriametrics.com/operator/): add the `object_namespace` label to the `operator_alertmanager_bad_objects_count` and `operator_vmalert_bad_objects_count` metrics. The label value is the namespace of the invalid `VMAlertmanagerConfig` or `VMRule` object, so bad objects can be counted per namespace. + * BUGFIX: [vmoperator](https://docs.victoriametrics.com/operator/): remove orphaned ServiceAccount and RBAC resources. See [#1665](https://github.com/VictoriaMetrics/operator/issues/1665). * BUGFIX: [vmanomaly](https://docs.victoriametrics.com/operator/resources/vmanomaly/): properly handle configuration which is missing `reader.queries` in either `configRawYaml` or `configSecret`. Previously, it would lead to panic. * BUGFIX: [vmanomaly](https://docs.victoriametrics.com/operator/resources/vmanomaly/): fix configuration parsing when running in [UI mode](https://docs.victoriametrics.com/anomaly-detection/ui/). Previously, configuration required to use `preset: ui:version` instead of `preset: ui`. 
diff --git a/internal/controller/operator/factory/vmalert/rules.go b/internal/controller/operator/factory/vmalert/rules.go index 9b25ef050..49c6c798b 100644 --- a/internal/controller/operator/factory/vmalert/rules.go +++ b/internal/controller/operator/factory/vmalert/rules.go @@ -26,13 +26,16 @@ import ( "github.com/VictoriaMetrics/operator/internal/controller/operator/factory/reconcile" ) -var badConfigsTotal = prometheus.NewCounter(prometheus.CounterOpts{ - Name: "operator_vmalert_bad_objects_count", - Help: "Number of incorrect objects by controller", - ConstLabels: prometheus.Labels{ - "controller": "vmrules", +var badConfigsTotal = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Name: "operator_vmalert_bad_objects_count", + Help: "Number of incorrect objects by controller", + ConstLabels: prometheus.Labels{ + "controller": "vmrules", + }, }, -}) + []string{"object_namespace"}, +) func init() { metrics.Registry.MustRegister(badConfigsTotal) @@ -223,25 +226,36 @@ func selectRulesContent(ctx context.Context, rclient client.Client, cr *vmv1beta logger.WithContext(ctx).Info("deduplicating vmalert rules") vmRules = deduplicateRules(ctx, vmRules) } - var brokenRulesCnt int + var brokenRulesByNamespace map[string]int + var brokenRulesTotal int for _, pRule := range vmRules { if !build.MustSkipRuntimeValidation { if err := pRule.Validate(); err != nil { pRule.Status.CurrentSyncError = err.Error() - brokenRulesCnt++ + if brokenRulesByNamespace == nil { + brokenRulesByNamespace = map[string]int{} + } + brokenRulesTotal++ + brokenRulesByNamespace[pRule.Namespace]++ continue } } content, err := generateContent(pRule.Spec, cr.Spec.EnforcedNamespaceLabel, pRule.Namespace) if err != nil { pRule.Status.CurrentSyncError = fmt.Sprintf("cannot generate content for rule: %s, err :%s", pRule.Name, err) - brokenRulesCnt++ + if brokenRulesByNamespace == nil { + brokenRulesByNamespace = map[string]int{} + } + brokenRulesTotal++ + brokenRulesByNamespace[pRule.Namespace]++ 
continue } rules[fmt.Sprintf("%s-%s.yaml", pRule.Namespace, pRule.Name)] = content } - logger.SelectedObjects(ctx, "VMRules", len(namespacedNames), brokenRulesCnt, namespacedNames) - badConfigsTotal.Add(float64(brokenRulesCnt)) + logger.SelectedObjects(ctx, "VMRules", len(namespacedNames), brokenRulesTotal, namespacedNames) + for ns, cnt := range brokenRulesByNamespace { + badConfigsTotal.WithLabelValues(ns).Add(float64(cnt)) + } return rules, vmRules, nil } diff --git a/internal/controller/operator/factory/vmalertmanager/alertmanager.go b/internal/controller/operator/factory/vmalertmanager/alertmanager.go index 38171a304..35bcad099 100644 --- a/internal/controller/operator/factory/vmalertmanager/alertmanager.go +++ b/internal/controller/operator/factory/vmalertmanager/alertmanager.go @@ -22,13 +22,15 @@ import ( const templatesDir = "/etc/vm/templates" -var badConfigsTotal = prometheus.NewCounter(prometheus.CounterOpts{ +var badConfigsTotal = prometheus.NewCounterVec(prometheus.CounterOpts{ Name: "operator_alertmanager_bad_objects_count", Help: "Number of child CRDs with bad or incomplete configurations", ConstLabels: prometheus.Labels{ "crd": "vmalertmanager_config", }, -}) +}, + []string{"object_namespace"}, +) func init() { metrics.Registry.MustRegister(badConfigsTotal) diff --git a/internal/controller/operator/factory/vmalertmanager/statefulset.go b/internal/controller/operator/factory/vmalertmanager/statefulset.go index e2cb8fcf1..7779176fa 100644 --- a/internal/controller/operator/factory/vmalertmanager/statefulset.go +++ b/internal/controller/operator/factory/vmalertmanager/statefulset.go @@ -760,7 +760,15 @@ func buildAlertmanagerConfigWithCRDs(ctx context.Context, rclient client.Client, } parsedCfg.brokenAMCfgs = append(parsedCfg.brokenAMCfgs, badCfgs...) 
logger.SelectedObjects(ctx, "VMAlertmanagerConfigs", len(parsedCfg.amcfgs), len(parsedCfg.brokenAMCfgs), namespacedNames) - badConfigsTotal.Add(float64(len(badCfgs))) + if len(parsedCfg.brokenAMCfgs) > 0 { + brokenCfgByNamespace := make(map[string]int) + for _, bamc := range parsedCfg.brokenAMCfgs { + brokenCfgByNamespace[bamc.Namespace]++ + } + for ns, cnt := range brokenCfgByNamespace { + badConfigsTotal.WithLabelValues(ns).Add(float64(cnt)) + } + } return parsedCfg, nil }