Skip to content

Commit 418ec73

Browse files
f41gh7 authored and AndrewChubatiuk committed
controller: add object_namespace label to bad configs metric
This commit adds the `object_namespace` label to the following metrics:

* `operator_alertmanager_bad_objects_count`
* `operator_vmalert_bad_objects_count`

The new label makes it possible to route alerts by the namespace of the misconfigured object.
1 parent 84a49bc commit 418ec73

File tree

4 files changed

+38
-14
lines changed

4 files changed

+38
-14
lines changed

docs/CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@ aliases:
1313

1414
## tip
1515

16+
* FEATURE: [vmoperator](https://docs.victoriametrics.com/operator/): add `object_namespace` label to the `operator_alertmanager_bad_objects_count` and `operator_vmalert_bad_objects_count` metrics.
17+
1618
## [v0.66.1](https://github.com/VictoriaMetrics/operator/releases/tag/v0.66.1)
1719

1820
**Release date:** 06 December 2025

internal/controller/operator/factory/vmalert/rules.go

Lines changed: 23 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -26,13 +26,16 @@ import (
2626
"github.com/VictoriaMetrics/operator/internal/controller/operator/factory/reconcile"
2727
)
2828

29-
var badConfigsTotal = prometheus.NewCounter(prometheus.CounterOpts{
30-
Name: "operator_vmalert_bad_objects_count",
31-
Help: "Number of incorrect objects by controller",
32-
ConstLabels: prometheus.Labels{
33-
"controller": "vmrules",
29+
var badConfigsTotal = prometheus.NewCounterVec(
30+
prometheus.CounterOpts{
31+
Name: "operator_vmalert_bad_objects_count",
32+
Help: "Number of incorrect objects by controller",
33+
ConstLabels: prometheus.Labels{
34+
"controller": "vmrules",
35+
},
3436
},
35-
})
37+
[]string{"object_namespace"},
38+
)
3639

3740
func init() {
3841
metrics.Registry.MustRegister(badConfigsTotal)
@@ -223,25 +226,34 @@ func selectRulesContent(ctx context.Context, rclient client.Client, cr *vmv1beta
223226
logger.WithContext(ctx).Info("deduplicating vmalert rules")
224227
vmRules = deduplicateRules(ctx, vmRules)
225228
}
226-
var brokenRulesCnt int
229+
var brokenRulesByNamespace map[string]int
227230
for _, pRule := range vmRules {
228231
if !build.MustSkipRuntimeValidation {
229232
if err := pRule.Validate(); err != nil {
230233
pRule.Status.CurrentSyncError = err.Error()
231-
brokenRulesCnt++
234+
if brokenRulesByNamespace == nil {
235+
brokenRulesByNamespace = map[string]int{}
236+
}
237+
brokenRulesByNamespace[pRule.Namespace]++
232238
continue
233239
}
234240
}
235241
content, err := generateContent(pRule.Spec, cr.Spec.EnforcedNamespaceLabel, pRule.Namespace)
236242
if err != nil {
237243
pRule.Status.CurrentSyncError = fmt.Sprintf("cannot generate content for rule: %s, err :%s", pRule.Name, err)
238-
brokenRulesCnt++
244+
if brokenRulesByNamespace == nil {
245+
brokenRulesByNamespace = map[string]int{}
246+
}
247+
brokenRulesByNamespace[pRule.Namespace]++
248+
239249
continue
240250
}
241251
rules[fmt.Sprintf("%s-%s.yaml", pRule.Namespace, pRule.Name)] = content
242252
}
243-
logger.SelectedObjects(ctx, "VMRules", len(namespacedNames), brokenRulesCnt, namespacedNames)
244-
badConfigsTotal.Add(float64(brokenRulesCnt))
253+
logger.SelectedObjects(ctx, "VMRules", len(namespacedNames), len(brokenRulesByNamespace), namespacedNames)
254+
for ns, cnt := range brokenRulesByNamespace {
255+
badConfigsTotal.WithLabelValues(ns).Add(float64(cnt))
256+
}
245257
return rules, vmRules, nil
246258
}
247259

internal/controller/operator/factory/vmalertmanager/alertmanager.go

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,13 +22,15 @@ import (
2222

2323
const templatesDir = "/etc/vm/templates"
2424

25-
var badConfigsTotal = prometheus.NewCounter(prometheus.CounterOpts{
25+
var badConfigsTotal = prometheus.NewCounterVec(prometheus.CounterOpts{
2626
Name: "operator_alertmanager_bad_objects_count",
2727
Help: "Number of child CRDs with bad or incomplete configurations",
2828
ConstLabels: prometheus.Labels{
2929
"crd": "vmalertmanager_config",
3030
},
31-
})
31+
},
32+
[]string{"object_namespace"},
33+
)
3234

3335
func init() {
3436
metrics.Registry.MustRegister(badConfigsTotal)

internal/controller/operator/factory/vmalertmanager/statefulset.go

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -760,7 +760,15 @@ func buildAlertmanagerConfigWithCRDs(ctx context.Context, rclient client.Client,
760760
}
761761
parsedCfg.brokenAMCfgs = append(parsedCfg.brokenAMCfgs, badCfgs...)
762762
logger.SelectedObjects(ctx, "VMAlertmanagerConfigs", len(parsedCfg.amcfgs), len(parsedCfg.brokenAMCfgs), namespacedNames)
763-
badConfigsTotal.Add(float64(len(badCfgs)))
763+
if len(parsedCfg.brokenAMCfgs) > 0 {
764+
brokenCfgByNamespace := make(map[string]int)
765+
for _, bamc := range parsedCfg.brokenAMCfgs {
766+
brokenCfgByNamespace[bamc.Namespace]++
767+
}
768+
for ns, cnt := range brokenCfgByNamespace {
769+
badConfigsTotal.WithLabelValues(ns).Add(float64(cnt))
770+
}
771+
}
764772
return parsedCfg, nil
765773
}
766774

0 commit comments

Comments
 (0)