Skip to content

Commit 831e5ec

Browse files
committed
Fix metric linter to handle recording rules separately
Signed-off-by: avlitman <alitman@redhat.com>
1 parent 7f09805 commit 831e5ec

File tree

3 files changed

+229
-20
lines changed

3 files changed

+229
-20
lines changed
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
{
2+
"operators": {
3+
"kubevirt": [
4+
"kubevirt_allocatable_nodes",
5+
"kubevirt_api_request_deprecated_total",
6+
"kubevirt_memory_delta_from_requested_bytes",
7+
"kubevirt_nodes_with_kvm",
8+
"kubevirt_number_of_vms",
9+
"kubevirt_virt_api_up",
10+
"kubevirt_virt_controller_ready",
11+
"kubevirt_virt_controller_up",
12+
"kubevirt_virt_handler_up",
13+
"kubevirt_virt_operator_leading",
14+
"kubevirt_virt_operator_ready",
15+
"kubevirt_virt_operator_up",
16+
"kubevirt_vm_container_memory_request_margin_based_on_rss_bytes",
17+
"kubevirt_vm_container_memory_request_margin_based_on_working_set_bytes",
18+
"kubevirt_vm_created_total",
19+
"kubevirt_vmi_guest_vcpu_queue",
20+
"kubevirt_vmi_memory_used_bytes",
21+
"kubevirt_vmi_phase_count",
22+
"kubevirt_vmi_vcpu_count",
23+
"kubevirt_vmsnapshot_disks_restored_from_source",
24+
"kubevirt_vmsnapshot_disks_restored_from_source_bytes",
25+
"kubevirt_vmsnapshot_persistentvolumeclaim_labels"
26+
],
27+
"hco": [
28+
"cnv_abnormal",
29+
"kubevirt_hyperconverged_operator_health_status"
30+
],
31+
"cdi": [
32+
"kubevirt_cdi_clone_pods_high_restart",
33+
"kubevirt_cdi_import_pods_high_restart",
34+
"kubevirt_cdi_operator_up",
35+
"kubevirt_cdi_upload_pods_high_restart"
36+
],
37+
"cnao": [
38+
"kubevirt_cnao_cr_kubemacpool_aggregated",
39+
"kubevirt_cnao_kubemacpool_duplicate_macs",
40+
"kubevirt_cnao_kubemacpool_manager_up",
41+
"kubevirt_cnao_operator_up"
42+
],
43+
"hpp": [
44+
"kubevirt_hpp_operator_up"
45+
],
46+
"ssp": [
47+
"cnv:vmi_status_running:count",
48+
"kubevirt_ssp_common_templates_restored_increase",
49+
"kubevirt_ssp_operator_reconcile_succeeded_aggregated",
50+
"kubevirt_ssp_operator_up",
51+
"kubevirt_ssp_template_validator_rejected_increase",
52+
"kubevirt_ssp_template_validator_up"
53+
]
54+
}
55+
}
56+

test/metrics/prom-metrics-linter/custom_linter_rules.go

Lines changed: 114 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -20,15 +20,15 @@ package main
2020

2121
import (
2222
"fmt"
23-
"sort"
23+
"regexp"
2424
"strings"
2525

2626
"github.com/prometheus/client_golang/prometheus/testutil/promlint"
2727
dto "github.com/prometheus/client_model/go"
2828
)
2929

30-
func CustomLinterRules(problems []promlint.Problem, mf *dto.MetricFamily, operatorName, subOperatorName string) []promlint.Problem {
31-
// Check metric prefix
30+
func CustomMetricsValidation(problems []promlint.Problem, mf *dto.MetricFamily, operatorName, subOperatorName string) []promlint.Problem {
31+
// Validate metric prefix
3232
nameParts := strings.Split(*mf.Name, "_")
3333

3434
if nameParts[0] != operatorName {
@@ -56,10 +56,116 @@ func CustomLinterRules(problems []promlint.Problem, mf *dto.MetricFamily, operat
5656
}
5757
}
5858

59-
// Sort the problems by metric name
60-
sort.Slice(newProblems, func(i, j int) bool {
61-
return newProblems[i].Metric < newProblems[j].Metric
62-
})
63-
6459
return newProblems
6560
}
61+
62+
func CustomRecordingRuleValidation(rr recordingRule) []promlint.Problem {
63+
var problems []promlint.Problem
64+
65+
name := rr.Record
66+
if name == "" {
67+
return problems
68+
}
69+
70+
// Require name structure: level:metric:operations
71+
parts := strings.Split(name, ":")
72+
if len(parts) != 3 || parts[0] == "" || parts[1] == "" || parts[2] == "" {
73+
problems = append(problems, promlint.Problem{Metric: name, Text: "recording rule name must be level:metric:operations. if there is no obvious operations, use 'sum'"})
74+
return problems
75+
}
76+
77+
// Enforce metric segment prefix: it should start with operator_suboperator like metrics do
78+
{
79+
metricPart := parts[1]
80+
expectedPrefix := operatorName + "_"
81+
if operatorName != subOperatorName {
82+
expectedPrefix = operatorName + "_" + subOperatorName + "_"
83+
}
84+
if !strings.HasPrefix(metricPart, expectedPrefix) {
85+
problems = append(problems, promlint.Problem{Metric: name, Text: fmt.Sprintf("metric (second segment in level:metric:operations) need to start with \"%s\"", expectedPrefix)})
86+
}
87+
}
88+
89+
// Single/Multiple operations: detect ops in expr and enforce suffix rules
90+
{
91+
aggOps := detectOps(rr.Expr, promAggOps)
92+
timeOps := detectOps(rr.Expr, promTimeOps)
93+
total := len(aggOps) + len(timeOps)
94+
opsPart := parts[2]
95+
if total == 1 {
96+
if len(aggOps) == 1 {
97+
expected := aggOps[0]
98+
if !strings.HasSuffix(opsPart, expected) {
99+
problems = append(problems, promlint.Problem{Metric: name, Text: fmt.Sprintf("single aggregation detected in expr: operations (third segment in level:metric:operations) should end with '%s'", expected)})
100+
}
101+
} else if len(timeOps) == 1 {
102+
expected := timeOps[0]
103+
timeSuffixRe := regexp.MustCompile(`(?i)` + expected + `[0-9]+[smhdwy]+$`)
104+
if !timeSuffixRe.MatchString(opsPart) {
105+
problems = append(problems, promlint.Problem{Metric: name, Text: fmt.Sprintf("single time operation detected in expr: operations (third segment in level:metric:operations) should end with '%s<duration>'", expected)})
106+
}
107+
}
108+
} else if total > 1 {
109+
present := false
110+
for _, op := range append(aggOps, timeOps...) {
111+
if strings.Contains(opsPart, op) {
112+
present = true
113+
break
114+
}
115+
}
116+
if !present {
117+
problems = append(problems, promlint.Problem{Metric: name, Text: "multiple operations detected in expr: at least one operation should appear in operations (third segment in level:metric:operations)"})
118+
}
119+
}
120+
}
121+
122+
// Forbid duplicated operations
123+
{
124+
seen := map[string]struct{}{}
125+
tokens := strings.Split(parts[2], "_")
126+
for _, tok := range tokens {
127+
if tok == "" {
128+
continue
129+
}
130+
if _, ok := seen[tok]; ok {
131+
problems = append(problems, promlint.Problem{Metric: name, Text: "operations (third segment in level:metric:operations) contains duplicate tokens; merge duplicates (e.g., min_min -> min)"})
132+
break
133+
}
134+
seen[tok] = struct{}{}
135+
}
136+
}
137+
138+
return problems
139+
}
140+
141+
// getOpsRegex builds a regex for PromQL operations we recognize for naming purposes.
142+
var (
143+
promAggOps = []string{
144+
// Aggregations
145+
"sum", "avg", "min", "max", "count", "quantile", "stddev", "stdvar",
146+
"group", "count_values", "limitk", "limit_ratio",
147+
// Selectors considered as aggregations
148+
"topk", "bottomk",
149+
}
150+
promTimeOps = []string{
151+
// Time/transform functions
152+
"rate", "irate", "increase", "delta", "idelta", "deriv",
153+
"avg_over_time", "min_over_time", "max_over_time", "sum_over_time",
154+
"count_over_time", "quantile_over_time", "stddev_over_time", "stdvar_over_time",
155+
}
156+
)
157+
158+
// detectOps returns a unique list of ops from expr based on the provided ops list
159+
func detectOps(expr string, ops []string) []string {
160+
pattern := "\\b(" + strings.Join(ops, "|") + ")\\b"
161+
re := regexp.MustCompile(pattern)
162+
uniq := map[string]struct{}{}
163+
for _, m := range re.FindAllStringSubmatch(expr, -1) {
164+
uniq[m[1]] = struct{}{}
165+
}
166+
var opsList []string
167+
for k := range uniq {
168+
opsList = append(opsList, k)
169+
}
170+
return opsList
171+
}

test/metrics/prom-metrics-linter/metric_name_linter.go

Lines changed: 59 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -19,10 +19,12 @@
1919
package main
2020

2121
import (
22+
_ "embed"
2223
"encoding/json"
2324
"flag"
2425
"fmt"
2526
"os"
27+
"sort"
2628

2729
"github.com/prometheus/client_golang/prometheus/testutil/promlint"
2830
dto "github.com/prometheus/client_model/go"
@@ -34,6 +36,13 @@ var (
3436
subOperatorName string
3537
)
3638

39+
//go:embed allowlist.json
40+
var embeddedAllowlist []byte
41+
42+
type allowlistConfig struct {
43+
Operators map[string][]string `json:"operators"`
44+
}
45+
3746
func init() {
3847
// Define command-line flags
3948
flag.StringVar(&metricFamilies, "metric-families", "", "JSON representation of metric families")
@@ -50,10 +59,10 @@ func main() {
5059
os.Exit(1)
5160
}
5261

53-
// Parse metric families from the JSON representation
54-
families := parseMetricFamilies(metricFamilies)
62+
// Parse input JSON containing metricFamilies and optional recordingRules.
63+
families, recordingRules := parseInput(metricFamilies)
5564

56-
// Call customRules function to apply the custom rules to the linter
65+
// Lint metrics with promlint, then apply custom validations
5766
linter := promlint.NewWithMetricFamilies(families)
5867

5968
problems, err := linter.Lint()
@@ -62,21 +71,59 @@ func main() {
6271
os.Exit(1)
6372
}
6473

74+
// Collect and print metric problems first (sorted by metric name)
75+
metricProblems := problems
6576
for _, family := range families {
66-
problems = CustomLinterRules(problems, family, operatorName, subOperatorName)
77+
metricProblems = CustomMetricsValidation(metricProblems, family, operatorName, subOperatorName)
78+
}
79+
sort.Slice(metricProblems, func(i, j int) bool { return metricProblems[i].Metric < metricProblems[j].Metric })
80+
for _, p := range metricProblems {
81+
fmt.Printf("%s: %s\n", p.Metric, p.Text)
6782
}
6883

69-
for _, problem := range problems {
70-
fmt.Printf("%s: %s\n", problem.Metric, problem.Text)
84+
// Then collect and print recording rule problems (sorted by rule name), skipping allowlisted names
85+
allow := map[string]struct{}{}
86+
if len(embeddedAllowlist) > 0 {
87+
var cfg allowlistConfig
88+
if err := json.Unmarshal(embeddedAllowlist, &cfg); err == nil {
89+
for _, n := range cfg.Operators[subOperatorName] {
90+
if n == "" {
91+
continue
92+
}
93+
allow[n] = struct{}{}
94+
}
95+
}
96+
}
97+
ruleProblems := []promlint.Problem{}
98+
for _, rr := range recordingRules {
99+
if _, ok := allow[rr.Record]; ok {
100+
continue
101+
}
102+
ruleProblems = append(ruleProblems, CustomRecordingRuleValidation(rr)...)
103+
}
104+
sort.Slice(ruleProblems, func(i, j int) bool { return ruleProblems[i].Metric < ruleProblems[j].Metric })
105+
for _, p := range ruleProblems {
106+
fmt.Printf("%s: %s\n", p.Metric, p.Text)
71107
}
72108
}
73109

74-
func parseMetricFamilies(jsonStr string) []*dto.MetricFamily {
75-
var families []*dto.MetricFamily
76-
err := json.Unmarshal([]byte(jsonStr), &families)
77-
if err != nil {
78-
fmt.Fprintf(os.Stderr, "error parsing metric families: %v\n", err)
110+
type recordingRule struct {
111+
Record string `json:"record"`
112+
Expr string `json:"expr"`
113+
Type string `json:"type,omitempty"`
114+
}
115+
116+
type inputJSON struct {
117+
MetricFamilies []*dto.MetricFamily `json:"metricFamilies"`
118+
RecordingRules []recordingRule `json:"recordingRules"`
119+
}
120+
121+
func parseInput(jsonStr string) ([]*dto.MetricFamily, []recordingRule) {
122+
// Expect a JSON with metricFamilies and optional recordingRules
123+
var env inputJSON
124+
if err := json.Unmarshal([]byte(jsonStr), &env); err != nil {
125+
fmt.Fprintf(os.Stderr, "error parsing input: %v\n", err)
79126
os.Exit(1)
80127
}
81-
return families
128+
return env.MetricFamilies, env.RecordingRules
82129
}

0 commit comments

Comments
 (0)