Skip to content

Commit b619aab

Browse files
committed
feat(metrics): add histogram metric type for packet size distribution
Replace 20 individual counter metrics for packet size buckets with two native Prometheus histograms (RX/TX). This maps SAI port stat fields to cumulative histogram buckets via a new `histogram` transform in the metrics config.
1 parent 90fbe4f commit b619aab

5 files changed

Lines changed: 254 additions & 189 deletions

File tree

docs/usage/metrics.md

Lines changed: 26 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,8 @@ These are defined in YAML and can be customized or extended by operators. The de
6262
| `sonic_switch_interface_fec_frames_total` | counter | `interface`, `type` | FEC frame counters (correctable, uncorrectable, symbol_errors) |
6363
| `sonic_switch_interface_queue_length` | gauge | `interface` | Current output queue length |
6464
| `sonic_switch_interface_pfc_packets_total` | counter | `interface`, `direction`, `priority` | PFC packets per priority (0-7) |
65-
| `sonic_switch_interface_packet_size_total` | counter | `interface`, `direction`, `size` | Packets by size bucket |
65+
| `sonic_switch_interface_rx_packet_size_bytes` | histogram | `interface` | RX packet size distribution (buckets: 64, 127, 255, 511, 1023, 1518, 2047, 4095, 9216, 16383) |
66+
| `sonic_switch_interface_tx_packet_size_bytes` | histogram | `interface` | TX packet size distribution (buckets: 64, 127, 255, 511, 1023, 1518, 2047, 4095, 9216, 16383) |
6667
| `sonic_switch_interface_anomaly_packets_total` | counter | `interface`, `type` | Anomalous packets (undersize, oversize, fragments, jabbers, unknown_protos) |
6768

6869
## Metrics configuration schema
@@ -99,7 +100,7 @@ Each entry maps a Redis hash field (or set of fields) to a Prometheus metric.
99100
| `field` | no | — | Specific Redis hash field name. Mutually exclusive with `field_pattern` |
100101
| `field_pattern` | no | — | Set to `*` to iterate all hash fields. Mutually exclusive with `field` |
101102
| `metric` | yes | — | Prometheus metric name |
102-
| `type` | yes | — | `gauge` or `counter` |
103+
| `type` | yes | — | `gauge`, `counter`, or `histogram` |
103104
| `help` | no | — | Metric help string |
104105
| `value` | no | — | Fixed metric value (ignores field value). Use for `_info` pattern metrics |
105106
| `labels` | no | — | Map of label names to [value templates](#label-value-templates) |
@@ -191,6 +192,29 @@ transform:
191192
dom_flag_severity: true
192193
```
193194

195+
#### `histogram`
196+
197+
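Maps multiple Redis hash fields to a single Prometheus histogram. Each entry in `buckets` maps an upper bound (a number, parsed as float64) to the Redis hash field that holds the count for that bucket's range. The transform reads each field, parses it as an unsigned integer, and adds up the per-range counts in ascending order of upper bound to produce the cumulative bucket counts that Prometheus expects; a field that is missing or is not a valid unsigned integer contributes zero. The resulting histogram always has `sum=0` because SAI counters don't provide total bytes, so bucket-based percentile queries and heatmap visualizations still work, but averages derived from `_sum` do not. Requires `type: "histogram"`.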
198+
199+
```yaml
200+
- metric: sonic_switch_interface_rx_packet_size_bytes
201+
type: histogram
202+
help: "RX packet size distribution"
203+
labels:
204+
interface: "$port_name"
205+
transform:
206+
histogram:
207+
buckets:
208+
64: SAI_PORT_STAT_ETHER_IN_PKTS_64_OCTETS
209+
127: SAI_PORT_STAT_ETHER_IN_PKTS_65_TO_127_OCTETS
210+
255: SAI_PORT_STAT_ETHER_IN_PKTS_128_TO_255_OCTETS
211+
511: SAI_PORT_STAT_ETHER_IN_PKTS_256_TO_511_OCTETS
212+
1023: SAI_PORT_STAT_ETHER_IN_PKTS_512_TO_1023_OCTETS
213+
1518: SAI_PORT_STAT_ETHER_IN_PKTS_1024_TO_1518_OCTETS
214+
```
215+
216+
This emits `_bucket`, `_count`, and `_sum` series automatically — Prometheus handles the histogram suffixes.
217+
194218
## Examples
195219

196220
### Adding a new counter from COUNTERS_DB

internal/agent/metrics/config.go

Lines changed: 40 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,11 @@ package metrics
55

66
import (
77
"embed"
8+
"encoding/json"
89
"fmt"
910
"os"
1011
"regexp"
12+
"strconv"
1113

1214
"sigs.k8s.io/yaml"
1315
)
@@ -76,6 +78,8 @@ type Transform struct {
7678
RegexCapture *RegexCapture `json:"regex_capture,omitempty"`
7779
// DOMFlagSeverity computes a severity rollup (0=ok, 1=warning, 2=alarm) from all hash fields.
7880
DOMFlagSeverity bool `json:"dom_flag_severity,omitempty"`
81+
// Histogram maps upper bounds to Redis field names, emitting a Prometheus histogram.
82+
Histogram *HistogramBuckets `json:"histogram,omitempty"`
7983
}
8084

8185
// RegexCapture defines a regex-based field name matching transform.
@@ -86,6 +90,35 @@ type RegexCapture struct {
8690
Pattern string `json:"pattern"`
8791
}
8892

93+
// HistogramBuckets defines a histogram transform that maps Redis field names to
// Prometheus histogram bucket upper bounds.
type HistogramBuckets struct {
	// Buckets maps upper bounds (in bytes, seconds, etc.) to Redis hash field names.
	// Values are read, parsed as uint64, and accumulated into cumulative histogram buckets.
	Buckets map[float64]string `json:"buckets"`
}

// UnmarshalJSON implements json.Unmarshaler for HistogramBuckets.
//
// sigs.k8s.io/yaml converts YAML→JSON, so numeric YAML keys become JSON string keys.
// This method parses those string keys back to float64.
//
// It returns an error when the payload is not valid JSON for the expected shape,
// when a key is not a number, when a key is NaN or ±Inf, or when two keys
// denote the same upper bound (e.g. "64" and "64.0"). On error hb is left
// unmodified; on success hb.Buckets is always a non-nil map, even if empty.
func (hb *HistogramBuckets) UnmarshalJSON(data []byte) error {
	var raw struct {
		Buckets map[string]string `json:"buckets"`
	}
	if err := json.Unmarshal(data, &raw); err != nil {
		return err
	}

	// Build into a local map so a failure part-way through never leaves the
	// receiver half-populated.
	buckets := make(map[float64]string, len(raw.Buckets))
	for k, v := range raw.Buckets {
		f, err := strconv.ParseFloat(k, 64)
		if err != nil {
			return fmt.Errorf("histogram bucket key %q is not a valid number: %w", k, err)
		}
		// ParseFloat accepts "NaN" and "Inf" spellings without error. f-f is 0
		// for every finite value and NaN for NaN and ±Inf, so this rejects all
		// non-finite bounds. A NaN map key could never be looked up again.
		if f-f != 0 {
			return fmt.Errorf("histogram bucket key %q must be a finite number", k)
		}
		// Different spellings of one number would otherwise overwrite each
		// other, with the winner decided by map iteration order.
		if _, dup := buckets[f]; dup {
			return fmt.Errorf("histogram bucket key %q duplicates upper bound %v", k, f)
		}
		buckets[f] = v
	}

	hb.Buckets = buckets
	return nil
}
121+
89122
// effectiveSeparator returns the key separator, defaulting to "|".
90123
func (m *MetricMapping) effectiveSeparator() string {
91124
if m.KeySeparator != "" {
@@ -135,8 +168,13 @@ func validateConfig(cfg *MetricsConfig) error {
135168
if f.Metric == "" {
136169
return fmt.Errorf("metrics[%d].fields[%d]: metric is required", i, j)
137170
}
138-
if f.Type != "gauge" && f.Type != "counter" {
139-
return fmt.Errorf("metrics[%d].fields[%d]: type must be 'gauge' or 'counter', got %q", i, j, f.Type)
171+
if f.Type != metricTypeGauge && f.Type != metricTypeCounter && f.Type != metricTypeHistogram {
172+
return fmt.Errorf("metrics[%d].fields[%d]: type must be 'gauge', 'counter', or 'histogram', got %q", i, j, f.Type)
173+
}
174+
if f.Type == metricTypeHistogram {
175+
if f.Transform == nil || f.Transform.Histogram == nil || len(f.Transform.Histogram.Buckets) == 0 {
176+
return fmt.Errorf("metrics[%d].fields[%d]: histogram type requires transform.histogram.buckets", i, j)
177+
}
140178
}
141179
if f.Field != "" && f.FieldPattern != "" {
142180
return fmt.Errorf("metrics[%d].fields[%d]: field and field_pattern are mutually exclusive", i, j)

internal/agent/metrics/config_collector.go

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,14 @@ func (c *ConfigCollector) Collect(ch chan<- prometheus.Metric) {
136136
continue
137137
}
138138

139+
// histogram operates on the whole hash — reads specific bucket fields
140+
if fm.Type == "histogram" && fm.Transform != nil && fm.Transform.Histogram != nil {
141+
desc := c.descs[fm.Metric]
142+
labels := resolveLabels(fm.Labels, keySuffix, portName, "", fields)
143+
collectHistogram(ch, desc, fm.Transform.Histogram, fields, labels)
144+
continue
145+
}
146+
139147
if fm.FieldPattern == "*" {
140148
// Iterate all fields
141149
c.collectAllFields(ch, fi, fm, fields, keySuffix, portName)
@@ -311,3 +319,47 @@ func appendUnique(slice []string, items ...string) []string {
311319
}
312320
return slice
313321
}
322+
323+
// collectHistogram reads bucket fields from the hash, accumulates cumulative counts,
324+
// and emits a prometheus.MustNewConstHistogram.
325+
func collectHistogram(
326+
ch chan<- prometheus.Metric,
327+
desc *prometheus.Desc,
328+
hb *HistogramBuckets,
329+
hashFields map[string]string,
330+
labels []string,
331+
) {
332+
// Sort upper bounds
333+
bounds := make([]float64, 0, len(hb.Buckets))
334+
for ub := range hb.Buckets {
335+
bounds = append(bounds, ub)
336+
}
337+
sort.Float64s(bounds)
338+
339+
// Read non-cumulative counts from Redis and accumulate into cumulative buckets.
340+
var totalCount uint64
341+
cumBuckets := make(map[float64]uint64, len(bounds))
342+
var cumulative uint64
343+
for _, ub := range bounds {
344+
fieldName := hb.Buckets[ub]
345+
val, ok := hashFields[fieldName]
346+
if !ok {
347+
cumBuckets[ub] = cumulative
348+
continue
349+
}
350+
n, err := strconv.ParseUint(val, 10, 64)
351+
if err != nil {
352+
cumBuckets[ub] = cumulative
353+
continue
354+
}
355+
cumulative += n
356+
cumBuckets[ub] = cumulative
357+
}
358+
totalCount = cumulative
359+
360+
// +Inf bucket count equals totalCount (Prometheus adds it automatically).
361+
// sum is 0 — SAI doesn't provide total bytes, only bucket counts.
362+
ch <- prometheus.MustNewConstHistogram(
363+
desc, totalCount, 0, cumBuckets, labels...,
364+
)
365+
}

internal/agent/metrics/default_config.yaml

Lines changed: 34 additions & 158 deletions
Original file line numberDiff line numberDiff line change
@@ -437,169 +437,45 @@ metrics:
437437
direction: "tx"
438438
priority: "7"
439439

440-
# Packet size distribution (RX)
441-
- field: SAI_PORT_STAT_ETHER_IN_PKTS_64_OCTETS
442-
metric: sonic_switch_interface_packet_size_total
443-
type: counter
444-
help: "Total packets by size bucket"
445-
labels:
446-
interface: "$port_name"
447-
direction: "rx"
448-
size: "64"
449-
- field: SAI_PORT_STAT_ETHER_IN_PKTS_65_TO_127_OCTETS
450-
metric: sonic_switch_interface_packet_size_total
451-
type: counter
452-
help: "Total packets by size bucket"
453-
labels:
454-
interface: "$port_name"
455-
direction: "rx"
456-
size: "65_to_127"
457-
- field: SAI_PORT_STAT_ETHER_IN_PKTS_128_TO_255_OCTETS
458-
metric: sonic_switch_interface_packet_size_total
459-
type: counter
460-
help: "Total packets by size bucket"
461-
labels:
462-
interface: "$port_name"
463-
direction: "rx"
464-
size: "128_to_255"
465-
- field: SAI_PORT_STAT_ETHER_IN_PKTS_256_TO_511_OCTETS
466-
metric: sonic_switch_interface_packet_size_total
467-
type: counter
468-
help: "Total packets by size bucket"
469-
labels:
470-
interface: "$port_name"
471-
direction: "rx"
472-
size: "256_to_511"
473-
- field: SAI_PORT_STAT_ETHER_IN_PKTS_512_TO_1023_OCTETS
474-
metric: sonic_switch_interface_packet_size_total
475-
type: counter
476-
help: "Total packets by size bucket"
440+
# Packet size distribution (RX) — Prometheus histogram
441+
- metric: sonic_switch_interface_rx_packet_size_bytes
442+
type: histogram
443+
help: "RX packet size distribution"
477444
labels:
478445
interface: "$port_name"
479-
direction: "rx"
480-
size: "512_to_1023"
481-
- field: SAI_PORT_STAT_ETHER_IN_PKTS_1024_TO_1518_OCTETS
482-
metric: sonic_switch_interface_packet_size_total
483-
type: counter
484-
help: "Total packets by size bucket"
485-
labels:
486-
interface: "$port_name"
487-
direction: "rx"
488-
size: "1024_to_1518"
489-
- field: SAI_PORT_STAT_ETHER_IN_PKTS_1519_TO_2047_OCTETS
490-
metric: sonic_switch_interface_packet_size_total
491-
type: counter
492-
help: "Total packets by size bucket"
493-
labels:
494-
interface: "$port_name"
495-
direction: "rx"
496-
size: "1519_to_2047"
497-
- field: SAI_PORT_STAT_ETHER_IN_PKTS_2048_TO_4095_OCTETS
498-
metric: sonic_switch_interface_packet_size_total
499-
type: counter
500-
help: "Total packets by size bucket"
501-
labels:
502-
interface: "$port_name"
503-
direction: "rx"
504-
size: "2048_to_4095"
505-
- field: SAI_PORT_STAT_ETHER_IN_PKTS_4096_TO_9216_OCTETS
506-
metric: sonic_switch_interface_packet_size_total
507-
type: counter
508-
help: "Total packets by size bucket"
509-
labels:
510-
interface: "$port_name"
511-
direction: "rx"
512-
size: "4096_to_9216"
513-
- field: SAI_PORT_STAT_ETHER_IN_PKTS_9217_TO_16383_OCTETS
514-
metric: sonic_switch_interface_packet_size_total
515-
type: counter
516-
help: "Total packets by size bucket"
517-
labels:
518-
interface: "$port_name"
519-
direction: "rx"
520-
size: "9217_to_16383"
446+
transform:
447+
histogram:
448+
buckets:
449+
64: SAI_PORT_STAT_ETHER_IN_PKTS_64_OCTETS
450+
127: SAI_PORT_STAT_ETHER_IN_PKTS_65_TO_127_OCTETS
451+
255: SAI_PORT_STAT_ETHER_IN_PKTS_128_TO_255_OCTETS
452+
511: SAI_PORT_STAT_ETHER_IN_PKTS_256_TO_511_OCTETS
453+
1023: SAI_PORT_STAT_ETHER_IN_PKTS_512_TO_1023_OCTETS
454+
1518: SAI_PORT_STAT_ETHER_IN_PKTS_1024_TO_1518_OCTETS
455+
2047: SAI_PORT_STAT_ETHER_IN_PKTS_1519_TO_2047_OCTETS
456+
4095: SAI_PORT_STAT_ETHER_IN_PKTS_2048_TO_4095_OCTETS
457+
9216: SAI_PORT_STAT_ETHER_IN_PKTS_4096_TO_9216_OCTETS
458+
16383: SAI_PORT_STAT_ETHER_IN_PKTS_9217_TO_16383_OCTETS
521459

522-
# Packet size distribution (TX)
523-
- field: SAI_PORT_STAT_ETHER_OUT_PKTS_64_OCTETS
524-
metric: sonic_switch_interface_packet_size_total
525-
type: counter
526-
help: "Total packets by size bucket"
527-
labels:
528-
interface: "$port_name"
529-
direction: "tx"
530-
size: "64"
531-
- field: SAI_PORT_STAT_ETHER_OUT_PKTS_65_TO_127_OCTETS
532-
metric: sonic_switch_interface_packet_size_total
533-
type: counter
534-
help: "Total packets by size bucket"
535-
labels:
536-
interface: "$port_name"
537-
direction: "tx"
538-
size: "65_to_127"
539-
- field: SAI_PORT_STAT_ETHER_OUT_PKTS_128_TO_255_OCTETS
540-
metric: sonic_switch_interface_packet_size_total
541-
type: counter
542-
help: "Total packets by size bucket"
543-
labels:
544-
interface: "$port_name"
545-
direction: "tx"
546-
size: "128_to_255"
547-
- field: SAI_PORT_STAT_ETHER_OUT_PKTS_256_TO_511_OCTETS
548-
metric: sonic_switch_interface_packet_size_total
549-
type: counter
550-
help: "Total packets by size bucket"
551-
labels:
552-
interface: "$port_name"
553-
direction: "tx"
554-
size: "256_to_511"
555-
- field: SAI_PORT_STAT_ETHER_OUT_PKTS_512_TO_1023_OCTETS
556-
metric: sonic_switch_interface_packet_size_total
557-
type: counter
558-
help: "Total packets by size bucket"
460+
# Packet size distribution (TX) — Prometheus histogram
461+
- metric: sonic_switch_interface_tx_packet_size_bytes
462+
type: histogram
463+
help: "TX packet size distribution"
559464
labels:
560465
interface: "$port_name"
561-
direction: "tx"
562-
size: "512_to_1023"
563-
- field: SAI_PORT_STAT_ETHER_OUT_PKTS_1024_TO_1518_OCTETS
564-
metric: sonic_switch_interface_packet_size_total
565-
type: counter
566-
help: "Total packets by size bucket"
567-
labels:
568-
interface: "$port_name"
569-
direction: "tx"
570-
size: "1024_to_1518"
571-
- field: SAI_PORT_STAT_ETHER_OUT_PKTS_1519_TO_2047_OCTETS
572-
metric: sonic_switch_interface_packet_size_total
573-
type: counter
574-
help: "Total packets by size bucket"
575-
labels:
576-
interface: "$port_name"
577-
direction: "tx"
578-
size: "1519_to_2047"
579-
- field: SAI_PORT_STAT_ETHER_OUT_PKTS_2048_TO_4095_OCTETS
580-
metric: sonic_switch_interface_packet_size_total
581-
type: counter
582-
help: "Total packets by size bucket"
583-
labels:
584-
interface: "$port_name"
585-
direction: "tx"
586-
size: "2048_to_4095"
587-
- field: SAI_PORT_STAT_ETHER_OUT_PKTS_4096_TO_9216_OCTETS
588-
metric: sonic_switch_interface_packet_size_total
589-
type: counter
590-
help: "Total packets by size bucket"
591-
labels:
592-
interface: "$port_name"
593-
direction: "tx"
594-
size: "4096_to_9216"
595-
- field: SAI_PORT_STAT_ETHER_OUT_PKTS_9217_TO_16383_OCTETS
596-
metric: sonic_switch_interface_packet_size_total
597-
type: counter
598-
help: "Total packets by size bucket"
599-
labels:
600-
interface: "$port_name"
601-
direction: "tx"
602-
size: "9217_to_16383"
466+
transform:
467+
histogram:
468+
buckets:
469+
64: SAI_PORT_STAT_ETHER_OUT_PKTS_64_OCTETS
470+
127: SAI_PORT_STAT_ETHER_OUT_PKTS_65_TO_127_OCTETS
471+
255: SAI_PORT_STAT_ETHER_OUT_PKTS_128_TO_255_OCTETS
472+
511: SAI_PORT_STAT_ETHER_OUT_PKTS_256_TO_511_OCTETS
473+
1023: SAI_PORT_STAT_ETHER_OUT_PKTS_512_TO_1023_OCTETS
474+
1518: SAI_PORT_STAT_ETHER_OUT_PKTS_1024_TO_1518_OCTETS
475+
2047: SAI_PORT_STAT_ETHER_OUT_PKTS_1519_TO_2047_OCTETS
476+
4095: SAI_PORT_STAT_ETHER_OUT_PKTS_2048_TO_4095_OCTETS
477+
9216: SAI_PORT_STAT_ETHER_OUT_PKTS_4096_TO_9216_OCTETS
478+
16383: SAI_PORT_STAT_ETHER_OUT_PKTS_9217_TO_16383_OCTETS
603479

604480
# Anomaly counters
605481
- field: SAI_PORT_STAT_ETHER_STATS_UNDERSIZE_PKTS

0 commit comments

Comments
 (0)