Skip to content

Commit e781b81

Browse files
seedspirit and claude committed
fix(BA-5878): close accelerator pattern gap for *_power and *_temperature
The window-stat gauge patterns lagged behind what legacy accelerator plugins actually publish:

- *_power: legacy emits both stats.max and stats.avg; this PR was emitting only stats.max.
- *_temperature: legacy emits both stats.max and stats.avg; this PR was emitting neither.

Surveyed plugins (rebellions/common, rebellions/atom_max, habana, ipu, mock):

- All emit *_mem (max only) and *_util (max + avg).
- Only mock currently emits *_power and *_temperature, both with {avg, max} filters.

Extend STATS_MAX_GAUGE_METRIC_PATTERNS to include _temperature, and STATS_AVG_GAUGE_METRIC_PATTERNS to include _power and _temperature, so the new pipeline matches what every legacy plugin actually publishes.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
1 parent 1f50031 commit e781b81

2 files changed

Lines changed: 4 additions & 4 deletions

File tree

src/ai/backend/common/clients/prometheus/metric_types.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -82,11 +82,11 @@ def to_list(self) -> list[MetricPreset]:
8282
"io_scratch_size",
8383
})
8484
STATS_MAX_GAUGE_METRIC_PATTERNS: Final[frozenset[str]] = frozenset({
85-
r"[A-Za-z0-9][A-Za-z0-9_-]*_(mem|util|power)",
85+
r"[A-Za-z0-9][A-Za-z0-9_-]*_(mem|util|power|temperature)",
8686
})
8787
STATS_AVG_GAUGE_METRICS: Final[frozenset[str]] = frozenset()
8888
STATS_AVG_GAUGE_METRIC_PATTERNS: Final[frozenset[str]] = frozenset({
89-
r"[A-Za-z0-9][A-Za-z0-9_-]*_util",
89+
r"[A-Za-z0-9][A-Za-z0-9_-]*_(util|power|temperature)",
9090
})
9191
STATS_MAX_OVER_RATE_METRICS: Final[frozenset[str]] = frozenset({"cpu_util"})
9292
STATS_AVG_OVER_RATE_METRICS: Final[frozenset[str]] = frozenset({"cpu_util"})

tests/unit/manager/services/utilization_metric/test_container_metric.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -820,7 +820,7 @@ def test_stats_queries_render_legacy_labels_from_typed_value_types(self) -> None
820820
"backendai_container_utilization"
821821
'{kernel_id=~"12345678-1234-5678-1234-567812345678",'
822822
'container_metric_name=~"io_scratch_size|mem|'
823-
'[A-Za-z0-9][A-Za-z0-9_-]*_(mem|util|power)",'
823+
'[A-Za-z0-9][A-Za-z0-9_-]*_(mem|util|power|temperature)",'
824824
'value_type="current"}))[5m:]),'
825825
'"value_type","stats.max","value_type",".*")'
826826
" or "
@@ -835,7 +835,7 @@ def test_stats_queries_render_legacy_labels_from_typed_value_types(self) -> None
835835
"label_replace(avg_over_time((sum by (container_metric_name,kernel_id,value_type)("
836836
"backendai_container_utilization"
837837
'{kernel_id=~"12345678-1234-5678-1234-567812345678",'
838-
'container_metric_name=~"[A-Za-z0-9][A-Za-z0-9_-]*_util",'
838+
'container_metric_name=~"[A-Za-z0-9][A-Za-z0-9_-]*_(util|power|temperature)",'
839839
'value_type="current"}))[5m:]),'
840840
'"value_type","stats.avg","value_type",".*")'
841841
" or "

0 commit comments

Comments
 (0)