1919 currentReplicas * prometheus.GaugeVec
2020 desiredRatio * prometheus.GaugeVec
2121
22+ // Saturation and capacity metrics
23+ saturationUtilization * prometheus.GaugeVec
24+ spareCapacity * prometheus.GaugeVec
25+ requiredCapacity * prometheus.GaugeVec
26+ kvCacheTokensUsed * prometheus.GaugeVec
27+ kvCacheTokensTotal * prometheus.GaugeVec
28+
2229 // controllerInstance stores the optional controller instance identifier.
2330 // When set, it's added as a label to all emitted metrics.
2431 controllerInstance string
@@ -41,10 +48,13 @@ func InitMetrics(registry prometheus.Registerer) error {
4148 // Build label sets based on whether controller_instance is configured
4249 baseLabels := []string {constants .LabelVariantName , constants .LabelNamespace , constants .LabelAcceleratorType }
4350 scalingLabels := []string {constants .LabelVariantName , constants .LabelNamespace , constants .LabelDirection , constants .LabelReason }
51+ // modelLabels: variant_name + namespace only (no accelerator_type) for model-level and token metrics
52+ modelLabels := []string {constants .LabelVariantName , constants .LabelNamespace }
4453
4554 if controllerInstance != "" {
4655 baseLabels = append (baseLabels , constants .LabelControllerInstance )
4756 scalingLabels = append (scalingLabels , constants .LabelControllerInstance )
57+ modelLabels = append (modelLabels , constants .LabelControllerInstance )
4858 }
4959
5060 replicaScalingTotal = prometheus .NewCounterVec (
@@ -75,6 +85,41 @@ func InitMetrics(registry prometheus.Registerer) error {
7585 },
7686 baseLabels ,
7787 )
88+ saturationUtilization = prometheus .NewGaugeVec (
89+ prometheus.GaugeOpts {
90+ Name : constants .WVASaturationUtilization ,
91+ Help : "Per-variant utilization ratio (0.0-1.0) from saturation analysis" ,
92+ },
93+ baseLabels ,
94+ )
95+ spareCapacity = prometheus .NewGaugeVec (
96+ prometheus.GaugeOpts {
97+ Name : constants .WVASpareCapacity ,
98+ Help : "Per-variant spare capacity (0.0-1.0) from saturation analysis" ,
99+ },
100+ baseLabels ,
101+ )
102+ requiredCapacity = prometheus .NewGaugeVec (
103+ prometheus.GaugeOpts {
104+ Name : constants .WVARequiredCapacity ,
105+ Help : "Model-level required capacity; >0 indicates scale-up needed (V1: binary 0/1, V2: continuous token demand)" ,
106+ },
107+ modelLabels ,
108+ )
109+ kvCacheTokensUsed = prometheus .NewGaugeVec (
110+ prometheus.GaugeOpts {
111+ Name : constants .WVAKvCacheTokensUsed ,
112+ Help : "Total KV cache tokens currently in use across all replicas of a variant" ,
113+ },
114+ modelLabels ,
115+ )
116+ kvCacheTokensTotal = prometheus .NewGaugeVec (
117+ prometheus.GaugeOpts {
118+ Name : constants .WVAKvCacheTokensTotal ,
119+ Help : "Total KV cache token capacity across all replicas of a variant" ,
120+ },
121+ modelLabels ,
122+ )
78123
79124 // Register metrics with the registry
80125 if err := registry .Register (replicaScalingTotal ); err != nil {
@@ -89,6 +134,21 @@ func InitMetrics(registry prometheus.Registerer) error {
89134 if err := registry .Register (desiredRatio ); err != nil {
90135 return fmt .Errorf ("failed to register desiredRatio metric: %w" , err )
91136 }
137+ if err := registry .Register (saturationUtilization ); err != nil {
138+ return fmt .Errorf ("failed to register saturationUtilization metric: %w" , err )
139+ }
140+ if err := registry .Register (spareCapacity ); err != nil {
141+ return fmt .Errorf ("failed to register spareCapacity metric: %w" , err )
142+ }
143+ if err := registry .Register (requiredCapacity ); err != nil {
144+ return fmt .Errorf ("failed to register requiredCapacity metric: %w" , err )
145+ }
146+ if err := registry .Register (kvCacheTokensUsed ); err != nil {
147+ return fmt .Errorf ("failed to register kvCacheTokensUsed metric: %w" , err )
148+ }
149+ if err := registry .Register (kvCacheTokensTotal ); err != nil {
150+ return fmt .Errorf ("failed to register kvCacheTokensTotal metric: %w" , err )
151+ }
92152
93153 return nil
94154}
@@ -163,3 +223,39 @@ func (m *MetricsEmitter) EmitReplicaMetrics(ctx context.Context, va *llmdOptv1al
163223 desiredRatio .With (baseLabels ).Set (float64 (desired ) / float64 (current ))
164224 return nil
165225}
226+
227+ // EmitSaturationMetrics emits saturation analysis and KV cache capacity metrics
228+ func (m * MetricsEmitter ) EmitSaturationMetrics (
229+ ctx context.Context ,
230+ variantName , namespace , acceleratorType string ,
231+ utilization , spare , required float64 ,
232+ kvTokensUsed , kvTokensTotal int64 ,
233+ ) error {
234+ if saturationUtilization == nil || spareCapacity == nil || requiredCapacity == nil ||
235+ kvCacheTokensUsed == nil || kvCacheTokensTotal == nil {
236+ return fmt .Errorf ("saturation metrics not initialized" )
237+ }
238+
239+ accelLabels := prometheus.Labels {
240+ constants .LabelVariantName : variantName ,
241+ constants .LabelNamespace : namespace ,
242+ constants .LabelAcceleratorType : acceleratorType ,
243+ }
244+ modelLabels := prometheus.Labels {
245+ constants .LabelVariantName : variantName ,
246+ constants .LabelNamespace : namespace ,
247+ }
248+
249+ if controllerInstance != "" {
250+ accelLabels [constants .LabelControllerInstance ] = controllerInstance
251+ modelLabels [constants .LabelControllerInstance ] = controllerInstance
252+ }
253+
254+ saturationUtilization .With (accelLabels ).Set (utilization )
255+ spareCapacity .With (accelLabels ).Set (spare )
256+ requiredCapacity .With (modelLabels ).Set (required )
257+ kvCacheTokensUsed .With (modelLabels ).Set (float64 (kvTokensUsed ))
258+ kvCacheTokensTotal .With (modelLabels ).Set (float64 (kvTokensTotal ))
259+
260+ return nil
261+ }
0 commit comments