Skip to content

Commit 5ead7c5

Browse files
committed
Auto discover accelerator name from deployment nodeSelector/nodeAffinity
1 parent 072ec8b commit 5ead7c5

17 files changed

Lines changed: 383 additions & 109 deletions

File tree

charts/workload-variant-autoscaler/values-dev.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,9 @@ llmd:
6161
modelID: "Qwen/Qwen3-0.6B"
6262
va:
6363
enabled: true
64+
# accelerator: Optional. If not specified, it will be auto-discovered
65+
# from target deployment. If specified, it will be used as fall-back value if it can't
66+
# be discovered.
6467
accelerator: H100
6568
sloTpot: 10
6669
sloTtft: 1000

charts/workload-variant-autoscaler/values.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,9 @@ llmd:
8989

9090
va:
9191
enabled: true
92+
# accelerator: Optional. If not specified, it will be auto-discovered
93+
# from target deployment. If specified, it will be used as fall-back value if it can't
94+
# be discovered.
9295
accelerator: H100
9396
# Cost per replica in arbitrary units (higher = more expensive to scale)
9497
# Used by saturation analysis to weight scaling decisions across variants

deploy/README.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -358,6 +358,9 @@ llmd:
358358
# VariantAutoscaling Configuration
359359
va:
360360
enabled: true # Create VariantAutoscaling CR
361+
# accelerator: Optional. If not specified, it will be auto-discovered
362+
# from target deployment. If specified, it will be used as fall-back value if it can't
363+
# be discovered.
361364
accelerator: H100 # GPU type: A100, H100, L40S, etc.
362365
sloTpot: 10 # Time per output token SLO (ms)
363366
sloTtft: 1000 # Time to first token SLO (ms)
@@ -475,6 +478,9 @@ llmd:
475478
# Create VariantAutoscaling
476479
va:
477480
enabled: true
481+
# accelerator: Optional. If not specified, it will be auto-discovered
482+
# from target deployment. If specified, it will be used as fall-back value if it can't
483+
# be discovered.
478484
accelerator: A100
479485
sloTpot: 10
480486
sloTtft: 1000

internal/actuator/actuator_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -205,7 +205,7 @@ var _ = Describe("Actuator", func() {
205205
Name: contextResourceName,
206206
Namespace: namespace,
207207
Labels: map[string]string{
208-
"inference.optimization/acceleratorName": "A100",
208+
ctrlutils.AcceleratorNameLabel: "A100",
209209
},
210210
},
211211
Spec: llmdVariantAutoscalingV1alpha1.VariantAutoscalingSpec{

internal/collector/replica_metrics.go

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -440,13 +440,13 @@ func (c *ReplicaMetricsCollector) CollectReplicaMetrics(
440440
}
441441
variantKey := utils.GetNamespacedKey(namespace, vaName)
442442

443-
// Get accelerator name from VariantAutoscaling label
443+
// Get accelerator name from Deployment nodeSelector or VariantAutoscaling label
444444
acceleratorName := ""
445445
if va, ok := variantAutoscalings[variantKey]; ok && va != nil {
446-
if va.Labels != nil {
447-
if accName, exists := va.Labels[utils.AcceleratorNameLabel]; exists {
448-
acceleratorName = accName
449-
}
446+
// Find the deployment for this VA
447+
deploymentKey := utils.GetNamespacedKey(va.Namespace, va.GetScaleTargetName())
448+
if deployment, found := deployments[deploymentKey]; found {
449+
acceleratorName = utils.GetAcceleratorNameFromDeployment(va, deployment)
450450
}
451451
}
452452

internal/config/loader_test.go

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,6 @@ import (
77
"time"
88

99
flag "github.com/spf13/pflag"
10-
11-
1210
)
1311

1412
// writeTestConfigFile writes a YAML config file to a temp directory and returns its path.

internal/engines/saturation/engine.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1112,9 +1112,9 @@ func (e *Engine) emitSafetyNetMetrics(
11121112
}
11131113
}
11141114
if accelerator == "" {
1115-
// Try to get from VA labels as last resort
1116-
if val, ok := va.Labels[utils.AcceleratorNameLabel]; ok && val != "" {
1117-
accelerator = val
1115+
// Try to get from deployment nodeSelector/nodeAffinity or VA labels
1116+
if acceleratorName, err := utils.GetAcceleratorNameFromVA(ctx, e.client, &va); err == nil && acceleratorName != "" {
1117+
accelerator = acceleratorName
11181118
}
11191119
}
11201120
if accelerator == "" {

internal/engines/saturation/engine_test.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -141,7 +141,7 @@ data:
141141
Name: name,
142142
Namespace: "default",
143143
Labels: map[string]string{
144-
"inference.optimization/acceleratorName": "A100",
144+
utils.AcceleratorNameLabel: "A100",
145145
},
146146
},
147147
Spec: llmdVariantAutoscalingV1alpha1.VariantAutoscalingSpec{
@@ -377,7 +377,7 @@ data:
377377
Name: name,
378378
Namespace: "default",
379379
Labels: map[string]string{
380-
"inference.optimization/acceleratorName": "A100",
380+
utils.AcceleratorNameLabel: "A100",
381381
},
382382
},
383383
Spec: llmdVariantAutoscalingV1alpha1.VariantAutoscalingSpec{

internal/engines/saturation/engine_v2.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ func (e *Engine) runV2AnalysisOnly(
3838
"variant", va.Name, "deployKey", deployKey)
3939
continue
4040
}
41-
accelerator := utils.GetAcceleratorType(va)
41+
accelerator := utils.GetAcceleratorNameFromDeployment(va, deploy)
4242
gpuCount := getDeploymentGPUsPerReplica(deploy)
4343
e.capacityStore.LoadFromDeployment(namespace, modelID, va.Name, accelerator, gpuCount, deploy)
4444
logger.V(logging.DEBUG).Info("Pre-populated capacity store from deployment",
@@ -112,9 +112,9 @@ func (e *Engine) runAnalyzersAndScore(
112112
continue
113113
}
114114
switch aw.Name {
115+
// future: case "throughput", "slo"
115116
case "saturation":
116117
totalWeighted += baseResult.RequiredCapacity * aw.Score
117-
// future: case "throughput", "slo"
118118
}
119119
}
120120

internal/engines/scalefromzero/engine.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -291,7 +291,7 @@ func (e *Engine) processInactiveVariant(ctx context.Context, va wvav1alpha1.Vari
291291
accelerator = va.Status.DesiredOptimizedAlloc.Accelerator
292292
if accelerator == "" {
293293
// Try to get from deployment nodeSelector/nodeAffinity or VA labels as last resort
294-
if val, ok := va.Labels["inference.optimization/acceleratorName"]; ok && val != "" {
294+
if val, err := utils.GetAcceleratorNameFromVA(ctx, e.client, &va); err == nil {
295295
accelerator = val
296296
}
297297
}

0 commit comments

Comments
 (0)