Skip to content

Commit 12e9624

Browse files
authored
api: change NumReplicas to *int32 pointer for scale-to-zero support (#894)
* api: change NumReplicas to *int32 pointer for scale-to-zero support Signed-off-by: Vivek Karunai Kiri Ragavan <vkarunai@redhat.com> Made-with: Cursor Signed-off-by: Vivek Karunai Kiri Ragavan <vkarunai@redhat.com> * fix: dereference *int32 NumReplicas in fmt prints and revert unrelated go fmt changes Signed-off-by: Vivek Karunai Kiri Ragavan <vkarunai@redhat.com> --------- Signed-off-by: Vivek Karunai Kiri Ragavan <vkarunai@redhat.com>
1 parent d3f7c8c commit 12e9624

22 files changed

Lines changed: 164 additions & 96 deletions

api/v1alpha1/variantautoscaling_types.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -76,8 +76,9 @@ type OptimizedAlloc struct {
7676
Accelerator string `json:"accelerator"`
7777

7878
// NumReplicas is the number of replicas for the optimized allocation.
79+
// nil means no optimization decision has been made yet.
7980
// +kubebuilder:validation:Minimum=0
80-
NumReplicas int `json:"numReplicas"`
81+
NumReplicas *int32 `json:"numReplicas,omitempty"`
8182
}
8283

8384
// ActuationStatus provides details about the actuation process and its current status.

api/v1alpha1/variantautoscaling_types_test.go

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@ import (
1111
"k8s.io/apimachinery/pkg/runtime"
1212
)
1313

14+
func int32Ptr(v int32) *int32 { return &v }
15+
1416
// helper: build a valid VariantAutoscaling object
1517
// TODO: move to utils??
1618
func makeValidVA() *VariantAutoscaling {
@@ -39,7 +41,7 @@ func makeValidVA() *VariantAutoscaling {
3941
DesiredOptimizedAlloc: OptimizedAlloc{
4042
LastRunTime: metav1.NewTime(time.Unix(1730000000, 0).UTC()),
4143
Accelerator: "nvidia.com/mig-1g.5gb",
42-
NumReplicas: 2,
44+
NumReplicas: int32Ptr(2),
4345
},
4446
Actuation: ActuationStatus{
4547
Applied: true,
@@ -163,7 +165,7 @@ func TestStatusOmitEmpty(t *testing.T) {
163165
Status struct {
164166
DesiredOptimizedAlloc struct {
165167
LastRunTime *string `json:"lastRunTime"`
166-
NumReplicas int `json:"numReplicas"`
168+
NumReplicas *int32 `json:"numReplicas,omitempty"`
167169
} `json:"desiredOptimizedAlloc"`
168170
Actuation struct {
169171
Applied bool `json:"applied"`
@@ -173,7 +175,7 @@ func TestStatusOmitEmpty(t *testing.T) {
173175
if err := json.Unmarshal(b, &probe); err != nil {
174176
t.Fatalf("unmarshal probe failed: %v", err)
175177
}
176-
if probe.Status.DesiredOptimizedAlloc.NumReplicas != 0 ||
178+
if probe.Status.DesiredOptimizedAlloc.NumReplicas != nil ||
177179
probe.Status.Actuation.Applied != false {
178180
t.Errorf("unexpected non-zero defaults in status: %+v", probe.Status)
179181
}

api/v1alpha1/zz_generated.deepcopy.go

Lines changed: 5 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

charts/workload-variant-autoscaler/crds/llmd.ai_variantautoscalings.yaml

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -211,13 +211,14 @@ spec:
211211
format: date-time
212212
type: string
213213
numReplicas:
214-
description: NumReplicas is the number of replicas for the optimized
215-
allocation.
214+
description: |-
215+
NumReplicas is the number of replicas for the optimized allocation.
216+
nil means no optimization decision has been made yet.
217+
format: int32
216218
minimum: 0
217219
type: integer
218220
required:
219221
- accelerator
220-
- numReplicas
221222
type: object
222223
type: object
223224
type: object

config/crd/bases/llmd.ai_variantautoscalings.yaml

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -211,13 +211,14 @@ spec:
211211
format: date-time
212212
type: string
213213
numReplicas:
214-
description: NumReplicas is the number of replicas for the optimized
215-
allocation.
214+
description: |-
215+
NumReplicas is the number of replicas for the optimized allocation.
216+
nil means no optimization decision has been made yet.
217+
format: int32
216218
minimum: 0
217219
type: integer
218220
required:
219221
- accelerator
220-
- numReplicas
221222
type: object
222223
type: object
223224
type: object

docs/user-guide/crd-reference.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ _Appears in:_
4545
| --- | --- | --- | --- |
4646
| `lastRunTime` _[Time](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.32/#time-v1-meta)_ | LastRunTime is the timestamp of the last optimization run. | | |
4747
| `accelerator` _string_ | Accelerator is the type of accelerator for the optimized allocation. | | MinLength: 2 <br /> |
48-
| `numReplicas` _integer_ | NumReplicas is the number of replicas for the optimized allocation. | | Minimum: 0 <br /> |
48+
| `numReplicas` _integer_ | NumReplicas is the number of replicas for the optimized allocation.<br />nil means no optimization decision has been made yet. | | Minimum: 0 <br /> |
4949

5050

5151
#### VariantAutoscaling

internal/actuator/actuator.go

Lines changed: 30 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -58,39 +58,40 @@ func (a *Actuator) GetCurrentDeploymentReplicasFromDeployment(va *llmdOptv1alpha
5858
}
5959

6060
func (a *Actuator) EmitMetrics(ctx context.Context, VariantAutoscaling *llmdOptv1alpha1.VariantAutoscaling) error {
61-
// Emit replica metrics with real-time data for external autoscalers
6261
logger := log.FromContext(ctx)
63-
if VariantAutoscaling.Status.DesiredOptimizedAlloc.NumReplicas >= 0 {
62+
if VariantAutoscaling.Status.DesiredOptimizedAlloc.NumReplicas == nil {
63+
logger.Info("Skipping EmitReplicaMetrics - no optimization decision yet",
64+
"variantName", VariantAutoscaling.Name)
65+
return nil
66+
}
67+
68+
desiredReplicas := *VariantAutoscaling.Status.DesiredOptimizedAlloc.NumReplicas
6469

65-
// Get real current replicas from Deployment (not stale VariantAutoscaling status)
66-
currentReplicas, err := a.GetCurrentDeploymentReplicasFromVA(ctx, VariantAutoscaling)
67-
if err != nil {
68-
logger.Error(err, "Could not get current deployment replicas, using VariantAutoscaling status",
69-
"variantName", VariantAutoscaling.Name)
70-
currentReplicas = 0 // Fallback to 0 since CurrentAlloc is removed
71-
}
70+
// Get real current replicas from Deployment (not stale VariantAutoscaling status)
71+
currentReplicas, err := a.GetCurrentDeploymentReplicasFromVA(ctx, VariantAutoscaling)
72+
if err != nil {
73+
logger.Error(err, "Could not get current deployment replicas, using VariantAutoscaling status",
74+
"variantName", VariantAutoscaling.Name)
75+
currentReplicas = 0 // Fallback to 0 since CurrentAlloc is removed
76+
}
7277

73-
if err := a.MetricsEmitter.EmitReplicaMetrics(
74-
ctx,
75-
VariantAutoscaling,
76-
currentReplicas, // Real current from Deployment
77-
int32(VariantAutoscaling.Status.DesiredOptimizedAlloc.NumReplicas), // Inferno's optimization target
78-
VariantAutoscaling.Status.DesiredOptimizedAlloc.Accelerator,
79-
); err != nil {
80-
logger.Error(err, "Failed to emit optimization signals for variantAutoscaling",
81-
"variantName", VariantAutoscaling.Name)
82-
// Don't fail the reconciliation for metric emission errors
83-
// Metrics are critical for HPA, but emission failures shouldn't break core functionality
84-
return nil
85-
}
86-
logger.Info("EmitReplicaMetrics completed",
87-
"variantName", VariantAutoscaling.Name,
88-
"currentReplicas", currentReplicas,
89-
"desiredReplicas", VariantAutoscaling.Status.DesiredOptimizedAlloc.NumReplicas,
90-
"accelerator", VariantAutoscaling.Status.DesiredOptimizedAlloc.Accelerator)
78+
if err := a.MetricsEmitter.EmitReplicaMetrics(
79+
ctx,
80+
VariantAutoscaling,
81+
currentReplicas,
82+
desiredReplicas, // Inferno's optimization target
83+
VariantAutoscaling.Status.DesiredOptimizedAlloc.Accelerator,
84+
); err != nil {
85+
logger.Error(err, "Failed to emit optimization signals for variantAutoscaling",
86+
"variantName", VariantAutoscaling.Name)
87+
// Don't fail the reconciliation for metric emission errors
88+
// Metrics are critical for HPA, but emission failures shouldn't break core functionality
9189
return nil
9290
}
93-
logger.Info("Skipping EmitReplicaMetrics - NumReplicas is 0",
94-
"variantName", VariantAutoscaling.Name)
91+
logger.Info("EmitReplicaMetrics completed",
92+
"variantName", VariantAutoscaling.Name,
93+
"currentReplicas", currentReplicas,
94+
"desiredReplicas", desiredReplicas,
95+
"accelerator", VariantAutoscaling.Status.DesiredOptimizedAlloc.Accelerator)
9596
return nil
9697
}

internal/actuator/actuator_test.go

Lines changed: 26 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,13 @@ import (
3535
"sigs.k8s.io/controller-runtime/pkg/client"
3636
)
3737

38+
func fmtNumReplicas(nr *int32) string {
39+
if nr == nil {
40+
return "<nil>"
41+
}
42+
return fmt.Sprintf("%d", *nr)
43+
}
44+
3845
var _ = Describe("Actuator", func() {
3946
var (
4047
ctx context.Context
@@ -341,15 +348,15 @@ var _ = Describe("Actuator", func() {
341348
},
342349
Status: llmdVariantAutoscalingV1alpha1.VariantAutoscalingStatus{
343350
DesiredOptimizedAlloc: llmdVariantAutoscalingV1alpha1.OptimizedAlloc{
344-
NumReplicas: 4,
351+
NumReplicas: ctrlutils.Ptr(int32(4)),
345352
Accelerator: "A100",
346353
},
347354
},
348355
}
349356

350357
Expect(k8sClient.Create(ctx, deployment)).To(Succeed())
351358
Expect(k8sClient.Create(ctx, va)).To(Succeed())
352-
va.Status.DesiredOptimizedAlloc.NumReplicas = 4
359+
va.Status.DesiredOptimizedAlloc.NumReplicas = ctrlutils.Ptr(int32(4))
353360
va.Status.DesiredOptimizedAlloc.Accelerator = "A100"
354361
})
355362

@@ -359,7 +366,7 @@ var _ = Describe("Actuator", func() {
359366
})
360367

361368
It("should emit metrics successfully when desired replicas > 0", func() {
362-
fmt.Printf("Emitting metrics for variantAutoscaling - name: %s\n numReplicas: %d\n", va.Name, va.Status.DesiredOptimizedAlloc.NumReplicas)
369+
fmt.Printf("Emitting metrics for variantAutoscaling - name: %s\n numReplicas: %s\n", va.Name, fmtNumReplicas(va.Status.DesiredOptimizedAlloc.NumReplicas))
363370
err := actuator.EmitMetrics(ctx, va)
364371
Expect(err).NotTo(HaveOccurred())
365372

@@ -368,9 +375,9 @@ var _ = Describe("Actuator", func() {
368375
// but we can verify the method completed without error
369376
})
370377

371-
It("should skip metrics emission when desired replicas is 0", func() {
372-
va.Status.DesiredOptimizedAlloc.NumReplicas = 0
373-
fmt.Printf("Emitting metrics for variantAutoscaling - name: %s\n numReplicas: %d\n", va.Name, va.Status.DesiredOptimizedAlloc.NumReplicas)
378+
It("should skip metrics emission when NumReplicas is nil (no decision)", func() {
379+
va.Status.DesiredOptimizedAlloc.NumReplicas = nil
380+
fmt.Printf("Emitting metrics for variantAutoscaling - name: %s\n numReplicas: %s\n", va.Name, fmtNumReplicas(va.Status.DesiredOptimizedAlloc.NumReplicas))
374381
err := actuator.EmitMetrics(ctx, va)
375382
Expect(err).NotTo(HaveOccurred())
376383

@@ -389,7 +396,7 @@ var _ = Describe("Actuator", func() {
389396
Namespace: namespace,
390397
}, &dep)
391398
}).Should(HaveOccurred())
392-
fmt.Printf("Emitting metrics for variantAutoscaling - name: %s\n numReplicas: %d\n", va.Name, va.Status.DesiredOptimizedAlloc.NumReplicas)
399+
fmt.Printf("Emitting metrics for variantAutoscaling - name: %s\n numReplicas: %s\n", va.Name, fmtNumReplicas(va.Status.DesiredOptimizedAlloc.NumReplicas))
393400
err := actuator.EmitMetrics(ctx, va)
394401
Expect(err).NotTo(HaveOccurred())
395402

@@ -401,7 +408,7 @@ var _ = Describe("Actuator", func() {
401408
// This test verifies that metrics emission errors don't fail the method
402409
// We can't easily simulate a metrics emission error without mocking,
403410
// but we can verify the error handling logic exists
404-
fmt.Printf("Emitting metrics for variantAutoscaling - name: %s\n numReplicas: %d\n", va.Name, va.Status.DesiredOptimizedAlloc.NumReplicas)
411+
fmt.Printf("Emitting metrics for variantAutoscaling - name: %s\n numReplicas: %s\n", va.Name, fmtNumReplicas(va.Status.DesiredOptimizedAlloc.NumReplicas))
405412
err := actuator.EmitMetrics(ctx, va)
406413
Expect(err).NotTo(HaveOccurred())
407414
})
@@ -460,15 +467,15 @@ var _ = Describe("Actuator", func() {
460467
},
461468
Status: llmdVariantAutoscalingV1alpha1.VariantAutoscalingStatus{
462469
DesiredOptimizedAlloc: llmdVariantAutoscalingV1alpha1.OptimizedAlloc{
463-
NumReplicas: 3,
470+
NumReplicas: ctrlutils.Ptr(int32(3)),
464471
Accelerator: "A100",
465472
},
466473
},
467474
}
468475

469476
Expect(k8sClient.Create(ctx, deployment)).To(Succeed())
470477
Expect(k8sClient.Create(ctx, va)).To(Succeed())
471-
va.Status.DesiredOptimizedAlloc.NumReplicas = 3
478+
va.Status.DesiredOptimizedAlloc.NumReplicas = ctrlutils.Ptr(int32(3))
472479
va.Status.DesiredOptimizedAlloc.Accelerator = "A100"
473480

474481
})
@@ -480,20 +487,20 @@ var _ = Describe("Actuator", func() {
480487
})
481488

482489
It("should verify that metrics emitter can emit scaling metrics", func() {
483-
fmt.Printf("Emitting scaling metrics for variantAutoscaling - name: %s\n numReplicas: %d\n", va.Name, va.Status.DesiredOptimizedAlloc.NumReplicas)
490+
fmt.Printf("Emitting scaling metrics for variantAutoscaling - name: %s\n numReplicas: %s\n", va.Name, fmtNumReplicas(va.Status.DesiredOptimizedAlloc.NumReplicas))
484491
err := actuator.MetricsEmitter.EmitReplicaScalingMetrics(ctx, va, "up", "optimization")
485492
Expect(err).NotTo(HaveOccurred())
486493
})
487494

488495
It("should verify that metrics emitter can emit replica metrics", func() {
489-
fmt.Printf("Emitting replica metrics for variantAutoscaling - name: %s\n numReplicas: %d\n", va.Name, va.Status.DesiredOptimizedAlloc.NumReplicas)
496+
fmt.Printf("Emitting replica metrics for variantAutoscaling - name: %s\n numReplicas: %s\n", va.Name, fmtNumReplicas(va.Status.DesiredOptimizedAlloc.NumReplicas))
490497
err := actuator.MetricsEmitter.EmitReplicaMetrics(ctx, va, 1, 3, "A100")
491498
Expect(err).NotTo(HaveOccurred())
492499
})
493500

494501
It("should verify full metric emission workflow", func() {
495502
// Test the complete workflow
496-
fmt.Printf("Emitting metrics for variantAutoscaling - name: %s\n numReplicas: %d\n", va.Name, va.Status.DesiredOptimizedAlloc.NumReplicas)
503+
fmt.Printf("Emitting metrics for variantAutoscaling - name: %s\n numReplicas: %s\n", va.Name, fmtNumReplicas(va.Status.DesiredOptimizedAlloc.NumReplicas))
497504
err := actuator.EmitMetrics(ctx, va)
498505
Expect(err).NotTo(HaveOccurred())
499506

@@ -520,9 +527,9 @@ var _ = Describe("Actuator", func() {
520527
MaxReplicas: 2,
521528
},
522529
Status: llmdVariantAutoscalingV1alpha1.VariantAutoscalingStatus{
523-
// DesiredOptimizedAlloc.NumReplicas will be 0 by default
530+
// DesiredOptimizedAlloc.NumReplicas is nil (no decision yet)
524531
DesiredOptimizedAlloc: llmdVariantAutoscalingV1alpha1.OptimizedAlloc{
525-
NumReplicas: 0, // This should cause EmitMetrics to skip
532+
NumReplicas: nil, // This should cause EmitMetrics to skip
526533
Accelerator: "A100",
527534
},
528535
},
@@ -532,9 +539,9 @@ var _ = Describe("Actuator", func() {
532539
defer func() {
533540
Expect(client.IgnoreNotFound(k8sClient.Delete(ctx, va))).To(Succeed())
534541
}()
535-
fmt.Printf("Emitting metrics for variantAutoscaling - name: %s\n numReplicas: %d\n", va.Name, va.Status.DesiredOptimizedAlloc.NumReplicas)
542+
fmt.Printf("Emitting metrics for variantAutoscaling - name: %s\n numReplicas: %s\n", va.Name, fmtNumReplicas(va.Status.DesiredOptimizedAlloc.NumReplicas))
536543
err := actuator.EmitMetrics(ctx, va)
537-
Expect(err).NotTo(HaveOccurred()) // Should skip metrics emission due to 0 replicas
544+
Expect(err).NotTo(HaveOccurred()) // Should skip metrics emission due to nil NumReplicas
538545
})
539546
})
540547

@@ -591,15 +598,15 @@ var _ = Describe("Actuator", func() {
591598
},
592599
Status: llmdVariantAutoscalingV1alpha1.VariantAutoscalingStatus{
593600
DesiredOptimizedAlloc: llmdVariantAutoscalingV1alpha1.OptimizedAlloc{
594-
NumReplicas: 5,
601+
NumReplicas: ctrlutils.Ptr(int32(5)),
595602
Accelerator: "A100",
596603
},
597604
},
598605
}
599606

600607
Expect(k8sClient.Create(ctx, deployment)).To(Succeed())
601608
Expect(k8sClient.Create(ctx, va)).To(Succeed())
602-
va.Status.DesiredOptimizedAlloc.NumReplicas = 5
609+
va.Status.DesiredOptimizedAlloc.NumReplicas = ctrlutils.Ptr(int32(5))
603610
va.Status.DesiredOptimizedAlloc.Accelerator = "A100"
604611

605612
})

internal/controller/variantautoscaling_controller.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -245,8 +245,8 @@ func (r *VariantAutoscalingReconciler) Reconcile(ctx context.Context, req ctrl.R
245245
// fullDesiredAllocPatchBase returns a patch base that forces the full
246246
// desiredOptimizedAlloc object into the JSON merge patch. Without this,
247247
// MergeFrom only includes changed fields within nested structs, and the
248-
// CRD validates the partial patch — rejecting it when required fields
249-
// (numReplicas, accelerator) are absent from the partial object.
248+
// CRD validates the partial patch — rejecting it when the required field
249+
// (accelerator) is absent from the partial object.
250250
// When desiredOptimizedAlloc hasn't been set yet (accelerator is empty),
251251
// the base is left unchanged so the zero-valued struct is not included.
252252
func fullDesiredAllocPatchBase(originalVA *llmdVariantAutoscalingV1alpha1.VariantAutoscaling, va *llmdVariantAutoscalingV1alpha1.VariantAutoscaling) *llmdVariantAutoscalingV1alpha1.VariantAutoscaling {

internal/controller/variantautoscaling_controller_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -488,7 +488,7 @@ var _ = Describe("VariantAutoscalings Controller", func() {
488488

489489
// DesiredOptimizedAlloc should remain at zero values (not set)
490490
Expect(resource.Status.DesiredOptimizedAlloc.Accelerator).To(BeEmpty(), "Accelerator should remain empty")
491-
Expect(resource.Status.DesiredOptimizedAlloc.NumReplicas).To(Equal(0), "NumReplicas should remain 0")
491+
Expect(resource.Status.DesiredOptimizedAlloc.NumReplicas).To(BeNil(), "NumReplicas should remain nil")
492492
})
493493
})
494494

0 commit comments

Comments
 (0)