Skip to content

Commit 807e467

Browse files
committed
refactor(api): change default maxReplicas from 10 to 2
Signed-off-by: Vivek Karunai Kiri Ragavan <vkarunai@redhat.com>
1 parent da4bd4b · commit 807e467

12 files changed

Lines changed: 28 additions & 28 deletions

File tree

api/v1alpha1/variantautoscaling_types.go

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -47,7 +47,7 @@ type VariantAutoscalingSpec struct {
4747
// MaxReplicas is the upper bound on the number of replicas for this variant.
4848
// The autoscaler will never scale beyond this value regardless of load.
4949
// +kubebuilder:validation:Minimum=1
50-
// +kubebuilder:default=10
50+
// +kubebuilder:default=2
5151
MaxReplicas int32 `json:"maxReplicas"`
5252

5353
// VariantAutoscalingConfigSpec holds optional tuning fields that integrators can embed.

api/v1alpha1/variantautoscaling_types_test.go

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -33,7 +33,7 @@ func makeValidVA() *VariantAutoscaling {
3333
Name: "va-sample-deployment",
3434
},
3535
ModelID: "model-123",
36-
MaxReplicas: 10,
36+
MaxReplicas: 2,
3737
},
3838
Status: VariantAutoscalingStatus{
3939
// CurrentAlloc: Allocation{...} -- Removed

charts/workload-variant-autoscaler/crds/llmd.ai_variantautoscalings.yaml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -222,7 +222,7 @@ spec:
222222
type: object
223223
type: object
224224
maxReplicas:
225-
default: 10
225+
default: 2
226226
description: |-
227227
MaxReplicas is the upper bound on the number of replicas for this variant.
228228
The autoscaler will never scale beyond this value regardless of load.

charts/workload-variant-autoscaler/values-dev.yaml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -68,7 +68,7 @@ hpa:
6868
enabled: true
6969
# minReplicas: 0 for scale-to-zero testing (requires HPAScaleToZero feature gate)
7070
minReplicas: 0
71-
maxReplicas: 10
71+
maxReplicas: 2
7272
targetAverageValue: "1"
7373
vllmService:
7474
enabled: true

charts/workload-variant-autoscaler/values.yaml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -103,7 +103,7 @@ hpa:
103103
# minReplicas: 1 is the safe default that prevents scale-to-zero
104104
# Set to 0 when wva.scaleToZero is enabled
105105
minReplicas: 1
106-
maxReplicas: 10
106+
maxReplicas: 2
107107
targetAverageValue: "1"
108108
# HPA scaling behavior configuration
109109
behavior:

config/crd/bases/llmd.ai_variantautoscalings.yaml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -222,7 +222,7 @@ spec:
222222
type: object
223223
type: object
224224
maxReplicas:
225-
default: 10
225+
default: 2
226226
description: |-
227227
MaxReplicas is the upper bound on the number of replicas for this variant.
228228
The autoscaler will never scale beyond this value regardless of load.

config/samples/hpa-integration.yaml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,7 @@ spec:
99
kind: Deployment
1010
name: sample-deployment
1111
# minReplicas: 0 # scale to zero - alpha feature
12-
maxReplicas: 10
12+
maxReplicas: 2
1313
behavior:
1414
scaleUp:
1515
stabilizationWindowSeconds: 0

internal/actuator/actuator_test.go

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -122,7 +122,7 @@ var _ = Describe("Actuator", func() {
122122
Kind: "Deployment",
123123
Name: resourceName,
124124
},
125-
MaxReplicas: 10,
125+
MaxReplicas: 2,
126126
},
127127
}
128128

@@ -215,7 +215,7 @@ var _ = Describe("Actuator", func() {
215215
Name: contextResourceName,
216216
},
217217
ModelID: "test-model/variant-1",
218-
MaxReplicas: 10,
218+
MaxReplicas: 2,
219219
},
220220
Status: llmdVariantAutoscalingV1alpha1.VariantAutoscalingStatus{
221221
DesiredOptimizedAlloc: llmdVariantAutoscalingV1alpha1.OptimizedAlloc{
@@ -334,7 +334,7 @@ var _ = Describe("Actuator", func() {
334334
Name: contextResourceName,
335335
},
336336
ModelID: "test-model/metrics-test",
337-
MaxReplicas: 10,
337+
MaxReplicas: 2,
338338
},
339339
Status: llmdVariantAutoscalingV1alpha1.VariantAutoscalingStatus{
340340
DesiredOptimizedAlloc: llmdVariantAutoscalingV1alpha1.OptimizedAlloc{
@@ -395,7 +395,7 @@ var _ = Describe("Actuator", func() {
395395
Name: "incomplete-va",
396396
},
397397
ModelID: "test-model/incomplete",
398-
MaxReplicas: 10,
398+
MaxReplicas: 2,
399399
},
400400
Status: llmdVariantAutoscalingV1alpha1.VariantAutoscalingStatus{
401401
// DesiredOptimizedAlloc.NumReplicas will be 0 by default
@@ -465,7 +465,7 @@ var _ = Describe("Actuator", func() {
465465
Name: contextResourceName,
466466
},
467467
ModelID: "test-model/validation-test",
468-
MaxReplicas: 10,
468+
MaxReplicas: 2,
469469
},
470470
Status: llmdVariantAutoscalingV1alpha1.VariantAutoscalingStatus{
471471
DesiredOptimizedAlloc: llmdVariantAutoscalingV1alpha1.OptimizedAlloc{

internal/controller/indexers/indexers_test.go

Lines changed: 9 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -116,7 +116,7 @@ var _ = Describe("Indexers", Ordered, func() {
116116
Name: deploymentName,
117117
},
118118
ModelID: "model-1",
119-
MaxReplicas: 10,
119+
MaxReplicas: 2,
120120
},
121121
}
122122
Expect(k8sClient.Create(testCtx, va1)).To(Succeed())
@@ -134,7 +134,7 @@ var _ = Describe("Indexers", Ordered, func() {
134134
Name: "other-deployment",
135135
},
136136
ModelID: "model-other",
137-
MaxReplicas: 10,
137+
MaxReplicas: 2,
138138
},
139139
}
140140
Expect(k8sClient.Create(testCtx, vaOther)).To(Succeed())
@@ -190,7 +190,7 @@ var _ = Describe("Indexers", Ordered, func() {
190190
Name: deploymentName, // Same deployment name but different namespace
191191
},
192192
ModelID: "model-other-ns",
193-
MaxReplicas: 10,
193+
MaxReplicas: 2,
194194
},
195195
}
196196
Expect(k8sClient.Create(testCtx, vaOtherNs)).To(Succeed())
@@ -226,7 +226,7 @@ var _ = Describe("Indexers", Ordered, func() {
226226
Name: sharedName,
227227
},
228228
ModelID: "model-deploy",
229-
MaxReplicas: 10,
229+
MaxReplicas: 2,
230230
},
231231
}
232232
Expect(k8sClient.Create(testCtx, vaDeployment)).To(Succeed())
@@ -247,7 +247,7 @@ var _ = Describe("Indexers", Ordered, func() {
247247
Name: sharedName,
248248
},
249249
ModelID: "model-sts",
250-
MaxReplicas: 10,
250+
MaxReplicas: 2,
251251
},
252252
}
253253
Expect(k8sClient.Create(testCtx, vaStatefulSet)).To(Succeed())
@@ -293,7 +293,7 @@ var _ = Describe("Indexers", Ordered, func() {
293293
Name: sharedName,
294294
},
295295
ModelID: "model-dup-1",
296-
MaxReplicas: 10,
296+
MaxReplicas: 2,
297297
},
298298
}
299299
Expect(k8sClient.Create(testCtx, va1)).To(Succeed())
@@ -313,7 +313,7 @@ var _ = Describe("Indexers", Ordered, func() {
313313
Name: sharedName,
314314
},
315315
ModelID: "model-dup-2",
316-
MaxReplicas: 10,
316+
MaxReplicas: 2,
317317
},
318318
}
319319
Expect(k8sClient.Create(testCtx, va2)).To(Succeed())
@@ -348,7 +348,7 @@ var _ = Describe("Indexers", Ordered, func() {
348348
Name: deploymentName,
349349
},
350350
ModelID: "model-apiversion",
351-
MaxReplicas: 10,
351+
MaxReplicas: 2,
352352
},
353353
}
354354
Expect(k8sClient.Create(testCtx, va)).To(Succeed())
@@ -381,7 +381,7 @@ var _ = Describe("Indexers", Ordered, func() {
381381
Name: deploymentName,
382382
},
383383
ModelID: "model-no-apiversion",
384-
MaxReplicas: 10,
384+
MaxReplicas: 2,
385385
},
386386
}
387387
Expect(k8sClient.Create(testCtx, va)).To(Succeed())

internal/controller/variantautoscaling_controller_test.go

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -90,7 +90,7 @@ var _ = Describe("VariantAutoscalings Controller", func() {
9090
Name: resourceName,
9191
},
9292
ModelID: "default-default",
93-
MaxReplicas: 10,
93+
MaxReplicas: 2,
9494
},
9595
}
9696
Expect(k8sClient.Create(ctx, resource)).To(Succeed())
@@ -197,7 +197,7 @@ var _ = Describe("VariantAutoscalings Controller", func() {
197197
Name: "invalid-model-id",
198198
},
199199
ModelID: "", // Empty ModelID
200-
MaxReplicas: 10,
200+
MaxReplicas: 2,
201201
},
202202
}
203203
err := k8sClient.Create(ctx, resource)
@@ -315,7 +315,7 @@ var _ = Describe("VariantAutoscalings Controller", func() {
315315
Name: resourceName,
316316
},
317317
ModelID: "default-default",
318-
MaxReplicas: 10,
318+
MaxReplicas: 2,
319319
},
320320
}
321321
Expect(k8sClient.Create(ctx, resource)).To(Succeed())
@@ -408,7 +408,7 @@ var _ = Describe("VariantAutoscalings Controller", func() {
408408
Name: resourceName,
409409
},
410410
ModelID: "test-model",
411-
MaxReplicas: 10,
411+
MaxReplicas: 2,
412412
},
413413
}
414414
Expect(k8sClient.Create(ctx, resource)).To(Succeed())

0 commit comments

Comments
 (0)