Skip to content

Commit 6e68947

Browse files
committed
refactor(api): remove behavior field to align with release plan
Signed-off-by: Vivek Karunai Kiri Ragavan <vkarunai@redhat.com>
1 parent 807e467 commit 6e68947

5 files changed

Lines changed: 1 addition & 403 deletions

File tree

api/v1alpha1/variantautoscaling_types.go

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -14,13 +14,6 @@ type VariantAutoscalingConfigSpec struct {
14 14
// +kubebuilder:validation:Pattern=`^\d+(\.\d+)?$`
15 15
// +kubebuilder:default="10.0"
16 16
VariantCost string `json:"variantCost,omitempty"`
17-
18-
// Behavior configures the HPA scaling behavior policies (scale-up and scale-down).
19-
// When omitted, default Kubernetes HPA scaling behavior is used.
20-
// Applied directly to HPA, or passed through to KEDA's underlying HPA via
21-
// spec.advanced.horizontalPodAutoscalerConfig.behavior.
22-
// +kubebuilder:validation:Optional
23-
Behavior *autoscalingv2.HorizontalPodAutoscalerBehavior `json:"behavior,omitempty"`
24 17
}
25 18

26 19
// VariantAutoscalingSpec defines the desired state for autoscaling a model variant.

api/v1alpha1/variantautoscaling_types_test.go

Lines changed: 0 additions & 79 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@ import (
9 9
autoscalingv2 "k8s.io/api/autoscaling/v2"
10 10
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
11 11
"k8s.io/apimachinery/pkg/runtime"
12-
"k8s.io/utils/ptr"
13 12
)
14 13

15 14
// helper: build a valid VariantAutoscaling object
@@ -283,81 +282,3 @@ func TestMinMaxReplicasJSON(t *testing.T) {
283 282
t.Errorf("expected minReplicas to be absent when nil, but it was present")
284 283
}
285 284
}
286-
287-
func TestBehaviorOmitEmpty(t *testing.T) {
288-
va := makeValidVA()
289-
// behavior should be absent when nil
290-
b, err := json.Marshal(va)
291-
if err != nil {
292-
t.Fatalf("marshal failed: %v", err)
293-
}
294-
var probeSpec struct {
295-
Spec map[string]any `json:"spec"`
296-
}
297-
if err := json.Unmarshal(b, &probeSpec); err != nil {
298-
t.Fatalf("unmarshal failed: %v", err)
299-
}
300-
if _, ok := probeSpec.Spec["behavior"]; ok {
301-
t.Errorf("expected behavior to be absent when nil, but it was present")
302-
}
303-
}
304-
305-
func TestBehaviorJSONRoundTrip(t *testing.T) {
306-
va := makeValidVA()
307-
va.Spec.Behavior = &autoscalingv2.HorizontalPodAutoscalerBehavior{
308-
ScaleUp: &autoscalingv2.HPAScalingRules{
309-
StabilizationWindowSeconds: ptr.To(int32(0)),
310-
Policies: []autoscalingv2.HPAScalingPolicy{
311-
{
312-
Type: autoscalingv2.PodsScalingPolicy,
313-
Value: 4,
314-
PeriodSeconds: 60,
315-
},
316-
},
317-
},
318-
ScaleDown: &autoscalingv2.HPAScalingRules{
319-
StabilizationWindowSeconds: ptr.To(int32(300)),
320-
Policies: []autoscalingv2.HPAScalingPolicy{
321-
{
322-
Type: autoscalingv2.PercentScalingPolicy,
323-
Value: 10,
324-
PeriodSeconds: 60,
325-
},
326-
},
327-
},
328-
}
329-
330-
b, err := json.Marshal(va)
331-
if err != nil {
332-
t.Fatalf("marshal failed: %v", err)
333-
}
334-
335-
var back VariantAutoscaling
336-
if err := json.Unmarshal(b, &back); err != nil {
337-
t.Fatalf("unmarshal failed: %v", err)
338-
}
339-
340-
if back.Spec.Behavior == nil {
341-
t.Fatal("expected behavior to be present after round-trip")
342-
}
343-
if back.Spec.Behavior.ScaleUp == nil {
344-
t.Fatal("expected scaleUp to be present after round-trip")
345-
}
346-
if *back.Spec.Behavior.ScaleUp.StabilizationWindowSeconds != 0 {
347-
t.Errorf("expected scaleUp.stabilizationWindowSeconds=0, got %d",
348-
*back.Spec.Behavior.ScaleUp.StabilizationWindowSeconds)
349-
}
350-
if len(back.Spec.Behavior.ScaleUp.Policies) != 1 {
351-
t.Fatalf("expected 1 scaleUp policy, got %d", len(back.Spec.Behavior.ScaleUp.Policies))
352-
}
353-
if back.Spec.Behavior.ScaleUp.Policies[0].Value != 4 {
354-
t.Errorf("expected scaleUp policy value=4, got %d", back.Spec.Behavior.ScaleUp.Policies[0].Value)
355-
}
356-
if back.Spec.Behavior.ScaleDown == nil {
357-
t.Fatal("expected scaleDown to be present after round-trip")
358-
}
359-
if *back.Spec.Behavior.ScaleDown.StabilizationWindowSeconds != 300 {
360-
t.Errorf("expected scaleDown.stabilizationWindowSeconds=300, got %d",
361-
*back.Spec.Behavior.ScaleDown.StabilizationWindowSeconds)
362-
}
363-
}

api/v1alpha1/zz_generated.deepcopy.go

Lines changed: 1 addition & 7 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

charts/workload-variant-autoscaler/crds/llmd.ai_variantautoscalings.yaml

Lines changed: 0 additions & 155 deletions
Original file line numberDiff line numberDiff line change
@@ -66,161 +66,6 @@ spec:
66 66
description: Spec defines the desired state for autoscaling the model
67 67
variant.
68 68
properties:
69-
behavior:
70-
description: |-
71-
Behavior configures the HPA scaling behavior policies (scale-up and scale-down).
72-
When omitted, default Kubernetes HPA scaling behavior is used.
73-
Applied directly to HPA, or passed through to KEDA's underlying HPA via
74-
spec.advanced.horizontalPodAutoscalerConfig.behavior.
75-
properties:
76-
scaleDown:
77-
description: |-
78-
scaleDown is scaling policy for scaling Down.
79-
If not set, the default value is to allow to scale down to minReplicas pods, with a
80-
300 second stabilization window (i.e., the highest recommendation for
81-
the last 300sec is used).
82-
properties:
83-
policies:
84-
description: |-
85-
policies is a list of potential scaling polices which can be used during scaling.
86-
If not set, use the default values:
87-
- For scale up: allow doubling the number of pods, or an absolute change of 4 pods in a 15s window.
88-
- For scale down: allow all pods to be removed in a 15s window.
89-
items:
90-
description: HPAScalingPolicy is a single policy which must
91-
hold true for a specified past interval.
92-
properties:
93-
periodSeconds:
94-
description: |-
95-
periodSeconds specifies the window of time for which the policy should hold true.
96-
PeriodSeconds must be greater than zero and less than or equal to 1800 (30 min).
97-
format: int32
98-
type: integer
99-
type:
100-
description: type is used to specify the scaling policy.
101-
type: string
102-
value:
103-
description: |-
104-
value contains the amount of change which is permitted by the policy.
105-
It must be greater than zero
106-
format: int32
107-
type: integer
108-
required:
109-
- periodSeconds
110-
- type
111-
- value
112-
type: object
113-
type: array
114-
x-kubernetes-list-type: atomic
115-
selectPolicy:
116-
description: |-
117-
selectPolicy is used to specify which policy should be used.
118-
If not set, the default value Max is used.
119-
type: string
120-
stabilizationWindowSeconds:
121-
description: |-
122-
stabilizationWindowSeconds is the number of seconds for which past recommendations should be
123-
considered while scaling up or scaling down.
124-
StabilizationWindowSeconds must be greater than or equal to zero and less than or equal to 3600 (one hour).
125-
If not set, use the default values:
126-
- For scale up: 0 (i.e. no stabilization is done).
127-
- For scale down: 300 (i.e. the stabilization window is 300 seconds long).
128-
format: int32
129-
type: integer
130-
tolerance:
131-
anyOf:
132-
- type: integer
133-
- type: string
134-
description: |-
135-
tolerance is the tolerance on the ratio between the current and desired
136-
metric value under which no updates are made to the desired number of
137-
replicas (e.g. 0.01 for 1%). Must be greater than or equal to zero. If not
138-
set, the default cluster-wide tolerance is applied (by default 10%).
139-
140-
For example, if autoscaling is configured with a memory consumption target of 100Mi,
141-
and scale-down and scale-up tolerances of 5% and 1% respectively, scaling will be
142-
triggered when the actual consumption falls below 95Mi or exceeds 101Mi.
143-
144-
This is an alpha field and requires enabling the HPAConfigurableTolerance
145-
feature gate.
146-
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
147-
x-kubernetes-int-or-string: true
148-
type: object
149-
scaleUp:
150-
description: |-
151-
scaleUp is scaling policy for scaling Up.
152-
If not set, the default value is the higher of:
153-
* increase no more than 4 pods per 60 seconds
154-
* double the number of pods per 60 seconds
155-
No stabilization is used.
156-
properties:
157-
policies:
158-
description: |-
159-
policies is a list of potential scaling polices which can be used during scaling.
160-
If not set, use the default values:
161-
- For scale up: allow doubling the number of pods, or an absolute change of 4 pods in a 15s window.
162-
- For scale down: allow all pods to be removed in a 15s window.
163-
items:
164-
description: HPAScalingPolicy is a single policy which must
165-
hold true for a specified past interval.
166-
properties:
167-
periodSeconds:
168-
description: |-
169-
periodSeconds specifies the window of time for which the policy should hold true.
170-
PeriodSeconds must be greater than zero and less than or equal to 1800 (30 min).
171-
format: int32
172-
type: integer
173-
type:
174-
description: type is used to specify the scaling policy.
175-
type: string
176-
value:
177-
description: |-
178-
value contains the amount of change which is permitted by the policy.
179-
It must be greater than zero
180-
format: int32
181-
type: integer
182-
required:
183-
- periodSeconds
184-
- type
185-
- value
186-
type: object
187-
type: array
188-
x-kubernetes-list-type: atomic
189-
selectPolicy:
190-
description: |-
191-
selectPolicy is used to specify which policy should be used.
192-
If not set, the default value Max is used.
193-
type: string
194-
stabilizationWindowSeconds:
195-
description: |-
196-
stabilizationWindowSeconds is the number of seconds for which past recommendations should be
197-
considered while scaling up or scaling down.
198-
StabilizationWindowSeconds must be greater than or equal to zero and less than or equal to 3600 (one hour).
199-
If not set, use the default values:
200-
- For scale up: 0 (i.e. no stabilization is done).
201-
- For scale down: 300 (i.e. the stabilization window is 300 seconds long).
202-
format: int32
203-
type: integer
204-
tolerance:
205-
anyOf:
206-
- type: integer
207-
- type: string
208-
description: |-
209-
tolerance is the tolerance on the ratio between the current and desired
210-
metric value under which no updates are made to the desired number of
211-
replicas (e.g. 0.01 for 1%). Must be greater than or equal to zero. If not
212-
set, the default cluster-wide tolerance is applied (by default 10%).
213-
214-
For example, if autoscaling is configured with a memory consumption target of 100Mi,
215-
and scale-down and scale-up tolerances of 5% and 1% respectively, scaling will be
216-
triggered when the actual consumption falls below 95Mi or exceeds 101Mi.
217-
218-
This is an alpha field and requires enabling the HPAConfigurableTolerance
219-
feature gate.
220-
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
221-
x-kubernetes-int-or-string: true
222-
type: object
223-
type: object
224 69
maxReplicas:
225 70
default: 2
226 71
description: |-

0 commit comments

Comments
 (0)