llm-d
diff --git a/api/v1alpha1/variantautoscaling_types.go b/api/v1alpha1/variantautoscaling_types.go
Lines changed: 0 additions & 7 deletions
diff --git a/api/v1alpha1/variantautoscaling_types_test.go b/api/v1alpha1/variantautoscaling_types_test.go
Lines changed: 0 additions & 79 deletions
diff --git a/api/v1alpha1/zz_generated.deepcopy.go b/api/v1alpha1/zz_generated.deepcopy.go
Lines changed: 1 addition & 7 deletions
diff --git a/charts/workload-variant-autoscaler/crds/llmd.ai_variantautoscalings.yaml b/charts/workload-variant-autoscaler/crds/llmd.ai_variantautoscalings.yaml
Lines changed: 0 additions & 155 deletions
@@ -14,13 +14,6 @@ type VariantAutoscalingConfigSpec struct {
 	// +kubebuilder:validation:Pattern=`^\d+(\.\d+)?$`
 	// +kubebuilder:default="10.0"
 	VariantCost string `json:"variantCost,omitempty"`
-
-	// Behavior configures the HPA scaling behavior policies (scale-up and scale-down).
-	// When omitted, default Kubernetes HPA scaling behavior is used.
-	// Applied directly to HPA, or passed through to KEDA's underlying HPA via
-	// spec.advanced.horizontalPodAutoscalerConfig.behavior.
-	// +kubebuilder:validation:Optional
-	Behavior *autoscalingv2.HorizontalPodAutoscalerBehavior `json:"behavior,omitempty"`
 }
 
 // VariantAutoscalingSpec defines the desired state for autoscaling a model variant.
 
@@ -9,7 +9,6 @@ import (
 	autoscalingv2 "k8s.io/api/autoscaling/v2"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/runtime"
-	"k8s.io/utils/ptr"
 )
 
 // helper: build a valid VariantAutoscaling object
@@ -283,81 +282,3 @@ func TestMinMaxReplicasJSON(t *testing.T) {
 		t.Errorf("expected minReplicas to be absent when nil, but it was present")
 	}
 }
-
-func TestBehaviorOmitEmpty(t *testing.T) {
-	va := makeValidVA()
-	// behavior should be absent when nil
-	b, err := json.Marshal(va)
-	if err != nil {
-		t.Fatalf("marshal failed: %v", err)
-	}
-	var probeSpec struct {
-		Spec map[string]any `json:"spec"`
-	}
-	if err := json.Unmarshal(b, &probeSpec); err != nil {
-		t.Fatalf("unmarshal failed: %v", err)
-	}
-	if _, ok := probeSpec.Spec["behavior"]; ok {
-		t.Errorf("expected behavior to be absent when nil, but it was present")
-	}
-}
-
-func TestBehaviorJSONRoundTrip(t *testing.T) {
-	va := makeValidVA()
-	va.Spec.Behavior = &autoscalingv2.HorizontalPodAutoscalerBehavior{
-		ScaleUp: &autoscalingv2.HPAScalingRules{
-			StabilizationWindowSeconds: ptr.To(int32(0)),
-			Policies: []autoscalingv2.HPAScalingPolicy{
-				{
-					Type:          autoscalingv2.PodsScalingPolicy,
-					Value:         4,
-					PeriodSeconds: 60,
-				},
-			},
-		},
-		ScaleDown: &autoscalingv2.HPAScalingRules{
-			StabilizationWindowSeconds: ptr.To(int32(300)),
-			Policies: []autoscalingv2.HPAScalingPolicy{
-				{
-					Type:          autoscalingv2.PercentScalingPolicy,
-					Value:         10,
-					PeriodSeconds: 60,
-				},
-			},
-		},
-	}
-
-	b, err := json.Marshal(va)
-	if err != nil {
-		t.Fatalf("marshal failed: %v", err)
-	}
-
-	var back VariantAutoscaling
-	if err := json.Unmarshal(b, &back); err != nil {
-		t.Fatalf("unmarshal failed: %v", err)
-	}
-
-	if back.Spec.Behavior == nil {
-		t.Fatal("expected behavior to be present after round-trip")
-	}
-	if back.Spec.Behavior.ScaleUp == nil {
-		t.Fatal("expected scaleUp to be present after round-trip")
-	}
-	if *back.Spec.Behavior.ScaleUp.StabilizationWindowSeconds != 0 {
-		t.Errorf("expected scaleUp.stabilizationWindowSeconds=0, got %d",
-			*back.Spec.Behavior.ScaleUp.StabilizationWindowSeconds)
-	}
-	if len(back.Spec.Behavior.ScaleUp.Policies) != 1 {
-		t.Fatalf("expected 1 scaleUp policy, got %d", len(back.Spec.Behavior.ScaleUp.Policies))
-	}
-	if back.Spec.Behavior.ScaleUp.Policies[0].Value != 4 {
-		t.Errorf("expected scaleUp policy value=4, got %d", back.Spec.Behavior.ScaleUp.Policies[0].Value)
-	}
-	if back.Spec.Behavior.ScaleDown == nil {
-		t.Fatal("expected scaleDown to be present after round-trip")
-	}
-	if *back.Spec.Behavior.ScaleDown.StabilizationWindowSeconds != 300 {
-		t.Errorf("expected scaleDown.stabilizationWindowSeconds=300, got %d",
-			*back.Spec.Behavior.ScaleDown.StabilizationWindowSeconds)
-	}
-}
@@ -66,161 +66,6 @@ spec:
             description: Spec defines the desired state for autoscaling the model
               variant.
             properties:
-              behavior:
-                description: |-
-                  Behavior configures the HPA scaling behavior policies (scale-up and scale-down).
-                  When omitted, default Kubernetes HPA scaling behavior is used.
-                  Applied directly to HPA, or passed through to KEDA's underlying HPA via
-                  spec.advanced.horizontalPodAutoscalerConfig.behavior.
-                properties:
-                  scaleDown:
-                    description: |-
-                      scaleDown is scaling policy for scaling Down.
-                      If not set, the default value is to allow to scale down to minReplicas pods, with a
-                      300 second stabilization window (i.e., the highest recommendation for
-                      the last 300sec is used).
-                    properties:
-                      policies:
-                        description: |-
-                          policies is a list of potential scaling polices which can be used during scaling.
-                          If not set, use the default values:
-                          - For scale up: allow doubling the number of pods, or an absolute change of 4 pods in a 15s window.
-                          - For scale down: allow all pods to be removed in a 15s window.
-                        items:
-                          description: HPAScalingPolicy is a single policy which must
-                            hold true for a specified past interval.
-                          properties:
-                            periodSeconds:
-                              description: |-
-                                periodSeconds specifies the window of time for which the policy should hold true.
-                                PeriodSeconds must be greater than zero and less than or equal to 1800 (30 min).
-                              format: int32
-                              type: integer
-                            type:
-                              description: type is used to specify the scaling policy.
-                              type: string
-                            value:
-                              description: |-
-                                value contains the amount of change which is permitted by the policy.
-                                It must be greater than zero
-                              format: int32
-                              type: integer
-                          required:
-                          - periodSeconds
-                          - type
-                          - value
-                          type: object
-                        type: array
-                        x-kubernetes-list-type: atomic
-                      selectPolicy:
-                        description: |-
-                          selectPolicy is used to specify which policy should be used.
-                          If not set, the default value Max is used.
-                        type: string
-                      stabilizationWindowSeconds:
-                        description: |-
-                          stabilizationWindowSeconds is the number of seconds for which past recommendations should be
-                          considered while scaling up or scaling down.
-                          StabilizationWindowSeconds must be greater than or equal to zero and less than or equal to 3600 (one hour).
-                          If not set, use the default values:
-                          - For scale up: 0 (i.e. no stabilization is done).
-                          - For scale down: 300 (i.e. the stabilization window is 300 seconds long).
-                        format: int32
-                        type: integer
-                      tolerance:
-                        anyOf:
-                        - type: integer
-                        - type: string
-                        description: |-
-                          tolerance is the tolerance on the ratio between the current and desired
-                          metric value under which no updates are made to the desired number of
-                          replicas (e.g. 0.01 for 1%). Must be greater than or equal to zero. If not
-                          set, the default cluster-wide tolerance is applied (by default 10%).
-
-                          For example, if autoscaling is configured with a memory consumption target of 100Mi,
-                          and scale-down and scale-up tolerances of 5% and 1% respectively, scaling will be
-                          triggered when the actual consumption falls below 95Mi or exceeds 101Mi.
-
-                          This is an alpha field and requires enabling the HPAConfigurableTolerance
-                          feature gate.
-                        pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
-                        x-kubernetes-int-or-string: true
-                    type: object
-                  scaleUp:
-                    description: |-
-                      scaleUp is scaling policy for scaling Up.
-                      If not set, the default value is the higher of:
-                        * increase no more than 4 pods per 60 seconds
-                        * double the number of pods per 60 seconds
-                      No stabilization is used.
-                    properties:
-                      policies:
-                        description: |-
-                          policies is a list of potential scaling polices which can be used during scaling.
-                          If not set, use the default values:
-                          - For scale up: allow doubling the number of pods, or an absolute change of 4 pods in a 15s window.
-                          - For scale down: allow all pods to be removed in a 15s window.
-                        items:
-                          description: HPAScalingPolicy is a single policy which must
-                            hold true for a specified past interval.
-                          properties:
-                            periodSeconds:
-                              description: |-
-                                periodSeconds specifies the window of time for which the policy should hold true.
-                                PeriodSeconds must be greater than zero and less than or equal to 1800 (30 min).
-                              format: int32
-                              type: integer
-                            type:
-                              description: type is used to specify the scaling policy.
-                              type: string
-                            value:
-                              description: |-
-                                value contains the amount of change which is permitted by the policy.
-                                It must be greater than zero
-                              format: int32
-                              type: integer
-                          required:
-                          - periodSeconds
-                          - type
-                          - value
-                          type: object
-                        type: array
-                        x-kubernetes-list-type: atomic
-                      selectPolicy:
-                        description: |-
-                          selectPolicy is used to specify which policy should be used.
-                          If not set, the default value Max is used.
-                        type: string
-                      stabilizationWindowSeconds:
-                        description: |-
-                          stabilizationWindowSeconds is the number of seconds for which past recommendations should be
-                          considered while scaling up or scaling down.
-                          StabilizationWindowSeconds must be greater than or equal to zero and less than or equal to 3600 (one hour).
-                          If not set, use the default values:
-                          - For scale up: 0 (i.e. no stabilization is done).
-                          - For scale down: 300 (i.e. the stabilization window is 300 seconds long).
-                        format: int32
-                        type: integer
-                      tolerance:
-                        anyOf:
-                        - type: integer
-                        - type: string
-                        description: |-
-                          tolerance is the tolerance on the ratio between the current and desired
-                          metric value under which no updates are made to the desired number of
-                          replicas (e.g. 0.01 for 1%). Must be greater than or equal to zero. If not
-                          set, the default cluster-wide tolerance is applied (by default 10%).
-
-                          For example, if autoscaling is configured with a memory consumption target of 100Mi,
-                          and scale-down and scale-up tolerances of 5% and 1% respectively, scaling will be
-                          triggered when the actual consumption falls below 95Mi or exceeds 101Mi.
-
-                          This is an alpha field and requires enabling the HPAConfigurableTolerance
-                          feature gate.
-                        pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
-                        x-kubernetes-int-or-string: true
-                    type: object
-                type: object
               maxReplicas:
                 default: 2
                 description: |-