diff --git a/api/apps/v1alpha1/nimservice_types.go b/api/apps/v1alpha1/nimservice_types.go index 1bb969feb..a29a76fa7 100644 --- a/api/apps/v1alpha1/nimservice_types.go +++ b/api/apps/v1alpha1/nimservice_types.go @@ -123,7 +123,7 @@ type NIMServiceSpec struct { Scale Autoscaling `json:"scale,omitempty"` SchedulerName string `json:"schedulerName,omitempty"` Metrics Metrics `json:"metrics,omitempty"` - // +kubebuilder:validation:Minimum=1 + // +kubebuilder:validation:Minimum=0 Replicas *int32 `json:"replicas,omitempty"` UserID *int64 `json:"userID,omitempty"` GroupID *int64 `json:"groupID,omitempty"` @@ -1090,9 +1090,7 @@ func (n *NIMService) GetDeploymentParams() *rendertypes.DeploymentParams { delete(params.PodAnnotations, utils.NvidiaAnnotationParentSpecHashKey) // Set template spec - if !n.IsAutoScalingEnabled() { - params.Replicas = n.GetReplicas() - } + params.Replicas = n.GetReplicas() params.NodeSelector = n.GetNodeSelector() params.Tolerations = n.GetTolerations() params.Affinity = n.GetAffinity() @@ -1638,26 +1636,32 @@ func (n *NIMService) GetInferenceServiceParams( delete(params.PodAnnotations, utils.NvidiaAnnotationParentSpecHashKey) // Set template spec - if !n.IsAutoScalingEnabled() || !utils.IsKServeStandardDeploymentMode(deploymentMode) { - params.MinReplicas = n.GetReplicas() - } else { - params.Annotations[kserveconstants.AutoscalerClass] = string(kserveconstants.AutoscalerClassHPA) + if utils.IsKServeStandardDeploymentMode(deploymentMode) { + if n.IsAutoScalingEnabled() { + params.Annotations[kserveconstants.AutoscalerClass] = string(kserveconstants.AutoscalerClassHPA) - minReplicas, maxReplicas, metric, metricType, target := n.GetInferenceServiceHPAParams() - if minReplicas != nil { - params.MinReplicas = minReplicas - } - if maxReplicas > 0 { - params.MaxReplicas = ptr.To[int32](maxReplicas) - } - if metric != "" { - params.ScaleMetric = metric - } - if metricType != "" { - params.ScaleMetricType = metricType - } - if target > 0 { - params.ScaleTarget = ptr.To(target) + minReplicas, maxReplicas, metric, metricType, target := n.GetInferenceServiceHPAParams() + if minReplicas != nil { + params.MinReplicas = minReplicas + } + if maxReplicas > 0 { + params.MaxReplicas = ptr.To[int32](maxReplicas) + } + if metric != "" { + params.ScaleMetric = metric + } + if metricType != "" { + params.ScaleMetricType = metricType + } + if target > 0 { + params.ScaleTarget = ptr.To(target) + } + } else { + params.MinReplicas = ptr.To[int32](0) + params.MaxReplicas = ptr.To[int32](0) + params.ScaleMetric = "" + params.ScaleMetricType = "" + params.ScaleTarget = nil } } diff --git a/api/apps/v1alpha1/nimservice_types_test.go b/api/apps/v1alpha1/nimservice_types_test.go index 4e879a66e..ba48cdcec 100644 --- a/api/apps/v1alpha1/nimservice_types_test.go +++ b/api/apps/v1alpha1/nimservice_types_test.go @@ -595,3 +595,277 @@ func TestGetProxyCertConfigMap(t *testing.T) { }) } } + +// TestGetInferenceServiceParams tests the GetInferenceServiceParams function with different autoscaling and deployment mode combinations. +func TestGetInferenceServiceParams(t *testing.T) { + tests := []struct { + name string + nimService *NIMService + deploymentMode string + expectHPAAnnotation bool + expectMinReplicas bool + }{ + { + name: "Autoscaling enabled with RawDeployment mode", + nimService: &NIMService{ + Spec: NIMServiceSpec{ + Image: Image{ + Repository: "test-repo", + Tag: "test-tag", + }, + AuthSecret: "test-secret", + Scale: Autoscaling{ + Enabled: ptr.To(true), + HPA: HorizontalPodAutoscalerSpec{ + MinReplicas: ptr.To[int32](1), + MaxReplicas: 5, + }, + }, + Expose: Expose{ + Service: Service{ + Port: ptr.To[int32](8000), + }, + }, + }, + }, + deploymentMode: "RawDeployment", + expectHPAAnnotation: true, + expectMinReplicas: true, + }, + { + name: "Autoscaling disabled with Standard deployment mode", + nimService: &NIMService{ + Spec: NIMServiceSpec{ + Image: Image{ + Repository: "test-repo", + Tag: "test-tag", + }, + AuthSecret: "test-secret", + Scale: Autoscaling{ + Enabled: ptr.To(false), + }, + Replicas: ptr.To[int32](2), + Expose: Expose{ + Service: Service{ + Port: ptr.To[int32](8000), + }, + }, + }, + }, + deploymentMode: "Standard", + expectHPAAnnotation: true, // Standard mode should enable HPA + expectMinReplicas: false, + }, + { + name: "Autoscaling disabled with Knative deployment mode", + nimService: &NIMService{ + Spec: NIMServiceSpec{ + Image: Image{ + Repository: "test-repo", + Tag: "test-tag", + }, + AuthSecret: "test-secret", + Scale: Autoscaling{ + Enabled: ptr.To(false), + }, + Replicas: ptr.To[int32](2), + Expose: Expose{ + Service: Service{ + Port: ptr.To[int32](8000), + }, + }, + }, + }, + deploymentMode: "Knative", + expectHPAAnnotation: false, // Neither autoscaling nor standard mode + expectMinReplicas: false, + }, + { + name: "Autoscaling enabled with Standard deployment mode", + nimService: &NIMService{ + Spec: NIMServiceSpec{ + Image: Image{ + Repository: "test-repo", + Tag: "test-tag", + }, + AuthSecret: "test-secret", + Scale: Autoscaling{ + Enabled: ptr.To(true), + HPA: HorizontalPodAutoscalerSpec{ + MinReplicas: ptr.To[int32](1), + MaxReplicas: 10, + }, + }, + Expose: Expose{ + Service: Service{ + Port: ptr.To[int32](8000), + }, + }, + }, + }, + deploymentMode: "Standard", + expectHPAAnnotation: true, // Both conditions met + expectMinReplicas: true, + }, + { + name: "Zero replicas with autoscaling disabled", + nimService: &NIMService{ + Spec: NIMServiceSpec{ + Image: Image{ + Repository: "test-repo", + Tag: "test-tag", + }, + AuthSecret: "test-secret", + Scale: Autoscaling{ + Enabled: ptr.To(false), + }, + Replicas: ptr.To[int32](0), // Zero replicas should be valid now + Expose: Expose{ + Service: Service{ + Port: ptr.To[int32](8000), + }, + }, + }, + }, + deploymentMode: "Knative", + expectHPAAnnotation: false, + expectMinReplicas: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + + // Test autoscaling enabled state + isAutoScalingEnabled := tt.nimService.IsAutoScalingEnabled() + if tt.expectHPAAnnotation && !isAutoScalingEnabled && tt.deploymentMode != "Standard" && tt.deploymentMode != "RawDeployment" { + t.Errorf("Expected autoscaling to be enabled or standard deployment mode, but neither condition is met") + } + + // Test replica validation + if tt.nimService.Spec.Replicas != nil { + if *tt.nimService.Spec.Replicas < 0 { + t.Errorf("Replicas should not be negative, got %d", *tt.nimService.Spec.Replicas) + } + // Verify zero replicas are now allowed (changed from minimum of 1 to 0) + if *tt.nimService.Spec.Replicas == 0 { + // This should be valid now - test passes if we reach here + t.Logf("Zero replicas correctly allowed") + } + } + + // Verify GetReplicas works correctly + if tt.expectMinReplicas { + replicas := tt.nimService.GetReplicas() + if replicas == nil { + t.Errorf("Expected replicas to be set for HPA, but got nil") + } + } + }) + } +} + +// TestReplicasMinimumValidation tests that zero replicas are now allowed. +func TestReplicasMinimumValidation(t *testing.T) { + tests := []struct { + name string + replicas *int32 + expectPass bool + }{ + { + name: "Zero replicas should be valid", + replicas: ptr.To[int32](0), + expectPass: true, + }, + { + name: "One replica should be valid", + replicas: ptr.To[int32](1), + expectPass: true, + }, + { + name: "Multiple replicas should be valid", + replicas: ptr.To[int32](5), + expectPass: true, + }, + { + name: "Nil replicas should be valid", + replicas: nil, + expectPass: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + nimService := &NIMService{ + Spec: NIMServiceSpec{ + Replicas: tt.replicas, + }, + } + + // Validate that replicas field accepts the value + if nimService.Spec.Replicas != nil { + if *nimService.Spec.Replicas < 0 { + if tt.expectPass { + t.Errorf("Expected replicas %d to be valid, but validation would fail", *nimService.Spec.Replicas) + } + } else { + if !tt.expectPass { + t.Errorf("Expected replicas %d to be invalid, but validation would pass", *nimService.Spec.Replicas) + } + } + } + + // Test GetReplicas method + replicas := nimService.GetReplicas() + if tt.replicas != nil && replicas != nil { + if *replicas != *tt.replicas { + t.Errorf("GetReplicas() = %d, want %d", *replicas, *tt.replicas) + } + } + }) + } +} + +// TestIsAutoScalingEnabled tests the autoscaling enabled detection. +func TestIsAutoScalingEnabled(t *testing.T) { + tests := []struct { + name string + scale Autoscaling + expected bool + }{ + { + name: "Autoscaling explicitly enabled", + scale: Autoscaling{ + Enabled: ptr.To(true), + }, + expected: true, + }, + { + name: "Autoscaling explicitly disabled", + scale: Autoscaling{ + Enabled: ptr.To(false), + }, + expected: false, + }, + { + name: "Autoscaling not set (nil)", + scale: Autoscaling{}, + expected: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + nimService := &NIMService{ + Spec: NIMServiceSpec{ + Scale: tt.scale, + }, + } + + result := nimService.IsAutoScalingEnabled() + if result != tt.expected { + t.Errorf("IsAutoScalingEnabled() = %v, want %v", result, tt.expected) + } + }) + } +} diff --git a/bundle/manifests/apps.nvidia.com_nimpipelines.yaml b/bundle/manifests/apps.nvidia.com_nimpipelines.yaml index 1f8083988..785d51567 100644 --- a/bundle/manifests/apps.nvidia.com_nimpipelines.yaml +++ b/bundle/manifests/apps.nvidia.com_nimpipelines.yaml @@ -2626,7 +2626,7 @@ spec: type: object replicas: format: int32 - minimum: 1 + minimum: 0 type: integer resources: description: |- diff --git a/bundle/manifests/apps.nvidia.com_nimservices.yaml b/bundle/manifests/apps.nvidia.com_nimservices.yaml index bb2c575ed..56be8c3e8 100644 --- a/bundle/manifests/apps.nvidia.com_nimservices.yaml +++ b/bundle/manifests/apps.nvidia.com_nimservices.yaml @@ -2538,7 +2538,7 @@ spec: type: object replicas: format: int32 - minimum: 1 + minimum: 0 type: integer resources: description: |- diff --git a/config/crd/bases/apps.nvidia.com_nimpipelines.yaml b/config/crd/bases/apps.nvidia.com_nimpipelines.yaml index 1f8083988..785d51567 100644 --- a/config/crd/bases/apps.nvidia.com_nimpipelines.yaml +++ b/config/crd/bases/apps.nvidia.com_nimpipelines.yaml @@ -2626,7 +2626,7 @@ spec: type: object replicas: format: int32 - minimum: 1 + minimum: 0 type: integer resources: description: |- diff --git a/config/crd/bases/apps.nvidia.com_nimservices.yaml b/config/crd/bases/apps.nvidia.com_nimservices.yaml index bb2c575ed..56be8c3e8 100644 --- a/config/crd/bases/apps.nvidia.com_nimservices.yaml +++ b/config/crd/bases/apps.nvidia.com_nimservices.yaml @@ -2538,7 +2538,7 @@ spec: type: object replicas: format: int32 - minimum: 1 + minimum: 0 type: integer resources: description: |- diff --git a/deployments/helm/k8s-nim-operator/crds/apps.nvidia.com_nimpipelines.yaml b/deployments/helm/k8s-nim-operator/crds/apps.nvidia.com_nimpipelines.yaml index 1f8083988..785d51567 100644 --- a/deployments/helm/k8s-nim-operator/crds/apps.nvidia.com_nimpipelines.yaml +++ b/deployments/helm/k8s-nim-operator/crds/apps.nvidia.com_nimpipelines.yaml @@ -2626,7 +2626,7 @@ spec: type: object replicas: format: int32 - minimum: 1 + minimum: 0 type: integer resources: description: |- diff --git a/deployments/helm/k8s-nim-operator/crds/apps.nvidia.com_nimservices.yaml b/deployments/helm/k8s-nim-operator/crds/apps.nvidia.com_nimservices.yaml index bb2c575ed..56be8c3e8 100644 --- a/deployments/helm/k8s-nim-operator/crds/apps.nvidia.com_nimservices.yaml +++ b/deployments/helm/k8s-nim-operator/crds/apps.nvidia.com_nimservices.yaml @@ -2538,7 +2538,7 @@ spec: type: object replicas: format: int32 - minimum: 1 + minimum: 0 type: integer resources: description: |- diff --git a/internal/controller/platform/kserve/nimservice_test.go b/internal/controller/platform/kserve/nimservice_test.go index 0fab0621f..4b8e2cba4 100644 --- a/internal/controller/platform/kserve/nimservice_test.go +++ b/internal/controller/platform/kserve/nimservice_test.go @@ -820,7 +820,7 @@ var _ = Describe("NIMServiceReconciler for a KServe platform", func() { err = client.Get(context.TODO(), namespacedName, isvc) Expect(err).NotTo(HaveOccurred()) - Expect(*isvc.Spec.Predictor.MinReplicas).To(Equal(int32(1))) + Expect(*isvc.Spec.Predictor.MinReplicas).To(Equal(int32(0))) Expect(isvc.Spec.Predictor.MaxReplicas).To(Equal(int32(0))) Expect(isvc.Spec.Predictor.ScaleMetricType).To(BeNil()) Expect(isvc.Spec.Predictor.ScaleMetric).To(BeNil())