diff --git a/internal/controller/platform/kserve/nimservice.go b/internal/controller/platform/kserve/nimservice.go index 4c06891bb..e0ea969db 100644 --- a/internal/controller/platform/kserve/nimservice.go +++ b/internal/controller/platform/kserve/nimservice.go @@ -27,12 +27,9 @@ import ( "github.com/go-logr/logr" kservev1beta1 "github.com/kserve/kserve/pkg/apis/serving/v1beta1" kserveconstants "github.com/kserve/kserve/pkg/constants" - isvcutils "github.com/kserve/kserve/pkg/controller/v1beta1/inferenceservice/utils" - kserveutils "github.com/kserve/kserve/pkg/utils" monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" knativeapis "knative.dev/pkg/apis" - appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" rbacv1 "k8s.io/api/rbac/v1" resourcev1 "k8s.io/api/resource/v1" @@ -158,7 +155,8 @@ func (r *NIMServiceReconciler) reconcileNIMService(ctx context.Context, nimServi var deploymentMode kserveconstants.DeploymentModeType // Check KServe deployment mode - deploymentMode, err = r.getKServeDeploymentMode(ctx, nimService.Spec.Annotations) + namespacedName := client.ObjectKeyFromObject(nimService) + deploymentMode, err = utils.GetKServeDeploymentMode(ctx, r.Client, nimService.Spec.Annotations, &namespacedName) if err != nil { return ctrl.Result{}, err } @@ -468,7 +466,7 @@ func (r *NIMServiceReconciler) renderAndSyncInferenceService(ctx context.Context isvcParams.Annotations[kserveconstants.SetPrometheusAnnotation] = "true" // Ensure deployment mode annotation is always set - if _, ok := isvcParams.Annotations[utils.KServeDeploymentModeAnnotationKey]; !ok { + if _, ok := isvcParams.Annotations[kserveconstants.DeploymentMode]; !ok { isvcParams.Annotations[kserveconstants.DeploymentMode] = string(deploymentMode) } @@ -735,6 +733,12 @@ func (r *NIMServiceReconciler) updateModelStatus(ctx context.Context, nimService return err } + // KServe RawDeployment mode creates headless services (clusterIP: None) by default, which prevents + // standard service-based access for model endpoints. To enable regular ClusterIP services: + // - Upstream KServe: Set "serviceClusterIPNone: false" in the "deploy" section of the + // "inferenceservice-config" ConfigMap (in the KServe controller namespace) + // - OpenDataHub: Set "rawDeploymentServiceConfig: Headed" (not "Headless") in the + // kserve spec of the DataScienceCluster object modelName, err := r.getNIMModelName(ctx, clusterEndpoint) if err != nil { return err @@ -922,54 +926,6 @@ func getModelNameFromModelsList(modelsList *nimmodels.ModelsV1List) (string, err return "", fmt.Errorf("no valid model found") } -func (r *NIMServiceReconciler) getKServeDeploymentMode(ctx context.Context, - podAnnotations map[string]string) (kserveconstants.DeploymentModeType, error) { - - var namespace string - deploymentList := &appsv1.DeploymentList{} - if err := r.List(ctx, deploymentList, client.HasLabels{"app.kubernetes.io/name: kserve-controller-manager"}); err != nil { - return "", err - } - for _, deployment := range deploymentList.Items { - if deployment.GetName() == "kserve-controller-manager" { - namespace = deployment.Namespace - break - } - } - - if namespace == "" { - return "", fmt.Errorf("failed to find the namespace of KServe Deployment") - } - - isvcConfigMap := &corev1.ConfigMap{} - err := r.Get(ctx, - types.NamespacedName{ - Name: kserveconstants.InferenceServiceConfigMapName, - Namespace: namespace}, - isvcConfigMap) - if err != nil { - return "", err - } - - isvcConfig, err := kservev1beta1.NewInferenceServicesConfig(isvcConfigMap) - if err != nil { - return "", err - } - - // get annotations from isvc - annotations := kserveutils.Filter(podAnnotations, func(key string) bool { - return !kserveutils.Includes(isvcConfig.ServiceAnnotationDisallowedList, key) - }) - - deployConfig, err := kservev1beta1.NewDeployConfig(isvcConfigMap) - if err != nil { - return "", err - } - - deploymentMode := isvcutils.GetDeploymentMode("", annotations, deployConfig) - return deploymentMode, nil -} - func (r *NIMServiceReconciler) reconcileDRAResources(ctx context.Context, nimService *appsv1alpha1.NIMService, namedDraResources *shared.NamedDRAResourceList) error { for _, namedDraResource := range namedDraResources.Resources { if !shared.ShouldCreateDRAResource(namedDraResource.DRAResource) { diff --git a/internal/controller/platform/kserve/nimservice_test.go b/internal/controller/platform/kserve/nimservice_test.go index 0fab0621f..e87477704 100644 --- a/internal/controller/platform/kserve/nimservice_test.go +++ b/internal/controller/platform/kserve/nimservice_test.go @@ -410,10 +410,10 @@ var _ = Describe("NIMServiceReconciler for a KServe platform", func() { kserveDeployment := &appsv1.Deployment{ ObjectMeta: metav1.ObjectMeta{ - Name: "kserve-controller-manager", + Name: utils.KServeControllerName, Namespace: "default", Labels: map[string]string{ - "app.kubernetes.io/name": "kserve-controller-manager", + "app.kubernetes.io/name": utils.KServeControllerName, }, }, Spec: appsv1.DeploymentSpec{ @@ -507,7 +507,7 @@ var _ = Describe("NIMServiceReconciler for a KServe platform", func() { By("delete the KServe Deployment") kserveDeployment := &appsv1.Deployment{ ObjectMeta: metav1.ObjectMeta{ - Name: "kserve-controller-manager", + Name: utils.KServeControllerName, Namespace: "default", }, } diff --git a/internal/utils/kserve.go b/internal/utils/kserve.go index 88c3f9c2a..896e596b7 100644 --- a/internal/utils/kserve.go +++ b/internal/utils/kserve.go @@ -17,7 +17,25 @@ package utils import ( + "context" + "encoding/json" + "errors" + "fmt" + + kservev1beta1 "github.com/kserve/kserve/pkg/apis/serving/v1beta1" kserveconstants "github.com/kserve/kserve/pkg/constants" + kserveutils "github.com/kserve/kserve/pkg/utils" + + appsv1 "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + k8serrors "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/types" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/log" +) + +const ( + KServeControllerName = "kserve-controller-manager" ) func IsKServeStandardDeploymentMode(deploymentMode kserveconstants.DeploymentModeType) bool { @@ -27,3 +45,207 @@ func IsKServeStandardDeploymentMode(deploymentMode kserveconstants.DeploymentMod func IsKServeKnativeDeploymentMode(deploymentMode kserveconstants.DeploymentModeType) bool { return deploymentMode == kserveconstants.Knative || deploymentMode == kserveconstants.LegacyServerless } + +func GetKServeDeploymentMode(ctx context.Context, k8sClient client.Client, + podAnnotations map[string]string, isvcNamespacedName *types.NamespacedName) (kserveconstants.DeploymentModeType, error) { + logger := log.FromContext(ctx).WithName("KServe").WithName("Deployment Mode") + + var annotations map[string]string + var deployConfig *kservev1beta1.DeployConfig + var statusDeploymentMode string + + if k8sClient != nil { + isvcConfigMap, err := getISVCConfigMap(ctx, k8sClient) + if err != nil { + return "", err + } + + var isvcConfig *kservev1beta1.InferenceServicesConfig + if isvcConfigMap != nil { + isvcConfig, err = kservev1beta1.NewInferenceServicesConfig(isvcConfigMap) + if err != nil { + return "", err + } + } + + if isvcConfig != nil && podAnnotations != nil { + annotations = kserveutils.Filter(podAnnotations, func(key string) bool { + return !kserveutils.Includes(isvcConfig.ServiceAnnotationDisallowedList, key) + }) + } else { + annotations = podAnnotations + } + + if isvcConfigMap != nil { + deployConfig, err = newDeployConfig(isvcConfigMap) + if err != nil { + return "", err + } + } else { + logger.V(1).Info("ConfigMap inferenceservice-config is not found in the cluster, skipping the " + + "default deployment mode check from the ConfigMap") + } + + // Try to get the deployment mode from the InferenceService status if it exists. + // Note: The InferenceService doesn't exist yet, so this will return NotFound (which is handled gracefully). + // Status-based mode detection only applies to existing InferenceServices. This ensures the status value takes + // highest priority when available, maintaining consistency with KServe's behavior. + if isvcNamespacedName != nil { + isvc := &kservev1beta1.InferenceService{} + err = k8sClient.Get(ctx, *isvcNamespacedName, isvc) + if err != nil { + if !k8serrors.IsNotFound(err) { + return "", err + } + logger.V(1).Info("InferenceService not found, deployment mode from status is not available", + "InferenceService", isvcNamespacedName.String()) + } else { + statusDeploymentMode = isvc.Status.DeploymentMode + } + } else { + logger.V(1).Info("InferenceService name is not set, skipping deployment mode check from InferenceService status") + } + } else { + logger.V(1).Info("k8sClient is nil, skipping default deployment mode retrieval from the inferenceservice-config ConfigMap, " + + "skipping deployment mode check from InferenceService status, using the annotations only") + annotations = podAnnotations + } + + deploymentMode := getDeploymentMode(ctx, statusDeploymentMode, annotations, deployConfig) + return deploymentMode, nil +} + +/* +GetDeploymentMode returns the current deployment mode, supports Knative and Standard +case 1: no serving.kserve.org/deploymentMode annotation + + return config.deploy.defaultDeploymentMode + +case 2: serving.kserve.org/deploymentMode is set + + if the mode is "Standard", "Knative", "ModelMesh" or "RawDeployment", "Serverless", return it. + else return config.deploy.defaultDeploymentMode + +ODH 3.0 supports "RawDeployment", "Serverless", and doesn't accept "Standard", "Knative" +Ref: https://github.com/opendatahub-io/kserve/blob/cf2920b4276d97fc2d2d700efe4879749a56b418/pkg/controller/v1beta1/inferenceservice/utils/utils.go#L220 +*/ +func getDeploymentMode(ctx context.Context, statusDeploymentMode string, annotations map[string]string, + deployConfig *kservev1beta1.DeployConfig) kserveconstants.DeploymentModeType { + logger := log.FromContext(ctx).WithName("KServe").WithName("Deployment Mode") + + // First priority is the deploymentMode recorded in the status + if len(statusDeploymentMode) != 0 { + logger.Info("using deployment mode from InferenceService status", + "Deployment Mode", statusDeploymentMode) + return kserveconstants.DeploymentModeType(statusDeploymentMode) + } + + if annotations != nil { + // Second priority, if the status doesn't have the deploymentMode recorded, is explicit annotations + deploymentMode, ok := annotations[kserveconstants.DeploymentMode] + + // Note: ODH 3.0 requires using "RawDeployment" and "Serverless" directly + // without conversion to "Standard" and "Knative". Do not convert legacy modes. + + if ok && (deploymentMode == string(kserveconstants.Standard) || + deploymentMode == string(kserveconstants.Knative) || + deploymentMode == string(kserveconstants.ModelMeshDeployment) || + deploymentMode == string(kserveconstants.LegacyRawDeployment) || + deploymentMode == string(kserveconstants.LegacyServerless)) { + logger.Info("using deployment mode from annotations", + "Deployment Mode", deploymentMode) + return kserveconstants.DeploymentModeType(deploymentMode) + } + + if ok { + logger.V(1).Info("warning: deployment mode annotation found but value is invalid", + "Deployment Mode", deploymentMode) + } else { + logger.V(1).Info("warning: deployment mode annotation not found in annotations") + } + } + + if deployConfig != nil { + // Finally, if an InferenceService is being created and does not explicitly specify a DeploymentMode + logger.Info("using the default deployment mode from the inferenceservice-config ConfigMap", + "Deployment Mode", deployConfig.DefaultDeploymentMode) + return kserveconstants.DeploymentModeType(deployConfig.DefaultDeploymentMode) + } + + // If InferenceServicesConfig doesn't exist, use the default deployment mode + // For ODH, InferenceServicesConfig is always bundled, and this line should not be reached + logger.Info("deployment mode is not found in any configurations, using default deployment mode", + "Deployment Mode", kserveconstants.DefaultDeployment) + return kserveconstants.DefaultDeployment +} + +/* +ODH 3.0 supports "RawDeployment", "Serverless", and doesn't accept "Standard", "Knative" +Ref: https://github.com/opendatahub-io/kserve/blob/cf2920b4276d97fc2d2d700efe4879749a56b418/pkg/apis/serving/v1beta1/configmap.go#L359 +*/ +func newDeployConfig(isvcConfigMap *corev1.ConfigMap) (*kservev1beta1.DeployConfig, error) { + deploy, ok := isvcConfigMap.Data[kservev1beta1.DeployConfigName] + if !ok { + // No deploy config in ConfigMap, return nil so caller falls back to default + return nil, nil + } + + deployConfig := &kservev1beta1.DeployConfig{} + err := json.Unmarshal([]byte(deploy), &deployConfig) + if err != nil { + return nil, fmt.Errorf("unable to parse deploy config json: %w", err) + } + + if deployConfig.DefaultDeploymentMode == "" { + return nil, errors.New("invalid deploy config, defaultDeploymentMode is required") + } + + // Note: ODH 3.0 requires using "RawDeployment" and "Serverless" directly + // without conversion to "Standard" and "Knative". Do not convert legacy modes. + + if deployConfig.DefaultDeploymentMode != string(kserveconstants.Knative) && + deployConfig.DefaultDeploymentMode != string(kserveconstants.Standard) && + deployConfig.DefaultDeploymentMode != string(kserveconstants.ModelMeshDeployment) && + deployConfig.DefaultDeploymentMode != string(kserveconstants.LegacyRawDeployment) && + deployConfig.DefaultDeploymentMode != string(kserveconstants.LegacyServerless) { + return nil, errors.New("invalid deployment mode. Supported modes are Knative," + + " Standard, ModelMesh and RawDeployment, Serverless") + } + + return deployConfig, nil +} + +func getISVCConfigMap(ctx context.Context, k8sClient client.Client) (*corev1.ConfigMap, error) { + var namespace string + // Find the namespace of the KServe controller + deploymentList := &appsv1.DeploymentList{} + if err := k8sClient.List(ctx, deploymentList, client.MatchingLabels{"app.kubernetes.io/name": KServeControllerName}); err != nil { + return nil, err + } + for _, deployment := range deploymentList.Items { + if deployment.GetName() == KServeControllerName { + namespace = deployment.Namespace + break + } + } + + if namespace == "" { + return nil, fmt.Errorf("failed to find the namespace of KServe Deployment") + } + + // Get the inferenceservice-config ConfigMap in the namespace + isvcConfigMap := &corev1.ConfigMap{} + err := k8sClient.Get(ctx, + types.NamespacedName{ + Name: kserveconstants.InferenceServiceConfigMapName, + Namespace: namespace}, + isvcConfigMap) + if err != nil { + if k8serrors.IsNotFound(err) { + return nil, nil + } + return nil, err + } + + return isvcConfigMap, nil +} diff --git a/internal/utils/kserve_test.go b/internal/utils/kserve_test.go new file mode 100644 index 000000000..f72d5169a --- /dev/null +++ b/internal/utils/kserve_test.go @@ -0,0 +1,580 @@ +/** +# Copyright (c) NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +**/ + +package utils + +import ( + "context" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + + appsv1 "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + k8serrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/types" + "k8s.io/utils/ptr" + k8sclient "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/client/fake" + + kservev1beta1 "github.com/kserve/kserve/pkg/apis/serving/v1beta1" + kserveconstants "github.com/kserve/kserve/pkg/constants" +) + +var _ = Describe("KServe Utilities", func() { + Describe("Get Deployment Mode", func() { + var ( + client k8sclient.Client + scheme *runtime.Scheme + ) + + namespace := "default" + + var kserveDeployment *appsv1.Deployment + var isvcConfig *corev1.ConfigMap + + isvcName := "test-isvc" + + // Helper function to create a fresh KServe deployment for isolated tests + createKServeDeployment := func() *appsv1.Deployment { + return &appsv1.Deployment{ + ObjectMeta: metav1.ObjectMeta{ + Name: KServeControllerName, + Namespace: namespace, + Labels: map[string]string{ + "app.kubernetes.io/name": KServeControllerName, + }, + }, + Spec: appsv1.DeploymentSpec{ + Replicas: ptr.To[int32](1), + Selector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "app": "nim-test-kserve", + }, + }, + Template: corev1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{ + "app": "nim-test-kserve", + }, + }, + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + { + Name: "nim-test-kserve-container", + Image: "nim-test-kserve-image", + Ports: []corev1.ContainerPort{ + { + ContainerPort: 80, + }, + }, + }, + }, + }, + }, + }, + } + } + + BeforeEach(func() { + scheme = runtime.NewScheme() + Expect(appsv1.AddToScheme(scheme)).To(Succeed()) + Expect(corev1.AddToScheme(scheme)).To(Succeed()) + Expect(kservev1beta1.AddToScheme(scheme)).To(Succeed()) + + client = fake.NewClientBuilder().WithScheme(scheme).Build() + + kserveDeployment = &appsv1.Deployment{ + ObjectMeta: metav1.ObjectMeta{ + Name: KServeControllerName, + Namespace: namespace, + Labels: map[string]string{ + "app.kubernetes.io/name": KServeControllerName, + }, + }, + Spec: appsv1.DeploymentSpec{ + Replicas: ptr.To[int32](1), + Selector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "app": "nim-test-kserve", + }, + }, + Template: corev1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{ + "app": "nim-test-kserve", + }, + }, + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + { + Name: "nim-test-kserve-container", + Image: "nim-test-kserve-image", + Ports: []corev1.ContainerPort{ + { + ContainerPort: 80, + }, + }, + }, + }, + }, + }, + }, + } + Expect(client.Create(context.Background(), kserveDeployment)).To(Succeed()) + + isvcConfig = &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: kserveconstants.InferenceServiceConfigMapName, + Namespace: namespace, + }, + Data: map[string]string{ + "deploy": "{\"defaultDeploymentMode\": \"RawDeployment\"}", + }, + } + Expect(client.Create(context.Background(), isvcConfig)).To(Succeed()) + }) + + AfterEach(func() { + By("delete the KServe Deployment") + Expect(client.DeleteAllOf(context.Background(), &appsv1.Deployment{})).To(Succeed()) + + By("delete the isvc ConfigMap - ignore NotFound errors") + err := client.Delete(context.Background(), isvcConfig) + if err != nil && !k8serrors.IsNotFound(err) { + Expect(err).NotTo(HaveOccurred()) + } + }) + + Describe("Priority chain: status > annotation > config > default", func() { + DescribeTable("deployment mode detection", + func(annotations map[string]string, expected kserveconstants.DeploymentModeType) { + mode, err := GetKServeDeploymentMode(context.Background(), client, annotations, nil) + Expect(err).NotTo(HaveOccurred()) + Expect(mode).To(Equal(expected)) + }, + // Test annotation-based detection (no ISVC status) + Entry("should use LegacyServerless from annotations", + map[string]string{kserveconstants.DeploymentMode: string(kserveconstants.LegacyServerless)}, + kserveconstants.LegacyServerless), + Entry("should use Standard from annotations", + map[string]string{kserveconstants.DeploymentMode: string(kserveconstants.Standard)}, + kserveconstants.Standard), + Entry("should use Knative from annotations", + map[string]string{kserveconstants.DeploymentMode: string(kserveconstants.Knative)}, + kserveconstants.Knative), + Entry("should use LegacyRawDeployment from annotations", + map[string]string{kserveconstants.DeploymentMode: string(kserveconstants.LegacyRawDeployment)}, + kserveconstants.LegacyRawDeployment), + Entry("should use ModelMesh from annotations", + map[string]string{kserveconstants.DeploymentMode: string(kserveconstants.ModelMeshDeployment)}, + kserveconstants.ModelMeshDeployment), + // Test ConfigMap default (no annotations) + Entry("should use the default deployment mode from the config map when no annotations", + nil, + kserveconstants.LegacyRawDeployment), + // Test invalid annotation falls back to config default + Entry("should fall back to config default when annotation has invalid mode", + map[string]string{kserveconstants.DeploymentMode: "InvalidMode"}, + kserveconstants.LegacyRawDeployment), + Entry("should fall back to config default when annotation is empty string", + map[string]string{kserveconstants.DeploymentMode: ""}, + kserveconstants.LegacyRawDeployment), + ) + + Context("Status priority tests", func() { + var isvc *kservev1beta1.InferenceService + + BeforeEach(func() { + // Create InferenceService only for tests that need it + isvc = &kservev1beta1.InferenceService{ + ObjectMeta: metav1.ObjectMeta{ + Name: isvcName, + Namespace: namespace, + }, + Spec: kservev1beta1.InferenceServiceSpec{}, + Status: kservev1beta1.InferenceServiceStatus{ + DeploymentMode: string(kserveconstants.Knative), + }, + } + Expect(client.Create(context.Background(), isvc)).To(Succeed()) + }) + + AfterEach(func() { + Expect(client.Delete(context.Background(), isvc)).To(Succeed()) + }) + + It("should use InferenceService status over annotations", func() { + mode, err := GetKServeDeploymentMode(context.Background(), client, + map[string]string{kserveconstants.DeploymentMode: string(kserveconstants.LegacyServerless)}, + &types.NamespacedName{Name: isvcName, Namespace: namespace}) + Expect(err).NotTo(HaveOccurred()) + Expect(mode).To(Equal(kserveconstants.Knative)) + }) + + It("should use InferenceService status over ConfigMap default when no annotation", func() { + mode, err := GetKServeDeploymentMode(context.Background(), client, + nil, + &types.NamespacedName{Name: isvcName, Namespace: namespace}) + Expect(err).NotTo(HaveOccurred()) + Expect(mode).To(Equal(kserveconstants.Knative)) + }) + + It("should ignore empty status string and use annotation", func() { + isvc.Status.DeploymentMode = "" + Expect(client.Update(context.Background(), isvc)).To(Succeed()) + + mode, err := GetKServeDeploymentMode(context.Background(), client, + map[string]string{kserveconstants.DeploymentMode: string(kserveconstants.Standard)}, + &types.NamespacedName{Name: isvcName, Namespace: namespace}) + Expect(err).NotTo(HaveOccurred()) + Expect(mode).To(Equal(kserveconstants.Standard)) + }) + + It("should ignore empty status string and use ConfigMap default when no annotation", func() { + isvc.Status.DeploymentMode = "" + Expect(client.Update(context.Background(), isvc)).To(Succeed()) + + mode, err := GetKServeDeploymentMode(context.Background(), client, + nil, + &types.NamespacedName{Name: isvcName, Namespace: namespace}) + Expect(err).NotTo(HaveOccurred()) + Expect(mode).To(Equal(kserveconstants.LegacyRawDeployment)) + }) + }) + }) + + Describe("Nil client scenarios (webhook validation)", func() { + DescribeTable("should handle nil client gracefully", + func(annotations map[string]string, expected kserveconstants.DeploymentModeType) { + mode, err := GetKServeDeploymentMode(context.Background(), nil, annotations, nil) + Expect(err).NotTo(HaveOccurred()) + Expect(mode).To(Equal(expected)) + }, + Entry("with Standard annotation", + map[string]string{kserveconstants.DeploymentMode: string(kserveconstants.Standard)}, + kserveconstants.Standard), + Entry("with Knative annotation", + map[string]string{kserveconstants.DeploymentMode: string(kserveconstants.Knative)}, + kserveconstants.Knative), + Entry("with LegacyServerless annotation", + map[string]string{kserveconstants.DeploymentMode: string(kserveconstants.LegacyServerless)}, + kserveconstants.LegacyServerless), + Entry("with LegacyRawDeployment annotation", + map[string]string{kserveconstants.DeploymentMode: string(kserveconstants.LegacyRawDeployment)}, + kserveconstants.LegacyRawDeployment), + Entry("with no annotations should use default", + nil, + kserveconstants.DefaultDeployment), + Entry("with invalid annotation should use default", + map[string]string{kserveconstants.DeploymentMode: "InvalidMode"}, + kserveconstants.DefaultDeployment), + ) + }) + + Describe("Error cases", func() { + // Use isolated clients for error tests to avoid state pollution + It("should return error when ConfigMap has malformed JSON", func() { + // Create isolated test client + isolatedClient := fake.NewClientBuilder().WithScheme(scheme).Build() + + // Setup KServe deployment + Expect(isolatedClient.Create(context.Background(), createKServeDeployment())).To(Succeed()) + + // Create ConfigMap with malformed JSON + badConfigMap := &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: kserveconstants.InferenceServiceConfigMapName, + Namespace: namespace, + }, + Data: map[string]string{ + "deploy": "{invalid json}", + }, + } + Expect(isolatedClient.Create(context.Background(), badConfigMap)).To(Succeed()) + + _, err := GetKServeDeploymentMode(context.Background(), isolatedClient, nil, nil) + Expect(err).To(HaveOccurred()) + Expect(err.Error()).To(ContainSubstring("unable to parse deploy config json")) + }) + + It("should return error when ConfigMap has empty defaultDeploymentMode", func() { + isolatedClient := fake.NewClientBuilder().WithScheme(scheme).Build() + Expect(isolatedClient.Create(context.Background(), createKServeDeployment())).To(Succeed()) + + badConfigMap := &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: kserveconstants.InferenceServiceConfigMapName, + Namespace: namespace, + }, + Data: map[string]string{ + "deploy": "{\"defaultDeploymentMode\": \"\"}", + }, + } + Expect(isolatedClient.Create(context.Background(), badConfigMap)).To(Succeed()) + + _, err := GetKServeDeploymentMode(context.Background(), isolatedClient, nil, nil) + Expect(err).To(HaveOccurred()) + Expect(err.Error()).To(ContainSubstring("defaultDeploymentMode is required")) + }) + + It("should return error when ConfigMap has unsupported deployment mode", func() { + isolatedClient := fake.NewClientBuilder().WithScheme(scheme).Build() + Expect(isolatedClient.Create(context.Background(), createKServeDeployment())).To(Succeed()) + + badConfigMap := &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: kserveconstants.InferenceServiceConfigMapName, + Namespace: namespace, + }, + Data: map[string]string{ + "deploy": "{\"defaultDeploymentMode\": \"UnsupportedMode\"}", + }, + } + Expect(isolatedClient.Create(context.Background(), badConfigMap)).To(Succeed()) + + _, err := GetKServeDeploymentMode(context.Background(), isolatedClient, nil, nil) + Expect(err).To(HaveOccurred()) + Expect(err.Error()).To(ContainSubstring("invalid deployment mode")) + }) + + It("should return error when KServe deployment not found", func() { + isolatedClient := fake.NewClientBuilder().WithScheme(scheme).Build() + // Don't create KServe deployment + + _, err := GetKServeDeploymentMode(context.Background(), isolatedClient, nil, nil) + Expect(err).To(HaveOccurred()) + Expect(err.Error()).To(ContainSubstring("failed to find the namespace of KServe Deployment")) + }) + + It("should use annotation when ConfigMap doesn't exist", func() { + isolatedClient := fake.NewClientBuilder().WithScheme(scheme).Build() + Expect(isolatedClient.Create(context.Background(), createKServeDeployment())).To(Succeed()) + // Don't create ConfigMap + + mode, err := GetKServeDeploymentMode(context.Background(), isolatedClient, + map[string]string{kserveconstants.DeploymentMode: string(kserveconstants.Knative)}, nil) + Expect(err).NotTo(HaveOccurred()) + Expect(mode).To(Equal(kserveconstants.Knative)) + }) + + It("should use default when ConfigMap exists but has no deploy key", func() { + isolatedClient := fake.NewClientBuilder().WithScheme(scheme).Build() + Expect(isolatedClient.Create(context.Background(), createKServeDeployment())).To(Succeed()) + + emptyConfigMap := &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: kserveconstants.InferenceServiceConfigMapName, + Namespace: namespace, + }, + Data: map[string]string{ + // No "deploy" key + "other": "value", + }, + } + Expect(isolatedClient.Create(context.Background(), emptyConfigMap)).To(Succeed()) + + mode, err := GetKServeDeploymentMode(context.Background(), isolatedClient, nil, nil) + Expect(err).NotTo(HaveOccurred()) + Expect(mode).To(Equal(kserveconstants.DefaultDeployment)) + }) + }) + + Describe("ConfigMap variations", func() { + // Use isolated clients to avoid test pollution + It("should work with Standard as default in ConfigMap", func() { + isolatedClient := fake.NewClientBuilder().WithScheme(scheme).Build() + Expect(isolatedClient.Create(context.Background(), createKServeDeployment())).To(Succeed()) + + standardConfig := &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: kserveconstants.InferenceServiceConfigMapName, + Namespace: namespace, + }, + Data: map[string]string{ + "deploy": "{\"defaultDeploymentMode\": \"Standard\"}", + }, + } + Expect(isolatedClient.Create(context.Background(), standardConfig)).To(Succeed()) + + mode, err := GetKServeDeploymentMode(context.Background(), isolatedClient, nil, nil) + Expect(err).NotTo(HaveOccurred()) + Expect(mode).To(Equal(kserveconstants.Standard)) + }) + + It("should work with Knative as default in ConfigMap", func() { + isolatedClient := fake.NewClientBuilder().WithScheme(scheme).Build() + Expect(isolatedClient.Create(context.Background(), createKServeDeployment())).To(Succeed()) + + knativeConfig := &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: kserveconstants.InferenceServiceConfigMapName, + Namespace: namespace, + }, + Data: map[string]string{ + "deploy": "{\"defaultDeploymentMode\": \"Knative\"}", + }, + } + Expect(isolatedClient.Create(context.Background(), knativeConfig)).To(Succeed()) + + mode, err := GetKServeDeploymentMode(context.Background(), isolatedClient, nil, nil) + Expect(err).NotTo(HaveOccurred()) + Expect(mode).To(Equal(kserveconstants.Knative)) + }) + + It("should work with Serverless as default in ConfigMap", func() { + isolatedClient := fake.NewClientBuilder().WithScheme(scheme).Build() + Expect(isolatedClient.Create(context.Background(), createKServeDeployment())).To(Succeed()) + + serverlessConfig := &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: kserveconstants.InferenceServiceConfigMapName, + Namespace: namespace, + }, + Data: map[string]string{ + "deploy": "{\"defaultDeploymentMode\": \"Serverless\"}", + }, + } + Expect(isolatedClient.Create(context.Background(), serverlessConfig)).To(Succeed()) + + mode, err := GetKServeDeploymentMode(context.Background(), isolatedClient, nil, nil) + Expect(err).NotTo(HaveOccurred()) + Expect(mode).To(Equal(kserveconstants.LegacyServerless)) + }) + }) + + Describe("Annotation filtering", func() { + It("should filter annotations based on ServiceAnnotationDisallowedList", func() { + isolatedClient := fake.NewClientBuilder().WithScheme(scheme).Build() + Expect(isolatedClient.Create(context.Background(), createKServeDeployment())).To(Succeed()) + + // Create ConfigMap with disallowed annotations list + configWithFilter := &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: kserveconstants.InferenceServiceConfigMapName, + Namespace: namespace, + }, + Data: map[string]string{ + "deploy": "{\"defaultDeploymentMode\": \"Standard\"}", + "inferenceService": `{ + "serviceAnnotationDisallowedList": [ + "disallowed-annotation-key" + ] + }`, + }, + } + Expect(isolatedClient.Create(context.Background(), configWithFilter)).To(Succeed()) + + // Provide annotations including a disallowed one + annotations := map[string]string{ + kserveconstants.DeploymentMode: string(kserveconstants.Knative), + "disallowed-annotation-key": "should-be-filtered", + "allowed-annotation": "should-remain", + } + + mode, err := GetKServeDeploymentMode(context.Background(), isolatedClient, annotations, nil) + Expect(err).NotTo(HaveOccurred()) + // Should still detect Knative mode since the deployment mode annotation is allowed + Expect(mode).To(Equal(kserveconstants.Knative)) + }) + + It("should filter out the deployment mode annotation based on ServiceAnnotationDisallowedList", func() { + isolatedClient := fake.NewClientBuilder().WithScheme(scheme).Build() + Expect(isolatedClient.Create(context.Background(), createKServeDeployment())).To(Succeed()) + + // Create ConfigMap with disallowed annotations list + configWithFilter := &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: kserveconstants.InferenceServiceConfigMapName, + Namespace: namespace, + }, + Data: map[string]string{ + "deploy": "{\"defaultDeploymentMode\": \"Standard\"}", + "inferenceService": `{ + "serviceAnnotationDisallowedList": [ + "disallowed-annotation-key", + "serving.kserve.io/deploymentMode" + ] + }`, + }, + } + Expect(isolatedClient.Create(context.Background(), configWithFilter)).To(Succeed()) + + // Provide annotations including deployment mode which will be filtered + annotations := map[string]string{ + kserveconstants.DeploymentMode: string(kserveconstants.Knative), + "disallowed-annotation-key": "should-be-filtered", + "allowed-annotation": "should-remain", + } + + mode, err := GetKServeDeploymentMode(context.Background(), isolatedClient, annotations, nil) + Expect(err).NotTo(HaveOccurred()) + // Should fall back to ConfigMap default (Standard) since deployment mode annotation was filtered + Expect(mode).To(Equal(kserveconstants.Standard)) + }) + + It("should use status over filtered annotations", func() { + isolatedClient := fake.NewClientBuilder().WithScheme(scheme).Build() + Expect(isolatedClient.Create(context.Background(), createKServeDeployment())).To(Succeed()) + + // Create ConfigMap with disallowed annotations list including deployment mode + configWithFilter := &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: kserveconstants.InferenceServiceConfigMapName, + Namespace: namespace, + }, + Data: map[string]string{ + "deploy": "{\"defaultDeploymentMode\": \"Standard\"}", + "inferenceService": `{ + "serviceAnnotationDisallowedList": [ + "serving.kserve.io/deploymentMode" + ] + }`, + }, + } + Expect(isolatedClient.Create(context.Background(), configWithFilter)).To(Succeed()) + + // Create InferenceService with status + isvc := &kservev1beta1.InferenceService{ + ObjectMeta: metav1.ObjectMeta{ + Name: isvcName, + Namespace: namespace, + }, + Spec: kservev1beta1.InferenceServiceSpec{}, + Status: kservev1beta1.InferenceServiceStatus{ + DeploymentMode: string(kserveconstants.Knative), + }, + } + Expect(isolatedClient.Create(context.Background(), isvc)).To(Succeed()) + + // Provide annotation that will be filtered + annotations := map[string]string{ + kserveconstants.DeploymentMode: string(kserveconstants.LegacyServerless), + } + + mode, err := GetKServeDeploymentMode(context.Background(), isolatedClient, annotations, + &types.NamespacedName{Name: isvcName, Namespace: namespace}) + Expect(err).NotTo(HaveOccurred()) + // Status should take priority even when annotation is filtered + // Status = Knative, filtered annotation = LegacyServerless, config = Standard + // Result should be Knative (status wins) + Expect(mode).To(Equal(kserveconstants.Knative)) + }) + }) + }) +}) diff --git a/internal/utils/suite_test.go b/internal/utils/suite_test.go index 77fcadda8..fe8853035 100644 --- a/internal/utils/suite_test.go +++ b/internal/utils/suite_test.go @@ -17,13 +17,69 @@ limitations under the License. package utils_test import ( + "fmt" + "path/filepath" + "runtime" "testing" . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" + + "k8s.io/client-go/kubernetes/scheme" + "k8s.io/client-go/rest" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/envtest" + logf "sigs.k8s.io/controller-runtime/pkg/log" + "sigs.k8s.io/controller-runtime/pkg/log/zap" + + appsv1alpha1 "github.com/NVIDIA/k8s-nim-operator/api/apps/v1alpha1" ) +var cfg *rest.Config +var k8sClient client.Client +var testEnv *envtest.Environment + func TestUtils(t *testing.T) { RegisterFailHandler(Fail) RunSpecs(t, "Utils Package Test Suite") } + +var _ = BeforeSuite(func() { + logf.SetLogger(zap.New(zap.WriteTo(GinkgoWriter), zap.UseDevMode(true))) + + By("bootstrapping test environment") + testEnv = &envtest.Environment{ + CRDDirectoryPaths: []string{filepath.Join("..", "..", "config", "crd", "bases")}, + ErrorIfCRDPathMissing: true, + + // The BinaryAssetsDirectory is only required if you want to run the tests directly + // without call the makefile target test. If not informed it will look for the + // default path defined in controller-runtime which is /usr/local/kubebuilder/. + // Note that you must have the required binaries setup under the bin directory to perform + // the tests directly. When we run make test it will be setup and used automatically. + BinaryAssetsDirectory: filepath.Join("..", "..", "bin", "k8s", + fmt.Sprintf("1.30.0-%s-%s", runtime.GOOS, runtime.GOARCH)), + } + + var err error + // cfg is defined in this file globally. + cfg, err = testEnv.Start() + Expect(err).NotTo(HaveOccurred()) + Expect(cfg).NotTo(BeNil()) + + err = appsv1alpha1.AddToScheme(scheme.Scheme) + Expect(err).NotTo(HaveOccurred()) + + // +kubebuilder:scaffold:scheme + + k8sClient, err = client.New(cfg, client.Options{Scheme: scheme.Scheme}) + Expect(err).NotTo(HaveOccurred()) + Expect(k8sClient).NotTo(BeNil()) + +}) + +var _ = AfterSuite(func() { + By("tearing down the test environment") + err := testEnv.Stop() + Expect(err).NotTo(HaveOccurred()) +}) diff --git a/internal/utils/utils.go b/internal/utils/utils.go index 09f634ad4..e2141a5f5 100644 --- a/internal/utils/utils.go +++ b/internal/utils/utils.go @@ -55,9 +55,6 @@ const ( // DefaultModelStorePath is the default path for model store. DefaultModelStorePath = "/model-store" - - // KServeDeploymentModeAnnotationKey indicates annotation name for the kserve deployment mode of the NIMService. - KServeDeploymentModeAnnotationKey = "serving.kserve.io/deploymentMode" ) const ( diff --git a/internal/webhook/apps/v1alpha1/nimservice_webhook.go b/internal/webhook/apps/v1alpha1/nimservice_webhook.go index 307536946..0b5599f3f 100644 --- a/internal/webhook/apps/v1alpha1/nimservice_webhook.go +++ b/internal/webhook/apps/v1alpha1/nimservice_webhook.go @@ -25,6 +25,7 @@ import ( "k8s.io/client-go/discovery" "k8s.io/client-go/rest" ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" logf "sigs.k8s.io/controller-runtime/pkg/log" "sigs.k8s.io/controller-runtime/pkg/webhook" "sigs.k8s.io/controller-runtime/pkg/webhook/admission" @@ -38,7 +39,7 @@ var nimservicelog = logf.Log.WithName("webhooks").WithName("NIMService") // SetupNIMServiceWebhookWithManager registers the webhook for NIMService in the manager. func SetupNIMServiceWebhookWithManager(mgr ctrl.Manager) error { - validator, err := NewNIMServiceCustomValidator() + validator, err := NewNIMServiceCustomValidator(mgr.GetClient()) if err != nil { return err } @@ -63,12 +64,13 @@ func SetupNIMServiceWebhookWithManager(mgr ctrl.Manager) error { // as this struct is used only for temporary operations and does not need to be deeply copied. type NIMServiceCustomValidator struct { k8sVersion string + k8sClient client.Client } var _ webhook.CustomValidator = &NIMServiceCustomValidator{} // NewNIMServiceCustomValidator fetches and caches the Kubernetes version. -func NewNIMServiceCustomValidator() (*NIMServiceCustomValidator, error) { +func NewNIMServiceCustomValidator(k8sClient client.Client) (*NIMServiceCustomValidator, error) { config, err := rest.InClusterConfig() if err != nil { return nil, fmt.Errorf("failed to get in-cluster config: %v", err) @@ -81,7 +83,10 @@ func NewNIMServiceCustomValidator() (*NIMServiceCustomValidator, error) { if err != nil { return nil, fmt.Errorf("failed to get Kubernetes server version: %v", err) } - return &NIMServiceCustomValidator{k8sVersion: versionInfo.GitVersion}, nil + return &NIMServiceCustomValidator{ + k8sVersion: versionInfo.GitVersion, + k8sClient: k8sClient, + }, nil } // ValidateCreate implements webhook.CustomValidator so a webhook will be registered for the type NIMService. @@ -95,7 +100,7 @@ func (v *NIMServiceCustomValidator) ValidateCreate(_ context.Context, obj runtim fldPath := field.NewPath("nimservice").Child("spec") // Perform comprehensive spec validation via helper. - warningList, errList := validateNIMServiceSpec(&nimservice.Spec, fldPath, v.k8sVersion) + warningList, errList := validateNIMServiceSpec(nimservice, fldPath, v.k8sVersion, v.k8sClient) if len(errList) > 0 { return warningList, errList.ToAggregate() @@ -118,7 +123,7 @@ func (v *NIMServiceCustomValidator) ValidateUpdate(_ context.Context, oldObj, ne fldPath := field.NewPath("nimservice").Child("spec") // Start with structural validation to ensure the updated object is well formed. - warningList, errList := validateNIMServiceSpec(&newNIMService.Spec, fldPath, v.k8sVersion) + warningList, errList := validateNIMServiceSpec(newNIMService, fldPath, v.k8sVersion, v.k8sClient) wList, eList := validateMultiNodeImmutability(oldNIMService, newNIMService, field.NewPath("spec").Child("multiNode")) warningList = append(warningList, wList...) diff --git a/internal/webhook/apps/v1alpha1/nimservice_webhook_validation_helper.go b/internal/webhook/apps/v1alpha1/nimservice_webhook_validation_helper.go index e5e5324bf..866ced587 100644 --- a/internal/webhook/apps/v1alpha1/nimservice_webhook_validation_helper.go +++ b/internal/webhook/apps/v1alpha1/nimservice_webhook_validation_helper.go @@ -17,6 +17,7 @@ limitations under the License. package v1alpha1 import ( + "context" "fmt" "reflect" "slices" @@ -25,7 +26,9 @@ import ( corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/equality" apiresource "k8s.io/apimachinery/pkg/api/resource" + "k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/util/validation/field" + "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/webhook/admission" "github.com/blang/semver/v4" @@ -64,7 +67,10 @@ var validDRAResourceQuantitySelectorOps = []appsv1alpha1.DRAResourceQuantitySele // object. It is intended to be invoked by both ValidateCreate and ValidateUpdate to // ensure the resource is well-formed before any other validation (e.g. immutability) // is performed. -func validateNIMServiceSpec(spec *appsv1alpha1.NIMServiceSpec, fldPath *field.Path, kubeVersion string) (admission.Warnings, field.ErrorList) { +func validateNIMServiceSpec(nimservice *appsv1alpha1.NIMService, fldPath *field.Path, kubeVersion string, + k8sClient client.Client) (admission.Warnings, field.ErrorList) { + spec := &nimservice.Spec + warningList, errList := validateImageConfiguration(&spec.Image, fldPath.Child("image")) // TODO abstract all validation functions with a single signature validateFunc(*appsv1alpha1.NIMServiceSpec, *field.Path) (admission.Warnings, field.ErrorList) @@ -100,7 +106,8 @@ func validateNIMServiceSpec(spec *appsv1alpha1.NIMServiceSpec, fldPath *field.Pa warningList = append(warningList, w...) errList = append(errList, err...) - w, err = validateKServeConfiguration(spec, fldPath) + namespacedName := client.ObjectKeyFromObject(nimservice) + w, err = validateKServeConfiguration(spec, fldPath, &namespacedName, k8sClient) warningList = append(warningList, w...) errList = append(errList, err...) @@ -504,50 +511,54 @@ func validateResourcesConfiguration(resources *corev1.ResourceRequirements, fldP } // validateKServeonfiguration implements required KServe validations. -func validateKServeConfiguration(spec *appsv1alpha1.NIMServiceSpec, fldPath *field.Path) (admission.Warnings, field.ErrorList) { +func validateKServeConfiguration(spec *appsv1alpha1.NIMServiceSpec, fldPath *field.Path, + namespacedName *types.NamespacedName, k8sClient client.Client) (admission.Warnings, field.ErrorList) { errList := field.ErrorList{} warningList := admission.Warnings{} - platformIsKServe := spec.InferencePlatform == appsv1alpha1.PlatformTypeKServe - - // mode is the value, and annotated is true if the key-value pair exist. - mode, annotated := spec.Annotations[utils.KServeDeploymentModeAnnotationKey] - // If the annotation is absent, kserve defaults to Knative. - knative := !annotated || strings.EqualFold(mode, string(kserveconstants.Knative)) || strings.EqualFold(mode, string(kserveconstants.LegacyServerless)) - - // When Spec.InferencePlatform is "kserve" and used in "Knative" mode: - if platformIsKServe && knative { - // Spec.Scale (autoscaling) cannot be set. - if spec.Scale.Enabled != nil && *spec.Scale.Enabled { - errList = append(errList, field.Forbidden(fldPath.Child("scale").Child("enabled"), fmt.Sprintf("%s (autoscaling) cannot be set when KServe runs in knative mode", fldPath.Child("scale")))) - } - - // TODO deprecate this once we have removed the .spec.expose.ingress field from the spec - if spec.Expose.Ingress.Enabled != nil && *spec.Expose.Ingress.Enabled { //nolint:staticcheck - errList = append(errList, field.Forbidden(fldPath.Child("expose").Child("ingress").Child("enabled"), fmt.Sprintf("%s cannot be set when KServe runs in knative mode", fldPath.Child("expose").Child("ingress").Child("enabled")))) - } - - // Spec.Expose.Router.Ingress cannot be set. - if spec.Expose.Router.Ingress != nil { - errList = append(errList, field.Forbidden(fldPath.Child("router").Child("ingress"), fmt.Sprintf("%s cannot be set when KServe runs in knative mode", fldPath.Child("router").Child("ingress")))) - } - - // Spec.Expose.Router.Gateway cannot be set. - if spec.Expose.Router.Gateway != nil { - errList = append(errList, field.Forbidden(fldPath.Child("router").Child("gateway"), fmt.Sprintf("%s cannot be set when KServe runs in knative mode", fldPath.Child("router").Child("gateway")))) + if spec.InferencePlatform == appsv1alpha1.PlatformTypeKServe { + // Get the deployment mode + mode, err := utils.GetKServeDeploymentMode(context.TODO(), k8sClient, spec.Annotations, namespacedName) + if err != nil { + errList = append(errList, field.InternalError(fldPath, fmt.Errorf("failed to determine KServe deployment mode: %w", err))) + } else { + knative := strings.EqualFold(string(mode), string(kserveconstants.Knative)) || strings.EqualFold(string(mode), string(kserveconstants.LegacyServerless)) + + // When Spec.InferencePlatform is "kserve" and used in "Knative" mode: + if knative { + // Spec.Scale (autoscaling) cannot be set. + if spec.Scale.Enabled != nil && *spec.Scale.Enabled { + errList = append(errList, field.Forbidden(fldPath.Child("scale").Child("enabled"), fmt.Sprintf("%s (autoscaling) cannot be set when KServe runs in knative mode", fldPath.Child("scale")))) + } + + // TODO deprecate this once we have removed the .spec.expose.ingress field from the spec + if spec.Expose.Ingress.Enabled != nil && *spec.Expose.Ingress.Enabled { //nolint:staticcheck + errList = append(errList, field.Forbidden(fldPath.Child("expose").Child("ingress").Child("enabled"), fmt.Sprintf("%s cannot be set when KServe runs in knative mode", fldPath.Child("expose").Child("ingress").Child("enabled")))) + } + + // Spec.Expose.Router.Ingress cannot be set. + if spec.Expose.Router.Ingress != nil { + errList = append(errList, field.Forbidden(fldPath.Child("router").Child("ingress"), fmt.Sprintf("%s cannot be set when KServe runs in knative mode", fldPath.Child("router").Child("ingress")))) + } + + // Spec.Expose.Router.Gateway cannot be set. + if spec.Expose.Router.Gateway != nil { + errList = append(errList, field.Forbidden(fldPath.Child("router").Child("gateway"), fmt.Sprintf("%s cannot be set when KServe runs in knative mode", fldPath.Child("router").Child("gateway")))) + } + + // Spec.Metrics.ServiceMonitor cannot be set. + if spec.Metrics.Enabled != nil && *spec.Metrics.Enabled { + errList = append(errList, field.Forbidden(fldPath.Child("metrics").Child("enabled"), fmt.Sprintf("%s cannot be set when KServe runs in knative mode", fldPath.Child("metrics").Child("enabled")))) + } + } } - // Spec.Metrics.ServiceMonitor cannot be set. - if spec.Metrics.Enabled != nil && *spec.Metrics.Enabled { - errList = append(errList, field.Forbidden(fldPath.Child("metrics").Child("enabled"), fmt.Sprintf("%s cannot be set when KServe runs in knative mode", fldPath.Child("metrics").Child("serviceMonitor")))) + // Spec.MultiNode cannot be enabled when inferencePlatform is kserve. + if spec.MultiNode != nil { + errList = append(errList, field.Forbidden(fldPath.Child("multiNode"), "cannot be set when the inferencePlatform is KServe")) } } - // Spec.MultiNode cannot be enabled when inferencePlatform is kserve. - if platformIsKServe && spec.MultiNode != nil { - errList = append(errList, field.Forbidden(fldPath.Child("multiNode"), "cannot be set when KServe runs in knative mode")) - } - return warningList, errList } diff --git a/internal/webhook/apps/v1alpha1/nimservice_webhook_validation_helper_test.go b/internal/webhook/apps/v1alpha1/nimservice_webhook_validation_helper_test.go index 8dba31831..a29127f9c 100644 --- a/internal/webhook/apps/v1alpha1/nimservice_webhook_validation_helper_test.go +++ b/internal/webhook/apps/v1alpha1/nimservice_webhook_validation_helper_test.go @@ -25,7 +25,7 @@ import ( "k8s.io/utils/ptr" appsv1alpha1 "github.com/NVIDIA/k8s-nim-operator/api/apps/v1alpha1" - "github.com/NVIDIA/k8s-nim-operator/internal/utils" + kserveconstants "github.com/kserve/kserve/pkg/constants" ) // TestValidateImageConfiguration covers required field checks on Image. @@ -1009,19 +1009,19 @@ func TestValidateKServeConfiguration(t *testing.T) { wantWarnings: 0, }, { - name: "kserve knative (annotation absent) – valid", + name: "kserve standard (annotation absent) – valid", modify: func(ns *appsv1alpha1.NIMService) { ns.Spec.InferencePlatform = appsv1alpha1.PlatformTypeKServe - // No annotation ⇒ knative by default. + // No annotation ⇒ Standard by default. }, wantErrs: 0, wantWarnings: 0, }, { - name: "kserve knative (annotation present) – autoscaling set", + name: "kserve knative – autoscaling set", modify: func(ns *appsv1alpha1.NIMService) { ns.Spec.InferencePlatform = appsv1alpha1.PlatformTypeKServe - ns.Spec.Annotations = map[string]string{utils.KServeDeploymentModeAnnotationKey: "Knative"} + ns.Spec.Annotations = map[string]string{kserveconstants.DeploymentMode: string(kserveconstants.Knative)} ns.Spec.Scale.Enabled = &trueVal }, wantErrs: 1, @@ -1031,6 +1031,7 @@ func TestValidateKServeConfiguration(t *testing.T) { name: "kserve knative – ingress set", modify: func(ns *appsv1alpha1.NIMService) { ns.Spec.InferencePlatform = appsv1alpha1.PlatformTypeKServe + ns.Spec.Annotations = map[string]string{kserveconstants.DeploymentMode: string(kserveconstants.Knative)} ns.Spec.Expose.Router.Ingress = &appsv1alpha1.RouterIngress{ IngressClass: "nginx", } @@ -1042,6 +1043,7 @@ func TestValidateKServeConfiguration(t *testing.T) { name: "kserve knative – servicemonitor set", modify: func(ns *appsv1alpha1.NIMService) { ns.Spec.InferencePlatform = appsv1alpha1.PlatformTypeKServe + ns.Spec.Annotations = map[string]string{kserveconstants.DeploymentMode: string(kserveconstants.Knative)} ns.Spec.Metrics.Enabled = &trueVal }, wantErrs: 1, @@ -1051,6 +1053,7 @@ func TestValidateKServeConfiguration(t *testing.T) { name: "kserve knative – all prohibited set", modify: func(ns *appsv1alpha1.NIMService) { ns.Spec.InferencePlatform = appsv1alpha1.PlatformTypeKServe + ns.Spec.Annotations = map[string]string{kserveconstants.DeploymentMode: string(kserveconstants.Knative)} ns.Spec.Scale.Enabled = &trueVal ns.Spec.Expose.Router.Ingress = &appsv1alpha1.RouterIngress{ IngressClass: "nginx", @@ -1060,11 +1063,24 @@ func TestValidateKServeConfiguration(t *testing.T) { wantErrs: 3, wantWarnings: 0, }, + { + name: "kserve standard (annotation absent) – all set", + modify: func(ns *appsv1alpha1.NIMService) { + ns.Spec.InferencePlatform = appsv1alpha1.PlatformTypeKServe + ns.Spec.Scale.Enabled = &trueVal + ns.Spec.Expose.Router.Ingress = &appsv1alpha1.RouterIngress{ + IngressClass: "nginx", + } + ns.Spec.Metrics.Enabled = &trueVal + }, + wantErrs: 0, + wantWarnings: 0, + }, { name: "kserve rawdeployment – allowed autoscaling, but multidnode forbidden", modify: func(ns *appsv1alpha1.NIMService) { ns.Spec.InferencePlatform = appsv1alpha1.PlatformTypeKServe - ns.Spec.Annotations = map[string]string{utils.KServeDeploymentModeAnnotationKey: "RawDeployment"} + ns.Spec.Annotations = map[string]string{kserveconstants.DeploymentMode: string(kserveconstants.LegacyRawDeployment)} ns.Spec.Scale.Enabled = &trueVal // should be fine ns.Spec.MultiNode = &appsv1alpha1.NimServiceMultiNodeConfig{Parallelism: &appsv1alpha1.ParallelismSpec{Pipeline: ptr.To(uint32(1))}} }, @@ -1093,7 +1109,7 @@ func TestValidateKServeConfiguration(t *testing.T) { tc.modify(ns) - w, errs := validateKServeConfiguration(&ns.Spec, fld) + w, errs := validateKServeConfiguration(&ns.Spec, fld, nil, nil) gotErrs := len(errs) gotWarnings := len(w) if gotErrs != tc.wantErrs || gotWarnings != tc.wantWarnings {