NVIDIA · visheshtanksale · Jun 28, 2025 · Jun 27, 2025
@@ -108,6 +108,7 @@ type HuggingFaceHubSource struct {
 }
 
 // NGCSource references a model stored on NVIDIA NGC.
+// +kubebuilder:validation:XValidation:rule="!(has(self.model) && has(self.modelEndpoint))",message="Only one of 'model' or 'modelEndpoint' can be specified"
 type NGCSource struct {
 	// The name of an existing pull secret containing the NGC_API_KEY
 	AuthSecret string `json:"authSecret"`
@@ -117,7 +118,9 @@ type NGCSource struct {
 	// PullSecret to pull the model puller image
 	PullSecret string `json:"pullSecret,omitempty"`
 	// Model spec for caching
-	Model ModelSpec `json:"model,omitempty"`
+	Model *ModelSpec `json:"model,omitempty"`
+	// ModelEndpoint is the endpoint for the model to be cached for Universal NIM
+	ModelEndpoint *string `json:"modelEndpoint,omitempty"`
 }
 
 // ModelSpec is the spec required to cache selected models.
@@ -309,6 +312,36 @@ func (n *NIMCache) GetRuntimeClassName() *string {
 	return &n.Spec.RuntimeClassName
 }
 
+// IsUniversalNIM reports whether the NIMCache is for a universal NIM: an NGC source with modelEndpoint set, or an HF source.
+func (n *NIMCache) IsUniversalNIM() bool {
+	// An NGC source counts as universal NIM only when its modelEndpoint is set.
+	if n.Spec.Source.NGC != nil && n.Spec.Source.NGC.ModelEndpoint != nil {
+		return true
+	}
+	// Any HF source is also treated as universal NIM.
+	if n.Spec.Source.HF != nil {
+		return true
+	}
+	return false
+}
+
+// IsOptimizedNIM reports whether the NIMCache is for an optimized NIM: an NGC source with no modelEndpoint set.
+func (n *NIMCache) IsOptimizedNIM() bool {
+	// Optimized NIM is when the NGC source is set and its modelEndpoint is not.
+	if n.Spec.Source.NGC != nil && n.Spec.Source.NGC.ModelEndpoint == nil {
+		return true
+	}
+	return false
+}
+
+// GetModelSpec returns a copy of the NGC model spec, or a zero-value ModelSpec when the NGC source or its model is unset.
+func (n *NIMCache) GetModelSpec() ModelSpec {
+	if n.Spec.Source.NGC != nil && n.Spec.Source.NGC.Model != nil {
+		return *n.Spec.Source.NGC.Model
+	}
+	return ModelSpec{}
+}
+
 // GetProxySpec returns the proxy spec for the NIMService deployment.
 func (n *NIMCache) GetProxySpec() *ProxySpec {
 	return n.Spec.Proxy

@@ -189,7 +189,7 @@ func (n *NIMService) GetStandardEnv() []corev1.EnvVar {
 	envVars := []corev1.EnvVar{
 		{
 			Name:  "NIM_CACHE_PATH",
-			Value: "/model-store",
+			Value: utils.DefaultModelStorePath,
 		},
 		{
 			Name: "NGC_API_KEY",

@@ -389,6 +389,10 @@ spec:
                               for the model computations
                             type: string
                         type: object
+                      modelEndpoint:
+                        description: ModelEndpoint is the endpoint for the model to
+                          be cached for Universal NIM
+                        type: string
                       modelPuller:
                         description: ModelPuller is the container image that can pull
                           the model
@@ -405,6 +409,9 @@ spec:
                     - authSecret
                     - modelPuller
                     type: object
+                    x-kubernetes-validations:
+                    - message: Only one of 'model' or 'modelEndpoint' can be specified
+                      rule: '!(has(self.model) && has(self.modelEndpoint))'
                 type: object
                 x-kubernetes-validations:
                 - message: Exactly one of ngc, dataStore, or hf must be defined

@@ -389,6 +389,10 @@ spec:
                               for the model computations
                             type: string
                         type: object
+                      modelEndpoint:
+                        description: ModelEndpoint is the endpoint for the model to
+                          be cached for Universal NIM
+                        type: string
                       modelPuller:
                         description: ModelPuller is the container image that can pull
                           the model
@@ -405,6 +409,9 @@ spec:
                     - authSecret
                     - modelPuller
                     type: object
+                    x-kubernetes-validations:
+                    - message: Only one of 'model' or 'modelEndpoint' can be specified
+                      rule: '!(has(self.model) && has(self.modelEndpoint))'
                 type: object
                 x-kubernetes-validations:
                 - message: Exactly one of ngc, dataStore, or hf must be defined

@@ -389,6 +389,10 @@ spec:
                               for the model computations
                             type: string
                         type: object
+                      modelEndpoint:
+                        description: ModelEndpoint is the endpoint for the model to
+                          be cached for Universal NIM
+                        type: string
                       modelPuller:
                         description: ModelPuller is the container image that can pull
                           the model
@@ -405,6 +409,9 @@ spec:
                     - authSecret
                     - modelPuller
                     type: object
+                    x-kubernetes-validations:
+                    - message: Only one of 'model' or 'modelEndpoint' can be specified
+                      rule: '!(has(self.model) && has(self.modelEndpoint))'
                 type: object
                 x-kubernetes-validations:
                 - message: Exactly one of ngc, dataStore, or hf must be defined

@@ -494,6 +494,7 @@ func (r *NIMCacheReconciler) reconcileServiceAccount(ctx context.Context, nimCac
 		}
 
 		// Create the ServiceAccount
+
 		err = r.Create(ctx, newSA)
 		if err != nil {
 			logger.Error(err, "Failed to create ServiceAccount", "Name", saName)
@@ -550,8 +551,8 @@ func (r *NIMCacheReconciler) reconcilePVC(ctx context.Context, nimCache *appsv1a
 // Model auto-selection is enabled and
 // Explicit model profiles are not provided by the user.
 func isModelSelectionRequired(nimCache *appsv1alpha1.NIMCache) bool {
-	if nimCache.Spec.Source.NGC != nil &&
-		len(nimCache.Spec.Source.NGC.Model.Profiles) == 0 {
+	if nimCache.IsOptimizedNIM() &&
+		len(nimCache.GetModelSpec().Profiles) == 0 {
 		return true
 	}
 	return false
@@ -567,8 +568,8 @@ func isModelSelectionDone(nimCache *appsv1alpha1.NIMCache) bool {
 }
 
 func getSelectedProfiles(nimCache *appsv1alpha1.NIMCache) ([]string, error) {
-	if nimCache.Spec.Source.NGC != nil {
-		if len(nimCache.Spec.Source.NGC.Model.Profiles) > 0 {
+	if nimCache.IsOptimizedNIM() {
+		if len(nimCache.GetModelSpec().Profiles) > 0 {
 			return nimCache.Spec.Source.NGC.Model.Profiles, nil
 		}
 
@@ -589,7 +590,7 @@ func (r *NIMCacheReconciler) reconcileModelManifest(ctx context.Context, nimCach
 	logger := r.GetLogger()
 
 	// Model manifest is available only for NGC model pullers
-	if nimCache.Spec.Source.NGC == nil {
+	if !nimCache.IsOptimizedNIM() {
 		return false, nil
 	}
 
@@ -677,7 +678,7 @@ func (r *NIMCacheReconciler) reconcileModelSelection(ctx context.Context, nimCac
 	if isModelSelectionRequired(nimCache) && !isModelSelectionDone(nimCache) {
 		var discoveredGPUs []string
 		// If no specific GPUs are provided, then auto-detect GPUs in the cluster for profile selection
-		if len(nimCache.Spec.Source.NGC.Model.GPUs) == 0 {
+		if len(nimCache.GetModelSpec().GPUs) == 0 {
 			gpusByNode, err := r.GetNodeGPUProducts(ctx)
 			if err != nil {
 				logger.Error(err, "Failed to get gpus in the cluster")
@@ -693,7 +694,7 @@ func (r *NIMCacheReconciler) reconcileModelSelection(ctx context.Context, nimCac
 		}
 
 		// Match profiles with user input
-		profiles, err := nimManifest.MatchProfiles(nimCache.Spec.Source.NGC.Model, discoveredGPUs)
+		profiles, err := nimManifest.MatchProfiles(nimCache.GetModelSpec(), discoveredGPUs)
 		if err != nil {
 			logger.Error(err, "Failed to match profiles for given model parameters")
 			return err
@@ -1084,7 +1085,8 @@ func (r *NIMCacheReconciler) constructJob(ctx context.Context, nimCache *appsv1a
 		}
 	}
 
-	if nimCache.Spec.Source.DataStore != nil || nimCache.Spec.Source.HF != nil {
+	switch {
+	case nimCache.Spec.Source.DataStore != nil || nimCache.Spec.Source.HF != nil:
 		var hfDataSource HFInterface
 		if nimCache.Spec.Source.DataStore != nil {
 			hfDataSource = nimCache.Spec.Source.DataStore
@@ -1117,6 +1119,10 @@ func (r *NIMCacheReconciler) constructJob(ctx context.Context, nimCache *appsv1a
 						Name:  "HF_ENDPOINT",
 						Value: hfDataSource.GetEndpoint(),
 					},
+					{
+						Name:  "HF_HUB_OFFLINE",
+						Value: "0",
+					},
 				},
 				VolumeMounts: []corev1.VolumeMount{
 					{
@@ -1152,7 +1158,8 @@ func (r *NIMCacheReconciler) constructJob(ctx context.Context, nimCache *appsv1a
 				Name: hfDataSource.GetPullSecret(),
 			},
 		}
-	} else if nimCache.Spec.Source.NGC != nil {
+
+	case nimCache.Spec.Source.NGC != nil && nimCache.Spec.Source.NGC.ModelEndpoint == nil:
 		job.Spec.Template.Spec.Containers = []corev1.Container{
 			{
 				Name:    NIMCacheContainerName,
@@ -1220,8 +1227,57 @@ func (r *NIMCacheReconciler) constructJob(ctx context.Context, nimCache *appsv1a
 				job.Spec.Template.Spec.Containers[0].Args = append(job.Spec.Template.Spec.Containers[0].Args, selectedProfiles...)
 			}
 		}
-	}
 
+	case nimCache.Spec.Source.NGC != nil && nimCache.Spec.Source.NGC.ModelEndpoint != nil:
+		job.Spec.Template.Spec.Containers = []corev1.Container{
+			{
+				Name:    NIMCacheContainerName,
+				Image:   nimCache.Spec.Source.NGC.ModelPuller,
+				Command: []string{"create-model-store"},
+				Args:    []string{"--model-repo", *nimCache.Spec.Source.NGC.ModelEndpoint, "--model-store", "/model-store"},
+				EnvFrom: nimCache.Spec.Source.EnvFromSecrets(),
+				Env: []corev1.EnvVar{
+					{
+						Name:  "NIM_CACHE_PATH",
+						Value: utils.DefaultModelStorePath,
+					},
+				},
+				VolumeMounts: []corev1.VolumeMount{
+					{
+						Name:      "nim-cache-volume",
+						MountPath: utils.DefaultModelStorePath,
+						SubPath:   nimCache.Spec.Storage.PVC.SubPath,
+					},
+				},
+				Resources: corev1.ResourceRequirements{
+					Limits: map[corev1.ResourceName]apiResource.Quantity{
+						"cpu":    nimCache.Spec.Resources.CPU,
+						"memory": nimCache.Spec.Resources.Memory,
+					},
+					Requests: map[corev1.ResourceName]apiResource.Quantity{
+						"cpu":    nimCache.Spec.Resources.CPU,
+						"memory": nimCache.Spec.Resources.Memory,
+					},
+				},
+				TerminationMessagePath:   "/dev/termination-log",
+				TerminationMessagePolicy: corev1.TerminationMessageFallbackToLogsOnError,
+				SecurityContext: &corev1.SecurityContext{
+					AllowPrivilegeEscalation: ptr.To[bool](false),
+					Capabilities: &corev1.Capabilities{
+						Drop: []corev1.Capability{"ALL"},
+					},
+					RunAsNonRoot: ptr.To[bool](true),
+					RunAsGroup:   nimCache.GetGroupID(),
+					RunAsUser:    nimCache.GetUserID(),
+				},
+			},
+		}
+		job.Spec.Template.Spec.ImagePullSecrets = []corev1.LocalObjectReference{
+			{
+				Name: nimCache.Spec.Source.NGC.PullSecret,
+			},
+		}
+	}
 	// Merge env with the user provided values
 	job.Spec.Template.Spec.Containers[0].Env = utils.MergeEnvVars(job.Spec.Template.Spec.Containers[0].Env, nimCache.Spec.Env)
 

@@ -487,7 +487,7 @@ var _ = Describe("NIMCache Controller", func() {
 					Namespace: "default",
 				},
 				Spec: appsv1alpha1.NIMCacheSpec{
-					Source: appsv1alpha1.NIMSource{NGC: &appsv1alpha1.NGCSource{ModelPuller: "nvcr.io/nim:test", PullSecret: "my-secret", Model: appsv1alpha1.ModelSpec{Profiles: profiles}}},
+					Source: appsv1alpha1.NIMSource{NGC: &appsv1alpha1.NGCSource{ModelPuller: "nvcr.io/nim:test", PullSecret: "my-secret", Model: &appsv1alpha1.ModelSpec{Profiles: profiles}}},
 				},
 			}
 
@@ -513,7 +513,7 @@ var _ = Describe("NIMCache Controller", func() {
 					Annotations: map[string]string{SelectedNIMProfilesAnnotationKey: string(profilesJSON)},
 				},
 				Spec: appsv1alpha1.NIMCacheSpec{
-					Source: appsv1alpha1.NIMSource{NGC: &appsv1alpha1.NGCSource{ModelPuller: "nvcr.io/nim:test", PullSecret: "my-secret", Model: appsv1alpha1.ModelSpec{GPUs: []appsv1alpha1.GPUSpec{{IDs: []string{"26b5"}}}}}},
+					Source: appsv1alpha1.NIMSource{NGC: &appsv1alpha1.NGCSource{ModelPuller: "nvcr.io/nim:test", PullSecret: "my-secret", Model: &appsv1alpha1.ModelSpec{GPUs: []appsv1alpha1.GPUSpec{{IDs: []string{"26b5"}}}}}},
 					Env: []corev1.EnvVar{
 						{
 							Name:  "NGC_HOME",
@@ -567,7 +567,7 @@ var _ = Describe("NIMCache Controller", func() {
 					Namespace: "default",
 				},
 				Spec: appsv1alpha1.NIMCacheSpec{
-					Source: appsv1alpha1.NIMSource{NGC: &appsv1alpha1.NGCSource{ModelPuller: "nvcr.io/nim:test", PullSecret: "my-secret", Model: appsv1alpha1.ModelSpec{Profiles: profiles}}},
+					Source: appsv1alpha1.NIMSource{NGC: &appsv1alpha1.NGCSource{ModelPuller: "nvcr.io/nim:test", PullSecret: "my-secret", Model: &appsv1alpha1.ModelSpec{Profiles: profiles}}},
 					Proxy: &appsv1alpha1.ProxySpec{
 						HttpProxy:     "http://proxy:1000",
 						HttpsProxy:    "https://proxy:1000",

@@ -252,8 +252,8 @@ func (r *NIMServiceReconciler) reconcileNIMService(ctx context.Context, nimServi
 
 	// Select PVC for model store
 	nimCacheName := nimService.GetNIMCacheName()
+	nimCache := appsv1alpha1.NIMCache{}
 	if nimCacheName != "" { // nolint:gocritic
-		nimCache := appsv1alpha1.NIMCache{}
 		if err := r.Get(ctx, types.NamespacedName{Name: nimCacheName, Namespace: nimService.GetNamespace()}, &nimCache); err != nil {
 			// Fail the NIMService if the NIMCache is not found
 			if k8serrors.IsNotFound(err) {
@@ -338,24 +338,34 @@ func (r *NIMServiceReconciler) reconcileNIMService(ctx context.Context, nimServi
 		}
 		deploymentParams.Env = append(deploymentParams.Env, profileEnv)
 
-		// Retrieve and set profile details from NIMCache
-		var profile *appsv1alpha1.NIMProfile
-		profile, err = r.getNIMCacheProfile(ctx, nimService, modelProfile)
-		if err != nil {
-			logger.Error(err, "Failed to get cached NIM profile")
-			return ctrl.Result{}, err
-		}
-
-		// Auto assign GPU resources in case of the optimized profile
-		if profile != nil {
-			if err = r.assignGPUResources(ctx, nimService, profile, deploymentParams); err != nil {
+		// Only assign GPU resources if the NIMCache is for optimized NIM
+		if nimCache.IsOptimizedNIM() {
+			// Retrieve and set profile details from NIMCache
+			var profile *appsv1alpha1.NIMProfile
+			profile, err = r.getNIMCacheProfile(ctx, nimService, modelProfile)
+			if err != nil {
+				logger.Error(err, "Failed to get cached NIM profile")
 				return ctrl.Result{}, err
 			}
+
+			// Auto assign GPU resources in case of the optimized profile
+			if profile != nil {
+				if err = r.assignGPUResources(ctx, nimService, profile, deploymentParams); err != nil {
+					return ctrl.Result{}, err
+				}
+			}
 		}
 
 		// TODO: assign GPU resources and node selector that is required for the selected profile
 	}
 
+	if nimCache.IsUniversalNIM() {
+		deploymentParams.Env = append(deploymentParams.Env, corev1.EnvVar{
+			Name:  "NIM_MODEL_NAME",
+			Value: utils.DefaultModelStorePath,
+		})
+	}
+
 	// Setup pod resource claims
 	namedDraResources := shared.GenerateNamedDRAResources(nimService)
 	deploymentParams.PodResourceClaims = shared.GetPodResourceClaims(namedDraResources)

@@ -49,6 +49,9 @@ const (
 
 	// DRAPodClaimNameAnnotationKey indicates annotation name for the identifier of a resource claim template in a pod spec.
 	DRAPodClaimNameAnnotationKey = "resource.kubernetes.io/pod-claim-name"
+
+	// DefaultModelStorePath is the default path for model store.
+	DefaultModelStorePath = "/model-store"
 )
 
 const (