NVIDIA · shivamerla · Jul 15, 2025 · Jul 2, 2025
@@ -175,9 +175,7 @@ func (r *NIMServiceReconciler) reconcileNIMService(ctx context.Context, nimServi
 	}
 
 	var result *ctrl.Result
-	result, err = r.checkInferenceServiceStatus(ctx,
-		&types.NamespacedName{Name: nimService.GetName(), Namespace: nimService.GetNamespace()},
-		nimService, deploymentMode)
+	result, err = r.checkInferenceServiceStatus(ctx, nimService, deploymentMode)
 
 	if err != nil {
 		r.log.Error(err, "Unable to update status")
@@ -400,7 +398,7 @@ func (r *NIMServiceReconciler) renderAndSyncInferenceService(ctx context.Context
 
 	logger := r.log
 
-	var profileEnv *corev1.EnvVar
+	var profileEnv *[]corev1.EnvVar
 	var profile *appsv1alpha1.NIMProfile
 	var gpuResources *corev1.ResourceRequirements
 	var initContainers []corev1.Container
@@ -410,9 +408,11 @@ func (r *NIMServiceReconciler) renderAndSyncInferenceService(ctx context.Context
 
 	// Setup env for explicit override profile is specified
 	if modelProfile != "" {
-		profileEnv = &corev1.EnvVar{
-			Name:  "NIM_MODEL_PROFILE",
-			Value: modelProfile,
+		profileEnv = &[]corev1.EnvVar{
+			{
+				Name:  "NIM_MODEL_PROFILE",
+				Value: modelProfile,
+			},
 		}
 
 		// Only assign GPU resources if the NIMCache is for optimized NIM
@@ -457,16 +457,18 @@ func (r *NIMServiceReconciler) renderAndSyncInferenceService(ctx context.Context
 
 	isvcParams.PodResourceClaims = shared.GetPodResourceClaims(namedDraResources)
 	if nimCache.IsUniversalNIM() {
-		isvcParams.Env = append(isvcParams.Env, corev1.EnvVar{
-			Name:  "NIM_MODEL_NAME",
-			Value: utils.DefaultModelStorePath,
-		})
+		isvcParams.Env = utils.MergeEnvVars([]corev1.EnvVar{
+			{
+				Name:  "NIM_MODEL_NAME",
+				Value: utils.DefaultModelStorePath,
+			},
+		}, isvcParams.Env)
 	}
 	// Setup volume mounts with model store
 	isvcParams.Volumes = nimService.GetVolumes(*modelPVC)
 	isvcParams.VolumeMounts = nimService.GetVolumeMounts(*modelPVC)
 	if profileEnv != nil {
-		isvcParams.Env = append(isvcParams.Env, *profileEnv)
+		isvcParams.Env = utils.MergeEnvVars(*profileEnv, isvcParams.Env)
 	}
 	// Auto assign GPU resources in case of the optimized profile
 	if gpuResources != nil {
@@ -602,12 +604,12 @@ func (r *NIMServiceReconciler) addGPUResources(ctx context.Context, nimService *
 	return resources, nil
 }
 
-func (r *NIMServiceReconciler) checkInferenceServiceStatus(ctx context.Context, namespacedName *types.NamespacedName,
-	nimService *appsv1alpha1.NIMService, deploymentMode kserveconstants.DeploymentModeType) (*ctrl.Result, error) {
+func (r *NIMServiceReconciler) checkInferenceServiceStatus(ctx context.Context, nimService *appsv1alpha1.NIMService,
+	deploymentMode kserveconstants.DeploymentModeType) (*ctrl.Result, error) {
 	logger := r.log
 
 	// Check if InferenceService is ready
-	msg, ready, err := r.isInferenceServiceReady(ctx, namespacedName)
+	msg, ready, err := r.isInferenceServiceReady(ctx, nimService)
 	if err != nil {
 		return &ctrl.Result{}, err
 	}
@@ -632,7 +634,7 @@ func (r *NIMServiceReconciler) checkInferenceServiceStatus(ctx context.Context,
 			"NIMService %s not ready yet, msg: %s", nimService.Name, msg)
 	} else {
 		// Update NIMServiceStatus with model config.
-		updateErr := r.updateModelStatus(ctx, nimService, namespacedName, deploymentMode)
+		updateErr := r.updateModelStatus(ctx, nimService, deploymentMode)
 		if updateErr != nil {
 			logger.Info("WARN: Model status update failed, will retry in 5 seconds", "error", updateErr.Error())
 			return &ctrl.Result{RequeueAfter: 5 * time.Second}, nil
@@ -653,33 +655,36 @@ func (r *NIMServiceReconciler) checkInferenceServiceStatus(ctx context.Context,
 }
 
 // isInferenceServiceReady checks if the InferenceService is ready.
-func (r *NIMServiceReconciler) isInferenceServiceReady(ctx context.Context, namespacedName *types.NamespacedName) (string, bool, error) {
+func (r *NIMServiceReconciler) isInferenceServiceReady(ctx context.Context, nimService *appsv1alpha1.NIMService) (string, bool, error) {
 	logger := r.log
 
 	isvc := &kservev1beta1.InferenceService{}
-	err := r.Get(ctx, client.ObjectKey{Name: namespacedName.Name, Namespace: namespacedName.Namespace}, isvc)
+	err := r.Get(ctx, client.ObjectKey{Name: nimService.Name, Namespace: nimService.Namespace}, isvc)
 	if err != nil {
 		logger.Error(err, "failed to fetch inferenceservice")
 		if k8serrors.IsNotFound(err) {
-			return "", false, nil
+			return fmt.Sprintf("Waiting for InferenceService %q creation", nimService.Name), false, nil
 		}
 		return "", false, err
 	}
 
-	var cond knativeapis.Condition
+	var cond *knativeapis.Condition
 	for i := range isvc.Status.Conditions {
 		c := isvc.Status.Conditions[i]
 		if c.Type == kservev1beta1.PredictorReady {
-			cond = c
+			cond = &c
 			break
 		}
 	}
-	return cond.GetMessage(), cond.IsTrue(), nil
+	if cond != nil {
+		return cond.GetMessage(), cond.IsTrue(), nil
+	}
+	return fmt.Sprintf("Waiting for InferenceService %q reporting Predictor readiness", isvc.Name), false, nil
 }
 
 func (r *NIMServiceReconciler) updateModelStatus(ctx context.Context, nimService *appsv1alpha1.NIMService,
-	namespacedName *types.NamespacedName, deploymentMode kserveconstants.DeploymentModeType) error {
-	clusterEndpoint, externalEndpoint, err := r.getNIMModelEndpoints(ctx, nimService, namespacedName, deploymentMode)
+	deploymentMode kserveconstants.DeploymentModeType) error {
+	clusterEndpoint, externalEndpoint, err := r.getNIMModelEndpoints(ctx, nimService, deploymentMode)
 	if err != nil {
 		return err
 	}
@@ -698,11 +703,11 @@ func (r *NIMServiceReconciler) updateModelStatus(ctx context.Context, nimService
 }
 
 func (r *NIMServiceReconciler) getNIMModelEndpoints(ctx context.Context, nimService *appsv1alpha1.NIMService,
-	namespacedName *types.NamespacedName, deploymentMode kserveconstants.DeploymentModeType) (string, string, error) {
+	deploymentMode kserveconstants.DeploymentModeType) (string, string, error) {
 	logger := r.log
 
 	isvc := &kservev1beta1.InferenceService{}
-	err := r.Get(ctx, client.ObjectKey{Name: namespacedName.Name, Namespace: namespacedName.Namespace}, isvc)
+	err := r.Get(ctx, client.ObjectKey{Name: nimService.Name, Namespace: nimService.Namespace}, isvc)
 	if err != nil {
 		logger.Error(err, "unable to fetch k8s service", "nimservice", nimService.GetName())
 		return "", "", err