Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 31 additions & 26 deletions internal/controller/platform/kserve/nimservice.go
Original file line number Diff line number Diff line change
Expand Up @@ -175,9 +175,7 @@ func (r *NIMServiceReconciler) reconcileNIMService(ctx context.Context, nimServi
}

var result *ctrl.Result
result, err = r.checkInferenceServiceStatus(ctx,
&types.NamespacedName{Name: nimService.GetName(), Namespace: nimService.GetNamespace()},
nimService, deploymentMode)
result, err = r.checkInferenceServiceStatus(ctx, nimService, deploymentMode)

if err != nil {
r.log.Error(err, "Unable to update status")
Expand Down Expand Up @@ -400,7 +398,7 @@ func (r *NIMServiceReconciler) renderAndSyncInferenceService(ctx context.Context

logger := r.log

var profileEnv *corev1.EnvVar
var profileEnv *[]corev1.EnvVar
var profile *appsv1alpha1.NIMProfile
var gpuResources *corev1.ResourceRequirements
var initContainers []corev1.Container
Expand All @@ -410,9 +408,11 @@ func (r *NIMServiceReconciler) renderAndSyncInferenceService(ctx context.Context

// Setup env for explicit override profile is specified
if modelProfile != "" {
profileEnv = &corev1.EnvVar{
Name: "NIM_MODEL_PROFILE",
Value: modelProfile,
profileEnv = &[]corev1.EnvVar{
{
Name: "NIM_MODEL_PROFILE",
Value: modelProfile,
},
}

// Only assign GPU resources if the NIMCache is for optimized NIM
Expand Down Expand Up @@ -457,16 +457,18 @@ func (r *NIMServiceReconciler) renderAndSyncInferenceService(ctx context.Context

isvcParams.PodResourceClaims = shared.GetPodResourceClaims(namedDraResources)
if nimCache.IsUniversalNIM() {
isvcParams.Env = append(isvcParams.Env, corev1.EnvVar{
Name: "NIM_MODEL_NAME",
Value: utils.DefaultModelStorePath,
})
isvcParams.Env = utils.MergeEnvVars([]corev1.EnvVar{
{
Name: "NIM_MODEL_NAME",
Value: utils.DefaultModelStorePath,
},
}, isvcParams.Env)
}
// Setup volume mounts with model store
isvcParams.Volumes = nimService.GetVolumes(*modelPVC)
isvcParams.VolumeMounts = nimService.GetVolumeMounts(*modelPVC)
if profileEnv != nil {
isvcParams.Env = append(isvcParams.Env, *profileEnv)
isvcParams.Env = utils.MergeEnvVars(*profileEnv, isvcParams.Env)
}
// Auto assign GPU resources in case of the optimized profile
if gpuResources != nil {
Expand Down Expand Up @@ -602,12 +604,12 @@ func (r *NIMServiceReconciler) addGPUResources(ctx context.Context, nimService *
return resources, nil
}

func (r *NIMServiceReconciler) checkInferenceServiceStatus(ctx context.Context, namespacedName *types.NamespacedName,
nimService *appsv1alpha1.NIMService, deploymentMode kserveconstants.DeploymentModeType) (*ctrl.Result, error) {
func (r *NIMServiceReconciler) checkInferenceServiceStatus(ctx context.Context, nimService *appsv1alpha1.NIMService,
deploymentMode kserveconstants.DeploymentModeType) (*ctrl.Result, error) {
logger := r.log

// Check if InferenceService is ready
msg, ready, err := r.isInferenceServiceReady(ctx, namespacedName)
msg, ready, err := r.isInferenceServiceReady(ctx, nimService)
if err != nil {
return &ctrl.Result{}, err
}
Expand All @@ -632,7 +634,7 @@ func (r *NIMServiceReconciler) checkInferenceServiceStatus(ctx context.Context,
"NIMService %s not ready yet, msg: %s", nimService.Name, msg)
} else {
// Update NIMServiceStatus with model config.
updateErr := r.updateModelStatus(ctx, nimService, namespacedName, deploymentMode)
updateErr := r.updateModelStatus(ctx, nimService, deploymentMode)
if updateErr != nil {
logger.Info("WARN: Model status update failed, will retry in 5 seconds", "error", updateErr.Error())
return &ctrl.Result{RequeueAfter: 5 * time.Second}, nil
Expand All @@ -653,33 +655,36 @@ func (r *NIMServiceReconciler) checkInferenceServiceStatus(ctx context.Context,
}

// isInferenceServiceReady checks if the InferenceService is ready.
func (r *NIMServiceReconciler) isInferenceServiceReady(ctx context.Context, namespacedName *types.NamespacedName) (string, bool, error) {
func (r *NIMServiceReconciler) isInferenceServiceReady(ctx context.Context, nimService *appsv1alpha1.NIMService) (string, bool, error) {
logger := r.log

isvc := &kservev1beta1.InferenceService{}
err := r.Get(ctx, client.ObjectKey{Name: namespacedName.Name, Namespace: namespacedName.Namespace}, isvc)
err := r.Get(ctx, client.ObjectKey{Name: nimService.Name, Namespace: nimService.Namespace}, isvc)
if err != nil {
logger.Error(err, "failed to fetch inferenceservice")
if k8serrors.IsNotFound(err) {
return "", false, nil
return fmt.Sprintf("Waiting for InferenceService %q creation", nimService.Name), false, nil
}
return "", false, err
}

var cond knativeapis.Condition
var cond *knativeapis.Condition
for i := range isvc.Status.Conditions {
c := isvc.Status.Conditions[i]
if c.Type == kservev1beta1.PredictorReady {
cond = c
cond = &c
break
}
}
return cond.GetMessage(), cond.IsTrue(), nil
if cond != nil {
return cond.GetMessage(), cond.IsTrue(), nil
}
return fmt.Sprintf("Waiting for InferenceService %q reporting Predictor readiness", isvc.Name), false, nil
}

func (r *NIMServiceReconciler) updateModelStatus(ctx context.Context, nimService *appsv1alpha1.NIMService,
namespacedName *types.NamespacedName, deploymentMode kserveconstants.DeploymentModeType) error {
clusterEndpoint, externalEndpoint, err := r.getNIMModelEndpoints(ctx, nimService, namespacedName, deploymentMode)
deploymentMode kserveconstants.DeploymentModeType) error {
clusterEndpoint, externalEndpoint, err := r.getNIMModelEndpoints(ctx, nimService, deploymentMode)
if err != nil {
return err
}
Expand All @@ -698,11 +703,11 @@ func (r *NIMServiceReconciler) updateModelStatus(ctx context.Context, nimService
}

func (r *NIMServiceReconciler) getNIMModelEndpoints(ctx context.Context, nimService *appsv1alpha1.NIMService,
namespacedName *types.NamespacedName, deploymentMode kserveconstants.DeploymentModeType) (string, string, error) {
deploymentMode kserveconstants.DeploymentModeType) (string, string, error) {
logger := r.log

isvc := &kservev1beta1.InferenceService{}
err := r.Get(ctx, client.ObjectKey{Name: namespacedName.Name, Namespace: namespacedName.Namespace}, isvc)
err := r.Get(ctx, client.ObjectKey{Name: nimService.Name, Namespace: nimService.Namespace}, isvc)
if err != nil {
logger.Error(err, "unable to fetch k8s service", "nimservice", nimService.GetName())
return "", "", err
Expand Down
Loading
Loading