@@ -30,6 +30,7 @@ import (
3030 networkingv1 "k8s.io/api/networking/v1"
3131 rbacv1 "k8s.io/api/rbac/v1"
3232 "k8s.io/apimachinery/pkg/api/errors"
33+ "k8s.io/apimachinery/pkg/api/meta"
3334 apiResource "k8s.io/apimachinery/pkg/api/resource"
3435 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
3536 "k8s.io/apimachinery/pkg/runtime"
@@ -185,8 +186,35 @@ func (r *NIMServiceReconciler) reconcileNIMService(ctx context.Context, nimServi
185186
186187 deploymentParams .OrchestratorType = string (r .GetOrchestratorType ())
187188
188- // Select PVC for model store
189- if nimService .GetNIMCacheName () != "" { // nolint:gocritic
189+ // Select PVC for model store
190+ nimCacheName := nimService .GetNIMCacheName ()
191+ if len (nimCacheName ) > 0 {
192+ nimCache := appsv1alpha1.NIMCache {}
193+ if err := r .Get (ctx , types.NamespacedName {Name : nimCacheName , Namespace : nimService .GetNamespace ()}, & nimCache ); err != nil {
194+ statusError := r .updater .SetConditionsFailed (ctx , nimService , conditions .ReasonNIMCacheFailed , err .Error ())
195+ r .GetEventRecorder ().Eventf (nimService , corev1 .EventTypeWarning , conditions .Failed , err .Error ())
196+ if statusError != nil {
197+ logger .Error (statusError , "failed to update status" , "nimservice" , nimService .Name )
198+ }
199+ return ctrl.Result {}, err
200+ }
201+ switch nimCache .Status .State {
202+ case appsv1alpha1 .NimCacheStatusReady :
203+ logger .V (4 ).Info ("NIMCache is ready" , "nimcache" , nimCacheName )
204+ case appsv1alpha1 .NimCacheStatusFailed :
205+ msg := r .getNIMCacheFailedMessage (& nimCache )
206+ err = r .updater .SetConditionsFailed (ctx , nimService , conditions .ReasonNIMCacheFailed , msg )
207+ r .GetEventRecorder ().Eventf (nimService , corev1 .EventTypeWarning , conditions .Failed , msg )
208+ default :
209+ msg := fmt .Sprintf ("NIMCache %s not ready" , nimCacheName )
210+ err = r .updater .SetConditionsNotReady (ctx , nimService , conditions .NotReady , msg )
211+ r .GetEventRecorder ().Eventf (nimService , corev1 .EventTypeNormal , conditions .NotReady ,
212+ "NIMService %s not ready yet, msg: %s" , nimService .Name , msg )
213+ if err != nil {
214+ logger .Error (err , "failed to " , "nimservice" , nimService .Name )
215+ }
216+ return ctrl.Result {RequeueAfter : 5 * time .Second }, err
217+ }
190218 // Fetch PVC for the associated NIMCache instance and mount it
191219 nimCachePVC , err := r .getNIMCachePVC (ctx , nimService )
192220 if err != nil {
@@ -724,3 +752,11 @@ func (r *NIMServiceReconciler) assignGPUResources(ctx context.Context, nimServic
724752
725753 return nil
726754}
755+
756+ func (r * NIMServiceReconciler ) getNIMCacheFailedMessage (nimCache * appsv1alpha1.NIMCache ) string {
757+ cond := meta .FindStatusCondition (nimCache .Status .Conditions , conditions .Failed )
758+ if cond != nil {
759+ return cond .Message
760+ }
761+ return ""
762+ }
0 commit comments