@@ -498,6 +498,7 @@ func (r *NIMCacheReconciler) reconcileServiceAccount(ctx context.Context, nimCac
498498 }
499499
500500 // Create the ServiceAccount
501+
501502 err = r .Create (ctx , newSA )
502503 if err != nil {
503504 logger .Error (err , "Failed to create ServiceAccount" , "Name" , saName )
@@ -554,8 +555,8 @@ func (r *NIMCacheReconciler) reconcilePVC(ctx context.Context, nimCache *appsv1a
554555// Model auto-selection is enabled and
555556// Explicit model profiles are not provided by the user.
556557func isModelSelectionRequired (nimCache * appsv1alpha1.NIMCache ) bool {
557- if nimCache .Spec . Source . NGC != nil &&
558- len (nimCache .Spec . Source . NGC . Model .Profiles ) == 0 {
558+ if nimCache .IsOptimizedNIM () &&
559+ len (nimCache .GetModelSpec () .Profiles ) == 0 {
559560 return true
560561 }
561562 return false
@@ -571,8 +572,8 @@ func isModelSelectionDone(nimCache *appsv1alpha1.NIMCache) bool {
571572}
572573
573574func getSelectedProfiles (nimCache * appsv1alpha1.NIMCache ) ([]string , error ) {
574- if nimCache .Spec . Source . NGC != nil {
575- if len (nimCache .Spec . Source . NGC . Model .Profiles ) > 0 {
575+ if nimCache .IsOptimizedNIM () {
576+ if len (nimCache .GetModelSpec () .Profiles ) > 0 {
576577 return nimCache .Spec .Source .NGC .Model .Profiles , nil
577578 }
578579
@@ -593,7 +594,7 @@ func (r *NIMCacheReconciler) reconcileModelManifest(ctx context.Context, nimCach
593594 logger := r .GetLogger ()
594595
595596 // Model manifest is available only for NGC model pullers
596- if nimCache .Spec . Source . NGC == nil {
597+ if ! nimCache .IsOptimizedNIM () {
597598 return false , nil
598599 }
599600
@@ -681,7 +682,7 @@ func (r *NIMCacheReconciler) reconcileModelSelection(ctx context.Context, nimCac
681682 if isModelSelectionRequired (nimCache ) && ! isModelSelectionDone (nimCache ) {
682683 var discoveredGPUs []string
683684 // If no specific GPUs are provided, then auto-detect GPUs in the cluster for profile selection
684- if len (nimCache .Spec . Source . NGC . Model .GPUs ) == 0 {
685+ if len (nimCache .GetModelSpec () .GPUs ) == 0 {
685686 gpusByNode , err := r .GetNodeGPUProducts (ctx )
686687 if err != nil {
687688 logger .Error (err , "Failed to get gpus in the cluster" )
@@ -697,7 +698,7 @@ func (r *NIMCacheReconciler) reconcileModelSelection(ctx context.Context, nimCac
697698 }
698699
699700 // Match profiles with user input
700- profiles , err := nimManifest .MatchProfiles (nimCache .Spec . Source . NGC . Model , discoveredGPUs )
701+ profiles , err := nimManifest .MatchProfiles (nimCache .GetModelSpec () , discoveredGPUs )
701702 if err != nil {
702703 logger .Error (err , "Failed to match profiles for given model parameters" )
703704 return err
@@ -1117,7 +1118,8 @@ func (r *NIMCacheReconciler) constructJob(ctx context.Context, nimCache *appsv1a
11171118 }
11181119 }
11191120
1120- if nimCache .Spec .Source .DataStore != nil || nimCache .Spec .Source .HF != nil {
1121+ switch {
1122+ case nimCache .Spec .Source .DataStore != nil || nimCache .Spec .Source .HF != nil :
11211123 var hfDataSource HFInterface
11221124 if nimCache .Spec .Source .DataStore != nil {
11231125 hfDataSource = nimCache .Spec .Source .DataStore
@@ -1150,6 +1152,10 @@ func (r *NIMCacheReconciler) constructJob(ctx context.Context, nimCache *appsv1a
11501152 Name : "HF_ENDPOINT" ,
11511153 Value : hfDataSource .GetEndpoint (),
11521154 },
1155+ {
1156+ Name : "HF_HUB_OFFLINE" ,
1157+ Value : "0" ,
1158+ },
11531159 },
11541160 VolumeMounts : []corev1.VolumeMount {
11551161 {
@@ -1185,7 +1191,8 @@ func (r *NIMCacheReconciler) constructJob(ctx context.Context, nimCache *appsv1a
11851191 Name : hfDataSource .GetPullSecret (),
11861192 },
11871193 }
1188- } else if nimCache .Spec .Source .NGC != nil {
1194+
1195+ case nimCache .Spec .Source .NGC != nil && nimCache .Spec .Source .NGC .ModelEndpoint == nil :
11891196 job .Spec .Template .Spec .Containers = []corev1.Container {
11901197 {
11911198 Name : NIMCacheContainerName ,
@@ -1253,8 +1260,57 @@ func (r *NIMCacheReconciler) constructJob(ctx context.Context, nimCache *appsv1a
12531260 job .Spec .Template .Spec .Containers [0 ].Args = append (job .Spec .Template .Spec .Containers [0 ].Args , selectedProfiles ... )
12541261 }
12551262 }
1256- }
12571263
1264+ case nimCache .Spec .Source .NGC != nil && nimCache .Spec .Source .NGC .ModelEndpoint != nil :
1265+ job .Spec .Template .Spec .Containers = []corev1.Container {
1266+ {
1267+ Name : NIMCacheContainerName ,
1268+ Image : nimCache .Spec .Source .NGC .ModelPuller ,
1269+ Command : []string {"create-model-store" },
1270+ Args : []string {"--model-repo" , * nimCache .Spec .Source .NGC .ModelEndpoint , "--model-store" , "/model-store" },
1271+ EnvFrom : nimCache .Spec .Source .EnvFromSecrets (),
1272+ Env : []corev1.EnvVar {
1273+ {
1274+ Name : "NIM_CACHE_PATH" ,
1275+ Value : utils .DefaultModelStorePath ,
1276+ },
1277+ },
1278+ VolumeMounts : []corev1.VolumeMount {
1279+ {
1280+ Name : "nim-cache-volume" ,
1281+ MountPath : utils .DefaultModelStorePath ,
1282+ SubPath : nimCache .Spec .Storage .PVC .SubPath ,
1283+ },
1284+ },
1285+ Resources : corev1.ResourceRequirements {
1286+ Limits : map [corev1.ResourceName ]apiResource.Quantity {
1287+ "cpu" : nimCache .Spec .Resources .CPU ,
1288+ "memory" : nimCache .Spec .Resources .Memory ,
1289+ },
1290+ Requests : map [corev1.ResourceName ]apiResource.Quantity {
1291+ "cpu" : nimCache .Spec .Resources .CPU ,
1292+ "memory" : nimCache .Spec .Resources .Memory ,
1293+ },
1294+ },
1295+ TerminationMessagePath : "/dev/termination-log" ,
1296+ TerminationMessagePolicy : corev1 .TerminationMessageFallbackToLogsOnError ,
1297+ SecurityContext : & corev1.SecurityContext {
1298+ AllowPrivilegeEscalation : ptr.To [bool ](false ),
1299+ Capabilities : & corev1.Capabilities {
1300+ Drop : []corev1.Capability {"ALL" },
1301+ },
1302+ RunAsNonRoot : ptr.To [bool ](true ),
1303+ RunAsGroup : nimCache .GetGroupID (),
1304+ RunAsUser : nimCache .GetUserID (),
1305+ },
1306+ },
1307+ }
1308+ job .Spec .Template .Spec .ImagePullSecrets = []corev1.LocalObjectReference {
1309+ {
1310+ Name : nimCache .Spec .Source .NGC .PullSecret ,
1311+ },
1312+ }
1313+ }
12581314 // Merge env with the user provided values
12591315 job .Spec .Template .Spec .Containers [0 ].Env = utils .MergeEnvVars (job .Spec .Template .Spec .Containers [0 ].Env , nimCache .Spec .Env )
12601316
0 commit comments