@@ -494,6 +494,7 @@ func (r *NIMCacheReconciler) reconcileServiceAccount(ctx context.Context, nimCac
494494 }
495495
496496 // Create the ServiceAccount
497+
497498 err = r .Create (ctx , newSA )
498499 if err != nil {
499500 logger .Error (err , "Failed to create ServiceAccount" , "Name" , saName )
@@ -550,8 +551,8 @@ func (r *NIMCacheReconciler) reconcilePVC(ctx context.Context, nimCache *appsv1a
550551// Model auto-selection is enabled and
551552// Explicit model profiles are not provided by the user.
552553func isModelSelectionRequired (nimCache * appsv1alpha1.NIMCache ) bool {
553- if nimCache .Spec . Source . NGC != nil &&
554- len (nimCache .Spec . Source . NGC . Model .Profiles ) == 0 {
554+ if nimCache .IsOptimizedNIM () &&
555+ len (nimCache .GetModelSpec () .Profiles ) == 0 {
555556 return true
556557 }
557558 return false
@@ -567,8 +568,8 @@ func isModelSelectionDone(nimCache *appsv1alpha1.NIMCache) bool {
567568}
568569
569570func getSelectedProfiles (nimCache * appsv1alpha1.NIMCache ) ([]string , error ) {
570- if nimCache .Spec . Source . NGC != nil {
571- if len (nimCache .Spec . Source . NGC . Model .Profiles ) > 0 {
571+ if nimCache .IsOptimizedNIM () {
572+ if len (nimCache .GetModelSpec () .Profiles ) > 0 {
572573 return nimCache .Spec .Source .NGC .Model .Profiles , nil
573574 }
574575
@@ -589,7 +590,7 @@ func (r *NIMCacheReconciler) reconcileModelManifest(ctx context.Context, nimCach
589590 logger := r .GetLogger ()
590591
591592 // Model manifest is available only for NGC model pullers
592- if nimCache .Spec . Source . NGC == nil {
593+ if ! nimCache .IsOptimizedNIM () {
593594 return false , nil
594595 }
595596
@@ -677,7 +678,7 @@ func (r *NIMCacheReconciler) reconcileModelSelection(ctx context.Context, nimCac
677678 if isModelSelectionRequired (nimCache ) && ! isModelSelectionDone (nimCache ) {
678679 var discoveredGPUs []string
679680 // If no specific GPUs are provided, then auto-detect GPUs in the cluster for profile selection
680- if len (nimCache .Spec . Source . NGC . Model .GPUs ) == 0 {
681+ if len (nimCache .GetModelSpec () .GPUs ) == 0 {
681682 gpusByNode , err := r .GetNodeGPUProducts (ctx )
682683 if err != nil {
683684 logger .Error (err , "Failed to get gpus in the cluster" )
@@ -693,7 +694,7 @@ func (r *NIMCacheReconciler) reconcileModelSelection(ctx context.Context, nimCac
693694 }
694695
695696 // Match profiles with user input
696- profiles , err := nimManifest .MatchProfiles (nimCache .Spec . Source . NGC . Model , discoveredGPUs )
697+ profiles , err := nimManifest .MatchProfiles (nimCache .GetModelSpec () , discoveredGPUs )
697698 if err != nil {
698699 logger .Error (err , "Failed to match profiles for given model parameters" )
699700 return err
@@ -1084,7 +1085,8 @@ func (r *NIMCacheReconciler) constructJob(ctx context.Context, nimCache *appsv1a
10841085 }
10851086 }
10861087
1087- if nimCache .Spec .Source .DataStore != nil || nimCache .Spec .Source .HF != nil {
1088+ switch {
1089+ case nimCache .Spec .Source .DataStore != nil || nimCache .Spec .Source .HF != nil :
10881090 var hfDataSource HFInterface
10891091 if nimCache .Spec .Source .DataStore != nil {
10901092 hfDataSource = nimCache .Spec .Source .DataStore
@@ -1117,6 +1119,10 @@ func (r *NIMCacheReconciler) constructJob(ctx context.Context, nimCache *appsv1a
11171119 Name : "HF_ENDPOINT" ,
11181120 Value : hfDataSource .GetEndpoint (),
11191121 },
1122+ {
1123+ Name : "HF_HUB_OFFLINE" ,
1124+ Value : "0" ,
1125+ },
11201126 },
11211127 VolumeMounts : []corev1.VolumeMount {
11221128 {
@@ -1152,7 +1158,8 @@ func (r *NIMCacheReconciler) constructJob(ctx context.Context, nimCache *appsv1a
11521158 Name : hfDataSource .GetPullSecret (),
11531159 },
11541160 }
1155- } else if nimCache .Spec .Source .NGC != nil {
1161+
1162+ case nimCache .Spec .Source .NGC != nil && nimCache .Spec .Source .NGC .ModelEndpoint == nil :
11561163 job .Spec .Template .Spec .Containers = []corev1.Container {
11571164 {
11581165 Name : NIMCacheContainerName ,
@@ -1220,8 +1227,57 @@ func (r *NIMCacheReconciler) constructJob(ctx context.Context, nimCache *appsv1a
12201227 job .Spec .Template .Spec .Containers [0 ].Args = append (job .Spec .Template .Spec .Containers [0 ].Args , selectedProfiles ... )
12211228 }
12221229 }
1223- }
12241230
1231+ case nimCache .Spec .Source .NGC != nil && nimCache .Spec .Source .NGC .ModelEndpoint != nil :
1232+ job .Spec .Template .Spec .Containers = []corev1.Container {
1233+ {
1234+ Name : NIMCacheContainerName ,
1235+ Image : nimCache .Spec .Source .NGC .ModelPuller ,
1236+ Command : []string {"create-model-store" },
1237+ Args : []string {"--model-repo" , * nimCache .Spec .Source .NGC .ModelEndpoint , "--model-store" , "/model-store" },
1238+ EnvFrom : nimCache .Spec .Source .EnvFromSecrets (),
1239+ Env : []corev1.EnvVar {
1240+ {
1241+ Name : "NIM_CACHE_PATH" ,
1242+ Value : utils .DefaultModelStorePath ,
1243+ },
1244+ },
1245+ VolumeMounts : []corev1.VolumeMount {
1246+ {
1247+ Name : "nim-cache-volume" ,
1248+ MountPath : utils .DefaultModelStorePath ,
1249+ SubPath : nimCache .Spec .Storage .PVC .SubPath ,
1250+ },
1251+ },
1252+ Resources : corev1.ResourceRequirements {
1253+ Limits : map [corev1.ResourceName ]apiResource.Quantity {
1254+ "cpu" : nimCache .Spec .Resources .CPU ,
1255+ "memory" : nimCache .Spec .Resources .Memory ,
1256+ },
1257+ Requests : map [corev1.ResourceName ]apiResource.Quantity {
1258+ "cpu" : nimCache .Spec .Resources .CPU ,
1259+ "memory" : nimCache .Spec .Resources .Memory ,
1260+ },
1261+ },
1262+ TerminationMessagePath : "/dev/termination-log" ,
1263+ TerminationMessagePolicy : corev1 .TerminationMessageFallbackToLogsOnError ,
1264+ SecurityContext : & corev1.SecurityContext {
1265+ AllowPrivilegeEscalation : ptr.To [bool ](false ),
1266+ Capabilities : & corev1.Capabilities {
1267+ Drop : []corev1.Capability {"ALL" },
1268+ },
1269+ RunAsNonRoot : ptr.To [bool ](true ),
1270+ RunAsGroup : nimCache .GetGroupID (),
1271+ RunAsUser : nimCache .GetUserID (),
1272+ },
1273+ },
1274+ }
1275+ job .Spec .Template .Spec .ImagePullSecrets = []corev1.LocalObjectReference {
1276+ {
1277+ Name : nimCache .Spec .Source .NGC .PullSecret ,
1278+ },
1279+ }
1280+ }
12251281 // Merge env with the user provided values
12261282 job .Spec .Template .Spec .Containers [0 ].Env = utils .MergeEnvVars (job .Spec .Template .Spec .Containers [0 ].Env , nimCache .Spec .Env )
12271283
0 commit comments