Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 19 additions & 19 deletions api/apps/v1alpha1/nimservice_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -309,95 +309,95 @@ func (n *NIMService) GetLWSLeaderEnv() []corev1.EnvVar {
mpiTimeout = n.Spec.MultiNode.MPI.MPIStartTimeout
}

env = append(env,
corev1.EnvVar{
env = utils.MergeEnvVars([]corev1.EnvVar{
{
Name: "NIM_LEADER_ROLE",
Value: "1",
},
corev1.EnvVar{
{
Name: "NIM_MPI_ALLOW_RUN_AS_ROOT",
Value: "0",
},
corev1.EnvVar{
{
Name: "OMPI_MCA_orte_keep_fqdn_hostnames",
Value: "true",
},
corev1.EnvVar{
{
Name: "OMPI_MCA_plm_rsh_args",
Value: "-o ConnectionAttempts=20",
},
corev1.EnvVar{
{
Name: "NIM_NUM_COMPUTE_NODES",
Value: fmt.Sprintf("%d", n.Spec.MultiNode.Size),
},
corev1.EnvVar{
{
Name: "GPUS_PER_NODE",
Value: fmt.Sprintf("%d", n.Spec.MultiNode.GPUSPerPod),
},
corev1.EnvVar{
{
Name: "CLUSTER_START_TIMEOUT",
Value: fmt.Sprintf("%d", mpiTimeout),
},
corev1.EnvVar{
{
Name: "CLUSTER_SIZE",
ValueFrom: &corev1.EnvVarSource{
FieldRef: &corev1.ObjectFieldSelector{
FieldPath: "metadata.annotations['leaderworkerset.sigs.k8s.io/size']",
},
},
},
corev1.EnvVar{
{
Name: "GROUP_INDEX",
ValueFrom: &corev1.EnvVarSource{
FieldRef: &corev1.ObjectFieldSelector{
FieldPath: "metadata.labels['leaderworkerset.sigs.k8s.io/group-index']",
},
},
},
)
}, env)
return env
}

// GetLWSWorkerEnv returns the environment variables from n.GetEnv() combined
// with a fixed set of multi-node entries: NIM_LEADER_ROLE ("0"),
// NIM_MPI_ALLOW_RUN_AS_ROOT ("0"), NIM_NUM_COMPUTE_NODES (formatted from
// n.Spec.MultiNode.Size), and LEADER_NAME, NAMESPACE and LWS_NAME.
//
// NOTE(review): this span is a rendered diff whose +/- markers were lost, so
// each removed line of the old `append(env, corev1.EnvVar{...})` form is
// immediately followed by its replacement from the new
// `utils.MergeEnvVars([]corev1.EnvVar{...}, env)` form. It is not compilable
// as written.
// NOTE(review): which side wins when a name appears in both slices is decided
// by utils.MergeEnvVars, which is not visible here; confirm against its
// definition before relying on override behaviour.
func (n *NIMService) GetLWSWorkerEnv() []corev1.EnvVar {
env := n.GetEnv()
env = append(env,
corev1.EnvVar{
env = utils.MergeEnvVars([]corev1.EnvVar{
{
Name: "NIM_LEADER_ROLE",
Value: "0",
},
corev1.EnvVar{
{
Name: "NIM_MPI_ALLOW_RUN_AS_ROOT",
Value: "0",
},
corev1.EnvVar{
{
Name: "NIM_NUM_COMPUTE_NODES",
Value: fmt.Sprintf("%d", n.Spec.MultiNode.Size),
},
// The remaining entries carry no literal value; each is resolved through a
// FieldRef on the object's metadata (an annotation, the namespace, a label).
corev1.EnvVar{
{
Name: "LEADER_NAME",
ValueFrom: &corev1.EnvVarSource{
FieldRef: &corev1.ObjectFieldSelector{
FieldPath: "metadata.annotations['leaderworkerset.sigs.k8s.io/leader-name']",
},
},
},
corev1.EnvVar{
{
Name: "NAMESPACE",
ValueFrom: &corev1.EnvVarSource{
FieldRef: &corev1.ObjectFieldSelector{
FieldPath: "metadata.namespace",
},
},
},
corev1.EnvVar{
{
Name: "LWS_NAME",
ValueFrom: &corev1.EnvVarSource{
FieldRef: &corev1.ObjectFieldSelector{
FieldPath: "metadata.labels['leaderworkerset.sigs.k8s.io/name']",
},
},
},
)
}, env)
return env
}

Expand Down
24 changes: 12 additions & 12 deletions internal/controller/platform/standalone/nimservice.go
Original file line number Diff line number Diff line change
Expand Up @@ -324,7 +324,7 @@ func (r *NIMServiceReconciler) reconcileNIMService(ctx context.Context, nimServi
return ctrl.Result{}, err
}

var profileEnv *corev1.EnvVar
var profileEnv *[]corev1.EnvVar
var profile *appsv1alpha1.NIMProfile
var gpuResources *corev1.ResourceRequirements
var initContainers []corev1.Container
Expand All @@ -333,10 +333,10 @@ func (r *NIMServiceReconciler) reconcileNIMService(ctx context.Context, nimServi
var renderObj client.Object

if modelProfile != "" {
profileEnv = &corev1.EnvVar{
profileEnv = &[]corev1.EnvVar{{
Name: "NIM_MODEL_PROFILE",
Value: modelProfile,
}
}}

// Only assign GPU resources if the NIMCache is for optimized NIM
if nimCache.IsOptimizedNIM() {
Expand Down Expand Up @@ -370,20 +370,20 @@ func (r *NIMServiceReconciler) reconcileNIMService(ctx context.Context, nimServi
lwsParams.LeaderVolumes = nimService.GetLeaderVolumes(*modelPVC)
lwsParams.WorkerVolumes = nimService.GetWorkerVolumes(*modelPVC)
if nimCache.IsUniversalNIM() {
lwsParams.WorkerEnvs = append(lwsParams.WorkerEnvs, corev1.EnvVar{
lwsParams.WorkerEnvs = utils.MergeEnvVars([]corev1.EnvVar{{
Name: "NIM_MODEL_NAME",
Value: utils.DefaultModelStorePath,
})
lwsParams.LeaderEnvs = append(lwsParams.LeaderEnvs, corev1.EnvVar{
}}, lwsParams.WorkerEnvs)
lwsParams.LeaderEnvs = utils.MergeEnvVars([]corev1.EnvVar{{
Name: "NIM_MODEL_NAME",
Value: utils.DefaultModelStorePath,
})
}}, lwsParams.LeaderEnvs)
}
lwsParams.LeaderVolumeMounts = nimService.GetLeaderVolumeMounts(*modelPVC)
lwsParams.WorkerVolumeMounts = nimService.GetWorkerVolumeMounts(*modelPVC)
if profileEnv != nil {
lwsParams.WorkerEnvs = append(lwsParams.WorkerEnvs, *profileEnv)
lwsParams.LeaderEnvs = append(lwsParams.LeaderEnvs, *profileEnv)
lwsParams.WorkerEnvs = utils.MergeEnvVars(*profileEnv, lwsParams.WorkerEnvs)
lwsParams.LeaderEnvs = utils.MergeEnvVars(*profileEnv, lwsParams.LeaderEnvs)
}
if gpuResources != nil {
lwsParams.Resources = gpuResources
Expand Down Expand Up @@ -415,16 +415,16 @@ func (r *NIMServiceReconciler) reconcileNIMService(ctx context.Context, nimServi
deploymentParams.OrchestratorType = string(r.GetOrchestratorType())
deploymentParams.PodResourceClaims = shared.GetPodResourceClaims(namedDraResources)
if nimCache.IsUniversalNIM() {
deploymentParams.Env = append(deploymentParams.Env, corev1.EnvVar{
deploymentParams.Env = utils.MergeEnvVars([]corev1.EnvVar{{
Name: "NIM_MODEL_NAME",
Value: utils.DefaultModelStorePath,
})
}}, deploymentParams.Env)
}
// Setup volume mounts with model store
deploymentParams.Volumes = nimService.GetVolumes(*modelPVC)
deploymentParams.VolumeMounts = nimService.GetVolumeMounts(*modelPVC)
if profileEnv != nil {
deploymentParams.Env = append(deploymentParams.Env, *profileEnv)
deploymentParams.Env = utils.MergeEnvVars(*profileEnv, deploymentParams.Env)
}
// Auto assign GPU resources in case of the optimized profile
if gpuResources != nil {
Expand Down
Loading
Loading