Skip to content

Commit 6ce2ed1

Browse files
authored
Merge branch 'NVIDIA:main' into webhook-scaffolding
2 parents 32d36ee + 9e3375e commit 6ce2ed1

35 files changed: +2848 -265 lines changed

api/apps/v1alpha1/common_types.go

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -244,8 +244,6 @@ type HFSecret struct {
244244
}
245245

246246
// PersistentVolumeClaim defines the attributes of PVC.
247-
// +kubebuilder:validation:XValidation:rule="!has(self.create) || !self.create || (has(self.size) && self.size != \"\")", message="size is required for pvc creation"
248-
// +kubebuilder:validation:XValidation:rule="!has(self.create) || !self.create || (has(self.volumeAccessMode) && self.volumeAccessMode != \"\")", message="volumeAccessMode is required for pvc creation"
249247
type PersistentVolumeClaim struct {
250248
// Create specifies whether to create a new PersistentVolumeClaim (PVC).
251249
// If set to false, an existing PVC must be referenced via the `Name` field.

api/apps/v1alpha1/nimcache_types.go

Lines changed: 17 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -72,19 +72,22 @@ type NIMSource struct {
7272

7373
// +kubebuilder:validation:XValidation:rule="(has(self.modelName) ? 1 : 0) + (has(self.datasetName) ? 1 : 0) == 1",message="Exactly one of modelName or datasetName must be defined"
7474
type DSHFCommonFields struct {
75-
// modelName is the name of the model
75+
// ModelName is the name of the model
7676
ModelName *string `json:"modelName,omitempty"`
77-
// datasetName is the name of the dataset
77+
// DatasetName is the name of the dataset
7878
DatasetName *string `json:"datasetName,omitempty"`
79-
// authSecret is the name of the secret containing the "HF_TOKEN" token
79+
// AuthSecret is the name of the secret containing the "HF_TOKEN" token
8080
// +kubebuilder:validation:MinLength=1
8181
AuthSecret string `json:"authSecret"`
82-
// modelPuller is the containerized huggingface-cli image to pull the data
82+
// ModelPuller is the containerized huggingface-cli image to pull the data
8383
// +kubebuilder:validation:MinLength=1
8484
ModelPuller string `json:"modelPuller"`
85-
// pullSecret is the name of the image pull secret for the modelPuller image
85+
// PullSecret is the name of the image pull secret for the modelPuller image
8686
// +kubebuilder:validation:MinLength=1
8787
PullSecret string `json:"pullSecret"`
88+
// Revision is the revision of the object to be cached. This is either a commit hash, branch name or tag.
89+
// +kubebuilder:validation:MinLength=1
90+
Revision *string `json:"revision,omitempty"`
8891
}
8992

9093
type NemoDataStoreSource struct {
@@ -156,6 +159,8 @@ type NIMCacheStorage struct {
156159
// PersistentVolumeClaim is the pvc volume used for caching NIM
157160
PVC PersistentVolumeClaim `json:"pvc,omitempty"`
158161
// HostPath is the host path volume for caching NIM
162+
//
163+
// Deprecated: use PVC instead.
159164
HostPath *string `json:"hostPath,omitempty"`
160165
}
161166

@@ -423,6 +428,13 @@ func (d *DSHFCommonFields) GetPullSecret() string {
423428
return d.PullSecret
424429
}
425430

431+
func (d *DSHFCommonFields) GetRevision() string {
432+
if d.Revision == nil {
433+
return ""
434+
}
435+
return *d.Revision
436+
}
437+
426438
func (d *HuggingFaceHubSource) GetEndpoint() string {
427439
return d.Endpoint
428440
}

api/apps/v1alpha1/nimservice_types.go

Lines changed: 19 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -309,95 +309,95 @@ func (n *NIMService) GetLWSLeaderEnv() []corev1.EnvVar {
309309
mpiTimeout = n.Spec.MultiNode.MPI.MPIStartTimeout
310310
}
311311

312-
env = append(env,
313-
corev1.EnvVar{
312+
env = utils.MergeEnvVars([]corev1.EnvVar{
313+
{
314314
Name: "NIM_LEADER_ROLE",
315315
Value: "1",
316316
},
317-
corev1.EnvVar{
317+
{
318318
Name: "NIM_MPI_ALLOW_RUN_AS_ROOT",
319319
Value: "0",
320320
},
321-
corev1.EnvVar{
321+
{
322322
Name: "OMPI_MCA_orte_keep_fqdn_hostnames",
323323
Value: "true",
324324
},
325-
corev1.EnvVar{
325+
{
326326
Name: "OMPI_MCA_plm_rsh_args",
327327
Value: "-o ConnectionAttempts=20",
328328
},
329-
corev1.EnvVar{
329+
{
330330
Name: "NIM_NUM_COMPUTE_NODES",
331331
Value: fmt.Sprintf("%d", n.Spec.MultiNode.Size),
332332
},
333-
corev1.EnvVar{
333+
{
334334
Name: "GPUS_PER_NODE",
335335
Value: fmt.Sprintf("%d", n.Spec.MultiNode.GPUSPerPod),
336336
},
337-
corev1.EnvVar{
337+
{
338338
Name: "CLUSTER_START_TIMEOUT",
339339
Value: fmt.Sprintf("%d", mpiTimeout),
340340
},
341-
corev1.EnvVar{
341+
{
342342
Name: "CLUSTER_SIZE",
343343
ValueFrom: &corev1.EnvVarSource{
344344
FieldRef: &corev1.ObjectFieldSelector{
345345
FieldPath: "metadata.annotations['leaderworkerset.sigs.k8s.io/size']",
346346
},
347347
},
348348
},
349-
corev1.EnvVar{
349+
{
350350
Name: "GROUP_INDEX",
351351
ValueFrom: &corev1.EnvVarSource{
352352
FieldRef: &corev1.ObjectFieldSelector{
353353
FieldPath: "metadata.labels['leaderworkerset.sigs.k8s.io/group-index']",
354354
},
355355
},
356356
},
357-
)
357+
}, env)
358358
return env
359359
}
360360

361361
func (n *NIMService) GetLWSWorkerEnv() []corev1.EnvVar {
362362
env := n.GetEnv()
363-
env = append(env,
364-
corev1.EnvVar{
363+
env = utils.MergeEnvVars([]corev1.EnvVar{
364+
{
365365
Name: "NIM_LEADER_ROLE",
366366
Value: "0",
367367
},
368-
corev1.EnvVar{
368+
{
369369
Name: "NIM_MPI_ALLOW_RUN_AS_ROOT",
370370
Value: "0",
371371
},
372-
corev1.EnvVar{
372+
{
373373
Name: "NIM_NUM_COMPUTE_NODES",
374374
Value: fmt.Sprintf("%d", n.Spec.MultiNode.Size),
375375
},
376-
corev1.EnvVar{
376+
{
377377
Name: "LEADER_NAME",
378378
ValueFrom: &corev1.EnvVarSource{
379379
FieldRef: &corev1.ObjectFieldSelector{
380380
FieldPath: "metadata.annotations['leaderworkerset.sigs.k8s.io/leader-name']",
381381
},
382382
},
383383
},
384-
corev1.EnvVar{
384+
{
385385
Name: "NAMESPACE",
386386
ValueFrom: &corev1.EnvVarSource{
387387
FieldRef: &corev1.ObjectFieldSelector{
388388
FieldPath: "metadata.namespace",
389389
},
390390
},
391391
},
392-
corev1.EnvVar{
392+
{
393393
Name: "LWS_NAME",
394394
ValueFrom: &corev1.EnvVarSource{
395395
FieldRef: &corev1.ObjectFieldSelector{
396396
FieldPath: "metadata.labels['leaderworkerset.sigs.k8s.io/name']",
397397
},
398398
},
399399
},
400-
)
400+
}, env)
401401
return env
402402
}
403403

api/apps/v1alpha1/zz_generated.deepcopy.go

Lines changed: 5 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

bundle/manifests/apps.nvidia.com_nemocustomizers.yaml

Lines changed: 0 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -2080,13 +2080,6 @@ spec:
20802080
the PVC
20812081
type: string
20822082
type: object
2083-
x-kubernetes-validations:
2084-
- message: size is required for pvc creation
2085-
rule: '!has(self.create) || !self.create || (has(self.size)
2086-
&& self.size != "")'
2087-
- message: volumeAccessMode is required for pvc creation
2088-
rule: '!has(self.create) || !self.create || (has(self.volumeAccessMode)
2089-
&& self.volumeAccessMode != "")'
20902083
networkConfig:
20912084
description: NetworkConfig is the network configuration for multi-node
20922085
training

bundle/manifests/apps.nvidia.com_nemodatastores.yaml

Lines changed: 0 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -815,13 +815,6 @@ spec:
815815
PVC
816816
type: string
817817
type: object
818-
x-kubernetes-validations:
819-
- message: size is required for pvc creation
820-
rule: '!has(self.create) || !self.create || (has(self.size) && self.size
821-
!= "")'
822-
- message: volumeAccessMode is required for pvc creation
823-
rule: '!has(self.create) || !self.create || (has(self.volumeAccessMode)
824-
&& self.volumeAccessMode != "")'
825818
replicas:
826819
default: 1
827820
minimum: 1

bundle/manifests/apps.nvidia.com_nemoguardrails.yaml

Lines changed: 0 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -106,13 +106,6 @@ spec:
106106
the PVC
107107
type: string
108108
type: object
109-
x-kubernetes-validations:
110-
- message: size is required for pvc creation
111-
rule: '!has(self.create) || !self.create || (has(self.size)
112-
&& self.size != "")'
113-
- message: volumeAccessMode is required for pvc creation
114-
rule: '!has(self.create) || !self.create || (has(self.volumeAccessMode)
115-
&& self.volumeAccessMode != "")'
116109
type: object
117110
x-kubernetes-validations:
118111
- message: Cannot set both ConfigMap and PVC in ConfigStore

bundle/manifests/apps.nvidia.com_nimcaches.yaml

Lines changed: 24 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -243,23 +243,23 @@ spec:
243243
DataStore service
244244
properties:
245245
authSecret:
246-
description: authSecret is the name of the secret containing
246+
description: AuthSecret is the name of the secret containing
247247
the "HF_TOKEN" token
248248
minLength: 1
249249
type: string
250250
datasetName:
251-
description: datasetName is the name of the dataset
251+
description: DatasetName is the name of the dataset
252252
type: string
253253
endpoint:
254254
description: Endpoint is the HuggingFace endpoint from NeMo
255255
DataStore
256256
pattern: ^https?://.*/v1/hf/?$
257257
type: string
258258
modelName:
259-
description: modelName is the name of the model
259+
description: ModelName is the name of the model
260260
type: string
261261
modelPuller:
262-
description: modelPuller is the containerized huggingface-cli
262+
description: ModelPuller is the containerized huggingface-cli
263263
image to pull the data
264264
minLength: 1
265265
type: string
@@ -268,10 +268,15 @@ spec:
268268
description: Namespace is the namespace within NeMo DataStore
269269
type: string
270270
pullSecret:
271-
description: pullSecret is the name of the image pull secret
271+
description: PullSecret is the name of the image pull secret
272272
for the modelPuller image
273273
minLength: 1
274274
type: string
275+
revision:
276+
description: Revision is the revision of the object to be
277+
cached. This is either a commit hash, branch name or tag.
278+
minLength: 1
279+
type: string
275280
required:
276281
- authSecret
277282
- endpoint
@@ -288,22 +293,22 @@ spec:
288293
Hub
289294
properties:
290295
authSecret:
291-
description: authSecret is the name of the secret containing
296+
description: AuthSecret is the name of the secret containing
292297
the "HF_TOKEN" token
293298
minLength: 1
294299
type: string
295300
datasetName:
296-
description: datasetName is the name of the dataset
301+
description: DatasetName is the name of the dataset
297302
type: string
298303
endpoint:
299304
description: Endpoint is the HuggingFace endpoint
300305
pattern: ^https?://.*$
301306
type: string
302307
modelName:
303-
description: modelName is the name of the model
308+
description: ModelName is the name of the model
304309
type: string
305310
modelPuller:
306-
description: modelPuller is the containerized huggingface-cli
311+
description: ModelPuller is the containerized huggingface-cli
307312
image to pull the data
308313
minLength: 1
309314
type: string
@@ -313,10 +318,15 @@ spec:
313318
minLength: 1
314319
type: string
315320
pullSecret:
316-
description: pullSecret is the name of the image pull secret
321+
description: PullSecret is the name of the image pull secret
317322
for the modelPuller image
318323
minLength: 1
319324
type: string
325+
revision:
326+
description: Revision is the revision of the object to be
327+
cached. This is either a commit hash, branch name or tag.
328+
minLength: 1
329+
type: string
320330
required:
321331
- authSecret
322332
- endpoint
@@ -421,7 +431,10 @@ spec:
421431
description: Storage is the target storage for caching NIM model
422432
properties:
423433
hostPath:
424-
description: HostPath is the host path volume for caching NIM
434+
description: |-
435+
HostPath is the host path volume for caching NIM
436+
437+
Deprecated: use PVC instead.
425438
type: string
426439
pvc:
427440
description: PersistentVolumeClaim is the pvc volume used for
@@ -459,13 +472,6 @@ spec:
459472
the PVC
460473
type: string
461474
type: object
462-
x-kubernetes-validations:
463-
- message: size is required for pvc creation
464-
rule: '!has(self.create) || !self.create || (has(self.size)
465-
&& self.size != "")'
466-
- message: volumeAccessMode is required for pvc creation
467-
rule: '!has(self.create) || !self.create || (has(self.volumeAccessMode)
468-
&& self.volumeAccessMode != "")'
469475
type: object
470476
tolerations:
471477
description: Tolerations for running the job to cache the NIM model

bundle/manifests/apps.nvidia.com_nimpipelines.yaml

Lines changed: 0 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -2352,13 +2352,6 @@ spec:
23522352
mode of the PVC
23532353
type: string
23542354
type: object
2355-
x-kubernetes-validations:
2356-
- message: size is required for pvc creation
2357-
rule: '!has(self.create) || !self.create || (has(self.size)
2358-
&& self.size != "")'
2359-
- message: volumeAccessMode is required for pvc creation
2360-
rule: '!has(self.create) || !self.create || (has(self.volumeAccessMode)
2361-
&& self.volumeAccessMode != "")'
23622355
readOnly:
23632356
description: ReadOnly mode indicates if the volume should
23642357
be mounted as read-only

bundle/manifests/apps.nvidia.com_nimservices.yaml

Lines changed: 0 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -2263,13 +2263,6 @@ spec:
22632263
the PVC
22642264
type: string
22652265
type: object
2266-
x-kubernetes-validations:
2267-
- message: size is required for pvc creation
2268-
rule: '!has(self.create) || !self.create || (has(self.size)
2269-
&& self.size != "")'
2270-
- message: volumeAccessMode is required for pvc creation
2271-
rule: '!has(self.create) || !self.create || (has(self.volumeAccessMode)
2272-
&& self.volumeAccessMode != "")'
22732266
readOnly:
22742267
description: ReadOnly mode indicates if the volume should be mounted
22752268
as read-only

0 commit comments

Comments
 (0)