Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions api/apps/v1alpha1/nemo_customizer_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ import (
corev1 "k8s.io/api/core/v1"
networkingv1 "k8s.io/api/networking/v1"
rbacv1 "k8s.io/api/rbac/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/intstr"
"k8s.io/utils/ptr"
Expand Down Expand Up @@ -149,6 +150,8 @@ type TrainingConfig struct {
Tolerations []corev1.Toleration `json:"tolerations,omitempty"`
// PodAffinity for the training jobs
PodAffinity *corev1.PodAffinity `json:"podAffinity,omitempty"`
// SharedMemorySizeLimit sets the max size of the shared memory volume (emptyDir) used by the training jobs for fast model runtime I/O.
SharedMemorySizeLimit *resource.Quantity `json:"sharedMemorySizeLimit,omitempty"`
// Resources for the training jobs
Resources *corev1.ResourceRequirements `json:"resources,omitempty"`
}
Expand Down
6 changes: 5 additions & 1 deletion api/apps/v1alpha1/nimservice_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ import (
corev1 "k8s.io/api/core/v1"
networkingv1 "k8s.io/api/networking/v1"
rbacv1 "k8s.io/api/rbac/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/intstr"
"k8s.io/utils/ptr"
Expand Down Expand Up @@ -134,6 +135,8 @@ type NIMServiceList struct {
// NIMServiceStorage defines the attributes of various storage targets used to store the model
type NIMServiceStorage struct {
NIMCache NIMCacheVolSpec `json:"nimCache,omitempty"`
// SharedMemorySizeLimit sets the max size of the shared memory volume (emptyDir) used by NIMs for fast model runtime I/O.
SharedMemorySizeLimit *resource.Quantity `json:"sharedMemorySizeLimit,omitempty"`
// PersistentVolumeClaim is the pvc volume used for caching NIM
PVC PersistentVolumeClaim `json:"pvc,omitempty"`
// HostPath is the host path volume for caching NIM
Expand Down Expand Up @@ -448,7 +451,8 @@ func (n *NIMService) GetVolumes(modelPVC PersistentVolumeClaim) []corev1.Volume
Name: "dshm",
VolumeSource: corev1.VolumeSource{
EmptyDir: &corev1.EmptyDirVolumeSource{
Medium: corev1.StorageMediumMemory,
Medium: corev1.StorageMediumMemory,
SizeLimit: n.Spec.Storage.SharedMemorySizeLimit,
},
},
},
Expand Down
10 changes: 10 additions & 0 deletions api/apps/v1alpha1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

29 changes: 19 additions & 10 deletions bundle/manifests/apps.nvidia.com_nemocustomizers.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ spec:
name: v1alpha1
schema:
openAPIV3Schema:
description: NemoCustomizer is the Schema for the NemoCustomizer API
description: NemoCustomizer is the Schema for the NemoCustomizer API.
properties:
apiVersion:
description: |-
Expand All @@ -45,7 +45,7 @@ spec:
metadata:
type: object
spec:
description: NemoCustomizerSpec defines the desired state of NemoCustomizer
description: NemoCustomizerSpec defines the desired state of NemoCustomizer.
properties:
annotations:
additionalProperties:
Expand Down Expand Up @@ -254,7 +254,7 @@ spec:
type: object
type: array
expose:
description: ExposeV1 defines attributes to expose the service
description: ExposeV1 defines attributes to expose the service.
properties:
ingress:
description: IngressV1 defines attributes for ingress
Expand All @@ -275,7 +275,7 @@ spec:
paths:
items:
description: IngressPath defines attributes for ingress
paths
paths.
properties:
path:
default: /
Expand All @@ -296,7 +296,7 @@ spec:
rule: (has(self.spec) && has(self.enabled) && self.enabled)
|| !has(self.enabled) || !self.enabled
service:
description: Service defines attributes to create a service
description: Service defines attributes to create a service.
properties:
annotations:
additionalProperties:
Expand All @@ -323,7 +323,7 @@ spec:
format: int64
type: integer
image:
description: Image defines image attributes
description: Image defines image attributes.
properties:
pullPolicy:
type: string
Expand All @@ -344,7 +344,7 @@ spec:
type: string
type: object
metrics:
description: Metrics defines attributes to setup metrics collection
description: Metrics defines attributes to setup metrics collection.
properties:
enabled:
type: boolean
Expand Down Expand Up @@ -1145,7 +1145,7 @@ spec:
type: string
scale:
description: Autoscaling defines attributes to automatically scale
the service based on metrics
the service based on metrics.
properties:
annotations:
additionalProperties:
Expand All @@ -1155,7 +1155,7 @@ spec:
type: boolean
hpa:
description: HorizontalPodAutoscalerSpec defines the parameters
required to setup HPA
required to setup HPA.
properties:
behavior:
description: |-
Expand Down Expand Up @@ -2525,6 +2525,15 @@ spec:
RunAIQueue is the Run.AI's scheduler queue to be used for training jobs.
Used only if the scheduler is set to runai.
type: string
sharedMemorySizeLimit:
anyOf:
- type: integer
- type: string
description: SharedMemorySizeLimit sets the max size of the shared
memory volume (emptyDir) used by the training jobs for fast
model runtime I/O.
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
timeout:
description: Timeout for the training job to complete
type: integer
Expand Down Expand Up @@ -2655,7 +2664,7 @@ spec:
- wandb
type: object
status:
description: NemoCustomizerStatus defines the observed state of NemoCustomizer
description: NemoCustomizerStatus defines the observed state of NemoCustomizer.
properties:
availableReplicas:
format: int32
Expand Down
20 changes: 10 additions & 10 deletions bundle/manifests/apps.nvidia.com_nemodatastores.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ spec:
name: v1alpha1
schema:
openAPIV3Schema:
description: NemoDatastore is the Schema for the NemoDatastore API
description: NemoDatastore is the Schema for the NemoDatastore API.
properties:
apiVersion:
description: |-
Expand All @@ -45,7 +45,7 @@ spec:
metadata:
type: object
spec:
description: NemoDatastoreSpec defines the desired state of NemoDatastore
description: NemoDatastoreSpec defines the desired state of NemoDatastore.
properties:
annotations:
additionalProperties:
Expand Down Expand Up @@ -234,7 +234,7 @@ spec:
type: object
type: array
expose:
description: ExposeV1 defines attributes to expose the service
description: ExposeV1 defines attributes to expose the service.
properties:
ingress:
description: IngressV1 defines attributes for ingress
Expand All @@ -255,7 +255,7 @@ spec:
paths:
items:
description: IngressPath defines attributes for ingress
paths
paths.
properties:
path:
default: /
Expand All @@ -276,7 +276,7 @@ spec:
rule: (has(self.spec) && has(self.enabled) && self.enabled)
|| !has(self.enabled) || !self.enabled
service:
description: Service defines attributes to create a service
description: Service defines attributes to create a service.
properties:
annotations:
additionalProperties:
Expand All @@ -303,7 +303,7 @@ spec:
format: int64
type: integer
image:
description: Image defines image attributes
description: Image defines image attributes.
properties:
pullPolicy:
type: string
Expand All @@ -324,7 +324,7 @@ spec:
type: string
type: object
metrics:
description: Metrics defines attributes to setup metrics collection
description: Metrics defines attributes to setup metrics collection.
properties:
enabled:
type: boolean
Expand Down Expand Up @@ -857,7 +857,7 @@ spec:
type: string
scale:
description: Autoscaling defines attributes to automatically scale
the service based on metrics
the service based on metrics.
properties:
annotations:
additionalProperties:
Expand All @@ -867,7 +867,7 @@ spec:
type: boolean
hpa:
description: HorizontalPodAutoscalerSpec defines the parameters
required to setup HPA
required to setup HPA.
properties:
behavior:
description: |-
Expand Down Expand Up @@ -1529,7 +1529,7 @@ spec:
- secrets
type: object
status:
description: NemoDatastoreStatus defines the observed state of NemoDatastore
description: NemoDatastoreStatus defines the observed state of NemoDatastore.
properties:
availableReplicas:
format: int32
Expand Down
20 changes: 10 additions & 10 deletions bundle/manifests/apps.nvidia.com_nemoentitystores.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ spec:
name: v1alpha1
schema:
openAPIV3Schema:
description: NemoEntitystore is the Schema for the NemoEntitystore API
description: NemoEntitystore is the Schema for the NemoEntitystore API.
properties:
apiVersion:
description: |-
Expand All @@ -45,7 +45,7 @@ spec:
metadata:
type: object
spec:
description: NemoEntitystoreSpec defines the desired state of NemoEntitystore
description: NemoEntitystoreSpec defines the desired state of NemoEntitystore.
properties:
annotations:
additionalProperties:
Expand Down Expand Up @@ -246,7 +246,7 @@ spec:
type: object
type: array
expose:
description: ExposeV1 defines attributes to expose the service
description: ExposeV1 defines attributes to expose the service.
properties:
ingress:
description: IngressV1 defines attributes for ingress
Expand All @@ -267,7 +267,7 @@ spec:
paths:
items:
description: IngressPath defines attributes for ingress
paths
paths.
properties:
path:
default: /
Expand All @@ -288,7 +288,7 @@ spec:
rule: (has(self.spec) && has(self.enabled) && self.enabled)
|| !has(self.enabled) || !self.enabled
service:
description: Service defines attributes to create a service
description: Service defines attributes to create a service.
properties:
annotations:
additionalProperties:
Expand All @@ -315,7 +315,7 @@ spec:
format: int64
type: integer
image:
description: Image defines image attributes
description: Image defines image attributes.
properties:
pullPolicy:
type: string
Expand All @@ -336,7 +336,7 @@ spec:
type: string
type: object
metrics:
description: Metrics defines attributes to setup metrics collection
description: Metrics defines attributes to setup metrics collection.
properties:
enabled:
type: boolean
Expand Down Expand Up @@ -795,7 +795,7 @@ spec:
type: string
scale:
description: Autoscaling defines attributes to automatically scale
the service based on metrics
the service based on metrics.
properties:
annotations:
additionalProperties:
Expand All @@ -805,7 +805,7 @@ spec:
type: boolean
hpa:
description: HorizontalPodAutoscalerSpec defines the parameters
required to setup HPA
required to setup HPA.
properties:
behavior:
description: |-
Expand Down Expand Up @@ -1446,7 +1446,7 @@ spec:
- image
type: object
status:
description: NemoEntitystoreStatus defines the observed state of NemoEntitystore
description: NemoEntitystoreStatus defines the observed state of NemoEntitystore.
properties:
availableReplicas:
format: int32
Expand Down
Loading
Loading