Skip to content

Commit b9c82ca

Browse files
committed
Allow size of emptyDir volumes to be specified for NIMService and NemoCustomizer
1 parent 117e676 commit b9c82ca

4 files changed

Lines changed: 19 additions & 5 deletions

File tree

api/apps/v1alpha1/nemo_customizer_types.go

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -29,6 +29,7 @@ import (
2929
corev1 "k8s.io/api/core/v1"
3030
networkingv1 "k8s.io/api/networking/v1"
3131
rbacv1 "k8s.io/api/rbac/v1"
32+
"k8s.io/apimachinery/pkg/api/resource"
3233
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
3334
"k8s.io/apimachinery/pkg/util/intstr"
3435
"k8s.io/utils/ptr"
@@ -150,6 +151,8 @@ type TrainingConfig struct {
150151
Tolerations []corev1.Toleration `json:"tolerations,omitempty"`
151152
// PodAffinity for the training jobs
152153
PodAffinity *corev1.PodAffinity `json:"podAffinity,omitempty"`
154+
// EmptyDirSizeLimit is the size limit for the memory-backed "dshm" emptyDir volume of the training jobs; when unset, no sizeLimit is written to the training config
155+
EmptyDirSizeLimit *resource.Quantity `json:"emptyDirSizeLimit,omitempty"`
153156
// Resources for the training jobs
154157
Resources *corev1.ResourceRequirements `json:"resources,omitempty"`
155158
}

api/apps/v1alpha1/nimservice_types.go

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ import (
2626
corev1 "k8s.io/api/core/v1"
2727
networkingv1 "k8s.io/api/networking/v1"
2828
rbacv1 "k8s.io/api/rbac/v1"
29+
"k8s.io/apimachinery/pkg/api/resource"
2930
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
3031
"k8s.io/apimachinery/pkg/util/intstr"
3132
"k8s.io/utils/ptr"
@@ -135,6 +136,8 @@ type NIMServiceList struct {
135136
// NIMServiceStorage defines the attributes of various storage targets used to store the model.
136137
type NIMServiceStorage struct {
137138
NIMCache NIMCacheVolSpec `json:"nimCache,omitempty"`
139+
// EmptyDirSizeLimit is the size limit for the memory-backed "dshm" emptyDir volume; when unset, the volume's SizeLimit is left nil
140+
EmptyDirSizeLimit *resource.Quantity `json:"emptyDirSizeLimit,omitempty"`
138141
// PersistentVolumeClaim is the pvc volume used for caching NIM
139142
PVC PersistentVolumeClaim `json:"pvc,omitempty"`
140143
// HostPath is the host path volume for caching NIM
@@ -449,7 +452,8 @@ func (n *NIMService) GetVolumes(modelPVC PersistentVolumeClaim) []corev1.Volume
449452
Name: "dshm",
450453
VolumeSource: corev1.VolumeSource{
451454
EmptyDir: &corev1.EmptyDirVolumeSource{
452-
Medium: corev1.StorageMediumMemory,
455+
Medium: corev1.StorageMediumMemory,
456+
SizeLimit: n.Spec.Storage.EmptyDirSizeLimit,
453457
},
454458
},
455459
},

internal/controller/nemocustomizer_controller.go

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -628,6 +628,12 @@ func (r *NemoCustomizerReconciler) addTrainingConfig(ctx context.Context, cfg ma
628628
// Add PVC configuration
629629
r.addWorkspacePVCConfig(ctx, trainingCfg, n)
630630

631+
emptyDir := map[string]interface{}{
632+
"medium": "Memory",
633+
}
634+
if n.Spec.Training.EmptyDirSizeLimit != nil {
635+
emptyDir["sizeLimit"] = n.Spec.Training.EmptyDirSizeLimit.String()
636+
}
631637
trainingCfg["volumes"] = []map[string]interface{}{
632638
{
633639
"name": "models",
@@ -637,10 +643,8 @@ func (r *NemoCustomizerReconciler) addTrainingConfig(ctx context.Context, cfg ma
637643
},
638644
},
639645
{
640-
"name": "dshm",
641-
"emptyDir": map[string]interface{}{
642-
"medium": "Memory",
643-
},
646+
"name": "dshm",
647+
"emptyDir": emptyDir,
644648
},
645649
}
646650

manifests/deployment.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,9 @@ spec:
121121
{{- if .EmptyDir }}
122122
emptyDir:
123123
medium: {{ .EmptyDir.Medium }}
124+
{{- if .EmptyDir.SizeLimit }}
125+
sizeLimit: {{ .EmptyDir.SizeLimit }}
126+
{{- end }}
124127
{{- end }}
125128
{{- if .PersistentVolumeClaim }}
126129
persistentVolumeClaim:

0 commit comments

Comments
 (0)