Skip to content

Commit 366e7c8

Browse files
committed
This change adds support for setting dedicated pod annotations on the DCGM Exporter DaemonSet through the ClusterPolicy `dcgmExporter.annotations` field. It also adds test cases that validate the new behavior, both on its own and combined with the common DaemonSet annotations.
Modified files: api/nvidia/v1/clusterpolicy_types.go, api/nvidia/v1/zz_generated.deepcopy.go, bundle/manifests/nvidia.com_clusterpolicies.yaml, config/crd/bases/nvidia.com_clusterpolicies.yaml, controllers/object_controls.go, controllers/transforms_test.go, deployments/gpu-operator/crds/nvidia.com_clusterpolicies.yaml, deployments/gpu-operator/templates/clusterpolicy.yaml.
Signed-off-by: Leonardo Lima Silva <[email protected]>
Signed-off-by: leolimas <[email protected]>
1 parent 821f1de commit 366e7c8

File tree

7 files changed

+158
-1
lines changed

7 files changed

+158
-1
lines changed

api/nvidia/v1/clusterpolicy_types.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -921,6 +921,11 @@ type DCGMExporterSpec struct {
921921
// +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors=true
922922
// +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.displayName="Service configuration for NVIDIA DCGM Exporter"
923923
ServiceSpec *DCGMExporterServiceConfig `json:"service,omitempty"`
924+
925+
// Optional: Annotations is an unstructured key value map stored with a resource that may be
926+
// set by external tools to store and retrieve arbitrary metadata. They are not
927+
// queryable and should be preserved when modifying objects.
928+
Annotations map[string]string `json:"annotations,omitempty"`
924929
}
925930

926931
// DCGMExporterMetricsConfig defines metrics to be collected by NVIDIA DCGM Exporter

api/nvidia/v1/zz_generated.deepcopy.go

Lines changed: 7 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

config/crd/bases/nvidia.com_clusterpolicies.yaml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -510,6 +510,14 @@ spec:
510510
version:
511511
description: NVIDIA DCGM Exporter image tag
512512
type: string
513+
annotations:
514+
additionalProperties:
515+
type: string
516+
description: |-
517+
Optional: Annotations is an unstructured key value map stored with a resource that may be
518+
set by external tools to store and retrieve arbitrary metadata. They are not
519+
queryable and should be preserved when modifying objects.
520+
type: object
513521
type: object
514522
devicePlugin:
515523
description: DevicePlugin component spec

controllers/object_controls.go

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1621,6 +1621,16 @@ func TransformDCGMExporter(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpe
16211621
obj.Spec.Template.Spec.Containers[0].Args = config.DCGMExporter.Args
16221622
}
16231623

1624+
// set annotations if specified for exporter
1625+
if len(config.DCGMExporter.Annotations) > 0 {
1626+
if obj.Spec.Template.Annotations == nil {
1627+
obj.Spec.Template.Annotations = make(map[string]string)
1628+
}
1629+
for annoKey, annoVal := range config.DCGMExporter.Annotations {
1630+
obj.Spec.Template.Annotations[annoKey] = annoVal
1631+
}
1632+
}
1633+
16241634
// check if DCGM hostengine is enabled as a separate Pod and setup env accordingly
16251635
if config.DCGM.IsEnabled() {
16261636
setContainerEnv(&(obj.Spec.Template.Spec.Containers[0]), DCGMRemoteEngineEnvName, fmt.Sprintf("nvidia-dcgm:%d", DCGMDefaultPort))

controllers/transforms_test.go

Lines changed: 117 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -803,7 +803,7 @@ func TestTransformDCGMExporter(t *testing.T) {
803803
expectedDs Daemonset // Expected output DaemonSet
804804
}{
805805
{
806-
description: "transform dcgm exporter",
806+
description: "transform dcgm exporter without annotations",
807807
ds: NewDaemonset().
808808
WithContainer(corev1.Container{Name: "dcgm-exporter"}).
809809
WithContainer(corev1.Container{Name: "dummy"}),
@@ -833,6 +833,122 @@ func TestTransformDCGMExporter(t *testing.T) {
833833
},
834834
}).WithContainer(corev1.Container{Name: "dummy"}).WithPullSecret("pull-secret").WithRuntimeClassName("nvidia"),
835835
},
836+
{
837+
description: "transform dcgm exporter with annotations",
838+
ds: NewDaemonset().
839+
WithContainer(corev1.Container{Name: "dcgm-exporter"}).
840+
WithContainer(corev1.Container{Name: "dummy"}),
841+
cpSpec: &gpuv1.ClusterPolicySpec{
842+
DCGMExporter: gpuv1.DCGMExporterSpec{
843+
Repository: "nvcr.io/nvidia/cloud-native",
844+
Image: "dcgm-exporter",
845+
Version: "v1.0.0",
846+
ImagePullPolicy: "IfNotPresent",
847+
ImagePullSecrets: []string{"pull-secret"},
848+
Args: []string{"--fail-on-init-error=false"},
849+
Annotations: map[string]string{"dcgm-exporter": "test"},
850+
Env: []gpuv1.EnvVar{
851+
{Name: "foo", Value: "bar"},
852+
},
853+
},
854+
DCGM: gpuv1.DCGMSpec{
855+
Enabled: newBoolPtr(true),
856+
},
857+
},
858+
expectedDs: NewDaemonset().WithContainer(corev1.Container{
859+
Name: "dcgm-exporter",
860+
Image: "nvcr.io/nvidia/cloud-native/dcgm-exporter:v1.0.0",
861+
ImagePullPolicy: corev1.PullIfNotPresent,
862+
Args: []string{"--fail-on-init-error=false"},
863+
Env: []corev1.EnvVar{
864+
{Name: "DCGM_REMOTE_HOSTENGINE_INFO", Value: "nvidia-dcgm:5555"},
865+
},
866+
}).WithContainer(corev1.Container{Name: "dummy"}).WithPullSecret("pull-secret").WithRuntimeClassName("nvidia").WithPodAnnotations(map[string]string{"dcgm-exporter": "test"}),
867+
},
868+
{
869+
description: "transform dcgm exporter with annotations and common annotations",
870+
ds: NewDaemonset().
871+
WithContainer(corev1.Container{Name: "dcgm-exporter"}).
872+
WithContainer(corev1.Container{Name: "dummy"}),
873+
cpSpec: &gpuv1.ClusterPolicySpec{
874+
Daemonsets: gpuv1.DaemonsetsSpec{Annotations: map[string]string{
875+
"key": "value",
876+
"app": "value",
877+
"app.kubernetes.io/part-of": "value",
878+
}},
879+
DCGMExporter: gpuv1.DCGMExporterSpec{
880+
Repository: "nvcr.io/nvidia/cloud-native",
881+
Image: "dcgm-exporter",
882+
Version: "v1.0.0",
883+
ImagePullPolicy: "IfNotPresent",
884+
ImagePullSecrets: []string{"pull-secret"},
885+
Args: []string{"--fail-on-init-error=false"},
886+
Annotations: map[string]string{"dcgm-exporter": "test"},
887+
Env: []gpuv1.EnvVar{
888+
{Name: "foo", Value: "bar"},
889+
},
890+
},
891+
DCGM: gpuv1.DCGMSpec{
892+
Enabled: newBoolPtr(true),
893+
},
894+
},
895+
expectedDs: NewDaemonset().WithContainer(corev1.Container{
896+
Name: "dcgm-exporter",
897+
Image: "nvcr.io/nvidia/cloud-native/dcgm-exporter:v1.0.0",
898+
ImagePullPolicy: corev1.PullIfNotPresent,
899+
Args: []string{"--fail-on-init-error=false"},
900+
Env: []corev1.EnvVar{
901+
{Name: "DCGM_REMOTE_HOSTENGINE_INFO", Value: "nvidia-dcgm:5555"},
902+
},
903+
}).WithContainer(corev1.Container{Name: "dummy"}).WithPullSecret("pull-secret").WithRuntimeClassName("nvidia").
904+
WithPodAnnotations(map[string]string{
905+
"dcgm-exporter": "test",
906+
"key": "value",
907+
"app": "value",
908+
"app.kubernetes.io/part-of": "value",
909+
}),
910+
},
911+
{
912+
description: "transform dcgm exporter only with common annotations",
913+
ds: NewDaemonset().
914+
WithContainer(corev1.Container{Name: "dcgm-exporter"}).
915+
WithContainer(corev1.Container{Name: "dummy"}),
916+
cpSpec: &gpuv1.ClusterPolicySpec{
917+
Daemonsets: gpuv1.DaemonsetsSpec{Annotations: map[string]string{
918+
"key": "value",
919+
"app": "value",
920+
"app.kubernetes.io/part-of": "value",
921+
}},
922+
DCGMExporter: gpuv1.DCGMExporterSpec{
923+
Repository: "nvcr.io/nvidia/cloud-native",
924+
Image: "dcgm-exporter",
925+
Version: "v1.0.0",
926+
ImagePullPolicy: "IfNotPresent",
927+
ImagePullSecrets: []string{"pull-secret"},
928+
Args: []string{"--fail-on-init-error=false"},
929+
Env: []gpuv1.EnvVar{
930+
{Name: "foo", Value: "bar"},
931+
},
932+
},
933+
DCGM: gpuv1.DCGMSpec{
934+
Enabled: newBoolPtr(true),
935+
},
936+
},
937+
expectedDs: NewDaemonset().WithContainer(corev1.Container{
938+
Name: "dcgm-exporter",
939+
Image: "nvcr.io/nvidia/cloud-native/dcgm-exporter:v1.0.0",
940+
ImagePullPolicy: corev1.PullIfNotPresent,
941+
Args: []string{"--fail-on-init-error=false"},
942+
Env: []corev1.EnvVar{
943+
{Name: "DCGM_REMOTE_HOSTENGINE_INFO", Value: "nvidia-dcgm:5555"},
944+
},
945+
}).WithContainer(corev1.Container{Name: "dummy"}).WithPullSecret("pull-secret").WithRuntimeClassName("nvidia").
946+
WithPodAnnotations(map[string]string{
947+
"key": "value",
948+
"app": "value",
949+
"app.kubernetes.io/part-of": "value",
950+
}),
951+
},
836952
}
837953

838954
for _, tc := range testCases {

deployments/gpu-operator/crds/nvidia.com_clusterpolicies.yaml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -510,6 +510,14 @@ spec:
510510
version:
511511
description: NVIDIA DCGM Exporter image tag
512512
type: string
513+
annotations:
514+
additionalProperties:
515+
type: string
516+
description: |-
517+
Optional: Annotations is an unstructured key value map stored with a resource that may be
518+
set by external tools to store and retrieve arbitrary metadata. They are not
519+
queryable and should be preserved when modifying objects.
520+
type: object
513521
type: object
514522
devicePlugin:
515523
description: DevicePlugin component spec

deployments/gpu-operator/templates/clusterpolicy.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -521,6 +521,9 @@ spec:
521521
{{- if .Values.dcgmExporter.service }}
522522
service: {{ toYaml .Values.dcgmExporter.service | nindent 6 }}
523523
{{- end }}
524+
{{- if .Values.dcgmExporter.annotations }}
525+
annotations: {{ toYaml .Values.dcgmExporter.annotations | nindent 6 }}
526+
{{- end }}
524527
gfd:
525528
enabled: {{ .Values.gfd.enabled }}
526529
{{- if .Values.gfd.repository }}

0 commit comments

Comments
 (0)