@@ -20,14 +20,17 @@ package e2e
2020
2121import (
2222 "context"
23+ "fmt"
2324 "os"
25+ "strings"
2426
2527 . "github.com/onsi/ginkgo"
2628 . "github.com/onsi/gomega"
2729 batchv1 "k8s.io/api/batch/v1"
2830 corev1 "k8s.io/api/core/v1"
2931 "k8s.io/apimachinery/pkg/api/resource"
3032 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
33+ "k8s.io/client-go/kubernetes"
3134 "sigs.k8s.io/cluster-api/test/framework"
3235)
3336
@@ -73,7 +76,9 @@ func AzureGPUSpec(ctx context.Context, inputGetter func() AzureGPUSpecInput) {
7376 }
7477 }
7578 return false
76- }, e2eConfig .GetIntervals (specName , "wait-worker-nodes" )... ).Should (BeTrue ())
79+ }, e2eConfig .GetIntervals (specName , "wait-worker-nodes" )... ).Should (BeTrue (), func () string {
80+ return getGPUOperatorPodLogs (ctx , clientset )
81+ })
7782
7883 By ("running a CUDA vector calculation job" )
7984 jobsClient := clientset .BatchV1 ().Jobs (corev1 .NamespaceDefault )
@@ -90,7 +95,7 @@ func AzureGPUSpec(ctx context.Context, inputGetter func() AzureGPUSpecInput) {
9095 Containers : []corev1.Container {
9196 {
9297 Name : jobName ,
93- Image : "nvcr.io/nvidia/k8s/cuda-sample:vectoradd-cuda11.1-ubuntu18.04 " ,
98+ Image : "nvcr.io/nvidia/k8s/cuda-sample:vectoradd-cuda11.2.1 " ,
9499 Resources : corev1.ResourceRequirements {
95100 Limits : corev1.ResourceList {
96101 "nvidia.com/gpu" : resource .MustParse ("1" ),
@@ -112,3 +117,18 @@ func AzureGPUSpec(ctx context.Context, inputGetter func() AzureGPUSpecInput) {
112117 }
113118 WaitForJobComplete (ctx , gpuJobInput , e2eConfig .GetIntervals (specName , "wait-job" )... )
114119}
120+
121+ // getGPUOperatorPodLogs returns the logs of the Nvidia GPU operator pods.
122+ func getGPUOperatorPodLogs (ctx context.Context , clientset * kubernetes.Clientset ) string {
123+ podsClient := clientset .CoreV1 ().Pods (corev1 .NamespaceAll )
124+ pods , err := podsClient .List (ctx , metav1.ListOptions {LabelSelector : "app.kubernetes.io/instance=gpu-operator" })
125+ if err != nil {
126+ return err .Error ()
127+ }
128+ b := strings.Builder {}
129+ for _ , pod := range pods .Items {
130+ b .WriteString (fmt .Sprintf ("\n Logs for pod %s:\n " , pod .Name ))
131+ b .WriteString (getPodLogs (ctx , clientset , pod ))
132+ }
133+ return b .String ()
134+ }
0 commit comments