@@ -31,6 +31,8 @@ import (
3131 helmValues "github.com/mittwald/go-helm-client/values"
3232 "helm.sh/helm/v3/pkg/repo"
3333 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
34+ clientset "k8s.io/client-go/kubernetes"
35+ kubernetes "k8s.io/client-go/kubernetes"
3436 "sigs.k8s.io/yaml"
3537
3638 "github.com/NVIDIA/k8s-test-infra/pkg/diagnostics"
@@ -63,6 +65,21 @@ var _ = Describe("NIM Operator", func() {
6365
6466 err = diagnosticsCollector .Collect (ctx )
6567 Expect (err ).NotTo (HaveOccurred ())
68+
69+ cli , err := versioned .NewForConfig (clientConfig )
70+ Expect (err ).NotTo (HaveOccurred ())
71+
72+ nimCacheObject , _ := cli .AppsV1alpha1 ().NIMCaches (testNamespace .Name ).Get (ctx , "meta-llama3-8b-instruct" , metav1.GetOptions {})
73+ fmt .Printf ("NIMCache object: %#v\n " , nimCacheObject )
74+
75+ clientSet , err = clientset .NewForConfig (clientConfig )
76+ Expect (err ).NotTo (HaveOccurred ())
77+ pods , err := clientSet .CoreV1 ().Pods (testNamespace .Name ).List (ctx , metav1.ListOptions {})
78+ Expect (err ).NotTo (HaveOccurred ())
79+ for _ , pod := range pods .Items {
80+ fmt .Println ("Pod name:" , pod .Name )
81+ }
82+
6683 }
6784 })
6885
@@ -84,9 +101,11 @@ var _ = Describe("NIM Operator", func() {
84101 values := helmValues.Options {
85102 Values : []string {
86103 fmt .Sprintf ("operator.image.repository=%s" , ImageRepo ),
87- fmt .Sprintf ("operator.image.tag=%s" , ImageTag ),
104+ "operator.image.tag=e2e-test" ,
105+ //fmt.Sprintf("operator.image.tag=%s", ImageTag),
88106 fmt .Sprintf ("operator.image.pullPolicy=%s" , ImagePullPolicy ),
89107 fmt .Sprintf ("operator.image.pullSecrets={%s}" , strings .Join (pullSecrets , "," )),
108+ "operator.admissionController.enabled=false" ,
90109 },
91110 }
92111
@@ -120,6 +139,9 @@ var _ = Describe("NIM Operator", func() {
120139 cli , err := versioned .NewForConfig (clientConfig )
121140 Expect (err ).NotTo (HaveOccurred ())
122141
142+ clientSet , err = clientset .NewForConfig (clientConfig )
143+ Expect (err ).NotTo (HaveOccurred ())
144+
123145 nimCache := & v1alpha1.NIMCache {}
124146 data , err := os .ReadFile (filepath .Join (cwd , "data" , "nimcache.yml" ))
125147 Expect (err ).NotTo (HaveOccurred ())
@@ -133,6 +155,16 @@ var _ = Describe("NIM Operator", func() {
133155 By ("Checking the NIMCache object state is ready" )
134156 Eventually (func () bool {
135157 nimCacheObject , _ := cli .AppsV1alpha1 ().NIMCaches (testNamespace .Name ).Get (ctx , nimCache .Name , metav1.GetOptions {})
158+ fmt .Println ("NIMCache object conditions:" , nimCacheObject .Status .Conditions )
159+ fmt .Println ("NIMCache object state:" , nimCacheObject .Status .State )
160+ //pod, err := clientSet.CoreV1().Pods(testNamespace.Name).Get(ctx, fmt.Sprintf("%s-pod", nimCache.GetName()), metav1.GetOptions{})
161+
162+ err := DescribePod (clientSet .(* kubernetes.Clientset ), testNamespace .Name , fmt .Sprintf ("%s-pod" , nimCache .GetName ()))
163+ if err != nil {
164+ fmt .Println ("Error:" , err )
165+ }
166+
167+ fmt .Println ("***************111" )
136168 return nimCacheObject .Status .State == v1alpha1 .NimCacheStatusReady
137169 }, Timeout , 5 * time .Second ).Should (BeTrue ())
138170
@@ -377,9 +409,70 @@ func installEntitystoreDependencies() {
377409 CreateNamespace : false ,
378410 Wait : true ,
379411 WaitForJobs : true ,
380- Timeout : 10 * time .Minute ,
412+ Timeout : 2 * time .Minute ,
381413 CleanupOnFail : true ,
382414 ValuesOptions : values }
383415 _ , err = helmClient .InstallOrUpgradeChart (ctx , chartSpec , nil )
384416 Expect (err ).NotTo (HaveOccurred ())
385417}
418+
419+ func DescribePod (clientset * kubernetes.Clientset , namespace , name string ) error {
420+ ctx := context .Background ()
421+
422+ // --- Get Pod
423+ pod , err := clientset .CoreV1 ().Pods (namespace ).Get (ctx , name , metav1.GetOptions {})
424+ if err != nil {
425+ return fmt .Errorf ("failed to get pod: %w" , err )
426+ }
427+
428+ fmt .Printf ("Name: %s\n " , pod .Name )
429+ fmt .Printf ("Namespace: %s\n " , pod .Namespace )
430+ fmt .Printf ("Node: %s\n " , pod .Spec .NodeName )
431+ fmt .Printf ("Start Time: %s\n " , pod .Status .StartTime )
432+ fmt .Printf ("Phase: %s\n " , pod .Status .Phase )
433+
434+ fmt .Println ("\n Conditions:" )
435+ for _ , c := range pod .Status .Conditions {
436+ fmt .Printf (" - Type=%s Status=%s Reason=%s\n " , c .Type , c .Status , c .Reason )
437+ }
438+
439+ fmt .Println ("\n Containers:" )
440+ for _ , cs := range pod .Status .ContainerStatuses {
441+ state := cs .State
442+ fmt .Printf (" * %s:\n " , cs .Name )
443+ if state .Running != nil {
444+ fmt .Printf (" Running since %s\n " , state .Running .StartedAt )
445+ }
446+ if state .Waiting != nil {
447+ fmt .Printf (" Waiting: %s (%s)\n " , state .Waiting .Reason , state .Waiting .Message )
448+ }
449+ if state .Terminated != nil {
450+ fmt .Printf (" Terminated: %s at %s (exit %d)\n " ,
451+ state .Terminated .Reason , state .Terminated .FinishedAt , state .Terminated .ExitCode )
452+ }
453+ fmt .Printf (" Ready=%v Restarts=%d\n " , cs .Ready , cs .RestartCount )
454+ }
455+
456+ // --- Get related Events
457+ events , err := clientset .CoreV1 ().Events (namespace ).List (ctx , metav1.ListOptions {
458+ FieldSelector : fmt .Sprintf ("involvedObject.kind=Pod,involvedObject.name=%s" , pod .Name ),
459+ })
460+ if err != nil {
461+ return fmt .Errorf ("failed to get events: %w" , err )
462+ }
463+
464+ fmt .Println ("\n Events:" )
465+ if len (events .Items ) == 0 {
466+ fmt .Println (" <none>" )
467+ }
468+ for _ , e := range events .Items {
469+ t := e .LastTimestamp .Time
470+ if t .IsZero () {
471+ t = e .EventTime .Time
472+ }
473+ fmt .Printf (" %s %s %s: %s\n " ,
474+ t .Format (time .RFC3339 ), e .Type , e .Reason , e .Message )
475+ }
476+
477+ return nil
478+ }
0 commit comments