Skip to content

Commit 4173cb5

Browse files
Removing extras
Signed-off-by: Vishesh Tanksale <vtanksale@nvidia.com>
1 parent 58cfd6a commit 4173cb5

File tree

5 files changed

+6
-46
lines changed

5 files changed

+6
-46
lines changed

.github/workflows/e2e.yml

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -32,7 +32,6 @@ jobs:
3232
e2e-tests:
3333
runs-on: linux-amd64-cpu4
3434
if: ${{ github.event.workflow_run.conclusion == 'success' }} && ${{ github.event.workflow_run.event == 'push' }}
35-
# if: false # TODO: Disabled until e2e test infra is fixed
3635
steps:
3736
- name: Check out code
3837
uses: actions/checkout@v4

api/apps/v1alpha1/nimcache_types.go

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -303,9 +303,9 @@ func (n *NIMCache) GetTolerations() []corev1.Toleration {
303303

304304
// GetNodeSelectors returns nodeselectors configured for the NIMCache Job.
305305
func (n *NIMCache) GetNodeSelectors() map[string]string {
306-
/*if n.Spec.NodeSelector == nil {
306+
if n.Spec.NodeSelector == nil {
307307
return map[string]string{"feature.node.kubernetes.io/pci-10de.present": "true"}
308-
}*/
308+
}
309309
return n.Spec.NodeSelector
310310
}
311311

internal/controller/nimcache_controller.go

Lines changed: 1 addition & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -608,33 +608,27 @@ func (r *NIMCacheReconciler) reconcileModelManifest(ctx context.Context, nimCach
608608
logger.Error(err, "failed to create", "pod", pod.Name)
609609
return false, err
610610
}
611-
logger.Info("Created pod for model manifest extraction", "pod", pod.Name)
612611

613612
existingPod := &corev1.Pod{}
614613
err = r.Get(ctx, client.ObjectKey{Name: pod.Name, Namespace: nimCache.Namespace}, existingPod)
615614
if err != nil {
616615
logger.Error(err, "failed to get pod for model selection", "pod", pod.Name)
617-
return true, err
616+
return false, err
618617
}
619-
logger.Info("HERE", "pod", pod.Name)
620618
if existingPod.Status.Phase != corev1.PodRunning {
621619
// requeue request with delay until the pod is ready
622-
logger.Info("HERE", "pod", existingPod.Status)
623620
return true, nil
624621
}
625-
logger.Info("HERE1", "pod", pod.Name)
626622
// Extract manifest file
627623
output, err := k8sutil.GetPodLogs(ctx, existingPod, NIMCacheContainerName)
628624
if err != nil {
629625
logger.Error(err, "failed to get pod logs for parsing model manifest file", "pod", pod.Name)
630626
return false, err
631627
}
632-
logger.Info("HERE2", "pod", pod.Name)
633628
if output == "" {
634629
logger.Info("Requeuing to wait for the manifest to be copied from the container")
635630
return true, nil
636631
}
637-
logger.Info("HERE3", "pod", pod.Name)
638632
parser := nimparserutils.GetNIMParser([]byte(output))
639633
// Parse the file
640634
manifest, err := parser.ParseModelManifestFromRawOutput([]byte(output))
@@ -643,14 +637,12 @@ func (r *NIMCacheReconciler) reconcileModelManifest(ctx context.Context, nimCach
643637
return false, err
644638
}
645639
logger.Info("manifest file", "nimcache", nimCache.Name, "manifest", manifest)
646-
logger.Info("HERE4", "pod", pod.Name)
647640
// Create a ConfigMap with the model manifest file for re-use
648641
err = r.createManifestConfigMap(ctx, nimCache, &manifest)
649642
if err != nil {
650643
logger.Error(err, "Failed to create model manifest config map")
651644
return false, err
652645
}
653-
logger.Info("HERE5", "pod", pod.Name)
654646
// Model manifest is successfully extracted, cleanup temporary pod
655647
err = r.Delete(ctx, existingPod)
656648
if err != nil && !errors.IsNotFound(err) {
@@ -664,7 +656,6 @@ func (r *NIMCacheReconciler) reconcileModelManifest(ctx context.Context, nimCach
664656

665657
func (r *NIMCacheReconciler) reconcileModelSelection(ctx context.Context, nimCache *appsv1alpha1.NIMCache) error {
666658
logger := r.GetLogger()
667-
logger.Info("Reconciling model selection", "nimcache", nimCache.Name)
668659

669660
// reconcile model selection pod
670661
if isModelSelectionRequired(nimCache) && !isModelSelectionDone(nimCache) {
@@ -710,7 +701,6 @@ func (r *NIMCacheReconciler) reconcileModelSelection(ctx context.Context, nimCac
710701

711702
func (r *NIMCacheReconciler) reconcileJob(ctx context.Context, nimCache *appsv1alpha1.NIMCache) error {
712703
logger := r.GetLogger()
713-
logger.Info("Reconciling job", "nimcache", nimCache.Name)
714704
// reconcile model caching job
715705
job := &batchv1.Job{}
716706
jobName := types.NamespacedName{Name: getJobName(nimCache), Namespace: nimCache.GetNamespace()}

test/e2e/data/nimservice.yml

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -4,8 +4,8 @@ metadata:
44
name: meta-llama3-8b-instruct
55
spec:
66
image:
7-
repository: nvcr.io/nim/meta/llama3-8b-instruct
8-
tag: 1.0.0
7+
repository: nvcr.io/nim/meta/llama-3.2-1b-instruct
8+
tag: 1.8
99
pullPolicy: IfNotPresent
1010
pullSecrets:
1111
- ngc-secret

test/e2e/nim-operator_test.go

Lines changed: 1 addition & 30 deletions
Original file line number | Diff line number | Diff line change
@@ -31,7 +31,6 @@ import (
3131
helmValues "github.com/mittwald/go-helm-client/values"
3232
"helm.sh/helm/v3/pkg/repo"
3333
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
34-
clientset "k8s.io/client-go/kubernetes"
3534
kubernetes "k8s.io/client-go/kubernetes"
3635
"sigs.k8s.io/yaml"
3736

@@ -65,21 +64,6 @@ var _ = Describe("NIM Operator", Ordered, func() {
6564

6665
err = diagnosticsCollector.Collect(ctx)
6766
Expect(err).NotTo(HaveOccurred())
68-
69-
cli, err := versioned.NewForConfig(clientConfig)
70-
Expect(err).NotTo(HaveOccurred())
71-
72-
nimCacheObject, _ := cli.AppsV1alpha1().NIMCaches(testNamespace.Name).Get(ctx, "meta-llama3-8b-instruct", metav1.GetOptions{})
73-
fmt.Printf("NIMCache object: %#v\n", nimCacheObject)
74-
75-
clientSet, err = clientset.NewForConfig(clientConfig)
76-
Expect(err).NotTo(HaveOccurred())
77-
pods, err := clientSet.CoreV1().Pods(testNamespace.Name).List(ctx, metav1.ListOptions{})
78-
Expect(err).NotTo(HaveOccurred())
79-
for _, pod := range pods.Items {
80-
fmt.Println("Pod name:", pod.Name)
81-
}
82-
8367
}
8468
})
8569

@@ -137,9 +121,6 @@ var _ = Describe("NIM Operator", Ordered, func() {
137121
cli, err := versioned.NewForConfig(clientConfig)
138122
Expect(err).NotTo(HaveOccurred())
139123

140-
clientSet, err = clientset.NewForConfig(clientConfig)
141-
Expect(err).NotTo(HaveOccurred())
142-
143124
nimCache := &v1alpha1.NIMCache{}
144125
data, err := os.ReadFile(filepath.Join(cwd, "data", "nimcache.yml"))
145126
Expect(err).NotTo(HaveOccurred())
@@ -153,16 +134,6 @@ var _ = Describe("NIM Operator", Ordered, func() {
153134
By("Checking the NIMCache object state is ready")
154135
Eventually(func() bool {
155136
nimCacheObject, _ := cli.AppsV1alpha1().NIMCaches(testNamespace.Name).Get(ctx, nimCache.Name, metav1.GetOptions{})
156-
fmt.Println("NIMCache object conditions:", nimCacheObject.Status.Conditions)
157-
fmt.Println("NIMCache object state:", nimCacheObject.Status.State)
158-
//pod, err := clientSet.CoreV1().Pods(testNamespace.Name).Get(ctx, fmt.Sprintf("%s-pod", nimCache.GetName()), metav1.GetOptions{})
159-
160-
err := DescribePod(clientSet.(*kubernetes.Clientset), testNamespace.Name, fmt.Sprintf("%s-pod", nimCache.GetName()))
161-
if err != nil {
162-
fmt.Println("Error:", err)
163-
}
164-
165-
fmt.Println("***************111")
166137
return nimCacheObject.Status.State == v1alpha1.NimCacheStatusReady
167138
}, Timeout, 5*time.Second).Should(BeTrue())
168139

@@ -407,7 +378,7 @@ func installEntitystoreDependencies() {
407378
CreateNamespace: false,
408379
Wait: true,
409380
WaitForJobs: true,
410-
Timeout: 2 * time.Minute,
381+
Timeout: 10 * time.Minute,
411382
CleanupOnFail: true,
412383
ValuesOptions: values}
413384
_, err = helmClient.InstallOrUpgradeChart(ctx, chartSpec, nil)

0 commit comments

Comments
 (0)