
Commit 5ae722b

Adding multi-LLM e2e test
Signed-off-by: Vishesh Tanksale <[email protected]>
1 parent 951fd40 commit 5ae722b

6 files changed: +123 -1 lines changed

.github/workflows/e2e.yml

Lines changed: 2 additions & 1 deletion
@@ -44,7 +44,7 @@ jobs:
           echo "GOLANG_VERSION=${GOLANG_VERSION##GOLANG_VERSION := }" >> $GITHUB_ENV
 
       - name: Set up Holodeck
-        uses: NVIDIA/[email protected]
+        uses: NVIDIA/[email protected]
         with:
           aws_access_key_id: ${{ secrets.AWS_ACCESS_KEY_ID }}
           aws_secret_access_key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
@@ -69,6 +69,7 @@ jobs:
           E2E_IMAGE_TAG: ${{ env.COMMIT_SHORT_SHA }}
           LOG_ARTIFACTS: ${{ github.workspace }}/e2e_logs
           NGC_API_KEY: ${{ secrets.NGC_API_KEY }}
+          HF_TOKEN: ${{ secrets.HF_TOKEN }}
         run: |
           ./hack/e2e_tests.sh
 
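The run step now passes HF_TOKEN (alongside NGC_API_KEY) into the e2e suite so it can create the Hugging Face pull secret. Below is a minimal sketch of how the suite could guard against a missing token before creating that secret; the helper name is illustrative and the guard itself is not part of this commit, which reads the variable without validation.

// Hedged sketch, not part of this commit: fail fast when HF_TOKEN is
// missing instead of creating an empty hf-token-secret.
package e2e

import (
	"fmt"
	"os"
)

// requireHFToken returns the Hugging Face token from the environment,
// or an error the suite can use to abort before creating hf-token-secret.
func requireHFToken() (string, error) {
	token := os.Getenv("HF_TOKEN")
	if token == "" {
		return "", fmt.Errorf("HF_TOKEN is not set; the multi-LLM NIMCache cannot authenticate to Hugging Face")
	}
	return token, nil
}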
test/e2e/data/nimcache-multi-llm.yml

Lines changed: 21 additions & 0 deletions
@@ -0,0 +1,21 @@
+apiVersion: apps.nvidia.com/v1alpha1
+kind: NIMCache
+metadata:
+  name: nim-cache-multi-llm
+spec:
+  nodeSelector:
+    nvidia.com/gpu.present: "true"
+  source:
+    hf:
+      endpoint: "https://huggingface.co"
+      namespace: "meta-llama"
+      authSecret: hf-token-secret
+      modelPuller: nvcr.io/nim/nvidia/llm-nim:1.12
+      pullSecret: ngc-secret
+      modelName: "Llama-3.2-1B-Instruct"
+  storage:
+    pvc:
+      create: true
+      storageClass: "local-path"
+      size: "50Gi"
+      volumeAccessMode: ReadWriteOnce
test/e2e/data/nimservice-multi-llm.yml

Lines changed: 30 additions & 0 deletions
@@ -0,0 +1,30 @@
+apiVersion: apps.nvidia.com/v1alpha1
+kind: NIMService
+metadata:
+  name: meta-llama-3-2-1b-instruct
+spec:
+  image:
+    repository: nvcr.io/nim/nvidia/llm-nim
+    tag: "1.12"
+    pullPolicy: IfNotPresent
+    pullSecrets:
+      - ngc-secret
+  authSecret: ngc-api-secret
+  storage:
+    nimCache:
+      name: nim-cache-multi-llm
+      profile: 'tensorrt_llm'
+  resources:
+    limits:
+      nvidia.com/gpu: 1
+      cpu: "12"
+      memory: 32Gi
+    requests:
+      nvidia.com/gpu: 1
+      cpu: "4"
+      memory: 6Gi
+  replicas: 1
+  expose:
+    service:
+      type: ClusterIP
+      port: 8000
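The NIMService is exposed as a ClusterIP service on port 8000 and is backed by the nim-cache-multi-llm NIMCache with the tensorrt_llm profile. Below is a minimal smoke-test sketch against that service, assuming the NIM container serves an OpenAI-compatible /v1/chat/completions endpoint on that port; the base URL, served model name, and payload shape are assumptions, not taken from this commit.

// Hedged sketch, not part of this commit: probe the in-cluster service,
// assuming an OpenAI-compatible chat completions endpoint on port 8000.
package e2e

import (
	"bytes"
	"context"
	"encoding/json"
	"fmt"
	"net/http"
	"time"
)

// chatSmokeTest sends one short completion request to the NIMService.
// baseURL would typically look like
// "http://meta-llama-3-2-1b-instruct.<namespace>.svc.cluster.local:8000".
func chatSmokeTest(ctx context.Context, baseURL string) error {
	payload := map[string]any{
		"model": "meta/llama-3.2-1b-instruct", // assumed served model name
		"messages": []map[string]string{
			{"role": "user", "content": "Say hello in one word."},
		},
		"max_tokens": 8,
	}
	body, err := json.Marshal(payload)
	if err != nil {
		return err
	}

	req, err := http.NewRequestWithContext(ctx, http.MethodPost,
		baseURL+"/v1/chat/completions", bytes.NewReader(body))
	if err != nil {
		return err
	}
	req.Header.Set("Content-Type", "application/json")

	client := &http.Client{Timeout: 30 * time.Second}
	resp, err := client.Do(req)
	if err != nil {
		return err
	}
	defer resp.Body.Close()

	// Any non-200 response is treated as a failed smoke test.
	if resp.StatusCode != http.StatusOK {
		return fmt.Errorf("unexpected status %d from NIMService", resp.StatusCode)
	}
	return nil
}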

test/e2e/e2e_test.go

Lines changed: 18 additions & 0 deletions
@@ -476,6 +476,24 @@ func createPullSecrets() {
 	_, err := clientSet.CoreV1().Secrets(testNamespace.Name).Create(ctx, ngcAPIsecret, metav1.CreateOptions{})
 	Expect(err).NotTo(HaveOccurred())
 
+	// Get the HF_TOKEN from the environment
+	HF_TOKEN := os.Getenv("HF_TOKEN")
+
+	// Create a secret holding the Hugging Face token used for model pulls
+	hfTokenSecret := &corev1.Secret{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      "hf-token-secret",
+			Namespace: testNamespace.Name,
+		},
+		Type: corev1.SecretTypeOpaque,
+		StringData: map[string]string{
+			"HF_TOKEN": HF_TOKEN,
+		},
+	}
+
+	_, err = clientSet.CoreV1().Secrets(testNamespace.Name).Create(ctx, hfTokenSecret, metav1.CreateOptions{})
+	Expect(err).NotTo(HaveOccurred())
+
 	// Create the dockerconfigjson type secret
 	dockerServer := "nvcr.io"
 	dockerUsername := `$oauthtoken`

test/e2e/infra/aws.yml

Lines changed: 1 addition & 0 deletions
@@ -11,6 +11,7 @@ spec:
   instance:
     type: g6e.2xlarge
     region: us-east-1
+    rootVolumeSizeGB: 120
     ingressIpRanges:
       - 18.190.12.32/32
       - 3.143.46.93/32

test/e2e/nim-operator_test.go

Lines changed: 51 additions & 0 deletions
@@ -155,6 +155,57 @@ var _ = Describe("NIM Operator", Ordered, func() {
 		})
 	})
 
+	When("deploying Multi LLM NIMCache and NIMService", Ordered, func() {
+
+		AfterEach(func() {
+			// Clean up
+			if !CurrentSpecReport().Failed() {
+				cleanupNIMCRs()
+			}
+		})
+
+		It("should go to READY state", func(ctx context.Context) {
+			// Create a NIMCache object
+			By("Creating a NIMCache object")
+			cli, err := versioned.NewForConfig(clientConfig)
+			Expect(err).NotTo(HaveOccurred())
+
+			nimCache := &v1alpha1.NIMCache{}
+			data, err := os.ReadFile(filepath.Join(cwd, "data", "nimcache-multi-llm.yml"))
+			Expect(err).NotTo(HaveOccurred())
+
+			err = yaml.Unmarshal(data, nimCache)
+			Expect(err).NotTo(HaveOccurred())
+
+			_, err = cli.AppsV1alpha1().NIMCaches(testNamespace.Name).Create(ctx, nimCache, metav1.CreateOptions{})
+			Expect(err).NotTo(HaveOccurred())
+
+			By("Checking the NIMCache object state is ready")
+			Eventually(func() bool {
+				nimCacheObject, _ := cli.AppsV1alpha1().NIMCaches(testNamespace.Name).Get(ctx, nimCache.Name, metav1.GetOptions{})
+				return nimCacheObject.Status.State == v1alpha1.NimCacheStatusReady
+			}, Timeout, 5*time.Second).Should(BeTrue())
+
+			// Create a NIMService object
+			By("Creating a NIMService object")
+			nimService := &v1alpha1.NIMService{}
+			data, err = os.ReadFile(filepath.Join(cwd, "data", "nimservice-multi-llm.yml"))
+			Expect(err).NotTo(HaveOccurred())
+
+			err = yaml.Unmarshal(data, nimService)
+			Expect(err).NotTo(HaveOccurred())
+
+			_, err = cli.AppsV1alpha1().NIMServices(testNamespace.Name).Create(ctx, nimService, metav1.CreateOptions{})
+			Expect(err).NotTo(HaveOccurred())
+
+			By("Checking the NIMService object state is ready")
+			Eventually(func() bool {
+				nimServiceObject, _ := cli.AppsV1alpha1().NIMServices(testNamespace.Name).Get(ctx, nimService.Name, metav1.GetOptions{})
+				return nimServiceObject.Status.State == v1alpha1.NIMServiceStatusReady
+			}, Timeout, 5*time.Second).Should(BeTrue())
+		})
+	})
+
 	When("deploying NEMO microservices", func() {
 		BeforeEach(func() {
 			if !EnableNemoMicroservices {
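The NIMCache and NIMService readiness checks above share the same Eventually polling shape. Below is a sketch of how that poll could be factored into a single helper; the helper name and signature are illustrative only and not part of this commit.

// Hedged sketch, not part of this commit: a generic readiness poll,
// assuming Gomega's Eventually and a getter that reports the object's
// current state as a string.
package e2e

import (
	"context"
	"time"

	. "github.com/onsi/gomega"
)

// waitForState polls getState every 5 seconds until it reports the wanted
// state or the timeout expires; timeout would typically be the suite-level
// Timeout used in the tests above.
func waitForState(ctx context.Context, timeout time.Duration, want string, getState func(context.Context) (string, error)) {
	Eventually(func() bool {
		state, err := getState(ctx)
		return err == nil && state == want
	}, timeout, 5*time.Second).Should(BeTrue())
}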
