
Commit 5ae722b

Adding multi-LLM e2e test
Signed-off-by: Vishesh Tanksale <[email protected]>
1 parent 951fd40 commit 5ae722b

6 files changed: +123 -1 lines changed

.github/workflows/e2e.yml

Lines changed: 2 additions & 1 deletion
@@ -44,7 +44,7 @@ jobs:
           echo "GOLANG_VERSION=${GOLANG_VERSION##GOLANG_VERSION := }" >> $GITHUB_ENV
 
       - name: Set up Holodeck
-        uses: NVIDIA/[email protected]
+        uses: NVIDIA/[email protected]
         with:
           aws_access_key_id: ${{ secrets.AWS_ACCESS_KEY_ID }}
           aws_secret_access_key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
@@ -69,6 +69,7 @@ jobs:
           E2E_IMAGE_TAG: ${{ env.COMMIT_SHORT_SHA }}
           LOG_ARTIFACTS: ${{ github.workspace }}/e2e_logs
           NGC_API_KEY: ${{ secrets.NGC_API_KEY }}
+          HF_TOKEN: ${{ secrets.HF_TOKEN }}
         run: |
           ./hack/e2e_tests.sh
 
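The run step now passes HF_TOKEN (alongside NGC_API_KEY) into the e2e suite so it can create the Hugging Face pull secret. Below is a minimal sketch of how the suite could guard against a missing token before creating that secret; the helper name is illustrative and the guard itself is not part of this commit, which reads the variable without validation.

// Hedged sketch, not part of this commit: fail fast when HF_TOKEN is
// missing instead of creating an empty hf-token-secret.
package e2e

import (
	"fmt"
	"os"
)

// requireHFToken returns the Hugging Face token from the environment,
// or an error the suite can use to abort before creating hf-token-secret.
func requireHFToken() (string, error) {
	token := os.Getenv("HF_TOKEN")
	if token == "" {
		return "", fmt.Errorf("HF_TOKEN is not set; the multi-LLM NIMCache cannot authenticate to Hugging Face")
	}
	return token, nil
}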
test/e2e/data/nimcache-multi-llm.yml

Lines changed: 21 additions & 0 deletions
@@ -0,0 +1,21 @@
+apiVersion: apps.nvidia.com/v1alpha1
+kind: NIMCache
+metadata:
+  name: nim-cache-multi-llm
+spec:
+  nodeSelector:
+    nvidia.com/gpu.present: "true"
+  source:
+    hf:
+      endpoint: "https://huggingface.co"
+      namespace: "meta-llama"
+      authSecret: hf-token-secret
+      modelPuller: nvcr.io/nim/nvidia/llm-nim:1.12
+      pullSecret: ngc-secret
+      modelName: "Llama-3.2-1B-Instruct"
+  storage:
+    pvc:
+      create: true
+      storageClass: "local-path"
+      size: "50Gi"
+      volumeAccessMode: ReadWriteOnce
test/e2e/data/nimservice-multi-llm.yml

Lines changed: 30 additions & 0 deletions
@@ -0,0 +1,30 @@
+apiVersion: apps.nvidia.com/v1alpha1
+kind: NIMService
+metadata:
+  name: meta-llama-3-2-1b-instruct
+spec:
+  image:
+    repository: nvcr.io/nim/nvidia/llm-nim
+    tag: "1.12"
+    pullPolicy: IfNotPresent
+    pullSecrets:
+      - ngc-secret
+  authSecret: ngc-api-secret
+  storage:
+    nimCache:
+      name: nim-cache-multi-llm
+      profile: 'tensorrt_llm'
+  resources:
+    limits:
+      nvidia.com/gpu: 1
+      cpu: "12"
+      memory: 32Gi
+    requests:
+      nvidia.com/gpu: 1
+      cpu: "4"
+      memory: 6Gi
+  replicas: 1
+  expose:
+    service:
+      type: ClusterIP
+      port: 8000
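The NIMService is exposed as a ClusterIP service on port 8000 and is backed by the nim-cache-multi-llm NIMCache with the tensorrt_llm profile. Below is a minimal smoke-test sketch against that service, assuming the NIM container serves an OpenAI-compatible /v1/chat/completions endpoint on that port; the base URL, served model name, and payload shape are assumptions, not taken from this commit.

// Hedged sketch, not part of this commit: probe the in-cluster service,
// assuming an OpenAI-compatible chat completions endpoint on port 8000.
package e2e

import (
	"bytes"
	"context"
	"encoding/json"
	"fmt"
	"net/http"
	"time"
)

// chatSmokeTest sends one short completion request to the NIMService.
// baseURL would typically look like
// "http://meta-llama-3-2-1b-instruct.<namespace>.svc.cluster.local:8000".
func chatSmokeTest(ctx context.Context, baseURL string) error {
	payload := map[string]any{
		"model": "meta/llama-3.2-1b-instruct", // assumed served model name
		"messages": []map[string]string{
			{"role": "user", "content": "Say hello in one word."},
		},
		"max_tokens": 8,
	}
	body, err := json.Marshal(payload)
	if err != nil {
		return err
	}

	req, err := http.NewRequestWithContext(ctx, http.MethodPost,
		baseURL+"/v1/chat/completions", bytes.NewReader(body))
	if err != nil {
		return err
	}
	req.Header.Set("Content-Type", "application/json")

	client := &http.Client{Timeout: 30 * time.Second}
	resp, err := client.Do(req)
	if err != nil {
		return err
	}
	defer resp.Body.Close()

	// Any non-200 response is treated as a failed smoke test.
	if resp.StatusCode != http.StatusOK {
		return fmt.Errorf("unexpected status %d from NIMService", resp.StatusCode)
	}
	return nil
}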

test/e2e/e2e_test.go

Lines changed: 18 additions & 0 deletions
@@ -476,6 +476,24 @@ func createPullSecrets() {
 	_, err := clientSet.CoreV1().Secrets(testNamespace.Name).Create(ctx, ngcAPIsecret, metav1.CreateOptions{})
 	Expect(err).NotTo(HaveOccurred())
 
+	// Get the HF_TOKEN from the environment
+	HF_TOKEN := os.Getenv("HF_TOKEN")
+
+	// Create a secret holding the Hugging Face token used for model pulls
+	hfTokenSecret := &corev1.Secret{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      "hf-token-secret",
+			Namespace: testNamespace.Name,
+		},
+		Type: corev1.SecretTypeOpaque,
+		StringData: map[string]string{
+			"HF_TOKEN": HF_TOKEN,
+		},
+	}
+
+	_, err = clientSet.CoreV1().Secrets(testNamespace.Name).Create(ctx, hfTokenSecret, metav1.CreateOptions{})
+	Expect(err).NotTo(HaveOccurred())
+
 	// Create the dockerconfigjson type secret
 	dockerServer := "nvcr.io"
 	dockerUsername := `$oauthtoken`

test/e2e/infra/aws.yml

Lines changed: 1 addition & 0 deletions
@@ -11,6 +11,7 @@ spec:
   instance:
     type: g6e.2xlarge
     region: us-east-1
+    rootVolumeSizeGB: 120
     ingressIpRanges:
       - 18.190.12.32/32
       - 3.143.46.93/32

test/e2e/nim-operator_test.go

Lines changed: 51 additions & 0 deletions
@@ -155,6 +155,57 @@ var _ = Describe("NIM Operator", Ordered, func() {
 		})
 	})
 
+	When("deploying Multi LLM NIMCache and NIMService", Ordered, func() {
+
+		AfterEach(func() {
+			// Clean up
+			if !CurrentSpecReport().Failed() {
+				cleanupNIMCRs()
+			}
+		})
+
+		It("should go to READY state", func(ctx context.Context) {
+			// Create a NIMCache object
+			By("Creating a NIMCache object")
+			cli, err := versioned.NewForConfig(clientConfig)
+			Expect(err).NotTo(HaveOccurred())
+
+			nimCache := &v1alpha1.NIMCache{}
+			data, err := os.ReadFile(filepath.Join(cwd, "data", "nimcache-multi-llm.yml"))
+			Expect(err).NotTo(HaveOccurred())
+
+			err = yaml.Unmarshal(data, nimCache)
+			Expect(err).NotTo(HaveOccurred())
+
+			_, err = cli.AppsV1alpha1().NIMCaches(testNamespace.Name).Create(ctx, nimCache, metav1.CreateOptions{})
+			Expect(err).NotTo(HaveOccurred())
+
+			By("Checking the NIMCache object state is ready")
+			Eventually(func() bool {
+				nimCacheObject, _ := cli.AppsV1alpha1().NIMCaches(testNamespace.Name).Get(ctx, nimCache.Name, metav1.GetOptions{})
+				return nimCacheObject.Status.State == v1alpha1.NimCacheStatusReady
+			}, Timeout, 5*time.Second).Should(BeTrue())
+
+			// Create a NIMService object
+			By("Creating a NIMService object")
+			nimService := &v1alpha1.NIMService{}
+			data, err = os.ReadFile(filepath.Join(cwd, "data", "nimservice-multi-llm.yml"))
+			Expect(err).NotTo(HaveOccurred())
+
+			err = yaml.Unmarshal(data, nimService)
+			Expect(err).NotTo(HaveOccurred())
+
+			_, err = cli.AppsV1alpha1().NIMServices(testNamespace.Name).Create(ctx, nimService, metav1.CreateOptions{})
+			Expect(err).NotTo(HaveOccurred())
+
+			By("Checking the NIMService object state is ready")
+			Eventually(func() bool {
+				nimServiceObject, _ := cli.AppsV1alpha1().NIMServices(testNamespace.Name).Get(ctx, nimService.Name, metav1.GetOptions{})
+				return nimServiceObject.Status.State == v1alpha1.NIMServiceStatusReady
+			}, Timeout, 5*time.Second).Should(BeTrue())
+		})
+	})
+
 	When("deploying NEMO microservices", func() {
 		BeforeEach(func() {
 			if !EnableNemoMicroservices {
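The NIMCache and NIMService readiness checks above share the same Eventually polling shape. Below is a sketch of how that poll could be factored into a single helper; the helper name and signature are illustrative only and not part of this commit.

// Hedged sketch, not part of this commit: a generic readiness poll,
// assuming Gomega's Eventually and a getter that reports the object's
// current state as a string.
package e2e

import (
	"context"
	"time"

	. "github.com/onsi/gomega"
)

// waitForState polls getState every 5 seconds until it reports the wanted
// state or the timeout expires; timeout would typically be the suite-level
// Timeout used in the tests above.
func waitForState(ctx context.Context, timeout time.Duration, want string, getState func(context.Context) (string, error)) {
	Eventually(func() bool {
		state, err := getState(ctx)
		return err == nil && state == want
	}, timeout, 5*time.Second).Should(BeTrue())
}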
