Skip to content

Commit 983e767

Browse files
Updating e2e (#673)
Signed-off-by: Vishesh Tanksale <vtanksale@nvidia.com>
1 parent 05898fc commit 983e767

File tree

6 files changed

+53
-53
lines changed

6 files changed

+53
-53
lines changed

.github/workflows/e2e.yml

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,7 @@ on:
2727
jobs:
2828
e2e-tests:
2929
runs-on: linux-amd64-cpu4
30-
#if: ${{ github.event.workflow_run.conclusion == 'success' }} && ${{ github.event.workflow_run.event == 'push' }}
31-
if: false # TODO: Disabled until e2e test infra is fixed
30+
if: ${{ github.event.workflow_run.conclusion == 'success' && github.event.workflow_run.event == 'push' }} # keep both checks in ONE expression: text outside the markers turns the condition into a non-empty string, which is always truthy
3231
steps:
3332
- name: Check out code
3433
uses: actions/checkout@v4
@@ -41,7 +40,7 @@ jobs:
4140
echo "GOLANG_VERSION=${GOLANG_VERSION##GOLANG_VERSION := }" >> $GITHUB_ENV
4241
4342
- name: Set up Holodeck
44-
uses: NVIDIA/holodeck@v0.2.10
43+
uses: NVIDIA/holodeck@v0.2.7
4544
with:
4645
aws_access_key_id: ${{ secrets.AWS_ACCESS_KEY_ID }}
4746
aws_secret_access_key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
@@ -53,7 +52,7 @@ jobs:
5352
with:
5453
go-version: ${{ env.GOLANG_VERSION }}
5554

56-
- name: Intall dependencies
55+
- name: Install Dependencies
5756
run: |
5857
sudo apt-get update
5958
sudo apt-get install -y make

test/e2e/data/nimcache.yml

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -5,22 +5,18 @@ metadata:
55
app.kubernetes.io/name: k8s-nim-operator
66
name: meta-llama3-8b-instruct
77
spec:
8+
nodeSelector:
9+
nvidia.com/gpu.present: "true"
810
source:
911
ngc:
10-
modelPuller: nvcr.io/nim/meta/llama3-8b-instruct:1.0.0
12+
modelPuller: nvcr.io/nim/meta/llama-3.2-1b-instruct:1.8
1113
pullSecret: ngc-secret
1214
authSecret: ngc-api-secret
1315
model:
1416
profiles: []
1517
lora: false
16-
precision: "fp16"
17-
engine: "tensorrt_llm"
18-
qosProfile: "throughput"
19-
gpus:
20-
- product: "A100"
21-
ids:
22-
- "20b2"
2318
tensorParallelism: "1"
19+
engine: "vllm"
2420
resources:
2521
cpu: 500m
2622
memory: 20Gi

test/e2e/data/nimservice.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,8 @@ metadata:
44
name: meta-llama3-8b-instruct
55
spec:
66
image:
7-
repository: nvcr.io/nim/meta/llama3-8b-instruct
8-
tag: 1.0.0
7+
repository: nvcr.io/nim/meta/llama-3.2-1b-instruct
8+
tag: 1.8
99
pullPolicy: IfNotPresent
1010
pullSecrets:
1111
- ngc-secret

test/e2e/e2e_test.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,7 @@ var (
7878
ImageTag string
7979
ImagePullPolicy string
8080
CollectLogsFrom string
81+
AdmissionControllerEnabled bool
8182
Timeout time.Duration
8283

8384
// k8s clients.
@@ -106,6 +107,7 @@ var (
106107
"namespaces",
107108
"deployments",
108109
"daemonsets",
110+
"jobs",
109111
}
110112

111113
// NEMO microservice variables.
@@ -333,6 +335,8 @@ func getTestEnv() {
333335

334336
CollectLogsFrom = os.Getenv("COLLECT_LOGS_FROM")
335337

338+
AdmissionControllerEnabled = getBoolEnvVar("ADMISSION_CONTROLLER_ENABLED", false)
339+
336340
if EnableNemoMicroservices {
337341
// Entitystore env variables.
338342
NemoEntityStoreRepo = os.Getenv("NEMO_ENTITYSTORE_REPO")

test/e2e/infra/aws.yml

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ spec:
99
keyName: cnt-ci-east-1
1010
privateKey: HOLODECK_PRIVATE_KEY
1111
instance:
12-
type: g6e.xlarge
12+
type: g6e.2xlarge
1313
region: us-east-1
1414
ingressIpRanges:
1515
- 18.190.12.32/32
@@ -29,4 +29,6 @@ spec:
2929
kubernetes:
3030
install: true
3131
installer: kubeadm
32-
version: v1.32.1
32+
version: v1.32.3
33+
crictlVersion: v1.32.0
34+
calicoVersion: v3.29.3

test/e2e/nim-operator_test.go

Lines changed: 36 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ const (
4747
)
4848

4949
// Actual test suite.
50-
var _ = Describe("NIM Operator", func() {
50+
var _ = Describe("NIM Operator", Ordered, func() {
5151

5252
AfterEach(func(ctx context.Context) {
5353
// Run diagnostic collector if test failed
@@ -66,46 +66,45 @@ var _ = Describe("NIM Operator", func() {
6666
}
6767
})
6868

69-
When("deploying the K8s-NIM-Operator via Helm", Ordered, func() {
70-
It("should be successful", func(ctx context.Context) {
71-
// Add or Update Helm repo
72-
helmRepo := repo.Entry{
73-
Name: "nvidia",
74-
URL: nvidiaHelm,
75-
}
76-
err := helmClient.AddOrUpdateChartRepo(helmRepo)
77-
Expect(err).NotTo(HaveOccurred())
69+
BeforeAll(func() {
70+
// Add or Update Helm repo
71+
helmRepo := repo.Entry{
72+
Name: "nvidia",
73+
URL: nvidiaHelm,
74+
}
75+
err := helmClient.AddOrUpdateChartRepo(helmRepo)
76+
Expect(err).NotTo(HaveOccurred())
7877

79-
err = helmClient.UpdateChartRepos()
80-
Expect(err).NotTo(HaveOccurred())
78+
err = helmClient.UpdateChartRepos()
79+
Expect(err).NotTo(HaveOccurred())
8180

82-
pullSecrets := []string{"ngc-secret"}
83-
// Values
84-
values := helmValues.Options{
85-
Values: []string{
86-
fmt.Sprintf("operator.image.repository=%s", ImageRepo),
87-
fmt.Sprintf("operator.image.tag=%s", ImageTag),
88-
fmt.Sprintf("operator.image.pullPolicy=%s", ImagePullPolicy),
89-
fmt.Sprintf("operator.image.pullSecrets={%s}", strings.Join(pullSecrets, ",")),
90-
},
91-
}
81+
pullSecrets := []string{"ngc-secret"}
82+
// Values
83+
values := helmValues.Options{
84+
Values: []string{
85+
fmt.Sprintf("operator.image.repository=%s", ImageRepo),
86+
fmt.Sprintf("operator.image.tag=%s", ImageTag),
87+
fmt.Sprintf("operator.image.pullPolicy=%s", ImagePullPolicy),
88+
fmt.Sprintf("operator.image.pullSecrets={%s}", strings.Join(pullSecrets, ",")),
89+
fmt.Sprintf("operator.admissionController.enabled=%t", AdmissionControllerEnabled),
90+
},
91+
}
9292

93-
// Chart spec
94-
chartSpec := &helm.ChartSpec{
95-
ReleaseName: helmReleaseName,
96-
ChartName: helmChart,
97-
Namespace: testNamespace.Name,
98-
CreateNamespace: true,
99-
Wait: true,
100-
Timeout: 10 * time.Minute, // pull time is long
101-
ValuesOptions: values,
102-
CleanupOnFail: true,
103-
}
93+
// Chart spec
94+
chartSpec := &helm.ChartSpec{
95+
ReleaseName: helmReleaseName,
96+
ChartName: helmChart,
97+
Namespace: testNamespace.Name,
98+
CreateNamespace: true,
99+
Wait: true,
100+
Timeout: 10 * time.Minute, // pull time is long
101+
ValuesOptions: values,
102+
CleanupOnFail: true,
103+
}
104104

105-
By("Installing k8s-nim-operator Helm chart")
106-
_, err = helmClient.InstallOrUpgradeChart(ctx, chartSpec, nil)
107-
Expect(err).NotTo(HaveOccurred())
108-
})
105+
By("Installing k8s-nim-operator Helm chart")
106+
_, err = helmClient.InstallOrUpgradeChart(ctx, chartSpec, nil)
107+
Expect(err).NotTo(HaveOccurred())
109108
})
110109

111110
When("deploying NIMCache and NIMService", Ordered, func() {

0 commit comments

Comments
 (0)