Skip to content

Commit 6a63ef7

Browse files
authored
Merge pull request #397 from NVIDIA/holodeck-kernelupgrade
holodeck kernel version upgrade
2 parents 407b170 + 404c815 commit 6a63ef7

File tree

5 files changed

+8
-95
lines changed

5 files changed

+8
-95
lines changed

.github/workflows/precompiled.yaml

Lines changed: 4 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -319,9 +319,12 @@ jobs:
319319
driver_branch_json="${{ needs.set-driver-version-matrix.outputs.driver_branch }}"
320320
DRIVER_BRANCHES=($(echo "$driver_branch_json" | jq -r '.[]'))
321321
echo "DRIVER_BRANCHES=${DRIVER_BRANCHES[*]}" >> $GITHUB_ENV
322+
- name: Set kernel version in holodeck_${{ env.DIST }}.yaml
323+
run: |
324+
yq eval '.spec += {"kernel": {"version": strenv(KERNEL_VERSION)}}' -i tests/holodeck_${{ env.DIST }}.yaml
322325
323326
- name: Set up Holodeck
324-
uses: NVIDIA/holodeck@v0.2.6
327+
uses: NVIDIA/holodeck@v0.2.15
325328
env:
326329
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
327330
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
@@ -346,27 +349,6 @@ jobs:
346349
sudo apt-get update
347350
sudo apt-get install -y gh
348351
349-
- name: Upgrade the kernel for Precompiled e2e test
350-
env:
351-
UPGRADE_KERNEL_SCRIPT: "./tests/scripts/upgrade-kernel.sh"
352-
run: |
353-
status=0
354-
./tests/ci-remote-exec.sh "${UPGRADE_KERNEL_SCRIPT}" "${KERNEL_VERSION}" || status=$?
355-
# On the target system, all scripts/test-case exit with code 1 for error handling.
356-
# However, since reboot-related disconnections break the SSH connection
357-
# and can cause the entire job to exit, we should ignore all errors except
358-
# exit code 1. During a reboot, exit code 1 will not be thrown, so handling
359-
# other errors as code 1 will ensure proper management of reboot scenarios
360-
if [ $status -eq 1 ]; then
361-
echo "Kernel version $KERNEL_VERSION upgrade failed"
362-
exit 1
363-
fi
364-
./tests/scripts/remote_retry.sh || status=$?
365-
if [ $status -ne 0 ]; then
366-
echo "Failed to connect to remote instance"
367-
exit $status
368-
fi
369-
370352
- name: Precompiled e2e test gpu driver validation
371353
env:
372354
TEST_CASE: "./tests/cases/nvidia-driver.sh"

tests/holodeck_ubuntu22.04.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,5 +30,5 @@ spec:
3030
kubernetes:
3131
install: true
3232
installer: kubeadm
33-
version: v1.28.5
34-
crictlVersion: v1.28.0
33+
version: v1.33.0
34+
crictlVersion: v1.33.0

tests/holodeck_ubuntu24.04.yaml

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -27,9 +27,8 @@ spec:
2727
containerRuntime:
2828
install: true
2929
name: containerd
30-
version: 1.7.22
3130
kubernetes:
3231
install: true
3332
installer: kubeadm
34-
version: v1.30.0
35-
crictlVersion: v1.30.0
33+
version: v1.33.0
34+
crictlVersion: v1.33.0

tests/scripts/kernel-upgrade-helper.sh

Lines changed: 0 additions & 54 deletions
This file was deleted.

tests/scripts/upgrade-kernel.sh

Lines changed: 0 additions & 14 deletions
This file was deleted.

0 commit comments

Comments
 (0)