Skip to content

Commit c0b8bed

Browse files
authored
Merge pull request #4059 from tpdownes/fix_a3u_packaging
A3 Ultra Slurm: workaround temporary driver packaging issue
2 parents b1cfe40 + ed06e2f commit c0b8bed

File tree

1 file changed

+18
-2
lines changed

1 file changed

+18
-2
lines changed

examples/machine-learning/a3-ultragpu-8g/a3ultra-slurm-blueprint.yaml

Lines changed: 18 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -26,7 +26,7 @@ vars:
26 26
# Image settings
27 27
base_image:
28 28
project: ubuntu-os-accelerator-images
29 -
family: ubuntu-accelerator-2204-amd64-with-nvidia-570
29 +
image: ubuntu-accelerator-2204-amd64-with-nvidia-570-v20250425
30 30
image_build_machine_type: n2-standard-16
31 31
build_slurm_from_git_ref: 6.10.0
32 32
# Cluster env settings
@@ -137,6 +137,22 @@ deployment_groups:
137 137
ansible.builtin.apt:
138 138
deb: "{{ cuda_repo_filename }}"
139 139
state: present
140 +
# The following 3 tasks work around a temporary issue with Ubuntu
141 +
# packaging of NVIDIA 570 driver series for kernel 6.8.0-1028
142 +
- name: Unfreeze 570 driver metapackage
143 +
ansible.builtin.command:
144 +
argv:
145 +
- apt-mark
146 +
- unhold
147 +
- linux-modules-nvidia-570-server-open-gcp
148 +
- name: Remove 570 driver metapackage
149 +
ansible.builtin.apt:
150 +
name: linux-modules-nvidia-570-server-open-gcp
151 +
state: absent
152 +
- name: Install latest 570 driver for kernel
153 +
ansible.builtin.apt:
154 +
name: linux-modules-nvidia-570-server-open-6.8.0-1028-gcp
155 +
state: latest
140 156
- name: Reduce NVIDIA repository priority
141 157
ansible.builtin.copy:
142 158
dest: /etc/apt/preferences.d/cuda-repository-pin-600
@@ -221,7 +237,7 @@ deployment_groups:
221 237
settings:
222 238
disk_size: $(vars.disk_size_gb)
223 239
machine_type: $(vars.image_build_machine_type)
224 -
source_image_family: $(vars.base_image.family)
240 +
source_image: $(vars.base_image.image)
225 241
source_image_project_id: [$(vars.base_image.project)]
226 242
image_family: $(vars.instance_image.family)
227 243
omit_external_ip: false

0 commit comments

Comments
 (0)