|
26 | 26 | # Image settings |
27 | 27 | base_image: |
28 | 28 | project: ubuntu-os-accelerator-images |
29 | | - family: ubuntu-accelerator-2204-amd64-with-nvidia-570 |
| 29 | + image: ubuntu-accelerator-2204-amd64-with-nvidia-570-v20250425 |
30 | 30 | image_build_machine_type: n2-standard-16 |
31 | 31 | build_slurm_from_git_ref: 6.10.0 |
32 | 32 | # Cluster env settings |
@@ -137,6 +137,22 @@ deployment_groups: |
137 | 137 | ansible.builtin.apt: |
138 | 138 | deb: "{{ cuda_repo_filename }}" |
139 | 139 | state: present |
| 140 | + # The following 3 tasks work around a temporary issue with Ubuntu's |
| 141 | + # packaging of the NVIDIA 570 driver series for kernel 6.8.0-1028. |
| 142 | + - name: Unfreeze 570 driver metapackage |
| 143 | + ansible.builtin.command: |
| 144 | + argv: |
| 145 | + - apt-mark |
| 146 | + - unhold |
| 147 | + - linux-modules-nvidia-570-server-open-gcp |
| 148 | + - name: Remove 570 driver metapackage |
| 149 | + ansible.builtin.apt: |
| 150 | + name: linux-modules-nvidia-570-server-open-gcp |
| 151 | + state: absent |
| 152 | + - name: Install latest 570 driver for kernel |
| 153 | + ansible.builtin.apt: |
| 154 | + name: linux-modules-nvidia-570-server-open-6.8.0-1028-gcp |
| 155 | + state: latest |
140 | 156 | - name: Reduce NVIDIA repository priority |
141 | 157 | ansible.builtin.copy: |
142 | 158 | dest: /etc/apt/preferences.d/cuda-repository-pin-600 |
@@ -221,7 +237,7 @@ deployment_groups: |
221 | 237 | settings: |
222 | 238 | disk_size: $(vars.disk_size_gb) |
223 | 239 | machine_type: $(vars.image_build_machine_type) |
224 | | - source_image_family: $(vars.base_image.family) |
| 240 | + source_image: $(vars.base_image.image) |
225 | 241 | source_image_project_id: [$(vars.base_image.project)] |
226 | 242 | image_family: $(vars.instance_image.family) |
227 | 243 | omit_external_ip: false |
|
0 commit comments