diff --git a/Makefile b/Makefile
index 6006000c89..76c0984a62 100644
--- a/Makefile
+++ b/Makefile
@@ -291,7 +291,7 @@ simple-tests: unit-tests # Compatibility target.
 .PHONY: simple-tests
 
 # Images needed for GPU smoke tests.
-gpu-smoke-images: load-gpu_cuda-tests load-gpu_cuda-tests-12-8
+gpu-smoke-images: load-gpu_cuda-tests.12.2.2 load-gpu_cuda-tests.12.8.1
 .PHONY: gpu-smoke-images
 
 gpu-smoke-tests: gpu-smoke-images $(RUNTIME_BIN)
@@ -338,7 +338,7 @@ cos-gpu-all-tests: gpu-images cos-gpu-smoke-tests $(RUNTIME_BIN)
 	@$(call sudo,test/gpu:sniffer_test,--runtime=$(RUNTIME) -test.v --cos-gpu $(ARGS))
 .PHONY: cos-gpu-all-tests
 
-cuda-tests: load-basic_alpine load-gpu_cuda-tests $(RUNTIME_BIN)
+cuda-tests: load-basic_alpine load-gpu_cuda-tests.12.2.2 $(RUNTIME_BIN)
 	@$(call install_runtime,$(RUNTIME),--nvproxy=true --nvproxy-docker=true --nvproxy-allowed-driver-capabilities=all)
 	@$(call sudo,test/gpu:cuda_test,--runtime=$(RUNTIME) -test.v $(ARGS))
 .PHONY: cuda-tests
diff --git a/images/README.md b/images/README.md
index 83e681d47f..f383679a36 100644
--- a/images/README.md
+++ b/images/README.md
@@ -67,3 +67,65 @@ achieved by specifying `ARCH` variable to make. For example:
 ```
 $ make ARCH=aarch64 rebuild-default
 ```
+
+## Templated images
+
+If an image directory name ends in `.tmpl`, it is excluded from the set of images
+that the `Makefile` recognizes. Instead, this directory serves as a template
+from which other images are instantiated.
+
+For example, given the following filesystem structure:
+
+```
+images/
+├─ my-little-image.tmpl/
+│  └─ Dockerfile
+├─ my-little-image.foo.bar → my-little-image.tmpl (symlink)
+├─ my-little-image.baz.qux → my-little-image.tmpl (symlink)
+└─ this README.md file
+```
+
+Then this will effectively create two images, `my-little-image.foo.bar` and
+`my-little-image.baz.qux`. It will not create a `my-little-image.tmpl` image.
+
+The behavior of the template instance images is determined by the
+`TEMPLATE_VERSION` build argument passed to `my-little-image.tmpl/Dockerfile`.
+This argument takes on the value of everything after the first `.` character of
+the last component of the template instance image name. For example, the image
+`my-little-image.foo.bar` will be built with `docker build
+--build-arg=TEMPLATE_VERSION=foo.bar`, whereas the `my-little-image.baz.qux` image
+will be built with `docker build --build-arg=TEMPLATE_VERSION=baz.qux`. The
+`my-little-image.tmpl/Dockerfile` image definition file can use this variable to
+make the necessary tweaks to distinguish these two images.
+
+Note that build arguments do not carry across `FROM` lines in a `Dockerfile`
+unless they are explicitly re-declared afterwards. For example, this will not work:
+
+```dockerfile
+# You should put this line at the top of the file to clearly indicate
+# to users that don't use the `Makefile` build system that they are going
+# to be building an image that doesn't make sense:
+ARG TEMPLATE_VERSION=POPULATED_BY_BUILD_SYSTEM
+
+FROM base-image:${TEMPLATE_VERSION}-alpine
+
+# WRONG: TEMPLATE_VERSION will not be defined here!
+# This will try cloning the empty string branch.
+RUN git clone https://some-url --branch="${TEMPLATE_VERSION}"
+```
+
+This will work:
+
+```dockerfile
+ARG TEMPLATE_VERSION=POPULATED_BY_BUILD_SYSTEM
+
+FROM base-image:${TEMPLATE_VERSION}-alpine
+
+# CORRECT: This declares that TEMPLATE_VERSION should be inherited from the
+# previous build stage; the lack of value assignment means that its value
+# should be carried over as-is.
+ARG TEMPLATE_VERSION
+
+# TEMPLATE_VERSION will be defined here.
+RUN git clone https://some-url --branch="${TEMPLATE_VERSION}"
+```
diff --git a/images/gpu/cuda-tests-12-8/Dockerfile b/images/gpu/cuda-tests-12-8/Dockerfile
deleted file mode 100644
index c76d554d82..0000000000
--- a/images/gpu/cuda-tests-12-8/Dockerfile
+++ /dev/null
@@ -1,49 +0,0 @@
-FROM nvidia/cuda:12.8.1-devel-ubuntu22.04
-
-WORKDIR /
-ENV PATH=$PATH:/usr/local/nvidia/bin
-RUN export DEBIAN_FRONTEND=noninteractive; \
-    apt-get update && \
-    apt-get install -y \
-      build-essential \
-      cmake \
-      freeglut3 freeglut3-dev \
-      git \
-      golang \
-      imagemagick \
-      libegl-dev \
-      libfreeimage3 libfreeimage-dev \
-      libfreeimageplus3 libfreeimageplus-dev \
-      libgles2-mesa-dev \
-      libglfw3 libglfw3-dev \
-      libglu1-mesa libglu1-mesa-dev \
-      libxi-dev \
-      libxmu-dev \
-      llvm \
-      mpich \
-      pkg-config \
-      vim \
-      x11-xserver-utils \
-      xdotool \
-      xvfb \
-      zlib1g zlib1g-dev
-
-RUN git clone  \
-      https://github.com/NVIDIA/cuda-samples.git /cuda-samples && cd /cuda-samples && \
-      git checkout 7b60178984e96bc09d066077d5455df71fee2a9f && cd /
-
-RUN apt install -y wget && apt -y purge golang*
-
-RUN wget https://go.dev/dl/go1.24.1.linux-amd64.tar.gz && tar -C /usr/local -xzf go1.24.1.linux-amd64.tar.gz && \
-   ln -s /usr/local/go/bin/go /usr/local/bin/go
-
-ADD *.cu *.h *.sh *.py *.cc /
-
-RUN chmod 555 /*.sh && gcc -o /unsupported_ioctl /unsupported_ioctl.cc
-
-RUN mkdir /cuda-samples/build && cd /cuda-samples/build && \
-      cmake ..
-
-# Override entrypoint to nothing, otherwise all invocations will have
-# a copyright notice printed, which breaks parsing the stdout logs.
-ENTRYPOINT []
\ No newline at end of file
diff --git a/images/gpu/cuda-tests-12-8/cuda_malloc.cu b/images/gpu/cuda-tests-12-8/cuda_malloc.cu
deleted file mode 100644
index d837f097bf..0000000000
--- a/images/gpu/cuda-tests-12-8/cuda_malloc.cu
+++ /dev/null
@@ -1,205 +0,0 @@
-// Copyright 2023 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include <cuda_runtime.h>
-#include <err.h>
-#include <errno.h>
-#include <stdlib.h>
-#include <unistd.h>
-
-#include <cstdint>
-#include <iostream>
-#include <random>
-
-#include "cuda_test_util.h"  // NOLINT(build/include)
-
-__global__ void addKernel(std::uint32_t* data) {
-  size_t index = blockIdx.x * blockDim.x + threadIdx.x;
-  data[index] += static_cast<std::uint32_t>(index);
-}
-
-void TestMallocManagedRoundTrip(int device, unsigned int malloc_flags,
-                                bool prefetch) {
-  constexpr size_t kNumBlocks = 32;
-  constexpr size_t kNumThreads = 64;
-  constexpr size_t kNumElems = kNumBlocks * kNumThreads;
-
-  std::uint32_t* data = nullptr;
-  constexpr size_t kNumBytes = kNumElems * sizeof(*data);
-  CHECK_CUDA(cudaMallocManaged(&data, kNumBytes, malloc_flags));
-
-  // Initialize all elements in the array with a random value on the host.
-  std::default_random_engine rd;
-  const std::uint32_t init_val =
-      std::uniform_int_distribution<std::uint32_t>()(rd);
-  for (size_t i = 0; i < kNumElems; i++) {
-    data[i] = init_val;
-  }
-
-  if (prefetch) {
-    CHECK_CUDA(cudaMemPrefetchAsync(data, kNumBytes, device));
-  }
-
-  // Mutate the array on the device.
-  addKernel<<<kNumBlocks, kNumThreads>>>(data);
-  CHECK_CUDA(cudaGetLastError());
-  CHECK_CUDA(cudaDeviceSynchronize());
-
-  if (prefetch) {
-    CHECK_CUDA(cudaMemPrefetchAsync(data, kNumBytes, cudaCpuDeviceId));
-  }
-
-  // Check that the array has the expected result.
-  for (size_t i = 0; i < kNumElems; i++) {
-    std::uint32_t want = init_val + static_cast<std::uint32_t>(i);
-    if (data[i] != want) {
-      std::cout << "data[" << i << "]: got " << data[i] << ", wanted " << want
-                << " = " << init_val << " + " << i << std::endl;
-      abort();
-    }
-  }
-
-  CHECK_CUDA(cudaFree(data));
-}
-
-void TestMallocManagedReadWrite(int device) {
-  constexpr size_t kNumBlocks = 32;
-  constexpr size_t kNumThreads = 64;
-  constexpr size_t kNumElems = kNumBlocks * kNumThreads;
-
-  std::uint32_t* data = nullptr;
-  constexpr size_t kNumBytes = kNumElems * sizeof(*data);
-  CHECK_CUDA(cudaMallocManaged(&data, kNumBytes, cudaMemAttachGlobal));
-
-  // Initialize all elements in the array with a random value on the host.
-  std::default_random_engine rd;
-  const std::uint32_t init_val =
-      std::uniform_int_distribution<std::uint32_t>()(rd);
-  for (size_t i = 0; i < kNumElems; i++) {
-    data[i] = init_val;
-  }
-
-  // Write the array's contents to a temporary file.
-  char filename[] = "/tmp/cudaMallocManagedTest.XXXXXX";
-  int fd = mkstemp(filename);
-  if (fd < 0) {
-    err(1, "mkstemp");
-  }
-  size_t done = 0;
-  while (done < kNumBytes) {
-    ssize_t n = write(fd, reinterpret_cast<char*>(data) + done,
-                      kNumBytes - done);
-    if (n >= 0) {
-      done += n;
-    } else if (n < 0 && errno != EINTR) {
-      err(1, "write");
-    }
-  }
-
-  // Mutate the array on the device.
-  addKernel<<<kNumBlocks, kNumThreads>>>(data);
-  CHECK_CUDA(cudaGetLastError());
-  CHECK_CUDA(cudaDeviceSynchronize());
-
-  // Check that the array has the expected result.
-  for (size_t i = 0; i < kNumElems; i++) {
-    std::uint32_t want = init_val + static_cast<std::uint32_t>(i);
-    if (data[i] != want) {
-      std::cout << "data[" << i << "]: got " << data[i] << ", wanted " << want
-                << " = " << init_val << " + " << i << std::endl;
-      abort();
-    }
-  }
-
-  // Read the array's original contents back from the temporary file.
-  if (lseek(fd, 0, SEEK_SET) < 0) {
-    err(1, "lseek");
-  }
-  done = 0;
-  while (done < kNumBytes) {
-    ssize_t n = read(fd, reinterpret_cast<char*>(data) + done,
-                     kNumBytes - done);
-    if (n > 0) {
-      done += n;
-    } else if (n == 0) {
-      errx(1, "read: unexpected EOF after %zu bytes", done);
-    } else if (n < 0 && errno != EINTR) {
-      err(1, "read");
-    }
-  }
-
-  // Check that the array matches what we originally wrote.
-  for (size_t i = 0; i < kNumElems; i++) {
-    std::uint32_t want = init_val;
-    if (data[i] != want) {
-      std::cout << "data[" << i << "]: got " << data[i] << ", wanted " << want
-                << " = " << init_val << " + " << i << std::endl;
-      abort();
-    }
-  }
-
-  // Mutate the array on the device again.
-  addKernel<<<kNumBlocks, kNumThreads>>>(data);
-  CHECK_CUDA(cudaGetLastError());
-  CHECK_CUDA(cudaDeviceSynchronize());
-
-  // Check that the array has the expected result again.
-  for (size_t i = 0; i < kNumElems; i++) {
-    std::uint32_t want = init_val + static_cast<std::uint32_t>(i);
-    if (data[i] != want) {
-      std::cout << "data[" << i << "]: got " << data[i] << ", wanted " << want
-                << " = " << init_val << " + " << i << std::endl;
-      abort();
-    }
-  }
-
-  close(fd);
-  CHECK_CUDA(cudaFree(data));
-}
-
-int main() {
-  int device;
-  CHECK_CUDA(cudaGetDevice(&device));
-
-  std::cout << "Testing cudaMallocManaged(flags=cudaMemAttachGlobal)"
-            << std::endl;
-  TestMallocManagedRoundTrip(device, cudaMemAttachGlobal, false);
-
-  int cma = 0;
-  CHECK_CUDA(
-      cudaDeviceGetAttribute(&cma, cudaDevAttrConcurrentManagedAccess, device));
-  if (!cma) {
-    std::cout << "cudaDevAttrConcurrentManagedAccess not available"
-              << std::endl;
-  } else {
-    std::cout << "Testing cudaMallocManaged(flags=cudaMemAttachGlobal) "
-                 "with prefetching"
-              << std::endl;
-    TestMallocManagedRoundTrip(device, cudaMemAttachGlobal, true);
-    std::cout << "Testing cudaMallocManaged(flags=cudaMemAttachHost)"
-              << std::endl;
-    TestMallocManagedRoundTrip(device, cudaMemAttachHost, false);
-    std::cout << "Testing cudaMallocManaged(flags=cudaMemAttachHost) "
-                 "with prefetching"
-              << std::endl;
-    TestMallocManagedRoundTrip(device, cudaMemAttachHost, true);
-  }
-
-  std::cout << "Testing read/write syscalls on cudaMallocManaged memory"
-            << std::endl;
-  TestMallocManagedReadWrite(device);
-
-  std::cout << "All tests passed" << std::endl;
-  return 0;
-}
diff --git a/images/gpu/cuda-tests-12-8/run_cuda_test.py b/images/gpu/cuda-tests-12-8/run_cuda_test.py
deleted file mode 100644
index b2d6670c7b..0000000000
--- a/images/gpu/cuda-tests-12-8/run_cuda_test.py
+++ /dev/null
@@ -1,56 +0,0 @@
-# Copyright 2025 The gVisor Authors.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import argparse
-import os
-import shlex
-import subprocess
-import sys
-
-ROOT_CMAKE_DIR = '/cuda-samples/build'
-SAMPLES_DIR = ROOT_CMAKE_DIR + '/Samples'
-
-parser = argparse.ArgumentParser()
-
-parser.add_argument(
-    'test',
-    help=(
-        'Test to run. This should be some thing like'
-        ' "0_Introduction/UnifiedMemoryStreams"'
-    ),
-    type=str,
-)
-
-
-def run_test(test_dir: str = ''):
-  make_path = os.path.join(SAMPLES_DIR, test_dir)
-  cmd = shlex.split(f'make -C {make_path}')
-  subprocess.run(
-      args=cmd,
-      check=True,
-      stdout=sys.stdout,
-      stderr=sys.stderr,
-  )
-  test_path = [os.path.join(make_path, os.path.basename(test_dir))]
-  subprocess.run(
-      args=test_path,
-      check=True,
-      stdout=sys.stdout,
-      stderr=sys.stderr,
-  )
-
-
-if __name__ == '__main__':
-  args = parser.parse_args(sys.argv[1:])
-  run_test(test_dir=args.test)
diff --git a/images/gpu/cuda-tests-12-8/run_smoke.sh b/images/gpu/cuda-tests-12-8/run_smoke.sh
deleted file mode 100755
index d261c6e0a2..0000000000
--- a/images/gpu/cuda-tests-12-8/run_smoke.sh
+++ /dev/null
@@ -1,22 +0,0 @@
-#!/bin/bash
-
-# Copyright 2021 The gVisor Authors.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-set -euxo pipefail
-
-cd /
-nvcc cuda_malloc.cu -o cuda_malloc -Wno-deprecated-gpu-targets
-./cuda_malloc
-echo 'All tests passed'
diff --git a/images/gpu/cuda-tests.12.2.2 b/images/gpu/cuda-tests.12.2.2
new file mode 120000
index 0000000000..25b3047a4a
--- /dev/null
+++ b/images/gpu/cuda-tests.12.2.2
@@ -0,0 +1 @@
+cuda-tests.tmpl
\ No newline at end of file
diff --git a/images/gpu/cuda-tests.12.8.1 b/images/gpu/cuda-tests.12.8.1
new file mode 120000
index 0000000000..25b3047a4a
--- /dev/null
+++ b/images/gpu/cuda-tests.12.8.1
@@ -0,0 +1 @@
+cuda-tests.tmpl
\ No newline at end of file
diff --git a/images/gpu/cuda-tests/Dockerfile b/images/gpu/cuda-tests.tmpl/Dockerfile
similarity index 76%
rename from images/gpu/cuda-tests/Dockerfile
rename to images/gpu/cuda-tests.tmpl/Dockerfile
index 7196ba1a96..9ec5a532de 100644
--- a/images/gpu/cuda-tests/Dockerfile
+++ b/images/gpu/cuda-tests.tmpl/Dockerfile
@@ -1,8 +1,10 @@
-FROM nvidia/cuda:12.2.2-devel-ubuntu22.04
+ARG TEMPLATE_VERSION=PLEASE_USE_MAKEFILE_TO_BUILD_THIS_IMAGE
 
-# From: https://github.com/NVIDIA/cuda-samples/releases
-# Ideally, pick a release that matches the CUDA version of the image above.
-ARG CUDA_SAMPLES_VERSION=v12.2
+FROM nvidia/cuda:${TEMPLATE_VERSION}-devel-ubuntu22.04
+
+# Necessary to keep TEMPLATE_VERSION available for future
+# lines in this Dockerfile past the FROM.
+ARG TEMPLATE_VERSION
 
 WORKDIR /
 COPY *.cu *.h *.sh *.go *.cc /
@@ -33,7 +35,9 @@ RUN export DEBIAN_FRONTEND=noninteractive; \
       zlib1g zlib1g-dev \
     && \
     chmod 555 /*.sh && \
-    git clone --depth=1 --branch="$CUDA_SAMPLES_VERSION" --single-branch \
+    git clone --depth=1 \
+      --branch="v$(echo "${TEMPLATE_VERSION}" | cut -d. -f1-2)" \
+      --single-branch \
       https://github.com/NVIDIA/cuda-samples.git /cuda-samples && \
     go install \
       github.com/TheZoraiz/ascii-image-converter@d05a757c5e02ab23e97b6f6fca4e1fbeb10ab559 && \
diff --git a/images/gpu/cuda-tests/cuda_malloc.cu b/images/gpu/cuda-tests.tmpl/cuda_malloc.cu
similarity index 100%
rename from images/gpu/cuda-tests/cuda_malloc.cu
rename to images/gpu/cuda-tests.tmpl/cuda_malloc.cu
diff --git a/images/gpu/cuda-tests-12-8/cuda_test_util.h b/images/gpu/cuda-tests.tmpl/cuda_test_util.h
similarity index 100%
rename from images/gpu/cuda-tests-12-8/cuda_test_util.h
rename to images/gpu/cuda-tests.tmpl/cuda_test_util.h
diff --git a/images/gpu/cuda-tests/list_features.cu b/images/gpu/cuda-tests.tmpl/list_features.cu
similarity index 100%
rename from images/gpu/cuda-tests/list_features.cu
rename to images/gpu/cuda-tests.tmpl/list_features.cu
diff --git a/images/gpu/cuda-tests/list_features.sh b/images/gpu/cuda-tests.tmpl/list_features.sh
old mode 100644
new mode 100755
similarity index 100%
rename from images/gpu/cuda-tests/list_features.sh
rename to images/gpu/cuda-tests.tmpl/list_features.sh
diff --git a/images/gpu/cuda-tests/list_sample_tests.sh b/images/gpu/cuda-tests.tmpl/list_sample_tests.sh
old mode 100644
new mode 100755
similarity index 100%
rename from images/gpu/cuda-tests/list_sample_tests.sh
rename to images/gpu/cuda-tests.tmpl/list_sample_tests.sh
diff --git a/images/gpu/cuda-tests/run_sample.go b/images/gpu/cuda-tests.tmpl/run_sample.go
similarity index 100%
rename from images/gpu/cuda-tests/run_sample.go
rename to images/gpu/cuda-tests.tmpl/run_sample.go
diff --git a/images/gpu/cuda-tests/run_smoke.sh b/images/gpu/cuda-tests.tmpl/run_smoke.sh
similarity index 100%
rename from images/gpu/cuda-tests/run_smoke.sh
rename to images/gpu/cuda-tests.tmpl/run_smoke.sh
diff --git a/images/gpu/cuda-tests-12-8/unsupported_ioctl.cc b/images/gpu/cuda-tests.tmpl/unsupported_ioctl.cc
similarity index 100%
rename from images/gpu/cuda-tests-12-8/unsupported_ioctl.cc
rename to images/gpu/cuda-tests.tmpl/unsupported_ioctl.cc
diff --git a/images/gpu/cuda-tests/cuda_test_util.h b/images/gpu/cuda-tests/cuda_test_util.h
deleted file mode 100644
index 2877f78b17..0000000000
--- a/images/gpu/cuda-tests/cuda_test_util.h
+++ /dev/null
@@ -1,42 +0,0 @@
-// Copyright 2023 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef THIRD_PARTY_GVISOR_IMAGES_GPU_CUDA_TESTS_CUDA_TEST_UTIL_H_
-#define THIRD_PARTY_GVISOR_IMAGES_GPU_CUDA_TESTS_CUDA_TEST_UTIL_H_
-
-#include <iostream>
-
-// cudaError_t is returned by CUDA runtime functions.
-#define CHECK_CUDA(expr)                                                     \
-  do {                                                                       \
-    cudaError_t code = (expr);                                               \
-    if (code != cudaSuccess) {                                               \
-      std::cout << "Check failed at " << __FILE__ << ":" << __LINE__ << ": " \
-                << #expr << ": " << cudaGetErrorString(code) << std::endl;   \
-      abort();                                                               \
-    }                                                                        \
-  } while (0)
-
-// CUresult is returned by CUDA driver functions.
-#define CHECK_CUDA_RESULT(expr)                                              \
-  do {                                                                       \
-    CUresult code = (expr);                                                  \
-    if (code != CUDA_SUCCESS) {                                              \
-      std::cout << "Check failed at " << __FILE__ << ":" << __LINE__ << ": " \
-                << #expr << ": " << code << std::endl;                       \
-      abort();                                                               \
-    }                                                                        \
-  } while (0)
-
-#endif  // THIRD_PARTY_GVISOR_IMAGES_GPU_CUDA_TESTS_CUDA_TEST_UTIL_H_
diff --git a/images/gpu/cuda-tests/unsupported_ioctl.cc b/images/gpu/cuda-tests/unsupported_ioctl.cc
deleted file mode 100644
index e2f81b2679..0000000000
--- a/images/gpu/cuda-tests/unsupported_ioctl.cc
+++ /dev/null
@@ -1,28 +0,0 @@
-// Copyright 2024 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include <fcntl.h>
-#include <sys/ioctl.h>
-
-// This test makes a non-existing ioctl call to the nvidia driver.
-//  It's used to test that ioctl_sniffer is catching unsupported ioctls.
-int main() {
-  int fd = open("/dev/nvidiactl", O_RDWR);
-  if (fd < 0) {
-    return 1;
-  }
-
-  ioctl(fd, 0, nullptr);
-  return 0;
-}
diff --git a/pkg/test/dockerutil/gpu.go b/pkg/test/dockerutil/gpu.go
index b23eec4e98..714d510075 100644
--- a/pkg/test/dockerutil/gpu.go
+++ b/pkg/test/dockerutil/gpu.go
@@ -305,7 +305,7 @@ func MaxSuportedCUDAVersion(ctx context.Context, t *testing.T) (*CudaVersion, er
 	if err != nil {
 		return nil, fmt.Errorf("could not create opts: %w", err)
 	}
-	opts.Image = "gpu/cuda-tests"
+	opts.Image = "gpu/cuda-tests.12.2.2"
 
 	out, err := c.Run(ctx, opts, "nvidia-smi")
 	if err != nil {
diff --git a/test/gpu/cuda_test.go b/test/gpu/cuda_test.go
index 83f4ec1c7b..e62fd79ddd 100644
--- a/test/gpu/cuda_test.go
+++ b/test/gpu/cuda_test.go
@@ -137,7 +137,7 @@ func TestCUDA(t *testing.T) {
 		FlakyTests:             flakyTests,
 		ExclusiveTests:         exclusiveTests,
 		AlwaysSkippedTests:     alwaysSkippedTests,
-		Image:                  "gpu/cuda-tests",
+		Image:                  "gpu/cuda-tests.12.2.2",
 	}
 	cuda.RunCudaTests(ctx, t, args)
 }
diff --git a/test/gpu/smoke_test.go b/test/gpu/smoke_test.go
index e74db6d85d..03cdffa89e 100644
--- a/test/gpu/smoke_test.go
+++ b/test/gpu/smoke_test.go
@@ -34,7 +34,7 @@ func TestNvidiaSmi(t *testing.T) {
 	if err != nil {
 		t.Fatalf("failed to get GPU run options: %v", err)
 	}
-	opts.Image = "gpu/cuda-tests"
+	opts.Image = "gpu/cuda-tests.12.2.2"
 	cmd := "nvidia-smi"
 	out, err := c.Run(ctx, opts, cmd)
 	t.Logf("%q output:", cmd)
@@ -57,7 +57,7 @@ func TestGPUHello(t *testing.T) {
 	if err != nil {
 		t.Fatalf("failed to get GPU run options: %v", err)
 	}
-	opts.Image = "gpu/cuda-tests"
+	opts.Image = "gpu/cuda-tests.12.2.2"
 	out, err := c.Run(ctx, opts, "/run_sample", "--timeout=120s", "0_Introduction/vectorAdd")
 	t.Logf("0_Introduction/vectorAdd output: %s", string(out))
 	if err != nil {
@@ -84,7 +84,7 @@ func TestGPUHello_12_8(t *testing.T) {
 	if err != nil {
 		t.Fatalf("failed to get GPU run options: %v", err)
 	}
-	opts.Image = "gpu/cuda-tests-12-8"
+	opts.Image = "gpu/cuda-tests.12.8.1"
 	out, err := c.Run(ctx, opts, "python3", "run_cuda_test.py", "0_Introduction/vectorAdd")
 	t.Logf("0_Introduction/vectorAdd output: %s", string(out))
 	if err != nil {
@@ -93,7 +93,7 @@ func TestGPUHello_12_8(t *testing.T) {
 }
 
 func TestCUDASmokeTests(t *testing.T) {
-	runCUDASmokeTests(context.Background(), t, "gpu/cuda-tests")
+	runCUDASmokeTests(context.Background(), t, "gpu/cuda-tests.12.2.2")
 }
 
 func TestCUDASmokeTests_12_8(t *testing.T) {
@@ -105,7 +105,7 @@ func TestCUDASmokeTests_12_8(t *testing.T) {
 	if !cudaVersion.IsAtLeast(dockerutil.MustParseCudaVersion("12.8")) {
 		t.Skipf("CUDA version %s is not at least 12.8, skipping test", cudaVersion)
 	}
-	runCUDASmokeTests(ctx, t, "gpu/cuda-tests-12-8")
+	runCUDASmokeTests(ctx, t, "gpu/cuda-tests.12.8.1")
 }
 
 func runCUDASmokeTests(ctx context.Context, t *testing.T, image string) {
diff --git a/test/gpu/sniffer_test.go b/test/gpu/sniffer_test.go
index c89254f002..7a101165a8 100644
--- a/test/gpu/sniffer_test.go
+++ b/test/gpu/sniffer_test.go
@@ -42,7 +42,7 @@ func runCUDATestsCommand(t *testing.T, cmd ...string) (string, error) {
 	if err != nil {
 		return "", fmt.Errorf("failed to get GPU run options: %w", err)
 	}
-	opts.Image = "gpu/cuda-tests"
+	opts.Image = "gpu/cuda-tests.12.2.2"
 	return container.Run(ctx, opts, cmd...)
 }
 
diff --git a/test/gpu/sr_test.go b/test/gpu/sr_test.go
index 95fcb2b7db..b54266c923 100644
--- a/test/gpu/sr_test.go
+++ b/test/gpu/sr_test.go
@@ -41,7 +41,7 @@ func TestGPUCheckpointRestore(t *testing.T) {
 	if err != nil {
 		t.Fatalf("failed to get GPU run options: %v", err)
 	}
-	opts.Image = "gpu/cuda-tests"
+	opts.Image = "gpu/cuda-tests.12.2.2"
 	if err := c.Spawn(ctx, opts, "sleep", "infinity"); err != nil {
 		t.Fatalf("could not start cuda-tests container: %v", err)
 	}
diff --git a/tools/images.mk b/tools/images.mk
index c9ca263e4e..3197015476 100644
--- a/tools/images.mk
+++ b/tools/images.mk
@@ -42,9 +42,9 @@ endif
 # tests are using locally-defined images (that are consistent and idempotent).
 REMOTE_IMAGE_PREFIX ?= us-central1-docker.pkg.dev/gvisor-presubmit/gvisor-presubmit-images
 LOCAL_IMAGE_PREFIX  ?= gvisor.dev/images
-ALL_IMAGES          := $(subst /,_,$(subst images/,,$(shell find images/ -name Dockerfile -o -name Dockerfile.$(ARCH) | xargs -n 1 dirname | uniq)))
+ALL_IMAGES          := $(subst /,_,$(subst images/,,$(shell find -L images/ '(' -name Dockerfile -o -name Dockerfile.$(ARCH) ')' -a '!' -wholename '*.tmpl/*' | xargs -n 1 dirname | uniq)))
 NON_TEST_IMAGES     := gpu/ollama/bench\|gpu/vllm
-TEST_IMAGES         := $(subst /,_,$(subst images/,,$(shell find images/ -name Dockerfile -o -name Dockerfile.$(ARCH) | xargs -n 1 dirname | uniq | grep -v "$(NON_TEST_IMAGES)")))
+TEST_IMAGES         := $(subst /,_,$(subst images/,,$(shell find -L images/ '(' -name Dockerfile -o -name Dockerfile.$(ARCH) ')' -a '!' -wholename '*.tmpl/*' | xargs -n 1 dirname | uniq | grep -v "$(NON_TEST_IMAGES)")))
 SUB_IMAGES          := $(foreach image,$(ALL_IMAGES),$(if $(findstring _,$(image)),$(image),))
 IMAGE_GROUPS        := $(sort $(foreach image,$(SUB_IMAGES),$(firstword $(subst _, ,$(image)))))
 
@@ -95,6 +95,10 @@ push-all-test-images: $(patsubst %,push-%,$(TEST_IMAGES))
 path = images/$(subst _,/,$(1))
 dockerfile = $$(if [ -f "$(call path,$(1))/Dockerfile.$(ARCH)" ]; then echo Dockerfile.$(ARCH); else echo Dockerfile; fi)
 
+# Expands to a shell command substitution that prints the template version (everything after the first '.' in the image name) wrapped in the given prefix/suffix, or prints nothing if the name contains no '.'.
+# Arguments 1 and 3 are the prefix/suffix; argument 2 is the image name.
+template_version = $$(if echo "$(2)" | grep -qF '.'; then echo "$(1)$$(echo "$(2)" | cut -d. -f2-)$(3)"; fi)
+
 # The tag construct is used to memoize the image generated (see README.md).
 # This scheme is used to enable aggressive caching in a central repository, but
 # ensuring that images will always be sourced using the local files.
@@ -158,9 +162,10 @@ pull-%: register-cross ## Force a repull of the image.
 rebuild = \
   $(call header,REBUILD $(1)) && \
   (T=$$(mktemp -d) && cp -a $(call path,$(1))/* $$T && \
-  $(foreach image,$(shell grep FROM "$(call path,$(1))/$(call dockerfile,$(1))" 2>/dev/null | cut -d' ' -f2),docker pull $(DOCKER_PLATFORM_ARGS) $(image) >&2 &&) \
+  $(foreach image,$(shell grep "^FROM " "$(call path,$(1))/$(call dockerfile,$(1))" 2>/dev/null | sed "s~\$${TEMPLATE_VERSION}~$(call template_version,,$(1),)~g" | cut -d' ' -f2),docker pull $(DOCKER_PLATFORM_ARGS) $(image) >&2 &&) \
   docker build $(DOCKER_PLATFORM_ARGS) \
     -f "$$T/$(call dockerfile,$(1))" \
+    $(call template_version,--build-arg=TEMPLATE_VERSION=,$(1),) \
     -t "$(call remote_image,$(1)):$(call tag,$(1))" \
     -t "$(call remote_image,$(1))":latest \
     $$T >&2 && \