diff --git a/Makefile b/Makefile index 6006000c89..76c0984a62 100644 --- a/Makefile +++ b/Makefile @@ -291,7 +291,7 @@ simple-tests: unit-tests # Compatibility target. .PHONY: simple-tests # Images needed for GPU smoke tests. -gpu-smoke-images: load-gpu_cuda-tests load-gpu_cuda-tests-12-8 +gpu-smoke-images: load-gpu_cuda-tests.12.2.2 load-gpu_cuda-tests.12.8.1 .PHONY: gpu-smoke-images gpu-smoke-tests: gpu-smoke-images $(RUNTIME_BIN) @@ -338,7 +338,7 @@ cos-gpu-all-tests: gpu-images cos-gpu-smoke-tests $(RUNTIME_BIN) @$(call sudo,test/gpu:sniffer_test,--runtime=$(RUNTIME) -test.v --cos-gpu $(ARGS)) .PHONY: cos-gpu-all-tests -cuda-tests: load-basic_alpine load-gpu_cuda-tests $(RUNTIME_BIN) +cuda-tests: load-basic_alpine load-gpu_cuda-tests.12.2.2 $(RUNTIME_BIN) @$(call install_runtime,$(RUNTIME),--nvproxy=true --nvproxy-docker=true --nvproxy-allowed-driver-capabilities=all) @$(call sudo,test/gpu:cuda_test,--runtime=$(RUNTIME) -test.v $(ARGS)) .PHONY: cuda-tests diff --git a/images/README.md b/images/README.md index 83e681d47f..f383679a36 100644 --- a/images/README.md +++ b/images/README.md @@ -67,3 +67,65 @@ achieved by specifying `ARCH` variable to make. For example: ``` $ make ARCH=aarch64 rebuild-default ``` + +## Templated images + +If an image directory ends in `.tmpl`, it will be ignored from the set of images +that the `Makefile` recognizes. Instead, this directory can be used to +instantiate other images. + +For example, given the following filesystem structure: + +``` +images/ +├─ my-little-image.tmpl/ +│ └─ Dockerfile +├─ my-little-image.foo.bar → my-little-image.tmpl (symlink) +├─ my-little-image.baz.qux → my-little-image.tmpl (symlink) +└─ this README.md file +``` + +Then this will effectively create two images, `my-little-image.foo.bar` and +`my-little-image.baz.qux`. It will not create a `my-little-image.tmpl` image. 
+ +The behavior of the template instance images is determined by the +`TEMPLATE_VERSION` build argument passed to `my-little-image.tmpl/Dockerfile`. +This argument takes on the value of everything after the first `.` character of +the last component of the template instance image name. For example, the image +`my-little-image.foo.bar` will be built with `docker build +--build-arg=TEMPLATE_VERSION=foo.bar`, whereas `my-little-image.baz.qux` +will be built with `docker build --build-arg=TEMPLATE_VERSION=baz.qux`. The +`my-little-image.tmpl/Dockerfile` image definition file can use this variable to +make the necessary tweaks to distinguish these two images. + +Note that build arguments do not carry over across `FROM` lines in a +`Dockerfile` unless explicitly redeclared. For example, this will not work: + +```dockerfile +# You should put this line at the top of the file to clearly indicate +# to users that don't use the `Makefile` build system that they are going +# to be building an image that doesn't make sense: +ARG TEMPLATE_VERSION=POPULATED_BY_BUILD_SYSTEM + +FROM base-image:${TEMPLATE_VERSION}-alpine + +# WRONG: TEMPLATE_VERSION will not be defined here! +# This will try cloning the empty string branch. +RUN git clone https://some-url --branch="${TEMPLATE_VERSION}" +``` + +This will work: + +```dockerfile +ARG TEMPLATE_VERSION=POPULATED_BY_BUILD_SYSTEM + +FROM base-image:${TEMPLATE_VERSION}-alpine + +# CORRECT: This redeclares TEMPLATE_VERSION inside the build stage; with no +# value assigned, it takes the value of the global `ARG` declared before +# `FROM` (or the `--build-arg` override, if one was passed). +ARG TEMPLATE_VERSION + +# TEMPLATE_VERSION will be defined here. 
+RUN git clone https://some-url --branch="${TEMPLATE_VERSION}" +``` diff --git a/images/gpu/cuda-tests-12-8/Dockerfile b/images/gpu/cuda-tests-12-8/Dockerfile deleted file mode 100644 index c76d554d82..0000000000 --- a/images/gpu/cuda-tests-12-8/Dockerfile +++ /dev/null @@ -1,49 +0,0 @@ -FROM nvidia/cuda:12.8.1-devel-ubuntu22.04 - -WORKDIR / -ENV PATH=$PATH:/usr/local/nvidia/bin -RUN export DEBIAN_FRONTEND=noninteractive; \ - apt-get update && \ - apt-get install -y \ - build-essential \ - cmake \ - freeglut3 freeglut3-dev \ - git \ - golang \ - imagemagick \ - libegl-dev \ - libfreeimage3 libfreeimage-dev \ - libfreeimageplus3 libfreeimageplus-dev \ - libgles2-mesa-dev \ - libglfw3 libglfw3-dev \ - libglu1-mesa libglu1-mesa-dev \ - libxi-dev \ - libxmu-dev \ - llvm \ - mpich \ - pkg-config \ - vim \ - x11-xserver-utils \ - xdotool \ - xvfb \ - zlib1g zlib1g-dev - -RUN git clone \ - https://github.com/NVIDIA/cuda-samples.git /cuda-samples && cd /cuda-samples && \ - git checkout 7b60178984e96bc09d066077d5455df71fee2a9f && cd / - -RUN apt install -y wget && apt -y purge golang* - -RUN wget https://go.dev/dl/go1.24.1.linux-amd64.tar.gz && tar -C /usr/local -xzf go1.24.1.linux-amd64.tar.gz && \ - ln -s /usr/local/go/bin/go /usr/local/bin/go - -ADD *.cu *.h *.sh *.py *.cc / - -RUN chmod 555 /*.sh && gcc -o /unsupported_ioctl /unsupported_ioctl.cc - -RUN mkdir /cuda-samples/build && cd /cuda-samples/build && \ - cmake .. - -# Override entrypoint to nothing, otherwise all invocations will have -# a copyright notice printed, which breaks parsing the stdout logs. -ENTRYPOINT [] \ No newline at end of file diff --git a/images/gpu/cuda-tests-12-8/cuda_malloc.cu b/images/gpu/cuda-tests-12-8/cuda_malloc.cu deleted file mode 100644 index d837f097bf..0000000000 --- a/images/gpu/cuda-tests-12-8/cuda_malloc.cu +++ /dev/null @@ -1,205 +0,0 @@ -// Copyright 2023 The gVisor Authors. 
-// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include -#include -#include -#include -#include - -#include -#include -#include - -#include "cuda_test_util.h" // NOLINT(build/include) - -__global__ void addKernel(std::uint32_t* data) { - size_t index = blockIdx.x * blockDim.x + threadIdx.x; - data[index] += static_cast(index); -} - -void TestMallocManagedRoundTrip(int device, unsigned int malloc_flags, - bool prefetch) { - constexpr size_t kNumBlocks = 32; - constexpr size_t kNumThreads = 64; - constexpr size_t kNumElems = kNumBlocks * kNumThreads; - - std::uint32_t* data = nullptr; - constexpr size_t kNumBytes = kNumElems * sizeof(*data); - CHECK_CUDA(cudaMallocManaged(&data, kNumBytes, malloc_flags)); - - // Initialize all elements in the array with a random value on the host. - std::default_random_engine rd; - const std::uint32_t init_val = - std::uniform_int_distribution()(rd); - for (size_t i = 0; i < kNumElems; i++) { - data[i] = init_val; - } - - if (prefetch) { - CHECK_CUDA(cudaMemPrefetchAsync(data, kNumBytes, device)); - } - - // Mutate the array on the device. - addKernel<<>>(data); - CHECK_CUDA(cudaGetLastError()); - CHECK_CUDA(cudaDeviceSynchronize()); - - if (prefetch) { - CHECK_CUDA(cudaMemPrefetchAsync(data, kNumBytes, cudaCpuDeviceId)); - } - - // Check that the array has the expected result. 
- for (size_t i = 0; i < kNumElems; i++) { - std::uint32_t want = init_val + static_cast(i); - if (data[i] != want) { - std::cout << "data[" << i << "]: got " << data[i] << ", wanted " << want - << " = " << init_val << " + " << i << std::endl; - abort(); - } - } - - CHECK_CUDA(cudaFree(data)); -} - -void TestMallocManagedReadWrite(int device) { - constexpr size_t kNumBlocks = 32; - constexpr size_t kNumThreads = 64; - constexpr size_t kNumElems = kNumBlocks * kNumThreads; - - std::uint32_t* data = nullptr; - constexpr size_t kNumBytes = kNumElems * sizeof(*data); - CHECK_CUDA(cudaMallocManaged(&data, kNumBytes, cudaMemAttachGlobal)); - - // Initialize all elements in the array with a random value on the host. - std::default_random_engine rd; - const std::uint32_t init_val = - std::uniform_int_distribution()(rd); - for (size_t i = 0; i < kNumElems; i++) { - data[i] = init_val; - } - - // Write the array's contents to a temporary file. - char filename[] = "/tmp/cudaMallocManagedTest.XXXXXX"; - int fd = mkstemp(filename); - if (fd < 0) { - err(1, "mkstemp"); - } - size_t done = 0; - while (done < kNumBytes) { - ssize_t n = write(fd, reinterpret_cast(data) + done, - kNumBytes - done); - if (n >= 0) { - done += n; - } else if (n < 0 && errno != EINTR) { - err(1, "write"); - } - } - - // Mutate the array on the device. - addKernel<<>>(data); - CHECK_CUDA(cudaGetLastError()); - CHECK_CUDA(cudaDeviceSynchronize()); - - // Check that the array has the expected result. - for (size_t i = 0; i < kNumElems; i++) { - std::uint32_t want = init_val + static_cast(i); - if (data[i] != want) { - std::cout << "data[" << i << "]: got " << data[i] << ", wanted " << want - << " = " << init_val << " + " << i << std::endl; - abort(); - } - } - - // Read the array's original contents back from the temporary file. 
- if (lseek(fd, 0, SEEK_SET) < 0) { - err(1, "lseek"); - } - done = 0; - while (done < kNumBytes) { - ssize_t n = read(fd, reinterpret_cast(data) + done, - kNumBytes - done); - if (n > 0) { - done += n; - } else if (n == 0) { - errx(1, "read: unexpected EOF after %zu bytes", done); - } else if (n < 0 && errno != EINTR) { - err(1, "read"); - } - } - - // Check that the array matches what we originally wrote. - for (size_t i = 0; i < kNumElems; i++) { - std::uint32_t want = init_val; - if (data[i] != want) { - std::cout << "data[" << i << "]: got " << data[i] << ", wanted " << want - << " = " << init_val << " + " << i << std::endl; - abort(); - } - } - - // Mutate the array on the device again. - addKernel<<>>(data); - CHECK_CUDA(cudaGetLastError()); - CHECK_CUDA(cudaDeviceSynchronize()); - - // Check that the array has the expected result again. - for (size_t i = 0; i < kNumElems; i++) { - std::uint32_t want = init_val + static_cast(i); - if (data[i] != want) { - std::cout << "data[" << i << "]: got " << data[i] << ", wanted " << want - << " = " << init_val << " + " << i << std::endl; - abort(); - } - } - - close(fd); - CHECK_CUDA(cudaFree(data)); -} - -int main() { - int device; - CHECK_CUDA(cudaGetDevice(&device)); - - std::cout << "Testing cudaMallocManaged(flags=cudaMemAttachGlobal)" - << std::endl; - TestMallocManagedRoundTrip(device, cudaMemAttachGlobal, false); - - int cma = 0; - CHECK_CUDA( - cudaDeviceGetAttribute(&cma, cudaDevAttrConcurrentManagedAccess, device)); - if (!cma) { - std::cout << "cudaDevAttrConcurrentManagedAccess not available" - << std::endl; - } else { - std::cout << "Testing cudaMallocManaged(flags=cudaMemAttachGlobal) " - "with prefetching" - << std::endl; - TestMallocManagedRoundTrip(device, cudaMemAttachGlobal, true); - std::cout << "Testing cudaMallocManaged(flags=cudaMemAttachHost)" - << std::endl; - TestMallocManagedRoundTrip(device, cudaMemAttachHost, false); - std::cout << "Testing cudaMallocManaged(flags=cudaMemAttachHost) " - 
"with prefetching" - << std::endl; - TestMallocManagedRoundTrip(device, cudaMemAttachHost, true); - } - - std::cout << "Testing read/write syscalls on cudaMallocManaged memory" - << std::endl; - TestMallocManagedReadWrite(device); - - std::cout << "All tests passed" << std::endl; - return 0; -} diff --git a/images/gpu/cuda-tests-12-8/run_cuda_test.py b/images/gpu/cuda-tests-12-8/run_cuda_test.py deleted file mode 100644 index b2d6670c7b..0000000000 --- a/images/gpu/cuda-tests-12-8/run_cuda_test.py +++ /dev/null @@ -1,56 +0,0 @@ -# Copyright 2025 The gVisor Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at - -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import argparse -import os -import shlex -import subprocess -import sys - -ROOT_CMAKE_DIR = '/cuda-samples/build' -SAMPLES_DIR = ROOT_CMAKE_DIR + '/Samples' - -parser = argparse.ArgumentParser() - -parser.add_argument( - 'test', - help=( - 'Test to run. 
This should be some thing like' - ' "0_Introduction/UnifiedMemoryStreams"' - ), - type=str, -) - - -def run_test(test_dir: str = ''): - make_path = os.path.join(SAMPLES_DIR, test_dir) - cmd = shlex.split(f'make -C {make_path}') - subprocess.run( - args=cmd, - check=True, - stdout=sys.stdout, - stderr=sys.stderr, - ) - test_path = [os.path.join(make_path, os.path.basename(test_dir))] - subprocess.run( - args=test_path, - check=True, - stdout=sys.stdout, - stderr=sys.stderr, - ) - - -if __name__ == '__main__': - args = parser.parse_args(sys.argv[1:]) - run_test(test_dir=args.test) diff --git a/images/gpu/cuda-tests-12-8/run_smoke.sh b/images/gpu/cuda-tests-12-8/run_smoke.sh deleted file mode 100755 index d261c6e0a2..0000000000 --- a/images/gpu/cuda-tests-12-8/run_smoke.sh +++ /dev/null @@ -1,22 +0,0 @@ -#!/bin/bash - -# Copyright 2021 The gVisor Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -set -euxo pipefail - -cd / -nvcc cuda_malloc.cu -o cuda_malloc -Wno-deprecated-gpu-targets -./cuda_malloc -echo 'All tests passed' diff --git a/images/gpu/cuda-tests.12.2.2 b/images/gpu/cuda-tests.12.2.2 new file mode 120000 index 0000000000..25b3047a4a --- /dev/null +++ b/images/gpu/cuda-tests.12.2.2 @@ -0,0 +1 @@ +cuda-tests.tmpl \ No newline at end of file diff --git a/images/gpu/cuda-tests.12.8.1 b/images/gpu/cuda-tests.12.8.1 new file mode 120000 index 0000000000..25b3047a4a --- /dev/null +++ b/images/gpu/cuda-tests.12.8.1 @@ -0,0 +1 @@ +cuda-tests.tmpl \ No newline at end of file diff --git a/images/gpu/cuda-tests/Dockerfile b/images/gpu/cuda-tests.tmpl/Dockerfile similarity index 76% rename from images/gpu/cuda-tests/Dockerfile rename to images/gpu/cuda-tests.tmpl/Dockerfile index 7196ba1a96..9ec5a532de 100644 --- a/images/gpu/cuda-tests/Dockerfile +++ b/images/gpu/cuda-tests.tmpl/Dockerfile @@ -1,8 +1,10 @@ -FROM nvidia/cuda:12.2.2-devel-ubuntu22.04 +ARG TEMPLATE_VERSION=PLEASE_USE_MAKEFILE_TO_BUILD_THIS_IMAGE -# From: https://github.com/NVIDIA/cuda-samples/releases -# Ideally, pick a release that matches the CUDA version of the image above. -ARG CUDA_SAMPLES_VERSION=v12.2 +FROM nvidia/cuda:${TEMPLATE_VERSION}-devel-ubuntu22.04 + +# Necessary to keep TEMPLATE_VERSION available for future +# lines in this Dockerfile past the FROM. +ARG TEMPLATE_VERSION WORKDIR / COPY *.cu *.h *.sh *.go *.cc / @@ -33,7 +35,9 @@ RUN export DEBIAN_FRONTEND=noninteractive; \ zlib1g zlib1g-dev \ && \ chmod 555 /*.sh && \ - git clone --depth=1 --branch="$CUDA_SAMPLES_VERSION" --single-branch \ + git clone --depth=1 \ + --branch="v$(echo "${TEMPLATE_VERSION}" | cut -d. 
-f1-2)" \ + --single-branch \ https://github.com/NVIDIA/cuda-samples.git /cuda-samples && \ go install \ github.com/TheZoraiz/ascii-image-converter@d05a757c5e02ab23e97b6f6fca4e1fbeb10ab559 && \ diff --git a/images/gpu/cuda-tests/cuda_malloc.cu b/images/gpu/cuda-tests.tmpl/cuda_malloc.cu similarity index 100% rename from images/gpu/cuda-tests/cuda_malloc.cu rename to images/gpu/cuda-tests.tmpl/cuda_malloc.cu diff --git a/images/gpu/cuda-tests-12-8/cuda_test_util.h b/images/gpu/cuda-tests.tmpl/cuda_test_util.h similarity index 100% rename from images/gpu/cuda-tests-12-8/cuda_test_util.h rename to images/gpu/cuda-tests.tmpl/cuda_test_util.h diff --git a/images/gpu/cuda-tests/list_features.cu b/images/gpu/cuda-tests.tmpl/list_features.cu similarity index 100% rename from images/gpu/cuda-tests/list_features.cu rename to images/gpu/cuda-tests.tmpl/list_features.cu diff --git a/images/gpu/cuda-tests/list_features.sh b/images/gpu/cuda-tests.tmpl/list_features.sh old mode 100644 new mode 100755 similarity index 100% rename from images/gpu/cuda-tests/list_features.sh rename to images/gpu/cuda-tests.tmpl/list_features.sh diff --git a/images/gpu/cuda-tests/list_sample_tests.sh b/images/gpu/cuda-tests.tmpl/list_sample_tests.sh old mode 100644 new mode 100755 similarity index 100% rename from images/gpu/cuda-tests/list_sample_tests.sh rename to images/gpu/cuda-tests.tmpl/list_sample_tests.sh diff --git a/images/gpu/cuda-tests/run_sample.go b/images/gpu/cuda-tests.tmpl/run_sample.go similarity index 100% rename from images/gpu/cuda-tests/run_sample.go rename to images/gpu/cuda-tests.tmpl/run_sample.go diff --git a/images/gpu/cuda-tests/run_smoke.sh b/images/gpu/cuda-tests.tmpl/run_smoke.sh similarity index 100% rename from images/gpu/cuda-tests/run_smoke.sh rename to images/gpu/cuda-tests.tmpl/run_smoke.sh diff --git a/images/gpu/cuda-tests-12-8/unsupported_ioctl.cc b/images/gpu/cuda-tests.tmpl/unsupported_ioctl.cc similarity index 100% rename from 
images/gpu/cuda-tests-12-8/unsupported_ioctl.cc rename to images/gpu/cuda-tests.tmpl/unsupported_ioctl.cc diff --git a/images/gpu/cuda-tests/cuda_test_util.h b/images/gpu/cuda-tests/cuda_test_util.h deleted file mode 100644 index 2877f78b17..0000000000 --- a/images/gpu/cuda-tests/cuda_test_util.h +++ /dev/null @@ -1,42 +0,0 @@ -// Copyright 2023 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef THIRD_PARTY_GVISOR_IMAGES_GPU_CUDA_TESTS_CUDA_TEST_UTIL_H_ -#define THIRD_PARTY_GVISOR_IMAGES_GPU_CUDA_TESTS_CUDA_TEST_UTIL_H_ - -#include - -// cudaError_t is returned by CUDA runtime functions. -#define CHECK_CUDA(expr) \ - do { \ - cudaError_t code = (expr); \ - if (code != cudaSuccess) { \ - std::cout << "Check failed at " << __FILE__ << ":" << __LINE__ << ": " \ - << #expr << ": " << cudaGetErrorString(code) << std::endl; \ - abort(); \ - } \ - } while (0) - -// CUresult is returned by CUDA driver functions. 
-#define CHECK_CUDA_RESULT(expr) \ - do { \ - CUresult code = (expr); \ - if (code != CUDA_SUCCESS) { \ - std::cout << "Check failed at " << __FILE__ << ":" << __LINE__ << ": " \ - << #expr << ": " << code << std::endl; \ - abort(); \ - } \ - } while (0) - -#endif // THIRD_PARTY_GVISOR_IMAGES_GPU_CUDA_TESTS_CUDA_TEST_UTIL_H_ diff --git a/images/gpu/cuda-tests/unsupported_ioctl.cc b/images/gpu/cuda-tests/unsupported_ioctl.cc deleted file mode 100644 index e2f81b2679..0000000000 --- a/images/gpu/cuda-tests/unsupported_ioctl.cc +++ /dev/null @@ -1,28 +0,0 @@ -// Copyright 2024 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include -#include - -// This test makes a non-existing ioctl call to the nvidia driver. -// It's used to test that ioctl_sniffer is catching unsupported ioctls. 
-int main() { - int fd = open("/dev/nvidiactl", O_RDWR); - if (fd < 0) { - return 1; - } - - ioctl(fd, 0, nullptr); - return 0; -} diff --git a/pkg/test/dockerutil/gpu.go b/pkg/test/dockerutil/gpu.go index b23eec4e98..714d510075 100644 --- a/pkg/test/dockerutil/gpu.go +++ b/pkg/test/dockerutil/gpu.go @@ -305,7 +305,7 @@ func MaxSuportedCUDAVersion(ctx context.Context, t *testing.T) (*CudaVersion, er if err != nil { return nil, fmt.Errorf("could not create opts: %w", err) } - opts.Image = "gpu/cuda-tests" + opts.Image = "gpu/cuda-tests.12.2.2" out, err := c.Run(ctx, opts, "nvidia-smi") if err != nil { diff --git a/test/gpu/cuda_test.go b/test/gpu/cuda_test.go index 83f4ec1c7b..e62fd79ddd 100644 --- a/test/gpu/cuda_test.go +++ b/test/gpu/cuda_test.go @@ -137,7 +137,7 @@ func TestCUDA(t *testing.T) { FlakyTests: flakyTests, ExclusiveTests: exclusiveTests, AlwaysSkippedTests: alwaysSkippedTests, - Image: "gpu/cuda-tests", + Image: "gpu/cuda-tests.12.2.2", } cuda.RunCudaTests(ctx, t, args) } diff --git a/test/gpu/smoke_test.go b/test/gpu/smoke_test.go index e74db6d85d..03cdffa89e 100644 --- a/test/gpu/smoke_test.go +++ b/test/gpu/smoke_test.go @@ -34,7 +34,7 @@ func TestNvidiaSmi(t *testing.T) { if err != nil { t.Fatalf("failed to get GPU run options: %v", err) } - opts.Image = "gpu/cuda-tests" + opts.Image = "gpu/cuda-tests.12.2.2" cmd := "nvidia-smi" out, err := c.Run(ctx, opts, cmd) t.Logf("%q output:", cmd) @@ -57,7 +57,7 @@ func TestGPUHello(t *testing.T) { if err != nil { t.Fatalf("failed to get GPU run options: %v", err) } - opts.Image = "gpu/cuda-tests" + opts.Image = "gpu/cuda-tests.12.2.2" out, err := c.Run(ctx, opts, "/run_sample", "--timeout=120s", "0_Introduction/vectorAdd") t.Logf("0_Introduction/vectorAdd output: %s", string(out)) if err != nil { @@ -84,7 +84,7 @@ func TestGPUHello_12_8(t *testing.T) { if err != nil { t.Fatalf("failed to get GPU run options: %v", err) } - opts.Image = "gpu/cuda-tests-12-8" + opts.Image = "gpu/cuda-tests.12.8.1" out, err 
:= c.Run(ctx, opts, "python3", "run_cuda_test.py", "0_Introduction/vectorAdd") t.Logf("0_Introduction/vectorAdd output: %s", string(out)) if err != nil { @@ -93,7 +93,7 @@ func TestGPUHello_12_8(t *testing.T) { } func TestCUDASmokeTests(t *testing.T) { - runCUDASmokeTests(context.Background(), t, "gpu/cuda-tests") + runCUDASmokeTests(context.Background(), t, "gpu/cuda-tests.12.2.2") } func TestCUDASmokeTests_12_8(t *testing.T) { @@ -105,7 +105,7 @@ func TestCUDASmokeTests_12_8(t *testing.T) { if !cudaVersion.IsAtLeast(dockerutil.MustParseCudaVersion("12.8")) { t.Skipf("CUDA version %s is not at least 12.8, skipping test", cudaVersion) } - runCUDASmokeTests(ctx, t, "gpu/cuda-tests-12-8") + runCUDASmokeTests(ctx, t, "gpu/cuda-tests.12.8.1") } func runCUDASmokeTests(ctx context.Context, t *testing.T, image string) { diff --git a/test/gpu/sniffer_test.go b/test/gpu/sniffer_test.go index c89254f002..7a101165a8 100644 --- a/test/gpu/sniffer_test.go +++ b/test/gpu/sniffer_test.go @@ -42,7 +42,7 @@ func runCUDATestsCommand(t *testing.T, cmd ...string) (string, error) { if err != nil { return "", fmt.Errorf("failed to get GPU run options: %w", err) } - opts.Image = "gpu/cuda-tests" + opts.Image = "gpu/cuda-tests.12.2.2" return container.Run(ctx, opts, cmd...) } diff --git a/test/gpu/sr_test.go b/test/gpu/sr_test.go index 95fcb2b7db..b54266c923 100644 --- a/test/gpu/sr_test.go +++ b/test/gpu/sr_test.go @@ -41,7 +41,7 @@ func TestGPUCheckpointRestore(t *testing.T) { if err != nil { t.Fatalf("failed to get GPU run options: %v", err) } - opts.Image = "gpu/cuda-tests" + opts.Image = "gpu/cuda-tests.12.2.2" if err := c.Spawn(ctx, opts, "sleep", "infinity"); err != nil { t.Fatalf("could not start cuda-tests container: %v", err) } diff --git a/tools/images.mk b/tools/images.mk index c9ca263e4e..3197015476 100644 --- a/tools/images.mk +++ b/tools/images.mk @@ -42,9 +42,9 @@ endif # tests are using locally-defined images (that are consistent and idempotent). 
REMOTE_IMAGE_PREFIX ?= us-central1-docker.pkg.dev/gvisor-presubmit/gvisor-presubmit-images LOCAL_IMAGE_PREFIX ?= gvisor.dev/images -ALL_IMAGES := $(subst /,_,$(subst images/,,$(shell find images/ -name Dockerfile -o -name Dockerfile.$(ARCH) | xargs -n 1 dirname | uniq))) +ALL_IMAGES := $(subst /,_,$(subst images/,,$(shell find -L images/ '(' -name Dockerfile -o -name Dockerfile.$(ARCH) ')' -a '!' -wholename '*.tmpl/*' | xargs -n 1 dirname | uniq))) NON_TEST_IMAGES := gpu/ollama/bench\|gpu/vllm -TEST_IMAGES := $(subst /,_,$(subst images/,,$(shell find images/ -name Dockerfile -o -name Dockerfile.$(ARCH) | xargs -n 1 dirname | uniq | grep -v "$(NON_TEST_IMAGES)"))) +TEST_IMAGES := $(subst /,_,$(subst images/,,$(shell find -L images/ '(' -name Dockerfile -o -name Dockerfile.$(ARCH) ')' -a '!' -wholename '*.tmpl/*' | xargs -n 1 dirname | uniq | grep -v "$(NON_TEST_IMAGES)"))) SUB_IMAGES := $(foreach image,$(ALL_IMAGES),$(if $(findstring _,$(image)),$(image),)) IMAGE_GROUPS := $(sort $(foreach image,$(SUB_IMAGES),$(firstword $(subst _, ,$(image))))) @@ -95,6 +95,10 @@ push-all-test-images: $(patsubst %,push-%,$(TEST_IMAGES)) path = images/$(subst _,/,$(1)) dockerfile = $$(if [ -f "$(call path,$(1))/Dockerfile.$(ARCH)" ]; then echo Dockerfile.$(ARCH); else echo Dockerfile; fi) +# Extract template version from image name, if present. +# Arguments 1 and 3 are prefix/suffix; argument 2 is the image name. +template_version = $$(if echo "$(2)" | grep -qF '.'; then echo "$(1)$$(echo "$(2)" | cut -d. -f2-)$(3)"; fi) + # The tag construct is used to memoize the image generated (see README.md). # This scheme is used to enable aggressive caching in a central repository, but # ensuring that images will always be sourced using the local files. @@ -158,9 +162,10 @@ pull-%: register-cross ## Force a repull of the image. 
rebuild = \ $(call header,REBUILD $(1)) && \ (T=$$(mktemp -d) && cp -a $(call path,$(1))/* $$T && \ - $(foreach image,$(shell grep FROM "$(call path,$(1))/$(call dockerfile,$(1))" 2>/dev/null | cut -d' ' -f2),docker pull $(DOCKER_PLATFORM_ARGS) $(image) >&2 &&) \ + $(foreach image,$(shell grep "^FROM " "$(call path,$(1))/$(call dockerfile,$(1))" 2>/dev/null | sed "s~\$${TEMPLATE_VERSION}~$(call template_version,,$(1),)~g" | cut -d' ' -f2),docker pull $(DOCKER_PLATFORM_ARGS) $(image) >&2 &&) \ docker build $(DOCKER_PLATFORM_ARGS) \ -f "$$T/$(call dockerfile,$(1))" \ + $(call template_version,--build-arg=TEMPLATE_VERSION=,$(1),) \ -t "$(call remote_image,$(1)):$(call tag,$(1))" \ -t "$(call remote_image,$(1))":latest \ $$T >&2 && \