|
14 | 14 | # See the License for the specific language governing permissions and |
15 | 15 | # limitations under the License. |
16 | 16 |
|
| 17 | +# This CI Dockerfile supports CUDA 13 and CUDA 12 from a single BASE_IMAGE |
| 18 | +# build arg. The default is the recommended CUDA 13 image: |
| 19 | +# nvcr.io/nvidia/cuda-dl-base:26.04-cuda13.2-devel-ubuntu24.04 |
| 20 | +# The current recommended CUDA 12 image is: |
| 21 | +# nvcr.io/nvidia/cuda-dl-base:25.06-cuda12.9-devel-ubuntu24.04 |
| 22 | +# |
| 23 | +# The build derives CUDA_FLAVOR internally from BASE_IMAGE by matching |
| 24 | +# "cuda13" or "cuda12" in the image tag. That flavor selects the matching uv |
| 25 | +# extra (cu13 or cu12) and CUDA Python package include path. If BASE_IMAGE does |
| 26 | +# not contain either token, the build fails early. |
| 27 | +# |
| 28 | +# Example CUDA 13 H100+ build: |
| 29 | +# docker buildx build -f docker/Dockerfile \ |
| 30 | +# --build-arg GPU_TARGET=h100plus . |
| 31 | +# Example CUDA 12 A100 build: |
| 32 | +# docker buildx build -f docker/Dockerfile \ |
| 33 | +# --build-arg BASE_IMAGE=nvcr.io/nvidia/cuda-dl-base:25.06-cuda12.9-devel-ubuntu24.04 \ |
| 34 | +# --build-arg GPU_TARGET=a100 . |
| 35 | +# |
| 36 | +# GPU_TARGET controls compiled Automodel dependency tuning. "h100plus" builds |
| 37 | +# for SM90/SM100/SM120 and includes H100+ features such as DeepEP and |
| 38 | +# flash-attn-4. "a100" builds only SM80 and uses an A100-specific DeepEP patch |
| 39 | +# to avoid unsupported newer-GPU/NVSHMEM build paths. This keeps CI images |
| 40 | +# smaller and avoids compiling kernels for architectures a target image cannot |
| 41 | +# use. |
17 | 42 | ARG BASE_IMAGE=nvcr.io/nvidia/cuda-dl-base:26.04-cuda13.2-devel-ubuntu24.04 |
18 | 43 | FROM ${BASE_IMAGE} AS base-image |
| 44 | +ARG BASE_IMAGE |
19 | 45 | ARG UV_VERSION=0.11.14 |
20 | 46 |
|
21 | 47 | ENV DEBIAN_FRONTEND=noninteractive |
@@ -47,11 +73,62 @@ curl -LsSf "https://astral.sh/uv/${UV_VERSION}/install.sh" | sh |
47 | 73 | uv --version |
48 | 74 | EOF |
49 | 75 |
|
# Derive the CUDA flavor (cu12/cu13) and CUDA major.minor from BASE_IMAGE and
# persist them as two helper commands for later build steps:
#   nemo-cuda-flavor          prints the flavor, used as the uv extra name
#   nemo-install-cuda-python  installs cuda-bindings/cuda-python pinned to the
#                             image's CUDA major.minor release line
# The build fails here if BASE_IMAGE carries no recognizable CUDA token, or if
# the flavor token and the major.minor version disagree.
RUN <<"EOF" bash -euxo pipefail
case "${BASE_IMAGE}" in
  *cuda12*) cuda_flavor=cu12 ;;
  *cuda13*) cuda_flavor=cu13 ;;
  *)
    echo "Cannot derive CUDA flavor from BASE_IMAGE='${BASE_IMAGE}'. Expected image tag containing 'cuda12' or 'cuda13'." >&2
    exit 1
    ;;
esac

# Capture major and minor separately so the upper bound can be computed here
# instead of inside the generated script.
if [[ ! "${BASE_IMAGE}" =~ cuda([0-9]+)\.([0-9]+) ]]; then
  echo "Cannot derive CUDA major.minor from BASE_IMAGE='${BASE_IMAGE}'. Expected image tag containing e.g. 'cuda12.9' or 'cuda13.2'." >&2
  exit 1
fi
cuda_major="${BASH_REMATCH[1]}"
cuda_minor="${BASH_REMATCH[2]}"
cuda_major_minor="${cuda_major}.${cuda_minor}"

# The flavor and the version are matched independently; refuse to continue if
# they point at different CUDA majors (e.g. 'cuda12' in the repository name but
# 'cuda13.2' in the tag), which would pair a cu12 extra with 13.x pins.
if [[ "cu${cuda_major}" != "${cuda_flavor}" ]]; then
  echo "CUDA flavor '${cuda_flavor}' does not match CUDA version '${cuda_major_minor}' derived from BASE_IMAGE='${BASE_IMAGE}'." >&2
  exit 1
fi

# Force base 10 so a minor with a leading zero is not parsed as octal.
cuda_next_minor="$((10#${cuda_minor} + 1))"
# Stay on the image's major.minor release line: >=X.Y.0,<X.(Y+1)
cuda_python_spec=">=${cuda_major_minor}.0,<${cuda_major}.${cuda_next_minor}"

# The inner heredocs are unquoted on purpose: the values derived above are
# expanded now and baked into the generated scripts as literals.
cat >/usr/local/bin/nemo-cuda-flavor <<SCRIPT
#!/usr/bin/env bash
set -euo pipefail
echo "${cuda_flavor}"
SCRIPT
cat >/usr/local/bin/nemo-install-cuda-python <<SCRIPT
#!/usr/bin/env bash
set -euo pipefail
uv pip install "cuda-bindings${cuda_python_spec}" "cuda-python${cuda_python_spec}"
SCRIPT
chmod +x /usr/local/bin/nemo-cuda-flavor /usr/local/bin/nemo-install-cuda-python
echo "Derived CUDA flavor: ${cuda_flavor}"
echo "Derived CUDA Python major.minor: ${cuda_major_minor}"
EOF
| 110 | + |
50 | 111 | WORKDIR /workspace |
51 | 112 | COPY pyproject.toml uv.lock /workspace/ |
52 | 113 | COPY nemo/__init__.py nemo/package_info.py /workspace/nemo/ |
# Resolve the locked environment for the CUDA flavor of this image, then pin
# cuda-bindings/cuda-python via the helper generated earlier in the build.
RUN <<"EOF" bash -ex
flavor_extra="$(nemo-cuda-flavor)"
uv sync \
  --link-mode copy \
  --locked \
  --extra all \
  --extra "${flavor_extra}" \
  --group test
nemo-install-cuda-python
EOF
| 119 | + |
RUN <<"EOF" bash -ex
# Container-only runtime utilities. Keep these out of pyproject.toml so they do
# not become NeMo package dependencies.
uv pip install \
  dill \
  orjson

# Select the PyTorch wheel index that matches the image's CUDA flavor.
# Assign first so that a failing nemo-cuda-flavor aborts the step under -e
# (a failure inside the case word's command substitution would not).
cuda_flavor="$(nemo-cuda-flavor)"
case "${cuda_flavor}" in
  cu12) torchcodec_index=https://download.pytorch.org/whl/cu126 ;;
  cu13) torchcodec_index=https://download.pytorch.org/whl/cu132 ;;
  *)
    # Without this arm torchcodec_index stays unset and uv is handed an empty
    # --index-url, which fails with an unrelated-looking error.
    echo "No torchcodec wheel index configured for CUDA flavor '${cuda_flavor}'." >&2
    exit 1
    ;;
esac
uv pip install --index-url "${torchcodec_index}" torchcodec
EOF
56 | 133 | COPY nemo /workspace/nemo |
57 | 134 |
|
@@ -101,8 +178,9 @@ case "${GPU_TARGET}" in |
101 | 178 | ;; |
102 | 179 | esac |
103 | 180 |
|
| 181 | +CUDA_FLAVOR="$(nemo-cuda-flavor)" |
104 | 182 | AUTOMODEL_CCCL_INCLUDES="/usr/local/cuda/include/cccl" |
105 | | -PYTHON_CCCL_INCLUDE="${VIRTUAL_ENV}/lib/python${UV_PYTHON}/site-packages/nvidia/cu13/include/cccl" |
| 183 | +PYTHON_CCCL_INCLUDE="${VIRTUAL_ENV}/lib/python${UV_PYTHON}/site-packages/nvidia/${CUDA_FLAVOR}/include/cccl" |
106 | 184 | if [[ -d "${PYTHON_CCCL_INCLUDE}" ]]; then |
107 | 185 | AUTOMODEL_CCCL_INCLUDES="${AUTOMODEL_CCCL_INCLUDES}:${PYTHON_CCCL_INCLUDE}" |
108 | 186 | fi |
@@ -136,14 +214,16 @@ if [[ "${GPU_TARGET}" == "h100plus" ]]; then |
136 | 214 | elif [[ "${GPU_TARGET}" == "a100" ]]; then |
137 | 215 | automodel_extra=compiled-a100 |
138 | 216 | fi |
| 217 | +cuda_flavor="$(nemo-cuda-flavor)" |
139 | 218 | uv sync \ |
140 | 219 | --inexact \ |
141 | 220 | --link-mode copy \ |
142 | 221 | --locked \ |
143 | 222 | --extra all \ |
144 | | - --extra cu13 \ |
| 223 | + --extra "${cuda_flavor}" \ |
145 | 224 | --extra "${automodel_extra}" \ |
146 | 225 | --group test |
| 226 | +nemo-install-cuda-python |
147 | 227 |
|
148 | 228 | if [[ "${GPU_TARGET}" == "a100" ]]; then |
149 | 229 | git clone "${DEEPEP_REPO}" /opt/automodel-src/DeepEP |
|
159 | 239 | if [[ "${GPU_TARGET}" == "h100plus" ]]; then |
160 | 240 | # flash-attn-4 requires apache-tvm-ffi 0.1.11, while mamba-ssm |
161 | 241 | # currently constrains the solved environment to apache-tvm-ffi<=0.1.9. |
| 242 | + cutlass_packages=( |
| 243 | + "nvidia-cutlass-dsl==4.5.2" |
| 244 | + "nvidia-cutlass-dsl-libs-base==4.5.2" |
| 245 | + ) |
| 246 | + if [[ "$(nemo-cuda-flavor)" == "cu13" ]]; then |
| 247 | + cutlass_packages+=("nvidia-cutlass-dsl-libs-cu13==4.5.2") |
| 248 | + fi |
162 | 249 | uv pip install \ |
163 | 250 | --no-deps \ |
164 | 251 | "apache-tvm-ffi==0.1.11" \ |
165 | | - "nvidia-cutlass-dsl==4.5.2" \ |
166 | | - "nvidia-cutlass-dsl-libs-base==4.5.2" \ |
167 | | - "nvidia-cutlass-dsl-libs-cu13==4.5.2" \ |
| 252 | + "${cutlass_packages[@]}" \ |
168 | 253 | "quack-kernels==0.5.0" \ |
169 | 254 | "torch-c-dlpack-ext==0.1.5" |
170 | 255 |
|
@@ -196,6 +281,18 @@ LABEL com.nvidia.build.ref="${NVIDIA_BUILD_REF}" |
196 | 281 | ARG RC_DATE=00.00 |
197 | 282 | ARG TARGETARCH |
198 | 283 |
|
# Optional ffmpeg install; enable with --build-arg INSTALL_FFMPEG=true.
ARG INSTALL_FFMPEG=false
RUN <<"EOF" bash -ex
# Nothing to do unless the build arg is exactly "true".
if [ "${INSTALL_FFMPEG}" != "true" ]; then
  exit 0
fi
apt-get update
apt-get install -y ffmpeg
# Drop package caches and index files so they do not end up in the layer.
apt-get clean
rm -rf /var/lib/apt/lists/*
EOF
| 293 | + |
| 294 | +ENV NEMO_HOME="/home/TestData/nemo_home" |
| 295 | + |
199 | 296 | # NOTICES.txt file points to where the OSS source code is archived |
RUN { \
      echo "This distribution includes open source which is archived at the following URL: https://opensource.nvidia.com/oss/teams/nvidia/nemo/${RC_DATE}:linux-${TARGETARCH}/index.html" && \
      echo "For further inquiries or assistance, contact us at oss-requests@nvidia.com"; \
    } > NOTICES.txt
0 commit comments