|
# CUDA 13.0 midstream base image, referenced by a fixed 40-character hash tag
# rather than a floating tag.
FROM quay.io/opendatahub/odh-midstream-cuda-base-13-0:268a2d4baec5ed3c3ae09a6cce325fb83622d87b

# Build-time parameters (override with --build-arg):
#   OPENMPI_VERSION  OpenMPI source release downloaded and compiled below
#   SSH_PORT         port written into the SSH client/server configs below
#   UCX_VERSION      UCX prebuilt RPM release downloaded and installed below
ARG OPENMPI_VERSION=4.1.6
ARG SSH_PORT=2222
ARG UCX_VERSION=1.20.0
| 6 | + |
# Image metadata. summary, description and io.k8s.description carry the same
# text; io.k8s.display-name is the short human-readable title.
LABEL name="training:py312-cuda130-torch29-openmpi41" \
      summary="CUDA 13.0 Python 3.12 PyTorch 2.9.0 OpenMPI 4.1 image based on C9S for Training" \
      description="CUDA 13.0 Python 3.12 PyTorch 2.9.0 OpenMPI 4.1 image based on C9S for Training" \
      io.k8s.display-name="CUDA 13.0 Python 3.12 PyTorch 2.9.0 OpenMPI 4.1 base image for Training" \
      io.k8s.description="CUDA 13.0 Python 3.12 PyTorch 2.9.0 OpenMPI 4.1 image based on C9S for Training" \
      authoritative-source-url="https://github.com/opendatahub-io/distributed-workloads"
| 13 | + |
# Switch to root for the package installation and system configuration below.
USER 0

# Base OS packages:
#   openssh-server  SSH daemon (configured further down)
#   libjpeg-turbo   libjpeg.so.62  -- required by the torchvision image I/O extension
#   libpng          libpng16.so.16 -- required by the torchvision image I/O extension
#   libwebp         libwebp.so.7   -- required by the torchvision image I/O extension
#
# Related notes:
# - The C9S base image compiles numpy/scipy/pyarrow/pillow against system
#   libraries not available in C9S (libopenblasp.so.0, libthrift-0.15.0.so,
#   libre2.so.9); those packages are reinstalled from manylinux wheels after
#   micropipenv runs.
# - numactl-libs and openblas-openmp are deliberately NOT installed here. They
#   are installed in a separate step AFTER the OpenMPI build so that dnf's
#   clean_requirements_on_remove does not sweep them out.
RUN dnf install -y \
        libjpeg-turbo \
        libpng \
        libwebp \
        openssh-server \
    && dnf clean all
| 25 | + |
# Install UCX prebuilt RPMs (BSD-3-Clause licensed — fully redistributable).
# The release archive is the rocky9 / MOFED 24.10 / CUDA 13 / x86_64 build for
# ${UCX_VERSION} (default 1.20.0, the first UCX release with CUDA 13 support).
#
# Runtime packages (ucx, ucx-cuda, ucx-ib, ucx-ib-mlx5, ucx-rdmacm) are kept in
# the image; ucx-devel (headers) is installed here only so OpenMPI can be built
# against it and is removed again at the end of the OpenMPI build step.
# UCX transport plugins are dlopen'd at runtime: ucx-ib-mlx5 gracefully skips
# if MOFED is absent; ucx-cuda activates when CUDA libs are present.
#
# --nodeps: the RPMs are installed without dependency resolution.
#
# The archive is saved to disk before extraction instead of being piped into
# tar. The default RUN shell does not enable pipefail, so in a pipeline a curl
# failure would not by itself fail the step.
RUN curl -fsSL -o /tmp/ucx.tar.bz2 \
        "https://github.com/openucx/ucx/releases/download/v${UCX_VERSION}/ucx-${UCX_VERSION}-rocky9-mofed24.10-cuda13-x86_64.tar.bz2" \
    && tar -xjf /tmp/ucx.tar.bz2 -C /tmp \
    && rpm -ivh --nodeps \
        /tmp/ucx-${UCX_VERSION}-1.el9.x86_64.rpm \
        /tmp/ucx-cuda-${UCX_VERSION}-1.el9.x86_64.rpm \
        /tmp/ucx-ib-${UCX_VERSION}-1.el9.x86_64.rpm \
        /tmp/ucx-ib-mlx5-${UCX_VERSION}-1.el9.x86_64.rpm \
        /tmp/ucx-rdmacm-${UCX_VERSION}-1.el9.x86_64.rpm \
        /tmp/ucx-devel-${UCX_VERSION}-1.el9.x86_64.rpm \
    && rm -f /tmp/ucx.tar.bz2 /tmp/ucx-*.rpm
| 42 | + |
# Build OpenMPI from source with CUDA support (BSD licensed — fully redistributable).
# OpenMPI install prefix: /usr/lib64/openmpi (keeps existing symlink/PATH layout)
#
# --with-cuda: opal_built_with_cuda_support=true — MPI calls accept GPU pointers.
# --with-ucx:  links against the UCX RPMs (CUDA 13 aware) installed at /usr.
#              ucx-cuda provides cuda_copy/cuda_ipc transports for GPU-Direct;
#              ucx-ib-mlx5 provides rc_mlx5/dc_mlx5 for GPU-Direct RDMA over IB
#              (activated when MOFED is present on the host at runtime).
#
# Everything happens in a single RUN so that the build toolchain, the -devel
# packages and the source tree are removed in the same layer that added them.
# ucx-devel, installed by the earlier UCX step, is also uninstalled here once
# the build no longer needs its headers.
#
# The source tarball is saved to disk before extraction instead of being piped
# into tar. The default RUN shell does not enable pipefail, so in a pipeline a
# curl failure would not by itself fail the step.
RUN dnf install -y \
        # Runtime IB/RDMA libraries (kept after build)
        rdma-core libibverbs librdmacm libibumad libmlx5 infiniband-diags \
        # OpenMPI runtime dependencies (kept after build)
        hwloc libevent pmix \
        # Build tools (removed after build)
        gcc gcc-c++ make perl \
        # Dev headers (removed after build)
        rdma-core-devel libibverbs-devel librdmacm-devel \
        hwloc-devel libevent-devel pmix-devel zlib-devel \
    && curl -fsSL -o /tmp/openmpi.tar.gz \
        "https://download.open-mpi.org/release/open-mpi/v4.1/openmpi-${OPENMPI_VERSION}.tar.gz" \
    && tar -xzf /tmp/openmpi.tar.gz -C /tmp \
    && rm -f /tmp/openmpi.tar.gz \
    && cd /tmp/openmpi-${OPENMPI_VERSION} \
    && ./configure \
        --prefix=/usr/lib64/openmpi \
        --with-cuda=/usr/local/cuda \
        --with-ucx=/usr \
        --with-verbs \
        --with-hwloc \
        --with-libevent=external \
        --with-pmix=external \
        --enable-mpi-cxx \
        --enable-mpi1-compatibility \
        --disable-silent-rules \
    && make -j"$(nproc)" install \
    && rpm -e --nodeps ucx-devel-${UCX_VERSION}-1.el9.x86_64 \
    && dnf remove -y \
        gcc gcc-c++ make perl \
        rdma-core-devel libibverbs-devel librdmacm-devel \
        hwloc-devel libevent-devel pmix-devel zlib-devel \
    && dnf clean all \
    && rm -rf /tmp/openmpi-${OPENMPI_VERSION}
| 83 | + |
# Restore PyTorch runtime libraries after the OpenMPI build step.
# numactl-libs and openblas-openmp are transitive deps of hwloc-devel / the
# build toolchain, so dnf's clean_requirements_on_remove auto-removes them when
# those packages are uninstalled. Installing them in their own RUN marks them as
# explicit user installs.
RUN dnf install -y \
        numactl-libs \
        openblas-openmp \
    && dnf clean all
| 89 | + |
# Expose the OpenMPI launcher binaries in /usr/bin so they are found on the
# default PATH of SSH sessions. Each link keeps the basename of its target
# (/usr/bin/orted, /usr/bin/mpiexec).
RUN ln -s \
        /usr/lib64/openmpi/bin/orted \
        /usr/lib64/openmpi/bin/mpiexec \
        /usr/bin/
| 93 | + |
# mpirun wrapper script.
# Purpose: register the OpenShift random UID in /etc/passwd before launching
# mpirun. This is needed because the SDK overrides the container entrypoint,
# which bypasses uid_entrypoint.sh.
# Location: /usr/local/bin rather than /usr/bin, so the wrapper takes precedence
# over the real mpirun in /usr/lib64/openmpi/bin (which the base image puts
# before /usr/bin in PATH).
COPY mpirun_wrapper.sh /usr/local/bin/mpirun
RUN chmod +x /usr/local/bin/mpirun
| 100 | + |
# /usr/local/bin/python: two-line shell wrapper that makes python reachable in
# SSH sessions AND keeps the virtualenv active.
# A symlink won't work: Python uses argv[0] to locate pyvenv.cfg, and
# /usr/local/bin/python has no venv in its parent chain. The wrapper instead
# execs /opt/app-root/bin/python, which does, forwarding all arguments.
RUN printf '%s\n' \
        '#!/bin/sh' \
        'exec /opt/app-root/bin/python "$@"' \
        > /usr/local/bin/python \
    && chmod +x /usr/local/bin/python
| 106 | + |
# Library search path for SSH sessions. /etc/environment is loaded by PAM; the
# container's own environment is not inherited by SSH logins.
# CUDA and cuDNN are registered in ldconfig, so only the OpenMPI library dir and
# the UCX plugin dir need explicit entries.
RUN printf '%s\n' "LD_LIBRARY_PATH=/usr/lib64/openmpi/lib:/usr/lib64/ucx" >> /etc/environment

# Directory created for sshd (presumably its runtime directory — confirm it is
# still required on this base image).
RUN mkdir -p /var/run/sshd
| 112 | + |
# SSH client config (/etc/ssh/ssh_config):
#   - rewrite any existing (possibly commented-out) StrictHostKeyChecking line
#     so that it reads "StrictHostKeyChecking no"
#   - append: known-hosts file discarded to /dev/null, default port set to
#     SSH_PORT, and PATH / LD_LIBRARY_PATH sent to the remote side
RUN sed -i "s/[ #]\(.*StrictHostKeyChecking \).*/ \1no/g" /etc/ssh/ssh_config \
    && printf '%s\n' \
        " UserKnownHostsFile /dev/null" \
        " Port ${SSH_PORT}" \
        " SendEnv PATH LD_LIBRARY_PATH" \
        >> /etc/ssh/ssh_config
| 118 | + |
# SSH server config (/etc/ssh/sshd_config):
#   - uncomment and rewrite "#StrictModes ..." to "StrictModes no"
#   - uncomment and rewrite "#Port ..."        to "Port ${SSH_PORT}"
# Each directive is then appended only if the exact line is not already in the
# file, i.e. when the stock config had no commented-out line for sed to rewrite.
# Appending unconditionally would leave every directive in the file twice
# (including two identical Port lines) whenever the sed substitution matched.
RUN sed -i \
        -e "s/#\(StrictModes \).*/\1no/g" \
        -e "s/#\(Port \).*/\1${SSH_PORT}/g" \
        /etc/ssh/sshd_config \
    && { grep -qx "StrictModes no" /etc/ssh/sshd_config \
         || echo "StrictModes no" >> /etc/ssh/sshd_config; } \
    && { grep -qx "Port ${SSH_PORT}" /etc/ssh/sshd_config \
         || echo "Port ${SSH_PORT}" >> /etc/ssh/sshd_config; }
| 124 | + |
# User-level sshd config (/home/mpiuser/.sshd_config) for running sshd as
# non-root: PID file under /tmp, host key taken from the user's ~/.ssh/id_rsa,
# StrictModes disabled, listening on SSH_PORT, and PATH / LD_LIBRARY_PATH
# accepted from the client.
#
# /home/mpiuser/.openmpi is also created. It is the per-user directory for an
# OpenMPI MCA params file, which every OpenMPI process reads regardless of
# environment variables (so it reliably reaches orted and worker processes
# launched via SSH). This step creates only the directory, not a params file.
RUN mkdir -p /home/mpiuser/.openmpi \
    && printf '%s\n' \
        "PidFile /tmp/sshd.pid" \
        "HostKey /home/mpiuser/.ssh/id_rsa" \
        "StrictModes no" \
        "Port ${SSH_PORT}" \
        "AcceptEnv PATH LD_LIBRARY_PATH" \
        > /home/mpiuser/.sshd_config
| 134 | + |
# micropipenv (with the toml extra) is the tool used below to deploy the
# packages pinned in Pipfile.lock. It is upgraded to the newest release
# available at build time; pip's download cache is not kept.
RUN pip install --no-cache-dir --upgrade 'micropipenv[toml]'
| 137 | + |
# Install Python dependencies from the Pipfile.lock file.
WORKDIR /opt/app-root/bin
COPY Pipfile.lock ./

# Steps, in order:
#   1. micropipenv installs everything pinned in Pipfile.lock, after which the
#      lock file is deleted.
#   2. s3fs is installed on its own with --no-deps.
#   3. numpy, scipy, pyarrow and pillow are force-reinstalled at pinned
#      versions. The C9S base image ships these compiled against system
#      libraries that are not present in the C9S repos (libopenblasp.so.0,
#      libthrift-0.15.0.so, libre2.so.9); the reinstall is meant to pick up
#      manylinux wheels, which bundle all required native libraries, replacing
#      the C9S-compiled builds.
#   4. site-packages is made group-writable.
RUN micropipenv install -- --no-cache-dir \
    && rm -f ./Pipfile.lock \
    && pip install --no-cache-dir --no-deps s3fs==2026.1.0 \
    && pip install --force-reinstall --no-cache-dir \
        numpy==1.26.4 \
        pillow==12.1.0 \
        pyarrow==22.0.0 \
        scipy==1.17.0 \
    && chmod -R g+w /opt/app-root/lib/python3.12/site-packages
| 155 | + |
# OpenShift GID 0 pattern: OpenShift random UIDs always have GID 0 as their
# primary group, so the home directory is handed to group 0 and the group is
# given the same permissions as the owner.
RUN chgrp -R 0 /home/mpiuser \
    && chmod -R g=u /home/mpiuser

# Give group 0 the owner's permissions on /etc/passwd so that uid_entrypoint can
# add the random UID to it at runtime.
RUN chmod g=u /etc/passwd
| 162 | + |
# uid_entrypoint.sh registers the OpenShift random UID in /etc/passwd so that
# getpwuid() lookups (used by Python getpass, PyTorch cache dirs, etc.) succeed.
# It is copied in and made executable here, and set as the image ENTRYPOINT at
# the end of this file.
COPY uid_entrypoint.sh /usr/local/bin/uid_entrypoint.sh
RUN chmod +x /usr/local/bin/uid_entrypoint.sh
| 167 | + |
# Runtime defaults: work from the MPI user's home directory.
WORKDIR /home/mpiuser

# HOME is set in its own ENV instruction so that $HOME can be expanded in the
# PATH instruction that follows.
ENV HOME=/home/mpiuser

# /usr/local/bin goes first so the mpirun and python wrappers installed there
# win over anything later in PATH.
ENV PATH=/usr/local/bin:$PATH:$HOME/.local/bin

# Prepend the OpenMPI library dir and the UCX plugin dir. The inherited value is
# appended only when it is non-empty: a bare trailing ":" would be an empty
# search-path entry, which the dynamic loader treats as the current directory.
ENV LD_LIBRARY_PATH=/usr/lib64/openmpi/lib:/usr/lib64/ucx${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}

# Override the base image's NVIDIA_REQUIRE_CUDA, which only lists specific
# driver versions (535, 550, 565, 570, 575). Per the original author's note,
# that constraint caused the nvidia-container-runtime-hook to fail on 580.x+
# drivers, and any driver >= 570 is expected to support CUDA 13.0.
# NOTE(review): confirm how the hook combines the two space-separated
# expressions (AND vs OR) and that 570 is the intended minimum driver.
ENV NVIDIA_REQUIRE_CUDA="cuda>=13.0 driver>=570"

# uid_entrypoint.sh is the container entrypoint.
ENTRYPOINT ["/usr/local/bin/uid_entrypoint.sh"]

# Drop root: the image runs as UID 1001 by default.
USER 1001
0 commit comments