Skip to content

Commit e7db758

Browse files
committed
Provide Dockerfile for midstream MPI CUDA image
1 parent 45ec38c commit e7db758

1 file changed

Lines changed: 177 additions & 0 deletions

File tree

  • images/runtime/training/py312-cuda130-torch29-openmpi41
Lines changed: 177 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,177 @@
1+
# Base image: ODH midstream CUDA 13.0 build, referenced by an explicit tag.
FROM quay.io/opendatahub/odh-midstream-cuda-base-13-0:268a2d4baec5ed3c3ae09a6cce325fb83622d87b

# Build arguments consumed by the RUN steps below:
#   UCX_VERSION / OPENMPI_VERSION select the upstream release artifacts to fetch,
#   SSH_PORT is written into the ssh client and sshd configuration files.
ARG UCX_VERSION=1.20.0
ARG OPENMPI_VERSION=4.1.6
ARG SSH_PORT=2222

# Image metadata.
LABEL name="training:py312-cuda130-torch29-openmpi41" \
      summary="CUDA 13.0 Python 3.12 PyTorch 2.9.0 OpenMPI 4.1 image based on C9S for Training" \
      description="CUDA 13.0 Python 3.12 PyTorch 2.9.0 OpenMPI 4.1 image based on C9S for Training" \
      io.k8s.display-name="CUDA 13.0 Python 3.12 PyTorch 2.9.0 OpenMPI 4.1 base image for Training" \
      io.k8s.description="CUDA 13.0 Python 3.12 PyTorch 2.9.0 OpenMPI 4.1 image based on C9S for Training" \
      authoritative-source-url="https://github.com/opendatahub-io/distributed-workloads"

# Everything up to the final USER instruction installs packages or edits
# system files, so it has to run as root.
USER 0
15+
16+
# System packages installed up front.
#   libjpeg-turbo  -> libjpeg.so.62    (torchvision image I/O extension)
#   libpng         -> libpng16.so.16   (torchvision image I/O extension)
#   libwebp        -> libwebp.so.7     (torchvision image I/O extension)
#   openssh-server -> SSH server, configured further down in this file
#
# Two related concerns are deliberately NOT handled in this step:
#   * The C9S base image builds numpy/scipy/pyarrow/pillow against system
#     libraries that C9S does not provide (libopenblasp.so.0,
#     libthrift-0.15.0.so, libre2.so.9); those Python packages are replaced
#     with manylinux wheels after micropipenv has run.
#   * numactl-libs and openblas-openmp are installed only AFTER the OpenMPI
#     build, because dnf's clean_requirements_on_remove would otherwise sweep
#     them out when the build toolchain is removed.
RUN dnf install -y \
        libjpeg-turbo \
        libpng \
        libwebp \
        openssh-server \
    && dnf clean all
25+
26+
# Install the prebuilt UCX RPMs for ${UCX_VERSION} (BSD-3-Clause licensed, fully
# redistributable). The upstream bundle is built against MOFED 24.10 + CUDA 13;
# 1.20.0 is the first UCX release with CUDA 13 support.
#
# Runtime packages (ucx, ucx-cuda, ucx-ib, ucx-ib-mlx5, ucx-rdmacm) stay in the
# image. ucx-devel (headers) is only needed to build OpenMPI and is removed
# again once that build has finished.
#
# UCX transport plugins are dlopen'd at runtime: ucx-ib-mlx5 gracefully skips
# itself when MOFED is absent, and ucx-cuda activates whenever the CUDA
# libraries are present (always the case in this image).
#
# The tarball is downloaded to a file and extracted in a separate step instead
# of being piped from curl into tar. Under the default `sh -c` only the last
# command of a pipeline decides the exit status, so a failed or truncated
# download would not by itself stop the build; with discrete `&&` steps every
# failure aborts the RUN with the real error.
#
# --nodeps: the RPMs are installed without dependency resolution, as their
# declared requirements (MOFED, CUDA packages) are not satisfied through dnf.
RUN curl -fsSL -o /tmp/ucx.tar.bz2 \
        "https://github.com/openucx/ucx/releases/download/v${UCX_VERSION}/ucx-${UCX_VERSION}-rocky9-mofed24.10-cuda13-x86_64.tar.bz2" \
    && tar -xjf /tmp/ucx.tar.bz2 -C /tmp \
    && rpm -ivh --nodeps \
        /tmp/ucx-${UCX_VERSION}-1.el9.x86_64.rpm \
        /tmp/ucx-cuda-${UCX_VERSION}-1.el9.x86_64.rpm \
        /tmp/ucx-ib-${UCX_VERSION}-1.el9.x86_64.rpm \
        /tmp/ucx-ib-mlx5-${UCX_VERSION}-1.el9.x86_64.rpm \
        /tmp/ucx-rdmacm-${UCX_VERSION}-1.el9.x86_64.rpm \
        /tmp/ucx-devel-${UCX_VERSION}-1.el9.x86_64.rpm \
    && rm -f /tmp/ucx.tar.bz2 /tmp/ucx-*.rpm
42+
43+
# Build OpenMPI ${OPENMPI_VERSION} from source with CUDA support (BSD licensed,
# fully redistributable) and install it under /usr/lib64/openmpi, which keeps
# the existing symlink/PATH layout.
#
# Configure flags worth knowing about:
#   --with-cuda  opal_built_with_cuda_support=true, i.e. MPI calls accept GPU
#                pointers.
#   --with-ucx   links against the CUDA 13 aware UCX installed at /usr.
#                ucx-cuda provides the cuda_copy/cuda_ipc transports for
#                GPU-Direct; ucx-ib-mlx5 provides rc_mlx5/dc_mlx5 for GPU-Direct
#                RDMA over IB (activated when MOFED is present on the host at
#                runtime).
#
# Build tools and -devel packages are installed, used and removed inside this
# single RUN so they never persist in an image layer. ucx-devel was installed
# with `rpm --nodeps`, so it is removed the same way.
#
# The source tarball is downloaded to a file and extracted as a separate step
# rather than piped from curl into tar: under the default `sh -c` a pipeline's
# exit status is that of its last command only, so a failed download would not
# by itself abort the build.
RUN dnf install -y \
        # Runtime IB/RDMA libraries (kept after build)
        rdma-core libibverbs librdmacm libibumad libmlx5 infiniband-diags \
        # OpenMPI runtime dependencies (kept after build)
        hwloc libevent pmix \
        # Build tools (removed after build)
        gcc gcc-c++ make perl \
        # Dev headers (removed after build)
        rdma-core-devel libibverbs-devel librdmacm-devel \
        hwloc-devel libevent-devel pmix-devel zlib-devel \
    && curl -fsSL -o /tmp/openmpi.tar.gz \
        "https://download.open-mpi.org/release/open-mpi/v4.1/openmpi-${OPENMPI_VERSION}.tar.gz" \
    && tar -xzf /tmp/openmpi.tar.gz -C /tmp \
    && rm -f /tmp/openmpi.tar.gz \
    && cd /tmp/openmpi-${OPENMPI_VERSION} \
    && ./configure \
        --prefix=/usr/lib64/openmpi \
        --with-cuda=/usr/local/cuda \
        --with-ucx=/usr \
        --with-verbs \
        --with-hwloc \
        --with-libevent=external \
        --with-pmix=external \
        --enable-mpi-cxx \
        --enable-mpi1-compatibility \
        --disable-silent-rules \
    && make -j"$(nproc)" install \
    && rpm -e --nodeps ucx-devel-${UCX_VERSION}-1.el9.x86_64 \
    && dnf remove -y \
        gcc gcc-c++ make perl \
        rdma-core-devel libibverbs-devel librdmacm-devel \
        hwloc-devel libevent-devel pmix-devel zlib-devel \
    && dnf clean all \
    && rm -rf /tmp/openmpi-${OPENMPI_VERSION}
83+
84+
# Restore the PyTorch runtime libraries lost during the OpenMPI build step.
# numactl-libs and openblas-openmp are transitive dependencies of hwloc-devel /
# the build toolchain, so dnf's clean_requirements_on_remove auto-removes them
# together with those packages. Installing them here, in a RUN of their own,
# marks them as explicit user installs.
RUN dnf install -y \
        numactl-libs \
        openblas-openmp \
    && dnf clean all
89+
90+
# Expose orted and mpiexec in /usr/bin so they are found on the default PATH of
# an SSH session. With a directory as the last argument, ln creates one link
# per source, named after the source's basename.
RUN ln -s \
        /usr/lib64/openmpi/bin/orted \
        /usr/lib64/openmpi/bin/mpiexec \
        /usr/bin/
93+
94+
# mpirun wrapper: registers the OpenShift random UID in /etc/passwd before
# launching mpirun. It is needed because the SDK overrides the container
# entrypoint, which bypasses uid_entrypoint.sh.
#
# The wrapper lives in /usr/local/bin (not /usr/bin) so that it takes precedence
# over the real mpirun in /usr/lib64/openmpi/bin, which the base image puts
# before /usr/bin in PATH.
#
# --chmod sets the executable mode as part of the COPY itself: no follow-up
# `RUN chmod` layer is needed and the resulting mode does not depend on the
# file's permissions in the build context.
COPY --chmod=755 mpirun_wrapper.sh /usr/local/bin/mpirun
100+
101+
# Make `python` resolvable in SSH sessions with the virtualenv active.
# A plain symlink is not enough: Python uses argv[0] to locate pyvenv.cfg, and
# /usr/local/bin/python has no venv in its parent chain. A two-line shell
# wrapper that execs /opt/app-root/bin/python does.
RUN printf '%s\n' \
        '#!/bin/sh' \
        'exec /opt/app-root/bin/python "$@"' \
        > /usr/local/bin/python \
    && chmod +x /usr/local/bin/python
106+
107+
# SSH sessions do not inherit the container environment; /etc/environment is
# loaded by PAM instead, so LD_LIBRARY_PATH is recorded there as well.
# CUDA and cuDNN are already registered with ldconfig; only the OpenMPI and UCX
# plugin directories need explicit entries.
RUN printf '%s\n' 'LD_LIBRARY_PATH=/usr/lib64/openmpi/lib:/usr/lib64/ucx' >> /etc/environment
110+
111+
# Directory for sshd runtime state.
RUN mkdir -p /var/run/sshd

# SSH client defaults in /etc/ssh/ssh_config:
#   - rewrite the existing StrictHostKeyChecking entry to "no",
#   - append UserKnownHostsFile /dev/null,
#   - append the client Port (${SSH_PORT}),
#   - forward PATH and LD_LIBRARY_PATH to the remote side (SendEnv).
RUN sed -i "s/[ #]\(.*StrictHostKeyChecking \).*/ \1no/g" /etc/ssh/ssh_config \
    && { \
        echo "    UserKnownHostsFile /dev/null"; \
        echo "    Port ${SSH_PORT}"; \
        echo "    SendEnv PATH LD_LIBRARY_PATH"; \
    } >> /etc/ssh/ssh_config
118+
119+
# System-wide sshd configuration (/etc/ssh/sshd_config).
# The commented-out StrictModes and Port defaults are uncommented and rewritten
# in place, and the same two settings are then also appended at the end of the
# file, so they are present even if no commented default existed to rewrite.
RUN sed -i \
        -e "s/#\(StrictModes \).*/\1no/g" \
        -e "s/#\(Port \).*/\1${SSH_PORT}/g" \
        /etc/ssh/sshd_config \
    && { \
        echo "StrictModes no"; \
        echo "Port ${SSH_PORT}"; \
    } >> /etc/ssh/sshd_config
124+
125+
# Per-user sshd configuration so sshd can run as a non-root user:
# PID file under /tmp, host key taken from the user's own ~/.ssh, StrictModes
# off, the image's SSH port, and PATH / LD_LIBRARY_PATH accepted from clients.
#
# /home/mpiuser/.openmpi is created (empty) in the same step. An OpenMPI MCA
# params file is read by every OpenMPI process regardless of environment
# variables, which makes it a reliable way to reach orted and worker processes
# launched via SSH.
# NOTE(review): no params file is written by this step; presumably it is
# supplied at runtime or elsewhere - confirm.
RUN mkdir -p /home/mpiuser/.openmpi \
    && printf '%s\n' \
        "PidFile /tmp/sshd.pid" \
        "HostKey /home/mpiuser/.ssh/id_rsa" \
        "StrictModes no" \
        "Port ${SSH_PORT}" \
        "AcceptEnv PATH LD_LIBRARY_PATH" \
        > /home/mpiuser/.sshd_config
134+
135+
# micropipenv is the tool used below to install the packages pinned in
# Pipfile.lock; the [toml] extra is requested alongside it.
RUN pip install --no-cache-dir --upgrade "micropipenv[toml]"
137+
138+
# Python dependencies, installed from the locked Pipfile.
WORKDIR /opt/app-root/bin
COPY Pipfile.lock ./

# Steps, in order:
#   1. micropipenv installs everything pinned in Pipfile.lock; the lock file is
#      deleted afterwards.
#   2. s3fs is added on its own with --no-deps.
#   3. numpy, scipy, pyarrow and pillow are force-reinstalled. The C9S base image
#      ships builds of these compiled against system libraries that are not
#      present in the C9S repos (libopenblasp.so.0, libthrift-0.15.0.so,
#      libre2.so.9); the manylinux wheels bundle all required native libraries
#      and replace the C9S-compiled builds.
#   4. site-packages is made group-writable.
RUN micropipenv install -- --no-cache-dir \
    && rm -f ./Pipfile.lock \
    && pip install --no-cache-dir --no-deps s3fs==2026.1.0 \
    && pip install --force-reinstall --no-cache-dir \
        numpy==1.26.4 \
        scipy==1.17.0 \
        pyarrow==22.0.0 \
        pillow==12.1.0 \
    && chmod -R g+w /opt/app-root/lib/python3.12/site-packages
155+
156+
# OpenShift GID 0 pattern: OpenShift random UIDs always have GID 0 as their
# primary group, so the home directory is handed to group 0 and the group is
# given the same permissions as the owner.
RUN chgrp -R 0 /home/mpiuser \
    && chmod -R g=u /home/mpiuser

# Same idea for /etc/passwd: group-writable so that uid_entrypoint can append
# an entry for the random UID at runtime.
RUN chmod g=u /etc/passwd
162+
163+
# uid_entrypoint: registers the OpenShift random UID in /etc/passwd so that
# getpwuid() calls (used by Python getpass, PyTorch cache dirs, etc.) succeed.
#
# --chmod sets the executable mode as part of the COPY itself: no follow-up
# `RUN chmod` layer is needed and the resulting mode does not depend on the
# file's permissions in the build context.
COPY --chmod=755 uid_entrypoint.sh /usr/local/bin/uid_entrypoint.sh
167+
168+
# Runtime defaults: working directory, HOME and search paths.
WORKDIR /home/mpiuser
ENV HOME=/home/mpiuser
# /usr/local/bin goes first so the mpirun and python wrappers installed there
# win over any other copies on PATH. $HOME resolves to the value set by the
# preceding ENV instruction.
ENV PATH=/usr/local/bin:$PATH:$HOME/.local/bin
# Prepend the OpenMPI and UCX library directories. The inherited value is only
# appended (with its ':' separator) when the base image actually defines one:
# an unconditional ":${LD_LIBRARY_PATH}" would leave a trailing ':' when the
# variable is unset, and an empty entry makes the dynamic loader search the
# current working directory.
ENV LD_LIBRARY_PATH=/usr/lib64/openmpi/lib:/usr/lib64/ucx${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}
# Override the base image's overly restrictive NVIDIA_REQUIRE_CUDA, which only
# lists specific driver minor versions (535, 550, 565, 570, 575); that
# constraint caused the nvidia-container-runtime-hook to fail on 580.x+.
# NOTE(review): in NVIDIA_REQUIRE_* expressions space-separated constraints are
# ORed, so this reads "cuda>=13.0 OR driver>=570". NVIDIA's release notes list
# a 580-series driver as the native minimum for CUDA 13.0; 570/575 presumably
# depend on forward-compatibility libraries - confirm that 570 is the intended
# floor.
ENV NVIDIA_REQUIRE_CUDA="cuda>=13.0 driver>=570"
ENTRYPOINT ["/usr/local/bin/uid_entrypoint.sh"]
# Drop root for the final image.
USER 1001

0 commit comments

Comments
 (0)