# Base image for the build; defaults to the NVIDIA PyTorch container tagged
# 26.04-py3 and can be overridden with --build-arg BASE_IMAGE=<image>.
1515ARG BASE_IMAGE=nvcr.io/nvidia/pytorch:26.04-py3
# Stage named megatron_bridge, built on the image selected above.
1616FROM ${BASE_IMAGE} AS megatron_bridge
# Working directory for the instructions that follow in this stage.
1717WORKDIR /opt/Megatron-Bridge
18- ENV PATH="/root/.local/bin:$PATH"
19- ENV UV_PROJECT_ENVIRONMENT=/opt/venv
20- ENV PATH="$UV_PROJECT_ENVIRONMENT/bin:$PATH"
21- ENV VIRTUAL_ENV=/opt/venv
22- ENV PATH="$VIRTUAL_ENV/bin:$PATH"
23- ENV UV_LINK_MODE=copy
24- ENV UV_VERSION="0.7.2"
25- ENV NVTE_BUILD_NUM_PHILOX_ROUNDS=3
26- RUN curl -LsSf https://astral.sh/uv/${UV_VERSION}/install.sh | sh && \
27- uv venv ${UV_PROJECT_ENVIRONMENT} --system-site-packages && \
28- # Address CVE-2025-68973
29- apt-get update && apt install -y --only-upgrade gnupg && \
30- apt-get clean && \
31- rm -rf /var/lib/apt/lists/*
32-
33- ##############################################################################
34- ##
35- ## Install DeepEP and nvshmem
36- ##
37- ##############################################################################
3818
# INSTALL_DEEPEP gates the DeepEP branch of the RUN instruction below, which
# only executes when the value is exactly the string "True".
3919ARG INSTALL_DEEPEP=True
# Pinned DeepEP revision — presumably the commit checked out inside that
# branch; its use is not visible in this excerpt, confirm against the full file.
4020ARG DEEPEP_COMMIT=34152ae28f80bcc3ee38d7a12cb2ad87cfd4ea72
4121
42- ENV HYBRID_EP_MULTINODE=1
43- ENV RDMA_CORE_HOME=/opt/rdma-core/build
44- ENV LD_LIBRARY_PATH=/usr/local/cuda/lib64/:$LD_LIBRARY_PATH
# Environment for the uv-managed virtualenv and the DeepEP build, set in one
# ENV instruction. Variable references inside a single ENV instruction are
# expanded with the values that existed before the instruction, so /opt/venv
# is spelled out literally rather than taken from UV_PROJECT_ENVIRONMENT or
# VIRTUAL_ENV, which are defined in this same instruction.
# PATH lists /opt/venv/bin once: a repeated identical entry does not change
# command lookup. /root/.local/bin is presumably where the uv installer places
# its binary — confirm against the installer.
# UV_VERSION and UV_PROJECT_ENVIRONMENT are read by the RUN instruction that
# follows (installer URL and `uv venv` target).
22+ ENV PATH="/opt/venv/bin:/root/.local/bin:$PATH" \
23+ UV_PROJECT_ENVIRONMENT=/opt/venv \
24+ VIRTUAL_ENV=/opt/venv \
25+ UV_LINK_MODE=copy \
26+ UV_VERSION="0.7.2" \
27+ NVTE_BUILD_NUM_PHILOX_ROUNDS=3 \
28+ HYBRID_EP_MULTINODE=1 \
29+ RDMA_CORE_HOME=/opt/rdma-core/build \
30+ LD_LIBRARY_PATH="/usr/local/cuda/lib64/:$LD_LIBRARY_PATH"
4531RUN --mount=type=bind,source=docker/patches/deepep.patch,target=/opt/deepep.patch \
32+ curl -LsSf https://astral.sh/uv/${UV_VERSION}/install.sh | sh && \
33+ uv venv ${UV_PROJECT_ENVIRONMENT} --system-site-packages && \
34+ # Address CVE-2025-68973
35+ apt-get update && apt install -y --only-upgrade gnupg && \
36+ apt-get clean && \
37+ rm -rf /var/lib/apt/lists/* && \
4638 if [ "$INSTALL_DEEPEP" = "True" ]; then \
4739 # Upgrade system rdma-core to v60; libibverbs-dev supplies the unversioned
4840 # libibverbs.so symlink required at link time.
@@ -78,12 +70,7 @@ RUN --mount=type=bind,source=docker/patches/deepep.patch,target=/opt/deepep.patc
7870 popd; \
7971 fi
8072
81- COPY pyproject.toml uv.lock /opt/Megatron-Bridge/
82- COPY src/megatron/bridge/__init__.py src/megatron/bridge/package_info.py /opt/Megatron-Bridge/src/megatron/bridge/
83- COPY 3rdparty/Megatron-LM/pyproject.toml 3rdparty/Megatron-LM/setup.py /opt/Megatron-Bridge/3rdparty/Megatron-LM/
84- COPY 3rdparty/Megatron-LM/megatron/training/__init__.py /opt/Megatron-Bridge/3rdparty/Megatron-LM/megatron/training/
85- COPY 3rdparty/Megatron-LM/megatron/core/__init__.py 3rdparty/Megatron-LM/megatron/core/package_info.py /opt/Megatron-Bridge/3rdparty/Megatron-LM/megatron/core/
86- COPY 3rdparty/Megatron-LM/megatron/core/datasets/Makefile 3rdparty/Megatron-LM/megatron/core/datasets/helpers.cpp /opt/Megatron-Bridge/3rdparty/Megatron-LM/megatron/core/datasets/
# Copies the entire build context into /opt/Megatron-Bridge with mode 644.
# NOTE(review): this copy now precedes the RUN instruction further down, so any
# change to any file in the context invalidates that layer's cache. The
# revision being replaced copied only pyproject.toml, uv.lock and a few package
# stubs at this point and copied the full tree last — confirm the lost
# dependency-layer caching is intended.
73+ COPY --chmod=644 . /opt/Megatron-Bridge
8774
8875# Build arg to skip --locked when testing with different MCore versions
8976ARG MCORE_TRIGGERED_TESTING=false
@@ -109,5 +96,3 @@ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
10996 # Otherwise, the stub will be called in some cases and fail
11097 ln -sf "$(ldconfig -p | awk '/libcudart\.so\.[0-9]+ /{print $NF; exit}')" /opt/venv/lib/python3.12/site-packages/tilelang/lib/libcudart_stub.so && \
11198 uv cache prune ${UV_CACHE_PRUNE_ARGS}
112-
113- COPY --chmod=644 . /opt/Megatron-Bridge
0 commit comments