-
Notifications
You must be signed in to change notification settings - Fork 21
docker: add clean build and wheel-based install Dockerfiles #278
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,70 @@ | ||
| # ATOM Docker — Wheel-only install (zero source compilation) | ||
| # | ||
| # Installs all packages from pre-built wheels. No git clones, no compiles. | ||
| # Requires a wheel directory or builder image (see Dockerfile.wheels). | ||
| # | ||
| # Option A — from a local wheels directory: | ||
| # DOCKER_BUILDKIT=1 docker build \ | ||
| # --build-context wheels=/path/to/wheels \ | ||
| # -f docker/Dockerfile.clean -t atom:clean . | ||
| # | ||
| # Option B — multi-stage from Dockerfile.wheels builder image: | ||
| # docker build -f docker/Dockerfile.wheels -t atom:wheels . | ||
| # DOCKER_BUILDKIT=1 docker build \ | ||
| # --build-context wheels=docker-image://atom:wheels \ | ||
| # -f docker/Dockerfile.clean -t atom:clean . | ||
| # | ||
| # Run: | ||
| # docker run --rm -it --device=/dev/kfd --device=/dev/dri \ | ||
| # --group-add video --shm-size=16G atom:clean | ||
|
|
||
| # Base image; override at build time with --build-arg BASE_IMAGE=<image>. | ||
| ARG BASE_IMAGE="rocm/dev-ubuntu-24.04:7.2-complete" | ||
| FROM ${BASE_IMAGE} | ||
|
|
||
| # Build-time only: keeps apt-get non-interactive in the RUN steps below | ||
| # without persisting DEBIAN_FRONTEND into the environment of running containers. | ||
| ARG DEBIAN_FRONTEND=noninteractive | ||
| # Disable Triton async copy for stable behavior on ROCm | ||
| ENV TRITON_HIP_USE_ASYNC_COPY=0 | ||
|
|
||
| # ── 1. System packages (minimal — no build tools needed) ───────────── | ||
| # One package per line, sorted alphabetically, so future diffs stay small. | ||
| # The apt package lists are removed in the same layer that created them. | ||
| # NOTE(review): no later step in this Dockerfile invokes git directly; | ||
| # confirm whether it is needed at runtime or by the editable ATOM install. | ||
| RUN apt-get update \ | ||
| && apt-get install -y --no-install-recommends \ | ||
| git \ | ||
| ibverbs-utils \ | ||
| libdw1 \ | ||
| libopenmpi-dev \ | ||
| libpci-dev \ | ||
| locales \ | ||
| openmpi-bin \ | ||
| python3-dev \ | ||
| python3-pip \ | ||
| && rm -rf /var/lib/apt/lists/* | ||
|
|
||
| # Refresh pip/setuptools/wheel; --no-cache-dir keeps pip's download cache | ||
| # out of the image layer. | ||
| RUN pip3 install --no-cache-dir --break-system-packages --ignore-installed pip setuptools wheel | ||
|
|
||
| # ── 2. Install all pre-built wheels ──────────────────────────────────── | ||
| # Uses bind-mount to avoid a 60+ GB COPY layer from the wheels image. | ||
| # Works with both Option A (flat directory) and Option B (docker-image://). | ||
| # Every *.whl found under the mount is staged in /tmp/wheels, which is removed | ||
| # again inside this same RUN; pip runs with --no-cache-dir so dependencies it | ||
| # downloads are not cached into the layer either. | ||
| # The trailing python3/pip3 commands are smoke checks: the step fails if any | ||
| # of the installed packages cannot be imported (or, for mori, is not installed). | ||
| RUN --mount=type=bind,from=wheels,source=/,target=/mnt/wheels \ | ||
| mkdir -p /tmp/wheels \ | ||
| && find /mnt/wheels -name '*.whl' -exec cp {} /tmp/wheels/ \; \ | ||
| && ls -lhS /tmp/wheels/*.whl \ | ||
|
Comment on lines
+40
to
+43
|
||
| && pip3 install --no-cache-dir --break-system-packages --no-deps \ | ||
| /tmp/wheels/torch-*.whl \ | ||
| /tmp/wheels/torchvision-*.whl \ | ||
| /tmp/wheels/torchaudio-*.whl \ | ||
| /tmp/wheels/triton-*.whl \ | ||
| /tmp/wheels/triton_kernels-*.whl \ | ||
| && pip3 install --no-cache-dir --break-system-packages \ | ||
| filelock typing-extensions sympy networkx jinja2 fsspec numpy pillow \ | ||
| && pip3 install --no-cache-dir --break-system-packages \ | ||
| /tmp/wheels/mori-*.whl \ | ||
| /tmp/wheels/flydsl-*.whl \ | ||
| && pip3 install --no-cache-dir --break-system-packages \ | ||
| /tmp/wheels/amd_aiter-*.whl \ | ||
| && rm -rf /tmp/wheels \ | ||
| && python3 -c "import torch; print(f'PyTorch {torch.__version__}, ROCm: {torch.version.hip}')" \ | ||
| && python3 -c "import triton; print(f'Triton {triton.__version__}')" \ | ||
| && python3 -c "import aiter; print('AITER OK')" \ | ||
| && python3 -c "import flydsl; print('FlyDSL OK')" \ | ||
| && pip3 show mori && echo "MORI wheel installed OK" | ||
|
|
||
| # ── 3. ATOM (from build context — pure Python, instant install) ────── | ||
| # WORKDIR is set before the install so the RUN step needs no cd; it also | ||
| # remains the default directory of the final image. | ||
| COPY . /app/ATOM | ||
| WORKDIR /app/ATOM | ||
| # Editable install followed by an import check; --no-cache-dir keeps pip's | ||
| # cache out of the layer. | ||
| RUN pip3 install --no-cache-dir --break-system-packages -e . \ | ||
| && python3 -c "import atom; print('ATOM OK')" | ||
|
|
||
| # Start an interactive shell by default. | ||
| CMD ["/bin/bash"] | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,120 @@ | ||
| # ATOM Docker — Wheel builder (single-stage image that collects wheels in /wheels) | ||
| # | ||
| # Builds/downloads all wheels needed for Dockerfile.clean: | ||
|
Comment on lines
+1
to
+3
|
||
| # torch, torchvision, torchaudio (PyTorch ROCm nightly) | ||
| # triton (PyPI wheel) | ||
| # triton_kernels (ROCm/triton source, pure Python) | ||
| # flydsl (pre-built nightly wheel) | ||
| # mori (MORI source) | ||
| # amd_aiter (ENABLE_CK=0, Triton-only) | ||
| # | ||
| # Build: | ||
| # docker build -f docker/Dockerfile.wheels -t atom:wheels . | ||
| # | ||
| # Extract wheels to host (the target directory must exist before tar runs): | ||
| # mkdir -p ./dist && docker run --rm atom:wheels tar cf - /wheels | tar xf - -C ./dist --strip-components=1 | ||
| # | ||
| # Or pipe directly into Dockerfile.clean (multi-stage): | ||
| # DOCKER_BUILDKIT=1 docker build \ | ||
| # --build-context wheels=docker-image://atom:wheels \ | ||
| # -f docker/Dockerfile.clean -t atom:clean . | ||
|
|
||
| # Base image; override at build time with --build-arg BASE_IMAGE=<image>. | ||
| ARG BASE_IMAGE="rocm/dev-ubuntu-24.04:7.2-complete" | ||
| FROM ${BASE_IMAGE} | ||
|
|
||
| # Build arguments (override with --build-arg NAME=value): | ||
| # GPU_ARCH          semicolon-separated GPU targets; exported below as GPU_ARCH_LIST and PYTORCH_ROCM_ARCH | ||
| # AITER_REPO/BRANCH git source cloned for the AITER wheel (a branch name, not a pinned commit) | ||
| # FLYDSL_WHL_URL    URL of the pre-built FlyDSL wheel fetched with wget | ||
| # MORI_REPO/COMMIT  git source and exact commit checked out for the MORI wheel | ||
| # MAX_JOBS          passed to the AITER build; empty by default | ||
| # PREBUILD_TRITON   passed to the AITER build | ||
| ARG GPU_ARCH="gfx942;gfx950" | ||
| ARG AITER_REPO="https://github.com/ROCm/aiter.git" | ||
| ARG AITER_BRANCH="main" | ||
| ARG FLYDSL_WHL_URL="https://rocm.frameworks-nightlies.amd.com/whl-staging/gfx942-gfx950/flydsl-0.0.1.dev0%2Bc0d3534-cp312-cp312-manylinux_2_38_x86_64.whl" | ||
| ARG MORI_REPO="https://github.com/ROCm/mori.git" | ||
| ARG MORI_COMMIT="b0dce4beebeb1f26c784eee17d5fd9785ee9447f" | ||
| ARG MAX_JOBS="" | ||
| ARG PREBUILD_TRITON=1 | ||
|
|
||
| # GPU targets taken from the GPU_ARCH build arg; GPU_ARCH_LIST is read again | ||
| # by the AITER build step. | ||
| ENV GPU_ARCH_LIST=${GPU_ARCH} | ||
| ENV PYTORCH_ROCM_ARCH=${GPU_ARCH} | ||
| # Build-time only: keeps apt-get non-interactive in the RUN steps below | ||
| # without persisting DEBIAN_FRONTEND into the image's runtime environment. | ||
| ARG DEBIAN_FRONTEND=noninteractive | ||
| # Disable Triton async copy for stable behavior on ROCm | ||
| ENV TRITON_HIP_USE_ASYNC_COPY=0 | ||
|
|
||
| # ── 1. System packages + build tools ──────────────────────────────── | ||
| # One package per line, sorted alphabetically; the apt package lists are | ||
| # removed in the same layer that created them. | ||
| RUN apt-get update \ | ||
| && apt-get install -y --no-install-recommends \ | ||
| cmake \ | ||
| git \ | ||
| ibverbs-utils \ | ||
| libpci-dev \ | ||
| locales \ | ||
| ninja-build \ | ||
| python3-dev \ | ||
| python3-pip \ | ||
| python3-venv \ | ||
| wget \ | ||
| && rm -rf /var/lib/apt/lists/* | ||
|
|
||
| # Refresh the Python packaging toolchain used by the wheel builds below. | ||
| RUN pip3 install --break-system-packages --ignore-installed \ | ||
| pip \ | ||
| setuptools \ | ||
| wheel \ | ||
| build | ||
|
|
||
| # Output directory for the wheels downloaded or built by the steps below. | ||
| RUN mkdir -p /wheels | ||
|
|
||
| # ── 2. Pull PyTorch ROCm nightly wheels ──────────────────────────── | ||
| # Downloads the wheels only (no dependencies) into /wheels. | ||
| # NOTE(review): no versions are pinned, so each rebuild presumably picks up | ||
| # whatever nightly is current — confirm this is intended. | ||
| RUN pip3 download \ | ||
| --index-url https://download.pytorch.org/whl/nightly/rocm7.2 \ | ||
| --no-deps \ | ||
| --dest /wheels \ | ||
| torch torchvision torchaudio | ||
|
|
||
| # ── 3. Download Triton wheel + build triton_kernels ────────────── | ||
| # Fetches the pinned triton wheel without dependencies, then lists it; | ||
| # the listing fails the step if no triton wheel is present in /wheels. | ||
| RUN pip3 download \ | ||
| --dest /wheels \ | ||
| --no-deps \ | ||
| triton==3.6.0 \ | ||
| && ls -lh /wheels/triton-*.whl | ||
|
|
||
| # triton_kernels is pure Python, only available in ROCm/triton fork | ||
| # Shallow, blob-filtered sparse clone restricted to python/triton_kernels, | ||
| # then a wheel is built from that directory into /wheels. | ||
| # NOTE(review): this branch is release/internal/3.5.x while the triton wheel | ||
| # above is pinned to 3.6.0 — confirm the two are meant to be combined. | ||
| RUN git clone \ | ||
| --depth=1 \ | ||
| --branch release/internal/3.5.x \ | ||
| --filter=blob:none \ | ||
| --sparse \ | ||
| https://github.com/ROCm/triton.git /build/triton \ | ||
| && git -C /build/triton sparse-checkout set python/triton_kernels \ | ||
| && pip3 wheel --no-deps -w /wheels /build/triton/python/triton_kernels/ \ | ||
| && ls -lh /wheels/triton_kernels-*.whl | ||
|
|
||
| # ── 4. Download pre-built FlyDSL wheel ─────────────────────────── | ||
| # Fetches FLYDSL_WHL_URL into /wheels and lists the result. | ||
| # NOTE(review): the download is not verified against a checksum. | ||
| RUN wget --quiet --directory-prefix=/wheels/ "${FLYDSL_WHL_URL}" \ | ||
| && ls -lh /wheels/flydsl-*.whl | ||
|
|
||
| # ── 5. Install torch + triton (needed for AITER/MORI builds) ────── | ||
| # The two wheels are installed with --no-deps; the packages listed in the | ||
| # second pip call are installed separately. | ||
| RUN pip3 install --break-system-packages --no-deps \ | ||
| /wheels/torch-*.whl \ | ||
| /wheels/triton-*.whl \ | ||
| && pip3 install --break-system-packages \ | ||
| filelock \ | ||
| typing-extensions \ | ||
| sympy \ | ||
| networkx \ | ||
| jinja2 \ | ||
| fsspec \ | ||
| numpy | ||
|
|
||
| # ── 6. Build MORI wheel ─────────────────────────────────────────── | ||
| # Extra system packages installed ahead of the MORI build; one per line, | ||
| # sorted alphabetically. | ||
| RUN apt-get update \ | ||
| && apt-get install -y --no-install-recommends \ | ||
| cython3 \ | ||
| libdw1 \ | ||
| libopenmpi-dev \ | ||
| openmpi-bin \ | ||
| && rm -rf /var/lib/apt/lists/* | ||
|
|
||
| # Patch PyTorch's Caffe2Config.cmake: the ROCm nightly wheel's config | ||
| # hard-errors when CUDA toolkit is not found, even though we only need ROCm. | ||
| # Convert the fatal error to a warning so MORI (and other torch-cmake users) | ||
| # can build against the ROCm PyTorch wheel without CUDA installed. | ||
| # The file is located relative to the installed torch package, then edited in | ||
| # place; sed leaves the file unchanged if the message text is not found. | ||
| RUN caffe2_config="$(python3 -c "import torch, pathlib; print(pathlib.Path(torch.__file__).parent / 'share/cmake/Caffe2/Caffe2Config.cmake')")" \ | ||
| && sed -i \ | ||
| -e 's/message(FATAL_ERROR "Your installed Caffe2 version uses CUDA/message(WARNING "Skipped: Your installed Caffe2 version uses CUDA/' \ | ||
| "${caffe2_config}" | ||
|
|
||
| # Clone MORI, check out the pinned commit, install its build requirements | ||
| # (minus torch/triton, which were installed from /wheels above), fetch the | ||
| # submodules and build the wheel into /wheels. | ||
| # The filtered requirements go through a temp file instead of a pipe: the | ||
| # default RUN shell does not enable pipefail, so a grep error would otherwise | ||
| # be hidden behind pip's exit status. grep's exit status 1 (no lines left | ||
| # after filtering) is tolerated; any other failure aborts the build. | ||
| # Build args are quoted so unusual values are not word-split by the shell. | ||
| RUN git clone "${MORI_REPO}" /build/mori \ | ||
| && cd /build/mori \ | ||
| && git checkout "${MORI_COMMIT}" \ | ||
| && { grep -iv '^torch\|^triton' requirements-build.txt > /tmp/mori-build-requirements.txt || [ "$?" -eq 1 ]; } \ | ||
| && pip3 install --break-system-packages -r /tmp/mori-build-requirements.txt \ | ||
| && rm -f /tmp/mori-build-requirements.txt \ | ||
| && git submodule update --init --recursive \ | ||
| && pip3 wheel --no-build-isolation --no-deps -w /wheels . \ | ||
| && ls -lh /wheels/mori-*.whl | ||
|
|
||
| # ── 7. Build AITER wheel (ENABLE_CK=0, Triton-only) ────────────── | ||
| # Shallow clone of the tip of AITER_BRANCH into /build/aiter. | ||
| # NOTE(review): AITER_BRANCH defaults to a branch name, not a commit, so the | ||
| # cloned revision can change between builds (MORI, by contrast, is pinned). | ||
| RUN git clone \ | ||
| --depth=1 \ | ||
| --branch ${AITER_BRANCH} \ | ||
| ${AITER_REPO} \ | ||
| /build/aiter | ||
|
|
||
| # Install AITER's requirements, do an editable install as an import check, | ||
| # then build the amd_aiter wheel and copy it to /wheels. | ||
| # - MAX_JOBS is exported only when the build arg is non-empty; otherwise it is | ||
| #   unset so the build never sees an empty MAX_JOBS value. | ||
| # - PREBUILD_TRITON_ARCHS is derived from GPU_ARCH_LIST (';' replaced by ',') | ||
| #   so it follows the GPU_ARCH build arg; the default still yields | ||
| #   gfx942,gfx950. | ||
| # - aiter/install_mode is set to "install" before bdist_wheel is run. | ||
| RUN cd /build/aiter \ | ||
| && pip3 install --break-system-packages -r requirements.txt \ | ||
| && if [ -n "${MAX_JOBS}" ]; then export MAX_JOBS; else unset MAX_JOBS; fi \ | ||
| && export ENABLE_CK=0 PREBUILD_TRITON="${PREBUILD_TRITON}" \ | ||
| PREBUILD_TRITON_ARCHS="$(printf '%s' "${GPU_ARCH_LIST}" | tr ';' ',')" \ | ||
| GPU_ARCHS="${GPU_ARCH_LIST}" \ | ||
|
Comment on lines
+106
to
+108
|
||
| && pip3 install --break-system-packages --no-build-isolation -e . \ | ||
|
Comment on lines
+102
to
+109
|
||
| && python3 -c "import aiter; print('editable install OK')" \ | ||
| && echo "install" > aiter/install_mode \ | ||
| && python3 setup.py bdist_wheel \ | ||
| && cp dist/amd_aiter-*.whl /wheels/ \ | ||
| && ls -lh /wheels/amd_aiter-*.whl | ||
|
|
||
| # ── 8. Summary ──────────────────────────────────────────────────── | ||
| # Prints every wheel in /wheels, largest first, between two banner lines. | ||
| RUN echo "=== Wheel inventory ===" \ | ||
| && ls -lhS /wheels/*.whl \ | ||
| && echo "=== Done ===" | ||
|
|
||
| # Default to the wheel directory and list its contents (largest first) | ||
| # when the container is run without an explicit command. | ||
| WORKDIR /wheels | ||
| CMD ["ls", "-lhS", "/wheels/"] | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
`apt-get install` includes `git`, but this Dockerfile does not use git (and the header says “No git clones”). Removing git reduces image size and keeps the “minimal” runtime promise accurate (or update the comment if git is intentionally required at runtime).