|
1 | | -FROM docker.io/lmsysorg/sglang:nightly-dev-20260212-5875ef0a |
| 1 | +ARG UBUNTU_VERSION=24.04 |
| 2 | +ARG TARGET_PLATFORM=aarch64 |
| 3 | +ARG CUDA_VERSION=13.0.0 |
| 4 | +ARG CUDA_VERSION_PATH=cu130 |
| 5 | +ARG PYTHON_VERSION=3.12 |
| 6 | +ARG BASE_IMAGE=docker.io/library/ubuntu:${UBUNTU_VERSION} |
| 7 | +ARG DEVEL_BASE_IMAGE=docker.io/nvidia/cuda:${CUDA_VERSION}-cudnn-devel-ubuntu${UBUNTU_VERSION} |
2 | 8 |
|
3 | | -RUN apt-get update && apt-get install git python3-pip python3-venv -y |
4 | | -RUN pip install git+https://github.com/huggingface/transformers.git@c9ea365a7b56326418769a4ba4682864d407ed63 |
| 9 | +######################################################################### |
| 10 | +# Build image |
| 11 | +######################################################################### |
| 12 | + |
| 13 | +FROM ${DEVEL_BASE_IMAGE} AS build |
| 14 | + |
| 15 | +# NOTE: libnuma1 is mounted from the host via the toml file. |
| 16 | + |
| 17 | +WORKDIR /app/build |
| 18 | + |
| 19 | +# Install Miniforge (the conda-forge installer), Python, and Python build dependencies. |
| 20 | +ARG TARGET_PLATFORM |
| 21 | +ARG PYTHON_VERSION |
| 22 | +ENV PATH=/opt/conda/bin:$PATH |
| 23 | +ADD "https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-Linux-${TARGET_PLATFORM}.sh" /root/miniconda.sh |
| 24 | +RUN chmod +x /root/miniconda.sh && \ |
| 25 | + bash /root/miniconda.sh -b -p /opt/conda && \ |
| 26 | + rm /root/miniconda.sh && \ |
| 27 | + conda install --yes "python=${PYTHON_VERSION}" cmake compilers conda-build git ipython make numpy pyyaml && \ |
| 28 | + python -m pip install --no-cache-dir --upgrade "setuptools<70.0.0" ninja packaging pip wheel && \ |
| 29 | + conda clean --all --yes |
| 30 | + |
| 31 | +# Install PyTorch for CUDA 13. |
| 32 | +ARG CUDA_VERSION_PATH |
| 33 | +ARG TORCH_VERSION=2.9.1 |
| 34 | +ARG INSTALL_CHANNEL=whl |
| 35 | +RUN python -m pip install --no-cache-dir "torch==${TORCH_VERSION}" torchvision torchaudio \ |
| 36 | + --index-url "https://download.pytorch.org/${INSTALL_CHANNEL}/${CUDA_VERSION_PATH}/" |
| 37 | + |
| 38 | +# Symlink cuDNN and NCCL headers into the conda include path; fail the build if either header cannot be found. |
| 39 | +RUN CUDNN_H="$(find /usr -name 'cudnn.h' -print -quit)" && test -n "${CUDNN_H}" && \ |
| 40 | + NCCL_H="$(find /opt/conda/lib -path '*/nvidia/nccl/include/nccl.h' -print -quit)" && test -n "${NCCL_H}" && \ |
| 41 | + ln -sf "$(dirname "${CUDNN_H}")"/cudnn*.h /opt/conda/include/ && ln -sf "${NCCL_H}" /opt/conda/include/nccl.h |
| 42 | + |
| 43 | +# Install flash-attn 3. |
| 44 | +ARG FLASH_ATTN_3_SHA="92ca9da8d66f7b34ff50dc080ec0fef9661260d6" |
| 45 | +ARG FA3_MAX_JOBS=32 |
| 46 | +RUN git clone --depth 1 --recurse-submodules --shallow-submodules https://github.com/Dao-AILab/flash-attention.git && \ |
| 47 | + git -C flash-attention fetch --depth 1 origin "${FLASH_ATTN_3_SHA}" && \ |
| 48 | + git -C flash-attention checkout "${FLASH_ATTN_3_SHA}" && \ |
| 49 | + git -C flash-attention submodule update --init --depth 1 && \ |
| 50 | + cd flash-attention/hopper && \ |
| 51 | + export FLASH_ATTENTION_DISABLE_FP16=TRUE FLASH_ATTENTION_DISABLE_SM80=TRUE MAX_JOBS="${FA3_MAX_JOBS}" && \ |
| 52 | + python setup.py install && \ |
| 53 | + cd /app/build && \ |
| 54 | + rm -rf flash-attention |
| 55 | + |
| 56 | +# Fix flash_attn_3 package structure for imports. |
| 57 | +RUN SITE_PKGS="/opt/conda/lib/python${PYTHON_VERSION}/site-packages" && \ |
| 58 | + mkdir -p "${SITE_PKGS}/flash_attn_3" && \ |
| 59 | + mv "${SITE_PKGS}"/flash_attn_3-*.egg/flash_attn_3/* "${SITE_PKGS}/flash_attn_3/" && \ |
| 60 | + mv "${SITE_PKGS}"/flash_attn_3-*.egg/flash_attn_interface.py "${SITE_PKGS}/flash_attn_3/" && \ |
| 61 | + touch "${SITE_PKGS}/flash_attn_3/__init__.py" && \ |
| 62 | + rm -rf "${SITE_PKGS}"/flash_attn_3-* |
| 63 | + |
| 64 | +# Install sglang with all dependencies. |
| 65 | +# NOTE: sglang's dependencies pull in cuda12 builds of torch and sgl-kernel, so |
| 66 | +# both are force-reinstalled from their cu130 wheels in the steps below. |
| 67 | +RUN python -m pip install --no-cache-dir "sglang[all]" |
| 68 | + |
| 69 | +# sglang's dependencies swap in a cuda12 torch, so force the cu130 build back in. |
| 70 | +ARG CUDA_VERSION_PATH2=${CUDA_VERSION_PATH} |
| 71 | +RUN python -m pip install --no-cache-dir --force-reinstall "torch==${TORCH_VERSION}" torchvision torchaudio \ |
| 72 | + --index-url "https://download.pytorch.org/${INSTALL_CHANNEL}/${CUDA_VERSION_PATH2}/" |
| 73 | + |
| 74 | +# Re-install sgl-kernel with cu130 wheels (default pulls cuda12 version). Download the wheel directly |
| 75 | +# to avoid hash mismatch from the index; the platform tag follows TARGET_PLATFORM (default aarch64). |
| 76 | +RUN pip install --no-cache-dir --no-deps --force-reinstall \ |
| 77 | + "sgl-kernel @ https://github.com/sgl-project/whl/releases/download/v0.3.21/sgl_kernel-0.3.21+cu130-cp312-abi3-manylinux2014_${TARGET_PLATFORM}.whl" |
| 78 | + |
| 79 | +# nvidia-cublas is installed for torch compile compatibility. |
| 80 | +RUN python -m pip install --no-cache-dir nvidia-cublas |
| 81 | + |
| 82 | +# Move CuDNN up to 9.16 — SGLang requires >= 9.15 with PyTorch 2.9.1. NOTE(review): this is the cu12 wheel in a cu130 image — confirm. |
| 83 | +RUN python -m pip install --no-cache-dir "nvidia-cudnn-cu12==9.16.0.29" |
| 84 | + |
| 85 | +# curl is used for router health checks; sglang-router provides load balancing. |
| 86 | +RUN conda install --yes curl && conda clean --all --yes |
| 87 | +RUN python -m pip install --no-cache-dir sglang-router |
| 88 | + |
| 89 | +# Pin transformers to the commit used for GLM5. |
| 90 | +RUN python -m pip install --no-cache-dir "git+https://github.com/huggingface/transformers.git@c9ea365a7b56326418769a4ba4682864d407ed63" |
| 91 | + |
| 92 | +WORKDIR /opt |
0 commit comments