diff --git a/vllm/buildspec-cpu-sm.yml b/vllm/buildspec-cpu-sm.yml
new file mode 100644
index 000000000000..59a15887d89c
--- /dev/null
+++ b/vllm/buildspec-cpu-sm.yml
@@ -0,0 +1,55 @@
+# Buildspec for the vLLM CPU SageMaker image on x86_64.
+# NOTE(review): the incoming text had no value after &ACCOUNT_ID and &REGION;
+# the <set-...-in-environment> placeholders below are reconstructed — confirm
+# against the repository's other buildspecs.
+account_id: &ACCOUNT_ID <set-$ACCOUNT_ID-in-environment>
+prod_account_id: &PROD_ACCOUNT_ID 763104351884
+region: &REGION <set-$REGION-in-environment>
+framework: &FRAMEWORK vllm
+version: &VERSION "0.15.1"
+short_version: &SHORT_VERSION "0.15"
+arch_type: &ARCH_TYPE x86_64
+autopatch_build: "False"
+
+# ECR repository names and URIs, assembled from the anchors above via !join.
+repository_info:
+  build_repository: &BUILD_REPOSITORY
+    image_type: &IMAGE_TYPE cpu
+    root: .
+    repository_name: &REPOSITORY_NAME !join [ pr, "-", *FRAMEWORK ]
+    repository: &REPOSITORY !join [ *ACCOUNT_ID, .dkr.ecr., *REGION, .amazonaws.com/, *REPOSITORY_NAME ]
+    release_repository_name: &RELEASE_REPOSITORY_NAME !join [ *FRAMEWORK ]
+    release_repository: &RELEASE_REPOSITORY !join [ *PROD_ACCOUNT_ID, .dkr.ecr., *REGION, .amazonaws.com/, *RELEASE_REPOSITORY_NAME ]
+
+# Build-context artifacts: each entry maps a source path to a target file name.
+context:
+  build_context: &BUILD_CONTEXT
+    deep_learning_container:
+      source: src/deep_learning_container.py
+      target: deep_learning_container.py
+    sagemaker_entrypoint:
+      source: vllm/build_artifacts/sagemaker_entrypoint.sh
+      target: sagemaker_entrypoint.sh
+
+# Single image, built from the vllm-cpu-sagemaker stage of Dockerfile.cpu.
+images:
+  BuildVLLMCPUSageMakerPy312:
+    <<: *BUILD_REPOSITORY
+    context:
+      <<: *BUILD_CONTEXT
+    image_size_baseline: 5000
+    device_type: &DEVICE_TYPE cpu
+    python_version: &DOCKER_PYTHON_VERSION py3
+    tag_python_version: &TAG_PYTHON_VERSION py312
+    os_version: &OS_VERSION ubuntu22.04
+    tag: !join [ *VERSION, "-", *DEVICE_TYPE, "-", *TAG_PYTHON_VERSION, "-", *OS_VERSION, "-sagemaker" ]
+    latest_release_tag: !join [ *VERSION, "-", *DEVICE_TYPE, "-", *TAG_PYTHON_VERSION, "-", *OS_VERSION, "-sagemaker" ]
+    docker_file: !join [ *FRAMEWORK, /, *ARCH_TYPE, /, *DEVICE_TYPE, /Dockerfile.cpu ]
+    target: vllm-cpu-sagemaker
+    build: true
+    enable_common_stage_build: false
+    test_configs:
+      test_platforms:
+        - sanity
+        - security
+        - sagemaker
diff --git a/vllm/x86_64/cpu/Dockerfile.cpu b/vllm/x86_64/cpu/Dockerfile.cpu
new file mode 100644
index 000000000000..513bef9e0297
--- /dev/null
+++ b/vllm/x86_64/cpu/Dockerfile.cpu
@@ -0,0 +1,161 @@
+# vLLM CPU image for x86_64: a shared base stage, a wheel-build stage, a runtime
+# stage, and two final stages that differ only in their entrypoint script.
+FROM ubuntu:22.04 AS base
+ARG PYTHON_VERSION=3.12
+ARG VLLM_VERSION=0.15.1
+ARG PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cpu"
+LABEL maintainer="Amazon AI"
+LABEL dlc_major_version="1"
+
+ENV DEBIAN_FRONTEND=noninteractive \
+    LANG=C.UTF-8 \
+    LC_ALL=C.UTF-8 \
+    DLC_CONTAINER_TYPE=base \
+    PYTHONDONTWRITEBYTECODE=1 \
+    PYTHONUNBUFFERED=1 \
+    PYTHONIOENCODING=UTF-8 \
+    VLLM_TARGET_DEVICE=cpu
+
+WORKDIR /workspace
+
+# Install system dependencies and uv
+# apt's cache and state directories are cache mounts, so they stay out of the image layer.
+RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
+    --mount=type=cache,target=/var/lib/apt,sharing=locked \
+    apt-get update -y && \
+    apt-get install -y --no-install-recommends \
+        sudo \
+        ccache \
+        git \
+        curl \
+        wget \
+        ca-certificates \
+        gcc-12 \
+        g++-12 \
+        libtcmalloc-minimal4 \
+        libnuma-dev \
+        ffmpeg \
+        libsm6 \
+        libxext6 \
+        libgl1 \
+        jq \
+        lsof && \
+    update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-12 10 \
+        --slave /usr/bin/g++ g++ /usr/bin/g++-12 && \
+    curl -LsSf https://astral.sh/uv/install.sh | sh
+
+# Set compiler environment
+ENV CC=/usr/bin/gcc-12 CXX=/usr/bin/g++-12
+ENV CCACHE_DIR=/root/.cache/ccache
+ENV CMAKE_CXX_COMPILER_LAUNCHER=ccache
+
+# Setup uv and virtual environment
+ENV PATH="/root/.local/bin:$PATH"
+ENV VIRTUAL_ENV="/opt/venv"
+ENV UV_PYTHON_INSTALL_DIR=/opt/uv/python
+RUN uv venv --python ${PYTHON_VERSION} --seed ${VIRTUAL_ENV}
+ENV PATH="$VIRTUAL_ENV/bin:$PATH"
+
+# Force CPU wheels for PyTorch
+ENV PIP_EXTRA_INDEX_URL=${PIP_EXTRA_INDEX_URL}
+ENV UV_EXTRA_INDEX_URL=${PIP_EXTRA_INDEX_URL}
+ENV UV_INDEX_STRATEGY="unsafe-best-match"
+ENV UV_LINK_MODE="copy"
+ENV UV_HTTP_TIMEOUT=500
+
+# Memory allocator + Intel OpenMP for x86_64 performance
+# NOTE(review): nothing in this stage installs /opt/venv/lib/libiomp5.so; presumably
+# a later `uv pip install` provides it — confirm it exists in the final image.
+ENV LD_PRELOAD="/usr/lib/x86_64-linux-gnu/libtcmalloc_minimal.so.4:/opt/venv/lib/libiomp5.so"
+
+# Set the core-file size limit to 0 for shells that read root's ~/.bashrc
+RUN echo 'ulimit -c 0' >> ~/.bashrc
+
+# ====================== vllm-build =========================================
+FROM base AS vllm-build
+
+ARG VLLM_VERSION
+ARG max_jobs=32
+ENV MAX_JOBS=${max_jobs}
+
+WORKDIR /vllm-workspace
+
+# Clone vLLM and install build + runtime dependencies
+RUN git clone --depth 1 --branch v${VLLM_VERSION} https://github.com/vllm-project/vllm.git .
+
+RUN --mount=type=cache,target=/root/.cache/uv \
+    uv pip install --upgrade pip && \
+    uv pip install -r requirements/cpu-build.txt && \
+    uv pip install -r requirements/cpu.txt
+
+# Build vLLM wheel with CPU backend
+RUN --mount=type=cache,target=/root/.cache/uv \
+    --mount=type=cache,target=/root/.cache/ccache \
+    --mount=type=cache,target=/vllm-workspace/.deps,sharing=locked \
+    VLLM_TARGET_DEVICE=cpu python3 setup.py bdist_wheel --dist-dir=dist --py-limited-api=cp38
+
+# ====================== vllm-cpu (final base) =========================================
+FROM base AS vllm-cpu
+
+ARG VLLM_VERSION
+WORKDIR /
+
+# Install vLLM wheel and runtime dependencies
+# The wheel and requirements are bind-mounted from vllm-build instead of being copied into a layer.
+RUN --mount=type=cache,target=/root/.cache/uv \
+    --mount=type=bind,from=vllm-build,src=/vllm-workspace/dist,target=/tmp/dist \
+    --mount=type=bind,from=vllm-build,src=/vllm-workspace/requirements,target=/tmp/requirements \
+    uv pip install --upgrade pip && \
+    uv pip install -r /tmp/requirements/cpu.txt && \
+    uv pip install /tmp/dist/*.whl
+
+COPY deep_learning_container.py /usr/local/bin/deep_learning_container.py
+COPY bash_telemetry.sh /usr/local/bin/bash_telemetry.sh
+
+# Make the helper scripts executable, hook bash_telemetry.sh into /etc/bash.bashrc,
+# and run the OSS compliance generator. `|| true` is scoped to the final cache
+# cleanup only, so a failed download or compliance step fails the build.
+RUN chmod +x /usr/local/bin/deep_learning_container.py && \
+    chmod +x /usr/local/bin/bash_telemetry.sh && \
+    echo 'source /usr/local/bin/bash_telemetry.sh' >> /etc/bash.bashrc && \
+    HOME_DIR=/root && \
+    curl -o ${HOME_DIR}/oss_compliance.zip https://aws-dlinfra-utilities.s3.amazonaws.com/oss_compliance.zip && \
+    python3 -c "import zipfile, os; zipfile.ZipFile('/root/oss_compliance.zip').extractall('/root/'); os.remove('/root/oss_compliance.zip')" && \
+    cp ${HOME_DIR}/oss_compliance/test/testOSSCompliance /usr/local/bin/testOSSCompliance && \
+    chmod +x /usr/local/bin/testOSSCompliance && \
+    chmod +x ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh && \
+    ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh ${HOME_DIR} python3 && \
+    rm -rf ${HOME_DIR}/oss_compliance* && \
+    rm -rf /tmp/tmp* && \
+    { rm -rf /root/.cache || true; }
+
+# Verify vLLM installation
+RUN python3 -c "import vllm; print(f'vLLM version: {vllm.__version__}')"
+
+# ====================== ec2 =========================================
+FROM vllm-cpu AS vllm-cpu-ec2
+
+# Apply OS package updates; this RUN has no cache mount, so drop the apt lists from the layer.
+RUN apt-get update && \
+    apt-get upgrade -y && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+
+COPY dockerd_entrypoint.sh /usr/local/bin/dockerd_entrypoint.sh
+RUN chmod +x /usr/local/bin/dockerd_entrypoint.sh
+
+ENTRYPOINT ["/usr/local/bin/dockerd_entrypoint.sh"]
+
+# ====================== sagemaker =========================================
+FROM vllm-cpu AS vllm-cpu-sagemaker
+
+# Apply OS package updates; this RUN has no cache mount, so drop the apt lists from the layer.
+RUN apt-get update && \
+    apt-get upgrade -y && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+
+COPY sagemaker_entrypoint.sh /usr/local/bin/sagemaker_entrypoint.sh
+RUN chmod +x /usr/local/bin/sagemaker_entrypoint.sh
+
+ENTRYPOINT ["/usr/local/bin/sagemaker_entrypoint.sh"]