Skip to content

Commit f921a5e

Browse files
timelfrink and claude committed
[vllm] [cpu] [sagemaker] Add vLLM CPU image for SageMaker
Add support for vLLM CPU inference on SageMaker, aligned with the official vLLM CPU Dockerfile patterns.

Features:
- Multi-stage build: base → vllm-build → vllm-cpu → sagemaker
- Uses uv package manager for fast dependency installation
- Python 3.12 via uv (not limited to system python)
- Build caching with --mount=type=cache for apt, uv, ccache
- Wheel-based install (build wheel, then install separately)
- Uses official vLLM requirements files (cpu.txt, cpu-build.txt)
- Intel OpenMP + tcmalloc for x86_64 CPU performance
- gcc-12 as explicit compiler version

New files:
- vllm/x86_64/cpu/Dockerfile.cpu: Multi-stage Dockerfile
- vllm/buildspec-cpu-sm.yml: Build configuration for SageMaker

Expected image tag: vllm:0.15.1-cpu-py312-ubuntu22.04-sagemaker

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
1 parent 9a94c90 commit f921a5e

File tree

2 files changed

+193
-0
lines changed

2 files changed

+193
-0
lines changed

vllm/buildspec-cpu-sm.yml

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
# Buildspec for the vLLM CPU SageMaker image (x86_64).
# account_id and region are placeholders expected to be set from the environment.
account_id: &ACCOUNT_ID <set-$ACCOUNT_ID-in-environment>
prod_account_id: &PROD_ACCOUNT_ID 763104351884
region: &REGION <set-$REGION-in-environment>
framework: &FRAMEWORK vllm
version: &VERSION "0.15.1"
short_version: &SHORT_VERSION "0.15"
arch_type: &ARCH_TYPE x86_64
autopatch_build: "False"

# ECR repositories: "pr-<framework>" in the build account, "<framework>" in the
# prod account.
repository_info:
  build_repository: &BUILD_REPOSITORY
    image_type: &IMAGE_TYPE cpu
    root: .
    repository_name: &REPOSITORY_NAME !join [ pr, "-", *FRAMEWORK ]
    repository: &REPOSITORY !join [ *ACCOUNT_ID, .dkr.ecr., *REGION, .amazonaws.com/, *REPOSITORY_NAME ]
    release_repository_name: &RELEASE_REPOSITORY_NAME !join [ *FRAMEWORK ]
    release_repository: &RELEASE_REPOSITORY !join [ *PROD_ACCOUNT_ID, .dkr.ecr., *REGION, .amazonaws.com/, *RELEASE_REPOSITORY_NAME ]

# Files staged into the Docker build context (source path -> name in context).
context:
  build_context: &BUILD_CONTEXT
    deep_learning_container:
      source: src/deep_learning_container.py
      target: deep_learning_container.py
    sagemaker_entrypoint:
      source: vllm/build_artifacts/sagemaker_entrypoint.sh
      target: sagemaker_entrypoint.sh

images:
  BuildVLLMCPUSageMakerPy312:
    <<: *BUILD_REPOSITORY
    context:
      <<: *BUILD_CONTEXT
    image_size_baseline: 15000
    device_type: &DEVICE_TYPE cpu
    python_version: &DOCKER_PYTHON_VERSION py3
    tag_python_version: &TAG_PYTHON_VERSION py312
    os_version: &OS_VERSION ubuntu22.04
    # Resolves to 0.15.1-cpu-py312-ubuntu22.04-sagemaker
    tag: !join [ *VERSION, "-", *DEVICE_TYPE, "-", *TAG_PYTHON_VERSION, "-", *OS_VERSION, "-sagemaker" ]
    latest_release_tag: !join [ *VERSION, "-", *DEVICE_TYPE, "-", *TAG_PYTHON_VERSION, "-", *OS_VERSION, "-sagemaker" ]
    # Resolves to vllm/x86_64/cpu/Dockerfile.cpu
    docker_file: !join [ *FRAMEWORK, /, *ARCH_TYPE, /, *DEVICE_TYPE, /Dockerfile.cpu ]
    # Dockerfile stage to build
    target: vllm-cpu-sagemaker
    build: true
    enable_common_stage_build: false
    test_configs:
      test_platforms:
        - sanity
        - security
        - sagemaker

vllm/x86_64/cpu/Dockerfile.cpu

Lines changed: 145 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,145 @@
1+
# ====================== base =========================================
# Shared foundation for the later stages: Ubuntu 22.04 plus build arguments,
# image labels and environment defaults.
FROM ubuntu:22.04 AS base

# Build-time arguments; the defaults can be overridden with --build-arg.
ARG PYTHON_VERSION=3.12
ARG VLLM_VERSION=0.15.1
ARG PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cpu"

LABEL maintainer="Amazon AI" \
      dlc_major_version="1"

# Locale, Python I/O behaviour and the vLLM target device. These are ENV (not
# ARG), so they persist into every derived stage and the running container.
ENV DEBIAN_FRONTEND=noninteractive \
    LANG=C.UTF-8 \
    LC_ALL=C.UTF-8 \
    DLC_CONTAINER_TYPE=base \
    PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
    PYTHONIOENCODING=UTF-8 \
    VLLM_TARGET_DEVICE=cpu

WORKDIR /workspace
18+
19+
# Install system dependencies and uv.
# - The apt directories are cache mounts, so they are not stored in the image layer.
# - gcc-12 is registered as the `gcc` alternative with g++-12 as its slave link.
# - The uv installer is downloaded to a file and then executed instead of being
#   piped into sh: a pipeline's exit status is that of its last command, so
#   `curl ... | sh` would let a failed download pass and the build would only
#   break later at `uv venv`.
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
    --mount=type=cache,target=/var/lib/apt,sharing=locked \
    apt-get update -y && \
    apt-get install -y --no-install-recommends \
        sudo \
        ccache \
        git \
        curl \
        wget \
        ca-certificates \
        gcc-12 \
        g++-12 \
        libtcmalloc-minimal4 \
        libnuma-dev \
        ffmpeg \
        libsm6 \
        libxext6 \
        libgl1 \
        jq \
        lsof && \
    update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-12 10 \
        --slave /usr/bin/g++ g++ /usr/bin/g++-12 && \
    curl -LsSf https://astral.sh/uv/install.sh -o /tmp/uv-install.sh && \
    sh /tmp/uv-install.sh && \
    rm -f /tmp/uv-install.sh
43+
44+
# Set compiler environment: gcc-12/g++-12 as CC/CXX, ccache as the CMake C++
# compiler launcher, with its cache directory under /root/.cache/ccache.
ENV CC=/usr/bin/gcc-12 \
    CXX=/usr/bin/g++-12 \
    CCACHE_DIR=/root/.cache/ccache \
    CMAKE_CXX_COMPILER_LAUNCHER=ccache

# Setup uv and virtual environment.
# /root/.local/bin goes on PATH so `uv` can be invoked below (presumably where
# the uv installer places the binary -- confirm against the installer).
ENV PATH="/root/.local/bin:$PATH" \
    VIRTUAL_ENV="/opt/venv" \
    UV_PYTHON_INSTALL_DIR=/opt/uv/python
RUN uv venv --python ${PYTHON_VERSION} --seed ${VIRTUAL_ENV}
# Put the venv's bin directory ahead of the rest of PATH. Kept as a separate
# ENV because it expands VIRTUAL_ENV, which must already be set.
ENV PATH="$VIRTUAL_ENV/bin:$PATH"

# Force CPU wheels for PyTorch by exposing the extra index to both pip and uv.
ENV PIP_EXTRA_INDEX_URL=${PIP_EXTRA_INDEX_URL}
ENV UV_EXTRA_INDEX_URL=${PIP_EXTRA_INDEX_URL} \
    UV_INDEX_STRATEGY="unsafe-best-match" \
    UV_LINK_MODE="copy" \
    UV_HTTP_TIMEOUT=500

# Memory allocator + Intel OpenMP for x86_64 performance.
# NOTE(review): /opt/venv/lib/libiomp5.so is not created in this stage; it is
# presumably installed later by requirements/cpu.txt -- confirm.
ENV LD_PRELOAD="/usr/lib/x86_64-linux-gnu/libtcmalloc_minimal.so.4:/opt/venv/lib/libiomp5.so"
65+
66+
# ====================== vllm-build =========================================
# Builder stage: checks out the vLLM release tag and builds a CPU wheel into
# /vllm-workspace/dist.
FROM base AS vllm-build

ARG VLLM_VERSION
# Exported as MAX_JOBS (presumably read by the vLLM build to cap parallel
# compile jobs -- confirm).
ARG max_jobs=32
ENV MAX_JOBS=${max_jobs}

WORKDIR /vllm-workspace

# Clone vLLM: shallow clone of tag v${VLLM_VERSION} into the working directory.
RUN git clone --depth 1 --branch v${VLLM_VERSION} https://github.com/vllm-project/vllm.git .

# Install build + runtime dependencies from the requirements files in the checkout.
RUN --mount=type=cache,target=/root/.cache/uv \
    uv pip install --upgrade pip && \
    uv pip install -r requirements/cpu-build.txt && \
    uv pip install -r requirements/cpu.txt

# Build vLLM wheel with CPU backend; the uv, ccache and .deps directories are
# cache mounts.
RUN --mount=type=cache,target=/root/.cache/uv \
    --mount=type=cache,target=/root/.cache/ccache \
    --mount=type=cache,target=/vllm-workspace/.deps,sharing=locked \
    VLLM_TARGET_DEVICE=cpu python3 setup.py bdist_wheel --dist-dir=dist --py-limited-api=cp38
88+
89+
# ====================== vllm-cpu (final base) =========================================
# Runtime base shared by the ec2 and sagemaker targets. Starts again from
# `base`, so nothing from the vllm-build filesystem is inherited.
FROM base AS vllm-cpu

ARG VLLM_VERSION
WORKDIR /

# Install vLLM wheel and runtime dependencies. The wheel and requirements
# directories are bind-mounted from the vllm-build stage for this step only.
RUN --mount=type=cache,target=/root/.cache/uv \
    --mount=type=bind,from=vllm-build,src=/vllm-workspace/dist,target=/tmp/dist \
    --mount=type=bind,from=vllm-build,src=/vllm-workspace/requirements,target=/tmp/requirements \
    uv pip install --upgrade pip && \
    uv pip install -r /tmp/requirements/cpu.txt && \
    uv pip install /tmp/dist/*.whl

# Helper scripts from the build context.
# NOTE(review): bash_telemetry.sh is not listed in the buildspec build_context
# shown with this commit -- confirm the build system supplies it.
COPY deep_learning_container.py bash_telemetry.sh /usr/local/bin/
105+
106+
# Make the helper scripts executable, append a `source` line for the telemetry
# script to /etc/bash.bashrc, then download, unpack and run the OSS compliance
# tooling and clean up afterwards.
# - curl uses -f so an HTTP error fails the step instead of saving an error
#   page as the zip file.
# - `|| true` is grouped with the final cache removal only. Left ungrouped at
#   the end of the && chain it would apply to the whole chain and hide a
#   failure in any earlier command.
RUN chmod +x /usr/local/bin/deep_learning_container.py && \
    chmod +x /usr/local/bin/bash_telemetry.sh && \
    echo 'source /usr/local/bin/bash_telemetry.sh' >> /etc/bash.bashrc && \
    HOME_DIR=/root && \
    curl -f -o ${HOME_DIR}/oss_compliance.zip https://aws-dlinfra-utilities.s3.amazonaws.com/oss_compliance.zip && \
    python3 -c "import zipfile, os; zipfile.ZipFile('/root/oss_compliance.zip').extractall('/root/'); os.remove('/root/oss_compliance.zip')" && \
    cp ${HOME_DIR}/oss_compliance/test/testOSSCompliance /usr/local/bin/testOSSCompliance && \
    chmod +x /usr/local/bin/testOSSCompliance && \
    chmod +x ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh && \
    ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh ${HOME_DIR} python3 && \
    rm -rf ${HOME_DIR}/oss_compliance* && \
    rm -rf /tmp/tmp* && \
    { rm -rf /root/.cache || true; }

# Verify vLLM installation: the build fails here if the package cannot be imported.
RUN python3 -c "import vllm; print(f'vLLM version: {vllm.__version__}')"
122+
123+
# ====================== ec2 =========================================
# EC2 target: vllm-cpu plus OS package upgrades and the dockerd entrypoint.
FROM vllm-cpu AS vllm-cpu-ec2

# Apply available OS package upgrades. The package lists fetched by
# `apt-get update` are removed in the same layer so they do not end up in the
# final image.
RUN apt-get update && \
    apt-get upgrade -y && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*

# NOTE(review): dockerd_entrypoint.sh is not listed in the buildspec
# build_context shown with this commit -- confirm it is supplied before
# building this target.
COPY dockerd_entrypoint.sh /usr/local/bin/dockerd_entrypoint.sh
RUN chmod +x /usr/local/bin/dockerd_entrypoint.sh

ENTRYPOINT ["/usr/local/bin/dockerd_entrypoint.sh"]
134+
135+
# ====================== sagemaker =========================================
# SageMaker target: vllm-cpu plus OS package upgrades and the SageMaker entrypoint.
FROM vllm-cpu AS vllm-cpu-sagemaker

# Apply available OS package upgrades. The package lists fetched by
# `apt-get update` are removed in the same layer so they do not end up in the
# final image.
RUN apt-get update && \
    apt-get upgrade -y && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*

COPY sagemaker_entrypoint.sh /usr/local/bin/sagemaker_entrypoint.sh
RUN chmod +x /usr/local/bin/sagemaker_entrypoint.sh

ENTRYPOINT ["/usr/local/bin/sagemaker_entrypoint.sh"]

0 commit comments

Comments
 (0)