From 2e6fa46896f541adf85cb5eb6b62dae7b61b8063 Mon Sep 17 00:00:00 2001
From: Junpu Fan
Date: Tue, 7 Oct 2025 21:43:20 +0000
Subject: [PATCH 1/3] vllm 0.11.0 arm64

---
 release_images_general.yml      | 2 +-
 vllm/arm64/gpu/Dockerfile.arm64 | 6 +++---
 vllm/buildspec-arm64.yml        | 6 +++---
 3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/release_images_general.yml b/release_images_general.yml
index b1edb5296904..5969923e9d27 100644
--- a/release_images_general.yml
+++ b/release_images_general.yml
@@ -58,7 +58,7 @@ release_images:
       public_registry: True
   5:
     framework: "vllm"
-    version: "0.10.2"
+    version: "0.11.0"
     arch_type: "arm64"
     customer_type: "ec2"
     general:
diff --git a/vllm/arm64/gpu/Dockerfile.arm64 b/vllm/arm64/gpu/Dockerfile.arm64
index 47fbc02ec6f6..54f7dc5976c3 100644
--- a/vllm/arm64/gpu/Dockerfile.arm64
+++ b/vllm/arm64/gpu/Dockerfile.arm64
@@ -1,4 +1,4 @@
-ARG CUDA_VERSION=12.9.0
+ARG CUDA_VERSION=12.9.1
 ARG IMAGE_DISTRO=ubuntu22.04
 ARG PYTHON_VERSION=3.12
 
@@ -75,7 +75,7 @@ RUN git clone https://github.com/facebookresearch/xformers.git && \
 FROM build-base AS build-vllm
 RUN git clone https://github.com/vllm-project/vllm.git && \
     cd vllm && \
-    git checkout v0.10.2 && \
+    git checkout v0.11.0 && \
     git submodule sync && \
     git submodule update --init --recursive -j 8 && \
     MAX_JOBS=16 uv build --wheel --no-build-isolation -o /wheels
@@ -90,7 +90,7 @@ RUN uv pip install -U build cmake ninja pybind11 setuptools==79.0.1 wheel
 RUN git clone https://github.com/flashinfer-ai/flashinfer.git --recursive && \
     cd flashinfer && \
     python -c "import torch; print(torch.__version__, torch.version.cuda)" && \
-    git checkout v0.2.6.post1 && \
+    git checkout v0.3.1 && \
     export FLASHINFER_CUDA_ARCH_LIST="7.5" && \
     python -m flashinfer.aot && \
     MAX_JOBS=16 uv pip install --system --no-build-isolation . && \
diff --git a/vllm/buildspec-arm64.yml b/vllm/buildspec-arm64.yml
index 1d280f3d78c8..9e8f6e69809a 100644
--- a/vllm/buildspec-arm64.yml
+++ b/vllm/buildspec-arm64.yml
@@ -2,8 +2,8 @@ account_id: &ACCOUNT_ID
 prod_account_id: &PROD_ACCOUNT_ID 763104351884
 region: &REGION
 framework: &FRAMEWORK vllm
-version: &VERSION "0.10.2"
-short_version: &SHORT_VERSION "0.10"
+version: &VERSION "0.11.0"
+short_version: &SHORT_VERSION "0.11"
 arch_type: &ARCH_TYPE arm64
 
 autopatch_build: "False"
@@ -33,7 +33,7 @@ images:
       <<: *BUILD_REPOSITORY
     context:
       <<: *BUILD_CONTEXT
-    image_size_baseline: 25000
+    image_size_baseline: 26000
     device_type: &DEVICE_TYPE gpu
     cuda_version: &CUDA_VERSION cu129
     python_version: &DOCKER_PYTHON_VERSION py3

From 3e0c9f8b18241c6349696c8985b92c545579611a Mon Sep 17 00:00:00 2001
From: Junpu Fan
Date: Tue, 7 Oct 2025 23:24:58 +0000
Subject: [PATCH 2/3] fix ec2 test

---
 test/vllm/ec2/test_artifacts/test_ec2.py | 2 +-
 test/vllm/ec2/utils/run_vllm_on_arm64.sh | 1 -
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/test/vllm/ec2/test_artifacts/test_ec2.py b/test/vllm/ec2/test_artifacts/test_ec2.py
index 91dd079f3f05..e43aac4b13b8 100644
--- a/test/vllm/ec2/test_artifacts/test_ec2.py
+++ b/test/vllm/ec2/test_artifacts/test_ec2.py
@@ -38,7 +38,7 @@ def setup_env(connection):
     python3 -m venv vllm_env && \
     source vllm_env/bin/activate && \
     pip install --upgrade pip setuptools wheel && \
-    pip install numpy torch tqdm aiohttp pandas datasets pillow ray vllm==0.10.0 && \
+    pip install numpy torch tqdm aiohttp pandas datasets pillow ray vllm==0.11.0 && \
     pip install "transformers<4.54.0"
     """
     connection.run(setup_command, shell=True)
diff --git a/test/vllm/ec2/utils/run_vllm_on_arm64.sh b/test/vllm/ec2/utils/run_vllm_on_arm64.sh
index d59ecc62f4aa..de0cbda5c6e7 100644
--- a/test/vllm/ec2/utils/run_vllm_on_arm64.sh
+++ b/test/vllm/ec2/utils/run_vllm_on_arm64.sh
@@ -58,7 +58,6 @@ docker run --rm \
     --entrypoint /bin/bash \
     -e "HUGGING_FACE_HUB_TOKEN=$HF_TOKEN" \
     -e "VLLM_WORKER_MULTIPROC_METHOD=spawn" \
-    -e "VLLM_USE_V1=0" \
     -v /fsx/.cache/huggingface:/root/.cache/huggingface \
     --gpus=all \
     $DLC_IMAGE \

From 4992805df5b7b8eed07857127f591a4fffa85032 Mon Sep 17 00:00:00 2001
From: Junpu Fan
Date: Wed, 8 Oct 2025 01:54:45 +0000
Subject: [PATCH 3/3] set flashinfer version back

---
 vllm/arm64/gpu/Dockerfile.arm64 | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/vllm/arm64/gpu/Dockerfile.arm64 b/vllm/arm64/gpu/Dockerfile.arm64
index 54f7dc5976c3..285f8a4dc8cf 100644
--- a/vllm/arm64/gpu/Dockerfile.arm64
+++ b/vllm/arm64/gpu/Dockerfile.arm64
@@ -90,7 +90,7 @@ RUN uv pip install -U build cmake ninja pybind11 setuptools==79.0.1 wheel
 RUN git clone https://github.com/flashinfer-ai/flashinfer.git --recursive && \
     cd flashinfer && \
     python -c "import torch; print(torch.__version__, torch.version.cuda)" && \
-    git checkout v0.3.1 && \
+    git checkout v0.2.6.post1 && \
     export FLASHINFER_CUDA_ARCH_LIST="7.5" && \
     python -m flashinfer.aot && \
     MAX_JOBS=16 uv pip install --system --no-build-isolation . && \
@@ -124,7 +124,7 @@ RUN uv pip install ninja
 ARG PYTHON="python3"
 LABEL maintainer="Amazon AI"
 LABEL dlc_major_version="1"
-ARG EFA_VERSION="1.43.2"
+ARG EFA_VERSION="1.43.3"
 ENV DEBIAN_FRONTEND=noninteractive \
     LANG=C.UTF-8 \
     LC_ALL=C.UTF-8 \