Commit 6228799

Authored by fgbelidji, EC2 Default User, and DevakiBolleneni
Hf vllm 0.14.0 (#5609)
* Added hf-vllm v0.12.0
* Added tests for hf-vllm
* Changed dlc_developer_config.toml
* modify toml file to add huggingface-vllm
* updated buildspec following new pipeline creation
* Fix test role
* added transformers version
* fix region and suffix of base image
* fix suffix of base image
* fix repo name
* reverted dlc_developer_config.toml
* huggingface_vllm in dlc_developer_config.toml
* Renamed hf-vllm to vllm
* renamed hf-vllm tests to vllm
* removed renamed folders
* added conftest, utils, requirements, and updated test_vllm
* changed testrunner so it won't skip hf-vllm tests
* support for huggingface_vllm
* changed image_type buildspec
* enforce g6 instance
* fix instance
* added local test
* changed cuda compat logic
* updated test to sagemaker v3
* Enable local tests for huggingface_vllm
* Add huggingface/vllm local mode tests with tiny-random-qwen3 model
* Fix indentation error in __init__.py
* Download Qwen2.5-0.5B model at runtime for huggingface/vllm local tests
* hf hub in requirements.txt
* Trigger CI
* Fix: use docker_image instead of ecr_image for local tests
* Upgrade HuggingFace vLLM to 0.14.0
* reverted dlc_developer_config.toml
* black
* Revert "black"
  This reverts commit 907d65f.
* black

Co-authored-by: EC2 Default User <ec2-user@ip-10-90-0-235.ec2.internal>
Co-authored-by: Devaki Bolleneni <40668607+DevakiBolleneni@users.noreply.github.com>
Co-authored-by: DevakiBolleneni <devakib@amazon.com>
1 parent 5d3d544 commit 6228799

23 files changed: +1401 −8 lines

dlc_developer_config.toml

Lines changed: 4 additions & 1 deletion
@@ -36,7 +36,7 @@ deep_canary_mode = false

 [build]
 # Add in frameworks you would like to build. By default, builds are disabled unless you specify building an image.
-# available frameworks - ["base", "vllm", "sglang", "autogluon", "huggingface_tensorflow", "huggingface_pytorch", "huggingface_tensorflow_trcomp", "huggingface_pytorch_trcomp", "pytorch_trcomp", "tensorflow", "pytorch", "stabilityai_pytorch"]
+# available frameworks - ["base", "vllm", "sglang", "autogluon", "huggingface_vllm", "huggingface_tensorflow", "huggingface_pytorch", "huggingface_tensorflow_trcomp", "huggingface_pytorch_trcomp", "pytorch_trcomp", "tensorflow", "pytorch", "stabilityai_pytorch"]
 build_frameworks = []


@@ -186,5 +186,8 @@ dlc-pr-tensorflow-2-eia-inference = ""
 # vllm
 dlc-pr-vllm = ""

+# HuggingFace vLLM
+dlc-pr-huggingface-vllm = ""
+
 # sglang
 dlc-pr-sglang = ""

huggingface/vllm/build_artifacts/sagemaker_entrypoint.sh

Lines changed: 25 additions & 0 deletions

@@ -0,0 +1,25 @@
+#!/bin/bash
+# Check if telemetry file exists before executing
+# Execute telemetry script if it exists, suppress errors
+bash /usr/local/bin/bash_telemetry.sh >/dev/null 2>&1 || true
+
+# Source CUDA compat for older drivers (e.g., g5 instances)
+if command -v nvidia-smi >/dev/null 2>&1 && command -v nvcc >/dev/null 2>&1; then
+    source /usr/local/bin/start_cuda_compat.sh
+fi
+
+PREFIX="SM_VLLM_"
+ARG_PREFIX="--"
+
+ARGS=(--port 8080)
+
+while IFS='=' read -r key value; do
+    arg_name=$(echo "${key#"${PREFIX}"}" | tr '[:upper:]' '[:lower:]' | tr '_' '-')
+
+    ARGS+=("${ARG_PREFIX}${arg_name}")
+    if [ -n "$value" ]; then
+        ARGS+=("$value")
+    fi
+done < <(env | grep "^${PREFIX}")
+
+exec python3 -m vllm.entrypoints.openai.api_server "${ARGS[@]}"
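
For illustration, here is a minimal sketch of how the loop above maps SM_VLLM_* environment variables onto vLLM server flags; the variable values below are hypothetical placeholders, not part of this commit:

    # Hypothetical inputs (placeholders, not part of this commit):
    export SM_VLLM_MODEL="Qwen/Qwen2.5-0.5B"        # -> --model Qwen/Qwen2.5-0.5B
    export SM_VLLM_MAX_MODEL_LEN="4096"             # -> --max-model-len 4096
    export SM_VLLM_ENABLE_PREFIX_CACHING=""         # empty value -> bare flag --enable-prefix-caching

    # With those set, the final exec is equivalent to:
    #   python3 -m vllm.entrypoints.openai.api_server --port 8080 \
    #     --model Qwen/Qwen2.5-0.5B --max-model-len 4096 --enable-prefix-caching

Note that an env var with an empty value yields a bare flag, since the loop only appends "$value" when it is non-empty.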

huggingface/vllm/build_artifacts/start_cuda_compat.sh

Lines changed: 25 additions & 0 deletions

@@ -0,0 +1,25 @@
+#!/bin/bash
+
+verlte() {
+    [ "$1" = "$2" ] && return 1 || [ "$1" = "$(echo -e "$1\n$2" | sort -V | head -n1)" ]
+}
+
+COMPAT_FILE=/usr/local/cuda/compat/libcuda.so.1
+if [ -f $COMPAT_FILE ]; then
+    CUDA_COMPAT_MAX_DRIVER_VERSION=$(readlink $COMPAT_FILE | cut -d'.' -f 3-)
+    echo "CUDA compat package should be installed for NVIDIA driver smaller than ${CUDA_COMPAT_MAX_DRIVER_VERSION}"
+    NVIDIA_DRIVER_VERSION=$(sed -n 's/^NVRM.*Kernel Module *\([0-9.]*\).*$/\1/p' /proc/driver/nvidia/version 2>/dev/null || true)
+    if [ -z "$NVIDIA_DRIVER_VERSION" ]; then
+        NVIDIA_DRIVER_VERSION=$(nvidia-smi --query-gpu=driver_version --format=csv,noheader --id=0 2>/dev/null || true)
+    fi
+    echo "Current installed NVIDIA driver version is ${NVIDIA_DRIVER_VERSION}"
+    if verlte $NVIDIA_DRIVER_VERSION $CUDA_COMPAT_MAX_DRIVER_VERSION; then
+        echo "Adding CUDA compat to LD_LIBRARY_PATH"
+        export LD_LIBRARY_PATH=/usr/local/cuda/compat:$LD_LIBRARY_PATH
+        echo $LD_LIBRARY_PATH
+    else
+        echo "Skipping CUDA compat setup as newer NVIDIA driver is installed"
+    fi
+else
+    echo "Skipping CUDA compat setup as package not found"
+fi
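
A quick sanity check of the verlte helper above (placeholder driver versions, not part of the commit) shows its strict less-than semantics: despite the name, it returns failure on equal versions, so the compat layer is only added when the installed driver is genuinely older than what the compat package targets:

    # Hypothetical check of verlte's ordering (placeholder versions):
    verlte() {
        [ "$1" = "$2" ] && return 1 || [ "$1" = "$(echo -e "$1\n$2" | sort -V | head -n1)" ]
    }
    verlte "535.104.05" "570.86.10" && echo "older driver -> compat added"    # prints
    verlte "570.86.10" "570.86.10" || echo "equal versions -> compat skipped" # prints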

huggingface/vllm/buildspec.yml

Lines changed: 56 additions & 2 deletions
@@ -1,2 +1,56 @@
-
-
+account_id: &ACCOUNT_ID <set-$ACCOUNT_ID-in-environment>
+prod_account_id: &PROD_ACCOUNT_ID 763104351884
+region: &REGION <set-$REGION-in-environment>
+base_framework: &BASE_FRAMEWORK vllm
+framework: &FRAMEWORK !join [ "huggingface_", *BASE_FRAMEWORK]
+version: &VERSION "0.14.0"
+short_version: &SHORT_VERSION "0.14"
+arch_type: &ARCH_TYPE x86_64
+autopatch_build: "False"
+
+repository_info:
+  build_repository: &BUILD_REPOSITORY
+    image_type: &IMAGE_TYPE inference
+    root: huggingface/vllm
+    repository_name: &REPOSITORY_NAME !join [ "pr", "-", "huggingface", "-", *BASE_FRAMEWORK ]
+    repository: &REPOSITORY !join [ *ACCOUNT_ID, .dkr.ecr., *REGION, .amazonaws.com/, *REPOSITORY_NAME ]
+    release_repository_name: &RELEASE_REPOSITORY_NAME !join [ "huggingface", "-", *BASE_FRAMEWORK ]
+    release_repository: &RELEASE_REPOSITORY !join [ *PROD_ACCOUNT_ID, .dkr.ecr., *REGION, .amazonaws.com/, *RELEASE_REPOSITORY_NAME ]
+
+context:
+  build_context: &BUILD_CONTEXT
+    deep_learning_container:
+      source: ../../src/deep_learning_container.py
+      target: deep_learning_container.py
+    start_cuda_compat:
+      source: build_artifacts/start_cuda_compat.sh
+      target: start_cuda_compat.sh
+    sagemaker_entrypoint:
+      source: build_artifacts/sagemaker_entrypoint.sh
+      target: sagemaker_entrypoint.sh
+
+
+images:
+  BuildHuggingFaceVllmGpuPy312Cu129DockerImage:
+    <<: *BUILD_REPOSITORY
+    context:
+      <<: *BUILD_CONTEXT
+    image_size_baseline: 26000
+    device_type: &DEVICE_TYPE gpu
+    cuda_version: &CUDA_VERSION cu129
+    python_version: &DOCKER_PYTHON_VERSION py3
+    tag_python_version: &TAG_PYTHON_VERSION py312
+    os_version: &OS_VERSION ubuntu22.04
+    transformers_version: &TRANSFORMERS_VERSION 4.57.3
+    vllm_version: &VLLM_VERSION 0.14.0
+    tag: !join [ "vllm", "-", *VERSION, "-", *DEVICE_TYPE, "-", *TAG_PYTHON_VERSION, "-", *CUDA_VERSION, "-", *OS_VERSION, "-sagemaker" ]
+    latest_release_tag: !join [ "vllm", "-", *VERSION, "-", *DEVICE_TYPE, "-", *TAG_PYTHON_VERSION, "-", *CUDA_VERSION, "-", *OS_VERSION, "-sagemaker" ]
+    docker_file: !join [ docker/, *SHORT_VERSION, /, *CUDA_VERSION, /Dockerfile ]
+    target: sagemaker
+    build: true
+    enable_common_stage_build: false
+    test_configs:
+      test_platforms:
+        - sanity
+        - security
+        - sagemaker
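
For readability, here is roughly how the !join anchors above resolve, assuming placeholder values ACCOUNT_ID=123456789012 and REGION=us-west-2 (neither is part of the commit):

    # Approximate resolution of the !join anchors (placeholder account/region):
    # framework           -> huggingface_vllm
    # repository_name     -> pr-huggingface-vllm
    # repository          -> 123456789012.dkr.ecr.us-west-2.amazonaws.com/pr-huggingface-vllm
    # release_repository  -> 763104351884.dkr.ecr.us-west-2.amazonaws.com/huggingface-vllm
    # tag                 -> vllm-0.14.0-gpu-py312-cu129-ubuntu22.04-sagemaker
    # docker_file         -> docker/0.14/cu129/Dockerfile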

huggingface/vllm/docker/0.14/cu129/Dockerfile

Lines changed: 44 additions & 0 deletions

@@ -0,0 +1,44 @@
+ARG FINAL_BASE_IMAGE=763104351884.dkr.ecr.us-west-2.amazonaws.com/vllm:0.14.0-gpu-py312-cu129-ubuntu22.04-sagemaker-v1.0
+FROM ${FINAL_BASE_IMAGE} AS vllm-base
+
+LABEL maintainer="Amazon AI"
+LABEL dlc_major_version="1"
+
+ARG HUGGINGFACE_HUB_VERSION=0.36.0
+ARG HF_XET_VERSION=1.2.0
+
+RUN apt-get update -y \
+    && apt-get install -y --no-install-recommends curl unzip \
+    && rm -rf /var/lib/apt/lists/*
+
+
+RUN pip install --upgrade pip && \
+    pip install --no-cache-dir \
+    huggingface-hub==${HUGGINGFACE_HUB_VERSION} \
+    hf-xet==${HF_XET_VERSION} \
+    grpcio
+
+
+FROM vllm-base AS sagemaker
+ENV HF_HUB_ENABLE_HF_TRANSFER="1" \
+    HF_HUB_USER_AGENT_ORIGIN="aws:sagemaker:gpu-cuda:inference:hf-vllm"
+
+# Copy CUDA compat and entrypoint scripts
+COPY start_cuda_compat.sh /usr/local/bin/start_cuda_compat.sh
+COPY sagemaker_entrypoint.sh /usr/local/bin/sagemaker_entrypoint.sh
+
+RUN chmod +x /usr/local/bin/start_cuda_compat.sh \
+    && chmod +x /usr/local/bin/sagemaker_entrypoint.sh
+
+RUN HOME_DIR=/root \
+    && uv pip install --system --upgrade pip requests PTable \
+    && curl -o ${HOME_DIR}/oss_compliance.zip https://aws-dlinfra-utilities.s3.amazonaws.com/oss_compliance.zip \
+    && unzip ${HOME_DIR}/oss_compliance.zip -d ${HOME_DIR}/ \
+    && cp ${HOME_DIR}/oss_compliance/test/testOSSCompliance /usr/local/bin/testOSSCompliance \
+    && chmod +x /usr/local/bin/testOSSCompliance \
+    && chmod +x ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh \
+    && ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh ${HOME_DIR} python3 \
+    && rm -rf ${HOME_DIR}/oss_compliance*
+
+
+ENTRYPOINT ["/usr/local/bin/sagemaker_entrypoint.sh"]
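
As a rough local smoke test of the sagemaker stage above: a sketch only, assuming ECR access to the base image, that start_cuda_compat.sh and sagemaker_entrypoint.sh sit at the build-context root (as the buildspec's build_context stages them), and placeholder tag and model names:

    # Sketch; image tag, model, and context layout are assumptions.
    docker build --target sagemaker -t hf-vllm:0.14.0-local \
        -f docker/0.14/cu129/Dockerfile .
    docker run --rm --gpus all -p 8080:8080 \
        -e SM_VLLM_MODEL="Qwen/Qwen2.5-0.5B" \
        hf-vllm:0.14.0-local
    # The entrypoint serves the vLLM OpenAI API on port 8080:
    curl -s http://localhost:8080/v1/models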

src/constants.py

Lines changed: 1 addition & 0 deletions
@@ -27,6 +27,7 @@
     "base",
     "vllm",
     "sglang",
+    "huggingface_vllm",
 }
 DEVICE_TYPES = {"cpu", "gpu", "hpu", "eia", "inf", "neuron", "neuronx"}
 IMAGE_TYPES = {"training", "inference"}

test/sagemaker_tests/huggingface/inference/resources/local_mode_lock

Whitespace-only changes.
Lines changed: 13 additions & 0 deletions
@@ -0,0 +1,13 @@
+# Copyright 2019-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"). You
+# may not use this file except in compliance with the License. A copy of
+# the License is located at
+#
+#     http://aws.amazon.com/apache2.0/
+#
+# or in the "license" file accompanying this file. This file is
+# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
+# ANY KIND, either express or implied. See the License for the specific
+# language governing permissions and limitations under the License.
+from __future__ import absolute_import
