45 commits
af074e3  Added hf-vllm v0.12.0 (Dec 10, 2025)
d431ead  Added tests for hf-vllm (Dec 10, 2025)
9e1d266  Changed dlc_developer_config.toml (Dec 10, 2025)
9e4d893  update conflict (Dec 15, 2025)
8930020  Merge branch 'master' into hf-vllm-0.12.0 (DevakiBolleneni, Dec 17, 2025)
b84eab4  modify toml file to add huggingface-vllm (Dec 17, 2025)
6099561  Merge branch 'master' into hf-vllm-0.12.0 (DevakiBolleneni, Dec 17, 2025)
81a94d9  updated buildspec following new pipeline creation (Dec 18, 2025)
289fb12  Fix test role (Dec 18, 2025)
8948619  added transformers version (Dec 18, 2025)
198d432  Merge branch 'master' into hf-vllm-0.12.0 (fgbelidji, Dec 18, 2025)
7d0e3a7  fix region and suffix of base image (Dec 18, 2025)
36e5adc  fix suffix of base image (Dec 18, 2025)
73dae49  fix repo name (Dec 19, 2025)
23cdda6  Merge branch 'master' into hf-vllm-0.12.0 (fgbelidji, Dec 19, 2025)
5aa6216  reverted dlc_developer_config.toml (Dec 19, 2025)
b36a91e  Merge branch 'master' into hf-vllm-0.12.0 (DevakiBolleneni, Jan 5, 2026)
4e95e1f  Merge branch 'master' into hf-vllm-0.12.0 (DevakiBolleneni, Jan 6, 2026)
ea680df  huggingface_vllm in dlc_developer_config.toml (Jan 6, 2026)
cf7f384  Merge branch 'master' into hf-vllm-0.12.0 (fgbelidji, Jan 6, 2026)
17d50f3  Renamed hf-vllm to vllm (Jan 6, 2026)
fb2c4a3  renamed hf-vllm tests to vllm (Jan 6, 2026)
f46e252  removed renamed folders (Jan 6, 2026)
e120746  added conftest, utils, requirements, and updated text_vllm (Jan 7, 2026)
9ae89fb  changed testrunner so it won't skip hf-vllm tests (Jan 7, 2026)
b2c4295  support for huggingface_vllm (Jan 7, 2026)
7a5e5ba  changed image_type buildspec (Jan 8, 2026)
8f0092e  enforce g6 instance (Jan 9, 2026)
ee50601  fix instance (Jan 12, 2026)
bae57d3  added local test (Jan 12, 2026)
0e40bdd  Merge branch 'master' into hf-vllm-0.12.0 (fgbelidji, Jan 12, 2026)
243eb2f  Merge branch 'master' into hf-vllm-0.12.0 (Jan 16, 2026)
181d61b  changed cuda compat logic (Jan 22, 2026)
b5a604b  updated test to sagemaker v3 (Jan 22, 2026)
b7769c3  Merge branch 'master' into hf-vllm-0.12.0 (fgbelidji, Jan 22, 2026)
8eb99bd  Enable local tests for huggingface_vllm (Jan 23, 2026)
2d993e1  Merge branch 'hf-vllm-0.12.0' of github.com:fgbelidji/deep-learning-c… (Jan 23, 2026)
ea1f74c  Add huggingface/vllm local mode tests with tiny-random-qwen3 model (Jan 27, 2026)
83d5d3f  Fix indentation error in __init__.py (Jan 27, 2026)
524f3aa  Download Qwen2.5-0.5B model at runtime for huggingface/vllm local tests (Jan 27, 2026)
73f1d41  hf hub in requirements.txt (Jan 27, 2026)
ee13c4f  Trigger CI (Jan 27, 2026)
66602d1  Merge branch 'master' into hf-vllm-0.12.0 (fgbelidji, Jan 27, 2026)
8b9347a  Fix: use docker_image instead of ecr_image for local tests (Jan 27, 2026)
9f42c8c  Merge branch 'master' into hf-vllm-0.12.0 (fgbelidji, Jan 27, 2026)
9 changes: 6 additions & 3 deletions dlc_developer_config.toml
@@ -1,6 +1,6 @@
[dev]
# Set to "huggingface", for example, if you are a huggingface developer. Default is ""
partner_developer = ""
partner_developer = "huggingface"
# Please only set it to true if you are preparing an EI related PR
# Do remember to revert it back to false before merging any PR (including EI dedicated PR)
ei_mode = false
@@ -36,8 +36,8 @@ deep_canary_mode = false

[build]
# Add in frameworks you would like to build. By default, builds are disabled unless you specify building an image.
- # available frameworks - ["base", "vllm", "sglang", "autogluon", "huggingface_tensorflow", "huggingface_pytorch", "huggingface_tensorflow_trcomp", "huggingface_pytorch_trcomp", "pytorch_trcomp", "tensorflow", "pytorch", "stabilityai_pytorch"]
- build_frameworks = []
+ # available frameworks - ["base", "vllm", "sglang", "autogluon", "huggingface_vllm", "huggingface_tensorflow", "huggingface_pytorch", "huggingface_tensorflow_trcomp", "huggingface_pytorch_trcomp", "pytorch_trcomp", "tensorflow", "pytorch", "stabilityai_pytorch"]
+ build_frameworks = ["huggingface_vllm"]


# By default we build both training and inference containers. Set true/false values to determine which to build.
@@ -186,5 +186,8 @@ dlc-pr-tensorflow-2-eia-inference = ""
# vllm
dlc-pr-vllm = ""

+ # HuggingFace vLLM
+ dlc-pr-huggingface-vllm = ""

# sglang
dlc-pr-sglang = ""
25 changes: 25 additions & 0 deletions huggingface/vllm/build_artifacts/sagemaker_entrypoint.sh
@@ -0,0 +1,25 @@
#!/bin/bash
# Run the telemetry script if present; suppress errors so startup never fails on it
bash /usr/local/bin/bash_telemetry.sh >/dev/null 2>&1 || true

# Source CUDA compat for older drivers (e.g., g5 instances)
if command -v nvidia-smi >/dev/null 2>&1 && command -v nvcc >/dev/null 2>&1; then
    source /usr/local/bin/start_cuda_compat.sh
fi

# Translate SM_VLLM_*-prefixed environment variables into vLLM CLI flags:
# SM_VLLM_FOO_BAR=baz becomes --foo-bar baz; an empty value yields a bare flag.
PREFIX="SM_VLLM_"
ARG_PREFIX="--"

ARGS=(--port 8080)

while IFS='=' read -r key value; do
    arg_name=$(echo "${key#"${PREFIX}"}" | tr '[:upper:]' '[:lower:]' | tr '_' '-')

    ARGS+=("${ARG_PREFIX}${arg_name}")
    if [ -n "$value" ]; then
        ARGS+=("$value")
    fi
done < <(env | grep "^${PREFIX}")

exec python3 -m vllm.entrypoints.openai.api_server "${ARGS[@]}"
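For illustration, a hypothetical invocation showing how the loop above rewrites SM_VLLM_-prefixed variables into server flags (the variable names, model, and image URI here are examples, not defaults shipped with the image):

docker run --gpus all \
    -e SM_VLLM_MODEL=Qwen/Qwen2.5-0.5B \
    -e SM_VLLM_TENSOR_PARALLEL_SIZE=1 \
    -e SM_VLLM_DTYPE=bfloat16 \
    <image-uri>
# ...which the entrypoint expands to roughly:
#   python3 -m vllm.entrypoints.openai.api_server --port 8080 \
#       --model Qwen/Qwen2.5-0.5B --tensor-parallel-size 1 --dtype bfloat16
# (flag order follows env(1) output and is not guaranteed)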
25 changes: 25 additions & 0 deletions huggingface/vllm/build_artifacts/start_cuda_compat.sh
@@ -0,0 +1,25 @@
#!/bin/bash

verlte() {
    # Succeeds when $1 sorts strictly before $2 in version order; equal versions fail.
    [ "$1" = "$2" ] && return 1 || [ "$1" = "$(echo -e "$1\n$2" | sort -V | head -n1)" ]
}

COMPAT_FILE=/usr/local/cuda/compat/libcuda.so.1
if [ -f "$COMPAT_FILE" ]; then
    CUDA_COMPAT_MAX_DRIVER_VERSION=$(readlink "$COMPAT_FILE" | cut -d'.' -f 3-)
    echo "CUDA compat package should be installed for NVIDIA drivers older than ${CUDA_COMPAT_MAX_DRIVER_VERSION}"
    # Prefer the kernel module version; fall back to nvidia-smi if /proc is unavailable
    NVIDIA_DRIVER_VERSION=$(sed -n 's/^NVRM.*Kernel Module *\([0-9.]*\).*$/\1/p' /proc/driver/nvidia/version 2>/dev/null || true)
    if [ -z "$NVIDIA_DRIVER_VERSION" ]; then
        NVIDIA_DRIVER_VERSION=$(nvidia-smi --query-gpu=driver_version --format=csv,noheader --id=0 2>/dev/null || true)
    fi
    echo "Current installed NVIDIA driver version is ${NVIDIA_DRIVER_VERSION}"
    if verlte "$NVIDIA_DRIVER_VERSION" "$CUDA_COMPAT_MAX_DRIVER_VERSION"; then
        echo "Adding CUDA compat to LD_LIBRARY_PATH"
        export LD_LIBRARY_PATH=/usr/local/cuda/compat:$LD_LIBRARY_PATH
        echo "$LD_LIBRARY_PATH"
    else
        echo "Skipping CUDA compat setup as newer NVIDIA driver is installed"
    fi
else
    echo "Skipping CUDA compat setup as package not found"
fi
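A quick sketch of the comparison helper's behavior (note that verlte, as written, fails on equal versions, so the compat library is only used for strictly older drivers; the version numbers are illustrative):

# sort -V orders version strings numerically; head -n1 picks the smaller one
printf '535.104.05\n550.54.14\n' | sort -V | head -n1   # -> 535.104.05
# verlte 535.104.05 550.54.14   -> success: driver is older, compat path is added
# verlte 550.54.14  550.54.14   -> failure: equal versions return 1 in this helper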
58 changes: 56 additions & 2 deletions huggingface/vllm/buildspec.yml
@@ -1,2 +1,56 @@


account_id: &ACCOUNT_ID <set-$ACCOUNT_ID-in-environment>
prod_account_id: &PROD_ACCOUNT_ID 763104351884
region: &REGION <set-$REGION-in-environment>
base_framework: &BASE_FRAMEWORK vllm
framework: &FRAMEWORK !join [ "huggingface_", *BASE_FRAMEWORK]
version: &VERSION "0.12.0"
short_version: &SHORT_VERSION "0.12"
arch_type: &ARCH_TYPE x86_64
autopatch_build: "False"

repository_info:
  build_repository: &BUILD_REPOSITORY
    image_type: &IMAGE_TYPE inference
    root: huggingface/vllm
    repository_name: &REPOSITORY_NAME !join [ "pr", "-", "huggingface", "-", *BASE_FRAMEWORK ]
    repository: &REPOSITORY !join [ *ACCOUNT_ID, .dkr.ecr., *REGION, .amazonaws.com/, *REPOSITORY_NAME ]
    release_repository_name: &RELEASE_REPOSITORY_NAME !join [ "huggingface", "-", *BASE_FRAMEWORK ]
    release_repository: &RELEASE_REPOSITORY !join [ *PROD_ACCOUNT_ID, .dkr.ecr., *REGION, .amazonaws.com/, *RELEASE_REPOSITORY_NAME ]

context:
  build_context: &BUILD_CONTEXT
    deep_learning_container:
      source: ../../src/deep_learning_container.py
      target: deep_learning_container.py
    start_cuda_compat:
      source: build_artifacts/start_cuda_compat.sh
      target: start_cuda_compat.sh
    sagemaker_entrypoint:
      source: build_artifacts/sagemaker_entrypoint.sh
      target: sagemaker_entrypoint.sh


images:
  BuildHuggingFaceVllmGpuPy312Cu129DockerImage:
    <<: *BUILD_REPOSITORY
    context:
      <<: *BUILD_CONTEXT
    image_size_baseline: 26000
    device_type: &DEVICE_TYPE gpu
    cuda_version: &CUDA_VERSION cu129
    python_version: &DOCKER_PYTHON_VERSION py3
    tag_python_version: &TAG_PYTHON_VERSION py312
    os_version: &OS_VERSION ubuntu22.04
    transformers_version: &TRANSFORMERS_VERSION 4.57.3
    vllm_version: &VLLM_VERSION 0.12.0
    tag: !join [ "vllm", "-", *VERSION, "-", *DEVICE_TYPE, "-", *TAG_PYTHON_VERSION, "-", *CUDA_VERSION, "-", *OS_VERSION, "-sagemaker" ]
    latest_release_tag: !join [ "vllm", "-", *VERSION, "-", *DEVICE_TYPE, "-", *TAG_PYTHON_VERSION, "-", *CUDA_VERSION, "-", *OS_VERSION, "-sagemaker" ]
    docker_file: !join [ docker/, *SHORT_VERSION, /, *CUDA_VERSION, /Dockerfile ]
    target: sagemaker
    build: true
    enable_common_stage_build: false
    test_configs:
      test_platforms:
        - sanity
        - security
        - sagemaker
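For reference, a sketch of how the !join anchors above resolve, assuming the placeholder values ACCOUNT_ID=123456789012 and REGION=us-west-2:

# framework:           huggingface_vllm
# repository_name:     pr-huggingface-vllm
# repository:          123456789012.dkr.ecr.us-west-2.amazonaws.com/pr-huggingface-vllm
# release_repository:  763104351884.dkr.ecr.us-west-2.amazonaws.com/huggingface-vllm
# tag:                 vllm-0.12.0-gpu-py312-cu129-ubuntu22.04-sagemaker
# docker_file:         docker/0.12/cu129/Dockerfile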
44 changes: 44 additions & 0 deletions huggingface/vllm/docker/0.12/cu129/Dockerfile
@@ -0,0 +1,44 @@
ARG FINAL_BASE_IMAGE=763104351884.dkr.ecr.us-west-2.amazonaws.com/vllm:0.12.0-gpu-py312-cu129-ubuntu22.04-sagemaker-v1.0
FROM ${FINAL_BASE_IMAGE} AS vllm-base

LABEL maintainer="Amazon AI"
LABEL dlc_major_version="1"

ARG HUGGINGFACE_HUB_VERSION=0.36.0
ARG HF_XET_VERSION=1.2.0

RUN apt-get update -y \
    && apt-get install -y --no-install-recommends curl unzip \
    && rm -rf /var/lib/apt/lists/*


RUN pip install --upgrade pip && \
    pip install --no-cache-dir \
    huggingface-hub==${HUGGINGFACE_HUB_VERSION} \
    hf-xet==${HF_XET_VERSION} \
    grpcio


FROM vllm-base AS sagemaker
ENV HF_HUB_ENABLE_HF_TRANSFER="1" \
    HF_HUB_USER_AGENT_ORIGIN="aws:sagemaker:gpu-cuda:inference:hf-vllm"

# Copy CUDA compat and entrypoint scripts
COPY start_cuda_compat.sh /usr/local/bin/start_cuda_compat.sh
COPY sagemaker_entrypoint.sh /usr/local/bin/sagemaker_entrypoint.sh

RUN chmod +x /usr/local/bin/start_cuda_compat.sh \
    && chmod +x /usr/local/bin/sagemaker_entrypoint.sh

# Fetch and run the OSS license-compliance check, then remove its artifacts
RUN HOME_DIR=/root \
    && uv pip install --system --upgrade pip requests PTable \
    && curl -o ${HOME_DIR}/oss_compliance.zip https://aws-dlinfra-utilities.s3.amazonaws.com/oss_compliance.zip \
    && unzip ${HOME_DIR}/oss_compliance.zip -d ${HOME_DIR}/ \
    && cp ${HOME_DIR}/oss_compliance/test/testOSSCompliance /usr/local/bin/testOSSCompliance \
    && chmod +x /usr/local/bin/testOSSCompliance \
    && chmod +x ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh \
    && ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh ${HOME_DIR} python3 \
    && rm -rf ${HOME_DIR}/oss_compliance*


ENTRYPOINT ["/usr/local/bin/sagemaker_entrypoint.sh"]
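A minimal local smoke test of the resulting image (a sketch: the tag, model, and staging steps are illustrative, and the buildspec's build context normally supplies the copied scripts):

# Stage the build-context artifacts the Dockerfile COPYs, then build the sagemaker target
cp huggingface/vllm/build_artifacts/*.sh .
docker build --target sagemaker -t huggingface-vllm:local \
    -f huggingface/vllm/docker/0.12/cu129/Dockerfile .
# Serve a small model and probe the OpenAI-compatible endpoint on port 8080
docker run --gpus all -p 8080:8080 -e SM_VLLM_MODEL=Qwen/Qwen2.5-0.5B huggingface-vllm:local
curl http://localhost:8080/v1/models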
1 change: 1 addition & 0 deletions src/constants.py
@@ -27,6 +27,7 @@
"base",
"vllm",
"sglang",
"huggingface_vllm",
}
DEVICE_TYPES = {"cpu", "gpu", "hpu", "eia", "inf", "neuron", "neuronx"}
IMAGE_TYPES = {"training", "inference"}
Empty file.
13 changes: 13 additions & 0 deletions test/sagemaker_tests/huggingface/vllm/__init__.py
@@ -0,0 +1,13 @@
# Copyright 2019-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"). You
# may not use this file except in compliance with the License. A copy of
# the License is located at
#
# http://aws.amazon.com/apache2.0/
#
# or in the "license" file accompanying this file. This file is
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific
# language governing permissions and limitations under the License.
from __future__ import absolute_import