Skip to content

Commit b562fe4

Browse files
Merge branch 'master' into pt-tr
2 parents f3923b5 + a66e749 commit b562fe4

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

50 files changed

+2118
-35
lines changed

available_images.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -225,6 +225,7 @@ Starting LMI V10 (0.28.0), we are changing the name from LMI DeepSpeed DLC to LM
225225

226226
| Framework | Job Type | Accelerator | Python Version Options | Example URL |
227227
|-----------------------------------------------------------------------------------------------------------------------------|-----------|-------------|------------------------|-------------------------------------------------------------------------------------------|
228+
| DJLServing 0.36.0 with vLLM 0.14.0, Transformers 4.57.1 | inference | GPU | 3.12 (py312) | 763104351884.dkr.ecr.us-west-2.amazonaws.com/djl-inference:0.36.0-lmi19.0.0-cu128 |
228229
| DJLServing 0.36.0 with vLLM 0.12.0, Transformers 4.57.1 | inference | GPU | 3.12 (py312) | 763104351884.dkr.ecr.us-west-2.amazonaws.com/djl-inference:0.36.0-lmi18.0.0-cu128 |
229230
| DJLServing 0.35.0 with vLLM 0.11.1, Transformers 4.57.1, and Accelerate 1.0.1 | inference | GPU | 3.12 (py312) | 763104351884.dkr.ecr.us-west-2.amazonaws.com/djl-inference:0.35.0-lmi17.0.0-cu128 |
230231
| DJLServing 0.34.0 with vLLM 0.10.2, Transformers 4.55.2, and Accelerate 1.0.1 | inference | GPU | 3.12 (py312) | 763104351884.dkr.ecr.us-west-2.amazonaws.com/djl-inference:0.34.0-lmi16.0.0-cu128 |

base/buildspec-cu130-ubuntu22.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ images:
4444
os_version: &OS_VERSION ubuntu22.04
4545
tag: !join [ *VERSION, "-", *DEVICE_TYPE, "-", *TAG_PYTHON_VERSION, "-", *CUDA_VERSION, "-", *OS_VERSION, "-ec2" ]
4646
latest_release_tag: !join [ *VERSION, "-", *DEVICE_TYPE, "-", *TAG_PYTHON_VERSION, "-", *CUDA_VERSION, "-", *OS_VERSION, "-ec2" ]
47-
docker_file: !join [ *FRAMEWORK, /, *ARCH_TYPE, /, *DEVICE_TYPE, /, *CUDA_VERSION, /, *OS_VERSION, /Dockerfile ]
47+
docker_file: !join [ *FRAMEWORK, /, *ARCH_TYPE, /, *DEVICE_TYPE, /, *CUDA_VERSION, /, *OS_VERSION, /, *TAG_PYTHON_VERSION, /Dockerfile ]
4848
target: final
4949
build: true
5050
enable_common_stage_build: false

base/buildspec-cu1302-ubuntu22.yml

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
account_id: &ACCOUNT_ID <set-$ACCOUNT_ID-in-environment>
2+
prod_account_id: &PROD_ACCOUNT_ID 763104351884
3+
region: &REGION <set-$REGION-in-environment>
4+
framework: &FRAMEWORK base
5+
version: &VERSION 13.0.2
6+
short_version: &SHORT_VERSION "13.0"
7+
arch_type: &ARCH_TYPE x86_64
8+
autopatch_build: "False"
9+
10+
repository_info:
11+
base_repository: &BASE_REPOSITORY
12+
image_type: &IMAGE_TYPE gpu
13+
root: .
14+
repository_name: &REPOSITORY_NAME !join [ pr, "-", *FRAMEWORK ]
15+
repository: &REPOSITORY !join [ *ACCOUNT_ID, .dkr.ecr., *REGION, .amazonaws.com/, *REPOSITORY_NAME ]
16+
release_repository_name: &RELEASE_REPOSITORY_NAME !join [ *FRAMEWORK ]
17+
release_repository: &RELEASE_REPOSITORY !join [ *PROD_ACCOUNT_ID, .dkr.ecr., *REGION, .amazonaws.com/, *RELEASE_REPOSITORY_NAME ]
18+
19+
context:
20+
base_context: &BASE_CONTEXT
21+
deep_learning_container:
22+
source: src/deep_learning_container.py
23+
target: deep_learning_container.py
24+
install_python:
25+
source: scripts/install_python.sh
26+
target: install_python.sh
27+
install_cuda:
28+
source: scripts/install_cuda.sh
29+
target: install_cuda.sh
30+
install_efa:
31+
source: scripts/install_efa.sh
32+
target: install_efa.sh
33+
34+
images:
35+
base_x86_64_gpu_cuda1302_ubuntu22:
36+
<<: *BASE_REPOSITORY
37+
context:
38+
<<: *BASE_CONTEXT
39+
image_size_baseline: 11000
40+
device_type: &DEVICE_TYPE gpu
41+
cuda_version: &CUDA_VERSION cu130
42+
python_version: &DOCKER_PYTHON_VERSION py3
43+
tag_python_version: &TAG_PYTHON_VERSION py313
44+
os_version: &OS_VERSION ubuntu22.04
45+
tag: !join [ *VERSION, "-", *DEVICE_TYPE, "-", *TAG_PYTHON_VERSION, "-", *CUDA_VERSION, "-", *OS_VERSION, "-ec2" ]
46+
latest_release_tag: !join [ *VERSION, "-", *DEVICE_TYPE, "-", *TAG_PYTHON_VERSION, "-", *CUDA_VERSION, "-", *OS_VERSION, "-ec2" ]
47+
docker_file: !join [ *FRAMEWORK, /, *ARCH_TYPE, /, *DEVICE_TYPE, /, *CUDA_VERSION, /, *OS_VERSION, /, *TAG_PYTHON_VERSION, /Dockerfile ]
48+
target: final
49+
build: true
50+
enable_common_stage_build: false
51+
test_configs:
52+
test_platforms:
53+
- sanity
54+
- security

base/buildspec.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
buildspec_pointer: buildspec-cu130-ubuntu22.yml
1+
buildspec_pointer: buildspec-cu1302-ubuntu22.yml
File renamed without changes.
Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,130 @@
1+
ARG PYTHON="python3"
2+
ARG PYTHON_VERSION="3.13.11"
3+
ARG PYTHON_SHORT_VERSION="3.13"
4+
ARG CUDA_MAJOR="13"
5+
ARG CUDA_MINOR="0"
6+
ARG CUDA_PATCH="2"
7+
ARG EFA_VERSION="1.47.0"
8+
ARG OS_VERSION="ubuntu22.04"
9+
FROM nvidia/cuda:${CUDA_MAJOR}.${CUDA_MINOR}.${CUDA_PATCH}-base-${OS_VERSION} AS base-builder
10+
11+
12+
RUN mv /usr/local/cuda/compat /usr/local \
13+
&& apt-get update \
14+
&& apt-get -y upgrade --only-upgrade systemd \
15+
&& apt-get install -y --allow-change-held-packages --no-install-recommends \
16+
automake \
17+
build-essential \
18+
ca-certificates \
19+
cmake \
20+
curl \
21+
emacs \
22+
git \
23+
jq \
24+
libcurl4-openssl-dev \
25+
libglib2.0-0 \
26+
libegl1 \
27+
libgl1 \
28+
libsm6 \
29+
libssl-dev \
30+
libxext6 \
31+
libxrender-dev \
32+
zlib1g-dev \
33+
unzip \
34+
vim \
35+
wget \
36+
libhwloc-dev \
37+
libgomp1 \
38+
libibverbs-dev \
39+
libnuma1 \
40+
libnuma-dev \
41+
libtool \
42+
openssl \
43+
python3-dev \
44+
autoconf \
45+
pkg-config \
46+
check \
47+
libsubunit0 \
48+
libsubunit-dev \
49+
libffi-dev \
50+
libbz2-dev \
51+
liblzma-dev \
52+
libsqlite3-dev \
53+
&& apt-get autoremove -y \
54+
&& apt-get clean \
55+
&& rm -rf /var/lib/apt/lists/*
56+
57+
##############################################################################
58+
FROM base-builder AS python-builder
59+
ARG PYTHON_VERSION
60+
COPY install_python.sh install_python.sh
61+
RUN bash install_python.sh ${PYTHON_VERSION} && rm install_python.sh
62+
63+
##############################################################################
64+
FROM base-builder AS cuda-builder
65+
ARG CUDA_MAJOR
66+
ARG CUDA_MINOR
67+
ARG CUDA_PATCH
68+
ARG OS_VERSION
69+
COPY install_cuda.sh install_cuda.sh
70+
RUN bash install_cuda.sh "${CUDA_MAJOR}.${CUDA_MINOR}.${CUDA_PATCH}" "${OS_VERSION}" && rm install_cuda.sh
71+
72+
##############################################################################
73+
FROM nvidia/cuda:${CUDA_MAJOR}.${CUDA_MINOR}.${CUDA_PATCH}-base-${OS_VERSION} AS final
74+
ARG PYTHON
75+
ARG PYTHON_SHORT_VERSION
76+
ARG CUDA_MAJOR
77+
ARG CUDA_MINOR
78+
ARG EFA_VERSION
79+
LABEL maintainer="Amazon AI"
80+
LABEL dlc_major_version="1"
81+
ENV DEBIAN_FRONTEND=noninteractive \
82+
LANG=C.UTF-8 \
83+
LC_ALL=C.UTF-8 \
84+
DLC_CONTAINER_TYPE=base \
85+
# Python won't try to write .pyc or .pyo files on the import of source modules
86+
# Force stdin, stdout and stderr to be totally unbuffered. Good for logging
87+
PYTHONDONTWRITEBYTECODE=1 \
88+
PYTHONUNBUFFERED=1 \
89+
PYTHONIOENCODING=UTF-8 \
90+
CUDA_HOME="/usr/local/cuda" \
91+
PATH="/opt/amazon/openmpi/bin:/opt/amazon/efa/bin:/usr/local/cuda/bin:${PATH}" \
92+
LD_LIBRARY_PATH="/usr/local/lib:/usr/local/cuda/lib64:/opt/amazon/ofi-nccl/lib:/opt/amazon/efa/lib:/opt/amazon/openmpi/lib:${LD_LIBRARY_PATH}"
93+
94+
WORKDIR /
95+
96+
# + python and pip packages (awscli, boto3, requests)
97+
COPY --from=python-builder /usr/local/lib/python${PYTHON_SHORT_VERSION} /usr/local/lib/python${PYTHON_SHORT_VERSION}
98+
COPY --from=python-builder /usr/local/include/python${PYTHON_SHORT_VERSION} /usr/local/include/python${PYTHON_SHORT_VERSION}
99+
COPY --from=python-builder /usr/local/bin /usr/local/bin
100+
# + cuda-toolkit, cudnn, nccl
101+
COPY --from=cuda-builder /usr/local/cuda-${CUDA_MAJOR}.${CUDA_MINOR} /usr/local/cuda-${CUDA_MAJOR}.${CUDA_MINOR}
102+
COPY install_efa.sh install_efa.sh
103+
COPY deep_learning_container.py /usr/local/bin/deep_learning_container.py
104+
COPY bash_telemetry.sh /usr/local/bin/bash_telemetry.sh
105+
RUN chmod +x /usr/local/bin/deep_learning_container.py && \
106+
chmod +x /usr/local/bin/bash_telemetry.sh && \
107+
echo 'source /usr/local/bin/bash_telemetry.sh' >> /etc/bash.bashrc && \
108+
# Install EFA
109+
bash install_efa.sh ${EFA_VERSION} && \
110+
rm install_efa.sh && \
111+
# OSS compliance
112+
apt-get update && \
113+
apt-get upgrade -y && \
114+
apt-get install -y --allow-change-held-packages --no-install-recommends \
115+
unzip \
116+
wget && \
117+
apt-get clean && \
118+
HOME_DIR=/root && \
119+
curl -o ${HOME_DIR}/oss_compliance.zip https://aws-dlinfra-utilities.s3.amazonaws.com/oss_compliance.zip && \
120+
unzip ${HOME_DIR}/oss_compliance.zip -d ${HOME_DIR}/ && \
121+
cp ${HOME_DIR}/oss_compliance/test/testOSSCompliance /usr/local/bin/testOSSCompliance && \
122+
chmod +x /usr/local/bin/testOSSCompliance && \
123+
chmod +x ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh && \
124+
${HOME_DIR}/oss_compliance/generate_oss_compliance.sh ${HOME_DIR} ${PYTHON} && \
125+
rm -rf ${HOME_DIR}/oss_compliance* && \
126+
rm -rf /tmp/tmp* && \
127+
rm -rf /var/lib/apt/lists/* && \
128+
rm -rf /root/.cache | true
129+
130+
CMD ["/bin/bash"]

dlc_developer_config.toml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ deep_canary_mode = false
3636

3737
[build]
3838
# Add in frameworks you would like to build. By default, builds are disabled unless you specify building an image.
39-
# available frameworks - ["base", "vllm", "sglang", "autogluon", "huggingface_tensorflow", "huggingface_pytorch", "huggingface_tensorflow_trcomp", "huggingface_pytorch_trcomp", "pytorch_trcomp", "tensorflow", "pytorch", "stabilityai_pytorch"]
39+
# available frameworks - ["base", "vllm", "sglang", "autogluon", "huggingface_vllm", "huggingface_tensorflow", "huggingface_pytorch", "huggingface_tensorflow_trcomp", "huggingface_pytorch_trcomp", "pytorch_trcomp", "tensorflow", "pytorch", "stabilityai_pytorch"]
4040
build_frameworks = []
4141

4242

@@ -186,5 +186,8 @@ dlc-pr-tensorflow-2-eia-inference = ""
186186
# vllm
187187
dlc-pr-vllm = ""
188188

189+
# HuggingFace vLLM
190+
dlc-pr-huggingface-vllm = ""
191+
189192
# sglang
190193
dlc-pr-sglang = ""
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
#!/bin/bash
2+
# Check if telemetry file exists before executing
3+
# Execute telemetry script if it exists, suppress errors
4+
bash /usr/local/bin/bash_telemetry.sh >/dev/null 2>&1 || true
5+
6+
# Source CUDA compat for older drivers (e.g., g5 instances)
7+
if command -v nvidia-smi >/dev/null 2>&1 && command -v nvcc >/dev/null 2>&1; then
8+
source /usr/local/bin/start_cuda_compat.sh
9+
fi
10+
11+
PREFIX="SM_VLLM_"
12+
ARG_PREFIX="--"
13+
14+
ARGS=(--port 8080)
15+
16+
while IFS='=' read -r key value; do
17+
arg_name=$(echo "${key#"${PREFIX}"}" | tr '[:upper:]' '[:lower:]' | tr '_' '-')
18+
19+
ARGS+=("${ARG_PREFIX}${arg_name}")
20+
if [ -n "$value" ]; then
21+
ARGS+=("$value")
22+
fi
23+
done < <(env | grep "^${PREFIX}")
24+
25+
exec python3 -m vllm.entrypoints.openai.api_server "${ARGS[@]}"
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
#!/bin/bash
2+
3+
verlte() {
4+
[ "$1" = "$2" ] && return 1 || [ "$1" = "$(echo -e "$1\n$2" | sort -V | head -n1)" ]
5+
}
6+
7+
COMPAT_FILE=/usr/local/cuda/compat/libcuda.so.1
8+
if [ -f $COMPAT_FILE ]; then
9+
CUDA_COMPAT_MAX_DRIVER_VERSION=$(readlink $COMPAT_FILE | cut -d'.' -f 3-)
10+
echo "CUDA compat package should be installed for NVIDIA driver smaller than ${CUDA_COMPAT_MAX_DRIVER_VERSION}"
11+
NVIDIA_DRIVER_VERSION=$(sed -n 's/^NVRM.*Kernel Module *\([0-9.]*\).*$/\1/p' /proc/driver/nvidia/version 2>/dev/null || true)
12+
if [ -z "$NVIDIA_DRIVER_VERSION" ]; then
13+
NVIDIA_DRIVER_VERSION=$(nvidia-smi --query-gpu=driver_version --format=csv,noheader --id=0 2>/dev/null || true)
14+
fi
15+
echo "Current installed NVIDIA driver version is ${NVIDIA_DRIVER_VERSION}"
16+
if verlte $NVIDIA_DRIVER_VERSION $CUDA_COMPAT_MAX_DRIVER_VERSION; then
17+
echo "Adding CUDA compat to LD_LIBRARY_PATH"
18+
export LD_LIBRARY_PATH=/usr/local/cuda/compat:$LD_LIBRARY_PATH
19+
echo $LD_LIBRARY_PATH
20+
else
21+
echo "Skipping CUDA compat setup as newer NVIDIA driver is installed"
22+
fi
23+
else
24+
echo "Skipping CUDA compat setup as package not found"
25+
fi

huggingface/vllm/buildspec.yml

Lines changed: 56 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,56 @@
1-
2-
1+
account_id: &ACCOUNT_ID <set-$ACCOUNT_ID-in-environment>
2+
prod_account_id: &PROD_ACCOUNT_ID 763104351884
3+
region: &REGION <set-$REGION-in-environment>
4+
base_framework: &BASE_FRAMEWORK vllm
5+
framework: &FRAMEWORK !join [ "huggingface_", *BASE_FRAMEWORK]
6+
version: &VERSION "0.14.0"
7+
short_version: &SHORT_VERSION "0.14"
8+
arch_type: &ARCH_TYPE x86_64
9+
autopatch_build: "False"
10+
11+
repository_info:
12+
build_repository: &BUILD_REPOSITORY
13+
image_type: &IMAGE_TYPE inference
14+
root: huggingface/vllm
15+
repository_name: &REPOSITORY_NAME !join [ "pr", "-", "huggingface", "-", *BASE_FRAMEWORK ]
16+
repository: &REPOSITORY !join [ *ACCOUNT_ID, .dkr.ecr., *REGION, .amazonaws.com/, *REPOSITORY_NAME ]
17+
release_repository_name: &RELEASE_REPOSITORY_NAME !join [ "huggingface", "-", *BASE_FRAMEWORK ]
18+
release_repository: &RELEASE_REPOSITORY !join [ *PROD_ACCOUNT_ID, .dkr.ecr., *REGION, .amazonaws.com/, *RELEASE_REPOSITORY_NAME ]
19+
20+
context:
21+
build_context: &BUILD_CONTEXT
22+
deep_learning_container:
23+
source: ../../src/deep_learning_container.py
24+
target: deep_learning_container.py
25+
start_cuda_compat:
26+
source: build_artifacts/start_cuda_compat.sh
27+
target: start_cuda_compat.sh
28+
sagemaker_entrypoint:
29+
source: build_artifacts/sagemaker_entrypoint.sh
30+
target: sagemaker_entrypoint.sh
31+
32+
33+
images:
34+
BuildHuggingFaceVllmGpuPy312Cu129DockerImage:
35+
<<: *BUILD_REPOSITORY
36+
context:
37+
<<: *BUILD_CONTEXT
38+
image_size_baseline: 26000
39+
device_type: &DEVICE_TYPE gpu
40+
cuda_version: &CUDA_VERSION cu129
41+
python_version: &DOCKER_PYTHON_VERSION py3
42+
tag_python_version: &TAG_PYTHON_VERSION py312
43+
os_version: &OS_VERSION ubuntu22.04
44+
transformers_version: &TRANSFORMERS_VERSION 4.57.3
45+
vllm_version: &VLLM_VERSION 0.14.0
46+
tag: !join [ "vllm", "-", *VERSION, "-", *DEVICE_TYPE, "-", *TAG_PYTHON_VERSION, "-", *CUDA_VERSION, "-", *OS_VERSION, "-sagemaker" ]
47+
latest_release_tag: !join [ "vllm", "-", *VERSION, "-", *DEVICE_TYPE, "-", *TAG_PYTHON_VERSION, "-", *CUDA_VERSION, "-", *OS_VERSION, "-sagemaker" ]
48+
docker_file: !join [ docker/, *SHORT_VERSION, /, *CUDA_VERSION, /Dockerfile ]
49+
target: sagemaker
50+
build: true
51+
enable_common_stage_build: false
52+
test_configs:
53+
test_platforms:
54+
- sanity
55+
- security
56+
- sagemaker

0 commit comments

Comments (0)