Skip to content

Commit b562fe4

Browse files
Merge branch 'master' into pt-tr
2 parents f3923b5 + a66e749 commit b562fe4

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

50 files changed

+2118
-35
lines changed

available_images.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -225,6 +225,7 @@ Starting LMI V10 (0.28.0), we are changing the name from LMI DeepSpeed DLC to LM
225225

226226
| Framework | Job Type | Accelerator | Python Version Options | Example URL |
227227
|-----------------------------------------------------------------------------------------------------------------------------|-----------|-------------|------------------------|-------------------------------------------------------------------------------------------|
228+
| DJLServing 0.36.0 with vLLM 0.14.0, Transformers 4.57.1 | inference | GPU | 3.12 (py312) | 763104351884.dkr.ecr.us-west-2.amazonaws.com/djl-inference:0.36.0-lmi19.0.0-cu128 |
228229
| DJLServing 0.36.0 with vLLM 0.12.0, Transformers 4.57.1 | inference | GPU | 3.12 (py312) | 763104351884.dkr.ecr.us-west-2.amazonaws.com/djl-inference:0.36.0-lmi18.0.0-cu128 |
229230
| DJLServing 0.35.0 with vLLM 0.11.1, Transformers 4.57.1, and Accelerate 1.0.1 | inference | GPU | 3.12 (py312) | 763104351884.dkr.ecr.us-west-2.amazonaws.com/djl-inference:0.35.0-lmi17.0.0-cu128 |
230231
| DJLServing 0.34.0 with vLLM 0.10.2, Transformers 4.55.2, and Accelerate 1.0.1 | inference | GPU | 3.12 (py312) | 763104351884.dkr.ecr.us-west-2.amazonaws.com/djl-inference:0.34.0-lmi16.0.0-cu128 |

base/buildspec-cu130-ubuntu22.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ images:
4444
os_version: &OS_VERSION ubuntu22.04
4545
tag: !join [ *VERSION, "-", *DEVICE_TYPE, "-", *TAG_PYTHON_VERSION, "-", *CUDA_VERSION, "-", *OS_VERSION, "-ec2" ]
4646
latest_release_tag: !join [ *VERSION, "-", *DEVICE_TYPE, "-", *TAG_PYTHON_VERSION, "-", *CUDA_VERSION, "-", *OS_VERSION, "-ec2" ]
47-
docker_file: !join [ *FRAMEWORK, /, *ARCH_TYPE, /, *DEVICE_TYPE, /, *CUDA_VERSION, /, *OS_VERSION, /Dockerfile ]
47+
docker_file: !join [ *FRAMEWORK, /, *ARCH_TYPE, /, *DEVICE_TYPE, /, *CUDA_VERSION, /, *OS_VERSION, /, *TAG_PYTHON_VERSION, /Dockerfile ]
4848
target: final
4949
build: true
5050
enable_common_stage_build: false

base/buildspec-cu1302-ubuntu22.yml

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
account_id: &ACCOUNT_ID <set-$ACCOUNT_ID-in-environment>
2+
prod_account_id: &PROD_ACCOUNT_ID 763104351884
3+
region: &REGION <set-$REGION-in-environment>
4+
framework: &FRAMEWORK base
5+
version: &VERSION 13.0.2
6+
short_version: &SHORT_VERSION "13.0"
7+
arch_type: &ARCH_TYPE x86_64
8+
autopatch_build: "False"
9+
10+
repository_info:
11+
base_repository: &BASE_REPOSITORY
12+
image_type: &IMAGE_TYPE gpu
13+
root: .
14+
repository_name: &REPOSITORY_NAME !join [ pr, "-", *FRAMEWORK ]
15+
repository: &REPOSITORY !join [ *ACCOUNT_ID, .dkr.ecr., *REGION, .amazonaws.com/, *REPOSITORY_NAME ]
16+
release_repository_name: &RELEASE_REPOSITORY_NAME !join [ *FRAMEWORK ]
17+
release_repository: &RELEASE_REPOSITORY !join [ *PROD_ACCOUNT_ID, .dkr.ecr., *REGION, .amazonaws.com/, *RELEASE_REPOSITORY_NAME ]
18+
19+
context:
20+
base_context: &BASE_CONTEXT
21+
deep_learning_container:
22+
source: src/deep_learning_container.py
23+
target: deep_learning_container.py
24+
install_python:
25+
source: scripts/install_python.sh
26+
target: install_python.sh
27+
install_cuda:
28+
source: scripts/install_cuda.sh
29+
target: install_cuda.sh
30+
install_efa:
31+
source: scripts/install_efa.sh
32+
target: install_efa.sh
33+
34+
images:
35+
base_x86_64_gpu_cuda1302_ubuntu22:
36+
<<: *BASE_REPOSITORY
37+
context:
38+
<<: *BASE_CONTEXT
39+
image_size_baseline: 11000
40+
device_type: &DEVICE_TYPE gpu
41+
cuda_version: &CUDA_VERSION cu130
42+
python_version: &DOCKER_PYTHON_VERSION py3
43+
tag_python_version: &TAG_PYTHON_VERSION py313
44+
os_version: &OS_VERSION ubuntu22.04
45+
tag: !join [ *VERSION, "-", *DEVICE_TYPE, "-", *TAG_PYTHON_VERSION, "-", *CUDA_VERSION, "-", *OS_VERSION, "-ec2" ]
46+
latest_release_tag: !join [ *VERSION, "-", *DEVICE_TYPE, "-", *TAG_PYTHON_VERSION, "-", *CUDA_VERSION, "-", *OS_VERSION, "-ec2" ]
47+
docker_file: !join [ *FRAMEWORK, /, *ARCH_TYPE, /, *DEVICE_TYPE, /, *CUDA_VERSION, /, *OS_VERSION, /, *TAG_PYTHON_VERSION, /Dockerfile ]
48+
target: final
49+
build: true
50+
enable_common_stage_build: false
51+
test_configs:
52+
test_platforms:
53+
- sanity
54+
- security

base/buildspec.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
buildspec_pointer: buildspec-cu130-ubuntu22.yml
1+
buildspec_pointer: buildspec-cu1302-ubuntu22.yml
File renamed without changes.
Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,130 @@
1+
ARG PYTHON="python3"
2+
ARG PYTHON_VERSION="3.13.11"
3+
ARG PYTHON_SHORT_VERSION="3.13"
4+
ARG CUDA_MAJOR="13"
5+
ARG CUDA_MINOR="0"
6+
ARG CUDA_PATCH="2"
7+
ARG EFA_VERSION="1.47.0"
8+
ARG OS_VERSION="ubuntu22.04"
9+
FROM nvidia/cuda:${CUDA_MAJOR}.${CUDA_MINOR}.${CUDA_PATCH}-base-${OS_VERSION} AS base-builder
10+
11+
12+
RUN mv /usr/local/cuda/compat /usr/local \
13+
&& apt-get update \
14+
&& apt-get -y upgrade --only-upgrade systemd \
15+
&& apt-get install -y --allow-change-held-packages --no-install-recommends \
16+
automake \
17+
build-essential \
18+
ca-certificates \
19+
cmake \
20+
curl \
21+
emacs \
22+
git \
23+
jq \
24+
libcurl4-openssl-dev \
25+
libglib2.0-0 \
26+
libegl1 \
27+
libgl1 \
28+
libsm6 \
29+
libssl-dev \
30+
libxext6 \
31+
libxrender-dev \
32+
zlib1g-dev \
33+
unzip \
34+
vim \
35+
wget \
36+
libhwloc-dev \
37+
libgomp1 \
38+
libibverbs-dev \
39+
libnuma1 \
40+
libnuma-dev \
41+
libtool \
42+
openssl \
43+
python3-dev \
44+
autoconf \
45+
pkg-config \
46+
check \
47+
libsubunit0 \
48+
libsubunit-dev \
49+
libffi-dev \
50+
libbz2-dev \
51+
liblzma-dev \
52+
libsqlite3-dev \
53+
&& apt-get autoremove -y \
54+
&& apt-get clean \
55+
&& rm -rf /var/lib/apt/lists/*
56+
57+
##############################################################################
58+
FROM base-builder AS python-builder
59+
ARG PYTHON_VERSION
60+
COPY install_python.sh install_python.sh
61+
RUN bash install_python.sh ${PYTHON_VERSION} && rm install_python.sh
62+
63+
##############################################################################
64+
FROM base-builder AS cuda-builder
65+
ARG CUDA_MAJOR
66+
ARG CUDA_MINOR
67+
ARG CUDA_PATCH
68+
ARG OS_VERSION
69+
COPY install_cuda.sh install_cuda.sh
70+
RUN bash install_cuda.sh "${CUDA_MAJOR}.${CUDA_MINOR}.${CUDA_PATCH}" "${OS_VERSION}" && rm install_cuda.sh
71+
72+
##############################################################################
73+
FROM nvidia/cuda:${CUDA_MAJOR}.${CUDA_MINOR}.${CUDA_PATCH}-base-${OS_VERSION} AS final
74+
ARG PYTHON
75+
ARG PYTHON_SHORT_VERSION
76+
ARG CUDA_MAJOR
77+
ARG CUDA_MINOR
78+
ARG EFA_VERSION
79+
LABEL maintainer="Amazon AI"
80+
LABEL dlc_major_version="1"
81+
ENV DEBIAN_FRONTEND=noninteractive \
82+
LANG=C.UTF-8 \
83+
LC_ALL=C.UTF-8 \
84+
DLC_CONTAINER_TYPE=base \
85+
# Python won't try to write .pyc or .pyo files on the import of source modules
86+
# Force stdin, stdout and stderr to be totally unbuffered. Good for logging
87+
PYTHONDONTWRITEBYTECODE=1 \
88+
PYTHONUNBUFFERED=1 \
89+
PYTHONIOENCODING=UTF-8 \
90+
CUDA_HOME="/usr/local/cuda" \
91+
PATH="/opt/amazon/openmpi/bin:/opt/amazon/efa/bin:/usr/local/cuda/bin:${PATH}" \
92+
LD_LIBRARY_PATH="/usr/local/lib:/usr/local/cuda/lib64:/opt/amazon/ofi-nccl/lib:/opt/amazon/efa/lib:/opt/amazon/openmpi/lib:${LD_LIBRARY_PATH}"
93+
94+
WORKDIR /
95+
96+
# + python and pip packages (awscli, boto3, requests)
97+
COPY --from=python-builder /usr/local/lib/python${PYTHON_SHORT_VERSION} /usr/local/lib/python${PYTHON_SHORT_VERSION}
98+
COPY --from=python-builder /usr/local/include/python${PYTHON_SHORT_VERSION} /usr/local/include/python${PYTHON_SHORT_VERSION}
99+
COPY --from=python-builder /usr/local/bin /usr/local/bin
100+
# + cuda-toolkit, cudnn, nccl
101+
COPY --from=cuda-builder /usr/local/cuda-${CUDA_MAJOR}.${CUDA_MINOR} /usr/local/cuda-${CUDA_MAJOR}.${CUDA_MINOR}
102+
COPY install_efa.sh install_efa.sh
103+
COPY deep_learning_container.py /usr/local/bin/deep_learning_container.py
104+
COPY bash_telemetry.sh /usr/local/bin/bash_telemetry.sh
105+
RUN chmod +x /usr/local/bin/deep_learning_container.py && \
106+
chmod +x /usr/local/bin/bash_telemetry.sh && \
107+
echo 'source /usr/local/bin/bash_telemetry.sh' >> /etc/bash.bashrc && \
108+
# Install EFA
109+
bash install_efa.sh ${EFA_VERSION} && \
110+
rm install_efa.sh && \
111+
# OSS compliance
112+
apt-get update && \
113+
apt-get upgrade -y && \
114+
apt-get install -y --allow-change-held-packages --no-install-recommends \
115+
unzip \
116+
wget && \
117+
apt-get clean && \
118+
HOME_DIR=/root && \
119+
curl -o ${HOME_DIR}/oss_compliance.zip https://aws-dlinfra-utilities.s3.amazonaws.com/oss_compliance.zip && \
120+
unzip ${HOME_DIR}/oss_compliance.zip -d ${HOME_DIR}/ && \
121+
cp ${HOME_DIR}/oss_compliance/test/testOSSCompliance /usr/local/bin/testOSSCompliance && \
122+
chmod +x /usr/local/bin/testOSSCompliance && \
123+
chmod +x ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh && \
124+
${HOME_DIR}/oss_compliance/generate_oss_compliance.sh ${HOME_DIR} ${PYTHON} && \
125+
rm -rf ${HOME_DIR}/oss_compliance* && \
126+
rm -rf /tmp/tmp* && \
127+
rm -rf /var/lib/apt/lists/* && \
128+
rm -rf /root/.cache | true
129+
130+
CMD ["/bin/bash"]

dlc_developer_config.toml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ deep_canary_mode = false
3636

3737
[build]
3838
# Add in frameworks you would like to build. By default, builds are disabled unless you specify building an image.
39-
# available frameworks - ["base", "vllm", "sglang", "autogluon", "huggingface_tensorflow", "huggingface_pytorch", "huggingface_tensorflow_trcomp", "huggingface_pytorch_trcomp", "pytorch_trcomp", "tensorflow", "pytorch", "stabilityai_pytorch"]
39+
# available frameworks - ["base", "vllm", "sglang", "autogluon", "huggingface_vllm", "huggingface_tensorflow", "huggingface_pytorch", "huggingface_tensorflow_trcomp", "huggingface_pytorch_trcomp", "pytorch_trcomp", "tensorflow", "pytorch", "stabilityai_pytorch"]
4040
build_frameworks = []
4141

4242

@@ -186,5 +186,8 @@ dlc-pr-tensorflow-2-eia-inference = ""
186186
# vllm
187187
dlc-pr-vllm = ""
188188

189+
# HuggingFace vLLM
190+
dlc-pr-huggingface-vllm = ""
191+
189192
# sglang
190193
dlc-pr-sglang = ""
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
#!/bin/bash
2+
# Check if telemetry file exists before executing
3+
# Execute telemetry script if it exists, suppress errors
4+
bash /usr/local/bin/bash_telemetry.sh >/dev/null 2>&1 || true
5+
6+
# Source CUDA compat for older drivers (e.g., g5 instances)
7+
if command -v nvidia-smi >/dev/null 2>&1 && command -v nvcc >/dev/null 2>&1; then
8+
source /usr/local/bin/start_cuda_compat.sh
9+
fi
10+
11+
PREFIX="SM_VLLM_"
12+
ARG_PREFIX="--"
13+
14+
ARGS=(--port 8080)
15+
16+
while IFS='=' read -r key value; do
17+
arg_name=$(echo "${key#"${PREFIX}"}" | tr '[:upper:]' '[:lower:]' | tr '_' '-')
18+
19+
ARGS+=("${ARG_PREFIX}${arg_name}")
20+
if [ -n "$value" ]; then
21+
ARGS+=("$value")
22+
fi
23+
done < <(env | grep "^${PREFIX}")
24+
25+
exec python3 -m vllm.entrypoints.openai.api_server "${ARGS[@]}"
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
#!/bin/bash
2+
3+
verlte() {
4+
[ "$1" = "$2" ] && return 1 || [ "$1" = "$(echo -e "$1\n$2" | sort -V | head -n1)" ]
5+
}
6+
7+
COMPAT_FILE=/usr/local/cuda/compat/libcuda.so.1
8+
if [ -f $COMPAT_FILE ]; then
9+
CUDA_COMPAT_MAX_DRIVER_VERSION=$(readlink $COMPAT_FILE | cut -d'.' -f 3-)
10+
echo "CUDA compat package should be installed for NVIDIA driver smaller than ${CUDA_COMPAT_MAX_DRIVER_VERSION}"
11+
NVIDIA_DRIVER_VERSION=$(sed -n 's/^NVRM.*Kernel Module *\([0-9.]*\).*$/\1/p' /proc/driver/nvidia/version 2>/dev/null || true)
12+
if [ -z "$NVIDIA_DRIVER_VERSION" ]; then
13+
NVIDIA_DRIVER_VERSION=$(nvidia-smi --query-gpu=driver_version --format=csv,noheader --id=0 2>/dev/null || true)
14+
fi
15+
echo "Current installed NVIDIA driver version is ${NVIDIA_DRIVER_VERSION}"
16+
if verlte $NVIDIA_DRIVER_VERSION $CUDA_COMPAT_MAX_DRIVER_VERSION; then
17+
echo "Adding CUDA compat to LD_LIBRARY_PATH"
18+
export LD_LIBRARY_PATH=/usr/local/cuda/compat:$LD_LIBRARY_PATH
19+
echo $LD_LIBRARY_PATH
20+
else
21+
echo "Skipping CUDA compat setup as newer NVIDIA driver is installed"
22+
fi
23+
else
24+
echo "Skipping CUDA compat setup as package not found"
25+
fi

huggingface/vllm/buildspec.yml

Lines changed: 56 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,56 @@
1-
2-
1+
account_id: &ACCOUNT_ID <set-$ACCOUNT_ID-in-environment>
2+
prod_account_id: &PROD_ACCOUNT_ID 763104351884
3+
region: &REGION <set-$REGION-in-environment>
4+
base_framework: &BASE_FRAMEWORK vllm
5+
framework: &FRAMEWORK !join [ "huggingface_", *BASE_FRAMEWORK]
6+
version: &VERSION "0.14.0"
7+
short_version: &SHORT_VERSION "0.14"
8+
arch_type: &ARCH_TYPE x86_64
9+
autopatch_build: "False"
10+
11+
repository_info:
12+
build_repository: &BUILD_REPOSITORY
13+
image_type: &IMAGE_TYPE inference
14+
root: huggingface/vllm
15+
repository_name: &REPOSITORY_NAME !join [ "pr", "-", "huggingface", "-", *BASE_FRAMEWORK ]
16+
repository: &REPOSITORY !join [ *ACCOUNT_ID, .dkr.ecr., *REGION, .amazonaws.com/, *REPOSITORY_NAME ]
17+
release_repository_name: &RELEASE_REPOSITORY_NAME !join [ "huggingface", "-", *BASE_FRAMEWORK ]
18+
release_repository: &RELEASE_REPOSITORY !join [ *PROD_ACCOUNT_ID, .dkr.ecr., *REGION, .amazonaws.com/, *RELEASE_REPOSITORY_NAME ]
19+
20+
context:
21+
build_context: &BUILD_CONTEXT
22+
deep_learning_container:
23+
source: ../../src/deep_learning_container.py
24+
target: deep_learning_container.py
25+
start_cuda_compat:
26+
source: build_artifacts/start_cuda_compat.sh
27+
target: start_cuda_compat.sh
28+
sagemaker_entrypoint:
29+
source: build_artifacts/sagemaker_entrypoint.sh
30+
target: sagemaker_entrypoint.sh
31+
32+
33+
images:
34+
BuildHuggingFaceVllmGpuPy312Cu129DockerImage:
35+
<<: *BUILD_REPOSITORY
36+
context:
37+
<<: *BUILD_CONTEXT
38+
image_size_baseline: 26000
39+
device_type: &DEVICE_TYPE gpu
40+
cuda_version: &CUDA_VERSION cu129
41+
python_version: &DOCKER_PYTHON_VERSION py3
42+
tag_python_version: &TAG_PYTHON_VERSION py312
43+
os_version: &OS_VERSION ubuntu22.04
44+
transformers_version: &TRANSFORMERS_VERSION 4.57.3
45+
vllm_version: &VLLM_VERSION 0.14.0
46+
tag: !join [ "vllm", "-", *VERSION, "-", *DEVICE_TYPE, "-", *TAG_PYTHON_VERSION, "-", *CUDA_VERSION, "-", *OS_VERSION, "-sagemaker" ]
47+
latest_release_tag: !join [ "vllm", "-", *VERSION, "-", *DEVICE_TYPE, "-", *TAG_PYTHON_VERSION, "-", *CUDA_VERSION, "-", *OS_VERSION, "-sagemaker" ]
48+
docker_file: !join [ docker/, *SHORT_VERSION, /, *CUDA_VERSION, /Dockerfile ]
49+
target: sagemaker
50+
build: true
51+
enable_common_stage_build: false
52+
test_configs:
53+
test_platforms:
54+
- sanity
55+
- security
56+
- sagemaker

0 commit comments

Comments (0)