
Commit e4edd94

Merge branch 'master' into vllm-ec2-pilot
2 parents c34155a + 8a044a7 commit e4edd94

File tree

17 files changed (+241, -34 lines)

data/ignore_ids_safety_scan.json

Lines changed: 2 additions & 1 deletion
@@ -1446,7 +1446,8 @@
 "77744": "urllib3 is a user-friendly HTTP client library for Python. Prior to 2.5.0, it is possible to disable redirects for all requests by instantiating a PoolManager and specifying retries in a way that disable redirects. By default, requests and botocore users are not affected. An application attempting to mitigate SSRF or open redirect vulnerabilities by disabling redirects at the PoolManager level will remain vulnerable. This issue has been patched in version 2.5.0.",
 "79077": "Affected versions of the h2 package are vulnerable to HTTP Request Smuggling due to improper validation of illegal characters in HTTP headers. The package allows CRLF characters to be injected into header names and values without proper sanitisation, which can cause request boundary manipulation when HTTP/2 requests are downgraded to HTTP/1.1 by downstream servers.",
 "79595": "Affected versions of the transformers package are vulnerable to Regular Expression Denial of Service (ReDoS) due to inefficient regular expressions in the EnglishNormalizer.normalize_numbers() method",
-"79596": "Affected versions of the transformers package are vulnerable to Regular Expression Denial of Service (ReDoS) due to inefficient regular expressions in the MarianTokenizer.remove_language_code() method"
+"79596": "Affected versions of the transformers package are vulnerable to Regular Expression Denial of Service (ReDoS) due to inefficient regular expressions in the MarianTokenizer.remove_language_code() method",
+"79855": "Affected versions of the transformers package are vulnerable to Regular Expression Denial of Service (ReDoS) due to unbounded evaluation of user-supplied regular expressions in the AdamWeightDecay._do_use_weight_decay method. The TensorFlow optimizer’s _do_use_weight_decay iterates over include_in_weight_decay and exclude_from_weight_decay lists and calls re.search on each pattern against parameter names, enabling catastrophic backtracking on crafted inputs. An attacker who can control these lists can provide pathological patterns that saturate the CPU and cause processes using transformers to hang, resulting in a Denial of Service."
 }
 },
 "inference-neuron": {

dlc_developer_config.toml

Lines changed: 1 addition & 1 deletion
@@ -186,4 +186,4 @@ dlc-pr-pytorch-eia-inference = ""
 dlc-pr-tensorflow-2-eia-inference = ""
 
 # vllm
-dlc-pr-vllm = "vllm/buildspec.yml"
+dlc-pr-vllm = ""

huggingface/pytorch/inference/buildspec-neuronx.yml

Lines changed: 5 additions & 5 deletions
@@ -2,8 +2,8 @@ account_id: &ACCOUNT_ID <set-$ACCOUNT_ID-in-environment>
 region: &REGION <set-$REGION-in-environment>
 base_framework: &BASE_FRAMEWORK pytorch
 framework: &FRAMEWORK !join [ "huggingface_", *BASE_FRAMEWORK]
-version: &VERSION 2.1.2
-short_version: &SHORT_VERSION "2.1"
+version: &VERSION 2.7.1
+short_version: &SHORT_VERSION "2.7"
 contributor: &CONTRIBUTOR huggingface
 arch_type: x86
 
@@ -34,9 +34,9 @@ images:
 device_type: &DEVICE_TYPE neuronx
 python_version: &DOCKER_PYTHON_VERSION py3
 tag_python_version: &TAG_PYTHON_VERSION py310
-neuron_sdk_version: &NEURON_SDK_VERSION sdk2.20.0
-os_version: &OS_VERSION ubuntu20.04
-transformers_version: &TRANSFORMERS_VERSION 4.43.2
+neuron_sdk_version: &NEURON_SDK_VERSION sdk2.24.1
+os_version: &OS_VERSION ubuntu22.04
+transformers_version: &TRANSFORMERS_VERSION 4.51.3
 tag: !join [ *VERSION, '-', 'transformers', *TRANSFORMERS_VERSION, '-', *DEVICE_TYPE, '-', *TAG_PYTHON_VERSION,"-", *NEURON_SDK_VERSION, '-', *OS_VERSION ]
 docker_file: !join [ docker/, *SHORT_VERSION, /, *DOCKER_PYTHON_VERSION, /, *NEURON_SDK_VERSION, /Dockerfile., *DEVICE_TYPE ]
 context:
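
For reference, a quick sketch (not part of the commit, and assuming the buildspec's !join tag simply concatenates its arguments) of the image tag the updated anchors would produce:

# Anchor values taken from the updated buildspec above.
parts = ["2.7.1", "-", "transformers", "4.51.3", "-", "neuronx", "-",
         "py310", "-", "sdk2.24.1", "-", "ubuntu22.04"]
print("".join(parts))  # 2.7.1-transformers4.51.3-neuronx-py310-sdk2.24.1-ubuntu22.04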
Lines changed: 196 additions & 0 deletions
@@ -0,0 +1,196 @@
+FROM ubuntu:22.04
+
+LABEL dlc_major_version="1"
+LABEL maintainer="Amazon AI"
+LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port=true
+
+ARG PYTHON=python3.10
+ARG PYTHON_VERSION=3.10.12
+ARG MMS_VERSION=1.1.11
+ARG MAMBA_VERSION=23.1.0-4
+
+# Neuron SDK components version numbers
+ARG NEURONX_FRAMEWORK_VERSION=2.7.0.2.8.6734
+ARG NEURONX_DISTRIBUTED_VERSION=0.13.14393
+ARG NEURONX_CC_VERSION=2.19.8089.0
+ARG NEURONX_COLLECTIVES_LIB_VERSION=2.26.43.0-47cc904ea
+ARG NEURONX_RUNTIME_LIB_VERSION=2.26.42.0-2ff3b5c7d
+ARG NEURONX_TOOLS_VERSION=2.24.54.0
+
+# HF ARGS
+ARG TRANSFORMERS_VERSION
+ARG DIFFUSERS_VERSION=0.35.1
+ARG HUGGINGFACE_HUB_VERSION=0.35.0
+ARG OPTIMUM_NEURON_VERSION=0.3.0
+ARG SENTENCE_TRANSFORMERS=5.1.0
+ARG PEFT_VERSION=0.17.0
+ARG DATASETS_VERSION=4.1.0
+
+# See http://bugs.python.org/issue19846
+ENV LANG C.UTF-8
+ENV LD_LIBRARY_PATH /opt/aws/neuron/lib:/lib/x86_64-linux-gnu:/opt/conda/lib/:$LD_LIBRARY_PATH
+ENV PATH /opt/conda/bin:/opt/aws/neuron/bin:$PATH
+ENV SAGEMAKER_SERVING_MODULE sagemaker_pytorch_serving_container.serving:main
+ENV TEMP=/home/model-server/tmp
+
+RUN apt-get update \
+ && apt-get upgrade -y \
+ && apt-get install -y --no-install-recommends \
+    apt-transport-https \
+    build-essential \
+    ca-certificates \
+    cmake \
+    curl \
+    emacs \
+    git \
+    gnupg2 \
+    gpg-agent \
+    jq \
+    libgl1-mesa-glx \
+    libglib2.0-0 \
+    libsm6 \
+    libxext6 \
+    libxrender-dev \
+    libcap-dev \
+    libhwloc-dev \
+    openjdk-11-jdk \
+    unzip \
+    vim \
+    wget \
+    zlib1g-dev \
+ && rm -rf /var/lib/apt/lists/* \
+ && rm -rf /tmp/tmp* \
+ && apt-get clean
+
+RUN echo "deb https://apt.repos.neuron.amazonaws.com focal main" > /etc/apt/sources.list.d/neuron.list
+RUN wget -qO - https://apt.repos.neuron.amazonaws.com/GPG-PUB-KEY-AMAZON-AWS-NEURON.PUB | apt-key add -
+
+# Install Neuronx tools
+RUN apt-get update \
+ && apt-get install -y \
+    aws-neuronx-tools=$NEURONX_TOOLS_VERSION \
+    aws-neuronx-collectives=$NEURONX_COLLECTIVES_LIB_VERSION \
+    aws-neuronx-runtime-lib=$NEURONX_RUNTIME_LIB_VERSION \
+ && rm -rf /var/lib/apt/lists/* \
+ && rm -rf /tmp/tmp* \
+ && apt-get clean
+
+# https://github.com/docker-library/openjdk/issues/261 https://github.com/docker-library/openjdk/pull/263/files
+RUN keytool -importkeystore -srckeystore /etc/ssl/certs/java/cacerts -destkeystore /etc/ssl/certs/java/cacerts.jks -deststoretype JKS -srcstorepass changeit -deststorepass changeit -noprompt; \
+    mv /etc/ssl/certs/java/cacerts.jks /etc/ssl/certs/java/cacerts; \
+    /var/lib/dpkg/info/ca-certificates-java.postinst configure;
+
+RUN curl -L -o ~/mambaforge.sh https://github.com/conda-forge/miniforge/releases/download/${MAMBA_VERSION}/Mambaforge-${MAMBA_VERSION}-Linux-x86_64.sh \
+ && chmod +x ~/mambaforge.sh \
+ && ~/mambaforge.sh -b -p /opt/conda \
+ && rm ~/mambaforge.sh \
+ && /opt/conda/bin/conda update -y conda \
+ && /opt/conda/bin/conda install -c conda-forge -y \
+    python=$PYTHON_VERSION \
+    pyopenssl \
+    cython \
+    mkl-include \
+    mkl \
+    botocore \
+    parso \
+    scipy \
+    typing \
+    # Below 2 are included in miniconda base, but not mamba so need to install
+    conda-content-trust \
+    charset-normalizer \
+ && /opt/conda/bin/conda update -y conda \
+ && /opt/conda/bin/conda clean -ya
+
+RUN conda install -c conda-forge \
+    scikit-learn \
+    h5py \
+    requests \
+ && conda clean -ya \
+ && pip install --upgrade pip --trusted-host pypi.org --trusted-host files.pythonhosted.org \
+ && ln -s /opt/conda/bin/pip /usr/local/bin/pip3 \
+ && pip install packaging \
+    enum-compat \
+    ipython \
+ && rm -rf ~/.cache/pip/*
+
+RUN pip install --no-cache-dir -U \
+    opencv-python>=4.8.1.78 \
+    "numpy>=1.22.2, <=1.25.2" \
+    "scipy>=1.8.0" \
+    six \
+    "pillow>=10.0.1" \
+    "awscli<2" \
+    pandas==1.* \
+    boto3 \
+    "cryptography<46,>=41.0.5" \
+    "protobuf>=3.20.3, <4" \
+    "networkx~=2.6" \
+ && pip install --no-deps --no-cache-dir -U torchvision==0.22.* \
+ && rm -rf ~/.cache/pip/*
+
+# Install Neuronx-cc and PyTorch
+RUN pip install --index-url https://pip.repos.neuron.amazonaws.com \
+    --extra-index-url https://pypi.org/simple \
+    --trusted-host pip.repos.neuron.amazonaws.com \
+    neuronx-cc==$NEURONX_CC_VERSION \
+    torch-neuronx==$NEURONX_FRAMEWORK_VERSION \
+    neuronx_distributed==$NEURONX_DISTRIBUTED_VERSION
+
+WORKDIR /
+
+RUN pip install --no-cache-dir \
+    multi-model-server==$MMS_VERSION \
+    sagemaker-inference
+
+RUN useradd -m model-server \
+ && mkdir -p /home/model-server/tmp \
+ && chown -R model-server /home/model-server
+
+COPY neuron-entrypoint.py /usr/local/bin/dockerd-entrypoint.py
+COPY neuron-monitor.sh /usr/local/bin/neuron-monitor.sh
+COPY config.properties /etc/sagemaker-mms.properties
+
+RUN chmod +x /usr/local/bin/dockerd-entrypoint.py \
+ && chmod +x /usr/local/bin/neuron-monitor.sh
+
+ADD https://raw.githubusercontent.com/aws/deep-learning-containers/master/src/deep_learning_container.py /usr/local/bin/deep_learning_container.py
+
+RUN chmod +x /usr/local/bin/deep_learning_container.py
+
+#################################
+# Hugging Face specific section #
+#################################
+
+RUN curl -o /license.txt https://aws-dlc-licenses.s3.amazonaws.com/pytorch-2.7/license.txt
+
+# install Hugging Face libraries and its dependencies
+RUN pip install --no-cache-dir -U \
+    networkx~=2.6 \
+    transformers[sentencepiece,audio,vision]==${TRANSFORMERS_VERSION} \
+    diffusers==${DIFFUSERS_VERSION} \
+    compel \
+    controlnet-aux \
+    huggingface_hub==${HUGGINGFACE_HUB_VERSION} \
+    hf_transfer \
+    datasets==${DATASETS_VERSION} \
+    optimum-neuron==${OPTIMUM_NEURON_VERSION} \
+    "sagemaker-huggingface-inference-toolkit>=2.4.1,<3" \
+    sentence_transformers==${SENTENCE_TRANSFORMERS} \
+    peft==${PEFT_VERSION} \
+ && rm -rf ~/.cache/pip/*
+
+RUN HOME_DIR=/root \
+ && curl -o ${HOME_DIR}/oss_compliance.zip https://aws-dlinfra-utilities.s3.amazonaws.com/oss_compliance.zip \
+ && unzip ${HOME_DIR}/oss_compliance.zip -d ${HOME_DIR}/ \
+ && cp ${HOME_DIR}/oss_compliance/test/testOSSCompliance /usr/local/bin/testOSSCompliance \
+ && chmod +x /usr/local/bin/testOSSCompliance \
+ && chmod +x ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh \
+ && ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh ${HOME_DIR} ${PYTHON} \
+ && rm -rf ${HOME_DIR}/oss_compliance* \
+ # conda leaves an empty /root/.cache/conda/notices.cache file which is not removed by conda clean -ya
+ && rm -rf ${HOME_DIR}/.cache/conda
+
+ENV HF_HUB_USER_AGENT_ORIGIN="aws:sagemaker:neuron:inference:regular"
+EXPOSE 8080 8081
+ENTRYPOINT ["python", "/usr/local/bin/dockerd-entrypoint.py"]
+CMD ["serve"]
Lines changed: 9 additions & 0 deletions
@@ -0,0 +1,9 @@
+{
+  "77740": "protobuf, required by Neuron SDK. Affected versions of this package are vulnerable to a potential Denial of Service (DoS) attack due to unbounded recursion when parsing untrusted Protocol Buffers data.",
+  "77986": "In transformers, The vulnerability arises from insecure URL validation using the `startswith()` method, which can be bypassed through URL username injection. This allows attackers to craft URLs that appear to be from YouTube but resolve to malicious domains, potentially leading to phishing attacks, malware distribution, or data exfiltration. The issue is fixed in version 4.52.1. We cannot upgrade now, because it co dependent on Neuron SDK version and required by HF",
+  "78153": "A Regular Expression Denial of Service (ReDoS) vulnerability was discovered in the Hugging Face Transformers library. This vulnerability affects versions 4.51.3 and earlier, and is fixed in version 4.52.1.",
+  "78688": "also In transformers",
+  "79595": "also In transformers",
+  "79596": "also In transformers",
+  "79855": "also In transformers"
+}
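
Entry "77986" refers to a startswith()-based URL check that can be bypassed through the userinfo ("username") part of the authority. A small illustration of that pattern with a hypothetical URL; this is not code from transformers:

from urllib.parse import urlparse

# Everything before "@" in the authority is treated as userinfo, so a request
# to this URL actually goes to evil.example even though the prefix check passes.
url = "https://www.youtube.com@evil.example/watch?v=abc"

print(url.startswith("https://www.youtube.com"))  # True, so a naive check passes
print(urlparse(url).hostname)                     # evil.example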

release_images_general.yml

Lines changed: 2 additions & 2 deletions
@@ -44,14 +44,14 @@ release_images:
       public_registry: True
   4:
     framework: "vllm"
-    version: "0.10.2"
+    version: "0.11.0"
     arch_type: "x86"
     customer_type: "ec2"
     general:
       device_types: [ "gpu" ]
       python_versions: [ "py312" ]
       os_version: "ubuntu22.04"
-      cuda_version: "cu129"
+      cuda_version: "cu128"
       example: False
       disable_sm_tag: False
       force_release: False

release_images_training.yml

Lines changed: 14 additions & 1 deletion
@@ -103,4 +103,17 @@ release_images:
       cuda_version: "cu128"
       example: False
       disable_sm_tag: False
-      force_release: False
+      force_release: False
+  9:
+    framework: "huggingface_pytorch"
+    version: "2.8.0"
+    hf_transformers: "4.56.2"
+    arch_type: "x86"
+    training:
+      device_types: ["gpu"]
+      python_versions: [ "py312" ]
+      os_version: "ubuntu22.04"
+      cuda_version: "cu129"
+      example: False
+      disable_sm_tag: False
+      force_release: False

test/dlc_tests/sanity/test_pre_release.py

Lines changed: 1 addition & 12 deletions
@@ -482,8 +482,6 @@ def test_framework_and_neuron_sdk_version(neuron):
             if "training" in image or "neuronx" in image:
                 package_names = {"torch-neuronx": "torch_neuronx"}
                 # transformers is only available for the inference image
-                if "training" not in image:
-                    package_names["transformers-neuronx"] = "transformers_neuronx"
             else:
                 package_names = {"torch-neuron": "torch_neuron"}
         elif tested_framework == "tensorflow":
@@ -514,17 +512,8 @@ def test_framework_and_neuron_sdk_version(neuron):
             executable="python",
         )
 
-        installed_framework_version = output.stdout.strip()
+        installed_framework_version = output.stdout.strip().split("+")[0]
         version_list = release_manifest[package_name]
-        # temporary hack because transformers_neuronx reports its version as 0.6.x
-        if package_name == "transformers-neuronx":
-            if installed_framework_version == "0.12.x":
-                # skip the check due to transformers_neuronx version bug
-                # eg. transformers_neuronx.__version__=='0.10.x' for v0.11.351...
-                continue
-            version_list = [
-                ".".join(entry.split(".")[:2]) + ".x" for entry in release_manifest[package_name]
-            ]
         assert installed_framework_version in version_list, (
             f"framework {framework} version {installed_framework_version} "
             f"not found in released versions for that package: {version_list}"

test/sagemaker_tests/huggingface/inference/integration/__init__.py

Lines changed: 1 addition & 1 deletion
@@ -23,7 +23,7 @@
 
 model_dir = os.path.join(resources_path, "tiny-distilbert-sst-2")
 model_dir_sdxl = os.path.join(resources_path, "tiny-sdxl")
-model_dir_decoder = os.path.join(resources_path, "tiny-gpt2")
+model_dir_decoder = os.path.join(resources_path, "tiny-llama3")
 pt_model = "pt_model.tar.gz"
 tf_model = "tf_model.tar.gz"
 pt_neuron_model = "pt_neuron_model.tar.gz"

test/sagemaker_tests/huggingface/inference/integration/sagemaker/test_neuronx_decoder_hosting.py

Lines changed: 1 addition & 1 deletion
@@ -38,7 +38,7 @@
 # instances in the regions corresponding to their availability.
 # In future, we would like to configure the logic to run multiple `pytest` commands that can allow
 # us to test multiple instances in multiple regions for each image.
-@pytest.mark.model("tiny-gpt2")
+@pytest.mark.model("tiny-llama3")
 @pytest.mark.processor("neuronx")
 @pytest.mark.parametrize(
     "test_region,test_instance_type",
