Merged
45 commits
68347f0
revertme: dlc developer config
JingyaHuang Sep 17, 2025
47738d5
update neuronx dockerfile
JingyaHuang Sep 17, 2025
b6f5740
downgrade trfrs
JingyaHuang Sep 17, 2025
35cd1fd
update tiny artifacts
JingyaHuang Sep 17, 2025
80b2e0b
fix: remove transformer-neuronx
JingyaHuang Sep 17, 2025
9d6ec3a
address comments
JingyaHuang Sep 18, 2025
5cec25c
add pypi to extra index for networkx compatibility
JingyaHuang Sep 18, 2025
7badb09
fix: unbuntu version tag
JingyaHuang Sep 18, 2025
ed97698
Merge branch 'master' into update-hf-pt2.3-inf
ahsan-z-khan Sep 18, 2025
9878fe4
Merge branch 'master' into update-hf-pt2.3-inf
JingyaHuang Sep 19, 2025
5df31f0
fix: tackle vulneralbility
JingyaHuang Sep 19, 2025
e43b179
Merge branch 'master' into update-hf-pt2.3-inf
ahsan-z-khan Sep 19, 2025
a6a5ce8
Merge branch 'master' into update-hf-pt2.3-inf
malav-shastri Sep 24, 2025
288b282
add empty allowlists
JingyaHuang Sep 24, 2025
fced133
Remove empty allowlist
sirutBuasai Sep 25, 2025
60ea52e
Merge branch 'master' into update-hf-pt2.3-inf
ahsan-z-khan Sep 29, 2025
3c6f536
Merge branch 'master' into update-hf-pt2.3-inf
ahsan-z-khan Sep 29, 2025
ebf920e
update sdk to 2.24.1 and add allowlist images
ahsan-z-khan Sep 30, 2025
934b9bc
Merge branch 'master' into update-hf-pt2.3-inf
ahsan-z-khan Sep 30, 2025
c49ea97
add suffix on apt libraries
ahsan-z-khan Sep 30, 2025
d239a0d
remove allowlist
ahsan-z-khan Sep 30, 2025
452251c
remove suffix from installed_framework_version
ahsan-z-khan Sep 30, 2025
753f743
remove transformers_neuronx as its not used anymore
ahsan-z-khan Sep 30, 2025
192e5a4
add python allowlist
ahsan-z-khan Oct 1, 2025
28f001a
Merge branch 'master' into update-hf-pt2.3-inf
ahsan-z-khan Oct 1, 2025
9b4e932
fix: sentence trfrs no deps
JingyaHuang Oct 1, 2025
cd25836
format
ahsan-z-khan Oct 1, 2025
3449bdb
Update Dockerfile.neuronx.py_scan_allowlist.json
ahsan-z-khan Oct 2, 2025
f3ea766
Merge branch 'master' into update-hf-pt2.3-inf
ahsan-z-khan Oct 2, 2025
1efc5ac
Update Dockerfile.neuronx.py_scan_allowlist.json
ahsan-z-khan Oct 2, 2025
f645cdb
add no deps to peft
ahsan-z-khan Oct 2, 2025
f810f64
update allowlist for transformers
ahsan-z-khan Oct 2, 2025
7f5ee91
change upgrade strategy
ahsan-z-khan Oct 2, 2025
6fd0fac
Merge branch 'master' into update-hf-pt2.3-inf
ahsan-z-khan Oct 2, 2025
ebfcf1b
Merge branch 'master' into update-hf-pt2.3-inf
ahsan-z-khan Oct 3, 2025
fabfc8f
update req
ahsan-z-khan Oct 6, 2025
36297c9
add no upgrade networkx
ahsan-z-khan Oct 6, 2025
2bf751e
fix: sdxl compiled with bs=1
JingyaHuang Oct 7, 2025
c477273
Update Dockerfile.neuronx
arjraman Oct 7, 2025
31b4727
Update Dockerfile.neuronx.py_scan_allowlist.json
arjraman Oct 7, 2025
a4af4b0
Update requirements.txt
arjraman Oct 7, 2025
e796825
Update Dockerfile.neuronx.py_scan_allowlist.json
arjraman Oct 8, 2025
5ad59a1
Update Dockerfile.neuronx.py_scan_allowlist.json
arjraman Oct 8, 2025
9cdcd76
Update dlc_developer_config.toml
ahsan-z-khan Oct 8, 2025
071408d
Update dlc_developer_config.toml
ahsan-z-khan Oct 8, 2025
2 changes: 1 addition & 1 deletion dlc_developer_config.toml
@@ -182,4 +182,4 @@ dlc-pr-pytorch-eia-inference = ""
dlc-pr-tensorflow-2-eia-inference = ""

# vllm
dlc-pr-vllm = ""
dlc-pr-vllm = ""
10 changes: 5 additions & 5 deletions huggingface/pytorch/inference/buildspec-neuronx.yml
@@ -2,8 +2,8 @@ account_id: &ACCOUNT_ID <set-$ACCOUNT_ID-in-environment>
region: &REGION <set-$REGION-in-environment>
base_framework: &BASE_FRAMEWORK pytorch
framework: &FRAMEWORK !join [ "huggingface_", *BASE_FRAMEWORK]
version: &VERSION 2.1.2
short_version: &SHORT_VERSION "2.1"
version: &VERSION 2.7.1
short_version: &SHORT_VERSION "2.7"
contributor: &CONTRIBUTOR huggingface
arch_type: x86

@@ -34,9 +34,9 @@ images:
device_type: &DEVICE_TYPE neuronx
python_version: &DOCKER_PYTHON_VERSION py3
tag_python_version: &TAG_PYTHON_VERSION py310
neuron_sdk_version: &NEURON_SDK_VERSION sdk2.20.0
os_version: &OS_VERSION ubuntu20.04
transformers_version: &TRANSFORMERS_VERSION 4.43.2
neuron_sdk_version: &NEURON_SDK_VERSION sdk2.24.1
os_version: &OS_VERSION ubuntu22.04
transformers_version: &TRANSFORMERS_VERSION 4.51.3
tag: !join [ *VERSION, '-', 'transformers', *TRANSFORMERS_VERSION, '-', *DEVICE_TYPE, '-', *TAG_PYTHON_VERSION,"-", *NEURON_SDK_VERSION, '-', *OS_VERSION ]
docker_file: !join [ docker/, *SHORT_VERSION, /, *DOCKER_PYTHON_VERSION, /, *NEURON_SDK_VERSION, /Dockerfile., *DEVICE_TYPE ]
context:
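For reference, the !join anchors above resolve (worked out by hand from this buildspec, not quoted from CI output) to the following image tag and Dockerfile path:

# tag:         2.7.1-transformers4.51.3-neuronx-py310-sdk2.24.1-ubuntu22.04
# docker_file: docker/2.7/py3/sdk2.24.1/Dockerfile.neuronx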

Reviewer comment: Overall, some of the pip commands can be combined to reduce image layers.
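One way to act on that suggestion would be to fold the later multi-model-server / sagemaker-inference install into the same layer as the general Python dependencies. The snippet below is only a sketch of the idea, with pins copied from the Dockerfile in this diff; the exact grouping is an assumption, not what the PR ships.

# Sketch only: merge consecutive pip installs into a single RUN to cut image layers
RUN pip install --no-cache-dir -U \
        "numpy>=1.22.2, <=1.25.2" \
        "protobuf>=3.20.3, <4" \
        "networkx~=2.6" \
        multi-model-server==$MMS_VERSION \
        sagemaker-inference \
    && pip install --no-deps --no-cache-dir -U torchvision==0.22.* \
    && rm -rf ~/.cache/pip/*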

New file: Dockerfile.neuronx (196 additions)
@@ -0,0 +1,196 @@
FROM ubuntu:22.04

LABEL dlc_major_version="1"
LABEL maintainer="Amazon AI"
LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port=true

ARG PYTHON=python3.10
ARG PYTHON_VERSION=3.10.12
ARG MMS_VERSION=1.1.11
ARG MAMBA_VERSION=23.1.0-4

# Neuron SDK components version numbers
ARG NEURONX_FRAMEWORK_VERSION=2.7.0.2.8.6734
ARG NEURONX_DISTRIBUTED_VERSION=0.13.14393
ARG NEURONX_CC_VERSION=2.19.8089.0
ARG NEURONX_COLLECTIVES_LIB_VERSION=2.26.43.0-47cc904ea
ARG NEURONX_RUNTIME_LIB_VERSION=2.26.42.0-2ff3b5c7d
ARG NEURONX_TOOLS_VERSION=2.24.54.0

# HF ARGS
ARG TRANSFORMERS_VERSION
ARG DIFFUSERS_VERSION=0.35.1
ARG HUGGINGFACE_HUB_VERSION=0.35.0
ARG OPTIMUM_NEURON_VERSION=0.3.0
ARG SENTENCE_TRANSFORMERS=5.1.0
ARG PEFT_VERSION=0.17.0
ARG DATASETS_VERSION=4.1.0

# See http://bugs.python.org/issue19846
ENV LANG C.UTF-8
ENV LD_LIBRARY_PATH /opt/aws/neuron/lib:/lib/x86_64-linux-gnu:/opt/conda/lib/:$LD_LIBRARY_PATH
ENV PATH /opt/conda/bin:/opt/aws/neuron/bin:$PATH
ENV SAGEMAKER_SERVING_MODULE sagemaker_pytorch_serving_container.serving:main
ENV TEMP=/home/model-server/tmp

RUN apt-get update \
&& apt-get upgrade -y \
&& apt-get install -y --no-install-recommends \
apt-transport-https \
build-essential \
ca-certificates \
cmake \
curl \
emacs \
git \
gnupg2 \
gpg-agent \
jq \
libgl1-mesa-glx \
libglib2.0-0 \
libsm6 \
libxext6 \
libxrender-dev \
libcap-dev \
libhwloc-dev \
openjdk-11-jdk \
unzip \
vim \
wget \
zlib1g-dev \
&& rm -rf /var/lib/apt/lists/* \
&& rm -rf /tmp/tmp* \
&& apt-get clean

RUN echo "deb https://apt.repos.neuron.amazonaws.com focal main" > /etc/apt/sources.list.d/neuron.list
RUN wget -qO - https://apt.repos.neuron.amazonaws.com/GPG-PUB-KEY-AMAZON-AWS-NEURON.PUB | apt-key add -

# Install Neuronx tools
RUN apt-get update \
&& apt-get install -y \
aws-neuronx-tools=$NEURONX_TOOLS_VERSION \
aws-neuronx-collectives=$NEURONX_COLLECTIVES_LIB_VERSION \
aws-neuronx-runtime-lib=$NEURONX_RUNTIME_LIB_VERSION \
&& rm -rf /var/lib/apt/lists/* \
&& rm -rf /tmp/tmp* \
&& apt-get clean

# https://github.com/docker-library/openjdk/issues/261 https://github.com/docker-library/openjdk/pull/263/files
RUN keytool -importkeystore -srckeystore /etc/ssl/certs/java/cacerts -destkeystore /etc/ssl/certs/java/cacerts.jks -deststoretype JKS -srcstorepass changeit -deststorepass changeit -noprompt; \
mv /etc/ssl/certs/java/cacerts.jks /etc/ssl/certs/java/cacerts; \
/var/lib/dpkg/info/ca-certificates-java.postinst configure;

RUN curl -L -o ~/mambaforge.sh https://github.com/conda-forge/miniforge/releases/download/${MAMBA_VERSION}/Mambaforge-${MAMBA_VERSION}-Linux-x86_64.sh \
&& chmod +x ~/mambaforge.sh \
&& ~/mambaforge.sh -b -p /opt/conda \
&& rm ~/mambaforge.sh \
&& /opt/conda/bin/conda update -y conda \
&& /opt/conda/bin/conda install -c conda-forge -y \
python=$PYTHON_VERSION \
pyopenssl \
cython \
mkl-include \
mkl \
botocore \
parso \
scipy \
typing \
# Below 2 are included in miniconda base, but not mamba so need to install
conda-content-trust \
charset-normalizer \
&& /opt/conda/bin/conda update -y conda \
&& /opt/conda/bin/conda clean -ya

RUN conda install -c conda-forge \
scikit-learn \
h5py \
requests \
&& conda clean -ya \
&& pip install --upgrade pip --trusted-host pypi.org --trusted-host files.pythonhosted.org \
&& ln -s /opt/conda/bin/pip /usr/local/bin/pip3 \
&& pip install packaging \
enum-compat \
ipython \
&& rm -rf ~/.cache/pip/*

RUN pip install --no-cache-dir -U \
opencv-python>=4.8.1.78 \
"numpy>=1.22.2, <=1.25.2" \
"scipy>=1.8.0" \
six \
"pillow>=10.0.1" \
"awscli<2" \
pandas==1.* \
boto3 \
"cryptography<46,>=41.0.5" \
"protobuf>=3.20.3, <4" \
"networkx~=2.6" \
&& pip install --no-deps --no-cache-dir -U torchvision==0.22.* \
&& rm -rf ~/.cache/pip/*

# Install Neuronx-cc and PyTorch
RUN pip install --index-url https://pip.repos.neuron.amazonaws.com \
--extra-index-url https://pypi.org/simple \
--trusted-host pip.repos.neuron.amazonaws.com \
neuronx-cc==$NEURONX_CC_VERSION \
torch-neuronx==$NEURONX_FRAMEWORK_VERSION \
neuronx_distributed==$NEURONX_DISTRIBUTED_VERSION

WORKDIR /

RUN pip install --no-cache-dir \
multi-model-server==$MMS_VERSION \
sagemaker-inference

RUN useradd -m model-server \
&& mkdir -p /home/model-server/tmp \
&& chown -R model-server /home/model-server

COPY neuron-entrypoint.py /usr/local/bin/dockerd-entrypoint.py
COPY neuron-monitor.sh /usr/local/bin/neuron-monitor.sh
COPY config.properties /etc/sagemaker-mms.properties

RUN chmod +x /usr/local/bin/dockerd-entrypoint.py \
&& chmod +x /usr/local/bin/neuron-monitor.sh

ADD https://raw.githubusercontent.com/aws/deep-learning-containers/master/src/deep_learning_container.py /usr/local/bin/deep_learning_container.py

RUN chmod +x /usr/local/bin/deep_learning_container.py

#################################
# Hugging Face specific section #
#################################

RUN curl -o /license.txt https://aws-dlc-licenses.s3.amazonaws.com/pytorch-2.7/license.txt

# install Hugging Face libraries and its dependencies
RUN pip install --no-cache-dir -U \
networkx~=2.6 \
transformers[sentencepiece,audio,vision]==${TRANSFORMERS_VERSION} \
diffusers==${DIFFUSERS_VERSION} \
compel \
controlnet-aux \
huggingface_hub==${HUGGINGFACE_HUB_VERSION} \
hf_transfer \
datasets==${DATASETS_VERSION} \
optimum-neuron==${OPTIMUM_NEURON_VERSION} \
"sagemaker-huggingface-inference-toolkit>=2.4.1,<3" \
sentence_transformers==${SENTENCE_TRANSFORMERS} \
peft==${PEFT_VERSION} \
&& rm -rf ~/.cache/pip/*

RUN HOME_DIR=/root \
&& curl -o ${HOME_DIR}/oss_compliance.zip https://aws-dlinfra-utilities.s3.amazonaws.com/oss_compliance.zip \
&& unzip ${HOME_DIR}/oss_compliance.zip -d ${HOME_DIR}/ \
&& cp ${HOME_DIR}/oss_compliance/test/testOSSCompliance /usr/local/bin/testOSSCompliance \
&& chmod +x /usr/local/bin/testOSSCompliance \
&& chmod +x ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh \
&& ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh ${HOME_DIR} ${PYTHON} \
&& rm -rf ${HOME_DIR}/oss_compliance* \
# conda leaves an empty /root/.cache/conda/notices.cache file which is not removed by conda clean -ya
&& rm -rf ${HOME_DIR}/.cache/conda

ENV HF_HUB_USER_AGENT_ORIGIN="aws:sagemaker:neuron:inference:regular"
EXPOSE 8080 8081
ENTRYPOINT ["python", "/usr/local/bin/dockerd-entrypoint.py"]
CMD ["serve"]
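A quick way to confirm that the pinned Neuron components actually land in the built image is sketched below; the tag hf-pt-neuronx:local is hypothetical, substitute whatever tag the local build produces.

# Check pip-installed Neuron packages against the ARG pins above
docker run --rm hf-pt-neuronx:local pip show neuronx-cc torch-neuronx neuronx_distributed | grep -E "^(Name|Version)"
# Check apt-installed Neuron collectives/runtime/tools
docker run --rm hf-pt-neuronx:local bash -c "dpkg -l | grep aws-neuronx"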
New file: Dockerfile.neuronx.py_scan_allowlist.json (9 additions)
@@ -0,0 +1,9 @@
{
"77740": "protobuf, required by Neuron SDK. Affected versions of this package are vulnerable to a potential Denial of Service (DoS) attack due to unbounded recursion when parsing untrusted Protocol Buffers data.",
"77986": "In transformers, The vulnerability arises from insecure URL validation using the `startswith()` method, which can be bypassed through URL username injection. This allows attackers to craft URLs that appear to be from YouTube but resolve to malicious domains, potentially leading to phishing attacks, malware distribution, or data exfiltration. The issue is fixed in version 4.52.1. We cannot upgrade now, because it co dependent on Neuron SDK version and required by HF",
"78153": "A Regular Expression Denial of Service (ReDoS) vulnerability was discovered in the Hugging Face Transformers library. This vulnerability affects versions 4.51.3 and earlier, and is fixed in version 4.52.1.",
"78688": "also In transformers",
"79595": "also In transformers",
"79596": "also In transformers",
"79855": "also In transformers"
}
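When editing this allowlist locally, a minimal sanity check is to make sure it still parses as JSON; the path below assumes the file sits next to the Dockerfile it is named after.

python3 -m json.tool Dockerfile.neuronx.py_scan_allowlist.json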
13 changes: 1 addition & 12 deletions test/dlc_tests/sanity/test_pre_release.py
@@ -482,8 +482,6 @@ def test_framework_and_neuron_sdk_version(neuron):
if "training" in image or "neuronx" in image:
package_names = {"torch-neuronx": "torch_neuronx"}
# transformers is only available for the inference image
if "training" not in image:
package_names["transformers-neuronx"] = "transformers_neuronx"
else:
package_names = {"torch-neuron": "torch_neuron"}
elif tested_framework == "tensorflow":
@@ -514,17 +512,8 @@
executable="python",
)

installed_framework_version = output.stdout.strip()
installed_framework_version = output.stdout.strip().split("+")[0]
version_list = release_manifest[package_name]
# temporary hack because transformers_neuronx reports its version as 0.6.x
if package_name == "transformers-neuronx":
if installed_framework_version == "0.12.x":
# skip the check due to transformers_neuronx version bug
# eg. transformers_neuronx.__version__=='0.10.x' for v0.11.351...
continue
version_list = [
".".join(entry.split(".")[:2]) + ".x" for entry in release_manifest[package_name]
]
assert installed_framework_version in version_list, (
f"framework {framework} version {installed_framework_version} "
f"not found in released versions for that package: {version_list}"
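The new .split("+")[0] simply drops any PEP 440 local-version suffix before the installed version is compared against the release manifest. A throwaway check of that behaviour (the "+g1234abc" suffix is made up for illustration):

python3 -c 'print("2.7.0.2.8.6734+g1234abc".split("+")[0])'   # -> 2.7.0.2.8.6734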
@@ -23,7 +23,7 @@

model_dir = os.path.join(resources_path, "tiny-distilbert-sst-2")
model_dir_sdxl = os.path.join(resources_path, "tiny-sdxl")
model_dir_decoder = os.path.join(resources_path, "tiny-gpt2")
model_dir_decoder = os.path.join(resources_path, "tiny-llama3")
pt_model = "pt_model.tar.gz"
tf_model = "tf_model.tar.gz"
pt_neuron_model = "pt_neuron_model.tar.gz"
@@ -38,7 +38,7 @@
# instances in the regions corresponding to their availability.
# In future, we would like to configure the logic to run multiple `pytest` commands that can allow
# us to test multiple instances in multiple regions for each image.
@pytest.mark.model("tiny-gpt2")
@pytest.mark.model("tiny-llama3")
@pytest.mark.processor("neuronx")
@pytest.mark.parametrize(
"test_region,test_instance_type",
7 changes: 3 additions & 4 deletions test/sagemaker_tests/huggingface/inference/requirements.txt
@@ -1,8 +1,7 @@
boto3
coverage
# Docker v7.0.0 breaks compatibility with Docker Compose v1 (SageMaker Local)
docker<=6.1.3
docker-compose
docker>=5,<=6.1.3
flake8==3.7.7
Flask==1.1.1
mock
@@ -12,13 +11,13 @@ pytest-rerunfailures
pytest-xdist
PyYAML
protobuf>=3.20,<=3.20.2
sagemaker>=2,<3
sagemaker>=2.237.0,<3
six
requests<2.32.0
requests_mock
Pillow
retrying==1.3.3
urllib3==1.26.0
urllib3>=1.26.8
pluggy>=1.5,<2
requests_mock
sagemaker-inference
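To confirm the relaxed pins (docker>=5,<=6.1.3, sagemaker>=2.237.0,<3, urllib3>=1.26.8) still resolve together, one option is a throwaway virtualenv followed by pip check; this is a local sanity sketch, not part of the PR's CI.

python3 -m venv /tmp/reqs-venv && . /tmp/reqs-venv/bin/activate
pip install -r test/sagemaker_tests/huggingface/inference/requirements.txt
pip check   # flags any incompatible dependency pins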
4 binary files changed (contents not shown).