
Commit e4edd94

Merge branch 'master' into vllm-ec2-pilot
2 parents c34155a + 8a044a7 commit e4edd94

File tree

17 files changed (+241, -34 lines)

data/ignore_ids_safety_scan.json

Lines changed: 2 additions & 1 deletion
@@ -1446,7 +1446,8 @@
 "77744": "urllib3 is a user-friendly HTTP client library for Python. Prior to 2.5.0, it is possible to disable redirects for all requests by instantiating a PoolManager and specifying retries in a way that disable redirects. By default, requests and botocore users are not affected. An application attempting to mitigate SSRF or open redirect vulnerabilities by disabling redirects at the PoolManager level will remain vulnerable. This issue has been patched in version 2.5.0.",
 "79077": "Affected versions of the h2 package are vulnerable to HTTP Request Smuggling due to improper validation of illegal characters in HTTP headers. The package allows CRLF characters to be injected into header names and values without proper sanitisation, which can cause request boundary manipulation when HTTP/2 requests are downgraded to HTTP/1.1 by downstream servers.",
 "79595": "Affected versions of the transformers package are vulnerable to Regular Expression Denial of Service (ReDoS) due to inefficient regular expressions in the EnglishNormalizer.normalize_numbers() method",
-"79596": "Affected versions of the transformers package are vulnerable to Regular Expression Denial of Service (ReDoS) due to inefficient regular expressions in the MarianTokenizer.remove_language_code() method"
+"79596": "Affected versions of the transformers package are vulnerable to Regular Expression Denial of Service (ReDoS) due to inefficient regular expressions in the MarianTokenizer.remove_language_code() method",
+"79855": "Affected versions of the transformers package are vulnerable to Regular Expression Denial of Service (ReDoS) due to unbounded evaluation of user-supplied regular expressions in the AdamWeightDecay._do_use_weight_decay method. The TensorFlow optimizer’s _do_use_weight_decay iterates over include_in_weight_decay and exclude_from_weight_decay lists and calls re.search on each pattern against parameter names, enabling catastrophic backtracking on crafted inputs. An attacker who can control these lists can provide pathological patterns that saturate the CPU and cause processes using transformers to hang, resulting in a Denial of Service."
 }
 },
 "inference-neuron": {

dlc_developer_config.toml

Lines changed: 1 addition & 1 deletion
@@ -186,4 +186,4 @@ dlc-pr-pytorch-eia-inference = ""
 dlc-pr-tensorflow-2-eia-inference = ""
 
 # vllm
-dlc-pr-vllm = "vllm/buildspec.yml"
+dlc-pr-vllm = ""

huggingface/pytorch/inference/buildspec-neuronx.yml

Lines changed: 5 additions & 5 deletions
@@ -2,8 +2,8 @@ account_id: &ACCOUNT_ID <set-$ACCOUNT_ID-in-environment>
 region: &REGION <set-$REGION-in-environment>
 base_framework: &BASE_FRAMEWORK pytorch
 framework: &FRAMEWORK !join [ "huggingface_", *BASE_FRAMEWORK]
-version: &VERSION 2.1.2
-short_version: &SHORT_VERSION "2.1"
+version: &VERSION 2.7.1
+short_version: &SHORT_VERSION "2.7"
 contributor: &CONTRIBUTOR huggingface
 arch_type: x86
 
@@ -34,9 +34,9 @@ images:
 device_type: &DEVICE_TYPE neuronx
 python_version: &DOCKER_PYTHON_VERSION py3
 tag_python_version: &TAG_PYTHON_VERSION py310
-neuron_sdk_version: &NEURON_SDK_VERSION sdk2.20.0
-os_version: &OS_VERSION ubuntu20.04
-transformers_version: &TRANSFORMERS_VERSION 4.43.2
+neuron_sdk_version: &NEURON_SDK_VERSION sdk2.24.1
+os_version: &OS_VERSION ubuntu22.04
+transformers_version: &TRANSFORMERS_VERSION 4.51.3
 tag: !join [ *VERSION, '-', 'transformers', *TRANSFORMERS_VERSION, '-', *DEVICE_TYPE, '-', *TAG_PYTHON_VERSION,"-", *NEURON_SDK_VERSION, '-', *OS_VERSION ]
 docker_file: !join [ docker/, *SHORT_VERSION, /, *DOCKER_PYTHON_VERSION, /, *NEURON_SDK_VERSION, /Dockerfile., *DEVICE_TYPE ]
 context:
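
For reference, a quick sketch (not part of the commit, and assuming the buildspec's !join tag simply concatenates its arguments) of the image tag the updated anchors would produce:

# Anchor values taken from the updated buildspec above.
parts = ["2.7.1", "-", "transformers", "4.51.3", "-", "neuronx", "-",
         "py310", "-", "sdk2.24.1", "-", "ubuntu22.04"]
print("".join(parts))  # 2.7.1-transformers4.51.3-neuronx-py310-sdk2.24.1-ubuntu22.04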
Lines changed: 196 additions & 0 deletions
@@ -0,0 +1,196 @@
+FROM ubuntu:22.04
+
+LABEL dlc_major_version="1"
+LABEL maintainer="Amazon AI"
+LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port=true
+
+ARG PYTHON=python3.10
+ARG PYTHON_VERSION=3.10.12
+ARG MMS_VERSION=1.1.11
+ARG MAMBA_VERSION=23.1.0-4
+
+# Neuron SDK components version numbers
+ARG NEURONX_FRAMEWORK_VERSION=2.7.0.2.8.6734
+ARG NEURONX_DISTRIBUTED_VERSION=0.13.14393
+ARG NEURONX_CC_VERSION=2.19.8089.0
+ARG NEURONX_COLLECTIVES_LIB_VERSION=2.26.43.0-47cc904ea
+ARG NEURONX_RUNTIME_LIB_VERSION=2.26.42.0-2ff3b5c7d
+ARG NEURONX_TOOLS_VERSION=2.24.54.0
+
+# HF ARGS
+ARG TRANSFORMERS_VERSION
+ARG DIFFUSERS_VERSION=0.35.1
+ARG HUGGINGFACE_HUB_VERSION=0.35.0
+ARG OPTIMUM_NEURON_VERSION=0.3.0
+ARG SENTENCE_TRANSFORMERS=5.1.0
+ARG PEFT_VERSION=0.17.0
+ARG DATASETS_VERSION=4.1.0
+
+# See http://bugs.python.org/issue19846
+ENV LANG C.UTF-8
+ENV LD_LIBRARY_PATH /opt/aws/neuron/lib:/lib/x86_64-linux-gnu:/opt/conda/lib/:$LD_LIBRARY_PATH
+ENV PATH /opt/conda/bin:/opt/aws/neuron/bin:$PATH
+ENV SAGEMAKER_SERVING_MODULE sagemaker_pytorch_serving_container.serving:main
+ENV TEMP=/home/model-server/tmp
+
+RUN apt-get update \
+ && apt-get upgrade -y \
+ && apt-get install -y --no-install-recommends \
+    apt-transport-https \
+    build-essential \
+    ca-certificates \
+    cmake \
+    curl \
+    emacs \
+    git \
+    gnupg2 \
+    gpg-agent \
+    jq \
+    libgl1-mesa-glx \
+    libglib2.0-0 \
+    libsm6 \
+    libxext6 \
+    libxrender-dev \
+    libcap-dev \
+    libhwloc-dev \
+    openjdk-11-jdk \
+    unzip \
+    vim \
+    wget \
+    zlib1g-dev \
+ && rm -rf /var/lib/apt/lists/* \
+ && rm -rf /tmp/tmp* \
+ && apt-get clean
+
+RUN echo "deb https://apt.repos.neuron.amazonaws.com focal main" > /etc/apt/sources.list.d/neuron.list
+RUN wget -qO - https://apt.repos.neuron.amazonaws.com/GPG-PUB-KEY-AMAZON-AWS-NEURON.PUB | apt-key add -
+
+# Install Neuronx tools
+RUN apt-get update \
+ && apt-get install -y \
+    aws-neuronx-tools=$NEURONX_TOOLS_VERSION \
+    aws-neuronx-collectives=$NEURONX_COLLECTIVES_LIB_VERSION \
+    aws-neuronx-runtime-lib=$NEURONX_RUNTIME_LIB_VERSION \
+ && rm -rf /var/lib/apt/lists/* \
+ && rm -rf /tmp/tmp* \
+ && apt-get clean
+
+# https://github.com/docker-library/openjdk/issues/261 https://github.com/docker-library/openjdk/pull/263/files
+RUN keytool -importkeystore -srckeystore /etc/ssl/certs/java/cacerts -destkeystore /etc/ssl/certs/java/cacerts.jks -deststoretype JKS -srcstorepass changeit -deststorepass changeit -noprompt; \
+    mv /etc/ssl/certs/java/cacerts.jks /etc/ssl/certs/java/cacerts; \
+    /var/lib/dpkg/info/ca-certificates-java.postinst configure;
+
+RUN curl -L -o ~/mambaforge.sh https://github.com/conda-forge/miniforge/releases/download/${MAMBA_VERSION}/Mambaforge-${MAMBA_VERSION}-Linux-x86_64.sh \
+ && chmod +x ~/mambaforge.sh \
+ && ~/mambaforge.sh -b -p /opt/conda \
+ && rm ~/mambaforge.sh \
+ && /opt/conda/bin/conda update -y conda \
+ && /opt/conda/bin/conda install -c conda-forge -y \
+    python=$PYTHON_VERSION \
+    pyopenssl \
+    cython \
+    mkl-include \
+    mkl \
+    botocore \
+    parso \
+    scipy \
+    typing \
+    # Below 2 are included in miniconda base, but not mamba so need to install
+    conda-content-trust \
+    charset-normalizer \
+ && /opt/conda/bin/conda update -y conda \
+ && /opt/conda/bin/conda clean -ya
+
+RUN conda install -c conda-forge \
+    scikit-learn \
+    h5py \
+    requests \
+ && conda clean -ya \
+ && pip install --upgrade pip --trusted-host pypi.org --trusted-host files.pythonhosted.org \
+ && ln -s /opt/conda/bin/pip /usr/local/bin/pip3 \
+ && pip install packaging \
+    enum-compat \
+    ipython \
+ && rm -rf ~/.cache/pip/*
+
+RUN pip install --no-cache-dir -U \
+    opencv-python>=4.8.1.78 \
+    "numpy>=1.22.2, <=1.25.2" \
+    "scipy>=1.8.0" \
+    six \
+    "pillow>=10.0.1" \
+    "awscli<2" \
+    pandas==1.* \
+    boto3 \
+    "cryptography<46,>=41.0.5" \
+    "protobuf>=3.20.3, <4" \
+    "networkx~=2.6" \
+ && pip install --no-deps --no-cache-dir -U torchvision==0.22.* \
+ && rm -rf ~/.cache/pip/*
+
+# Install Neuronx-cc and PyTorch
+RUN pip install --index-url https://pip.repos.neuron.amazonaws.com \
+    --extra-index-url https://pypi.org/simple \
+    --trusted-host pip.repos.neuron.amazonaws.com \
+    neuronx-cc==$NEURONX_CC_VERSION \
+    torch-neuronx==$NEURONX_FRAMEWORK_VERSION \
+    neuronx_distributed==$NEURONX_DISTRIBUTED_VERSION
+
+WORKDIR /
+
+RUN pip install --no-cache-dir \
+    multi-model-server==$MMS_VERSION \
+    sagemaker-inference
+
+RUN useradd -m model-server \
+ && mkdir -p /home/model-server/tmp \
+ && chown -R model-server /home/model-server
+
+COPY neuron-entrypoint.py /usr/local/bin/dockerd-entrypoint.py
+COPY neuron-monitor.sh /usr/local/bin/neuron-monitor.sh
+COPY config.properties /etc/sagemaker-mms.properties
+
+RUN chmod +x /usr/local/bin/dockerd-entrypoint.py \
+ && chmod +x /usr/local/bin/neuron-monitor.sh
+
+ADD https://raw.githubusercontent.com/aws/deep-learning-containers/master/src/deep_learning_container.py /usr/local/bin/deep_learning_container.py
+
+RUN chmod +x /usr/local/bin/deep_learning_container.py
+
+#################################
+# Hugging Face specific section #
+#################################
+
+RUN curl -o /license.txt https://aws-dlc-licenses.s3.amazonaws.com/pytorch-2.7/license.txt
+
+# install Hugging Face libraries and its dependencies
+RUN pip install --no-cache-dir -U \
+    networkx~=2.6 \
+    transformers[sentencepiece,audio,vision]==${TRANSFORMERS_VERSION} \
+    diffusers==${DIFFUSERS_VERSION} \
+    compel \
+    controlnet-aux \
+    huggingface_hub==${HUGGINGFACE_HUB_VERSION} \
+    hf_transfer \
+    datasets==${DATASETS_VERSION} \
+    optimum-neuron==${OPTIMUM_NEURON_VERSION} \
+    "sagemaker-huggingface-inference-toolkit>=2.4.1,<3" \
+    sentence_transformers==${SENTENCE_TRANSFORMERS} \
+    peft==${PEFT_VERSION} \
+ && rm -rf ~/.cache/pip/*
+
+RUN HOME_DIR=/root \
+ && curl -o ${HOME_DIR}/oss_compliance.zip https://aws-dlinfra-utilities.s3.amazonaws.com/oss_compliance.zip \
+ && unzip ${HOME_DIR}/oss_compliance.zip -d ${HOME_DIR}/ \
+ && cp ${HOME_DIR}/oss_compliance/test/testOSSCompliance /usr/local/bin/testOSSCompliance \
+ && chmod +x /usr/local/bin/testOSSCompliance \
+ && chmod +x ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh \
+ && ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh ${HOME_DIR} ${PYTHON} \
+ && rm -rf ${HOME_DIR}/oss_compliance* \
+ # conda leaves an empty /root/.cache/conda/notices.cache file which is not removed by conda clean -ya
+ && rm -rf ${HOME_DIR}/.cache/conda
+
+ENV HF_HUB_USER_AGENT_ORIGIN="aws:sagemaker:neuron:inference:regular"
+EXPOSE 8080 8081
+ENTRYPOINT ["python", "/usr/local/bin/dockerd-entrypoint.py"]
+CMD ["serve"]
Lines changed: 9 additions & 0 deletions
@@ -0,0 +1,9 @@
+{
+  "77740": "protobuf, required by Neuron SDK. Affected versions of this package are vulnerable to a potential Denial of Service (DoS) attack due to unbounded recursion when parsing untrusted Protocol Buffers data.",
+  "77986": "In transformers, The vulnerability arises from insecure URL validation using the `startswith()` method, which can be bypassed through URL username injection. This allows attackers to craft URLs that appear to be from YouTube but resolve to malicious domains, potentially leading to phishing attacks, malware distribution, or data exfiltration. The issue is fixed in version 4.52.1. We cannot upgrade now, because it co dependent on Neuron SDK version and required by HF",
+  "78153": "A Regular Expression Denial of Service (ReDoS) vulnerability was discovered in the Hugging Face Transformers library. This vulnerability affects versions 4.51.3 and earlier, and is fixed in version 4.52.1.",
+  "78688": "also In transformers",
+  "79595": "also In transformers",
+  "79596": "also In transformers",
+  "79855": "also In transformers"
+}
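
Entry "77986" refers to a startswith()-based URL check that can be bypassed through the userinfo ("username") part of the authority. A small illustration of that pattern with a hypothetical URL; this is not code from transformers:

from urllib.parse import urlparse

# Everything before "@" in the authority is treated as userinfo, so a request
# to this URL actually goes to evil.example even though the prefix check passes.
url = "https://www.youtube.com@evil.example/watch?v=abc"

print(url.startswith("https://www.youtube.com"))  # True, so a naive check passes
print(urlparse(url).hostname)                     # evil.example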

release_images_general.yml

Lines changed: 2 additions & 2 deletions
@@ -44,14 +44,14 @@ release_images:
       public_registry: True
   4:
     framework: "vllm"
-    version: "0.10.2"
+    version: "0.11.0"
     arch_type: "x86"
     customer_type: "ec2"
     general:
       device_types: [ "gpu" ]
       python_versions: [ "py312" ]
       os_version: "ubuntu22.04"
-      cuda_version: "cu129"
+      cuda_version: "cu128"
       example: False
       disable_sm_tag: False
       force_release: False

release_images_training.yml

Lines changed: 14 additions & 1 deletion
@@ -103,4 +103,17 @@ release_images:
       cuda_version: "cu128"
       example: False
       disable_sm_tag: False
-      force_release: False
+      force_release: False
+  9:
+    framework: "huggingface_pytorch"
+    version: "2.8.0"
+    hf_transformers: "4.56.2"
+    arch_type: "x86"
+    training:
+      device_types: ["gpu"]
+      python_versions: [ "py312" ]
+      os_version: "ubuntu22.04"
+      cuda_version: "cu129"
+      example: False
+      disable_sm_tag: False
+      force_release: False

test/dlc_tests/sanity/test_pre_release.py

Lines changed: 1 addition & 12 deletions
@@ -482,8 +482,6 @@ def test_framework_and_neuron_sdk_version(neuron):
             if "training" in image or "neuronx" in image:
                 package_names = {"torch-neuronx": "torch_neuronx"}
                 # transformers is only available for the inference image
-                if "training" not in image:
-                    package_names["transformers-neuronx"] = "transformers_neuronx"
             else:
                 package_names = {"torch-neuron": "torch_neuron"}
         elif tested_framework == "tensorflow":
@@ -514,17 +512,8 @@ def test_framework_and_neuron_sdk_version(neuron):
             executable="python",
         )
 
-        installed_framework_version = output.stdout.strip()
+        installed_framework_version = output.stdout.strip().split("+")[0]
         version_list = release_manifest[package_name]
-        # temporary hack because transformers_neuronx reports its version as 0.6.x
-        if package_name == "transformers-neuronx":
-            if installed_framework_version == "0.12.x":
-                # skip the check due to transformers_neuronx version bug
-                # eg. transformers_neuronx.__version__=='0.10.x' for v0.11.351...
-                continue
-            version_list = [
-                ".".join(entry.split(".")[:2]) + ".x" for entry in release_manifest[package_name]
-            ]
         assert installed_framework_version in version_list, (
             f"framework {framework} version {installed_framework_version} "
             f"not found in released versions for that package: {version_list}"

test/sagemaker_tests/huggingface/inference/integration/__init__.py

Lines changed: 1 addition & 1 deletion
@@ -23,7 +23,7 @@
 
 model_dir = os.path.join(resources_path, "tiny-distilbert-sst-2")
 model_dir_sdxl = os.path.join(resources_path, "tiny-sdxl")
-model_dir_decoder = os.path.join(resources_path, "tiny-gpt2")
+model_dir_decoder = os.path.join(resources_path, "tiny-llama3")
 pt_model = "pt_model.tar.gz"
 tf_model = "tf_model.tar.gz"
 pt_neuron_model = "pt_neuron_model.tar.gz"

test/sagemaker_tests/huggingface/inference/integration/sagemaker/test_neuronx_decoder_hosting.py

Lines changed: 1 addition & 1 deletion
@@ -38,7 +38,7 @@
 # instances in the regions corresponding to their availability.
 # In future, we would like to configure the logic to run multiple `pytest` commands that can allow
 # us to test multiple instances in multiple regions for each image.
-@pytest.mark.model("tiny-gpt2")
+@pytest.mark.model("tiny-llama3")
 @pytest.mark.processor("neuronx")
 @pytest.mark.parametrize(
     "test_region,test_instance_type",
