Skip to content

Commit a6221b5

Browse files
Merge branch 'master' into vllm-sm
2 parents 71642ce + d75d15c commit a6221b5

File tree

12 files changed

+217
-24
lines changed

12 files changed

+217
-24
lines changed

dlc_developer_config.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -182,4 +182,4 @@ dlc-pr-pytorch-eia-inference = ""
182182
dlc-pr-tensorflow-2-eia-inference = ""
183183

184184
# vllm
185-
dlc-pr-vllm = ""
185+
dlc-pr-vllm = ""

huggingface/pytorch/inference/buildspec-neuronx.yml

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,8 @@ account_id: &ACCOUNT_ID <set-$ACCOUNT_ID-in-environment>
22
region: &REGION <set-$REGION-in-environment>
33
base_framework: &BASE_FRAMEWORK pytorch
44
framework: &FRAMEWORK !join [ "huggingface_", *BASE_FRAMEWORK]
5-
version: &VERSION 2.1.2
6-
short_version: &SHORT_VERSION "2.1"
5+
version: &VERSION 2.7.1
6+
short_version: &SHORT_VERSION "2.7"
77
contributor: &CONTRIBUTOR huggingface
88
arch_type: x86
99

@@ -34,9 +34,9 @@ images:
3434
device_type: &DEVICE_TYPE neuronx
3535
python_version: &DOCKER_PYTHON_VERSION py3
3636
tag_python_version: &TAG_PYTHON_VERSION py310
37-
neuron_sdk_version: &NEURON_SDK_VERSION sdk2.20.0
38-
os_version: &OS_VERSION ubuntu20.04
39-
transformers_version: &TRANSFORMERS_VERSION 4.43.2
37+
neuron_sdk_version: &NEURON_SDK_VERSION sdk2.24.1
38+
os_version: &OS_VERSION ubuntu22.04
39+
transformers_version: &TRANSFORMERS_VERSION 4.51.3
4040
tag: !join [ *VERSION, '-', 'transformers', *TRANSFORMERS_VERSION, '-', *DEVICE_TYPE, '-', *TAG_PYTHON_VERSION,"-", *NEURON_SDK_VERSION, '-', *OS_VERSION ]
4141
docker_file: !join [ docker/, *SHORT_VERSION, /, *DOCKER_PYTHON_VERSION, /, *NEURON_SDK_VERSION, /Dockerfile., *DEVICE_TYPE ]
4242
context:
Lines changed: 196 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,196 @@
1+
FROM ubuntu:22.04
2+
3+
LABEL dlc_major_version="1"
4+
LABEL maintainer="Amazon AI"
5+
LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port=true
6+
7+
ARG PYTHON=python3.10
8+
ARG PYTHON_VERSION=3.10.12
9+
ARG MMS_VERSION=1.1.11
10+
ARG MAMBA_VERSION=23.1.0-4
11+
12+
# Neuron SDK components version numbers
13+
ARG NEURONX_FRAMEWORK_VERSION=2.7.0.2.8.6734
14+
ARG NEURONX_DISTRIBUTED_VERSION=0.13.14393
15+
ARG NEURONX_CC_VERSION=2.19.8089.0
16+
ARG NEURONX_COLLECTIVES_LIB_VERSION=2.26.43.0-47cc904ea
17+
ARG NEURONX_RUNTIME_LIB_VERSION=2.26.42.0-2ff3b5c7d
18+
ARG NEURONX_TOOLS_VERSION=2.24.54.0
19+
20+
# HF ARGS
21+
ARG TRANSFORMERS_VERSION
22+
ARG DIFFUSERS_VERSION=0.35.1
23+
ARG HUGGINGFACE_HUB_VERSION=0.35.0
24+
ARG OPTIMUM_NEURON_VERSION=0.3.0
25+
ARG SENTENCE_TRANSFORMERS=5.1.0
26+
ARG PEFT_VERSION=0.17.0
27+
ARG DATASETS_VERSION=4.1.0
28+
29+
# See http://bugs.python.org/issue19846
30+
ENV LANG C.UTF-8
31+
ENV LD_LIBRARY_PATH /opt/aws/neuron/lib:/lib/x86_64-linux-gnu:/opt/conda/lib/:$LD_LIBRARY_PATH
32+
ENV PATH /opt/conda/bin:/opt/aws/neuron/bin:$PATH
33+
ENV SAGEMAKER_SERVING_MODULE sagemaker_pytorch_serving_container.serving:main
34+
ENV TEMP=/home/model-server/tmp
35+
36+
RUN apt-get update \
37+
&& apt-get upgrade -y \
38+
&& apt-get install -y --no-install-recommends \
39+
apt-transport-https \
40+
build-essential \
41+
ca-certificates \
42+
cmake \
43+
curl \
44+
emacs \
45+
git \
46+
gnupg2 \
47+
gpg-agent \
48+
jq \
49+
libgl1-mesa-glx \
50+
libglib2.0-0 \
51+
libsm6 \
52+
libxext6 \
53+
libxrender-dev \
54+
libcap-dev \
55+
libhwloc-dev \
56+
openjdk-11-jdk \
57+
unzip \
58+
vim \
59+
wget \
60+
zlib1g-dev \
61+
&& rm -rf /var/lib/apt/lists/* \
62+
&& rm -rf /tmp/tmp* \
63+
&& apt-get clean
64+
65+
RUN echo "deb https://apt.repos.neuron.amazonaws.com focal main" > /etc/apt/sources.list.d/neuron.list
66+
RUN wget -qO - https://apt.repos.neuron.amazonaws.com/GPG-PUB-KEY-AMAZON-AWS-NEURON.PUB | apt-key add -
67+
68+
# Install Neuronx tools
69+
RUN apt-get update \
70+
&& apt-get install -y \
71+
aws-neuronx-tools=$NEURONX_TOOLS_VERSION \
72+
aws-neuronx-collectives=$NEURONX_COLLECTIVES_LIB_VERSION \
73+
aws-neuronx-runtime-lib=$NEURONX_RUNTIME_LIB_VERSION \
74+
&& rm -rf /var/lib/apt/lists/* \
75+
&& rm -rf /tmp/tmp* \
76+
&& apt-get clean
77+
78+
# https://github.com/docker-library/openjdk/issues/261 https://github.com/docker-library/openjdk/pull/263/files
79+
RUN keytool -importkeystore -srckeystore /etc/ssl/certs/java/cacerts -destkeystore /etc/ssl/certs/java/cacerts.jks -deststoretype JKS -srcstorepass changeit -deststorepass changeit -noprompt; \
80+
mv /etc/ssl/certs/java/cacerts.jks /etc/ssl/certs/java/cacerts; \
81+
/var/lib/dpkg/info/ca-certificates-java.postinst configure;
82+
83+
RUN curl -L -o ~/mambaforge.sh https://github.com/conda-forge/miniforge/releases/download/${MAMBA_VERSION}/Mambaforge-${MAMBA_VERSION}-Linux-x86_64.sh \
84+
&& chmod +x ~/mambaforge.sh \
85+
&& ~/mambaforge.sh -b -p /opt/conda \
86+
&& rm ~/mambaforge.sh \
87+
&& /opt/conda/bin/conda update -y conda \
88+
&& /opt/conda/bin/conda install -c conda-forge -y \
89+
python=$PYTHON_VERSION \
90+
pyopenssl \
91+
cython \
92+
mkl-include \
93+
mkl \
94+
botocore \
95+
parso \
96+
scipy \
97+
typing \
98+
# The two packages below ship with the miniconda base but not with mamba, so they must be installed explicitly
99+
conda-content-trust \
100+
charset-normalizer \
101+
&& /opt/conda/bin/conda update -y conda \
102+
&& /opt/conda/bin/conda clean -ya
103+
104+
RUN conda install -c conda-forge \
105+
scikit-learn \
106+
h5py \
107+
requests \
108+
&& conda clean -ya \
109+
&& pip install --upgrade pip --trusted-host pypi.org --trusted-host files.pythonhosted.org \
110+
&& ln -s /opt/conda/bin/pip /usr/local/bin/pip3 \
111+
&& pip install packaging \
112+
enum-compat \
113+
ipython \
114+
&& rm -rf ~/.cache/pip/*
115+
116+
# Every requirement that contains a shell metacharacter (>, <, *, space) is quoted:
# RUN uses /bin/sh, where an unquoted "opencv-python>=4.8.1.78" is parsed as a
# redirection to a file named "=4.8.1.78" and the version constraint is silently lost.
RUN pip install --no-cache-dir -U \
117+
"opencv-python>=4.8.1.78" \
118+
"numpy>=1.22.2, <=1.25.2" \
119+
"scipy>=1.8.0" \
120+
six \
121+
"pillow>=10.0.1" \
122+
"awscli<2" \
123+
"pandas==1.*" \
124+
boto3 \
125+
"cryptography<46,>=41.0.5" \
126+
"protobuf>=3.20.3, <4" \
127+
"networkx~=2.6" \
128+
&& pip install --no-deps --no-cache-dir -U torchvision==0.22.* \
129+
&& rm -rf ~/.cache/pip/*
130+
131+
# Install Neuronx-cc and PyTorch
132+
RUN pip install --index-url https://pip.repos.neuron.amazonaws.com \
133+
--extra-index-url https://pypi.org/simple \
134+
--trusted-host pip.repos.neuron.amazonaws.com \
135+
neuronx-cc==$NEURONX_CC_VERSION \
136+
torch-neuronx==$NEURONX_FRAMEWORK_VERSION \
137+
neuronx_distributed==$NEURONX_DISTRIBUTED_VERSION
138+
139+
WORKDIR /
140+
141+
RUN pip install --no-cache-dir \
142+
multi-model-server==$MMS_VERSION \
143+
sagemaker-inference
144+
145+
RUN useradd -m model-server \
146+
&& mkdir -p /home/model-server/tmp \
147+
&& chown -R model-server /home/model-server
148+
149+
COPY neuron-entrypoint.py /usr/local/bin/dockerd-entrypoint.py
150+
COPY neuron-monitor.sh /usr/local/bin/neuron-monitor.sh
151+
COPY config.properties /etc/sagemaker-mms.properties
152+
153+
RUN chmod +x /usr/local/bin/dockerd-entrypoint.py \
154+
&& chmod +x /usr/local/bin/neuron-monitor.sh
155+
156+
ADD https://raw.githubusercontent.com/aws/deep-learning-containers/master/src/deep_learning_container.py /usr/local/bin/deep_learning_container.py
157+
158+
RUN chmod +x /usr/local/bin/deep_learning_container.py
159+
160+
#################################
161+
# Hugging Face specific section #
162+
#################################
163+
164+
RUN curl -o /license.txt https://aws-dlc-licenses.s3.amazonaws.com/pytorch-2.7/license.txt
165+
166+
# Install Hugging Face libraries and their dependencies
167+
RUN pip install --no-cache-dir -U \
168+
networkx~=2.6 \
169+
transformers[sentencepiece,audio,vision]==${TRANSFORMERS_VERSION} \
170+
diffusers==${DIFFUSERS_VERSION} \
171+
compel \
172+
controlnet-aux \
173+
huggingface_hub==${HUGGINGFACE_HUB_VERSION} \
174+
hf_transfer \
175+
datasets==${DATASETS_VERSION} \
176+
optimum-neuron==${OPTIMUM_NEURON_VERSION} \
177+
"sagemaker-huggingface-inference-toolkit>=2.4.1,<3" \
178+
sentence_transformers==${SENTENCE_TRANSFORMERS} \
179+
peft==${PEFT_VERSION} \
180+
&& rm -rf ~/.cache/pip/*
181+
182+
RUN HOME_DIR=/root \
183+
&& curl -o ${HOME_DIR}/oss_compliance.zip https://aws-dlinfra-utilities.s3.amazonaws.com/oss_compliance.zip \
184+
&& unzip ${HOME_DIR}/oss_compliance.zip -d ${HOME_DIR}/ \
185+
&& cp ${HOME_DIR}/oss_compliance/test/testOSSCompliance /usr/local/bin/testOSSCompliance \
186+
&& chmod +x /usr/local/bin/testOSSCompliance \
187+
&& chmod +x ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh \
188+
&& ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh ${HOME_DIR} ${PYTHON} \
189+
&& rm -rf ${HOME_DIR}/oss_compliance* \
190+
# conda leaves an empty /root/.cache/conda/notices.cache file which is not removed by conda clean -ya
191+
&& rm -rf ${HOME_DIR}/.cache/conda
192+
193+
ENV HF_HUB_USER_AGENT_ORIGIN="aws:sagemaker:neuron:inference:regular"
194+
EXPOSE 8080 8081
195+
ENTRYPOINT ["python", "/usr/local/bin/dockerd-entrypoint.py"]
196+
CMD ["serve"]
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
{
2+
"77740": "protobuf, required by Neuron SDK. Affected versions of this package are vulnerable to a potential Denial of Service (DoS) attack due to unbounded recursion when parsing untrusted Protocol Buffers data.",
3+
"77986": "In transformers, the vulnerability arises from insecure URL validation using the `startswith()` method, which can be bypassed through URL username injection. This allows attackers to craft URLs that appear to be from YouTube but resolve to malicious domains, potentially leading to phishing attacks, malware distribution, or data exfiltration. The issue is fixed in version 4.52.1. We cannot upgrade now because the transformers version is co-dependent on the Neuron SDK version and is required by HF.",
4+
"78153": "A Regular Expression Denial of Service (ReDoS) vulnerability was discovered in the Hugging Face Transformers library. This vulnerability affects versions 4.51.3 and earlier, and is fixed in version 4.52.1.",
5+
"78688": "Also in transformers",
6+
"79595": "Also in transformers",
7+
"79596": "Also in transformers",
8+
"79855": "Also in transformers"
9+
}

test/dlc_tests/sanity/test_pre_release.py

Lines changed: 1 addition & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -482,8 +482,6 @@ def test_framework_and_neuron_sdk_version(neuron):
482482
if "training" in image or "neuronx" in image:
483483
package_names = {"torch-neuronx": "torch_neuronx"}
484484
# transformers is only available for the inference image
485-
if "training" not in image:
486-
package_names["transformers-neuronx"] = "transformers_neuronx"
487485
else:
488486
package_names = {"torch-neuron": "torch_neuron"}
489487
elif tested_framework == "tensorflow":
@@ -514,17 +512,8 @@ def test_framework_and_neuron_sdk_version(neuron):
514512
executable="python",
515513
)
516514

517-
installed_framework_version = output.stdout.strip()
515+
installed_framework_version = output.stdout.strip().split("+")[0]
518516
version_list = release_manifest[package_name]
519-
# temporary hack because transformers_neuronx reports its version as 0.6.x
520-
if package_name == "transformers-neuronx":
521-
if installed_framework_version == "0.12.x":
522-
# skip the check due to transformers_neuronx version bug
523-
# eg. transformers_neuronx.__version__=='0.10.x' for v0.11.351...
524-
continue
525-
version_list = [
526-
".".join(entry.split(".")[:2]) + ".x" for entry in release_manifest[package_name]
527-
]
528517
assert installed_framework_version in version_list, (
529518
f"framework {framework} version {installed_framework_version} "
530519
f"not found in released versions for that package: {version_list}"

test/sagemaker_tests/huggingface/inference/integration/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323

2424
model_dir = os.path.join(resources_path, "tiny-distilbert-sst-2")
2525
model_dir_sdxl = os.path.join(resources_path, "tiny-sdxl")
26-
model_dir_decoder = os.path.join(resources_path, "tiny-gpt2")
26+
model_dir_decoder = os.path.join(resources_path, "tiny-llama3")
2727
pt_model = "pt_model.tar.gz"
2828
tf_model = "tf_model.tar.gz"
2929
pt_neuron_model = "pt_neuron_model.tar.gz"

test/sagemaker_tests/huggingface/inference/integration/sagemaker/test_neuronx_decoder_hosting.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@
3838
# instances in the regions corresponding to their availability.
3939
# In the future, we would like to configure the logic to run multiple `pytest` commands that can allow
4040
# us to test multiple instances in multiple regions for each image.
41-
@pytest.mark.model("tiny-gpt2")
41+
@pytest.mark.model("tiny-llama3")
4242
@pytest.mark.processor("neuronx")
4343
@pytest.mark.parametrize(
4444
"test_region,test_instance_type",

test/sagemaker_tests/huggingface/inference/requirements.txt

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,7 @@
11
boto3
22
coverage
33
# Docker v7.0.0 breaks compatibility with Docker Compose v1 (SageMaker Local)
4-
docker<=6.1.3
5-
docker-compose
4+
docker>=5,<=6.1.3
65
flake8==3.7.7
76
Flask==1.1.1
87
mock
@@ -12,13 +11,13 @@ pytest-rerunfailures
1211
pytest-xdist
1312
PyYAML
1413
protobuf>=3.20,<=3.20.2
15-
sagemaker>=2,<3
14+
sagemaker>=2.237.0,<3
1615
six
1716
requests<2.32.0
1817
requests_mock
1918
Pillow
2019
retrying==1.3.3
21-
urllib3==1.26.0
20+
urllib3>=1.26.8
2221
pluggy>=1.5,<2
2322
requests_mock
2423
sagemaker-inference
Binary file not shown.

0 commit comments

Comments
 (0)