Skip to content

Commit 211c9c5

Browse files
authored
vLLM DLC (#4839)
* initial dockerfile change * add vllm build type * add oss compliance * add python arg * update dockerfile * fix dockerfile * take security update * fix syntax * fix dockerfile, tests * fix tests * revert dev changes and format
1 parent 6baeba1 commit 211c9c5

File tree

10 files changed

+156
-12
lines changed

10 files changed

+156
-12
lines changed

data/common-ecr-scan-allowlist.json

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,35 @@
11
{
2+
"vllm": [
3+
{
4+
"description": "vLLM is an inference and serving engine for large language models. In a multi-node vLLM deployment using the V0 engine, vLLM uses ZeroMQ for some multi-node communication purposes. The secondary vLLM hosts open a `SUB` ZeroMQ socket and connect to an `XPUB` socket on the primary vLLM host. When data is received on this `SUB` socket, it is deserialized with `pickle`. This is unsafe, as it can be abused to execute code on a remote machine. Since the vulnerability exists in a client that connects to the primary vLLM host, this vulnerability serves as an escalation point. If the primary vLLM host is compromised, this vulnerability could be used to compromise the rest of the hosts in the vLLM deployment. Attackers could also use other means to exploit the vulnerability without requiring access to the primary vLLM host. One example would be the use of ARP cache poisoning to redirect traffic to a malicious endpoint used to deliver a payload with arbitrary code to execute on the target machine. Note that this issue o",
5+
"vulnerability_id": "CVE-2025-30165",
6+
"name": "CVE-2025-30165",
7+
"package_name": "vllm",
8+
"package_details": {
9+
"file_path": "/usr/local/lib/python3.12/dist-packages/vllm-0.8.5.dist-info/METADATA",
10+
"name": "vllm",
11+
"package_manager": "PYTHON",
12+
"version": "0.8.5",
13+
"release": null
14+
},
15+
"remediation": {
16+
"recommendation": {
17+
"text": "None Provided"
18+
}
19+
},
20+
"cvss_v3_score": 8.0,
21+
"cvss_v30_score": 0.0,
22+
"cvss_v31_score": 8.0,
23+
"cvss_v2_score": 0.0,
24+
"cvss_v3_severity": "HIGH",
25+
"source_url": "https://nvd.nist.gov/vuln/detail/CVE-2025-30165",
26+
"source": "NVD",
27+
"severity": "HIGH",
28+
"status": "ACTIVE",
29+
"title": "CVE-2025-30165 - vllm",
30+
"reason_to_ignore": "this vulnerability specifically affects the vllm package when used with the v0 engine, which is not the default engine in vLLM. The default engine is v1, which does not use ZeroMQ for communication. Therefore, this vulnerability does not affect the default configuration of vLLM."
31+
}
32+
],
233
"pip": [
334
{
435
"description": "An issue was discovered in pip (all versions) because it installs the version with the highest version number, even if the user had intended to obtain a private package from a private index. This only affects use of the --extra-index-url option, and exploitation requires that the package does not already exist in the public index (and thus the attacker can put the package there with an arbitrary version number). NOTE: it has been reported that this is intended functionality and the user is responsible for using --extra-index-url securely",

dlc_developer_config.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ deep_canary_mode = false
3636

3737
[build]
3838
# Add in frameworks you would like to build. By default, builds are disabled unless you specify building an image.
39-
# available frameworks - ["base", "autogluon", "huggingface_tensorflow", "huggingface_pytorch", "huggingface_tensorflow_trcomp", "huggingface_pytorch_trcomp", "pytorch_trcomp", "tensorflow", "pytorch", "stabilityai_pytorch"]
39+
# available frameworks - ["base", "vllm", "autogluon", "huggingface_tensorflow", "huggingface_pytorch", "huggingface_tensorflow_trcomp", "huggingface_pytorch_trcomp", "pytorch_trcomp", "tensorflow", "pytorch", "stabilityai_pytorch"]
4040
build_frameworks = []
4141

4242

scripts/install_efa.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ function install_efa {
3030
echo "hwloc_base_binding_policy = none" >> ${OPEN_MPI_PATH}/etc/openmpi-mca-params.conf
3131
echo "rmaps_base_mapping_policy = slot" >> ${OPEN_MPI_PATH}/etc/openmpi-mca-params.conf
3232
echo NCCL_DEBUG=INFO >> /etc/nccl.conf
33-
echo NCCL_SOCKET_IFNAME=^docker0 >> /etc/nccl.conf
33+
echo NCCL_SOCKET_IFNAME=^docker0,lo >> /etc/nccl.conf
3434

3535
# Install OpenSSH for MPI to communicate between containers, allow OpenSSH to talk to containers without asking for confirmation
3636
apt-get install -y --no-install-recommends \

src/image_builder.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -247,6 +247,8 @@ def image_builder(buildspec, image_types=[], device_types=[]):
247247
label_job_type = "inference"
248248
elif "base" in image_repo_uri:
249249
label_job_type = "base"
250+
elif "vllm" in image_repo_uri:
251+
label_job_type = "vllm"
250252
else:
251253
raise RuntimeError(
252254
f"Cannot find inference, training or base job type in {image_repo_uri}. "

test/dlc_tests/sanity/test_pre_release.py

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -573,6 +573,10 @@ def test_pip_check(image):
573573
574574
:param image: ECR image URI
575575
"""
576+
if "vllm" in image:
577+
pytest.skip(
578+
"vLLM images do not require pip check as they are managed by vLLM devs. Skipping test."
579+
)
576580

577581
allowed_exceptions = []
578582

@@ -715,9 +719,9 @@ def test_cuda_paths(gpu):
715719
:param gpu: gpu image uris
716720
"""
717721
image = gpu
718-
if "base" in image:
722+
if "base" in image or "vllm" in image:
719723
pytest.skip(
720-
"Base DLC doesn't have the same directory structure and buildspec as other images"
724+
"Base/vLLM DLC doesn't have the same directory structure and buildspec as other images"
721725
)
722726
if "example" in image:
723727
pytest.skip("Skipping Example Dockerfiles which are not explicitly tied to a cuda version")
@@ -857,8 +861,10 @@ def _test_framework_and_cuda_version(gpu, ec2_connection):
857861
:param ec2_connection: fixture to establish connection with an ec2 instance
858862
"""
859863
image = gpu
860-
if "base" in image:
861-
pytest.skip("Base DLC has no framework version in the tag. Skipping test.")
864+
if "base" in image or "vllm" in image:
865+
pytest.skip(
866+
"Base/vLLM DLC doesn't follow the assumptions made by inference/training. Skipping test."
867+
)
862868
tested_framework, tag_framework_version = get_framework_and_version_from_tag(image)
863869

864870
image_repo_name, _ = get_repository_and_tag_from_image_uri(image)
@@ -1110,8 +1116,8 @@ def test_core_package_version(image):
11101116
In this test, we ensure that if a core_packages.json file exists for an image, the packages installed in the image
11111117
satisfy the version constraints specified in the core_packages.json file.
11121118
"""
1113-
if "base" in image:
1114-
pytest.skip("Base images do not have core packages. Skipping test.")
1119+
if "base" in image or "vllm" in image:
1120+
pytest.skip("Base/vLLM images do not have core packages. Skipping test.")
11151121
core_packages_path = src_utils.get_core_packages_path(image)
11161122
if not os.path.exists(core_packages_path):
11171123
pytest.skip(f"Core packages file {core_packages_path} does not exist for {image}")
@@ -1160,9 +1166,9 @@ def test_package_version_regression_in_image(image):
11601166
keys in the buildspec - as these keys are used to extract the released image uri. Additionally, if the image is not already
11611167
released, this test would be skipped.
11621168
"""
1163-
if "base" in image:
1169+
if "base" in image or "vllm" in image:
11641170
pytest.skip(
1165-
"Base images don't have python packages that needs to be checked. Skipping test."
1171+
"Base/vLLM images don't have python packages that need to be checked. Skipping test."
11661172
)
11671173
dlc_path = os.getcwd().split("/test/")[0]
11681174
corresponding_image_spec = get_image_spec_from_buildspec(

test/dlc_tests/sanity/test_safety_report_file.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,10 @@ def test_safety_file_exists_and_is_valid(image):
7373
pytest.skip(
7474
"Base images do not require safety file as there isn't much python libs in it. Skipping test."
7575
)
76+
if "vllm" in image:
77+
pytest.skip(
78+
"vllm images do not require safety file as they are managed by vLLM devs. Skipping test."
79+
)
7680
repo_name, image_tag = image.split("/")[-1].split(":")
7781
# Make sure this container name doesn't conflict with the safety check test container name
7882
container_name = f"{repo_name}-{image_tag}-safety-file"

test/test_utils/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1892,6 +1892,7 @@ def get_framework_from_image_uri(image_uri):
18921892
"tensorflow": "tensorflow",
18931893
"autogluon": "autogluon",
18941894
"base": "base",
1895+
"vllm": "vllm",
18951896
}
18961897

18971898
for image_pattern, framework in framework_map.items():
@@ -2019,7 +2020,7 @@ def get_job_type_from_image(image_uri):
20192020
:return: Job Type
20202021
"""
20212022
tested_job_type = None
2022-
allowed_job_types = ("training", "inference", "base")
2023+
allowed_job_types = ("training", "inference", "base", "vllm")
20232024
for job_type in allowed_job_types:
20242025
if job_type in image_uri:
20252026
tested_job_type = job_type

test/test_utils/security.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -614,6 +614,14 @@ def construct_allowlist_from_ecr_scan_result(self, ecr_format_vulnerability_list
614614
for vulnerable_package in ecr_format_vulnerability["packageVulnerabilityDetails"][
615615
"vulnerablePackages"
616616
]:
617+
if "fixedInVersion" in vulnerable_package:
618+
fixed_version = vulnerable_package["fixedInVersion"].lower()
619+
if "esm" in fixed_version and "ubuntu" in fixed_version:
620+
LOGGER.info(
621+
f"Skipping ESM version {fixed_version} for package {vulnerable_package['name']}"
622+
)
623+
continue
624+
617625
allowlist_format_vulnerability_object = AllowListFormatVulnerabilityForEnhancedScan(
618626
**ecr_format_vulnerability
619627
)

vllm/buildspec.yml

Lines changed: 48 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1,48 @@
1-
# placeholder
1+
account_id: &ACCOUNT_ID <set-$ACCOUNT_ID-in-environment>
2+
prod_account_id: &PROD_ACCOUNT_ID 763104351884
3+
region: &REGION <set-$REGION-in-environment>
4+
framework: &FRAMEWORK vllm
5+
version: &VERSION "0.8.5"
6+
short_version: &SHORT_VERSION "0.8"
7+
arch_type: &ARCH_TYPE x86_64
8+
autopatch_build: "False"
9+
10+
repository_info:
11+
build_repository: &BUILD_REPOSITORY
12+
image_type: &IMAGE_TYPE gpu
13+
root: .
14+
repository_name: &REPOSITORY_NAME !join [ pr, "-", *FRAMEWORK ]
15+
repository: &REPOSITORY !join [ *ACCOUNT_ID, .dkr.ecr., *REGION, .amazonaws.com/, *REPOSITORY_NAME ]
16+
release_repository_name: &RELEASE_REPOSITORY_NAME !join [ *FRAMEWORK ]
17+
release_repository: &RELEASE_REPOSITORY !join [ *PROD_ACCOUNT_ID, .dkr.ecr., *REGION, .amazonaws.com/, *RELEASE_REPOSITORY_NAME ]
18+
19+
context:
20+
build_context: &BUILD_CONTEXT
21+
deep_learning_container:
22+
source: src/deep_learning_container.py
23+
target: deep_learning_container.py
24+
install_efa:
25+
source: scripts/install_efa.sh
26+
target: install_efa.sh
27+
28+
images:
29+
vllm_x86_64:
30+
<<: *BUILD_REPOSITORY
31+
context:
32+
<<: *BUILD_CONTEXT
33+
image_size_baseline: 20000
34+
device_type: &DEVICE_TYPE gpu
35+
cuda_version: &CUDA_VERSION cu124
36+
python_version: &DOCKER_PYTHON_VERSION py3
37+
tag_python_version: &TAG_PYTHON_VERSION py312
38+
os_version: &OS_VERSION ubuntu22.04
39+
tag: !join [ *DEVICE_TYPE, "-", *CUDA_VERSION, "-", *TAG_PYTHON_VERSION, "-", *OS_VERSION, "-", *ARCH_TYPE, "-ec2" ]
40+
latest_release_tag: !join [ *DEVICE_TYPE, "-", *CUDA_VERSION, "-", *TAG_PYTHON_VERSION, "-", *OS_VERSION, "-", *ARCH_TYPE, "-ec2" ]
41+
docker_file: !join [ *FRAMEWORK, /, *ARCH_TYPE, /, *DEVICE_TYPE, /, *CUDA_VERSION, /Dockerfile ]
42+
target: final
43+
build: true
44+
enable_common_stage_build: false
45+
test_configs:
46+
test_platforms:
47+
- sanity
48+
- security

vllm/x86_64/gpu/cu124/Dockerfile

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
FROM docker.io/vllm/vllm-openai:v0.8.5 as final
2+
ARG PYTHON="python3"
3+
ARG EFA_VERSION="1.40.0"
4+
LABEL maintainer="Amazon AI"
5+
LABEL dlc_major_version="1"
6+
ENV DEBIAN_FRONTEND=noninteractive \
7+
LANG=C.UTF-8 \
8+
LC_ALL=C.UTF-8 \
9+
DLC_CONTAINER_TYPE=base \
10+
# Python won’t try to write .pyc or .pyo files on the import of source modules
11+
# Force stdin, stdout and stderr to be totally unbuffered. Good for logging
12+
PYTHONDONTWRITEBYTECODE=1 \
13+
PYTHONUNBUFFERED=1 \
14+
PYTHONIOENCODING=UTF-8 \
15+
LD_LIBRARY_PATH="/usr/local/lib:/opt/amazon/ofi-nccl/lib:/opt/amazon/openmpi/lib:/opt/amazon/efa/lib:/usr/local/cuda/lib64:${LD_LIBRARY_PATH}" \
16+
PATH="/opt/amazon/openmpi/bin:/opt/amazon/efa/bin:/usr/local/cuda/bin:${PATH}"
17+
18+
WORKDIR /
19+
20+
COPY install_efa.sh install_efa.sh
21+
COPY deep_learning_container.py /usr/local/bin/deep_learning_container.py
22+
RUN chmod +x /usr/local/bin/deep_learning_container.py && \
23+
# Install EFA
24+
bash install_efa.sh ${EFA_VERSION} && \
25+
rm install_efa.sh && \
26+
# OSS compliance and software update
27+
apt-get update && \
28+
apt-get upgrade -y && \
29+
apt-get install -y --allow-change-held-packages --no-install-recommends unzip && \
30+
apt-get clean && \
31+
HOME_DIR=/root && \
32+
curl -o ${HOME_DIR}/oss_compliance.zip https://aws-dlinfra-utilities.s3.amazonaws.com/oss_compliance.zip && \
33+
unzip ${HOME_DIR}/oss_compliance.zip -d ${HOME_DIR}/ && \
34+
cp ${HOME_DIR}/oss_compliance/test/testOSSCompliance /usr/local/bin/testOSSCompliance && \
35+
chmod +x /usr/local/bin/testOSSCompliance && \
36+
chmod +x ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh && \
37+
${HOME_DIR}/oss_compliance/generate_oss_compliance.sh ${HOME_DIR} ${PYTHON} && \
38+
# create symlink for python
39+
ln -s /usr/bin/python3 /usr/bin/python && \
40+
# clean up
41+
rm -rf ${HOME_DIR}/oss_compliance* && \
42+
rm -rf /tmp/tmp* && \
43+
rm -rf /tmp/uv* && \
44+
rm -rf /var/lib/apt/lists/* && \
45+
rm -rf /root/.cache || true

0 commit comments

Comments
 (0)