Skip to content

Commit a0ff41d

Browse files
[VLLM][ARM64] Currency Release (#5154)
* build arm 64 vllm image * modify change log to add arm64 * make arm64 true * build 0.10.1 * build 0.10.1 add platform * build 0.10.1 add upstream commands * build 0.10.1 add upstream commands * build 0.10.1 build target fix * build 0.10.1 build target fix * build 0.10.1 * add pip setuptools * add pip setuptools * build without oss compliance * build without oss compliance * remove --mount * build base, wheel and final * build base, wheel and final * build arm64 * build arm64 * build arm64 * build arm64 * build arm64 * build arm64 * build arm64 * build arm64 * build arm64 * build arm64 * build arm64 * build arm64 * build arm64 * build arm64 * build arm64 * build arm64 * build arm64 * build arm64 * build arm64 * add max jobs * remove pytorch installation with pip * reduce layers * reduce layers * reduce layers * increase max jobs * try precompiled wheels * try precompiled wheels * add additional dep * add python installation in vllm-base * fix fun name * fix instance type * fix instance type * fix ec2 launch fuction as arm64 is non efa * fix ec2 launch fuction as arm64 is non efa * fix ec2 launch fuction as arm64 is non efa * fix ec2 launch fuction as arm64 is non efa * add sleep for manual testing * add sleep for manual testing * use precompiled * rebuild arm64 * rebuild arm64 * rebuild arm64 * test * test * try offline inference * try offline inference * try offline inference * try offline inference * try offline inference with new built image * remove commands * add cd command * add cd command * add cuda targt * modify docker image * modify docker image * modify file from github * add final target * add final target * add final target * remove xformers * build arm64 * build arm64 * build arm64 * add max jobs * add requirements * max job 20 * max job 20 * add agent testing * test * rebuild * rebuild * rebuild * rebuild * rebuild * add pytorch wheels * rebuild * test * test * test and build * test * test * test * test * test * test * test * test * test * rebuild * 
remove strands * rebuild and test agents * rebuild and test agents * rebuild and test agents * test x86 vllm * test x86 * test x86 * rebuild and test agents * rebuild and test agents * rebuild and test agents * rebuild and test agents * rebuild and test agents * rebuild and test agents * rebuild and test agents * rebuild and test agents * rebuild and test agents * rebuild and test agents * rebuild and test agents * rebuild and test agents * rebuild and test agents * rebuild and test agents * rebuild and test agents * change triton to 3.4.0 * test with main * test with main * test with main * use vllm v0 * test with vllm serve * increase attempts * add more logging * flashinfer wheels: * flashinfer wheels: * flashinfer wheels: * downgrade flashinfer * downgrade flashinfer and triton * downgrade flashinfer and triton * fic flashinfer * fix flashinfer * fix flashinfer * install flashinfer seperately * use float32 * try new vllm serve command with gpu memory utilization * test agents with new docker setup * test agents with new docker setup * test vllm with autogen * test vllm with autogen * test autogen vllm * format logs * format logs * format logs * format logs * test open ai example * final testing * Final build * revert toml * final testing * perform openai script test * perform openai script test * perform openai script test with reasoning * perform openai script test with reasoning * perform openai script test with reasoning * Try Qwen model * Try Qwen model * revert changes * add vllm in toml * remove test_agents.py * change version * remove changes in changelog * remove changes in changelog
1 parent 502da71 commit a0ff41d

File tree

13 files changed

+564
-156
lines changed

13 files changed

+564
-156
lines changed

dlc_developer_config.toml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -179,4 +179,7 @@ dlc-pr-stabilityai-pytorch-inference = ""
179179

180180
# EIA Inference
181181
dlc-pr-pytorch-eia-inference = ""
182-
dlc-pr-tensorflow-2-eia-inference = ""
182+
dlc-pr-tensorflow-2-eia-inference = ""
183+
184+
# vllm
185+
dlc-pr-vllm = ""

scripts/install_efa.sh

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,22 @@
22

33
set -ex
44

5+
ARCH=$(uname -m)
6+
case $ARCH in
7+
x86_64)
8+
ARCH_DIR="x86_64-linux-gnu"
9+
;;
10+
aarch64)
11+
ARCH_DIR="aarch64-linux-gnu"
12+
;;
13+
*)
14+
echo "Unsupported architecture: $ARCH"
15+
exit 1
16+
;;
17+
esac
18+
519
function check_libnccl_net_so {
6-
OFI_LIB_DIR="/opt/amazon/ofi-nccl/lib/x86_64-linux-gnu"
20+
OFI_LIB_DIR="/opt/amazon/ofi-nccl/lib/${ARCH_DIR}"
721
NCCL_NET_SO="$OFI_LIB_DIR/libnccl-net.so"
822

923
# Check if file exists

test/dlc_tests/sanity/test_boottime_container_security.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,11 @@ def test_security(image):
2020
)
2121
try:
2222
docker_exec_cmd = f"docker exec -i {container_name}"
23-
run(f"{docker_exec_cmd} python /test/bin/security_checks.py --image_uri {image}", hide=True)
23+
if "vllm" in image:
24+
run_command = f"python3 /test/bin/security_checks.py"
25+
else:
26+
run_command = f"python /test/bin/security_checks.py"
27+
28+
run(f"{docker_exec_cmd} {run_command} --image_uri {image}", hide=True)
2429
finally:
2530
run(f"docker rm -f {container_name}", hide=True)

test/dlc_tests/sanity/test_dlc_labels.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,10 @@ def test_dlc_major_version_label(image, region):
3131
@pytest.mark.integration("dlc_labels")
3232
@pytest.mark.model("N/A")
3333
def test_dlc_standard_labels(image, region):
34+
if "vllm" in image:
35+
pytest.skip(
36+
"vLLM images do not require test_dlc_standard_labels check as they are managed by vLLM devs. Skipping test."
37+
)
3438
customer_type_label_prefix = "ec2" if test_utils.is_ec2_image(image) else "sagemaker"
3539

3640
framework, fw_version = test_utils.get_framework_and_version_from_tag(image)

test/dlc_tests/sanity/test_ecr_scan.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -435,6 +435,10 @@ def test_ecr_enhanced_scan(image, ecr_client, sts_client, region):
435435
:param sts_client: boto3 Client for STS
436436
:param region: str Name of region where test is executed
437437
"""
438+
if "vllm" in image:
439+
pytest.skip(
440+
"vLLM images do not require test_ecr_enhanced_scan check as they are managed by vLLM devs. Skipping test."
441+
)
438442
LOGGER.info(f"Running test_ecr_enhanced_scan for image {image}")
439443
image = conduct_preprocessing_of_images_before_running_ecr_scans(
440444
image, ecr_client, sts_client, region

test/dlc_tests/sanity/test_pre_release.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -160,18 +160,20 @@ def test_python_version(image):
160160
:param image: ECR image URI
161161
"""
162162
ctx = Context()
163-
163+
command = ""
164164
py_version = ""
165165
for tag_split in image.split("-"):
166166
if tag_split.startswith("py"):
167167
if len(tag_split) > 3:
168168
py_version = f"Python {tag_split[2]}.{tag_split[3]}"
169+
command = f"python3 --version"
169170
else:
170171
py_version = f"Python {tag_split[2]}"
172+
command = f"python --version"
171173

172174
container_name = get_container_name("py-version", image)
173175
start_container(container_name, image, ctx)
174-
output = run_cmd_on_container(container_name, ctx, "python --version")
176+
output = run_cmd_on_container(container_name, ctx, command)
175177

176178
# Due to py2 deprecation, Python2 version gets streamed to stderr. Python installed via Conda also appears to
177179
# stream to stderr (in some cases).

0 commit comments

Comments (0)