
Commit 52757b3

Enable Intel ARC GPU test for vLLM OpenVINO (opea-project#856)

Signed-off-by: senhui2intel <[email protected]>
Co-authored-by: chen, suyue <[email protected]>

1 parent 09980b5 commit 52757b3
1 file changed: 128 additions, 0 deletions

@@ -0,0 +1,128 @@
#!/bin/bash
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

set -x

WORKPATH="$( cd "$( dirname "$0" )" && pwd )"
DOCKER_FILE="$WORKPATH"/../../comps/llms/text-generation/vllm/langchain/dependency/Dockerfile.intel_gpu

# Define variables
port=5033
RENDER_GROUP_ID=110   # host "render" group ID, required for /dev/dri GPU access
DOCKER_IMAGE="vllm-openvino:comps"
CONTAINER_NAME="test-comps-vllm-openvino-container"
HF_CACHE_DIR=$HOME/.cache/huggingface

function build_container() {
    docker build --no-cache -t $DOCKER_IMAGE \
        -f $DOCKER_FILE \
        . \
        --build-arg https_proxy=$https_proxy \
        --build-arg http_proxy=$http_proxy

    if [ $? -ne 0 ]; then
        echo "vllm-openvino build failed"
        exit 1
    else
        echo "vllm-openvino build succeeded"
    fi
}

# Function to start the Docker container
start_container() {
    docker run -d --rm --name=$CONTAINER_NAME \
        -p $port:$port \
        --ipc=host \
        -e HTTPS_PROXY=$https_proxy \
        -e HTTP_PROXY=$http_proxy \
        -v $HF_CACHE_DIR:/root/.cache/huggingface \
        --device=/dev/dri:/dev/dri \
        --group-add $RENDER_GROUP_ID \
        vllm-openvino:comps /bin/bash -c "\
            export VLLM_OPENVINO_DEVICE=GPU && \
            export VLLM_OPENVINO_ENABLE_QUANTIZED_WEIGHTS=ON && \
            python3 -m vllm.entrypoints.openai.api_server \
                --model Intel/neural-chat-7b-v3-3 \
                --host 0.0.0.0 \
                --port $port \
                --max_model_len 8192"

    # Check whether the service is fully ready (up to 300 polls, 3s apart)
    n=0
    until [[ "$n" -ge 300 ]]; do
        docker logs $CONTAINER_NAME > /tmp/$CONTAINER_NAME.log 2>&1
        n=$((n+1))
        if grep -q "Uvicorn running on" /tmp/$CONTAINER_NAME.log; then
            break
        fi
        sleep 3s
    done
}

# Cleanup function
cleanup() {
    # Stop and remove the Docker container and image
    cid=$(docker ps -aq --filter "name=$CONTAINER_NAME")
    if [[ -n "$cid" ]]; then docker stop $cid || docker rm $cid && sleep 1s; fi
    docker rmi -f $DOCKER_IMAGE
    rm /tmp/$CONTAINER_NAME.log
}

# Function to test an API endpoint
function test_api_endpoint() {
    local endpoint="$1"
    local expected_status="$2"

    # Make the HTTP request
    if test "$endpoint" = "v1/completions"
    then
        local response=$(curl "http://localhost:$port/$endpoint" \
            -H "Content-Type: application/json" \
            -d '{
                "model": "Intel/neural-chat-7b-v3-3",
                "prompt": "What is the key advantage of OpenVINO framework",
                "max_tokens": 300,
                "temperature": 0.7
            }' \
            --write-out '%{http_code}' \
            --silent \
            --output /dev/null)
    else
        local response=$(curl "http://localhost:$port/$endpoint" \
            --write-out '%{http_code}' \
            --silent \
            --output /dev/null)
    fi

    # Assert the response status code
    if [[ "$response" -eq "$expected_status" ]]; then
        echo "PASS: $endpoint returned expected status code: $expected_status"
    else
        echo "FAIL: $endpoint returned unexpected status code: $response (expected: $expected_status)"
        docker logs $CONTAINER_NAME
        exit 1
    fi
}

# Main function
main() {
    build_container
    start_container

    # Sleep to allow the container to start up fully
    sleep 10
    # Test the /v1/models API
    test_api_endpoint "v1/models" 200

    # Test the /v1/completions API
    test_api_endpoint "v1/completions" 200

    cleanup
}

# Call main function
main
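
For reference, a minimal sketch of how this test might be invoked on an Intel ARC host; the script filename below is hypothetical, and the getent check simply verifies that the hard-coded RENDER_GROUP_ID=110 matches the host's actual render group before the test runs:

    # Hypothetical invocation; the script name is illustrative only.
    getent group render    # confirm the host render group ID (the script assumes 110)
    export https_proxy=http://proxy.example.com:911    # only if behind a proxy
    bash test_llms_vllm_openvino_on_intel_arc.sh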
