|
| 1 | +#!/bin/bash |
| 2 | +# Copyright (C) 2024 Intel Corporation |
| 3 | +# SPDX-License-Identifier: Apache-2.0 |
| 4 | + |
# Trace every command so the CI log shows the exact docker/curl invocations.
set -x

# Absolute directory of this script; the Dockerfile is located relative to it.
WORKPATH="$(cd "$(dirname "$0")" && pwd)"
DOCKER_FILE="${WORKPATH}/../../comps/llms/text-generation/vllm/langchain/dependency/Dockerfile.intel_gpu"

# Test configuration
port=5033                                             # published by docker run and queried by curl
RENDER_GROUP_ID=110                                   # passed to docker run --group-add
DOCKER_IMAGE="vllm-openvino:comps"                    # tag of the image under test
CONTAINER_NAME="test-comps-vllm-openvino-container"   # name of the test container
HF_CACHE_DIR="${HOME}/.cache/huggingface"             # mounted at /root/.cache/huggingface
| 16 | + |
#######################################
# Build the vLLM OpenVINO test image from the Intel GPU Dockerfile.
# Globals:   DOCKER_IMAGE, DOCKER_FILE (read)
#            https_proxy, http_proxy (read; may be unset or empty)
# Arguments: none
# Outputs:   a one-line build status message on stdout
# Returns:   0 on success; exits the script with status 1 if the build fails
#######################################
function build_container() {
    # Every expansion is quoted so that a checkout path containing spaces
    # still reaches docker as a single -f argument.
    if ! docker build --no-cache -t "$DOCKER_IMAGE" \
        -f "$DOCKER_FILE" \
        --build-arg https_proxy="${https_proxy:-}" \
        --build-arg http_proxy="${http_proxy:-}" \
        .; then
        echo "vllm-openvino built fail"
        exit 1
    fi

    echo "vllm-openvino built successful"
}
| 31 | + |
#######################################
# Start the vLLM OpenVINO API server container and wait until it is ready.
# Globals:   CONTAINER_NAME, DOCKER_IMAGE, port, HF_CACHE_DIR, RENDER_GROUP_ID
#            (read); https_proxy, http_proxy (read; may be unset or empty)
# Arguments: none
# Outputs:   snapshot of the container log in /tmp/$CONTAINER_NAME.log;
#            diagnostics on stderr when startup fails
# Returns:   0 once "Uvicorn running on" appears in the container log,
#            1 if docker run fails or the server is not ready after 300 polls
#######################################
start_container() {
    local log_file="/tmp/${CONTAINER_NAME}.log"
    local max_polls=300
    local n=0

    # $port is expanded by this shell before the command string reaches the
    # container; backslash-newline inside the double quotes joins the lines.
    docker run -d --rm --name="$CONTAINER_NAME" \
        -p "${port}:${port}" \
        --ipc=host \
        -e HTTPS_PROXY="${https_proxy:-}" \
        -e HTTP_PROXY="${http_proxy:-}" \
        -v "$HF_CACHE_DIR":/root/.cache/huggingface \
        --device=/dev/dri:/dev/dri \
        --group-add "$RENDER_GROUP_ID" \
        "$DOCKER_IMAGE" /bin/bash -c "\
            export VLLM_OPENVINO_DEVICE=GPU && \
            export VLLM_OPENVINO_ENABLE_QUANTIZED_WEIGHTS=ON && \
            python3 -m vllm.entrypoints.openai.api_server \
                --model Intel/neural-chat-7b-v3-3 \
                --host 0.0.0.0 \
                --port $port \
                --max_model_len 8192" || {
        # Without a container there is nothing to poll; fail immediately
        # instead of waiting out the full readiness timeout.
        echo "failed to start container $CONTAINER_NAME" >&2
        return 1
    }

    # Poll the container log until the server announces it is listening.
    until [[ "$n" -ge "$max_polls" ]]; do
        docker logs "$CONTAINER_NAME" > "$log_file" 2>&1
        n=$((n+1))
        if grep -q "Uvicorn running on" "$log_file"; then
            return 0
        fi
        sleep 3s
    done

    echo "vllm-openvino service was not ready after $max_polls checks" >&2
    return 1
}
| 64 | + |
#######################################
# Stop the test container, then remove the test image and the log snapshot.
# Globals:   CONTAINER_NAME, DOCKER_IMAGE (read)
# Arguments: none
# Returns:   status of the final rm -f, which is 0 even when the log file
#            was never created
#######################################
cleanup() {
    local cids
    cids=$(docker ps -aq --filter "name=$CONTAINER_NAME")

    if [[ -n "$cids" ]]; then
        # Fall back to docker rm only when docker stop fails; pause briefly
        # after either one succeeds.
        # shellcheck disable=SC2086 # IDs are whitespace-separated; splitting is intended
        if docker stop $cids || docker rm $cids; then
            sleep 1s
        fi
    fi

    docker rmi -f "$DOCKER_IMAGE"
    # -f: the log does not exist when cleanup runs before the container started.
    rm -f "/tmp/${CONTAINER_NAME}.log"
}
| 73 | + |
#######################################
# Request an endpoint of the local server and check its HTTP status code.
# Globals:   port, CONTAINER_NAME (read)
# Arguments: $1 - endpoint path relative to http://localhost:$port/
#                 ("v1/completions" is sent a JSON completion request;
#                 any other endpoint gets a plain GET)
#            $2 - expected HTTP status code
# Outputs:   PASS/FAIL line on stdout; the container log on failure
# Returns:   0 on match; exits the script with status 1 on mismatch
#######################################
function test_api_endpoint {
    local endpoint="$1"
    local expected_status="$2"
    local response
    local curl_rc=0
    local -a curl_args=(--write-out '%{http_code}' --silent --output /dev/null)

    if [[ "$endpoint" == "v1/completions" ]]; then
        curl_args+=(
            -H "Content-Type: application/json"
            -d '{
                "model": "Intel/neural-chat-7b-v3-3",
                "prompt": "What is the key advantage of Openvino framework",
                "max_tokens": 300,
                "temperature": 0.7
            }'
        )
    fi

    # Assigned separately from `local` so curl's exit status is not masked.
    response=$(curl "http://localhost:${port}/${endpoint}" "${curl_args[@]}") || curl_rc=$?

    # String comparison: the curl output is never evaluated as arithmetic.
    if [[ "$response" == "$expected_status" ]]; then
        echo "PASS: $endpoint returned expected status code: $expected_status"
    else
        echo "FAIL: $endpoint returned unexpected status code: $response (expected: $expected_status)"
        if [[ "$curl_rc" -ne 0 ]]; then
            echo "curl exited with status $curl_rc"
        fi
        docker logs "$CONTAINER_NAME"
        exit 1
    fi
}
| 109 | + |
#######################################
# Build the image, start the server, exercise its API, and always clean up.
# Globals:   none directly; calls build_container, start_container,
#            test_api_endpoint and cleanup
# Arguments: none
# Returns:   0 when every step passes; the script exits with status 1 if
#            any step fails
#######################################
main() {
    # Run cleanup on every exit path. build_container and test_api_endpoint
    # call `exit 1` on failure, which would otherwise leave the container
    # running and the image behind.
    trap cleanup EXIT

    build_container
    start_container || exit 1

    # Sleep to allow the container to start up fully
    sleep 10

    # Test the /v1/models API
    test_api_endpoint "v1/models" 200

    # Test the /v1/completions API
    test_api_endpoint "v1/completions" 200
}
| 126 | + |
# Script entry point: run the whole test flow.
main "$@"
0 commit comments