Skip to content

Commit be76800

Browse files
ci: add vllm action and integration to image test
Co-authored-by: Derek Higgins <derekh@redhat.com>
Signed-off-by: Nathan Weinberg <nweinber@redhat.com>
1 parent dac9825 commit be76800

File tree

2 files changed

+35
-2
lines changed

2 files changed

+35
-2
lines changed
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
---
# Composite action: start a CPU vLLM container serving Llama-3.2-1B-Instruct
# and block until its /health endpoint answers (up to 15 minutes).
name: Setup VLLM
description: Start VLLM
runs:
  using: "composite"
  steps:
    - name: Start VLLM
      shell: bash
      run: |
        # Fail the step immediately if docker run (or any later command)
        # fails, instead of silently waiting out the 900 s health timeout.
        set -euo pipefail

        # Start vllm container.
        # Args after the image name are vllm's own CLI flags:
        # tool-call parsing enabled, model baked into the image at
        # /root/.cache, published under its upstream HF name.
        docker run -d \
          --name vllm \
          -p 8000:8000 \
          --privileged=true \
          quay.io/higginsd/vllm-cpu:65393ee064 \
          --host 0.0.0.0 \
          --port 8000 \
          --enable-auto-tool-choice \
          --tool-call-parser llama3_json \
          --model /root/.cache/Llama-3.2-1B-Instruct \
          --served-model-name meta-llama/Llama-3.2-1B-Instruct

        # Wait for vllm to be ready.
        # -f: non-2xx is a failure (keeps the until-loop polling);
        # -sS: silence the progress meter but still surface real errors.
        echo "Waiting for vllm to be ready..."
        timeout 900 bash -c 'until curl -fsS http://localhost:8000/health; do
          echo "Waiting for vllm..."
          sleep 5
        done'

.github/workflows/redhat-distro-container.yml

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -50,14 +50,18 @@ jobs:
5050
cache-from: type=gha
5151
cache-to: type=gha,mode=max
5252

53+
- name: Setup vllm for image test
54+
id: vllm
55+
uses: ./.github/actions/setup-vllm
56+
5357
- name: Test image
5458
id: test
5559
run: |
5660
set -euo pipefail
5761
# Start llama stack
5862
CID="$(docker run -d --pull=never \
5963
-p 8321:8321 \
60-
--env INFERENCE_MODEL=dummy \
64+
--env INFERENCE_MODEL=meta-llama/Llama-3.2-1B-Instruct \
6165
--name llama-stack \
6266
${{ env.IMAGE_NAME }}:${{ github.sha }})"
6367
trap 'docker rm -f "$CID" >/dev/null 2>&1 || true' EXIT
@@ -67,7 +71,9 @@ jobs:
6771
for i in {1..60}; do
6872
echo "Attempt $i to connect to Llama Stack..."
6973
if curl -fsS --max-time 2 http://127.0.0.1:8321/v1/health | grep -q '"status":"OK"'; then
70-
echo "Llama Stack server is up :)"
74+
MODEL_RETURNED=$(curl http://127.0.0.1:8321/v1/openai/chat/completions -H "Content-Type: application/json" -d "{\"model\": \"meta-llama/Llama-3.2-1B-Instruct\",\"messages\": [{\"role\": \"user\", \"content\": \"What color is grass?\"}], \"max_tokens\": 1, \"temperature\": 0.0}" | jq -r '.model')
75+
[ "$MODEL_RETURNED" != "meta-llama/Llama-3.2-1B-Instruct" ] && echo ERR: $MODEL_RETURNED && exit 1
76+
echo "Llama Stack server is up and serving :)"
7177
exit 0
7278
fi
7379
sleep 1

0 commit comments

Comments
 (0)