Skip to content

Commit e7203d6

Browse files
ci: add vllm action and integration to image test
Co-authored-by: Derek Higgins <derekh@redhat.com>
Signed-off-by: Nathan Weinberg <nweinber@redhat.com>
1 parent 65a75bb commit e7203d6

File tree

2 files changed

+50
-3
lines changed

2 files changed

+50
-3
lines changed
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
# Composite action: start a vllm inference server in a background Docker
# container and block until its /health endpoint responds (up to 15 minutes).
# Exposes the OpenAI-compatible API on localhost:8000 for later workflow steps.
name: Setup VLLM
description: Start VLLM
runs:
  using: "composite"
  steps:
    - name: Start VLLM
      shell: bash
      run: |
        # Fail fast on errors, unset variables, and pipeline failures.
        set -euo pipefail

        # Start the vllm container detached.
        # NOTE(review): --privileged=true is a broad grant — presumably needed
        # by this CPU vllm image; confirm and narrow if possible.
        # Arguments after the image name are passed to vllm itself.
        docker run -d \
          --name vllm \
          -p 8000:8000 \
          --privileged=true \
          quay.io/higginsd/vllm-cpu:65393ee064 \
          --host 0.0.0.0 \
          --port 8000 \
          --enable-auto-tool-choice \
          --tool-call-parser llama3_json \
          --model /root/.cache/Llama-3.2-1B-Instruct \
          --served-model-name meta-llama/Llama-3.2-1B-Instruct

        # Poll the health endpoint until it answers, or give up after 900 s.
        # -sS keeps the 900 s poll loop from flooding the log with curl
        # progress meters while still printing real errors; -f turns HTTP
        # errors into non-zero exits so the loop keeps waiting.
        echo "Waiting for vllm to be ready..."
        if ! timeout 900 bash -c 'until curl -fsS -o /dev/null http://localhost:8000/health; do
          echo "Waiting for vllm..."
          sleep 5
        done'; then
          # Surface container output so a startup failure is debuggable.
          echo "vllm did not become healthy in time; container logs:" >&2
          docker logs vllm >&2 || true
          exit 1
        fi

.github/workflows/redhat-distro-container.yml

Lines changed: 23 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -47,14 +47,18 @@ jobs:
4747
cache-from: type=gha
4848
cache-to: type=gha,mode=max
4949

50+
- name: Setup vllm for image test
51+
id: vllm
52+
uses: ./.github/actions/setup-vllm
53+
5054
- name: Test image
5155
id: test
5256
run: |
5357
set -euo pipefail
5458
# Start llama stack
5559
CID="$(docker run -d --pull=never \
5660
-p 8321:8321 \
57-
--env INFERENCE_MODEL=dummy \
61+
--env INFERENCE_MODEL=meta-llama/Llama-3.2-1B-Instruct \
5862
--name llama-stack \
5963
${{ env.IMAGE_NAME }}:${{ github.sha }})"
6064
trap 'docker rm -f "$CID" >/dev/null 2>&1 || true' EXIT
@@ -64,8 +68,24 @@ jobs:
6468
for i in {1..60}; do
6569
echo "Attempt $i to connect to Llama Stack..."
6670
if curl -fsS --max-time 2 http://127.0.0.1:8321/v1/health | grep -q '"status":"OK"'; then
67-
echo "Llama Stack server is up :)"
68-
exit 0
71+
echo "Llama Stack server is up and serving :)"
72+
if curl -fsS --max-time 4 http://127.0.0.1:8321/v1/models | grep -q 'meta-llama/Llama-3.2-1B-Instruct'; then
73+
echo "meta-llama/Llama-3.2-1B-Instruct model was found :)"
74+
if curl -fsS --max-time 6 http://127.0.0.1:8321/v1/openai/v1/chat/completions -H "Content-Type: application/json" -d "{\"model\": \"meta-llama/Llama-3.2-1B-Instruct\",\"messages\": [{\"role\": \"user\", \"content\": \"What color is grass?\"}], \"max_tokens\": 10, \"temperature\": 0.0}" | grep -q 'green'; then
75+
echo "Inference is working :)"
76+
exit 0
77+
else
78+
echo "Inference is not working :("
79+
echo "Container logs:"
80+
docker logs "$CID" || true
81+
exit 1
82+
fi
83+
else
84+
echo "meta-llama/Llama-3.2-1B-Instruct model was not found :("
85+
echo "Container logs:"
86+
docker logs "$CID" || true
87+
exit 1
88+
fi
6989
fi
7090
sleep 1
7191
done

0 commit comments

Comments
 (0)