Skip to content

Commit 61b9c7f

Browse files
Update pr-sglang-g6-inference.yaml
1 parent 4a9ccf2 commit 61b9c7f

File tree

1 file changed

+26
-97
lines changed

1 file changed

+26
-97
lines changed

.github/workflows/pr-sglang-g6-inference.yaml

Lines changed: 26 additions & 97 deletions
Original file line number · Diff line number · Diff line change
@@ -8,9 +8,11 @@ on:
88

99
env:
1010
SGLANG_IMAGE: "lmsysorg/sglang:v0.5.7-runtime"
11+
SGLANG_VERSION: "0.5.7"
12+
DATASET_URL: "https://huggingface.co/datasets/anon8231489123/ShareGPT_Vicuna_unfiltered/resolve/main/ShareGPT_V3_unfiltered_cleaned_split.json"
1113

1214
jobs:
13-
sglang-heavy-inference:
15+
sglang-upstream-test:
1416
runs-on: g6-2gpu-runner
1517
steps:
1618
- name: Checkout
@@ -19,109 +21,34 @@ jobs:
1921
- name: Pull image
2022
run: docker pull ${{ env.SGLANG_IMAGE }}
2123

22-
- name: Start container (2 GPUs)
23-
run: |
24-
CONTAINER_ID=$(docker run -d --gpus=all \
25-
-p 30000:30000 \
26-
${{ env.SGLANG_IMAGE }} \
27-
python3 -m sglang.launch_server \
28-
--model-path Qwen/Qwen2.5-0.5B-Instruct \
29-
--host 0.0.0.0 --port 30000 \
30-
--tp 2)
31-
echo "CONTAINER_ID=${CONTAINER_ID}" >> ${GITHUB_ENV}
32-
33-
# Wait for container to be ready with timeout (up to 10 minutes)
34-
echo "Waiting for SGLang server to start..."
35-
for i in {1..120}; do
36-
# Try health endpoint first, fall back to v1/models
37-
if docker exec ${CONTAINER_ID} curl -s -f http://localhost:30000/health > /dev/null 2>&1 || \
38-
docker exec ${CONTAINER_ID} curl -s -f http://localhost:30000/v1/models > /dev/null 2>&1; then
39-
echo "Server is ready after $((i * 5)) seconds!"
40-
break
41-
fi
42-
if ! docker ps -q --no-trunc | grep -q ${CONTAINER_ID}; then
43-
echo "Container exited unexpectedly!"
44-
docker logs ${CONTAINER_ID} || true
45-
exit 1
46-
fi
47-
if [ $i -eq 120 ]; then
48-
echo "Timeout waiting for server to start after 10 minutes"
49-
docker logs ${CONTAINER_ID} || true
50-
exit 1
51-
fi
52-
echo "Waiting... ($((i * 5))s / 600s)"
53-
sleep 5
54-
done
55-
56-
- name: Verify GPUs
57-
run: docker exec ${CONTAINER_ID} nvidia-smi
58-
59-
- name: Test inference
60-
run: |
61-
docker exec ${CONTAINER_ID} curl -X POST http://localhost:30000/generate \
62-
-H "Content-Type: application/json" \
63-
-d '{"text": "Hello, how are you?", "sampling_params": {"temperature": 0.7, "max_new_tokens": 50}}'
64-
65-
- name: Show container logs
66-
if: always()
67-
run: docker logs ${CONTAINER_ID} || true
68-
69-
- name: Cleanup
70-
if: always()
71-
run: |
72-
docker stop ${CONTAINER_ID} || true
73-
docker rm ${CONTAINER_ID} || true
74-
75-
sglang-light-inference:
76-
runs-on: g6-1gpu-runner
77-
steps:
78-
- name: Checkout
24+
- name: Checkout SGLang tests
7925
uses: actions/checkout@v5
26+
with:
27+
repository: sgl-project/sglang
28+
ref: v${{ env.SGLANG_VERSION }}
29+
path: sglang_source
8030

81-
- name: Pull image
82-
run: docker pull ${{ env.SGLANG_IMAGE }}
83-
84-
- name: Start container (1 GPU)
31+
- name: Start container
8532
run: |
86-
CONTAINER_ID=$(docker run -d --gpus=all \
87-
-p 30000:30000 \
88-
${{ env.SGLANG_IMAGE }} \
89-
python3 -m sglang.launch_server \
90-
--model-path Qwen/Qwen2.5-0.5B-Instruct \
91-
--host 0.0.0.0 --port 30000)
33+
CONTAINER_ID=$(docker run -d --gpus=all --entrypoint /bin/bash \
34+
-v ${HOME}/.cache/huggingface:/root/.cache/huggingface \
35+
-v ./sglang_source:/workdir --workdir /workdir \
36+
-e HF_TOKEN=${{ secrets.HUGGING_FACE_HUB_TOKEN }} \
37+
${{ env.SGLANG_IMAGE }})
9238
echo "CONTAINER_ID=${CONTAINER_ID}" >> ${GITHUB_ENV}
93-
94-
# Wait for container to be ready with timeout (up to 10 minutes)
95-
echo "Waiting for SGLang server to start..."
96-
for i in {1..120}; do
97-
# Try health endpoint first, fall back to v1/models
98-
if docker exec ${CONTAINER_ID} curl -s -f http://localhost:30000/health > /dev/null 2>&1 || \
99-
docker exec ${CONTAINER_ID} curl -s -f http://localhost:30000/v1/models > /dev/null 2>&1; then
100-
echo "Server is ready after $((i * 5)) seconds!"
101-
break
102-
fi
103-
if ! docker ps -q --no-trunc | grep -q ${CONTAINER_ID}; then
104-
echo "Container exited unexpectedly!"
105-
docker logs ${CONTAINER_ID} || true
106-
exit 1
107-
fi
108-
if [ $i -eq 120 ]; then
109-
echo "Timeout waiting for server to start after 10 minutes"
110-
docker logs ${CONTAINER_ID} || true
111-
exit 1
112-
fi
113-
echo "Waiting... ($((i * 5))s / 600s)"
114-
sleep 5
115-
done
11639
117-
- name: Verify GPUs
118-
run: docker exec ${CONTAINER_ID} nvidia-smi
40+
- name: Setup for SGLang tests
41+
run: |
42+
docker exec ${CONTAINER_ID} sh -c 'set -eux
43+
bash scripts/ci/ci_install_dependency.sh'
11944
120-
- name: Test inference
45+
- name: Run SGLang upstream tests
12146
run: |
122-
docker exec ${CONTAINER_ID} curl -X POST http://localhost:30000/generate \
123-
-H "Content-Type: application/json" \
124-
-d '{"text": "Hello, how are you?", "sampling_params": {"temperature": 0.7, "max_new_tokens": 50}}'
47+
docker exec ${CONTAINER_ID} sh -c 'set -eux
48+
nvidia-smi
49+
# SRT backend Test
50+
cd /workdir/test
51+
python3 run_suite.py --hw cuda --suite stage-a-test-1'
12552
12653
- name: Show container logs
12754
if: always()
@@ -132,3 +59,5 @@ jobs:
13259
run: |
13360
docker stop ${CONTAINER_ID} || true
13461
docker rm ${CONTAINER_ID} || true
62+
63+

0 commit comments

Comments (0)