env:
  # Pinned SGLang runtime image; SGLANG_VERSION must track the image tag so
  # the checked-out test sources match the installed runtime.
  SGLANG_IMAGE: "lmsysorg/sglang:v0.5.7-runtime"
  SGLANG_VERSION: "0.5.7"
  # ShareGPT dataset consumed by SGLang's benchmark/test scripts.
  DATASET_URL: "https://huggingface.co/datasets/anon8231489123/ShareGPT_Vicuna_unfiltered/resolve/main/ShareGPT_V3_unfiltered_cleaned_split.json"
jobs:
  # Runs SGLang's own upstream test suite (stage-a-test-1, SRT backend)
  # inside the pinned runtime image on a 2-GPU self-hosted runner.
  sglang-upstream-test:
    runs-on: g6-2gpu-runner
    steps:
      # NOTE(review): this step's body was elided by the diff hunk header;
      # restored to match the identical step later in the file — confirm.
      - name: Checkout
        uses: actions/checkout@v5

      - name: Pull image
        run: docker pull ${{ env.SGLANG_IMAGE }}

      # Check out SGLang sources at the tag matching the runtime image so the
      # upstream tests exercise the same version that ships in the image.
      - name: Checkout SGLang tests
        uses: actions/checkout@v5
        with:
          repository: sgl-project/sglang
          ref: v${{ env.SGLANG_VERSION }}
          path: sglang_source

      - name: Start container
        run: |
          # -t keeps the bash entrypoint alive: a detached bash with no TTY
          # and no command exits immediately, so every later `docker exec`
          # against the container would fail.
          # ${PWD} makes the bind-mount source absolute — older Docker CLIs
          # reject relative -v source paths.
          CONTAINER_ID=$(docker run -dt --gpus=all --entrypoint /bin/bash \
            -v ${HOME}/.cache/huggingface:/root/.cache/huggingface \
            -v ${PWD}/sglang_source:/workdir --workdir /workdir \
            -e HF_TOKEN=${{ secrets.HUGGING_FACE_HUB_TOKEN }} \
            ${{ env.SGLANG_IMAGE }})
          echo "CONTAINER_ID=${CONTAINER_ID}" >> "${GITHUB_ENV}"

      - name: Setup for SGLang tests
        run: |
          docker exec ${CONTAINER_ID} sh -c 'set -eux
          bash scripts/ci/ci_install_dependency.sh'

      - name: Run SGLang upstream tests
        run: |
          docker exec ${CONTAINER_ID} sh -c 'set -eux
          nvidia-smi
          # SRT backend Test
          cd /workdir/test
          python3 run_suite.py --hw cuda --suite stage-a-test-1'

      # NOTE(review): the next two steps were partially elided in the diff
      # view (new-file lines 55-58); bodies restored from the deleted-side
      # context of the same file — confirm against the full file.
      - name: Show container logs
        if: always()
        run: docker logs ${CONTAINER_ID} || true

      - name: Cleanup
        if: always()
        run: |
          docker stop ${CONTAINER_ID} || true
          docker rm ${CONTAINER_ID} || true