Skip to content

Commit 814ff7e

Browse files
Update pr-sglang-g6-inference.yaml
1 parent 03b25fc commit 814ff7e

File tree

1 file changed

+5
-7
lines changed

1 file changed

+5
-7
lines changed

.github/workflows/pr-sglang-g6-inference.yaml

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -26,20 +26,19 @@ jobs:
2626
run: |
2727
CONTAINER_ID=$(docker run -d --rm --gpus=all \
2828
-p 30000:30000 \
29-
-e HF_TOKEN=${{ secrets.HUGGING_FACE_HUB_TOKEN }} \
3029
${{ env.SGLANG_IMAGE }} \
3130
python3 -m sglang.launch_server \
32-
--model-path meta-llama/Llama-3.2-3B-Instruct \
31+
--model-path Qwen/Qwen2.5-0.5B-Instruct \
3332
--host 0.0.0.0 --port 30000 \
3433
--tp 2)
3534
echo "CONTAINER_ID=${CONTAINER_ID}" >> ${GITHUB_ENV}
3635
sleep 60
3736
3837
- name: Test inference
3938
run: |
40-
docker exec ${CONTAINER_ID} curl -X POST http://localhost:30000/v1/completions \
39+
docker exec ${CONTAINER_ID} curl -X POST http://localhost:30000/generate \
4140
-H "Content-Type: application/json" \
42-
-d '{"model": "meta-llama/Llama-3.2-3B-Instruct", "prompt": "Hello, how are you?", "max_tokens": 50}'
41+
-d '{"text": "Hello, how are you?", "sampling_params": {"temperature": 0.7, "max_new_tokens": 50}}'
4342
4443
- name: Show GPU usage
4544
if: always()
@@ -65,7 +64,6 @@ jobs:
6564
run: |
6665
CONTAINER_ID=$(docker run -d --rm --gpus=all \
6766
-p 30000:30000 \
68-
-e HF_TOKEN=${{ secrets.HUGGING_FACE_HUB_TOKEN }} \
6967
${{ env.SGLANG_IMAGE }} \
7068
python3 -m sglang.launch_server \
7169
--model-path Qwen/Qwen2.5-0.5B-Instruct \
@@ -75,9 +73,9 @@ jobs:
7573
7674
- name: Test inference
7775
run: |
78-
docker exec ${CONTAINER_ID} curl -X POST http://localhost:30000/v1/completions \
76+
docker exec ${CONTAINER_ID} curl -X POST http://localhost:30000/generate \
7977
-H "Content-Type: application/json" \
80-
-d '{"model": "Qwen/Qwen2.5-0.5B-Instruct", "prompt": "Hello, how are you?", "max_tokens": 50}'
78+
-d '{"text": "Hello, how are you?", "sampling_params": {"temperature": 0.7, "max_new_tokens": 50}}'
8179
8280
- name: Show GPU usage
8381
if: always()

0 commit comments

Comments (0)