@@ -26,20 +26,19 @@ jobs:
2626 run: |
2727 CONTAINER_ID=$(docker run -d --rm --gpus=all \
2828 -p 30000:30000 \
29- -e HF_TOKEN=${{ secrets.HUGGING_FACE_HUB_TOKEN }} \
3029 ${{ env.SGLANG_IMAGE }} \
3130 python3 -m sglang.launch_server \
32- --model-path meta-llama/Llama-3.2-3B-Instruct \
31+ --model-path Qwen/Qwen2.5-0.5B-Instruct \
3332 --host 0.0.0.0 --port 30000 \
3433 --tp 2)
3534 echo "CONTAINER_ID=${CONTAINER_ID}" >> ${GITHUB_ENV}
3635 sleep 60
3736
3837 - name: Test inference
3938 run: |
40- docker exec ${CONTAINER_ID} curl -X POST http://localhost:30000/v1/completions \
39+ docker exec ${CONTAINER_ID} curl -X POST http://localhost:30000/generate \
4140 -H "Content-Type: application/json" \
42- -d '{"model": "meta-llama/Llama-3.2-3B-Instruct", "prompt": "Hello, how are you?", "max_tokens": 50}'
41+ -d '{"text": "Hello, how are you?", "sampling_params": {"temperature": 0.7, "max_new_tokens": 50}}'
4342
4443 - name: Show GPU usage
4544 if: always()
6564 run: |
6665 CONTAINER_ID=$(docker run -d --rm --gpus=all \
6766 -p 30000:30000 \
68- -e HF_TOKEN=${{ secrets.HUGGING_FACE_HUB_TOKEN }} \
6967 ${{ env.SGLANG_IMAGE }} \
7068 python3 -m sglang.launch_server \
7169 --model-path Qwen/Qwen2.5-0.5B-Instruct \
7573
7674 - name: Test inference
7775 run: |
78- docker exec ${CONTAINER_ID} curl -X POST http://localhost:30000/v1/completions \
76+ docker exec ${CONTAINER_ID} curl -X POST http://localhost:30000/generate \
7977 -H "Content-Type: application/json" \
80- -d '{"model": "Qwen/Qwen2.5-0.5B-Instruct", "prompt": "Hello, how are you?", "max_tokens": 50}'
78+ -d '{"text": "Hello, how are you?", "sampling_params": {"temperature": 0.7, "max_new_tokens": 50}}'
8179
8280 - name: Show GPU usage
8381 if: always()
0 commit comments