@@ -54,13 +54,13 @@ jobs:
5454 docker buildx build --progress plain \
5555 --build-arg CACHE_REFRESH="$(date +"%Y-%m-%d")" \
5656 --cache-to=type=inline \
57- --cache-from=type=registry,ref=" $IMAGE_TAG" \
58- --tag " $IMAGE_TAG" \
57+ --cache-from=type=registry,ref=$IMAGE_TAG \
58+ --tag $IMAGE_TAG \
5959 --target vllm-rayserve-ec2 \
6060 -f docker/vllm/Dockerfile.rayserve .
61- docker push " $IMAGE_TAG"
62- docker rmi " $IMAGE_TAG"
63- echo " $IMAGE_TAG" > image_uri.txt
61+ docker push $IMAGE_TAG
62+ docker rmi $IMAGE_TAG
63+ echo $IMAGE_TAG > image_uri.txt
6464 - name : Upload image URI
6565 uses : actions/upload-artifact@v4
6666 with :
8080 with :
8181 name : vllm-rayserve-ec2-image-uri
8282
83- - name : Pull image URI
84- id : read
85- run : |
86- IMAGE_URI=$(cat image_uri.txt)
87- echo "IMAGE_URI=$IMAGE_URI" >> $GITHUB_ENV
88- echo "Resolved image URI: $IMAGE_URI"
89- aws ecr get-login-password --region ${{ secrets.AWS_REGION }} | docker login --username AWS --password-stdin ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_REGION }}.amazonaws.com
90- docker pull "$IMAGE_URI"
91-
9283 - name : Checkout vLLM
9384 uses : actions/checkout@v5
9485 with :
@@ -99,29 +90,39 @@ jobs:
9990 tests
10091 path : vllm_tests
10192
102- - name : Run vLLM Tests
93+ - name : Pull image URI
94+ id : read
95+ run : |
96+ IMAGE_URI=$(cat image_uri.txt)
97+ echo "IMAGE_URI=$IMAGE_URI" >> $GITHUB_ENV
98+ echo "Resolved image URI: $IMAGE_URI"
99+ aws ecr get-login-password --region ${{ secrets.AWS_REGION }} | docker login --username AWS --password-stdin ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_REGION }}.amazonaws.com
100+ docker pull "$IMAGE_URI"
101+
102+ - name : Start container
103+ id : start
103104 run : |
104105 CONTAINER_NAME=vllm-rayserve-test
105- docker stop ${CONTAINER_NAME} || true
106- docker rm -f ${CONTAINER_NAME} || true
107- echo "${IMAGE_URI}"
108-
106+ echo "CONTAINER_NAME=$CONTAINER_NAME" >> $GITHUB_ENV
109107 docker run --name ${CONTAINER_NAME} \
110108 -d -it --rm --gpus=all --entrypoint /bin/bash \
111109 -v ${HOME}/.cache/huggingface:/root/.cache/huggingface \
112110 -v ${HOME}/.cache/vllm:/root/.cache/vllm \
113111 -v vllm_tests:/workdir --workdir /workdir \
114112 ${IMAGE_URI}
115-
116- docker exec ${CONTAINER_NAME} nvidia-smi
117- docker exec ${CONTAINER_NAME} cd vllm_tests && uv pip install --system -r requirements/common.txt -r requirements/dev.txt --torch-backend=auto
118- docker exec ${CONTAINER_NAME} cd vllm_tests && uv pip install --system pytest pytest-asyncio
119- docker exec ${CONTAINER_NAME} cd vllm_tests && pytest -s -v tests/test_logger.py
120-
121- # cleanup container
122- docker stop ${CONTAINER_NAME}
123- docker rm -f ${CONTAINER_NAME}
124-
113+
114+ - name : Run vLLM Tests
115+ run : |
116+ docker exec ${CONTAINER_NAME} sh -c '
117+ set -eux
118+ nvidia-smi
119+ ls -la
120+ cd vllm_tests
121+ uv pip install --system -r requirements/common.txt -r requirements/dev.txt --torch-backend=auto
122+ uv pip install --system pytest pytest-asyncio
123+ pytest -s -v tests/test_logger.py
124+ '
125+
125126 - name : Run qwen3 benchmark
126127 run : |
127128 # Download ShareGPT dataset if it doesn't exist
@@ -131,19 +132,9 @@ jobs:
131132 	        wget -q -P ${HOME}/dataset https://huggingface.co/datasets/anon8231489123/ShareGPT_Vicuna_unfiltered/resolve/main/ShareGPT_V3_unfiltered_cleaned_split.json
	      else
132133 echo "ShareGPT dataset already exists. Skipping download."
133134 fi
134- CONTAINER_NAME=vllm-rayserve
135- docker stop ${CONTAINER_NAME} || true
136- docker rm -f ${CONTAINER_NAME} || true
137- docker run --name ${CONTAINER_NAME} \
138- -d --gpus=all --entrypoint /bin/bash \
139- -v ${HOME}/.cache/huggingface:/root/.cache/huggingface \
140- -v ${HOME}/.cache/vllm:/root/.cache/vllm \
141- -v ${HOME}/dataset:/dataset \
142- -e HUGGING_FACE_HUB_TOKEN=${{ secrets.HUGGING_FACE_HUB_TOKEN }} \
143- ${IMAGE_URI} \
144- -c "vllm serve Qwen/Qwen3-0.6B --reasoning-parser qwen3"
135+
145136 sleep 60
146- docker logs ${CONTAINER_NAME}
137+ # docker logs ${CONTAINER_NAME}
147138
148139 # run serving benchmark
149140 echo "start running serving benchmark workflow..."
@@ -153,7 +144,9 @@ jobs:
153144 --dataset-name sharegpt \
154145 --dataset-path /dataset/ShareGPT_V3_unfiltered_cleaned_split.json \
155146 --num-prompts 1000
156-
157- # cleanup container
158- docker stop ${CONTAINER_NAME}
159- docker rm -f ${CONTAINER_NAME}
147+
148+ - name : Cleanup container
149+ if : always()
150+ run : |
151+ docker stop ${CONTAINER_NAME} || true
152+ docker rm -f ${CONTAINER_NAME} || true