Migrate vLLM Ray Serve Container #5
Workflow file for this run
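
# PR CI for the vLLM Ray Serve container image: a lint/change-detection gate,
# an x86 CodeBuild image build pushed to ECR with a PR tag, and a GPU test job
# that runs vLLM's v1 test suites and a ShareGPT serving benchmark.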
name: PR - vLLM RayServe

on:
  pull_request:
    branches:
      - main
    paths:
      - "docker/**"

permissions:
  contents: read
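
# One run per pull request: a newer push to the same PR cancels the run
# already in flight.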
concurrency:
  group: pr-${{ github.event.pull_request.number }}
  cancel-in-progress: true

jobs:
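  # Gate job: run pre-commit across the tree and detect whether the Ray Serve
  # Dockerfile changed; downstream jobs key off the vllm-rayserve-ec2 output.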
  check-changes:
    runs-on: ubuntu-latest
    outputs:
      vllm-rayserve-ec2: ${{ steps.changes.outputs.vllm-rayserve-ec2 }}
    steps:
      - uses: actions/checkout@v5
      - uses: actions/setup-python@v6
        with:
          python-version: "3.12"
      - uses: pre-commit/action@v3.0.1
        with:
          extra_args: --all-files
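      # dorny/paths-filter sets the vllm-rayserve-ec2 output to the string
      # 'true' when a changed file matches the filter below; the build job's
      # `if:` compares against that string.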
      - name: Detect file changes
        id: changes
        uses: dorny/paths-filter@v3
        with:
          filters: |
            vllm-rayserve-ec2:
              - "docker/vllm/Dockerfile.rayserve"
  build-vllm-rayserve-ec2:
    needs: [check-changes]
    if: needs.check-changes.outputs.vllm-rayserve-ec2 == 'true'
    runs-on:
      - codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }}
      - fleet:x86-build-runner
    steps:
      - uses: actions/checkout@v5
      - run: .github/scripts/runner_setup.sh
      - run: .github/scripts/buildkitd.sh
      - name: Build vllm-rayserve-ec2 image
        id: build
        shell: bash
        run: |
          aws ecr get-login-password --region ${{ secrets.AWS_REGION }} | docker login --username AWS --password-stdin ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_REGION }}.amazonaws.com
          IMAGE_TAG=${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_REGION }}.amazonaws.com/ci:vllm-0.10.2-gpu-py312-cu128-ubuntu22.04-rayserve-ec2-pr-${{ github.event.pull_request.number }}
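          # Inline layer cache: --cache-to=type=inline embeds cache metadata
          # in the pushed image, and --cache-from=type=registry pulls it back
          # on the next run for this PR, so unchanged layers are reused.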
          docker buildx build --progress plain \
            --build-arg CACHE_REFRESH="$(date +"%Y-%m-%d")" \
            --cache-to=type=inline \
            --cache-from=type=registry,ref="$IMAGE_TAG" \
            --tag "$IMAGE_TAG" \
            --target vllm-rayserve-ec2 \
            -f docker/vllm/Dockerfile.rayserve .
          docker push "$IMAGE_TAG"
          docker rmi "$IMAGE_TAG"
          echo "$IMAGE_TAG" > image_uri.txt
      - name: Upload image URI
        uses: actions/upload-artifact@v4
        with:
          name: vllm-rayserve-ec2-image-uri
          path: image_uri.txt
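
  # Test job: runs on a GPU (g6xl) fleet, pulls the PR image, and exercises
  # it with vLLM's own tests plus a serving benchmark.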
  test-vllm-rayserve-ec2:
    needs: [build-vllm-rayserve-ec2]
    if: needs.build-vllm-rayserve-ec2.result == 'success'
    runs-on:
      - codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }}
      - fleet:x86-g6xl-runner
    steps:
      - uses: actions/checkout@v5
      - name: Download image URI
        uses: actions/download-artifact@v4
        with:
          name: vllm-rayserve-ec2-image-uri
      - name: Pull image URI
        id: read
        run: |
          IMAGE_URI=$(cat image_uri.txt)
          echo "IMAGE_URI=$IMAGE_URI" >> $GITHUB_ENV
          echo "Resolved image URI: $IMAGE_URI"
          aws ecr get-login-password --region ${{ secrets.AWS_REGION }} | docker login --username AWS --password-stdin ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_REGION }}.amazonaws.com
          docker pull "$IMAGE_URI"
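      # Test sources come from upstream vllm-project/vllm at the tag matching
      # the vLLM version baked into the image (0.10.2 per the tag above).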
      - name: Checkout vLLM v0.10.2
        uses: actions/checkout@v5
        with:
          repository: vllm-project/vllm
          ref: v0.10.2
          path: vllm
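      # Start a detached shell container with the HF and vLLM caches mounted
      # and the vLLM checkout at /workdir, then run the v1 e2e and engine
      # suites inside it.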
      - name: Run vLLM Tests
        run: |
          CONTAINER_NAME=vllm-rayserve-test
          docker stop ${CONTAINER_NAME} || true
          docker rm -f ${CONTAINER_NAME} || true
          echo "${IMAGE_URI}"
          docker run --name ${CONTAINER_NAME} \
            -d -it --rm --gpus=all --entrypoint /bin/bash \
            -v ${HOME}/.cache/huggingface:/root/.cache/huggingface \
            -v ${HOME}/.cache/vllm:/root/.cache/vllm \
            -v ./vllm:/workdir --workdir /workdir \
            ${IMAGE_URI}
          docker exec ${CONTAINER_NAME} nvidia-smi
          docker exec ${CONTAINER_NAME} uv pip list
          docker exec ${CONTAINER_NAME} uv pip install --system -r requirements/dev.txt
          docker exec ${CONTAINER_NAME} pytest -v -s v1/e2e
          docker exec ${CONTAINER_NAME} pytest -v -s v1/engine
          # cleanup: the container was started with --rm, so stopping it also
          # removes it; rm -f is a safety net that must not fail the step
          docker stop ${CONTAINER_NAME}
          docker rm -f ${CONTAINER_NAME} || true
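      # Benchmark: serve Qwen3-0.6B from the image under test and drive it
      # with 1000 ShareGPT prompts via `vllm bench serve`.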
      - name: Run qwen3 benchmark
        run: |
          # Download ShareGPT dataset if it doesn't exist
          mkdir -p ${HOME}/dataset
          if [ ! -f ${HOME}/dataset/ShareGPT_V3_unfiltered_cleaned_split.json ]; then
            echo "Downloading ShareGPT dataset..."
            wget -q -P ${HOME}/dataset https://huggingface.co/datasets/anon8231489123/ShareGPT_Vicuna_unfiltered/resolve/main/ShareGPT_V3_unfiltered_cleaned_split.json
          else
            echo "ShareGPT dataset already exists. Skipping download."
          fi
          CONTAINER_NAME=vllm-rayserve
          docker stop ${CONTAINER_NAME} || true
          docker rm -f ${CONTAINER_NAME} || true
          docker run --name ${CONTAINER_NAME} \
            -d --gpus=all --entrypoint /bin/bash \
            -v ${HOME}/.cache/huggingface:/root/.cache/huggingface \
            -v ${HOME}/.cache/vllm:/root/.cache/vllm \
            -v ${HOME}/dataset:/dataset \
            -e HUGGING_FACE_HUB_TOKEN=${{ secrets.HUGGING_FACE_HUB_TOKEN }} \
            ${IMAGE_URI} \
            -c "vllm serve Qwen/Qwen3-0.6B --reasoning-parser qwen3"
          sleep 60
          docker logs ${CONTAINER_NAME}
          # run serving benchmark
          echo "start running serving benchmark workflow..."
          docker exec ${CONTAINER_NAME} vllm bench serve \
            --backend vllm \
            --model Qwen/Qwen3-0.6B \
            --dataset-name sharegpt \
            --dataset-path /dataset/ShareGPT_V3_unfiltered_cleaned_split.json \
            --num-prompts 1000
          # cleanup container
          docker stop ${CONTAINER_NAME}
          docker rm -f ${CONTAINER_NAME}
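
# Manual smoke test (sketch, not part of the workflow): while the serve
# container above is running, the OpenAI-compatible endpoint can be queried
# in-container (again assuming curl is present in the image):
#   docker exec vllm-rayserve curl -s localhost:8000/v1/completions \
#     -H 'Content-Type: application/json' \
#     -d '{"model": "Qwen/Qwen3-0.6B", "prompt": "Hello", "max_tokens": 16}'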