Migrate vLLM Ray Serve Container #14
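This PR migrates the vLLM Ray Serve container build into GitHub Actions: when a pull request touches files under docker/, the workflow below lints the repo, builds the vllm-rayserve-ec2 image and pushes it to ECR, then pulls it onto a GPU runner for vLLM's unit tests and a Qwen3 serving benchmark.
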
name: PR - vLLM RayServe

on:
  pull_request:
    branches:
      - main
    paths:
      - "docker/**"

permissions:
  contents: read

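# One concurrency group per PR: a new push cancels any run still in
# flight for the same pull request.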
concurrency:
  group: pr-${{ github.event.pull_request.number }}
  cancel-in-progress: true

jobs:
  check-changes:
    runs-on: ubuntu-latest
    outputs:
      vllm-rayserve-ec2: ${{ steps.changes.outputs.vllm-rayserve-ec2 }}
    steps:
      - uses: actions/checkout@v5
      - uses: actions/setup-python@v6
        with:
          python-version: "3.12"
      - uses: pre-commit/action@v3.0.1
        with:
          extra_args: --all-files
      - name: Detect file changes
        id: changes
        uses: dorny/paths-filter@v3
        with:
          filters: |
            vllm-rayserve-ec2:
              - "docker/vllm/Dockerfile.rayserve"

  build-vllm-rayserve-ec2:
    needs: [check-changes]
    if: needs.check-changes.outputs.vllm-rayserve-ec2 == 'true'
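    # AWS CodeBuild-hosted runner; the fleet: entry is CodeBuild's label-override
    # syntax, which here presumably selects a reserved x86 build fleet.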
    runs-on:
      - codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }}
      - fleet:x86-build-runner
    steps:
      - uses: actions/checkout@v5
      - run: .github/scripts/runner_setup.sh
      - run: .github/scripts/buildkitd.sh
      - name: Build vllm-rayserve-ec2 image
        id: build
        shell: bash
        run: |
          aws ecr get-login-password --region ${{ secrets.AWS_REGION }} | docker login --username AWS --password-stdin ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_REGION }}.amazonaws.com
          IMAGE_TAG=${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_REGION }}.amazonaws.com/ci:vllm-0.10.2-gpu-py312-cu128-ubuntu22.04-rayserve-ec2-pr-${{ github.event.pull_request.number }}
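          # --cache-to=type=inline embeds cache metadata in the pushed image, so
          # later runs of the same PR can import it via --cache-from; the first
          # run of a PR starts cold because the tag does not exist yet.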
          docker buildx build --progress plain \
            --build-arg CACHE_REFRESH="$(date +"%Y-%m-%d")" \
            --cache-to=type=inline \
            --cache-from=type=registry,ref=$IMAGE_TAG \
            --tag $IMAGE_TAG \
            --target vllm-rayserve-ec2 \
            -f docker/vllm/Dockerfile.rayserve .
          docker push $IMAGE_TAG
          docker rmi $IMAGE_TAG
          echo $IMAGE_TAG > image_uri.txt
      - name: Upload image URI
        uses: actions/upload-artifact@v4
        with:
          name: vllm-rayserve-ec2-image-uri
          path: image_uri.txt

  test-vllm-rayserve-ec2:
    needs: [build-vllm-rayserve-ec2]
    if: needs.build-vllm-rayserve-ec2.result == 'success'
    runs-on:
      - codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }}
      - fleet:x86-g6xl-runner
    steps:
      - name: Checkout DLC Source
        uses: actions/checkout@v5
      - name: Checkout vLLM Tests
        uses: actions/checkout@v5
        with:
          repository: vllm-project/vllm
          ref: v0.10.2
          path: vllm_tests
          sparse-checkout: |
            requirements
            tests
          sparse-checkout-cone-mode: false
      - name: Download image URI
        uses: actions/download-artifact@v4
        with:
          name: vllm-rayserve-ec2-image-uri
      - name: Resolve image URI
        run: |
          IMAGE_URI=$(cat image_uri.txt)
          echo "Resolved image URI: $IMAGE_URI"
          echo "IMAGE_URI=$IMAGE_URI" >> $GITHUB_ENV
      - name: Pull image
        run: |
          aws ecr get-login-password --region ${{ secrets.AWS_REGION }} | docker login --username AWS --password-stdin ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_REGION }}.amazonaws.com
          docker pull $IMAGE_URI
      - name: Start container
        run: |
          pwd
          ls -la
          CONTAINER_NAME=vllm-rayserve-test
          echo "CONTAINER_NAME=$CONTAINER_NAME" >> $GITHUB_ENV
          # /dataset is read by the benchmark step below; absolute host paths
          # ensure -v is treated as a bind mount, not a named volume
          docker run --name ${CONTAINER_NAME} \
            -d -it --rm --gpus=all --entrypoint /bin/bash \
            -v ${HOME}/.cache/huggingface:/root/.cache/huggingface \
            -v ${HOME}/.cache/vllm:/root/.cache/vllm \
            -v ${HOME}/dataset:/dataset \
            -v $(pwd)/vllm_tests:/workdir --workdir /workdir \
            ${IMAGE_URI}
      - name: Run vLLM Tests
        run: |
          docker exec ${CONTAINER_NAME} sh -c '
            set -eux
            nvidia-smi
            pwd
            ls -la
            uv pip install --system -r requirements/common.txt -r requirements/dev.txt --torch-backend=auto
            uv pip install --system pytest pytest-asyncio
            pytest -s -v tests/test_logger.py
          '
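      # `vllm bench serve` benchmarks an already-running server, but no step here
      # starts one. A minimal sketch of the missing step, assuming the image's
      # `vllm` CLI and the default localhost:8000 endpoint the benchmark targets:
      - name: Start vLLM server
        run: |
          docker exec -d ${CONTAINER_NAME} vllm serve Qwen/Qwen3-0.6B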
      - name: Run qwen3 benchmark
        run: |
          # Download the ShareGPT dataset if it doesn't exist
          mkdir -p ${HOME}/dataset
          if [ ! -f ${HOME}/dataset/ShareGPT_V3_unfiltered_cleaned_split.json ]; then
            echo "Downloading ShareGPT dataset..."
            wget -q -P ${HOME}/dataset https://huggingface.co/datasets/anon8231489123/ShareGPT_Vicuna_unfiltered/resolve/main/ShareGPT_V3_unfiltered_cleaned_split.json
          else
            echo "ShareGPT dataset already exists. Skipping download."
          fi
          # give the server time to come up
          sleep 60
          # docker logs ${CONTAINER_NAME}
          # run serving benchmark
          echo "start running serving benchmark workflow..."
          docker exec ${CONTAINER_NAME} vllm bench serve \
            --backend vllm \
            --model Qwen/Qwen3-0.6B \
            --dataset-name sharegpt \
            --dataset-path /dataset/ShareGPT_V3_unfiltered_cleaned_split.json \
            --num-prompts 1000
      - name: Cleanup container and image
        if: always()
        run: |
          docker stop ${CONTAINER_NAME} || true
          docker rm -f ${CONTAINER_NAME} || true
          docker rmi ${IMAGE_URI} || true
          docker image ls || true