# Workflow: Migrate vLLM Ray Serve Container (#17)
---
name: PR - vLLM RayServe

# Run on PRs into main that touch anything under docker/.
on:
  pull_request:
    branches:
      - main
    paths:
      - "docker/**"

# Least-privilege token: this workflow only reads repo contents;
# registry access goes through AWS secrets, not GITHUB_TOKEN.
permissions:
  contents: read

# One active run per PR per workflow; a new push cancels the previous run.
# github.workflow is included so other workflows using a "pr-<number>"
# group pattern cannot cancel this one (and vice versa).
concurrency:
  group: ${{ github.workflow }}-pr-${{ github.event.pull_request.number }}
  cancel-in-progress: true
jobs:
  # Lint the PR (pre-commit) and detect whether the Ray Serve Dockerfile
  # changed; downstream jobs key off the "vllm-rayserve-ec2" output.
  check-changes:
    runs-on: ubuntu-latest
    outputs:
      # "true" when docker/vllm/Dockerfile.rayserve is modified by this PR.
      vllm-rayserve-ec2: ${{ steps.changes.outputs.vllm-rayserve-ec2 }}
    steps:
      - uses: actions/checkout@v5
      - uses: actions/setup-python@v6
        with:
          python-version: "3.12"
      # Run all configured pre-commit hooks across the whole tree.
      - uses: pre-commit/action@v3.0.1
        with:
          extra_args: --all-files
      - name: Detect file changes
        id: changes
        uses: dorny/paths-filter@v3
        with:
          # The block scalar below is itself parsed as YAML by paths-filter:
          # a mapping of filter-name -> list of path globs.
          filters: |
            vllm-rayserve-ec2:
              - "docker/vllm/Dockerfile.rayserve"
build-vllm-rayserve-ec2:
needs: [check-changes]
if: needs.check-changes.outputs.vllm-rayserve-ec2 == 'true'
runs-on:
- codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }}
fleet:x86-build-runner
steps:
- uses: actions/checkout@v5
- run: .github/scripts/runner_setup.sh
- run: .github/scripts/buildkitd.sh
- name: Build vllm-rayserve-ec2 image
id: build
shell: bash
run: |
aws ecr get-login-password --region ${{ secrets.AWS_REGION }} | docker login --username AWS --password-stdin ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_REGION }}.amazonaws.com
IMAGE_TAG=${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_REGION }}.amazonaws.com/ci:vllm-0.10.2-gpu-py312-cu128-ubuntu22.04-rayserve-ec2-pr-${{ github.event.pull_request.number }}
docker buildx build --progress plain \
--build-arg CACHE_REFRESH="$(date +"%Y-%m-%d")" \
--cache-to=type=inline \
--cache-from=type=registry,ref=$IMAGE_TAG \
--tag $IMAGE_TAG \
--target vllm-rayserve-ec2 \
-f docker/vllm/Dockerfile.rayserve .
docker push $IMAGE_TAG
docker rmi $IMAGE_TAG
echo $IMAGE_TAG > image_uri.txt
- name: Upload image URI
uses: actions/upload-artifact@v4
with:
name: vllm-rayserve-ec2-image-uri
path: image_uri.txt
test-vllm-rayserve-ec2:
needs: [build-vllm-rayserve-ec2]
if: needs.build-vllm-rayserve-ec2.result == 'success'
runs-on:
- codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }}
fleet:x86-g6xl-runner
steps:
- uses: actions/checkout@v5
- uses: actions/checkout@v5
with:
repository: vllm-project/vllm
ref: v0.10.2
path: vllm_tests
sparse-checkout: |
requirements
tests
sparse-checkout-cone-mode: false
- name: Download image URI
uses: actions/download-artifact@v4
with:
name: vllm-rayserve-ec2-image-uri
- name: Resolve image URI
run: |
IMAGE_URI=$(cat image_uri.txt)
echo "Resolved image URI: $IMAGE_URI"
echo "IMAGE_URI=$IMAGE_URI" >> $GITHUB_ENV
- name: Pull image
run: |
aws ecr get-login-password --region ${{ secrets.AWS_REGION }} | docker login --username AWS --password-stdin ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_REGION }}.amazonaws.com
docker pull $IMAGE_URI
- name: Start container
run: |
CONTAINER_ID=$(docker run -d -it --rm --gpus=all --entrypoint /bin/bash \
-v ${HOME}/.cache/huggingface:/root/.cache/huggingface \
-v ${HOME}/.cache/vllm:/root/.cache/vllm \
-v ./vllm_tests:/workdir --workdir /workdir \
${IMAGE_URI})
echo "CONTAINER_ID=$CONTAINER_ID" >> $GITHUB_ENV
- name: Install Test dependencies
run: |
docker exec ${CONTAINER_ID} sh -c '
set -eux
uv pip install --system -r requirements/common.txt -r requirements/dev.txt --torch-backend=auto
uv pip install --system pytest pytest-asyncio
'
- name: Run vLLM Tests
run: |
docker exec ${CONTAINER_ID} sh -c '
set -eux
nvidia-smi
pytest -s -v tests/test_logger.py
# Entrypoints Integration Test (LLM) # 30min
# export VLLM_WORKER_MULTIPROC_METHOD=spawn
# pytest -v -s entrypoints/llm --ignore=entrypoints/llm/test_generate.py --ignore=entrypoints/llm/test_collective_rpc.py
# pytest -v -s entrypoints/llm/test_generate.py # it needs a clean process
# pytest -v -s entrypoints/offline_mode # Needs to avoid interference with other tests
'
- name: Cleanup container and image
if: always()
run: |
docker stop ${CONTAINER_ID} || true
docker rm -f ${CONTAINER_ID} || true
- run: .github/scripts/cleanup_old_image.sh