Override CB fleet with buildspec to setup and cleanup steps #104
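The jobs below run on AWS CodeBuild-hosted GitHub Actions runners rather than GitHub-hosted ones. The first `runs-on` label selects the per-run CodeBuild runner project (`codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }}`); the additional labels are CodeBuild runner overrides: `fleet:` pins the job to a specific compute fleet (`x86-build-runner` for image builds, `x86-g6xl-runner` for GPU tests, `default-runner` for the SageMaker endpoint test), and `buildspec-override:true` tells CodeBuild to apply the project's own buildspec to the runner build, which is presumably where the setup and cleanup steps referenced in the title are attached. As a rough illustration only (the actual buildspec lives in the CodeBuild project configuration, not in this repository, and is not part of this change), such a buildspec could look like:

```yaml
# Illustrative sketch only: the real buildspec is configured on the CodeBuild
# project and its exact contents are not shown in this PR. The helper script
# names are hypothetical.
version: 0.2
phases:
  install:
    commands:
      # setup that runs before the GitHub Actions job starts
      - ./ci/runner-setup.sh
  build:
    commands:
      # placeholder: CodeBuild splices the GitHub Actions job into this build
      - echo "GitHub Actions job runs on this build"
  post_build:
    commands:
      # cleanup that runs after the job, including when the build phase fails
      - ./ci/runner-cleanup.sh
```

The workflow itself only needs the extra `runs-on` labels; everything else about the jobs stays ordinary GitHub Actions YAML.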
Workflow file for this run:
name: PR - vLLM
on:
  pull_request:
    branches:
      - main
    paths:
      - "**vllm**"
permissions:
  contents: read
env:
  # CI Image configuration
  VLLM_VERSION: 0.11.2
  VLLM_RAYSERVE_VERSION: 0.10.2
  PYTHON_VERSION: "py312"
  CUDA_VERSION: "cu129"
  OS_VERSION: "ubuntu22.04"
  # Prod Image configuration
  PROD_EC2_IMAGE: vllm:0.11-gpu-py312-ec2
  PROD_RAYSERVE_IMAGE: vllm:0.10-gpu-py312-rayserve
  PROD_SAGEMAKER_IMAGE: vllm:0.11-gpu-py312
  # CI environment configuration
  FORCE_COLOR: "1"
jobs:
  check-changes:
    runs-on: ubuntu-latest
    concurrency:
      group: ${{ github.workflow }}-check-changes-${{ github.event.pull_request.number }}
      cancel-in-progress: true
    outputs:
      build-change: ${{ steps.changes.outputs.build-change }}
      test-change: ${{ steps.changes.outputs.test-change }}
    steps:
      - name: Checkout DLC source
        uses: actions/checkout@v5
      - name: Setup python
        uses: actions/setup-python@v6
        with:
          python-version: "3.12"
      - name: Run pre-commit
        uses: pre-commit/action@v3.0.1
        with:
          extra_args: --all-files
      - name: Detect file changes
        id: changes
        uses: dorny/paths-filter@v3
        with:
          filters: |
            build-change:
              - "docker/vllm/**"
              - "scripts/vllm/**"
              - "scripts/common/**"
              - "scripts/telemetry/**"
              - ".github/workflows/pr-vllm*"
            test-change:
              - "test/vllm/**"
  # ==============================================
  # =============== vLLM EC2 jobs ================
  # ==============================================
  build-vllm-ec2-image:
    needs: [check-changes]
    if: needs.check-changes.outputs.build-change == 'true'
    runs-on:
      - codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }}
        fleet:x86-build-runner
        buildspec-override:true
    concurrency:
      group: ${{ github.workflow }}-build-vllm-ec2-image-${{ github.event.pull_request.number }}
      cancel-in-progress: true
    outputs:
      ci-image: ${{ steps.image-uri-build.outputs.CI_IMAGE_URI }}
    steps:
      - uses: actions/checkout@v5
      - run: .github/scripts/buildkitd.sh
      - name: ECR login
        uses: ./.github/actions/ecr-authenticate
        with:
          aws-account-id: ${{ vars.CI_AWS_ACCOUNT_ID }}
          aws-region: ${{ vars.AWS_REGION }}
      - name: Resolve image URI for build
        id: image-uri-build
        run: |
          CI_IMAGE_URI=${{ vars.CI_AWS_ACCOUNT_ID }}.dkr.ecr.${{ vars.AWS_REGION }}.amazonaws.com/ci:vllm-${{ env.VLLM_VERSION }}-gpu-${{ env.PYTHON_VERSION }}-${{ env.CUDA_VERSION }}-${{ env.OS_VERSION }}-ec2-pr-${{ github.event.pull_request.number }}
          echo "Image URI to build: ${CI_IMAGE_URI}"
          echo "CI_IMAGE_URI=${CI_IMAGE_URI}" >> ${GITHUB_ENV}
          echo "CI_IMAGE_URI=${CI_IMAGE_URI}" >> ${GITHUB_OUTPUT}
      - name: Build image
        run: |
          # base image: https://hub.docker.com/r/vllm/vllm-openai/tags
          docker buildx build --progress plain \
            --build-arg CACHE_REFRESH="$(date +"%Y-%m-%d")" \
            --build-arg BASE_IMAGE="vllm/vllm-openai:v${{ env.VLLM_VERSION }}" \
            --cache-to=type=inline \
            --cache-from=type=registry,ref=${CI_IMAGE_URI} \
            --tag ${CI_IMAGE_URI} \
            --target vllm-ec2 \
            -f docker/vllm/Dockerfile .
      - name: Container push
        run: |
          docker push ${CI_IMAGE_URI}
          docker rmi ${CI_IMAGE_URI}
  set-ec2-test-environment:
    needs: [check-changes, build-vllm-ec2-image]
    if: |
      always() && !failure() && !cancelled() &&
      (needs.check-changes.outputs.build-change == 'true' || needs.check-changes.outputs.test-change == 'true')
    runs-on: ubuntu-latest
    concurrency:
      group: ${{ github.workflow }}-set-ec2-test-environment-${{ github.event.pull_request.number }}
      cancel-in-progress: true
    outputs:
      aws-account-id: ${{ steps.set-env.outputs.AWS_ACCOUNT_ID }}
      image-uri: ${{ steps.set-env.outputs.IMAGE_URI }}
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
      - name: Set test environment
        id: set-env
        run: |
          if [[ "${{ needs.build-vllm-ec2-image.result }}" == "success" ]]; then
            AWS_ACCOUNT_ID=${{ vars.CI_AWS_ACCOUNT_ID }}
            IMAGE_URI=${{ needs.build-vllm-ec2-image.outputs.ci-image }}
          else
            AWS_ACCOUNT_ID=${{ vars.PROD_AWS_ACCOUNT_ID }}
            IMAGE_URI=${{ vars.PROD_AWS_ACCOUNT_ID }}.dkr.ecr.${{ vars.AWS_REGION }}.amazonaws.com/${{ env.PROD_EC2_IMAGE }}
          fi
          echo "Image URI to test: ${IMAGE_URI}"
          echo "AWS_ACCOUNT_ID=${AWS_ACCOUNT_ID}" >> ${GITHUB_OUTPUT}
          echo "IMAGE_URI=${IMAGE_URI}" >> ${GITHUB_OUTPUT}
  vllm-ec2-regression-test:
    needs: [build-vllm-ec2-image, set-ec2-test-environment]
    if: success()
    runs-on:
      - codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }}
        fleet:x86-g6xl-runner
        buildspec-override:true
    concurrency:
      group: ${{ github.workflow }}-vllm-ec2-regression-test-${{ github.event.pull_request.number }}
      cancel-in-progress: true
    steps:
      - name: Checkout DLC source
        uses: actions/checkout@v5
      - name: Container pull
        uses: ./.github/actions/ecr-authenticate
        with:
          aws-account-id: ${{ needs.set-ec2-test-environment.outputs.aws-account-id }}
          aws-region: ${{ vars.AWS_REGION }}
          image-uri: ${{ needs.set-ec2-test-environment.outputs.image-uri }}
      - name: Checkout vLLM tests
        uses: actions/checkout@v5
        with:
          repository: vllm-project/vllm
          ref: v${{ env.VLLM_VERSION }}
          path: vllm_source
      - name: Start container
        run: |
          CONTAINER_ID=$(docker run -d -it --rm --gpus=all --entrypoint /bin/bash \
            -v ${HOME}/.cache/huggingface:/root/.cache/huggingface \
            -v ${HOME}/.cache/vllm:/root/.cache/vllm \
            -v ./vllm_source:/workdir --workdir /workdir \
            -e HF_TOKEN=${{ secrets.HUGGING_FACE_HUB_TOKEN }} \
            ${{ needs.set-ec2-test-environment.outputs.image-uri }})
          echo "CONTAINER_ID=$CONTAINER_ID" >> $GITHUB_ENV
      - name: Setup for vLLM tests
        run: |
          docker exec ${CONTAINER_ID} sh -c '
            set -eux
            uv pip install --system -r requirements/common.txt -r requirements/dev.txt --torch-backend=auto
            uv pip install --system pytest pytest-asyncio
            uv pip install --system -e tests/vllm_test_utils
            uv pip install --system hf_transfer
            mkdir src
            mv vllm src/vllm
          '
      - name: Run vLLM tests
        run: |
          docker exec ${CONTAINER_ID} sh -c '
            set -eux
            nvidia-smi
            # Regression Test # 7min
            cd /workdir/tests
            uv pip install --system modelscope
            pytest -v -s test_regression.py
          '
  vllm-ec2-cuda-test:
    needs: [build-vllm-ec2-image, set-ec2-test-environment]
    if: success()
    runs-on:
      - codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }}
        fleet:x86-g6xl-runner
        buildspec-override:true
    concurrency:
      group: ${{ github.workflow }}-vllm-ec2-cuda-test-${{ github.event.pull_request.number }}
      cancel-in-progress: true
    steps:
      - name: Checkout DLC source
        uses: actions/checkout@v5
      - name: Container pull
        uses: ./.github/actions/ecr-authenticate
        with:
          aws-account-id: ${{ needs.set-ec2-test-environment.outputs.aws-account-id }}
          aws-region: ${{ vars.AWS_REGION }}
          image-uri: ${{ needs.set-ec2-test-environment.outputs.image-uri }}
      - name: Checkout vLLM tests
        uses: actions/checkout@v5
        with:
          repository: vllm-project/vllm
          ref: v${{ env.VLLM_VERSION }}
          path: vllm_source
      - name: Start container
        run: |
          CONTAINER_ID=$(docker run -d -it --rm --gpus=all --entrypoint /bin/bash \
            -v ${HOME}/.cache/huggingface:/root/.cache/huggingface \
            -v ${HOME}/.cache/vllm:/root/.cache/vllm \
            -v ./vllm_source:/workdir --workdir /workdir \
            -e HF_TOKEN=${{ secrets.HUGGING_FACE_HUB_TOKEN }} \
            ${{ needs.set-ec2-test-environment.outputs.image-uri }})
          echo "CONTAINER_ID=$CONTAINER_ID" >> $GITHUB_ENV
      - name: Setup for vLLM tests
        run: |
          docker exec ${CONTAINER_ID} sh -c '
            set -eux
            uv pip install --system -r requirements/common.txt -r requirements/dev.txt --torch-backend=auto
            uv pip install --system pytest pytest-asyncio
            uv pip install --system -e tests/vllm_test_utils
            uv pip install --system hf_transfer
            mkdir src
            mv vllm src/vllm
          '
      - name: Run vLLM tests
        run: |
          docker exec ${CONTAINER_ID} sh -c '
            set -eux
            nvidia-smi
            # Platform Tests (CUDA) # 4min
            cd /workdir/tests
            pytest -v -s cuda/test_cuda_context.py
          '
  vllm-ec2-example-test:
    needs: [build-vllm-ec2-image, set-ec2-test-environment]
    if: success()
    runs-on:
      - codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }}
        fleet:x86-g6xl-runner
        buildspec-override:true
    concurrency:
      group: ${{ github.workflow }}-vllm-ec2-example-test-${{ github.event.pull_request.number }}
      cancel-in-progress: true
    steps:
      - name: Checkout DLC source
        uses: actions/checkout@v5
      - name: Container pull
        uses: ./.github/actions/ecr-authenticate
        with:
          aws-account-id: ${{ needs.set-ec2-test-environment.outputs.aws-account-id }}
          aws-region: ${{ vars.AWS_REGION }}
          image-uri: ${{ needs.set-ec2-test-environment.outputs.image-uri }}
      - name: Checkout vLLM tests
        uses: actions/checkout@v5
        with:
          repository: vllm-project/vllm
          ref: v${{ env.VLLM_VERSION }}
          path: vllm_source
      - name: Start container
        run: |
          CONTAINER_ID=$(docker run -d -it --rm --gpus=all --entrypoint /bin/bash \
            -v ${HOME}/.cache/huggingface:/root/.cache/huggingface \
            -v ${HOME}/.cache/vllm:/root/.cache/vllm \
            -v ./vllm_source:/workdir --workdir /workdir \
            -e HF_TOKEN=${{ secrets.HUGGING_FACE_HUB_TOKEN }} \
            ${{ needs.set-ec2-test-environment.outputs.image-uri }})
          echo "CONTAINER_ID=$CONTAINER_ID" >> $GITHUB_ENV
      - name: Setup for vLLM tests
        run: |
          docker exec ${CONTAINER_ID} sh -c '
            set -eux
            uv pip install --system -r requirements/common.txt -r requirements/dev.txt --torch-backend=auto
            uv pip install --system pytest pytest-asyncio
            uv pip install --system -e tests/vllm_test_utils
            uv pip install --system hf_transfer
            uv pip install --system transformers==4.57.3
            mkdir src
            mv vllm src/vllm
          '
      - name: Run vLLM tests
        run: |
          docker exec ${CONTAINER_ID} sh -c '
            set -eux
            nvidia-smi
            uv pip list | grep transformers
            # Examples Test # 30min
            cd /workdir/examples
            pip install tensorizer # for tensorizer test
            python3 offline_inference/basic/generate.py --model facebook/opt-125m
            # python3 offline_inference/basic/generate.py --model meta-llama/Llama-2-13b-chat-hf --cpu-offload-gb 10
            python3 offline_inference/basic/chat.py
            python3 offline_inference/prefix_caching.py
            python3 offline_inference/llm_engine_example.py
            python3 offline_inference/audio_language.py --seed 0
            python3 offline_inference/vision_language.py --seed 0
            python3 offline_inference/vision_language_pooling.py --seed 0
            python3 offline_inference/vision_language_multi_image.py --seed 0
            VLLM_USE_V1=0 python3 others/tensorize_vllm_model.py --model facebook/opt-125m serialize --serialized-directory /tmp/ --suffix v1 && python3 others/tensorize_vllm_model.py --model facebook/opt-125m deserialize --path-to-tensors /tmp/vllm/facebook/opt-125m/v1/model.tensors
            python3 offline_inference/encoder_decoder_multimodal.py --model-type whisper --seed 0
            python3 offline_inference/basic/classify.py
            python3 offline_inference/basic/embed.py
            python3 offline_inference/basic/score.py
            python3 offline_inference/simple_profiling.py
          '
  # ===================================================
  # =============== vLLM RayServe jobs ================
  # ===================================================
  build-vllm-rayserve-image:
    needs: [check-changes]
    if: needs.check-changes.outputs.build-change == 'true'
    runs-on:
      - codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }}
        fleet:x86-build-runner
        buildspec-override:true
    concurrency:
      group: ${{ github.workflow }}-build-vllm-rayserve-image-${{ github.event.pull_request.number }}
      cancel-in-progress: true
    outputs:
      ci-image: ${{ steps.image-uri-build.outputs.CI_IMAGE_URI }}
    steps:
      - uses: actions/checkout@v5
      - run: .github/scripts/buildkitd.sh
      - name: ECR login
        uses: ./.github/actions/ecr-authenticate
        with:
          aws-account-id: ${{ vars.CI_AWS_ACCOUNT_ID }}
          aws-region: ${{ vars.AWS_REGION }}
      - name: Resolve image URI for build
        id: image-uri-build
        run: |
          CI_IMAGE_URI=${{ vars.CI_AWS_ACCOUNT_ID }}.dkr.ecr.${{ vars.AWS_REGION }}.amazonaws.com/ci:vllm-${{ env.VLLM_VERSION }}-gpu-${{ env.PYTHON_VERSION }}-${{ env.CUDA_VERSION }}-${{ env.OS_VERSION }}-rayserve-ec2-pr-${{ github.event.pull_request.number }}
          echo "Image URI to build: ${CI_IMAGE_URI}"
          echo "CI_IMAGE_URI=${CI_IMAGE_URI}" >> ${GITHUB_ENV}
          echo "CI_IMAGE_URI=${CI_IMAGE_URI}" >> ${GITHUB_OUTPUT}
      - name: Build image
        run: |
          # base image: https://hub.docker.com/r/vllm/vllm-openai/tags
          docker buildx build --progress plain \
            --build-arg CACHE_REFRESH="$(date +"%Y-%m-%d")" \
            --build-arg BASE_IMAGE="vllm/vllm-openai:v${{ env.VLLM_RAYSERVE_VERSION }}" \
            --cache-to=type=inline \
            --cache-from=type=registry,ref=${CI_IMAGE_URI} \
            --tag ${CI_IMAGE_URI} \
            --target vllm-rayserve-ec2 \
            -f docker/vllm/Dockerfile .
      - name: Container push
        run: |
          docker push ${CI_IMAGE_URI}
          docker rmi ${CI_IMAGE_URI}
  set-rayserve-test-environment:
    needs: [check-changes, build-vllm-rayserve-image]
    if: |
      always() && !failure() && !cancelled() &&
      (needs.check-changes.outputs.build-change == 'true' || needs.check-changes.outputs.test-change == 'true')
    runs-on: ubuntu-latest
    concurrency:
      group: ${{ github.workflow }}-set-rayserve-test-environment-${{ github.event.pull_request.number }}
      cancel-in-progress: true
    outputs:
      aws-account-id: ${{ steps.set-env.outputs.AWS_ACCOUNT_ID }}
      image-uri: ${{ steps.set-env.outputs.IMAGE_URI }}
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
      - name: Set test environment
        id: set-env
        run: |
          if [[ "${{ needs.build-vllm-rayserve-image.result }}" == "success" ]]; then
            AWS_ACCOUNT_ID=${{ vars.CI_AWS_ACCOUNT_ID }}
            IMAGE_URI=${{ needs.build-vllm-rayserve-image.outputs.ci-image }}
          else
            AWS_ACCOUNT_ID=${{ vars.PROD_AWS_ACCOUNT_ID }}
            IMAGE_URI=${{ vars.PROD_AWS_ACCOUNT_ID }}.dkr.ecr.${{ vars.AWS_REGION }}.amazonaws.com/${{ env.PROD_RAYSERVE_IMAGE }}
          fi
          echo "Image URI to test: ${IMAGE_URI}"
          echo "AWS_ACCOUNT_ID=${AWS_ACCOUNT_ID}" >> ${GITHUB_OUTPUT}
          echo "IMAGE_URI=${IMAGE_URI}" >> ${GITHUB_OUTPUT}
  vllm-rayserve-regression-test:
    needs: [build-vllm-rayserve-image, set-rayserve-test-environment]
    if: success()
    runs-on:
      - codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }}
        fleet:x86-g6xl-runner
        buildspec-override:true
    concurrency:
      group: ${{ github.workflow }}-vllm-rayserve-regression-test-${{ github.event.pull_request.number }}
      cancel-in-progress: true
    steps:
      - name: Checkout DLC source
        uses: actions/checkout@v5
      - name: Container pull
        uses: ./.github/actions/ecr-authenticate
        with:
          aws-account-id: ${{ needs.set-rayserve-test-environment.outputs.aws-account-id }}
          aws-region: ${{ vars.AWS_REGION }}
          image-uri: ${{ needs.set-rayserve-test-environment.outputs.image-uri }}
      - name: Checkout vLLM tests
        uses: actions/checkout@v5
        with:
          repository: vllm-project/vllm
          ref: v${{ env.VLLM_RAYSERVE_VERSION }}
          path: vllm_source
      - name: Start container
        run: |
          CONTAINER_ID=$(docker run -d -it --rm --gpus=all --entrypoint /bin/bash \
            -v ${HOME}/.cache/huggingface:/root/.cache/huggingface \
            -v ${HOME}/.cache/vllm:/root/.cache/vllm \
            -v ./vllm_source:/workdir --workdir /workdir \
            -e HF_TOKEN=${{ secrets.HUGGING_FACE_HUB_TOKEN }} \
            ${{ needs.set-rayserve-test-environment.outputs.image-uri }})
          echo "CONTAINER_ID=$CONTAINER_ID" >> $GITHUB_ENV
      - name: Setup for vLLM tests
        run: |
          docker exec ${CONTAINER_ID} sh -c '
            set -eux
            uv pip install --system -r requirements/common.txt -r requirements/dev.txt --torch-backend=auto
            uv pip install --system pytest pytest-asyncio
            uv pip install --system -e tests/vllm_test_utils
            uv pip install --system hf_transfer
            mkdir src
            mv vllm src/vllm
          '
      - name: Run vLLM tests
        run: |
          docker exec ${CONTAINER_ID} sh -c '
            set -eux
            nvidia-smi
            # Regression Test # 7min
            cd /workdir/tests
            uv pip install --system modelscope
            pytest -v -s test_regression.py
          '
  vllm-rayserve-cuda-test:
    needs: [build-vllm-rayserve-image, set-rayserve-test-environment]
    if: success()
    runs-on:
      - codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }}
        fleet:x86-g6xl-runner
        buildspec-override:true
    concurrency:
      group: ${{ github.workflow }}-vllm-rayserve-cuda-test-${{ github.event.pull_request.number }}
      cancel-in-progress: true
    steps:
      - name: Checkout DLC source
        uses: actions/checkout@v5
      - name: Container pull
        uses: ./.github/actions/ecr-authenticate
        with:
          aws-account-id: ${{ needs.set-rayserve-test-environment.outputs.aws-account-id }}
          aws-region: ${{ vars.AWS_REGION }}
          image-uri: ${{ needs.set-rayserve-test-environment.outputs.image-uri }}
      - name: Checkout vLLM tests
        uses: actions/checkout@v5
        with:
          repository: vllm-project/vllm
          ref: v${{ env.VLLM_RAYSERVE_VERSION }}
          path: vllm_source
      - name: Start container
        run: |
          CONTAINER_ID=$(docker run -d -it --rm --gpus=all --entrypoint /bin/bash \
            -v ${HOME}/.cache/huggingface:/root/.cache/huggingface \
            -v ${HOME}/.cache/vllm:/root/.cache/vllm \
            -v ./vllm_source:/workdir --workdir /workdir \
            -e HF_TOKEN=${{ secrets.HUGGING_FACE_HUB_TOKEN }} \
            ${{ needs.set-rayserve-test-environment.outputs.image-uri }})
          echo "CONTAINER_ID=$CONTAINER_ID" >> $GITHUB_ENV
      - name: Setup for vLLM tests
        run: |
          docker exec ${CONTAINER_ID} sh -c '
            set -eux
            uv pip install --system -r requirements/common.txt -r requirements/dev.txt --torch-backend=auto
            uv pip install --system pytest pytest-asyncio
            uv pip install --system -e tests/vllm_test_utils
            uv pip install --system hf_transfer
            mkdir src
            mv vllm src/vllm
          '
      - name: Run vLLM tests
        run: |
          docker exec ${CONTAINER_ID} sh -c '
            set -eux
            nvidia-smi
            # Platform Tests (CUDA) # 4min
            cd /workdir/tests
            pytest -v -s cuda/test_cuda_context.py
          '
  vllm-rayserve-example-test:
    needs: [build-vllm-rayserve-image, set-rayserve-test-environment]
    if: success()
    runs-on:
      - codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }}
        fleet:x86-g6xl-runner
        buildspec-override:true
    concurrency:
      group: ${{ github.workflow }}-vllm-rayserve-example-test-${{ github.event.pull_request.number }}
      cancel-in-progress: true
    steps:
      - name: Checkout DLC source
        uses: actions/checkout@v5
      - name: Container pull
        uses: ./.github/actions/ecr-authenticate
        with:
          aws-account-id: ${{ needs.set-rayserve-test-environment.outputs.aws-account-id }}
          aws-region: ${{ vars.AWS_REGION }}
          image-uri: ${{ needs.set-rayserve-test-environment.outputs.image-uri }}
      - name: Checkout vLLM tests
        uses: actions/checkout@v5
        with:
          repository: vllm-project/vllm
          ref: v${{ env.VLLM_RAYSERVE_VERSION }}
          path: vllm_source
      - name: Start container
        run: |
          CONTAINER_ID=$(docker run -d -it --rm --gpus=all --entrypoint /bin/bash \
            -v ${HOME}/.cache/huggingface:/root/.cache/huggingface \
            -v ${HOME}/.cache/vllm:/root/.cache/vllm \
            -v ./vllm_source:/workdir --workdir /workdir \
            -e HF_TOKEN=${{ secrets.HUGGING_FACE_HUB_TOKEN }} \
            ${{ needs.set-rayserve-test-environment.outputs.image-uri }})
          echo "CONTAINER_ID=$CONTAINER_ID" >> $GITHUB_ENV
      - name: Setup for vLLM tests
        run: |
          docker exec ${CONTAINER_ID} sh -c '
            set -eux
            uv pip install --system -r requirements/common.txt -r requirements/dev.txt --torch-backend=auto
            uv pip install --system pytest pytest-asyncio
            uv pip install --system -e tests/vllm_test_utils
            uv pip install --system hf_transfer
            uv pip install --system transformers==4.57.3
            mkdir src
            mv vllm src/vllm
          '
      - name: Run vLLM tests
        run: |
          docker exec ${CONTAINER_ID} sh -c '
            set -eux
            nvidia-smi
            uv pip list | grep transformers
            # Examples Test # 30min
            cd /workdir/examples
            pip install tensorizer # for tensorizer test
            python3 offline_inference/basic/generate.py --model facebook/opt-125m
            # python3 offline_inference/basic/generate.py --model meta-llama/Llama-2-13b-chat-hf --cpu-offload-gb 10
            python3 offline_inference/basic/chat.py
            python3 offline_inference/prefix_caching.py
            python3 offline_inference/llm_engine_example.py
            python3 offline_inference/audio_language.py --seed 0
            python3 offline_inference/vision_language.py --seed 0
            python3 offline_inference/vision_language_pooling.py --seed 0
            python3 offline_inference/vision_language_multi_image.py --seed 0
            VLLM_USE_V1=0 python3 others/tensorize_vllm_model.py --model facebook/opt-125m serialize --serialized-directory /tmp/ --suffix v1 && python3 others/tensorize_vllm_model.py --model facebook/opt-125m deserialize --path-to-tensors /tmp/vllm/facebook/opt-125m/v1/model.tensors
            python3 offline_inference/encoder_decoder_multimodal.py --model-type whisper --seed 0
            python3 offline_inference/basic/classify.py
            python3 offline_inference/basic/embed.py
            python3 offline_inference/basic/score.py
            VLLM_USE_V1=0 python3 offline_inference/profiling.py --model facebook/opt-125m run_num_steps --num-steps 2
          '
  # ====================================================
  # =============== vLLM SageMaker jobs ================
  # ====================================================
  build-vllm-sagemaker-image:
    needs: [check-changes]
    if: needs.check-changes.outputs.build-change == 'true'
    runs-on:
      - codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }}
        fleet:x86-build-runner
        buildspec-override:true
    concurrency:
      group: ${{ github.workflow }}-build-vllm-sagemaker-image-${{ github.event.pull_request.number }}
      cancel-in-progress: true
    outputs:
      ci-image: ${{ steps.image-uri-build.outputs.CI_IMAGE_URI }}
    steps:
      - uses: actions/checkout@v5
      - run: .github/scripts/buildkitd.sh
      - name: ECR login
        uses: ./.github/actions/ecr-authenticate
        with:
          aws-account-id: ${{ vars.CI_AWS_ACCOUNT_ID }}
          aws-region: ${{ vars.AWS_REGION }}
      - name: Resolve image URI for build
        id: image-uri-build
        run: |
          CI_IMAGE_URI=${{ vars.CI_AWS_ACCOUNT_ID }}.dkr.ecr.${{ vars.AWS_REGION }}.amazonaws.com/ci:vllm-${{ env.VLLM_VERSION }}-gpu-${{ env.PYTHON_VERSION }}-${{ env.CUDA_VERSION }}-${{ env.OS_VERSION }}-sagemaker-pr-${{ github.event.pull_request.number }}
          echo "Image URI to build: ${CI_IMAGE_URI}"
          echo "CI_IMAGE_URI=${CI_IMAGE_URI}" >> ${GITHUB_ENV}
          echo "CI_IMAGE_URI=${CI_IMAGE_URI}" >> ${GITHUB_OUTPUT}
      - name: Build image
        run: |
          # base image: https://hub.docker.com/r/vllm/vllm-openai/tags
          docker buildx build --progress plain \
            --build-arg CACHE_REFRESH="$(date +"%Y-%m-%d")" \
            --build-arg BASE_IMAGE="vllm/vllm-openai:v${{ env.VLLM_VERSION }}" \
            --cache-to=type=inline \
            --cache-from=type=registry,ref=${CI_IMAGE_URI} \
            --tag ${CI_IMAGE_URI} \
            --target vllm-sagemaker \
            -f docker/vllm/Dockerfile .
      - name: Container push
        run: |
          docker push ${CI_IMAGE_URI}
          docker rmi ${CI_IMAGE_URI}
  set-sagemaker-test-environment:
    needs: [check-changes, build-vllm-sagemaker-image]
    if: |
      always() && !failure() && !cancelled() &&
      (needs.check-changes.outputs.build-change == 'true' || needs.check-changes.outputs.test-change == 'true')
    runs-on: ubuntu-latest
    concurrency:
      group: ${{ github.workflow }}-set-sagemaker-test-environment-${{ github.event.pull_request.number }}
      cancel-in-progress: true
    outputs:
      aws-account-id: ${{ steps.set-env.outputs.AWS_ACCOUNT_ID }}
      image-uri: ${{ steps.set-env.outputs.IMAGE_URI }}
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
      - name: Set test environment
        id: set-env
        run: |
          if [[ "${{ needs.build-vllm-sagemaker-image.result }}" == "success" ]]; then
            AWS_ACCOUNT_ID=${{ vars.CI_AWS_ACCOUNT_ID }}
            IMAGE_URI=${{ needs.build-vllm-sagemaker-image.outputs.ci-image }}
          else
            AWS_ACCOUNT_ID=${{ vars.PROD_AWS_ACCOUNT_ID }}
            IMAGE_URI=${{ vars.PROD_AWS_ACCOUNT_ID }}.dkr.ecr.${{ vars.AWS_REGION }}.amazonaws.com/${{ env.PROD_SAGEMAKER_IMAGE }}
          fi
          echo "Image URI to test: ${IMAGE_URI}"
          echo "AWS_ACCOUNT_ID=${AWS_ACCOUNT_ID}" >> ${GITHUB_OUTPUT}
          echo "IMAGE_URI=${IMAGE_URI}" >> ${GITHUB_OUTPUT}
  vllm-sagemaker-regression-test:
    needs: [build-vllm-sagemaker-image, set-sagemaker-test-environment]
    if: success()
    runs-on:
      - codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }}
        fleet:x86-g6xl-runner
        buildspec-override:true
    concurrency:
      group: ${{ github.workflow }}-vllm-sagemaker-regression-test-${{ github.event.pull_request.number }}
      cancel-in-progress: true
    steps:
      - name: Checkout DLC source
        uses: actions/checkout@v5
      - name: Container pull
        uses: ./.github/actions/ecr-authenticate
        with:
          aws-account-id: ${{ needs.set-sagemaker-test-environment.outputs.aws-account-id }}
          aws-region: ${{ vars.AWS_REGION }}
          image-uri: ${{ needs.set-sagemaker-test-environment.outputs.image-uri }}
      - name: Checkout vLLM tests
        uses: actions/checkout@v5
        with:
          repository: vllm-project/vllm
          ref: v${{ env.VLLM_VERSION }}
          path: vllm_source
      - name: Start container
        run: |
          CONTAINER_ID=$(docker run -d -it --rm --gpus=all --entrypoint /bin/bash \
            -v ${HOME}/.cache/huggingface:/root/.cache/huggingface \
            -v ${HOME}/.cache/vllm:/root/.cache/vllm \
            -v ./vllm_source:/workdir --workdir /workdir \
            -e HF_TOKEN=${{ secrets.HUGGING_FACE_HUB_TOKEN }} \
            ${{ needs.set-sagemaker-test-environment.outputs.image-uri }})
          echo "CONTAINER_ID=$CONTAINER_ID" >> $GITHUB_ENV
      - name: Setup for vLLM tests
        run: |
          docker exec ${CONTAINER_ID} sh -c '
            set -eux
            uv pip install --system -r requirements/common.txt -r requirements/dev.txt --torch-backend=auto
            uv pip install --system pytest pytest-asyncio
            uv pip install --system -e tests/vllm_test_utils
            uv pip install --system hf_transfer
            mkdir src
            mv vllm src/vllm
          '
      - name: Run vLLM tests
        run: |
          docker exec ${CONTAINER_ID} sh -c '
            set -eux
            nvidia-smi
            # Regression Test # 7min
            cd /workdir/tests
            uv pip install --system modelscope
            pytest -v -s test_regression.py
          '
  vllm-sagemaker-cuda-test:
    needs: [build-vllm-sagemaker-image, set-sagemaker-test-environment]
    if: success()
    runs-on:
      - codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }}
        fleet:x86-g6xl-runner
        buildspec-override:true
    concurrency:
      group: ${{ github.workflow }}-vllm-sagemaker-cuda-test-${{ github.event.pull_request.number }}
      cancel-in-progress: true
    steps:
      - name: Checkout DLC source
        uses: actions/checkout@v5
      - name: Container pull
        uses: ./.github/actions/ecr-authenticate
        with:
          aws-account-id: ${{ needs.set-sagemaker-test-environment.outputs.aws-account-id }}
          aws-region: ${{ vars.AWS_REGION }}
          image-uri: ${{ needs.set-sagemaker-test-environment.outputs.image-uri }}
      - name: Checkout vLLM tests
        uses: actions/checkout@v5
        with:
          repository: vllm-project/vllm
          ref: v${{ env.VLLM_VERSION }}
          path: vllm_source
      - name: Start container
        run: |
          CONTAINER_ID=$(docker run -d -it --rm --gpus=all --entrypoint /bin/bash \
            -v ${HOME}/.cache/huggingface:/root/.cache/huggingface \
            -v ${HOME}/.cache/vllm:/root/.cache/vllm \
            -v ./vllm_source:/workdir --workdir /workdir \
            -e HF_TOKEN=${{ secrets.HUGGING_FACE_HUB_TOKEN }} \
            ${{ needs.set-sagemaker-test-environment.outputs.image-uri }})
          echo "CONTAINER_ID=$CONTAINER_ID" >> $GITHUB_ENV
      - name: Setup for vLLM tests
        run: |
          docker exec ${CONTAINER_ID} sh -c '
            set -eux
            uv pip install --system -r requirements/common.txt -r requirements/dev.txt --torch-backend=auto
            uv pip install --system pytest pytest-asyncio
            uv pip install --system -e tests/vllm_test_utils
            uv pip install --system hf_transfer
            mkdir src
            mv vllm src/vllm
          '
      - name: Run vLLM tests
        run: |
          docker exec ${CONTAINER_ID} sh -c '
            set -eux
            nvidia-smi
            # Platform Tests (CUDA) # 4min
            cd /workdir/tests
            pytest -v -s cuda/test_cuda_context.py
          '
  vllm-sagemaker-example-test:
    needs: [build-vllm-sagemaker-image, set-sagemaker-test-environment]
    if: success()
    runs-on:
      - codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }}
        fleet:x86-g6xl-runner
        buildspec-override:true
    concurrency:
      group: ${{ github.workflow }}-vllm-sagemaker-example-test-${{ github.event.pull_request.number }}
      cancel-in-progress: true
    steps:
      - name: Checkout DLC source
        uses: actions/checkout@v5
      - name: Container pull
        uses: ./.github/actions/ecr-authenticate
        with:
          aws-account-id: ${{ needs.set-sagemaker-test-environment.outputs.aws-account-id }}
          aws-region: ${{ vars.AWS_REGION }}
          image-uri: ${{ needs.set-sagemaker-test-environment.outputs.image-uri }}
      - name: Checkout vLLM tests
        uses: actions/checkout@v5
        with:
          repository: vllm-project/vllm
          ref: v${{ env.VLLM_VERSION }}
          path: vllm_source
      - name: Start container
        run: |
          CONTAINER_ID=$(docker run -d -it --rm --gpus=all --entrypoint /bin/bash \
            -v ${HOME}/.cache/huggingface:/root/.cache/huggingface \
            -v ${HOME}/.cache/vllm:/root/.cache/vllm \
            -v ./vllm_source:/workdir --workdir /workdir \
            -e HF_TOKEN=${{ secrets.HUGGING_FACE_HUB_TOKEN }} \
            ${{ needs.set-sagemaker-test-environment.outputs.image-uri }})
          echo "CONTAINER_ID=$CONTAINER_ID" >> $GITHUB_ENV
      - name: Setup for vLLM tests
        run: |
          docker exec ${CONTAINER_ID} sh -c '
            set -eux
            uv pip install --system -r requirements/common.txt -r requirements/dev.txt --torch-backend=auto
            uv pip install --system pytest pytest-asyncio
            uv pip install --system -e tests/vllm_test_utils
            uv pip install --system hf_transfer
            uv pip install --system transformers==4.57.3
            mkdir src
            mv vllm src/vllm
          '
      - name: Run vLLM tests
        run: |
          docker exec ${CONTAINER_ID} sh -c '
            set -eux
            nvidia-smi
            uv pip list | grep transformers
            # Examples Test # 30min
            cd /workdir/examples
            pip install tensorizer # for tensorizer test
            python3 offline_inference/basic/generate.py --model facebook/opt-125m
            # python3 offline_inference/basic/generate.py --model meta-llama/Llama-2-13b-chat-hf --cpu-offload-gb 10
            python3 offline_inference/basic/chat.py
            python3 offline_inference/prefix_caching.py
            python3 offline_inference/llm_engine_example.py
            python3 offline_inference/audio_language.py --seed 0
            python3 offline_inference/vision_language.py --seed 0
            python3 offline_inference/vision_language_pooling.py --seed 0
            python3 offline_inference/vision_language_multi_image.py --seed 0
            VLLM_USE_V1=0 python3 others/tensorize_vllm_model.py --model facebook/opt-125m serialize --serialized-directory /tmp/ --suffix v1 && python3 others/tensorize_vllm_model.py --model facebook/opt-125m deserialize --path-to-tensors /tmp/vllm/facebook/opt-125m/v1/model.tensors
            python3 offline_inference/encoder_decoder_multimodal.py --model-type whisper --seed 0
            python3 offline_inference/basic/classify.py
            python3 offline_inference/basic/embed.py
            python3 offline_inference/basic/score.py
            python3 offline_inference/simple_profiling.py
          '
  vllm-sagemaker-endpoint-test:
    needs: [set-sagemaker-test-environment]
    if: |
      always() && !failure() && !cancelled() &&
      needs.set-sagemaker-test-environment.result == 'success'
    runs-on:
      - codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }}
        fleet:default-runner
        buildspec-override:true
    concurrency:
      group: ${{ github.workflow }}-vllm-sagemaker-endpoint-test-${{ github.event.pull_request.number }}
      cancel-in-progress: false
    steps:
      - name: Checkout DLC source
        uses: actions/checkout@v5
      - name: Install test dependencies
        run: |
          uv venv
          source .venv/bin/activate
          uv pip install -r test/requirements.txt
          uv pip install -r test/vllm/sagemaker/requirements.txt
      - name: Run sagemaker endpoint test
        run: |
          source .venv/bin/activate
          python test/vllm/sagemaker/test_sm_endpoint.py --image-uri ${{ needs.set-sagemaker-test-environment.outputs.image-uri }} --endpoint-name test-sm-vllm-endpoint-${{ github.sha }}
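One reading note on the file above: the nine GPU test jobs (EC2, RayServe, and SageMaker, each with regression, CUDA, and example variants) all share the same shape and differ only in the image URI, the vLLM tag they check out, the extra packages they install, and the final test command. Condensed into a single hypothetical job for orientation (placeholder names such as `gpu-test-template`, `IMAGE_URI`, `VLLM_REF`, and `TEST_COMMAND` are illustrative, and the ECR authenticate/pull step is omitted), the pattern is roughly:

```yaml
# Hypothetical condensed sketch of the repeated GPU test-job pattern above;
# not part of the actual workflow file.
  gpu-test-template:
    runs-on:
      - codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }}
        fleet:x86-g6xl-runner
        buildspec-override:true
    steps:
      - name: Checkout DLC source
        uses: actions/checkout@v5
      - name: Checkout vLLM tests
        uses: actions/checkout@v5
        with:
          repository: vllm-project/vllm
          ref: ${{ env.VLLM_REF }}   # v0.11.2 or v0.10.2 in the real jobs
          path: vllm_source
      - name: Start container
        run: |
          # detached GPU container with the HF cache and vLLM checkout mounted
          CONTAINER_ID=$(docker run -d -it --rm --gpus=all --entrypoint /bin/bash \
            -v ${HOME}/.cache/huggingface:/root/.cache/huggingface \
            -v ./vllm_source:/workdir --workdir /workdir \
            -e HF_TOKEN=${{ secrets.HUGGING_FACE_HUB_TOKEN }} \
            ${{ env.IMAGE_URI }})
          echo "CONTAINER_ID=$CONTAINER_ID" >> $GITHUB_ENV
      - name: Install test dependencies and run the suite
        run: |
          docker exec ${CONTAINER_ID} sh -c '
            set -eux
            uv pip install --system -r requirements/common.txt -r requirements/dev.txt --torch-backend=auto
            uv pip install --system pytest pytest-asyncio hf_transfer
            uv pip install --system -e tests/vllm_test_utils
            mkdir src && mv vllm src/vllm
            cd /workdir/tests
            ${{ env.TEST_COMMAND }}   # e.g. pytest -v -s test_regression.py
          '
```

Each real job fills those placeholders from the corresponding `set-*-test-environment` outputs and the `VLLM_VERSION` / `VLLM_RAYSERVE_VERSION` values defined in `env`.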