diff --git a/.github/actions/generate-release-spec/action.yml b/.github/actions/generate-release-spec/action.yml
new file mode 100644
index 000000000000..917869161965
--- /dev/null
+++ b/.github/actions/generate-release-spec/action.yml
@@ -0,0 +1,97 @@
+name: 'Generate Release Spec'
+description: 'Generate release specification YAML from config file'
+
+inputs:
+  config-json:
+    description: 'JSON string containing the configuration'
+    required: true
+
+outputs:
+  release-spec:
+    description: 'Generated release specification YAML'
+    value: ${{ steps.generate.outputs.spec }}
+
+runs:
+  using: 'composite'
+  steps:
+    - name: Generate release spec
+      id: generate
+      shell: bash
+      env:
+        # Hand the JSON to the script through the environment so that a quote
+        # inside it cannot terminate a shell string or inject commands.
+        CONFIG_JSON: ${{ inputs.config-json }}
+      run: |
+        printf '%s\n' "${CONFIG_JSON}" > config.json
+
+        # Extract required values from config
+        FRAMEWORK=$(jq -r '.common.framework' config.json)
+        VERSION=$(jq -r '.common.framework_version' config.json)
+
+        # Validate required fields
+        if [ -z "$FRAMEWORK" ] || [ "$FRAMEWORK" == "null" ]; then
+          echo "Error: framework is required"
+          exit 1
+        fi
+
+        if [ -z "$VERSION" ] || [ "$VERSION" == "null" ]; then
+          echo "Error: version is required"
+          exit 1
+        fi
+
+        # Extract optional values from config
+        ARCH_TYPE=$(jq -r '.common.arch_type' config.json)
+        JOB_TYPE=$(jq -r '.common.job_type' config.json)
+        DEVICE_TYPE=$(jq -r '.common.device_type' config.json)
+        PYTHON_VERSION=$(jq -r '.common.python_version' config.json)
+        OS_VERSION=$(jq -r '.common.os_version' config.json)
+        CUSTOMER_TYPE=$(jq -r '.common.customer_type' config.json)
+        CUDA_VERSION=$(jq -r '.common.cuda_version' config.json)
+
+        # Extract boolean values
+        FORCE_RELEASE=$(jq -r '.release.force_release' config.json)
+        PUBLIC_REGISTRY=$(jq -r '.release.public_registry' config.json)
+        PRIVATE_REGISTRY=$(jq -r '.release.private_registry' config.json)
+        ENABLE_SOCI=$(jq -r '.release.enable_soci' config.json)
+
+        echo "Generating release spec with:"
+        echo " Framework: ${FRAMEWORK} [required]"
+        echo " Version: ${VERSION} [required]"
+        [ "$ARCH_TYPE" != "null" ] && echo " Arch Type: ${ARCH_TYPE}"
+        [ "$JOB_TYPE" != "null" ] && echo " Job Type: ${JOB_TYPE}"
+        [ "$DEVICE_TYPE" != "null" ] && echo " Device Type: ${DEVICE_TYPE}"
+        [ "$CUSTOMER_TYPE" != "null" ] && echo " Customer Type: ${CUSTOMER_TYPE}"
+        [ "$FORCE_RELEASE" != "null" ] && echo " Force Release: ${FORCE_RELEASE}"
+        [ "$PUBLIC_REGISTRY" != "null" ] && echo " Public Registry: ${PUBLIC_REGISTRY}"
+        [ "$PRIVATE_REGISTRY" != "null" ] && echo " Private Registry: ${PRIVATE_REGISTRY}"
+        [ "$ENABLE_SOCI" != "null" ] && echo " Enable SOCI: ${ENABLE_SOCI}"
+
+        # Generate release spec YAML with conditional fields
+        # NOTE(review): the multiline-output block below is truncated in this capture
+        # (delimiter and spec body are missing) - restore it from the original commit.
+        {
+          echo "spec<> ${GITHUB_OUTPUT}
+
+        echo "Release spec generated successfully"
diff --git a/.github/actions/setup-release-package/action.yml b/.github/actions/setup-release-package/action.yml
new file mode 100644
index 000000000000..8641599b4696
--- /dev/null
+++ b/.github/actions/setup-release-package/action.yml
@@ -0,0 +1,69 @@
+name: 'Setup Release Package'
+description: 'Download and install release package from S3'
+
+inputs:
+  aws-region:
+    description: 'AWS region for S3'
+    required: false
+    default: 'us-west-2'
+  release-package-s3-bucket:
+    description: 'S3 bucket containing the release package'
+    required: false
+    default: 'dlc-release-logic-v2'
+  release-package-s3-prefix:
+    description: 'S3 prefix/folder path to the package'
+    required: false
+    default: 'DLContainersReleaseLogicV2/DLContainersReleaseLogicV2'
+
+runs:
+  using: 'composite'
+  steps:
+    - name: Download release package from S3
+      shell: bash
+      run: |
+        echo "Downloading fresh release package from S3..."
+        PACKAGE_DIR="release_package"
+        mkdir -p "${PACKAGE_DIR}"
+
+        aws s3 sync \
+          "s3://${{ inputs.release-package-s3-bucket }}/${{ inputs.release-package-s3-prefix }}" \
+          "${PACKAGE_DIR}" \
+          --region "${{ inputs.aws-region }}"
+
+        # Guard against a sync that exits successfully but copies nothing
+        if [ ! -d "${PACKAGE_DIR}" ] || [ -z "$(ls -A "${PACKAGE_DIR}")" ]; then
+          echo "Error: Failed to download package from S3"
+          exit 1
+        fi
+
+        echo "Package downloaded successfully"
+
+    - name: Setup virtual environment and install package
+      shell: bash
+      run: |
+        PACKAGE_DIR="release_package"
+
+        echo "Creating virtual environment with uv..."
+        uv venv --python 3.12 .venv
+
+        echo "Installing release package..."
+        uv pip install -e "./${PACKAGE_DIR}"
+
+        echo "Making scripts executable..."
+        chmod +x "${PACKAGE_DIR}"/scripts/*
+
+        echo "Package installed successfully"
+
+    - name: Activate environment
+      shell: bash
+      run: |
+        PACKAGE_DIR="release_package"
+
+        echo "PACKAGE_DIR=${PACKAGE_DIR}" >> ${GITHUB_ENV}
+        echo "VIRTUAL_ENV=${PWD}/.venv" >> ${GITHUB_ENV}
+
+        # Add both venv/bin and scripts directory to PATH
+        echo "${PWD}/.venv/bin" >> ${GITHUB_PATH}
+        echo "${PWD}/${PACKAGE_DIR}/scripts" >> ${GITHUB_PATH}
+
+        echo "Environment activated"
diff --git a/.github/config/vllm-0.10.2-rayserve.yml b/.github/config/vllm-0.10.2-rayserve.yml
new file mode 100644
index 000000000000..80645f214e8f
--- /dev/null
+++ b/.github/config/vllm-0.10.2-rayserve.yml
@@ -0,0 +1,30 @@
+# vLLM RayServe Image Configuration
+# This file contains all configuration for building, testing, and releasing the vLLM RayServe image
+
+# Image identification
+image:
+  name: "vllm-rayserve"
+  description: "vLLM with RayServe for EC2 instances"
+
+# Build configuration
+common:
+  framework: "vllm"
+  framework_version: "0.10.2"
+  job_type: "general"
+  python_version: "py312"
+  cuda_version: "cu129"
+  os_version: "ubuntu22.04"
+  customer_type: "rayserve_ec2"
+  arch_type: "x86"
+  prod_image: "vllm:0.10-gpu-py312-rayserve"
+  device_type: "gpu"
+
+# Release configuration
+release:
+  release: true
+  force_release: false
+  public_registry: true
+  private_registry: false
+  enable_soci: true
+  source_stage: "beta"  # private for gamma test
+  target_stage: "release"  # gamma for gamma test
diff --git 
a/.github/workflows/reusable-release-image.yml b/.github/workflows/reusable-release-image.yml
new file mode 100644
index 000000000000..ac453d842e6c
--- /dev/null
+++ b/.github/workflows/reusable-release-image.yml
@@ -0,0 +1,256 @@
+name: Reusable Release Image Workflow
+
+permissions:
+  contents: read
+
+on:
+  workflow_call:
+    inputs:
+      source-image-uri:
+        description: 'Source image URI to release'
+        required: true
+        type: string
+      release-spec:
+        description: 'Release specification YAML content'
+        required: true
+        type: string
+      source-stage:
+        description: 'Source stage (e.g., private, beta)'
+        required: true
+        type: string
+      target-stage:
+        description: 'Target stage (e.g., gamma, release)'
+        required: true
+        type: string
+      aws-region:
+        description: 'AWS region'
+        required: false
+        type: string
+        default: 'us-west-2'
+      runner-fleet:
+        description: 'CodeBuild runner fleet'
+        required: false
+        type: string
+        default: 'default-runner'
+    outputs:
+      run-identifier:
+        description: 'Unique identifier for this release run'
+        value: ${{ jobs.step1-publish-images.outputs.run-identifier }}
+
+jobs:
+  step1-publish-images:
+    runs-on:
+      - codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }}
+        fleet:${{ inputs.runner-fleet }}
+        buildspec-override:true
+    outputs:
+      run-identifier: ${{ steps.setup.outputs.run-identifier }}
+      metadata-file-name: ${{ steps.setup.outputs.metadata-file-name }}
+      images-published: ${{ steps.publish.outputs.images-published }}
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v5
+
+      - name: Setup release package
+        uses: ./.github/actions/setup-release-package
+        with:
+          aws-region: ${{ inputs.aws-region }}
+
+      - name: Install SOCI and nerdctl
+        run: |
+          echo "Installing SOCI and nerdctl..."
+          ARCH_SUFFIX="amd64"
+
+          # SOCI SNAPSHOTTER
+          if command -v soci &> /dev/null; then
+            echo "SOCI already installed:"
+            soci --version
+          else
+            echo "Installing SOCI for ${ARCH_SUFFIX}..."
+            SOCI_VERSION="0.11.1"
+            wget https://github.com/awslabs/soci-snapshotter/releases/download/v${SOCI_VERSION}/soci-snapshotter-${SOCI_VERSION}-linux-${ARCH_SUFFIX}.tar.gz
+            sudo tar -C /usr/local/bin -xvf soci-snapshotter-${SOCI_VERSION}-linux-${ARCH_SUFFIX}.tar.gz soci soci-snapshotter-grpc
+            rm soci-snapshotter-${SOCI_VERSION}-linux-${ARCH_SUFFIX}.tar.gz
+            soci --version
+          fi
+
+          # NERDCTL
+          if command -v nerdctl &> /dev/null; then
+            echo "nerdctl already installed:"
+            nerdctl --version
+          else
+            echo "Installing nerdctl for ${ARCH_SUFFIX}..."
+            export CONTAINERD_ADDRESS=/var/run/docker/containerd/containerd.sock
+            NERDCTL_VERSION="2.1.5"
+            wget https://github.com/containerd/nerdctl/releases/download/v${NERDCTL_VERSION}/nerdctl-${NERDCTL_VERSION}-linux-${ARCH_SUFFIX}.tar.gz
+            sudo tar -C /usr/local/bin -xzf nerdctl-${NERDCTL_VERSION}-linux-${ARCH_SUFFIX}.tar.gz
+            sudo chmod +x /usr/local/bin/nerdctl
+            rm nerdctl-${NERDCTL_VERSION}-linux-${ARCH_SUFFIX}.tar.gz
+            nerdctl --version
+          fi
+
+          echo "CONTAINERD_ADDRESS=/var/run/docker/containerd/containerd.sock" >> ${GITHUB_ENV}
+
+      - name: Setup release environment
+        id: setup
+        env:
+          # Passed via env so the URI is never spliced into the script text
+          SOURCE_IMAGE_URI: ${{ inputs.source-image-uri }}
+        run: |
+          # Extract a unique identifier from the source image URI
+          # Example: 123456789.dkr.ecr.us-west-2.amazonaws.com/vllm:0.14.0-gpu-py3.11-cu12.4-ubuntu22.04-ec2-pr-123
+          # Extract the tag portion after the last colon
+          IMAGE_TAG="${SOURCE_IMAGE_URI##*:}"
+          # Sanitize the tag to be filesystem-safe (replace special chars with dashes)
+          SANITIZED_TAG=$(echo "${IMAGE_TAG}" | tr '/:.' '-')
+
+          RUN_IDENTIFIER="${GITHUB_RUN_ID}-${GITHUB_RUN_ATTEMPT}-${SANITIZED_TAG}"
+          METADATA_FILE="/tmp/release_metadata_${RUN_IDENTIFIER}.dict"
+
+          echo "RUN_IDENTIFIER=${RUN_IDENTIFIER}" >> ${GITHUB_ENV}
+          echo "METADATA_FILE=${METADATA_FILE}" >> ${GITHUB_ENV}
+          echo "REGION=${{ inputs.aws-region }}" >> ${GITHUB_ENV}
+          echo "SOURCE_STAGE=${{ inputs.source-stage }}" >> ${GITHUB_ENV}
+          echo "TARGET_STAGE=${{ inputs.target-stage }}" >> ${GITHUB_ENV}
+
+          echo "run-identifier=${RUN_IDENTIFIER}" >> ${GITHUB_OUTPUT}
+          echo "metadata-file-name=release_metadata_${RUN_IDENTIFIER}.dict" >> ${GITHUB_OUTPUT}
+
+          echo "Generated run identifier: ${RUN_IDENTIFIER}"
+
+      - name: Publish DLC Images
+        id: publish
+        env:
+          # Passed via env so their contents cannot alter the script itself
+          RELEASE_SPEC: ${{ inputs.release-spec }}
+          SOURCE_IMAGE_URI: ${{ inputs.source-image-uri }}
+        run: |
+          echo "=========================================="
+          echo "Step 1: Publishing DLC Images"
+          echo "=========================================="
+
+          printf '%s\n' "${RELEASE_SPEC}" > release_spec.yml
+
+          echo "Release specification:"
+          cat release_spec.yml
+
+          publish_dlc_images \
+            --release-spec release_spec.yml \
+            --source-image-uri "${SOURCE_IMAGE_URI}" \
+            --metadata-file "${METADATA_FILE}"
+
+          # Read IMAGES_PUBLISHED from the metadata file
+          # The Python script sets this in the metadata, but env vars don't propagate from subprocess
+          if [ -f "${METADATA_FILE}" ]; then
+            IMAGES_PUBLISHED=$(python3 -c "import json; print(json.load(open('${METADATA_FILE}'))['release_successful'])")
+            echo "images-published=${IMAGES_PUBLISHED}" >> ${GITHUB_OUTPUT}
+
+            if [ "${IMAGES_PUBLISHED}" == "1" ]; then
+              echo "✅ Step 1 completed: Images published successfully"
+            else
+              echo "⚠️ Step 1 completed but release was skipped (no images published)"
+            fi
+            echo "Release metadata:"
+            cat "${METADATA_FILE}"
+          else
+            echo "❌ Error: Metadata file not found"
+            exit 1
+          fi
+
+      - name: Upload metadata artifact
+        uses: actions/upload-artifact@v4
+        with:
+          name: release-metadata-${{ steps.setup.outputs.run-identifier }}
+          path: ${{ env.METADATA_FILE }}
+          retention-days: 7
+
+  step2-generate-info:
+    needs: [step1-publish-images]
+    if: needs.step1-publish-images.outputs.images-published == '1'
+    runs-on:
+      - codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }}
+        fleet:${{ inputs.runner-fleet }}
+        buildspec-override:true
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v5
+
+      - name: Setup release package
+        uses: ./.github/actions/setup-release-package
+        with:
+          aws-region: ${{ inputs.aws-region }}
+
+      - name: Download metadata artifact
+        uses: actions/download-artifact@v4
+        with:
+          name: release-metadata-${{ needs.step1-publish-images.outputs.run-identifier }}
+          path: /tmp/
+
+      - name: Setup environment
+        run: |
+          RUN_IDENTIFIER="${{ needs.step1-publish-images.outputs.run-identifier }}"
+          METADATA_FILE="/tmp/${{ needs.step1-publish-images.outputs.metadata-file-name }}"
+
+          echo "RUN_IDENTIFIER=${RUN_IDENTIFIER}" >> ${GITHUB_ENV}
+          echo "METADATA_FILE=${METADATA_FILE}" >> ${GITHUB_ENV}
+          echo "REGION=${{ inputs.aws-region }}" >> ${GITHUB_ENV}
+          echo "SOURCE_STAGE=${{ inputs.source-stage }}" >> ${GITHUB_ENV}
+          echo "TARGET_STAGE=${{ inputs.target-stage }}" >> ${GITHUB_ENV}
+
+      - name: Generate Release Information
+        run: |
+          echo "=========================================="
+          echo "Step 2: Generating Release Information"
+          echo "=========================================="
+
+          if [ ! -f "${METADATA_FILE}" ]; then
+            echo "❌ Error: Metadata file not found at ${METADATA_FILE}"
+            exit 1
+          fi
+
+          echo "Release metadata from Step 1:"
+          cat "${METADATA_FILE}"
+
+          generate_dlc_image_release_information \
+            --metadata-file "${METADATA_FILE}" \
+            --run-id "${RUN_IDENTIFIER}"
+
+          echo "✅ Step 2 completed: Release information generated and uploaded to S3"
+
+  step3-publish-notifications:
+    needs: [step1-publish-images, step2-generate-info]
+    if: needs.step1-publish-images.outputs.images-published == '1'
+    runs-on:
+      - codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }}
+        fleet:${{ inputs.runner-fleet }}
+        buildspec-override:true
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v5
+
+      - name: Setup release package
+        uses: ./.github/actions/setup-release-package
+        with:
+          aws-region: ${{ inputs.aws-region }}
+
+      - name: Setup environment
+        run: |
+          RUN_IDENTIFIER="${{ needs.step1-publish-images.outputs.run-identifier }}"
+          echo "RUN_IDENTIFIER=${RUN_IDENTIFIER}" >> ${GITHUB_ENV}
+          echo "REGION=${{ inputs.aws-region }}" >> ${GITHUB_ENV}
+          echo "SOURCE_STAGE=${{ inputs.source-stage }}" >> ${GITHUB_ENV}
+          echo "TARGET_STAGE=${{ inputs.target-stage }}" >> ${GITHUB_ENV}
+
+      - name: Publish Release Information
+        run: |
+          echo "=========================================="
+          echo "Step 3: Publishing Release Information"
+          echo "=========================================="
+
+          publish_release_information --run-id "${RUN_IDENTIFIER}"
+
+          echo "✅ Step 3 completed: Release information published successfully"
+          echo ""
+          echo "=========================================="
+          echo "🎉 All release steps completed successfully!"
+          echo "=========================================="
diff --git a/.github/workflows/vllm-rayserver-auto-release.yml b/.github/workflows/vllm-rayserver-auto-release.yml
new file mode 100644
index 000000000000..abf9494c8b86
--- /dev/null
+++ b/.github/workflows/vllm-rayserver-auto-release.yml
@@ -0,0 +1,347 @@
+name: vLLM_RAYSERVE_AUTO_RELEASE
+
+on:
+
+  schedule:
+    # Runs on Monday and Wednesday at 17:00 UTC
+    # Note: GitHub Actions uses UTC time. PST is UTC-8, PDT is UTC-7
+    # Using 17:00 UTC = 10:00 AM PDT (most of the year) / 9:00 AM PST (winter)
+    - cron: '00 17 * * 1,3' # Monday and Wednesday at 10:00 AM PDT / 9:00 AM PST
+
+permissions:
+  contents: read
+  pull-requests: read
+
+env:
+  # CI environment configuration
+  FORCE_COLOR: "1"
+
+  # Config file paths
+  RAYSERVE_CONFIG: ".github/config/vllm-0.10.2-rayserve.yml"
+
+
+jobs:
+  gatekeeper:
+    runs-on: ubuntu-latest
+    concurrency:
+      # Scheduled runs carry no pull_request payload, so fall back to the run id
+      group: ${{ github.workflow }}-gate-${{ github.event.pull_request.number || github.run_id }}
+      cancel-in-progress: true
+    steps:
+      - name: Checkout base branch (safe)
+        uses: actions/checkout@v5
+        with:
+          ref: ${{ github.event.pull_request.base.sha || github.sha }}
+          fetch-depth: 1
+
+      - name: Run permission gate (from base)
+        uses: ./.github/actions/pr-permission-gate
+
+  load-config:
+    needs: [gatekeeper]
+    if: success()
+    runs-on: ubuntu-latest
+    outputs:
+      rayserve-config: ${{ steps.load-configs.outputs.rayserve-config }}
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v5
+
+      - name: Load configuration files
+        id: load-configs
+        run: |
+          # Install yq for YAML parsing
+          sudo wget -qO /usr/local/bin/yq https://github.com/mikefarah/yq/releases/latest/download/yq_linux_amd64
+          sudo chmod +x /usr/local/bin/yq
+
+          # Load and output configs as JSON for easy parsing in other jobs
+          # Using multiline output format to handle special characters
+          # NOTE(review): the multiline-output block below is truncated in this capture
+          # (delimiter and conversion command are missing) - restore it from the original commit.
+          {
+            echo "rayserve-config<> $GITHUB_OUTPUT
+
+  # ===================================================
+  # =============== vLLM RayServe jobs ================
+  # ===================================================
+  build-vllm-rayserve-image:
+    needs: [load-config]
+    runs-on:
+      - codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }}
+        fleet:x86-build-runner
+        buildspec-override:true
+    concurrency:
+      group: ${{ github.workflow }}-build-vllm-rayserve-image-${{ github.event.pull_request.number || github.run_id }}
+      cancel-in-progress: true
+    outputs:
+      ci-image: ${{ steps.build.outputs.image-uri }}
+    steps:
+      - uses: actions/checkout@v5
+
+      - name: Parse RayServe config
+        id: config
+        env:
+          # Passed via env so quotes in the JSON cannot break out of the script
+          CONFIG_JSON: ${{ needs.load-config.outputs.rayserve-config }}
+        run: |
+          printf '%s\n' "${CONFIG_JSON}" > config.json
+          echo "framework=$(jq -r '.common.framework' config.json)" >> $GITHUB_OUTPUT
+          echo "framework-version=$(jq -r '.common.framework_version' config.json)" >> $GITHUB_OUTPUT
+          echo "container-type=$(jq -r '.common.job_type' config.json)" >> $GITHUB_OUTPUT
+          echo "python-version=$(jq -r '.common.python_version' config.json)" >> $GITHUB_OUTPUT
+          echo "cuda-version=$(jq -r '.common.cuda_version' config.json)" >> $GITHUB_OUTPUT
+          echo "os-version=$(jq -r '.common.os_version' config.json)" >> $GITHUB_OUTPUT
+          echo "device-type=$(jq -r '.common.device_type // "gpu"' config.json)" >> $GITHUB_OUTPUT
+
+      - name: Build image
+        id: build
+        uses: ./.github/actions/build-image
+        with:
+          framework: ${{ steps.config.outputs.framework }}
+          target: vllm-rayserve-ec2
+          base-image: vllm/vllm-openai:v${{ steps.config.outputs.framework-version }}
+          framework-version: ${{ steps.config.outputs.framework-version }}
+          container-type: ${{ steps.config.outputs.container-type }}
+          aws-account-id: ${{ vars.CI_AWS_ACCOUNT_ID }}
+          aws-region: ${{ vars.AWS_REGION }}
+          tag-pr: ${{ steps.config.outputs.framework }}-${{ steps.config.outputs.framework-version }}-${{ steps.config.outputs.device-type }}-${{ steps.config.outputs.python-version }}-${{ steps.config.outputs.cuda-version }}-${{ steps.config.outputs.os-version }}-rayserve-ec2-pr-${{ github.event.pull_request.number || github.run_id }}
+          dockerfile-path: docker/${{ steps.config.outputs.framework }}/Dockerfile
+
+  set-rayserve-test-environment:
+    needs: [build-vllm-rayserve-image, load-config]
+    if: |
+      always() && !failure() && !cancelled()
+    runs-on: ubuntu-latest
+    concurrency:
+      group: ${{ github.workflow }}-set-rayserve-test-environment-${{ github.event.pull_request.number || github.run_id }}
+      cancel-in-progress: true
+    outputs:
+      aws-account-id: ${{ steps.set-env.outputs.AWS_ACCOUNT_ID }}
+      image-uri: ${{ steps.set-env.outputs.IMAGE_URI }}
+      framework-version: ${{ steps.config.outputs.framework-version }}
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v5
+
+      - name: Parse RayServe config
+        id: config
+        env:
+          # Passed via env so quotes in the JSON cannot break out of the script
+          CONFIG_JSON: ${{ needs.load-config.outputs.rayserve-config }}
+        run: |
+          printf '%s\n' "${CONFIG_JSON}" > config.json
+          echo "framework-version=$(jq -r '.common.framework_version' config.json)" >> $GITHUB_OUTPUT
+          echo "prod-image=$(jq -r '.common.prod_image' config.json)" >> $GITHUB_OUTPUT
+
+      - name: Set test environment
+        id: set-env
+        run: |
+          if [[ "${{ needs.build-vllm-rayserve-image.result }}" == "success" ]]; then
+            AWS_ACCOUNT_ID=${{ vars.CI_AWS_ACCOUNT_ID }}
+            IMAGE_URI=${{ needs.build-vllm-rayserve-image.outputs.ci-image }}
+          else
+            AWS_ACCOUNT_ID=${{ vars.PROD_AWS_ACCOUNT_ID }}
+            IMAGE_URI=${{ vars.PROD_AWS_ACCOUNT_ID }}.dkr.ecr.${{ vars.AWS_REGION }}.amazonaws.com/${{ steps.config.outputs.prod-image }}
+          fi
+
+          echo "Image URI to test: ${IMAGE_URI}"
+          echo "AWS_ACCOUNT_ID=${AWS_ACCOUNT_ID}" >> ${GITHUB_OUTPUT}
+          echo "IMAGE_URI=${IMAGE_URI}" >> ${GITHUB_OUTPUT}
+
+  vllm-rayserve-regression-test:
+    needs: [build-vllm-rayserve-image, set-rayserve-test-environment]
+    if: success()
+    runs-on:
+      - codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }}
+        fleet:x86-g6xl-runner
+        buildspec-override:true
+    concurrency:
+      group: ${{ github.workflow }}-vllm-rayserve-regression-test-${{ github.event.pull_request.number || github.run_id }}
+      cancel-in-progress: true
+    steps:
+      - name: Checkout DLC source
+        uses: actions/checkout@v5
+
+      - name: Container pull
+        uses: ./.github/actions/ecr-authenticate
+        with:
+          aws-account-id: ${{ needs.set-rayserve-test-environment.outputs.aws-account-id }}
+          aws-region: ${{ vars.AWS_REGION }}
+          image-uri: ${{ needs.set-rayserve-test-environment.outputs.image-uri }}
+
+      - name: Checkout vLLM tests
+        uses: actions/checkout@v5
+        with:
+          repository: vllm-project/vllm
+          ref: v${{ needs.set-rayserve-test-environment.outputs.framework-version }}
+          path: vllm_source
+
+      - name: Start container
+        env:
+          # Forwarded by name (-e HF_TOKEN) so the secret never appears in the script text
+          HF_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
+        run: |
+          CONTAINER_ID=$(docker run -d -it --rm --gpus=all --entrypoint /bin/bash \
+            -v ${HOME}/.cache/huggingface:/root/.cache/huggingface \
+            -v ${HOME}/.cache/vllm:/root/.cache/vllm \
+            -v .:/workdir --workdir /workdir \
+            -e HF_TOKEN \
+            ${{ needs.set-rayserve-test-environment.outputs.image-uri }})
+          echo "CONTAINER_ID=$CONTAINER_ID" >> $GITHUB_ENV
+
+      - name: Setup for vLLM tests
+        run: |
+          docker exec ${CONTAINER_ID} scripts/vllm/vllm_0_10_2_test_setup.sh
+
+      - name: Run vLLM tests
+        run: |
+          docker exec ${CONTAINER_ID} scripts/vllm/vllm_regression_test.sh
+
+  vllm-rayserve-cuda-test:
+    needs: [build-vllm-rayserve-image, set-rayserve-test-environment]
+    if: success()
+    runs-on:
+      - codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }}
+        fleet:x86-g6xl-runner
+        buildspec-override:true
+    concurrency:
+      group: ${{ github.workflow }}-vllm-rayserve-cuda-test-${{ github.event.pull_request.number || github.run_id }}
+      cancel-in-progress: true
+    steps:
+      - name: Checkout DLC source
+        uses: actions/checkout@v5
+
+      - name: Container pull
+        uses: ./.github/actions/ecr-authenticate
+        with:
+          aws-account-id: ${{ needs.set-rayserve-test-environment.outputs.aws-account-id }}
+          aws-region: ${{ vars.AWS_REGION }}
+          image-uri: ${{ needs.set-rayserve-test-environment.outputs.image-uri }}
+
+      - name: Checkout vLLM tests
+        uses: actions/checkout@v5
+        with:
+          repository: vllm-project/vllm
+          ref: v${{ needs.set-rayserve-test-environment.outputs.framework-version }}
+          path: vllm_source
+
+      - name: Start container
+        env:
+          # Forwarded by name (-e HF_TOKEN) so the secret never appears in the script text
+          HF_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
+        run: |
+          CONTAINER_ID=$(docker run -d -it --rm --gpus=all --entrypoint /bin/bash \
+            -v ${HOME}/.cache/huggingface:/root/.cache/huggingface \
+            -v ${HOME}/.cache/vllm:/root/.cache/vllm \
+            -v .:/workdir --workdir /workdir \
+            -e HF_TOKEN \
+            ${{ needs.set-rayserve-test-environment.outputs.image-uri }})
+          echo "CONTAINER_ID=$CONTAINER_ID" >> $GITHUB_ENV
+
+      - name: Setup for vLLM tests
+        run: |
+          docker exec ${CONTAINER_ID} scripts/vllm/vllm_0_10_2_test_setup.sh
+
+      - name: Run vLLM tests
+        run: |
+          docker exec ${CONTAINER_ID} scripts/vllm/vllm_cuda_test.sh
+
+  vllm-rayserve-example-test:
+    needs: [build-vllm-rayserve-image, set-rayserve-test-environment]
+    if: success()
+    runs-on:
+      - codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }}
+        fleet:x86-g6xl-runner
+        buildspec-override:true
+    concurrency:
+      group: ${{ github.workflow }}-vllm-rayserve-example-test-${{ github.event.pull_request.number || github.run_id }}
+      cancel-in-progress: true
+    steps:
+      - name: Checkout DLC source
+        uses: actions/checkout@v5
+
+      - name: Container pull
+        uses: ./.github/actions/ecr-authenticate
+        with:
+          aws-account-id: ${{ needs.set-rayserve-test-environment.outputs.aws-account-id }}
+          aws-region: ${{ vars.AWS_REGION }}
+          image-uri: ${{ needs.set-rayserve-test-environment.outputs.image-uri }}
+
+      - name: Checkout vLLM tests
+        uses: actions/checkout@v5
+        with:
+          repository: vllm-project/vllm
+          ref: v${{ needs.set-rayserve-test-environment.outputs.framework-version }}
+          path: vllm_source
+
+      - name: Start container
+        env:
+          # Forwarded by name (-e HF_TOKEN) so the secret never appears in the script text
+          HF_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
+        run: |
+          CONTAINER_ID=$(docker run -d -it --rm --gpus=all --entrypoint /bin/bash \
+            -v ${HOME}/.cache/huggingface:/root/.cache/huggingface \
+            -v ${HOME}/.cache/vllm:/root/.cache/vllm \
+            -v .:/workdir --workdir /workdir \
+            -e HF_TOKEN \
+            ${{ needs.set-rayserve-test-environment.outputs.image-uri }})
+          echo "CONTAINER_ID=$CONTAINER_ID" >> $GITHUB_ENV
+
+      - name: Setup for vLLM tests
+        run: |
+          docker exec ${CONTAINER_ID} scripts/vllm/vllm_0_10_2_test_setup.sh
+
+      - name: Run vLLM tests
+        run: |
+          docker exec ${CONTAINER_ID} scripts/vllm/vllm_rayserve_examples_test.sh
+
+  generate-rayserve-release-spec:
+    needs: [load-config, build-vllm-rayserve-image, vllm-rayserve-regression-test, vllm-rayserve-cuda-test, vllm-rayserve-example-test]
+    if: |
+      always() && !failure() && !cancelled() &&
+      needs.build-vllm-rayserve-image.result == 'success' &&
+      needs.vllm-rayserve-regression-test.result == 'success' &&
+      needs.vllm-rayserve-cuda-test.result == 'success' &&
+      needs.vllm-rayserve-example-test.result == 'success'
+    runs-on: ubuntu-latest
+    outputs:
+      release-spec: ${{ steps.generate.outputs.release-spec }}
+      should-release: ${{ steps.check-release.outputs.should-release }}
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v5
+
+      - name: Check if release is enabled
+        id: check-release
+        env:
+          # Passed via env so quotes in the JSON cannot break out of the script
+          CONFIG_JSON: ${{ needs.load-config.outputs.rayserve-config }}
+        run: |
+          printf '%s\n' "${CONFIG_JSON}" > config.json
+          RELEASE_ENABLED=$(jq -r '.release.release // false' config.json)
+          echo "Release enabled: ${RELEASE_ENABLED}"
+          echo "should-release=${RELEASE_ENABLED}" >> $GITHUB_OUTPUT
+
+      - name: Generate release spec
+        id: generate
+        if: steps.check-release.outputs.should-release == 'true'
+        uses: ./.github/actions/generate-release-spec
+        with:
+          config-json: ${{ needs.load-config.outputs.rayserve-config }}
+
+  release-rayserve-image:
+    needs: [load-config, build-vllm-rayserve-image, generate-rayserve-release-spec]
+    if: needs.generate-rayserve-release-spec.outputs.should-release == 'true'
+    uses: ./.github/workflows/reusable-release-image.yml
+    with:
+      source-image-uri: ${{ needs.build-vllm-rayserve-image.outputs.ci-image }}
+      release-spec: ${{ needs.generate-rayserve-release-spec.outputs.release-spec }}
+      source-stage: ${{ fromJson(needs.load-config.outputs.rayserve-config).release.source_stage }}
+      target-stage: ${{ fromJson(needs.load-config.outputs.rayserve-config).release.target_stage }}
+      aws-region: ${{ vars.AWS_REGION }}
+      runner-fleet: default-runner
+