PR Test #142
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
name: PR Test

# Triggers: periodic schedule, PRs against main, manual dispatch, and reuse from other workflows.
on:
  schedule:
    - cron: '0 */6 * * *'  # Run every 6 hours

  pull_request:
    branches:
      - main

  # Manual runs; the inputs below are only defined for this trigger.
  workflow_dispatch:
    inputs:
      version:
        type: choice
        required: true
        default: "release"
        options:
          - "release"
          - "nightly"
        description: "FlashInfer version"
      target_stage:
        type: string
        required: false
        default: ""
        description: "Specific stage to run (optional, for quick testing)"
      force_continue_on_error:
        type: boolean
        required: false
        default: false
        description: "Force continue-on-error (test scheduled CI behavior)"
      pr_head_sha:
        type: string
        required: false
        default: ""
        description: "PR head SHA to checkout (for /rerun-stage on fork PRs)"

  # Invocation as a reusable workflow; the inputs below are only defined for this trigger.
  workflow_call:
    inputs:
      ref:
        type: string
        required: false
        default: ''
        description: 'Git ref (branch, tag, or SHA) to test. If not provided, uses the default branch.'
      run_all_tests:
        type: boolean
        required: false
        default: false
        description: "Run all tests (for releasing or testing purpose)"
concurrency:
  # Include pr_head_sha in group for /rerun-stage dispatches to avoid collisions with main branch runs
  group: pr-test-${{ inputs.pr_head_sha || inputs.ref || github.ref }}
  # A reusable workflow inherits github.event_name from its caller, so the value is never
  # 'workflow_call' and comparing against it always left cancellation enabled. Detect a
  # workflow_call invocation through its own inputs instead: a run that was given `ref` or
  # `run_all_tests` is not cancelled by a newer run in the same group.
  # Limitation: a workflow_call that passes neither input cannot be told apart and still cancels.
  cancel-in-progress: ${{ !inputs.ref && !inputs.run_all_tests }}
# Workflow-wide environment, visible to every job and step.
env:
  SGLANG_IS_IN_CI: "true"
| jobs: | |
| # =============================================== check changes ==================================================== | |
# Decides which test groups run (from changed paths, or everything on full runs) and
# exports the CI tuning knobs consumed by the downstream jobs.
check-changes:
  runs-on: ubuntu-latest
  outputs:
    main_package: ${{ steps.filter.outputs.main_package || steps.run-mode.outputs.run_all_tests }}
    # sgl-kernel tests only run when kernels are rebuilt, so run_all_tests does not force this on
    sgl_kernel: ${{ steps.filter.outputs.sgl_kernel }}
    jit_kernel: ${{ steps.filter.outputs.jit_kernel || steps.run-mode.outputs.run_all_tests }}
    multimodal_gen: ${{ steps.filter.outputs.multimodal_gen || steps.run-mode.outputs.run_all_tests }}
    max_parallel: ${{ steps.set-parallel.outputs.max_parallel }}
    b200_runner: ${{ steps.set-runner.outputs.b200_runner }}
    enable_retry: ${{ steps.set-retry.outputs.enable_retry }}
    continue_on_error: ${{ steps.set-continue-on-error.outputs.continue_on_error }}
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}

    - name: Determine run mode
      id: run-mode
      run: |
        # Run all tests for scheduled runs, or when the caller sets the run_all_tests input.
        # Note: github.event_name is inherited from the caller, so it cannot identify a
        # workflow_call invocation; the explicit run_all_tests input is checked instead.
        if [[ "${{ github.event_name }}" == "schedule" || "${{ inputs.run_all_tests }}" == "true" ]]; then
          echo "run_all_tests=true" >> "$GITHUB_OUTPUT"
          echo "Run mode: ALL TESTS (schedule=${{ github.event_name == 'schedule' }}, run_all_tests=${{ inputs.run_all_tests }})"
        else
          echo "run_all_tests=false" >> "$GITHUB_OUTPUT"
          echo "Run mode: FILTERED (triggered by ${{ github.event_name }})"
        fi

    # Skipped on full runs; its outputs are then empty and the `|| run_all_tests` fallbacks apply.
    - name: Detect file changes
      id: filter
      uses: dorny/paths-filter@v3
      if: steps.run-mode.outputs.run_all_tests != 'true'
      with:
        filters: |
          main_package:
            - "python/sglang/!(multimodal_gen)/**"
            - "python/*.toml"
            - "scripts/ci/**"
            - "test/**"
            - ".github/workflows/pr-test.yml"
          sgl_kernel:
            - "sgl-kernel/**"
          jit_kernel:
            - "python/sglang/jit_kernel/**"
            - "python/*.toml"
            - ".github/workflows/pr-test.yml"
          multimodal_gen:
            - "python/sglang/multimodal_gen/**"
            - "python/sglang/cli/**"
            - "python/*.toml"
            - ".github/workflows/pr-test.yml"

    - name: Set max-parallel based on high-priority label
      id: set-parallel
      run: |
        if [[ "${{ github.event_name }}" == "pull_request" && "${{ contains(github.event.pull_request.labels.*.name, 'high priority') }}" == "true" ]]; then
          echo "max_parallel=15" >> "$GITHUB_OUTPUT"
          echo "High priority PR detected, setting max_parallel to 15"
        else
          echo "max_parallel=4" >> "$GITHUB_OUTPUT"
          echo "Using default max_parallel of 4"
        fi

    # NOTE(review): unlike the sgl_kernel job output, this also picks the kernel runner on
    # run-all runs (where no kernel wheel is built) - confirm this is intended.
    - name: Set B200 runner tag
      id: set-runner
      run: |
        sgl_kernel="${{ steps.filter.outputs.sgl_kernel || steps.run-mode.outputs.run_all_tests }}"
        if [[ "$sgl_kernel" == "true" ]]; then
          echo "b200_runner=4-gpu-b200-kernel" >> "$GITHUB_OUTPUT"
        else
          echo "b200_runner=4-gpu-b200" >> "$GITHUB_OUTPUT"
        fi

    - name: Enable retry for CI
      id: set-retry
      run: |
        echo "enable_retry=true" >> "$GITHUB_OUTPUT"
        echo "Retry logic enabled for CI"

    - name: Set continue-on-error for full test runs
      id: set-continue-on-error
      run: |
        if [[ "${{ steps.run-mode.outputs.run_all_tests }}" == "true" || "${{ inputs.force_continue_on_error }}" == "true" ]]; then
          echo "continue_on_error=true" >> "$GITHUB_OUTPUT"
          echo "Full test run or force flag detected, enabling continue-on-error to run all tests"
        else
          echo "continue_on_error=false" >> "$GITHUB_OUTPUT"
          echo "Filtered run, continue-on-error disabled"
        fi

    - name: Show filter results in summary (table)
      run: |
        {
          echo "## Change Detection"
          echo ""
          echo "| Component | Changed |"
          echo "|-------------------|---------|"
          echo "| main_package | ${{ steps.filter.outputs.main_package || steps.run-mode.outputs.run_all_tests }} |"
          # Mirror the real sgl_kernel job output (filter only); run_all_tests does not enable it.
          echo "| sgl_kernel | ${{ steps.filter.outputs.sgl_kernel || 'false' }} |"
          echo "| jit_kernel | ${{ steps.filter.outputs.jit_kernel || steps.run-mode.outputs.run_all_tests }} |"
          echo "| multimodal_gen | ${{ steps.filter.outputs.multimodal_gen || steps.run-mode.outputs.run_all_tests }} |"
          echo "| max_parallel | ${{ steps.set-parallel.outputs.max_parallel }} |"
          echo "| b200_runner | ${{ steps.set-runner.outputs.b200_runner }} |"
          echo "| enable_retry | ${{ steps.set-retry.outputs.enable_retry }} |"
          echo "| continue_on_error | ${{ steps.set-continue-on-error.outputs.continue_on_error }} |"
        } >> "$GITHUB_STEP_SUMMARY"
| # =============================================== PR Gate ==================================================== | |
# Invokes the reusable PR gate workflow whenever at least one component is marked as changed.
call-gate:
  uses: ./.github/workflows/pr-gate.yml
  secrets: inherit
  needs:
    - check-changes
  if: >-
    needs.check-changes.outputs.main_package == 'true' ||
    needs.check-changes.outputs.sgl_kernel == 'true' ||
    needs.check-changes.outputs.jit_kernel == 'true' ||
    needs.check-changes.outputs.multimodal_gen == 'true'
| # =============================================== sgl-kernel ==================================================== | |
# Builds the x64 sgl-kernel wheel and uploads it as an artifact for the downstream test jobs.
# Only runs when the sgl_kernel filter reported changes.
sgl-kernel-build-wheels:
  needs: [check-changes, call-gate]
  if: needs.check-changes.outputs.sgl_kernel == 'true'
  runs-on: x64-kernel-build-node
  strategy:
    matrix:
      include:
        - python-version: "3.10"
          cuda-version: "12.9"
        # Add back when CUDA 13.0 is supported on CI
        # - python-version: "3.10"
        #   cuda-version: "13.0"
  name: Build Wheel
  steps:
    # Guarded and quoted like the ARM build job: an empty or unset GITHUB_WORKSPACE must never
    # let the glob expand to `/*` under sudo.
    - name: Cleanup
      run: |
        if [ -d "$GITHUB_WORKSPACE" ]; then
          sudo rm -rf "$GITHUB_WORKSPACE"/* || true
        else
          echo "$GITHUB_WORKSPACE does not exist, nothing to clean"
        fi

    - uses: actions/checkout@v4
      with:
        submodules: "recursive"
        ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}

    - name: Set up Python ${{ matrix.python-version }}
      uses: actions/setup-python@v5
      with:
        python-version: ${{ matrix.python-version }}

    - name: Build wheel for Python ${{ matrix.python-version }} and CUDA ${{ matrix.cuda-version }}
      run: |
        cd sgl-kernel
        ./build.sh "${{ matrix.python-version }}" "${{ matrix.cuda-version }}"
      env:
        USE_CCACHE: 1

    # The second ls fails the step when no .whl file was produced.
    - name: Verify wheel artifacts
      run: |
        ls -alh sgl-kernel/dist
        ls -alh sgl-kernel/dist/*.whl

    - name: Upload artifacts
      uses: actions/upload-artifact@v4
      with:
        name: wheel-python${{ matrix.python-version }}-cuda${{ matrix.cuda-version }}
        path: sgl-kernel/dist/*
        if-no-files-found: error
# ARM counterpart of the wheel build; the uploaded artifact name carries an -aarch64 suffix.
sgl-kernel-build-wheels-arm:
  name: Build Wheel Arm
  runs-on: arm-kernel-build-node
  needs:
    - check-changes
    - call-gate
  if: ${{ needs.check-changes.outputs.sgl_kernel == 'true' }}
  strategy:
    matrix:
      include:
        - python-version: "3.10"
          cuda-version: "12.9"
  steps:
    # Wipe leftovers from earlier runs, but only when the workspace directory exists.
    - name: Cleanup
      run: |
        if [ -d "$GITHUB_WORKSPACE" ]; then
          sudo rm -rf "$GITHUB_WORKSPACE"/* || true
        else
          echo "$GITHUB_WORKSPACE does not exist, nothing to clean"
        fi

    - uses: actions/checkout@v4
      with:
        ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}
        submodules: "recursive"

    - name: Set up Python ${{ matrix.python-version }}
      uses: actions/setup-python@v5
      with:
        python-version: ${{ matrix.python-version }}

    - name: Build wheel for Python ${{ matrix.python-version }} and CUDA ${{ matrix.cuda-version }}
      env:
        USE_CCACHE: 1
      run: |
        cd sgl-kernel
        ./build.sh "${{ matrix.python-version }}" "${{ matrix.cuda-version }}"

    # Listing *.whl makes the step fail if the build produced no wheel.
    - name: Verify wheel artifacts
      run: |
        ls -alh sgl-kernel/dist
        ls -alh sgl-kernel/dist/*.whl

    - name: Upload artifacts
      uses: actions/upload-artifact@v4
      with:
        name: wheel-python${{ matrix.python-version }}-cuda${{ matrix.cuda-version }}-aarch64
        path: sgl-kernel/dist/*
        if-no-files-found: error
# Runs the sgl-kernel pytest suite on a 1-GPU runner against the freshly built x64 wheel.
# Skipped when a specific target_stage was requested or when sgl-kernel did not change.
sgl-kernel-unit-test:
  runs-on: 1-gpu-runner
  needs:
    - check-changes
    - call-gate
    - sgl-kernel-build-wheels
  if: ${{ !inputs.target_stage && needs.check-changes.outputs.sgl_kernel == 'true' }}
  env:
    RUNNER_LABELS: 1-gpu-runner
  steps:
    - uses: actions/checkout@v4
      with:
        ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}

    # Remove wheels left in dist/ before downloading the new artifact; both commands are best-effort.
    - name: Cleanup
      run: |
        ls -alh sgl-kernel/dist || true
        rm -rf sgl-kernel/dist/* || true

    - name: Download artifacts
      uses: actions/download-artifact@v4
      with:
        pattern: wheel-python3.10-cuda12.9
        merge-multiple: true
        path: sgl-kernel/dist/

    - name: Install dependencies
      run: |
        CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/ci_install_dependency.sh diffusion

    - name: Run test
      timeout-minutes: 30
      run: |
        cd sgl-kernel
        pytest tests/
# Runs test/registered/mla/test_mla_deepseek_v3.py on a 1-GPU runner with the freshly built wheel.
# Skipped when a specific target_stage was requested or when sgl-kernel did not change.
sgl-kernel-mla-test:
  runs-on: 1-gpu-runner
  needs:
    - check-changes
    - call-gate
    - sgl-kernel-build-wheels
  if: ${{ !inputs.target_stage && needs.check-changes.outputs.sgl_kernel == 'true' }}
  env:
    RUNNER_LABELS: 1-gpu-runner
  steps:
    - uses: actions/checkout@v4
      with:
        ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}

    # Best-effort removal of wheels left in dist/ before the download.
    - name: Cleanup
      run: |
        ls -alh sgl-kernel/dist || true
        rm -rf sgl-kernel/dist/* || true

    - name: Download artifacts
      uses: actions/download-artifact@v4
      with:
        pattern: wheel-python3.10-cuda12.9
        merge-multiple: true
        path: sgl-kernel/dist/

    - name: Install dependencies
      run: |
        CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/ci_install_dependency.sh

    - name: Run test
      timeout-minutes: 30
      run: |
        cd test/registered/mla
        python3 test_mla_deepseek_v3.py
# Smoke-runs every sgl-kernel/benchmark/bench_*.py script with the freshly built wheel.
# Skipped when a specific target_stage was requested or when sgl-kernel did not change.
sgl-kernel-benchmark-test:
  runs-on: 1-gpu-runner
  needs:
    - check-changes
    - call-gate
    - sgl-kernel-build-wheels
  if: ${{ !inputs.target_stage && needs.check-changes.outputs.sgl_kernel == 'true' }}
  env:
    CI: true
    RUNNER_LABELS: 1-gpu-runner
  steps:
    - uses: actions/checkout@v4
      with:
        ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}

    # Best-effort removal of wheels left in dist/ before the download.
    - name: Cleanup
      run: |
        ls -alh sgl-kernel/dist || true
        rm -rf sgl-kernel/dist/* || true

    - name: Download artifacts
      uses: actions/download-artifact@v4
      with:
        pattern: wheel-python3.10-cuda12.9
        merge-multiple: true
        path: sgl-kernel/dist/

    - name: Install dependencies
      run: |
        CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/ci_install_dependency.sh

    # Each script is capped at 60 seconds; a timeout or failure only prints a warning,
    # so a single benchmark cannot fail this step.
    - name: Run benchmark tests
      timeout-minutes: 45
      run: |
        cd sgl-kernel/benchmark
        echo "Running sgl-kernel benchmark tests in CI mode..."
        echo "CI environment variable: $CI"
        echo "GITHUB_ACTIONS environment variable: $GITHUB_ACTIONS"
        for bench_file in bench_*.py; do
          echo "Testing $bench_file..."
          timeout 60 python3 "$bench_file" || echo "Warning: $bench_file timed out or failed, continuing..."
          echo "Completed $bench_file"
          echo "---"
        done
        echo "All benchmark tests completed!"
# Runs the sgl-kernel pytest suite on the B200 runner chosen by check-changes.
# Skipped when a specific target_stage was requested or when sgl-kernel did not change.
sgl-kernel-b200-test:
  runs-on: ${{ needs.check-changes.outputs.b200_runner }}
  needs:
    - check-changes
    - sgl-kernel-build-wheels
  if: ${{ !inputs.target_stage && needs.check-changes.outputs.sgl_kernel == 'true' }}
  env:
    RUNNER_LABELS: ${{ needs.check-changes.outputs.b200_runner }}
  steps:
    - uses: actions/checkout@v4
      with:
        ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}

    # Best-effort removal of wheels left in dist/ before the download.
    - name: Cleanup
      run: |
        ls -alh sgl-kernel/dist || true
        rm -rf sgl-kernel/dist/* || true

    - name: Download artifacts
      uses: actions/download-artifact@v4
      with:
        pattern: wheel-python3.10-cuda12.9
        merge-multiple: true
        path: sgl-kernel/dist/

    # IS_BLACKWELL=1 is passed to the install script for this runner type.
    - name: Install dependencies
      run: |
        CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} IS_BLACKWELL=1 bash scripts/ci/ci_install_dependency.sh diffusion

    - name: Run sgl-kernel unit tests on B200
      timeout-minutes: 30
      run: |
        cd sgl-kernel
        pytest tests/
| # Adding a single CUDA13 smoke test to verify that the kernel builds and runs | |
| # TODO: Add back this test when it can pass on CI | |
| # cuda13-kernel-smoke-test: | |
| # needs: [check-changes, sgl-kernel-build-wheels] | |
| # if: needs.check-changes.outputs.sgl_kernel == 'true' | |
| # runs-on: x64-cu13-kernel-tests | |
| # steps: | |
| # - uses: actions/checkout@v4 | |
| # - name: Cleanup | |
| # run: | | |
| # ls -alh sgl-kernel/dist || true | |
| # rm -rf sgl-kernel/dist/* || true | |
| # - name: Download CUDA 13.0 artifacts | |
| # uses: actions/download-artifact@v4 | |
| # with: | |
| # path: sgl-kernel/dist/ | |
| # merge-multiple: true | |
| # pattern: wheel-python3.10-cuda13.0 | |
| # - name: Install dependencies | |
| # run: | | |
| # CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/ci_install_dependency.sh | |
| # - name: Run kernel unit tests | |
| # timeout-minutes: 30 | |
| # run: | | |
| # cd sgl-kernel | |
| # pytest tests/ | |
| # =============================================== jit-kernel ==================================================== | |
# Runs the pytest suite under python/sglang/jit_kernel on a 1-GPU runner.
# Skipped when a specific target_stage was requested or when jit_kernel is not marked as changed.
jit-kernel-unit-test:
  runs-on: 1-gpu-runner
  needs:
    - check-changes
    - call-gate
  if: ${{ !inputs.target_stage && needs.check-changes.outputs.jit_kernel == 'true' }}
  env:
    RUNNER_LABELS: 1-gpu-runner
  steps:
    - uses: actions/checkout@v4
      with:
        ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}

    - name: Install dependencies
      run: |
        bash scripts/ci/ci_install_dependency.sh

    - name: Run test
      timeout-minutes: 30
      run: |
        cd python/sglang/jit_kernel
        pytest tests/
| # =============================================== primary ==================================================== | |
# First GPU test stage. Runs when target_stage names it explicitly, or on a normal run
# (no target_stage) where main_package or sgl_kernel is marked as changed. Scheduled runs
# proceed even if an upstream job failed or was cancelled.
stage-a-test-1:
  runs-on: 1-gpu-runner
  needs:
    - check-changes
    - call-gate
    - sgl-kernel-build-wheels
  if: |
    always() && (
      inputs.target_stage == 'stage-a-test-1' || (
        !inputs.target_stage &&
        (github.event_name == 'schedule' || (!failure() && !cancelled())) &&
        (needs.check-changes.outputs.main_package == 'true' || needs.check-changes.outputs.sgl_kernel == 'true')
      )
    )
  env:
    RUNNER_LABELS: 1-gpu-runner
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}

    # A custom wheel only exists when sgl-kernel changed in this run.
    - name: Download artifacts
      if: needs.check-changes.outputs.sgl_kernel == 'true'
      uses: actions/download-artifact@v4
      with:
        pattern: wheel-python3.10-cuda12.9
        merge-multiple: true
        path: sgl-kernel/dist/

    - name: Install dependencies
      run: |
        CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/ci_install_dependency.sh

    - name: Run test
      timeout-minutes: 10
      run: |
        cd test/
        CONTINUE_ON_ERROR_FLAG=""
        if [[ "${{ needs.check-changes.outputs.continue_on_error }}" == "true" ]]; then
          CONTINUE_ON_ERROR_FLAG="--continue-on-error"
        fi
        python3 run_suite.py --hw cuda --suite stage-a-test-1 $CONTINUE_ON_ERROR_FLAG
        # temporarily put backend-independent cpu tests here
        python3 run_suite.py --hw cpu --suite default $CONTINUE_ON_ERROR_FLAG
# CPU-only stage on a GitHub-hosted runner. Runs when target_stage names it explicitly, or on
# a normal run (no target_stage) where main_package is marked as changed.
stage-a-cpu-only:
  runs-on: ubuntu-latest
  needs:
    - check-changes
    - call-gate
  if: |
    always() && (
      inputs.target_stage == 'stage-a-cpu-only' || (
        !inputs.target_stage &&
        (github.event_name == 'schedule' || (!failure() && !cancelled())) &&
        needs.check-changes.outputs.main_package == 'true'
      )
    )
  steps:
    # Delete preinstalled toolchains that this job does not use, then report free space.
    - name: Free disk space
      run: |
        sudo rm -rf /usr/share/dotnet /usr/local/lib/android /opt/ghc
        df -h

    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}

    - name: Set up Python
      uses: actions/setup-python@v5
      with:
        python-version: '3.10'

    - name: Install dependencies
      run: |
        pip install -e "python/[dev]"

    - name: Run test
      timeout-minutes: 10
      run: |
        cd test/
        CONTINUE_ON_ERROR_FLAG=""
        if [[ "${{ needs.check-changes.outputs.continue_on_error }}" == "true" ]]; then
          CONTINUE_ON_ERROR_FLAG="--continue-on-error"
        fi
        python3 run_suite.py --hw cpu --suite stage-a-cpu-only $CONTINUE_ON_ERROR_FLAG
# Stage B small suite, split into 11 partitions (matrix ids 0-10) that run independently.
# Runs when target_stage names it explicitly, or on a normal run where main_package or
# sgl_kernel is marked as changed.
stage-b-test-small-1-gpu:
  runs-on: 1-gpu-runner
  needs:
    - check-changes
    - call-gate
    - stage-a-test-1
    - sgl-kernel-build-wheels
  if: |
    always() && (
      inputs.target_stage == 'stage-b-test-small-1-gpu' || (
        !inputs.target_stage &&
        (github.event_name == 'schedule' || (!failure() && !cancelled())) &&
        (needs.check-changes.outputs.main_package == 'true' || needs.check-changes.outputs.sgl_kernel == 'true')
      )
    )
  env:
    RUNNER_LABELS: 1-gpu-runner
  strategy:
    fail-fast: false
    matrix:
      # Must stay in sync with --auto-partition-size in the Run test step.
      partition: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}

    # A custom wheel only exists when sgl-kernel changed in this run.
    - name: Download artifacts
      if: needs.check-changes.outputs.sgl_kernel == 'true'
      uses: actions/download-artifact@v4
      with:
        pattern: wheel-python3.10-cuda12.9
        merge-multiple: true
        path: sgl-kernel/dist/

    - name: Install dependencies
      run: |
        CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/ci_install_dependency.sh

    - name: Run test
      timeout-minutes: 30
      run: |
        cd test/
        CONTINUE_ON_ERROR_FLAG=""
        if [[ "${{ needs.check-changes.outputs.continue_on_error }}" == "true" ]]; then
          CONTINUE_ON_ERROR_FLAG="--continue-on-error"
        fi
        python3 run_suite.py --hw cuda --suite stage-b-test-small-1-gpu --auto-partition-id ${{ matrix.partition }} --auto-partition-size 11 $CONTINUE_ON_ERROR_FLAG
# Stage B large suite on a 1-GPU runner. Runs when target_stage names it explicitly, or on a
# normal run where main_package or sgl_kernel is marked as changed.
stage-b-test-large-1-gpu:
  runs-on: 1-gpu-runner
  needs:
    - check-changes
    - call-gate
    - stage-a-test-1
    - sgl-kernel-build-wheels
  if: |
    always() && (
      inputs.target_stage == 'stage-b-test-large-1-gpu' || (
        !inputs.target_stage &&
        (github.event_name == 'schedule' || (!failure() && !cancelled())) &&
        (needs.check-changes.outputs.main_package == 'true' || needs.check-changes.outputs.sgl_kernel == 'true')
      )
    )
  env:
    RUNNER_LABELS: 1-gpu-runner
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}

    # A custom wheel only exists when sgl-kernel changed in this run.
    - name: Download artifacts
      if: needs.check-changes.outputs.sgl_kernel == 'true'
      uses: actions/download-artifact@v4
      with:
        pattern: wheel-python3.10-cuda12.9
        merge-multiple: true
        path: sgl-kernel/dist/

    - name: Install dependencies
      run: |
        CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/ci_install_dependency.sh

    - name: Run test
      timeout-minutes: 30
      run: |
        cd test/
        CONTINUE_ON_ERROR_FLAG=""
        if [[ "${{ needs.check-changes.outputs.continue_on_error }}" == "true" ]]; then
          CONTINUE_ON_ERROR_FLAG="--continue-on-error"
        fi
        python3 run_suite.py --hw cuda --suite stage-b-test-large-1-gpu $CONTINUE_ON_ERROR_FLAG
# Stage B large suite on a 2-GPU runner. Runs when target_stage names it explicitly, or on a
# normal run where main_package or sgl_kernel is marked as changed.
stage-b-test-large-2-gpu:
  runs-on: 2-gpu-runner
  needs:
    - check-changes
    - call-gate
    - stage-a-test-1
    - sgl-kernel-build-wheels
  if: |
    always() && (
      inputs.target_stage == 'stage-b-test-large-2-gpu' || (
        !inputs.target_stage &&
        (github.event_name == 'schedule' || (!failure() && !cancelled())) &&
        (needs.check-changes.outputs.main_package == 'true' || needs.check-changes.outputs.sgl_kernel == 'true')
      )
    )
  env:
    RUNNER_LABELS: 2-gpu-runner
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}

    # A custom wheel only exists when sgl-kernel changed in this run.
    - name: Download artifacts
      if: needs.check-changes.outputs.sgl_kernel == 'true'
      uses: actions/download-artifact@v4
      with:
        pattern: wheel-python3.10-cuda12.9
        merge-multiple: true
        path: sgl-kernel/dist/

    - name: Install dependencies
      run: |
        CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/ci_install_dependency.sh

    - name: Run test
      timeout-minutes: 30
      run: |
        cd test/
        CONTINUE_ON_ERROR_FLAG=""
        if [[ "${{ needs.check-changes.outputs.continue_on_error }}" == "true" ]]; then
          CONTINUE_ON_ERROR_FLAG="--continue-on-error"
        fi
        python3 run_suite.py --hw cuda --suite stage-b-test-large-2-gpu $CONTINUE_ON_ERROR_FLAG
# Stage C suite on a 4-GPU H100 runner, scheduled after the three stage B jobs.
# Runs when target_stage names it explicitly, or on a normal run where main_package or
# sgl_kernel is marked as changed.
stage-c-test-large-4-gpu:
  runs-on: 4-gpu-h100
  needs:
    - check-changes
    - call-gate
    - stage-b-test-small-1-gpu
    - stage-b-test-large-1-gpu
    - stage-b-test-large-2-gpu
    - sgl-kernel-build-wheels
  if: |
    always() && (
      inputs.target_stage == 'stage-c-test-large-4-gpu' || (
        !inputs.target_stage &&
        (github.event_name == 'schedule' || (!failure() && !cancelled())) &&
        (needs.check-changes.outputs.main_package == 'true' || needs.check-changes.outputs.sgl_kernel == 'true')
      )
    )
  env:
    RUNNER_LABELS: 4-gpu-h100
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}

    # A custom wheel only exists when sgl-kernel changed in this run.
    - name: Download artifacts
      if: needs.check-changes.outputs.sgl_kernel == 'true'
      uses: actions/download-artifact@v4
      with:
        pattern: wheel-python3.10-cuda12.9
        merge-multiple: true
        path: sgl-kernel/dist/

    - name: Install dependencies
      run: |
        CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/ci_install_dependency.sh

    - name: Run test
      timeout-minutes: 30
      run: |
        cd test/
        CONTINUE_ON_ERROR_FLAG=""
        if [[ "${{ needs.check-changes.outputs.continue_on_error }}" == "true" ]]; then
          CONTINUE_ON_ERROR_FLAG="--continue-on-error"
        fi
        python3 run_suite.py --hw cuda --suite stage-c-test-large-4-gpu $CONTINUE_ON_ERROR_FLAG
# Stage C suite on the B200 runner chosen by check-changes, scheduled after all stage B jobs.
# Runs when target_stage names it explicitly, or on a normal run where main_package or
# sgl_kernel is marked as changed.
stage-c-test-large-4-gpu-b200:
  needs: [check-changes, call-gate, stage-b-test-small-1-gpu, stage-b-test-large-1-gpu, stage-b-test-large-2-gpu, stage-b-test-4-gpu-b200, sgl-kernel-build-wheels]
  if: |
    always() &&
    (
      (inputs.target_stage == 'stage-c-test-large-4-gpu-b200') ||
      (
        !inputs.target_stage &&
        (github.event_name == 'schedule' || (!failure() && !cancelled())) &&
        ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
      )
    )
  runs-on: ${{ needs.check-changes.outputs.b200_runner }}
  env:
    RUNNER_LABELS: ${{ needs.check-changes.outputs.b200_runner }}
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}

    # A custom wheel only exists when sgl-kernel changed in this run.
    # NOTE(review): this job pins download-artifact@v6 while most jobs use @v4 - confirm intended.
    - name: Download artifacts
      if: needs.check-changes.outputs.sgl_kernel == 'true'
      uses: actions/download-artifact@v6
      with:
        path: sgl-kernel/dist/
        merge-multiple: true
        pattern: wheel-python3.10-cuda12.9

    - name: Install dependencies
      run: |
        CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} IS_BLACKWELL=1 bash scripts/ci/ci_install_dependency.sh

    # Honors continue_on_error like the other run_suite.py stages, so full/scheduled runs
    # keep going after a failing test instead of stopping at the first one.
    - name: Run test
      timeout-minutes: 30
      run: |
        cd test/
        CONTINUE_ON_ERROR_FLAG=""
        if [[ "${{ needs.check-changes.outputs.continue_on_error }}" == "true" ]]; then
          CONTINUE_ON_ERROR_FLAG="--continue-on-error"
        fi
        IS_BLACKWELL=1 python3 run_suite.py --hw cuda --suite stage-c-test-large-4-gpu-b200 $CONTINUE_ON_ERROR_FLAG
# Diffusion server tests (suite 1-gpu), split into two partitions.
# Runs when target_stage names it explicitly, or on a normal run where multimodal_gen is
# marked as changed.
multimodal-gen-test-1-gpu:
  runs-on: 1-gpu-runner
  needs:
    - check-changes
    - call-gate
    - sgl-kernel-build-wheels
  if: |
    always() && (
      inputs.target_stage == 'multimodal-gen-test-1-gpu' || (
        !inputs.target_stage &&
        (github.event_name == 'schedule' || (!failure() && !cancelled())) &&
        needs.check-changes.outputs.multimodal_gen == 'true'
      )
    )
  strategy:
    fail-fast: false
    matrix:
      # Must stay in sync with --total-partitions in the test step.
      part:
        - 0
        - 1
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}

    # A custom wheel only exists when sgl-kernel changed in this run.
    - name: Download artifacts
      if: needs.check-changes.outputs.sgl_kernel == 'true'
      uses: actions/download-artifact@v4
      with:
        pattern: wheel-python3.10-cuda12.9
        merge-multiple: true
        path: sgl-kernel/dist/

    - name: Install dependencies
      run: |
        CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/ci_install_dependency.sh diffusion

    - name: Run diffusion server tests
      timeout-minutes: 60
      run: |
        cd python
        CONTINUE_ON_ERROR_FLAG=""
        if [[ "${{ needs.check-changes.outputs.continue_on_error }}" == "true" ]]; then
          CONTINUE_ON_ERROR_FLAG="--continue-on-error"
        fi
        python3 sglang/multimodal_gen/test/run_suite.py \
          --suite 1-gpu \
          --partition-id ${{ matrix.part }} \
          --total-partitions 2 \
          $CONTINUE_ON_ERROR_FLAG
# Diffusion server tests (suite 2-gpu), split into two partitions.
# Runs when target_stage names it explicitly, or on a normal run where multimodal_gen is
# marked as changed.
multimodal-gen-test-2-gpu:
  runs-on: 2-gpu-runner
  needs:
    - check-changes
    - call-gate
    - sgl-kernel-build-wheels
  if: |
    always() && (
      inputs.target_stage == 'multimodal-gen-test-2-gpu' || (
        !inputs.target_stage &&
        (github.event_name == 'schedule' || (!failure() && !cancelled())) &&
        needs.check-changes.outputs.multimodal_gen == 'true'
      )
    )
  strategy:
    fail-fast: false
    matrix:
      # Must stay in sync with --total-partitions in the test step.
      part:
        - 0
        - 1
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}

    # A custom wheel only exists when sgl-kernel changed in this run.
    - name: Download artifacts
      if: needs.check-changes.outputs.sgl_kernel == 'true'
      uses: actions/download-artifact@v4
      with:
        pattern: wheel-python3.10-cuda12.9
        merge-multiple: true
        path: sgl-kernel/dist/

    - name: Install dependencies
      run: |
        CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/ci_install_dependency.sh diffusion

    - name: Run diffusion server tests
      timeout-minutes: 60
      run: |
        cd python
        CONTINUE_ON_ERROR_FLAG=""
        if [[ "${{ needs.check-changes.outputs.continue_on_error }}" == "true" ]]; then
          CONTINUE_ON_ERROR_FLAG="--continue-on-error"
        fi
        python3 sglang/multimodal_gen/test/run_suite.py \
          --suite 2-gpu \
          --partition-id ${{ matrix.part }} \
          --total-partitions 2 \
          $CONTINUE_ON_ERROR_FLAG
# Runs the quantization_test suite from test/srt on a 1-GPU runner.
# Runs when target_stage names it explicitly, or on a normal run where main_package or
# sgl_kernel is marked as changed.
quantization-test:
  runs-on: 1-gpu-runner
  needs:
    - check-changes
    - call-gate
    - stage-a-test-1
  if: |
    always() && (
      inputs.target_stage == 'quantization-test' || (
        !inputs.target_stage &&
        (github.event_name == 'schedule' || (!failure() && !cancelled())) &&
        (needs.check-changes.outputs.main_package == 'true' || needs.check-changes.outputs.sgl_kernel == 'true')
      )
    )
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}

    # A custom wheel only exists when sgl-kernel changed in this run.
    - name: Download artifacts
      if: needs.check-changes.outputs.sgl_kernel == 'true'
      uses: actions/download-artifact@v4
      with:
        pattern: wheel-python3.10-cuda12.9
        merge-multiple: true
        path: sgl-kernel/dist/

    - name: Install dependencies
      run: |
        CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/ci_install_dependency.sh

    # Both optional flags are derived from check-changes outputs.
    - name: Run test
      timeout-minutes: 30
      run: |
        cd test/srt
        RETRY_FLAG=""
        if [[ "${{ needs.check-changes.outputs.enable_retry }}" == "true" ]]; then
          RETRY_FLAG="--enable-retry"
        fi
        CONTINUE_ON_ERROR_FLAG=""
        if [[ "${{ needs.check-changes.outputs.continue_on_error }}" == "true" ]]; then
          CONTINUE_ON_ERROR_FLAG="--continue-on-error"
        fi
        python3 run_suite.py --suite quantization_test $RETRY_FLAG $CONTINUE_ON_ERROR_FLAG
# Runs the per-commit-1-gpu suite from test/srt in two partitions; matrix concurrency is
# capped by the max_parallel output of check-changes.
# Runs when target_stage names it explicitly, or on a normal run where main_package or
# sgl_kernel is marked as changed.
unit-test-backend-1-gpu:
  runs-on: 1-gpu-runner
  needs:
    - check-changes
    - call-gate
    - stage-a-test-1
  if: |
    always() && (
      inputs.target_stage == 'unit-test-backend-1-gpu' || (
        !inputs.target_stage &&
        (github.event_name == 'schedule' || (!failure() && !cancelled())) &&
        (needs.check-changes.outputs.main_package == 'true' || needs.check-changes.outputs.sgl_kernel == 'true')
      )
    )
  env:
    RUNNER_LABELS: 1-gpu-runner
  strategy:
    fail-fast: false
    # The output is a string; fromJson converts it to the number max-parallel expects.
    max-parallel: ${{ fromJson(needs.check-changes.outputs.max_parallel) }}
    matrix:
      # Must stay in sync with --auto-partition-size in the Run test step.
      part:
        - 0
        - 1
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}

    # A custom wheel only exists when sgl-kernel changed in this run.
    - name: Download artifacts
      if: needs.check-changes.outputs.sgl_kernel == 'true'
      uses: actions/download-artifact@v4
      with:
        pattern: wheel-python3.10-cuda12.9
        merge-multiple: true
        path: sgl-kernel/dist/

    - name: Install dependencies
      run: |
        CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/ci_install_dependency.sh

    # Both optional flags are derived from check-changes outputs.
    - name: Run test
      timeout-minutes: 30
      run: |
        cd test/srt
        RETRY_FLAG=""
        if [[ "${{ needs.check-changes.outputs.enable_retry }}" == "true" ]]; then
          RETRY_FLAG="--enable-retry"
        fi
        CONTINUE_ON_ERROR_FLAG=""
        if [[ "${{ needs.check-changes.outputs.continue_on_error }}" == "true" ]]; then
          CONTINUE_ON_ERROR_FLAG="--continue-on-error"
        fi
        python3 run_suite.py --suite per-commit-1-gpu --auto-partition-id ${{ matrix.part }} --auto-partition-size 2 $RETRY_FLAG $CONTINUE_ON_ERROR_FLAG
# Runs the stage-b-test-4-gpu-b200 suite (run_suite.py under test/, --hw cuda)
# on the runner label chosen by check-changes (outputs.b200_runner).
stage-b-test-4-gpu-b200:
  needs:
    - check-changes
    - call-gate
    - stage-a-test-1
    - sgl-kernel-build-wheels
  # Gate: run when this stage is explicitly targeted via inputs.target_stage.
  # With no target_stage, run on schedule events or when no upstream job
  # failed/was cancelled, and only if check-changes reported main_package or
  # sgl_kernel as 'true'.
  if: |
    always() &&
    (
      (inputs.target_stage == 'stage-b-test-4-gpu-b200') ||
      (
        !inputs.target_stage &&
        (github.event_name == 'schedule' || (!failure() && !cancelled())) &&
        ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
      )
    )
  runs-on: ${{ needs.check-changes.outputs.b200_runner }}
  env:
    RUNNER_LABELS: ${{ needs.check-changes.outputs.b200_runner }}
  strategy:
    fail-fast: false
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        # First non-empty of: dispatch-supplied PR head SHA, caller-supplied ref, triggering commit.
        ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}
    # Only fetch the wheel artifacts when sgl_kernel is reported as changed.
    - name: Download artifacts
      if: needs.check-changes.outputs.sgl_kernel == 'true'
      uses: actions/download-artifact@v6
      with:
        path: sgl-kernel/dist/
        merge-multiple: true
        pattern: wheel-python3.10-cuda12.9
    - name: Install dependencies
      run: |
        CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} IS_BLACKWELL=1 bash scripts/ci/ci_install_dependency.sh
    - name: Run test
      timeout-minutes: 30
      working-directory: test
      run: |
        # The flag stays empty (and vanishes on expansion) unless enabled by check-changes.
        CONTINUE_ON_ERROR_FLAG=""
        if [[ "${{ needs.check-changes.outputs.continue_on_error }}" == "true" ]]; then
          CONTINUE_ON_ERROR_FLAG="--continue-on-error"
        fi
        IS_BLACKWELL=1 python3 run_suite.py --hw cuda --suite stage-b-test-4-gpu-b200 $CONTINUE_ON_ERROR_FLAG
# Runs the per-commit-2-gpu suite on 2-gpu-runner, one matrix entry per
# auto-partition (2 partitions in total).
unit-test-backend-2-gpu:
  needs:
    - check-changes
    - call-gate
    - unit-test-backend-1-gpu
  # Gate: run when this stage is explicitly targeted via inputs.target_stage.
  # With no target_stage, run on schedule events or when no upstream job
  # failed/was cancelled, and only if check-changes reported main_package or
  # sgl_kernel as 'true'.
  if: |
    always() &&
    (
      (inputs.target_stage == 'unit-test-backend-2-gpu') ||
      (
        !inputs.target_stage &&
        (github.event_name == 'schedule' || (!failure() && !cancelled())) &&
        ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
      )
    )
  runs-on: 2-gpu-runner
  env:
    RUNNER_LABELS: 2-gpu-runner
  strategy:
    fail-fast: false
    matrix:
      part: [0, 1]
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        # First non-empty of: dispatch-supplied PR head SHA, caller-supplied ref, triggering commit.
        ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}
    # Only fetch the wheel artifacts when sgl_kernel is reported as changed.
    - name: Download artifacts
      if: needs.check-changes.outputs.sgl_kernel == 'true'
      uses: actions/download-artifact@v4
      with:
        path: sgl-kernel/dist/
        merge-multiple: true
        pattern: wheel-python3.10-cuda12.9
    - name: Install dependencies
      run: |
        CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/ci_install_dependency.sh
    - name: Run test
      timeout-minutes: 30
      working-directory: test/srt
      run: |
        # Optional flags are left empty (and therefore vanish on expansion) unless enabled by check-changes.
        RETRY_FLAG=""
        if [[ "${{ needs.check-changes.outputs.enable_retry }}" == "true" ]]; then
          RETRY_FLAG="--enable-retry"
        fi
        CONTINUE_ON_ERROR_FLAG=""
        if [[ "${{ needs.check-changes.outputs.continue_on_error }}" == "true" ]]; then
          CONTINUE_ON_ERROR_FLAG="--continue-on-error"
        fi
        python3 run_suite.py --suite per-commit-2-gpu --auto-partition-id ${{ matrix.part }} --auto-partition-size 2 $RETRY_FLAG $CONTINUE_ON_ERROR_FLAG
# Runs the per-commit-4-gpu suite on 4-gpu-h100, one matrix entry per
# auto-partition (3 partitions in total).
unit-test-backend-4-gpu:
  needs:
    - check-changes
    - call-gate
    - unit-test-backend-1-gpu
    - stage-b-test-4-gpu-b200
  # Gate: run when this stage is explicitly targeted via inputs.target_stage.
  # With no target_stage, run on schedule events or when no upstream job
  # failed/was cancelled, and only if check-changes reported main_package or
  # sgl_kernel as 'true'.
  if: |
    always() &&
    (
      (inputs.target_stage == 'unit-test-backend-4-gpu') ||
      (
        !inputs.target_stage &&
        (github.event_name == 'schedule' || (!failure() && !cancelled())) &&
        ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
      )
    )
  runs-on: 4-gpu-h100
  env:
    RUNNER_LABELS: 4-gpu-h100
  strategy:
    fail-fast: false
    matrix:
      part: [0, 1, 2]
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        # First non-empty of: dispatch-supplied PR head SHA, caller-supplied ref, triggering commit.
        ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}
    # Only fetch the wheel artifacts when sgl_kernel is reported as changed.
    - name: Download artifacts
      if: needs.check-changes.outputs.sgl_kernel == 'true'
      uses: actions/download-artifact@v4
      with:
        path: sgl-kernel/dist/
        merge-multiple: true
        pattern: wheel-python3.10-cuda12.9
    - name: Install dependencies
      run: |
        CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/ci_install_dependency.sh
    - name: Run test
      timeout-minutes: 20
      working-directory: test/srt
      run: |
        # Optional flags are left empty (and therefore vanish on expansion) unless enabled by check-changes.
        RETRY_FLAG=""
        if [[ "${{ needs.check-changes.outputs.enable_retry }}" == "true" ]]; then
          RETRY_FLAG="--enable-retry"
        fi
        CONTINUE_ON_ERROR_FLAG=""
        if [[ "${{ needs.check-changes.outputs.continue_on_error }}" == "true" ]]; then
          CONTINUE_ON_ERROR_FLAG="--continue-on-error"
        fi
        python3 run_suite.py --suite per-commit-4-gpu --auto-partition-id ${{ matrix.part }} --auto-partition-size 3 $RETRY_FLAG $CONTINUE_ON_ERROR_FLAG
# Runs the per-commit-8-gpu-h200 suite on 8-gpu-h200, one matrix entry per
# auto-partition (4 partitions in total).
unit-test-backend-8-gpu-h200:
  needs:
    - check-changes
    - call-gate
    - unit-test-backend-1-gpu
    - stage-b-test-4-gpu-b200
  # Gate: run when this stage is explicitly targeted via inputs.target_stage.
  # With no target_stage, run on schedule events or when no upstream job
  # failed/was cancelled, and only if check-changes reported main_package or
  # sgl_kernel as 'true'.
  if: |
    always() &&
    (
      (inputs.target_stage == 'unit-test-backend-8-gpu-h200') ||
      (
        !inputs.target_stage &&
        (github.event_name == 'schedule' || (!failure() && !cancelled())) &&
        ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
      )
    )
  runs-on: 8-gpu-h200
  env:
    RUNNER_LABELS: 8-gpu-h200
  strategy:
    fail-fast: false
    matrix:
      part: [0, 1, 2, 3]
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        # First non-empty of: dispatch-supplied PR head SHA, caller-supplied ref, triggering commit.
        ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}
    # Only fetch the wheel artifacts when sgl_kernel is reported as changed.
    - name: Download artifacts
      if: needs.check-changes.outputs.sgl_kernel == 'true'
      uses: actions/download-artifact@v4
      with:
        path: sgl-kernel/dist/
        merge-multiple: true
        pattern: wheel-python3.10-cuda12.9
    - name: Install dependencies
      run: |
        CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/ci_install_dependency.sh
    # Disabled warmup step, kept for reference.
    # - name: Warmup Weights and JIT Compilation
    #   timeout-minutes: 20
    #   run: |
    #     # An example command for testing the warmup. TODO: make this more general and move them to python scripts.
    #     python3 -m sglang.compile_deep_gemm --model deepseek-ai/DeepSeek-V3-0324 --tp 8 --trust-remote-code
    - name: Run test
      timeout-minutes: 20
      working-directory: test/srt
      run: |
        # Optional flags are left empty (and therefore vanish on expansion) unless enabled by check-changes.
        RETRY_FLAG=""
        if [[ "${{ needs.check-changes.outputs.enable_retry }}" == "true" ]]; then
          RETRY_FLAG="--enable-retry"
        fi
        CONTINUE_ON_ERROR_FLAG=""
        if [[ "${{ needs.check-changes.outputs.continue_on_error }}" == "true" ]]; then
          CONTINUE_ON_ERROR_FLAG="--continue-on-error"
        fi
        python3 run_suite.py --suite per-commit-8-gpu-h200 --auto-partition-id ${{ matrix.part }} --auto-partition-size 4 $RETRY_FLAG $CONTINUE_ON_ERROR_FLAG
# Runs the per-commit-8-gpu-h20 suite on 8-gpu-h20, one matrix entry per
# auto-partition (2 partitions in total). Dependencies are installed through
# ci_install_deepep.sh rather than ci_install_dependency.sh.
unit-test-backend-8-gpu-h20:
  needs:
    - check-changes
    - call-gate
    - unit-test-backend-1-gpu
    - stage-b-test-4-gpu-b200
  # Gate: run when this stage is explicitly targeted via inputs.target_stage.
  # With no target_stage, run on schedule events or when no upstream job
  # failed/was cancelled, and only if check-changes reported main_package or
  # sgl_kernel as 'true'.
  if: |
    always() &&
    (
      (inputs.target_stage == 'unit-test-backend-8-gpu-h20') ||
      (
        !inputs.target_stage &&
        (github.event_name == 'schedule' || (!failure() && !cancelled())) &&
        ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
      )
    )
  runs-on: 8-gpu-h20
  env:
    SGLANG_CI_RDMA_ALL_DEVICES: "mlx5_1,mlx5_2,mlx5_3,mlx5_4"
    RUNNER_LABELS: 8-gpu-h20
  strategy:
    fail-fast: false
    matrix:
      part: [0, 1]
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        # First non-empty of: dispatch-supplied PR head SHA, caller-supplied ref, triggering commit.
        ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}
    # Only fetch the wheel artifacts when sgl_kernel is reported as changed.
    - name: Download artifacts
      if: needs.check-changes.outputs.sgl_kernel == 'true'
      uses: actions/download-artifact@v4
      with:
        path: sgl-kernel/dist/
        merge-multiple: true
        pattern: wheel-python3.10-cuda12.9
    - name: Install dependencies
      run: |
        CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/ci_install_deepep.sh
    - name: Run test
      timeout-minutes: 20
      working-directory: test/srt
      run: |
        # Optional flags are left empty (and therefore vanish on expansion) unless enabled by check-changes.
        RETRY_FLAG=""
        if [[ "${{ needs.check-changes.outputs.enable_retry }}" == "true" ]]; then
          RETRY_FLAG="--enable-retry"
        fi
        CONTINUE_ON_ERROR_FLAG=""
        if [[ "${{ needs.check-changes.outputs.continue_on_error }}" == "true" ]]; then
          CONTINUE_ON_ERROR_FLAG="--continue-on-error"
        fi
        python3 run_suite.py --suite per-commit-8-gpu-h20 --auto-partition-id ${{ matrix.part }} --auto-partition-size 2 $RETRY_FLAG $CONTINUE_ON_ERROR_FLAG
# Single-GPU benchmark job (part 1): runs selected test_bench_one_batch and
# test_bench_serving unittest cases from test/srt on 1-gpu-runner, one step
# per benchmark so each has its own 10-minute timeout.
performance-test-1-gpu-part-1:
  needs:
    - check-changes
    - call-gate
    - stage-a-test-1
  # Gate: run when this stage is explicitly targeted via inputs.target_stage.
  # With no target_stage, run on schedule events or when no upstream job
  # failed/was cancelled, and only if check-changes reported main_package or
  # sgl_kernel as 'true'.
  if: |
    always() &&
    (
      (inputs.target_stage == 'performance-test-1-gpu-part-1') ||
      (
        !inputs.target_stage &&
        (github.event_name == 'schedule' || (!failure() && !cancelled())) &&
        ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
      )
    )
  runs-on: 1-gpu-runner
  env:
    RUNNER_LABELS: 1-gpu-runner
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        # First non-empty of: dispatch-supplied PR head SHA, caller-supplied ref, triggering commit.
        ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}
    # Only fetch the wheel artifacts when sgl_kernel is reported as changed.
    - name: Download artifacts
      if: needs.check-changes.outputs.sgl_kernel == 'true'
      uses: actions/download-artifact@v4
      with:
        path: sgl-kernel/dist/
        merge-multiple: true
        pattern: wheel-python3.10-cuda12.9
    - name: Install dependencies
      run: |
        CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/ci_install_dependency.sh
    - name: Benchmark single latency
      timeout-minutes: 10
      working-directory: test/srt
      run: |
        python3 -m unittest test_bench_one_batch.TestBenchOneBatch.test_bs1_small
        python3 -m unittest test_bench_one_batch.TestBenchOneBatch.test_bs1_default
    - name: Benchmark online latency
      timeout-minutes: 10
      working-directory: test/srt
      run: |
        python3 -m unittest test_bench_serving.TestBenchServing.test_online_latency_default
    - name: Benchmark offline throughput
      timeout-minutes: 10
      working-directory: test/srt
      run: |
        python3 -m unittest test_bench_serving.TestBenchServing.test_offline_throughput_default
    - name: Benchmark offline throughput (Non-streaming, small batch size)
      timeout-minutes: 10
      working-directory: test/srt
      run: |
        python3 -m unittest test_bench_serving.TestBenchServing.test_offline_throughput_non_stream_small_batch_size
    - name: Benchmark online latency (EAGLE)
      timeout-minutes: 10
      working-directory: test/srt
      run: |
        python3 -m unittest test_bench_serving.TestBenchServing.test_online_latency_eagle
    - name: Benchmark online latency (LoRA)
      timeout-minutes: 10
      working-directory: test/srt
      run: |
        python3 -m unittest test_bench_serving.TestBenchServing.test_lora_online_latency
        python3 -m unittest test_bench_serving.TestBenchServing.test_lora_online_latency_with_concurrent_adapter_updates
# Single-GPU benchmark job (part 2): runs selected test_bench_serving unittest
# cases from test/srt on 1-gpu-runner, one step per benchmark so each has its
# own 10-minute timeout.
performance-test-1-gpu-part-2:
  needs:
    - check-changes
    - call-gate
    - stage-a-test-1
  # Gate: run when this stage is explicitly targeted via inputs.target_stage.
  # With no target_stage, run on schedule events or when no upstream job
  # failed/was cancelled, and only if check-changes reported main_package or
  # sgl_kernel as 'true'.
  if: |
    always() &&
    (
      (inputs.target_stage == 'performance-test-1-gpu-part-2') ||
      (
        !inputs.target_stage &&
        (github.event_name == 'schedule' || (!failure() && !cancelled())) &&
        ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
      )
    )
  runs-on: 1-gpu-runner
  env:
    RUNNER_LABELS: 1-gpu-runner
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        # First non-empty of: dispatch-supplied PR head SHA, caller-supplied ref, triggering commit.
        ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}
    # Only fetch the wheel artifacts when sgl_kernel is reported as changed.
    - name: Download artifacts
      if: needs.check-changes.outputs.sgl_kernel == 'true'
      uses: actions/download-artifact@v4
      with:
        path: sgl-kernel/dist/
        merge-multiple: true
        pattern: wheel-python3.10-cuda12.9
    - name: Install dependencies
      run: |
        CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/ci_install_dependency.sh
    - name: Benchmark offline throughput (w/o RadixAttention)
      timeout-minutes: 10
      working-directory: test/srt
      run: |
        python3 -m unittest test_bench_serving.TestBenchServing.test_offline_throughput_without_radix_cache
    - name: Benchmark offline throughput (w/ Triton)
      timeout-minutes: 10
      working-directory: test/srt
      run: |
        python3 -m unittest test_bench_serving.TestBenchServing.test_offline_throughput_with_triton_attention_backend
    - name: Benchmark offline throughput (w/ FP8)
      timeout-minutes: 10
      working-directory: test/srt
      run: |
        python3 -m unittest test_bench_serving.TestBenchServing.test_offline_throughput_default_fp8
    - name: Benchmark VLM offline throughput
      timeout-minutes: 10
      working-directory: test/srt
      run: |
        python3 -m unittest test_bench_serving.TestBenchServing.test_vlm_offline_throughput
    - name: Benchmark VLM online latency
      timeout-minutes: 10
      working-directory: test/srt
      run: |
        python3 -m unittest test_bench_serving.TestBenchServing.test_vlm_online_latency
# Single-GPU benchmark job (part 3): runs the score-API and embeddings-API
# test_bench_serving unittest cases from test/srt on 1-gpu-runner, one step
# per benchmark so each has its own 10-minute timeout.
performance-test-1-gpu-part-3:
  needs:
    - check-changes
    - call-gate
    - stage-a-test-1
  # Gate: run when this stage is explicitly targeted via inputs.target_stage.
  # With no target_stage, run on schedule events or when no upstream job
  # failed/was cancelled, and only if check-changes reported main_package or
  # sgl_kernel as 'true'.
  if: |
    always() &&
    (
      (inputs.target_stage == 'performance-test-1-gpu-part-3') ||
      (
        !inputs.target_stage &&
        (github.event_name == 'schedule' || (!failure() && !cancelled())) &&
        ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
      )
    )
  runs-on: 1-gpu-runner
  env:
    RUNNER_LABELS: 1-gpu-runner
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        # First non-empty of: dispatch-supplied PR head SHA, caller-supplied ref, triggering commit.
        ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}
    # Only fetch the wheel artifacts when sgl_kernel is reported as changed.
    - name: Download artifacts
      if: needs.check-changes.outputs.sgl_kernel == 'true'
      uses: actions/download-artifact@v4
      with:
        path: sgl-kernel/dist/
        merge-multiple: true
        pattern: wheel-python3.10-cuda12.9
    - name: Install dependencies
      run: |
        CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/ci_install_dependency.sh
    - name: Benchmark Scores online latency and throughput
      timeout-minutes: 10
      working-directory: test/srt
      run: |
        python3 -m unittest test_bench_serving.TestBenchServing.test_score_api_latency_throughput
    - name: Benchmark Scores online latency and throughput (batch size scaling)
      timeout-minutes: 10
      working-directory: test/srt
      run: |
        python3 -m unittest test_bench_serving.TestBenchServing.test_score_api_batch_scaling
    - name: Benchmark Embeddings online latency and throughput
      timeout-minutes: 10
      working-directory: test/srt
      run: |
        python3 -m unittest test_bench_serving.TestBenchServing.test_embeddings_api_latency_throughput
    - name: Benchmark Embeddings online latency and throughput (batch size scaling)
      timeout-minutes: 10
      working-directory: test/srt
      run: |
        python3 -m unittest test_bench_serving.TestBenchServing.test_embeddings_api_batch_scaling
# Two-GPU benchmark job: runs the TP=2 / PP=2 test_bench_one_batch and
# test_bench_serving unittest cases from test/srt on 2-gpu-runner, one step
# per benchmark so each has its own 10-minute timeout.
performance-test-2-gpu:
  needs:
    - check-changes
    - call-gate
    - unit-test-backend-1-gpu
    - stage-b-test-4-gpu-b200
  # Gate: run when this stage is explicitly targeted via inputs.target_stage.
  # With no target_stage, run on schedule events or when no upstream job
  # failed/was cancelled, and only if check-changes reported main_package or
  # sgl_kernel as 'true'.
  if: |
    always() &&
    (
      (inputs.target_stage == 'performance-test-2-gpu') ||
      (
        !inputs.target_stage &&
        (github.event_name == 'schedule' || (!failure() && !cancelled())) &&
        ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
      )
    )
  runs-on: 2-gpu-runner
  env:
    RUNNER_LABELS: 2-gpu-runner
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        # First non-empty of: dispatch-supplied PR head SHA, caller-supplied ref, triggering commit.
        ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}
    # Only fetch the wheel artifacts when sgl_kernel is reported as changed.
    - name: Download artifacts
      if: needs.check-changes.outputs.sgl_kernel == 'true'
      uses: actions/download-artifact@v4
      with:
        path: sgl-kernel/dist/
        merge-multiple: true
        pattern: wheel-python3.10-cuda12.9
    - name: Install dependencies
      run: |
        CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/ci_install_dependency.sh
    - name: Benchmark single latency (TP=2)
      timeout-minutes: 10
      working-directory: test/srt
      run: |
        python3 -m unittest test_bench_one_batch.TestBenchOneBatch.test_moe_tp2_bs1
    - name: Benchmark single latency + torch.compile (TP=2)
      timeout-minutes: 10
      working-directory: test/srt
      run: |
        python3 -m unittest test_bench_one_batch.TestBenchOneBatch.test_torch_compile_tp2_bs1
    - name: Benchmark offline throughput (TP=2)
      timeout-minutes: 10
      working-directory: test/srt
      run: |
        python3 -m unittest test_bench_serving.TestBenchServing.test_moe_offline_throughput_default
    - name: Benchmark offline throughput (w/o RadixAttention) (TP=2)
      timeout-minutes: 10
      working-directory: test/srt
      run: |
        python3 -m unittest test_bench_serving.TestBenchServing.test_moe_offline_throughput_without_radix_cache
    - name: Benchmark offline PP decode throughput (PP=2)
      timeout-minutes: 10
      working-directory: test/srt
      run: |
        python3 -m unittest test_bench_serving.TestBenchServing.test_pp_offline_throughput_default_decode
    - name: Benchmark offline PP prefill throughput (PP=2)
      timeout-minutes: 10
      working-directory: test/srt
      run: |
        python3 -m unittest test_bench_serving.TestBenchServing.test_pp_long_context_prefill
# Single-GPU accuracy job: installs dependencies plus an editable checkout of
# human-eval, then runs test_eval_accuracy_large.py through the
# sglang.test.ci.run_with_retry wrapper on 1-gpu-runner.
accuracy-test-1-gpu:
  needs:
    - check-changes
    - call-gate
    - stage-a-test-1
  # Gate: run when this stage is explicitly targeted via inputs.target_stage.
  # With no target_stage, run on schedule events or when no upstream job
  # failed/was cancelled, and only if check-changes reported main_package or
  # sgl_kernel as 'true'.
  if: |
    always() &&
    (
      (inputs.target_stage == 'accuracy-test-1-gpu') ||
      (
        !inputs.target_stage &&
        (github.event_name == 'schedule' || (!failure() && !cancelled())) &&
        ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
      )
    )
  runs-on: 1-gpu-runner
  env:
    RUNNER_LABELS: 1-gpu-runner
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        # First non-empty of: dispatch-supplied PR head SHA, caller-supplied ref, triggering commit.
        ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}
    # Only fetch the wheel artifacts when sgl_kernel is reported as changed.
    - name: Download artifacts
      if: needs.check-changes.outputs.sgl_kernel == 'true'
      uses: actions/download-artifact@v4
      with:
        path: sgl-kernel/dist/
        merge-multiple: true
        pattern: wheel-python3.10-cuda12.9
    - name: Install dependencies
      run: |
        CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/ci_install_dependency.sh
        git clone https://github.com/merrymercy/human-eval.git
        cd human-eval
        pip install -e .
    - name: Evaluate accuracy
      timeout-minutes: 25
      working-directory: test/srt
      run: |
        python3 -m sglang.test.ci.run_with_retry test_eval_accuracy_large.py
# Two-GPU accuracy job: installs dependencies plus an editable checkout of
# human-eval, then runs test_moe_eval_accuracy_large.py through the
# sglang.test.ci.run_with_retry wrapper on 2-gpu-runner.
accuracy-test-2-gpu:
  needs:
    - check-changes
    - call-gate
    - accuracy-test-1-gpu
  # Gate: run when this stage is explicitly targeted via inputs.target_stage.
  # With no target_stage, run on schedule events or when no upstream job
  # failed/was cancelled, and only if check-changes reported main_package or
  # sgl_kernel as 'true'.
  if: |
    always() &&
    (
      (inputs.target_stage == 'accuracy-test-2-gpu') ||
      (
        !inputs.target_stage &&
        (github.event_name == 'schedule' || (!failure() && !cancelled())) &&
        ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
      )
    )
  runs-on: 2-gpu-runner
  env:
    RUNNER_LABELS: 2-gpu-runner
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        # First non-empty of: dispatch-supplied PR head SHA, caller-supplied ref, triggering commit.
        ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}
    # Only fetch the wheel artifacts when sgl_kernel is reported as changed.
    - name: Download artifacts
      if: needs.check-changes.outputs.sgl_kernel == 'true'
      uses: actions/download-artifact@v4
      with:
        path: sgl-kernel/dist/
        merge-multiple: true
        pattern: wheel-python3.10-cuda12.9
    - name: Install dependencies
      run: |
        CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/ci_install_dependency.sh
        git clone https://github.com/merrymercy/human-eval.git
        cd human-eval
        pip install -e .
    - name: Evaluate accuracy (TP=2)
      timeout-minutes: 25
      working-directory: test/srt
      run: |
        python3 -m sglang.test.ci.run_with_retry test_moe_eval_accuracy_large.py
# Runs the per-commit-4-gpu-deepep suite on 4-gpu-h100 as a single
# (unpartitioned) job. Dependencies are installed through ci_install_deepep.sh.
unit-test-deepep-4-gpu:
  needs:
    - check-changes
    - call-gate
    - unit-test-backend-1-gpu
    - stage-b-test-4-gpu-b200
  # Gate: run when this stage is explicitly targeted via inputs.target_stage.
  # With no target_stage, run on schedule events or when no upstream job
  # failed/was cancelled, and only if check-changes reported main_package or
  # sgl_kernel as 'true'.
  if: |
    always() &&
    (
      (inputs.target_stage == 'unit-test-deepep-4-gpu') ||
      (
        !inputs.target_stage &&
        (github.event_name == 'schedule' || (!failure() && !cancelled())) &&
        ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
      )
    )
  runs-on: 4-gpu-h100
  env:
    RUNNER_LABELS: 4-gpu-h100
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        # First non-empty of: dispatch-supplied PR head SHA, caller-supplied ref, triggering commit.
        ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}
    # Only fetch the wheel artifacts when sgl_kernel is reported as changed.
    - name: Download artifacts
      if: needs.check-changes.outputs.sgl_kernel == 'true'
      uses: actions/download-artifact@v4
      with:
        path: sgl-kernel/dist/
        merge-multiple: true
        pattern: wheel-python3.10-cuda12.9
    - name: Install dependencies
      run: |
        CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/ci_install_deepep.sh
    - name: Run test
      timeout-minutes: 20
      working-directory: test/srt
      run: |
        # Optional flags are left empty (and therefore vanish on expansion) unless enabled by check-changes.
        RETRY_FLAG=""
        if [[ "${{ needs.check-changes.outputs.enable_retry }}" == "true" ]]; then
          RETRY_FLAG="--enable-retry"
        fi
        CONTINUE_ON_ERROR_FLAG=""
        if [[ "${{ needs.check-changes.outputs.continue_on_error }}" == "true" ]]; then
          CONTINUE_ON_ERROR_FLAG="--continue-on-error"
        fi
        python3 run_suite.py --suite per-commit-4-gpu-deepep $RETRY_FLAG $CONTINUE_ON_ERROR_FLAG
# Runs the per-commit-8-gpu-h200-deepep suite on 8-gpu-h200 as a single
# (unpartitioned) job. Dependencies are installed through ci_install_deepep.sh.
unit-test-deepep-8-gpu:
  needs:
    - check-changes
    - call-gate
    - unit-test-backend-1-gpu
    - stage-b-test-4-gpu-b200
  # Gate: run when this stage is explicitly targeted via inputs.target_stage.
  # With no target_stage, run on schedule events or when no upstream job
  # failed/was cancelled, and only if check-changes reported main_package or
  # sgl_kernel as 'true'.
  if: |
    always() &&
    (
      (inputs.target_stage == 'unit-test-deepep-8-gpu') ||
      (
        !inputs.target_stage &&
        (github.event_name == 'schedule' || (!failure() && !cancelled())) &&
        ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
      )
    )
  runs-on: 8-gpu-h200
  env:
    RUNNER_LABELS: 8-gpu-h200
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        # First non-empty of: dispatch-supplied PR head SHA, caller-supplied ref, triggering commit.
        ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}
    # Only fetch the wheel artifacts when sgl_kernel is reported as changed.
    - name: Download artifacts
      if: needs.check-changes.outputs.sgl_kernel == 'true'
      uses: actions/download-artifact@v4
      with:
        path: sgl-kernel/dist/
        merge-multiple: true
        pattern: wheel-python3.10-cuda12.9
    - name: Install dependencies
      run: |
        CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/ci_install_deepep.sh
    - name: Run test
      timeout-minutes: 20
      working-directory: test/srt
      run: |
        # Optional flags are left empty (and therefore vanish on expansion) unless enabled by check-changes.
        RETRY_FLAG=""
        if [[ "${{ needs.check-changes.outputs.enable_retry }}" == "true" ]]; then
          RETRY_FLAG="--enable-retry"
        fi
        CONTINUE_ON_ERROR_FLAG=""
        if [[ "${{ needs.check-changes.outputs.continue_on_error }}" == "true" ]]; then
          CONTINUE_ON_ERROR_FLAG="--continue-on-error"
        fi
        python3 run_suite.py --suite per-commit-8-gpu-h200-deepep $RETRY_FLAG $CONTINUE_ON_ERROR_FLAG
# Runs the per-commit-4-gpu-b200 suite on the runner label chosen by
# check-changes (outputs.b200_runner), one matrix entry per auto-partition
# (3 partitions in total), with IS_BLACKWELL=1 set for install and test.
unit-test-backend-4-gpu-b200:
  needs:
    - check-changes
    - call-gate
    - unit-test-backend-1-gpu
    - stage-b-test-4-gpu-b200
  # Gate: run when this stage is explicitly targeted via inputs.target_stage.
  # With no target_stage, run on schedule events or when no upstream job
  # failed/was cancelled, and only if check-changes reported main_package or
  # sgl_kernel as 'true'.
  if: |
    always() &&
    (
      (inputs.target_stage == 'unit-test-backend-4-gpu-b200') ||
      (
        !inputs.target_stage &&
        (github.event_name == 'schedule' || (!failure() && !cancelled())) &&
        ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
      )
    )
  runs-on: ${{ needs.check-changes.outputs.b200_runner }}
  env:
    RUNNER_LABELS: ${{ needs.check-changes.outputs.b200_runner }}
  strategy:
    fail-fast: false
    matrix:
      part: [0, 1, 2]
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        # First non-empty of: dispatch-supplied PR head SHA, caller-supplied ref, triggering commit.
        ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}
    # Only fetch the wheel artifacts when sgl_kernel is reported as changed.
    - name: Download artifacts
      if: needs.check-changes.outputs.sgl_kernel == 'true'
      uses: actions/download-artifact@v6
      with:
        path: sgl-kernel/dist/
        merge-multiple: true
        pattern: wheel-python3.10-cuda12.9
    - name: Install dependencies
      run: |
        CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} IS_BLACKWELL=1 bash scripts/ci/ci_install_dependency.sh
    - name: Run test
      timeout-minutes: 30
      working-directory: test/srt
      run: |
        # Optional flags are left empty (and therefore vanish on expansion) unless enabled by check-changes.
        RETRY_FLAG=""
        if [[ "${{ needs.check-changes.outputs.enable_retry }}" == "true" ]]; then
          RETRY_FLAG="--enable-retry"
        fi
        CONTINUE_ON_ERROR_FLAG=""
        if [[ "${{ needs.check-changes.outputs.continue_on_error }}" == "true" ]]; then
          CONTINUE_ON_ERROR_FLAG="--continue-on-error"
        fi
        IS_BLACKWELL=1 python3 run_suite.py --suite per-commit-4-gpu-b200 --auto-partition-id ${{ matrix.part }} --auto-partition-size 3 --timeout-per-file 1800 $RETRY_FLAG $CONTINUE_ON_ERROR_FLAG
# Runs the per-commit-4-gpu-gb200 suite on 4-gpu-gb200 as a single partition
# (id 0 of 1). Uses the aarch64 wheel artifacts and installs through
# ci_install_deepep.sh with IS_BLACKWELL=1 and GRACE_BLACKWELL=1.
unit-test-backend-4-gpu-gb200:
  needs:
    - check-changes
    - call-gate
    - unit-test-backend-1-gpu
    - stage-b-test-4-gpu-b200
    - sgl-kernel-build-wheels-arm
  # Gate: run when this stage is explicitly targeted via inputs.target_stage.
  # With no target_stage, run on schedule events or when no upstream job
  # failed/was cancelled, and only if check-changes reported main_package or
  # sgl_kernel as 'true'.
  if: |
    always() &&
    (
      (inputs.target_stage == 'unit-test-backend-4-gpu-gb200') ||
      (
        !inputs.target_stage &&
        (github.event_name == 'schedule' || (!failure() && !cancelled())) &&
        ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
      )
    )
  runs-on: 4-gpu-gb200
  env:
    RUNNER_LABELS: 4-gpu-gb200
  strategy:
    fail-fast: false
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        # First non-empty of: dispatch-supplied PR head SHA, caller-supplied ref, triggering commit.
        ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}
    # Only fetch the wheel artifacts when sgl_kernel is reported as changed.
    - name: Download artifacts
      if: needs.check-changes.outputs.sgl_kernel == 'true'
      uses: actions/download-artifact@v4
      with:
        path: sgl-kernel/dist/
        merge-multiple: true
        pattern: wheel-python3.10-cuda12.9-aarch64
    - name: Install dependencies
      run: |
        CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} IS_BLACKWELL=1 GRACE_BLACKWELL=1 bash scripts/ci/ci_install_deepep.sh
    - name: Run test
      timeout-minutes: 45
      working-directory: test/srt
      run: |
        # Optional flags are left empty (and therefore vanish on expansion) unless enabled by check-changes.
        RETRY_FLAG=""
        if [[ "${{ needs.check-changes.outputs.enable_retry }}" == "true" ]]; then
          RETRY_FLAG="--enable-retry"
        fi
        CONTINUE_ON_ERROR_FLAG=""
        if [[ "${{ needs.check-changes.outputs.continue_on_error }}" == "true" ]]; then
          CONTINUE_ON_ERROR_FLAG="--continue-on-error"
        fi
        python3 run_suite.py --suite per-commit-4-gpu-gb200 --auto-partition-id 0 --auto-partition-size 1 --timeout-per-file 3600 $RETRY_FLAG $CONTINUE_ON_ERROR_FLAG
# Aggregation job: always runs after every listed job has finished and fails
# if any of them ended with result "failure" or "cancelled". Other results
# (e.g. "success", "skipped") are treated as passing.
pr-test-finish:
  # NOTE(review): sgl-kernel-build-wheels-arm is a dependency of
  # unit-test-backend-4-gpu-gb200 but is not listed here, so its own result is
  # never inspected by this job. Confirm whether that omission is intentional.
  needs:
    - call-gate
    - check-changes
    - sgl-kernel-build-wheels
    - sgl-kernel-unit-test
    - sgl-kernel-mla-test
    - sgl-kernel-benchmark-test
    - sgl-kernel-b200-test
    - jit-kernel-unit-test
    - multimodal-gen-test-1-gpu
    - multimodal-gen-test-2-gpu
    - stage-a-test-1
    - stage-a-cpu-only
    - stage-b-test-small-1-gpu
    - stage-b-test-large-1-gpu
    - stage-b-test-large-2-gpu
    - stage-c-test-large-4-gpu
    - quantization-test
    - unit-test-backend-1-gpu
    - unit-test-backend-2-gpu
    - stage-b-test-4-gpu-b200
    - unit-test-backend-4-gpu
    - unit-test-backend-8-gpu-h20
    - unit-test-backend-8-gpu-h200
    - performance-test-1-gpu-part-1
    - performance-test-1-gpu-part-2
    - performance-test-1-gpu-part-3
    - performance-test-2-gpu
    - accuracy-test-1-gpu
    - accuracy-test-2-gpu
    - unit-test-deepep-4-gpu
    - unit-test-deepep-8-gpu
    - unit-test-backend-4-gpu-b200
    - unit-test-backend-4-gpu-gb200
  if: always()
  runs-on: ubuntu-latest
  steps:
    - name: Check all dependent job statuses
      env:
        # Hand the needs context to the script through the environment instead
        # of splicing it into the script text: the JSON also carries job
        # outputs, and a single quote inside any of them would otherwise
        # terminate the shell literal and corrupt (or inject into) the script.
        NEEDS_JSON: ${{ toJson(needs) }}
      run: |
        # Print "<job>: <result>" for every dependency, in declaration order,
        # so the full picture is visible even when something failed.
        printf '%s' "$NEEDS_JSON" | jq -r 'to_entries[] | "\(.key): \(.value.result)"'
        # Collect the names of all jobs that failed or were cancelled.
        failed_jobs=$(printf '%s' "$NEEDS_JSON" | jq -r 'to_entries[] | select(.value.result == "failure" or .value.result == "cancelled") | .key')
        if [[ -n "$failed_jobs" ]]; then
          echo "The above jobs failed."
          echo "Failed or cancelled jobs:"
          echo "$failed_jobs"
          exit 1
        fi
        # No failure or cancellation was found among the dependencies.
        echo "All jobs completed successfully"
        exit 0