Nightly Test (Nvidia) #37
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Nightly regression workflow for the NVIDIA GPU runners.
name: Nightly Test (Nvidia)

on:
  # Cron '0 0 * * *': minute 0 of hour 0, every day.
  schedule:
    - cron: '0 0 * * *'

  # Pushes to main that touch the version file or the suite runner.
  push:
    branches:
      - main
    paths:
      - 'python/sglang/version.py'
      - 'test/run_suite.py'

  # Manual trigger. `job_filter` restricts the run to one job; the option
  # values are the job ids defined under `jobs`.
  workflow_dispatch:
    inputs:
      job_filter:
        description: 'Select which job to run (leave empty or "all" to run all jobs)'
        required: false
        type: choice
        default: 'all'
        options:
          - 'all'
          - 'nightly-test-general-1-gpu-runner'
          - 'nightly-test-general-4-gpu-h100'
          - 'nightly-test-general-8-gpu-h200'
          - 'nightly-test-general-8-gpu-h20'
          - 'nightly-test-general-8-gpu-b200'
          - 'nightly-test-text-accuracy-2-gpu-runner'
          - 'nightly-test-text-perf-2-gpu-runner'
          - 'nightly-test-vlm-accuracy-2-gpu-runner'
          - 'nightly-test-vlm-perf-2-gpu-runner'
          - 'nightly-test-multimodal-server-1-gpu'
          - 'nightly-test-multimodal-server-2-gpu'
          - 'nightly-test-perf-4-gpu-b200'
          - 'nightly-test-perf-8-gpu-b200'

  # Reusable-workflow entry point. Callers may pin the git ref under test and
  # pass the same job filter as a free-form string.
  workflow_call:
    inputs:
      ref:
        description: 'Git ref (branch, tag, or SHA) to test. If not provided, uses the default branch.'
        required: false
        type: string
        default: ''
      job_filter:
        description: 'Select which job to run (leave empty or "all" to run all jobs)'
        required: false
        type: string
        default: 'all'
# Runs are grouped by the ref under test: the caller-supplied `inputs.ref`
# when set, otherwise the ref that triggered the workflow.
concurrency:
  group: 'nightly-test-nvidia-${{ inputs.ref || github.ref }}'
  # Intended to keep in-progress runs alive when invoked via workflow_call.
  # NOTE(review): inside a reusable workflow the `github` context is that of
  # the caller, so `github.event_name` may never equal 'workflow_call' and
  # this could always evaluate to true -- confirm against a called run.
  cancel-in-progress: ${{ github.event_name != 'workflow_call' }}
# Workflow-level environment, visible to every job and step.
# Values are quoted explicitly because environment variables are strings.
env:
  SGLANG_IS_IN_CI: 'true'
  HF_HUB_DOWNLOAD_TIMEOUT: '300'
  HF_HUB_ETAG_TIMEOUT: '300'
# Job conventions used throughout:
#   * Every job is gated on the upstream repository (sgl-project/sglang).
#   * Every test job runs when `inputs.job_filter` is empty, 'all', or equal
#     to the job's own id.
#   * Every job checks out `inputs.ref` when provided, else `github.ref`.
jobs:
  # General tests - 1 GPU
  nightly-test-general-1-gpu-runner:
    if: github.repository == 'sgl-project/sglang' && (inputs.job_filter == '' || inputs.job_filter == 'all' || inputs.job_filter == 'nightly-test-general-1-gpu-runner')
    runs-on: 1-gpu-runner
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}

      - name: Install dependencies
        run: |
          bash scripts/ci/ci_install_dependency.sh

      - name: Run test
        timeout-minutes: 60
        run: |
          cd test
          python3 run_suite.py --hw cuda --suite nightly-1-gpu --nightly --continue-on-error

  # General tests - 4 GPU H100
  nightly-test-general-4-gpu-h100:
    if: github.repository == 'sgl-project/sglang' && (inputs.job_filter == '' || inputs.job_filter == 'all' || inputs.job_filter == 'nightly-test-general-4-gpu-h100')
    runs-on: 4-gpu-h100
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}

      - name: Install dependencies
        run: |
          bash scripts/ci/ci_install_dependency.sh

      - name: Run test
        timeout-minutes: 30
        run: |
          cd test
          python3 run_suite.py --hw cuda --suite nightly-4-gpu --nightly --continue-on-error

  # General tests - 8 GPU H200
  nightly-test-general-8-gpu-h200:
    if: github.repository == 'sgl-project/sglang' && (inputs.job_filter == '' || inputs.job_filter == 'all' || inputs.job_filter == 'nightly-test-general-8-gpu-h200')
    runs-on: 8-gpu-h200
    env:
      RUNNER_LABELS: 8-gpu-h200
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}

      - name: Install dependencies
        run: |
          bash scripts/ci/ci_install_dependency.sh

      # `always()` lets this step run even when an earlier step failed.
      # The 300-minute step timeout equals the 18000-second per-file timeout.
      - name: Run common 8-GPU model tests
        if: always()
        timeout-minutes: 300
        env:
          TRACE_BASE_URL: https://raw.githubusercontent.com/sglang-bot/sglang-ci-data/main/traces/${{ github.run_id }}
          PERFETTO_RELAY_URL: ${{ vars.PERFETTO_RELAY_URL }}
          GPU_CONFIG: "8-gpu-h200"
          IS_H200: "1"
        run: |
          cd test
          python3 run_suite.py --hw cuda --suite nightly-8-gpu-common --nightly --timeout-per-file=18000 --continue-on-error

      - name: Run test
        timeout-minutes: 30
        env:
          GPU_CONFIG: "8-gpu-h200"
        run: |
          cd test
          python3 run_suite.py --hw cuda --suite nightly-8-gpu-h200 --nightly --continue-on-error

  # General tests - 8 GPU H20
  nightly-test-general-8-gpu-h20:
    if: github.repository == 'sgl-project/sglang' && (inputs.job_filter == '' || inputs.job_filter == 'all' || inputs.job_filter == 'nightly-test-general-8-gpu-h20')
    runs-on: 8-gpu-h20
    env:
      SGLANG_CI_RDMA_ALL_DEVICES: "mlx5_1,mlx5_2,mlx5_3,mlx5_4"
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}

      - name: Install dependencies
        run: |
          bash scripts/ci/ci_install_dependency.sh

      - name: Run test
        timeout-minutes: 30
        env:
          GPU_CONFIG: "8-gpu-h20"
        run: |
          cd test
          python3 run_suite.py --hw cuda --suite nightly-8-gpu-h20 --nightly --continue-on-error

  # General tests - 8 GPU B200
  # The filter compares against this job's own id so that selecting
  # 'nightly-test-general-8-gpu-b200' runs this job and selecting the H20 job
  # does not.
  nightly-test-general-8-gpu-b200:
    if: github.repository == 'sgl-project/sglang' && (inputs.job_filter == '' || inputs.job_filter == 'all' || inputs.job_filter == 'nightly-test-general-8-gpu-b200')
    runs-on: 8-gpu-b200
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}

      # IS_BLACKWELL=1 is passed to the install script for the B200 runners.
      - name: Install dependencies
        run: |
          IS_BLACKWELL=1 bash scripts/ci/ci_install_dependency.sh

      - name: Run test
        timeout-minutes: 120
        env:
          GPU_CONFIG: "8-gpu-b200"
        run: |
          cd test
          python3 run_suite.py --hw cuda --suite nightly-8-gpu-b200 --nightly --continue-on-error --timeout-per-file 2400

  # Text model accuracy tests
  nightly-test-text-accuracy-2-gpu-runner:
    if: github.repository == 'sgl-project/sglang' && (inputs.job_filter == '' || inputs.job_filter == 'all' || inputs.job_filter == 'nightly-test-text-accuracy-2-gpu-runner')
    runs-on: 2-gpu-runner
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}

      - name: Install dependencies
        run: |
          bash scripts/ci/ci_install_dependency.sh

      - name: Run eval test for text models
        timeout-minutes: 120
        run: |
          cd test
          python3 run_suite.py --hw cuda --suite nightly-eval-text-2-gpu --nightly --continue-on-error --timeout-per-file 4500

  # Text model performance tests
  nightly-test-text-perf-2-gpu-runner:
    if: github.repository == 'sgl-project/sglang' && (inputs.job_filter == '' || inputs.job_filter == 'all' || inputs.job_filter == 'nightly-test-text-perf-2-gpu-runner')
    runs-on: 2-gpu-runner
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}

      - name: Install dependencies
        run: |
          bash scripts/ci/ci_install_dependency.sh

      # The profile directory is removed before the run; the publish step
      # below reads the same directory.
      - name: Run performance test for text models
        timeout-minutes: 180
        env:
          TRACE_BASE_URL: https://raw.githubusercontent.com/sglang-bot/sglang-ci-data/main/traces/${{ github.run_id }}
          PERFETTO_RELAY_URL: ${{ vars.PERFETTO_RELAY_URL }}
          GPU_CONFIG: "2-gpu-runner"
        run: |
          cd test
          rm -rf performance_profiles_text_models/
          python3 run_suite.py --hw cuda --suite nightly-perf-text-2-gpu --nightly --continue-on-error

      - name: Publish traces to storage repo
        env:
          GITHUB_TOKEN: ${{ secrets.GH_PAT_FOR_NIGHTLY_CI_DATA }}
          GITHUB_RUN_ID: ${{ github.run_id }}
          GITHUB_RUN_NUMBER: ${{ github.run_number }}
        run: |
          python3 scripts/ci/publish_traces.py --traces-dir test/performance_profiles_text_models

  # VLM accuracy tests
  nightly-test-vlm-accuracy-2-gpu-runner:
    if: github.repository == 'sgl-project/sglang' && (inputs.job_filter == '' || inputs.job_filter == 'all' || inputs.job_filter == 'nightly-test-vlm-accuracy-2-gpu-runner')
    runs-on: 2-gpu-runner
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}

      - name: Install dependencies
        run: |
          bash scripts/ci/ci_install_dependency.sh

      - name: Run eval test for VLM models (fixed MMMU-100)
        timeout-minutes: 240
        run: |
          cd test
          python3 run_suite.py --hw cuda --suite nightly-eval-vlm-2-gpu --nightly --continue-on-error --timeout-per-file 9000

  # VLM performance tests
  nightly-test-vlm-perf-2-gpu-runner:
    if: github.repository == 'sgl-project/sglang' && (inputs.job_filter == '' || inputs.job_filter == 'all' || inputs.job_filter == 'nightly-test-vlm-perf-2-gpu-runner')
    runs-on: 2-gpu-runner
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}

      - name: Install dependencies
        run: |
          bash scripts/ci/ci_install_dependency.sh

      # The profile directory is removed before the run; the publish step
      # below reads the same directory.
      - name: Run perf test for VLM models (MMMU)
        timeout-minutes: 240
        env:
          TRACE_BASE_URL: https://raw.githubusercontent.com/sglang-bot/sglang-ci-data/main/traces/${{ github.run_id }}
          PERFETTO_RELAY_URL: ${{ vars.PERFETTO_RELAY_URL }}
          GPU_CONFIG: "2-gpu-runner"
        run: |
          cd test
          rm -rf performance_profiles_vlms/
          python3 run_suite.py --hw cuda --suite nightly-perf-vlm-2-gpu --nightly --continue-on-error

      - name: Publish traces to storage repo
        env:
          GITHUB_TOKEN: ${{ secrets.GH_PAT_FOR_NIGHTLY_CI_DATA }}
          GITHUB_RUN_ID: ${{ github.run_id }}
          GITHUB_RUN_NUMBER: ${{ github.run_number }}
        run: |
          python3 scripts/ci/publish_traces.py --traces-dir test/performance_profiles_vlms

  # Diffusion server tests - 1 GPU
  # Split into two matrix partitions; `fail-fast: false` keeps one partition
  # running when the other fails.
  nightly-test-multimodal-server-1-gpu:
    if: github.repository == 'sgl-project/sglang' && (inputs.job_filter == '' || inputs.job_filter == 'all' || inputs.job_filter == 'nightly-test-multimodal-server-1-gpu')
    runs-on: 1-gpu-runner
    strategy:
      fail-fast: false
      max-parallel: 5
      matrix:
        part: [0, 1]
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}

      - name: Install dependencies
        run: |
          bash scripts/ci/ci_install_dependency.sh diffusion
          pip install slack_sdk

      # --total-partitions must match the number of entries in matrix.part.
      - name: Run diffusion server tests
        env:
          SGLANG_DIFFUSION_SLACK_TOKEN: ${{ secrets.SGLANG_DIFFUSION_SLACK_TOKEN }}
          GITHUB_RUN_ID: ${{ github.run_id }}
        timeout-minutes: 60
        run: |
          cd python
          python3 sglang/multimodal_gen/test/run_suite.py \
            --suite 1-gpu \
            --partition-id ${{ matrix.part }} \
            --total-partitions 2

  # Diffusion server tests - 2 GPU
  # Same two-partition layout as the 1-GPU job, on the 2-GPU runner.
  nightly-test-multimodal-server-2-gpu:
    if: github.repository == 'sgl-project/sglang' && (inputs.job_filter == '' || inputs.job_filter == 'all' || inputs.job_filter == 'nightly-test-multimodal-server-2-gpu')
    runs-on: 2-gpu-runner
    strategy:
      fail-fast: false
      max-parallel: 5
      matrix:
        part: [0, 1]
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}

      - name: Install dependencies
        run: |
          bash scripts/ci/ci_install_dependency.sh diffusion
          pip install slack_sdk

      # --total-partitions must match the number of entries in matrix.part.
      - name: Run diffusion server tests
        env:
          SGLANG_DIFFUSION_SLACK_TOKEN: ${{ secrets.SGLANG_DIFFUSION_SLACK_TOKEN }}
          GITHUB_RUN_ID: ${{ github.run_id }}
        timeout-minutes: 60
        run: |
          cd python
          python3 sglang/multimodal_gen/test/run_suite.py \
            --suite 2-gpu \
            --partition-id ${{ matrix.part }} \
            --total-partitions 2

  # B200 Performance tests - 4 GPU
  nightly-test-perf-4-gpu-b200:
    if: github.repository == 'sgl-project/sglang' && (inputs.job_filter == '' || inputs.job_filter == 'all' || inputs.job_filter == 'nightly-test-perf-4-gpu-b200')
    runs-on: 4-gpu-b200
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}

      - name: Install dependencies
        run: |
          IS_BLACKWELL=1 bash scripts/ci/ci_install_dependency.sh

      - name: Run test
        timeout-minutes: 60
        run: |
          cd test
          python3 run_suite.py --hw cuda --suite nightly-4-gpu-b200 --nightly --continue-on-error

  # B200 Performance tests - 8 GPU
  nightly-test-perf-8-gpu-b200:
    if: github.repository == 'sgl-project/sglang' && (inputs.job_filter == '' || inputs.job_filter == 'all' || inputs.job_filter == 'nightly-test-perf-8-gpu-b200')
    runs-on: 8-gpu-b200
    env:
      RUNNER_LABELS: 8-gpu-b200
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}

      - name: Install dependencies
        run: |
          IS_BLACKWELL=1 bash scripts/ci/ci_install_dependency.sh

      # `always()` lets this step run even when an earlier step failed.
      - name: Run common 8-GPU model tests
        if: always()
        timeout-minutes: 300
        env:
          TRACE_BASE_URL: https://raw.githubusercontent.com/sglang-bot/sglang-ci-data/main/traces/${{ github.run_id }}
          PERFETTO_RELAY_URL: ${{ vars.PERFETTO_RELAY_URL }}
          GPU_CONFIG: "8-gpu-b200"
        run: |
          cd test
          IS_BLACKWELL=1 python3 run_suite.py --hw cuda --suite nightly-8-gpu-common --nightly --timeout-per-file=12000 --continue-on-error

  # Final check job
  # Runs after every test job regardless of their outcome (`always()`) and
  # fails when any of them failed or was cancelled. Jobs skipped by
  # `job_filter` do not trip either check.
  check-all-jobs:
    if: github.repository == 'sgl-project/sglang' && always()
    needs:
      - nightly-test-general-1-gpu-runner
      - nightly-test-general-4-gpu-h100
      - nightly-test-general-8-gpu-h200
      - nightly-test-general-8-gpu-h20
      - nightly-test-general-8-gpu-b200
      - nightly-test-text-accuracy-2-gpu-runner
      - nightly-test-text-perf-2-gpu-runner
      - nightly-test-vlm-accuracy-2-gpu-runner
      - nightly-test-vlm-perf-2-gpu-runner
      - nightly-test-multimodal-server-1-gpu
      - nightly-test-multimodal-server-2-gpu
      - nightly-test-perf-4-gpu-b200
      - nightly-test-perf-8-gpu-b200
    runs-on: ubuntu-latest
    steps:
      - name: Check if any job failed
        run: |
          if [[ "${{ contains(needs.*.result, 'failure') }}" == "true" ]]; then
            echo "One or more nightly test jobs failed"
            exit 1
          fi
          if [[ "${{ contains(needs.*.result, 'cancelled') }}" == "true" ]]; then
            echo "One or more nightly test jobs were cancelled"
            exit 1
          fi
          echo "All nightly test jobs passed"