Nightly Test (Nvidia) #156
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
name: Nightly Test (Nvidia)

on:
  # Scheduled run: every day at 00:00 (GitHub evaluates cron schedules in UTC).
  schedule:
    - cron: '0 0 * * *'

  # Manual run from the Actions UI, optionally restricted to a single job.
  workflow_dispatch:
    inputs:
      job_filter:
        description: 'Select which job to run (leave empty or "all" to run all jobs)'
        required: false
        type: choice
        default: 'all'
        options:
          - 'all'
          - 'nightly-test-general-1-gpu-h100'
          - 'nightly-test-general-4-gpu-h100'
          - 'nightly-test-general-8-gpu-h200'
          - 'nightly-test-general-8-gpu-h20'
          - 'nightly-test-general-8-gpu-b200'
          - 'nightly-test-text-accuracy-2-gpu-h100'
          - 'nightly-test-text-perf-2-gpu-h100'
          - 'nightly-test-vlm-accuracy-2-gpu-h100'
          - 'nightly-test-vlm-perf-2-gpu-h100'
          - 'nightly-test-multimodal-server-1-gpu'
          - 'nightly-test-multimodal-server-2-gpu'
          - 'nightly-test-perf-4-gpu-b200'
          - 'nightly-test-perf-8-gpu-b200'
          - 'nightly-test-specialized-8-gpu-b200'
          - 'nightly-test-kernel-1-gpu-h100'
          - 'nightly-test-diffusion-comparison'
          - 'nightly-test-kernel-8-gpu-h200'

  # Reusable-workflow entry point: callers may pin the ref under test and
  # pass a job filter as a free-form string (no choice list here).
  workflow_call:
    inputs:
      ref:
        description: 'Git ref (branch, tag, or SHA) to test. If not provided, uses the default branch.'
        required: false
        type: string
        default: ''
      job_filter:
        description: 'Select which job to run (leave empty or "all" to run all jobs)'
        required: false
        type: string
        default: 'all'
# One concurrency group per tested ref: the explicit `ref` input when given,
# otherwise the ref that triggered the run.
concurrency:
  group: nightly-test-nvidia-${{ inputs.ref || github.ref }}
  # NOTE(review): inside a reusable workflow the `github` context belongs to the
  # caller, so `github.event_name` is presumably never 'workflow_call' and this
  # evaluates to true for every run. Confirm whether called runs are really
  # meant to be cancellable; if not, detect the call via a dedicated input.
  cancel-in-progress: ${{ github.event_name != 'workflow_call' }}
# Workflow-wide environment shared by every job.
# All values are quoted so they are unambiguously strings in YAML; GitHub
# Actions exports env values to the runner as strings either way.
env:
  SGLANG_IS_IN_CI: "true"
  SGLANG_CUDA_COREDUMP: "1"
  HF_HUB_DOWNLOAD_TIMEOUT: "300"
  HF_HUB_ETAG_TIMEOUT: "300"
# Every test job is gated the same way: it only runs in the upstream
# repository, and only when `job_filter` is empty, 'all', or the job's own name.
jobs:
  # General tests - 1 GPU
  nightly-test-general-1-gpu-h100:
    if: github.repository == 'sgl-project/sglang' && (inputs.job_filter == '' || inputs.job_filter == 'all' || inputs.job_filter == 'nightly-test-general-1-gpu-h100')
    runs-on: 1-gpu-h100
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}

      - uses: ./.github/actions/check-maintenance

      - name: Install dependencies
        run: |
          bash scripts/ci/cuda/ci_install_dependency.sh

      - name: Run test
        timeout-minutes: 60
        run: |
          cd test
          python3 run_suite.py --hw cuda --suite nightly-1-gpu --nightly --continue-on-error

      - uses: ./.github/actions/upload-cuda-coredumps
        if: always()

  # JIT kernel full unit tests (expanded parameter ranges via SGLANG_JIT_KERNEL_RUN_FULL_TESTS)
  nightly-test-kernel-1-gpu-h100:
    if: github.repository == 'sgl-project/sglang' && (inputs.job_filter == '' || inputs.job_filter == 'all' || inputs.job_filter == 'nightly-test-kernel-1-gpu-h100')
    runs-on: 1-gpu-h100
    timeout-minutes: 240
    env:
      # Full jit_kernel test grids (see sglang.jit_kernel.utils.should_run_full_tests)
      SGLANG_JIT_KERNEL_RUN_FULL_TESTS: "1"
      # Match pr-test-jit-kernel workflow for consistent JIT warmup behavior
      SGLANG_JIT_DEEPGEMM_FAST_WARMUP: "true"
      # Allow maintenance bypass on default branch (same semantics as PR JIT workflow)
      SGLANG_PR_TEST_BYPASS_MAINTENANCE_ON_MAIN: ${{ github.ref == 'refs/heads/main' && 'true' || 'false' }}
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}

      - uses: ./.github/actions/check-maintenance

      - name: Install dependencies
        timeout-minutes: 20
        run: |
          bash scripts/ci/cuda/ci_install_dependency.sh

      - name: Run jit kernel nightly suite
        timeout-minutes: 60
        run: |
          cd test
          python3 run_suite.py --hw cuda --suite nightly-kernel-1-gpu --nightly --continue-on-error

      - uses: ./.github/actions/upload-cuda-coredumps
        if: always()

  # JIT kernel full unit tests - multi-GPU variant on 8x H200
  nightly-test-kernel-8-gpu-h200:
    if: github.repository == 'sgl-project/sglang' && (inputs.job_filter == '' || inputs.job_filter == 'all' || inputs.job_filter == 'nightly-test-kernel-8-gpu-h200')
    runs-on: 8-gpu-h200
    timeout-minutes: 240
    env:
      SGLANG_JIT_KERNEL_RUN_FULL_TESTS: "1"
      SGLANG_JIT_DEEPGEMM_FAST_WARMUP: "true"
      SGLANG_PR_TEST_BYPASS_MAINTENANCE_ON_MAIN: ${{ github.ref == 'refs/heads/main' && 'true' || 'false' }}
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}

      - uses: ./.github/actions/check-maintenance

      - name: Install dependencies
        timeout-minutes: 20
        run: |
          bash scripts/ci/cuda/ci_install_dependency.sh

      - name: Run multi-GPU jit kernel nightly suite
        timeout-minutes: 90
        run: |
          cd test
          python3 run_suite.py --hw cuda --suite nightly-kernel-8-gpu-h200 --nightly --continue-on-error

      - uses: ./.github/actions/upload-cuda-coredumps
        if: always()

  # General tests - 4 GPU H100
  nightly-test-general-4-gpu-h100:
    if: github.repository == 'sgl-project/sglang' && (inputs.job_filter == '' || inputs.job_filter == 'all' || inputs.job_filter == 'nightly-test-general-4-gpu-h100')
    runs-on: 4-gpu-h100
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}

      - uses: ./.github/actions/check-maintenance

      - name: Install dependencies
        run: |
          bash scripts/ci/cuda/ci_install_dependency.sh

      - name: Run test
        timeout-minutes: 30
        run: |
          cd test
          python3 run_suite.py --hw cuda --suite nightly-4-gpu --nightly --continue-on-error

      - uses: ./.github/actions/upload-cuda-coredumps
        if: always()

  # General tests - 8 GPU H200
  nightly-test-general-8-gpu-h200:
    if: github.repository == 'sgl-project/sglang' && (inputs.job_filter == '' || inputs.job_filter == 'all' || inputs.job_filter == 'nightly-test-general-8-gpu-h200')
    runs-on: 8-gpu-h200
    strategy:
      fail-fast: false
      matrix:
        # Four partitions; must stay in sync with --auto-partition-size below.
        partition: [0, 1, 2, 3]
    env:
      RUNNER_LABELS: 8-gpu-h200
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}

      - uses: ./.github/actions/check-maintenance

      - name: Install dependencies
        run: |
          bash scripts/ci/cuda/ci_install_dependency.sh

      - name: Run common 8-GPU model tests
        if: always()
        timeout-minutes: 300
        env:
          TRACE_BASE_URL: https://raw.githubusercontent.com/sglang-bot/sglang-ci-data/main/traces/${{ github.run_id }}
          PERFETTO_RELAY_URL: ${{ vars.PERFETTO_RELAY_URL }}
          GPU_CONFIG: "8-gpu-h200"
          IS_H200: "1"
        run: |
          cd test
          python3 run_suite.py --hw cuda --suite nightly-8-gpu-common --nightly --timeout-per-file=18000 --continue-on-error --auto-partition-id=${{ matrix.partition }} --auto-partition-size=4

      # Best effort: publishing runs regardless of test outcome and never fails the job.
      - name: Publish traces to storage repo
        if: always()
        continue-on-error: true
        env:
          GITHUB_TOKEN: ${{ secrets.GH_PAT_FOR_NIGHTLY_CI_DATA }}
          GITHUB_RUN_ID: ${{ github.run_id }}
          GITHUB_RUN_NUMBER: ${{ github.run_number }}
        run: |
          TRACE_ARGS=""
          for dir in test/performance_profiles_*/; do
            [ -d "$dir" ] && TRACE_ARGS="$TRACE_ARGS --traces-dir $dir"
          done
          if [ -n "$TRACE_ARGS" ]; then
            python3 scripts/ci/utils/publish_traces.py $TRACE_ARGS
            find test/performance_profiles_*/ -name '*.json.gz' -delete
          else
            echo "No trace directories found, skipping publish"
          fi

      # NOTE(review): unlike the common suite above, this step has no
      # `if: always()` and no partition arguments, so it is skipped when an
      # earlier step fails and otherwise runs once per matrix partition.
      # Confirm that both are intended.
      - name: Run test
        timeout-minutes: 30
        env:
          GPU_CONFIG: "8-gpu-h200"
        run: |
          cd test
          python3 run_suite.py --hw cuda --suite nightly-8-gpu-h200 --nightly --continue-on-error

      - name: Collect performance metrics
        if: always()
        run: |
          python3 scripts/ci/utils/save_metrics.py \
            --gpu-config 8-gpu-h200 \
            --partition ${{ matrix.partition }} \
            --run-id ${{ github.run_id }} \
            --output test/metrics-8gpu-h200-partition-${{ matrix.partition }}.json \
            --search-dir test/performance_profiles_8_gpu \
            --search-dir test

      - name: Upload partition metrics
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: metrics-8gpu-h200-partition-${{ matrix.partition }}
          path: test/metrics-8gpu-h200-partition-${{ matrix.partition }}.json
          retention-days: 5
          if-no-files-found: ignore

      - uses: ./.github/actions/upload-cuda-coredumps
        if: always()
        with:
          artifact-suffix: ${{ matrix.partition }}

  # General tests - 8 GPU H20
  nightly-test-general-8-gpu-h20:
    if: github.repository == 'sgl-project/sglang' && (inputs.job_filter == '' || inputs.job_filter == 'all' || inputs.job_filter == 'nightly-test-general-8-gpu-h20')
    runs-on: 8-gpu-h20
    env:
      SGLANG_CI_RDMA_ALL_DEVICES: "mlx5_1,mlx5_2,mlx5_3,mlx5_4"
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}

      - uses: ./.github/actions/check-maintenance

      - name: Install dependencies
        run: |
          bash scripts/ci/cuda/ci_install_dependency.sh

      - name: Run test
        timeout-minutes: 30
        env:
          GPU_CONFIG: "8-gpu-h20"
        run: |
          cd test
          python3 run_suite.py --hw cuda --suite nightly-8-gpu-h20 --nightly --continue-on-error

      - uses: ./.github/actions/upload-cuda-coredumps
        if: always()

  # General tests - 8 GPU B200
  nightly-test-general-8-gpu-b200:
    if: github.repository == 'sgl-project/sglang' && (inputs.job_filter == '' || inputs.job_filter == 'all' || inputs.job_filter == 'nightly-test-general-8-gpu-b200')
    runs-on: 8-gpu-b200
    strategy:
      fail-fast: false
      matrix:
        # Four partitions; must stay in sync with --auto-partition-size below.
        partition: [0, 1, 2, 3]
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}

      - uses: ./.github/actions/check-maintenance

      - name: Install dependencies
        run: |
          bash scripts/ci/cuda/ci_install_dependency.sh

      - name: Run common 8-GPU model tests
        if: always()
        timeout-minutes: 300
        env:
          TRACE_BASE_URL: https://raw.githubusercontent.com/sglang-bot/sglang-ci-data/main/traces/${{ github.run_id }}
          PERFETTO_RELAY_URL: ${{ vars.PERFETTO_RELAY_URL }}
          GPU_CONFIG: "8-gpu-b200"
        run: |
          cd test
          python3 run_suite.py --hw cuda --suite nightly-8-gpu-common --nightly --timeout-per-file=12000 --continue-on-error --auto-partition-id=${{ matrix.partition }} --auto-partition-size=4

      # Best effort: publishing runs regardless of test outcome and never fails the job.
      - name: Publish traces to storage repo
        if: always()
        continue-on-error: true
        env:
          GITHUB_TOKEN: ${{ secrets.GH_PAT_FOR_NIGHTLY_CI_DATA }}
          GITHUB_RUN_ID: ${{ github.run_id }}
          GITHUB_RUN_NUMBER: ${{ github.run_number }}
        run: |
          TRACE_ARGS=""
          for dir in test/performance_profiles_*/; do
            [ -d "$dir" ] && TRACE_ARGS="$TRACE_ARGS --traces-dir $dir"
          done
          if [ -n "$TRACE_ARGS" ]; then
            python3 scripts/ci/utils/publish_traces.py $TRACE_ARGS
            find test/performance_profiles_*/ -name '*.json.gz' -delete
          else
            echo "No trace directories found, skipping publish"
          fi

      - name: Collect performance metrics
        if: always()
        run: |
          python3 scripts/ci/utils/save_metrics.py \
            --gpu-config 8-gpu-b200 \
            --partition ${{ matrix.partition }} \
            --run-id ${{ github.run_id }} \
            --output test/metrics-8gpu-b200-partition-${{ matrix.partition }}.json \
            --search-dir test/performance_profiles_8_gpu \
            --search-dir test

      - name: Upload partition metrics
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: metrics-8gpu-b200-partition-${{ matrix.partition }}
          path: test/metrics-8gpu-b200-partition-${{ matrix.partition }}.json
          retention-days: 5
          if-no-files-found: ignore

      - uses: ./.github/actions/upload-cuda-coredumps
        if: always()
        with:
          artifact-suffix: ${{ matrix.partition }}

  # Text model accuracy tests
  nightly-test-text-accuracy-2-gpu-h100:
    if: github.repository == 'sgl-project/sglang' && (inputs.job_filter == '' || inputs.job_filter == 'all' || inputs.job_filter == 'nightly-test-text-accuracy-2-gpu-h100')
    runs-on: 2-gpu-h100
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}

      - uses: ./.github/actions/check-maintenance

      - name: Install dependencies
        run: |
          bash scripts/ci/cuda/ci_install_dependency.sh

      - name: Run eval test for text models
        timeout-minutes: 120
        run: |
          cd test
          python3 run_suite.py --hw cuda --suite nightly-eval-text-2-gpu --nightly --continue-on-error --timeout-per-file 4500

      - uses: ./.github/actions/upload-cuda-coredumps
        if: always()

  # Text model performance tests
  nightly-test-text-perf-2-gpu-h100:
    if: github.repository == 'sgl-project/sglang' && (inputs.job_filter == '' || inputs.job_filter == 'all' || inputs.job_filter == 'nightly-test-text-perf-2-gpu-h100')
    runs-on: 2-gpu-h100
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}

      - uses: ./.github/actions/check-maintenance

      - name: Install dependencies
        run: |
          bash scripts/ci/cuda/ci_install_dependency.sh

      - name: Run performance test for text models
        timeout-minutes: 180
        env:
          TRACE_BASE_URL: https://raw.githubusercontent.com/sglang-bot/sglang-ci-data/main/traces/${{ github.run_id }}
          PERFETTO_RELAY_URL: ${{ vars.PERFETTO_RELAY_URL }}
          GPU_CONFIG: "2-gpu-h100"
        run: |
          cd test
          rm -rf performance_profiles_text_models/
          python3 run_suite.py --hw cuda --suite nightly-perf-text-2-gpu --nightly --continue-on-error --timeout-per-file 3600

      # Same best-effort policy as the 8-GPU jobs: publish whatever traces were
      # produced even if the perf step failed, skip cleanly when none exist, and
      # never let a publishing problem fail the job.
      - name: Publish traces to storage repo
        if: always()
        continue-on-error: true
        env:
          GITHUB_TOKEN: ${{ secrets.GH_PAT_FOR_NIGHTLY_CI_DATA }}
          GITHUB_RUN_ID: ${{ github.run_id }}
          GITHUB_RUN_NUMBER: ${{ github.run_number }}
        run: |
          if [ -d test/performance_profiles_text_models ]; then
            python3 scripts/ci/utils/publish_traces.py --traces-dir test/performance_profiles_text_models
          else
            echo "No trace directories found, skipping publish"
          fi

      - uses: ./.github/actions/upload-cuda-coredumps
        if: always()

  # VLM accuracy tests
  nightly-test-vlm-accuracy-2-gpu-h100:
    if: github.repository == 'sgl-project/sglang' && (inputs.job_filter == '' || inputs.job_filter == 'all' || inputs.job_filter == 'nightly-test-vlm-accuracy-2-gpu-h100')
    runs-on: 2-gpu-h100
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}

      - uses: ./.github/actions/check-maintenance

      - name: Install dependencies
        run: |
          bash scripts/ci/cuda/ci_install_dependency.sh

      - name: Run eval test for VLM models (fixed MMMU-100)
        timeout-minutes: 240
        run: |
          cd test
          python3 run_suite.py --hw cuda --suite nightly-eval-vlm-2-gpu --nightly --continue-on-error --timeout-per-file 9000

      - uses: ./.github/actions/upload-cuda-coredumps
        if: always()

  # VLM performance tests
  nightly-test-vlm-perf-2-gpu-h100:
    if: github.repository == 'sgl-project/sglang' && (inputs.job_filter == '' || inputs.job_filter == 'all' || inputs.job_filter == 'nightly-test-vlm-perf-2-gpu-h100')
    runs-on: 2-gpu-h100
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}

      - uses: ./.github/actions/check-maintenance

      - name: Install dependencies
        run: |
          bash scripts/ci/cuda/ci_install_dependency.sh

      - name: Run perf test for VLM models (MMMU)
        timeout-minutes: 240
        env:
          TRACE_BASE_URL: https://raw.githubusercontent.com/sglang-bot/sglang-ci-data/main/traces/${{ github.run_id }}
          PERFETTO_RELAY_URL: ${{ vars.PERFETTO_RELAY_URL }}
          GPU_CONFIG: "2-gpu-h100"
        run: |
          cd test
          rm -rf performance_profiles_vlms/
          python3 run_suite.py --hw cuda --suite nightly-perf-vlm-2-gpu --nightly --continue-on-error --timeout-per-file 3600

      # Same best-effort policy as the 8-GPU jobs: publish whatever traces were
      # produced even if the perf step failed, skip cleanly when none exist, and
      # never let a publishing problem fail the job.
      - name: Publish traces to storage repo
        if: always()
        continue-on-error: true
        env:
          GITHUB_TOKEN: ${{ secrets.GH_PAT_FOR_NIGHTLY_CI_DATA }}
          GITHUB_RUN_ID: ${{ github.run_id }}
          GITHUB_RUN_NUMBER: ${{ github.run_number }}
        run: |
          if [ -d test/performance_profiles_vlms ]; then
            python3 scripts/ci/utils/publish_traces.py --traces-dir test/performance_profiles_vlms
          else
            echo "No trace directories found, skipping publish"
          fi

      - uses: ./.github/actions/upload-cuda-coredumps
        if: always()

  # diffusion performance tests
  nightly-test-multimodal-server-1-gpu:
    if: github.repository == 'sgl-project/sglang' && (inputs.job_filter == '' || inputs.job_filter == 'all' || inputs.job_filter == 'nightly-test-multimodal-server-1-gpu')
    runs-on: 1-gpu-h100
    strategy:
      fail-fast: false
      max-parallel: 5
      matrix:
        # Two partitions; must stay in sync with --total-partitions below.
        part: [0, 1]
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}

      - uses: ./.github/actions/check-maintenance

      - name: Install dependencies
        run: |
          bash scripts/ci/cuda/ci_install_dependency.sh diffusion
          pip install slack_sdk

      - name: Run diffusion server tests
        env:
          SGLANG_DIFFUSION_SLACK_TOKEN: ${{ secrets.SGLANG_DIFFUSION_SLACK_TOKEN }}
          GITHUB_RUN_ID: ${{ github.run_id }}
          GPU_CONFIG: "1-gpu-h100"
        timeout-minutes: 90
        run: |
          cd python
          python3 sglang/multimodal_gen/test/run_suite.py \
            --suite 1-gpu \
            --partition-id ${{ matrix.part }} \
            --total-partitions 2

      - name: Collect diffusion performance metrics
        if: always()
        run: |
          python3 scripts/ci/utils/diffusion/save_diffusion_metrics.py \
            --gpu-config 1-gpu-h100 \
            --run-id ${{ github.run_id }} \
            --output python/diffusion-metrics-1gpu-partition-${{ matrix.part }}.json \
            --results-json python/diffusion-results.json

      - name: Upload diffusion metrics
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: diffusion-metrics-1gpu-partition-${{ matrix.part }}
          path: python/diffusion-metrics-1gpu-partition-${{ matrix.part }}.json
          retention-days: 90
          if-no-files-found: ignore

      - uses: ./.github/actions/upload-cuda-coredumps
        if: always()
        with:
          artifact-suffix: ${{ matrix.part }}

  # diffusion performance tests - 2 GPU
  nightly-test-multimodal-server-2-gpu:
    if: github.repository == 'sgl-project/sglang' && (inputs.job_filter == '' || inputs.job_filter == 'all' || inputs.job_filter == 'nightly-test-multimodal-server-2-gpu')
    runs-on: 2-gpu-h100
    strategy:
      fail-fast: false
      max-parallel: 5
      matrix:
        # Two partitions; must stay in sync with --total-partitions below.
        part: [0, 1]
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}

      - uses: ./.github/actions/check-maintenance

      - name: Install dependencies
        run: |
          bash scripts/ci/cuda/ci_install_dependency.sh diffusion
          pip install slack_sdk

      - name: Run diffusion server tests
        env:
          SGLANG_DIFFUSION_SLACK_TOKEN: ${{ secrets.SGLANG_DIFFUSION_SLACK_TOKEN }}
          GITHUB_RUN_ID: ${{ github.run_id }}
          GPU_CONFIG: "2-gpu-h100"
        timeout-minutes: 90
        run: |
          cd python
          python3 sglang/multimodal_gen/test/run_suite.py \
            --suite 2-gpu \
            --partition-id ${{ matrix.part }} \
            --total-partitions 2

      - name: Collect diffusion performance metrics
        if: always()
        run: |
          python3 scripts/ci/utils/diffusion/save_diffusion_metrics.py \
            --gpu-config 2-gpu-h100 \
            --run-id ${{ github.run_id }} \
            --output python/diffusion-metrics-2gpu-partition-${{ matrix.part }}.json \
            --results-json python/diffusion-results.json

      - name: Upload diffusion metrics
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: diffusion-metrics-2gpu-partition-${{ matrix.part }}
          path: python/diffusion-metrics-2gpu-partition-${{ matrix.part }}.json
          retention-days: 90
          if-no-files-found: ignore

      - uses: ./.github/actions/upload-cuda-coredumps
        if: always()
        with:
          artifact-suffix: ${{ matrix.part }}

  # B200 Performance tests - 4 GPU
  nightly-test-perf-4-gpu-b200:
    if: github.repository == 'sgl-project/sglang' && (inputs.job_filter == '' || inputs.job_filter == 'all' || inputs.job_filter == 'nightly-test-perf-4-gpu-b200')
    runs-on: 4-gpu-b200
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}

      - uses: ./.github/actions/check-maintenance

      - name: Install dependencies
        run: |
          bash scripts/ci/cuda/ci_install_dependency.sh

      - name: Run test
        timeout-minutes: 300
        run: |
          cd test
          python3 run_suite.py --hw cuda --suite nightly-4-gpu-b200 --nightly --continue-on-error --timeout-per-file 12000

      - uses: ./.github/actions/upload-cuda-coredumps
        if: always()

  # Specialized B200 tests - 8 GPU, for specific backends and configs.
  # Also selected by the 'nightly-test-perf-8-gpu-b200' filter value, which is
  # offered in the dispatch options but has no job of its own.
  nightly-test-specialized-8-gpu-b200:
    if: github.repository == 'sgl-project/sglang' && (inputs.job_filter == '' || inputs.job_filter == 'all' || inputs.job_filter == 'nightly-test-perf-8-gpu-b200' || inputs.job_filter == 'nightly-test-specialized-8-gpu-b200')
    runs-on: 8-gpu-b200
    env:
      RUNNER_LABELS: 8-gpu-b200
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}

      - uses: ./.github/actions/check-maintenance

      - name: Install dependencies
        run: |
          bash scripts/ci/cuda/ci_install_dependency.sh

      - name: Run test
        timeout-minutes: 120
        env:
          GPU_CONFIG: "8-gpu-b200"
        run: |
          cd test
          python3 run_suite.py --hw cuda --suite nightly-8-gpu-b200 --nightly --continue-on-error --timeout-per-file 2400

      - uses: ./.github/actions/upload-cuda-coredumps
        if: always()

  # Diffusion cross-framework comparison
  # NOTE(review): this is the only test job without the check-maintenance
  # step; confirm that the omission is intentional.
  nightly-test-diffusion-comparison:
    if: github.repository == 'sgl-project/sglang' && (inputs.job_filter == '' || inputs.job_filter == 'all' || inputs.job_filter == 'nightly-test-diffusion-comparison')
    runs-on: 4-gpu-h100
    timeout-minutes: 240
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}

      - name: Install dependencies
        run: |
          bash scripts/ci/cuda/ci_install_dependency.sh diffusion

      - name: Run cross-framework comparison
        env:
          GITHUB_SHA: ${{ github.sha }}
          GITHUB_RUN_ID: ${{ github.run_id }}
          PYTHONUNBUFFERED: "1"
        timeout-minutes: 210
        run: |
          python3 -u scripts/ci/utils/diffusion/run_comparison.py \
            --output comparison-results.json

      - name: Generate dashboard
        if: always()
        env:
          GH_PAT_FOR_NIGHTLY_CI_DATA: ${{ secrets.GH_PAT_FOR_NIGHTLY_CI_DATA }}
          GH_TOKEN: ${{ github.token }}
        run: |
          python3 scripts/ci/utils/diffusion/generate_diffusion_dashboard.py \
            --results comparison-results.json \
            --output dashboard.md \
            --charts-dir comparison-charts \
            --fetch-history \
            --step-summary

      - name: Publish to sglang-ci-data
        if: always()
        env:
          GH_PAT_FOR_NIGHTLY_CI_DATA: ${{ secrets.GH_PAT_FOR_NIGHTLY_CI_DATA }}
        run: |
          python3 scripts/ci/utils/diffusion/publish_comparison_results.py \
            --results comparison-results.json \
            --dashboard dashboard.md \
            --charts-dir comparison-charts

      - name: Upload comparison artifacts
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: diffusion-comparison-${{ github.run_id }}
          path: |
            comparison-results.json
            dashboard.md
            comparison-charts/
            comparison-logs/
          retention-days: 90
          if-no-files-found: ignore

      - uses: ./.github/actions/upload-cuda-coredumps
        if: always()

  # Consolidate performance metrics from all jobs.
  # `always()` lets this run even when some of the producing jobs failed.
  consolidate-metrics:
    if: github.repository == 'sgl-project/sglang' && always()
    needs:
      - nightly-test-general-8-gpu-h200
      - nightly-test-general-8-gpu-b200
      - nightly-test-multimodal-server-1-gpu
      - nightly-test-multimodal-server-2-gpu
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}

      # The pattern matches both the "metrics-*" and "diffusion-metrics-*"
      # artifacts uploaded by the jobs listed in `needs`.
      - name: Download all partition metrics
        uses: actions/download-artifact@v4
        with:
          pattern: "*metrics-*"
          path: metrics/
          merge-multiple: true

      - name: List downloaded metrics
        run: |
          echo "Downloaded metrics files:"
          find metrics/ -name "*.json" -type f 2>/dev/null || echo "No metrics files found"

      # Context values are passed through the environment instead of being
      # interpolated into the script text: a ref name may contain shell
      # metacharacters, and `${{ }}` is expanded before the shell parses the script.
      - name: Merge metrics
        env:
          RUN_ID: ${{ github.run_id }}
          COMMIT_SHA: ${{ github.sha }}
          BRANCH_NAME: ${{ github.ref_name }}
        run: |
          python3 scripts/ci/utils/merge_metrics.py \
            --input-dir metrics/ \
            --output "consolidated-metrics-${RUN_ID}.json" \
            --run-id "${RUN_ID}" \
            --commit-sha "${COMMIT_SHA}" \
            --branch "${BRANCH_NAME}"

      - name: Upload consolidated metrics
        uses: actions/upload-artifact@v4
        with:
          name: consolidated-metrics-${{ github.run_id }}
          path: consolidated-metrics-${{ github.run_id }}.json
          retention-days: 90
          if-no-files-found: warn

  # Final check job: fails when any job listed in `needs` failed or was
  # cancelled. Jobs skipped by `job_filter` do not count as failures.
  check-all-jobs:
    if: github.repository == 'sgl-project/sglang' && always()
    needs:
      - nightly-test-general-1-gpu-h100
      # The two kernel jobs were missing from this list, so their failures
      # could not fail the final gate.
      - nightly-test-kernel-1-gpu-h100
      - nightly-test-kernel-8-gpu-h200
      - nightly-test-general-4-gpu-h100
      - nightly-test-general-8-gpu-h200
      - nightly-test-general-8-gpu-h20
      - nightly-test-general-8-gpu-b200
      - nightly-test-text-accuracy-2-gpu-h100
      - nightly-test-text-perf-2-gpu-h100
      - nightly-test-vlm-accuracy-2-gpu-h100
      - nightly-test-vlm-perf-2-gpu-h100
      - nightly-test-multimodal-server-1-gpu
      - nightly-test-multimodal-server-2-gpu
      - nightly-test-perf-4-gpu-b200
      - nightly-test-specialized-8-gpu-b200
      - nightly-test-diffusion-comparison
      - consolidate-metrics
    runs-on: ubuntu-latest
    steps:
      - name: Check if any job failed
        run: |
          if [[ "${{ contains(needs.*.result, 'failure') }}" == "true" ]]; then
            echo "One or more nightly test jobs failed"
            exit 1
          fi
          if [[ "${{ contains(needs.*.result, 'cancelled') }}" == "true" ]]; then
            echo "One or more nightly test jobs were cancelled"
            exit 1
          fi
          echo "All nightly test jobs passed"