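# Nightly Test (Nvidia)
#
# NOTE(review): this file appears to have been captured from the GitHub Actions
# web UI (run "Nightly Test (Nvidia) #37"); the page chrome that preceded the
# workflow definition is kept here only as a comment so the file parses as YAML.
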
name: Nightly Test (Nvidia)

# Four triggers: a daily schedule, pushes to main that touch selected files,
# manual dispatch, and invocation from another workflow.
on:
  # Daily at 00:00 (GitHub evaluates cron schedules in UTC).
  schedule:
    - cron: '0 0 * * *'

  # Pushes to main that modify the version file or the test suite runner.
  push:
    branches:
      - main
    paths:
      - 'python/sglang/version.py'
      - 'test/run_suite.py'

  # Manual run; a single job can be selected from a fixed list.
  workflow_dispatch:
    inputs:
      job_filter:
        description: 'Select which job to run (leave empty or "all" to run all jobs)'
        required: false
        type: choice
        default: all
        options:
          - all
          - nightly-test-general-1-gpu-runner
          - nightly-test-general-4-gpu-h100
          - nightly-test-general-8-gpu-h200
          - nightly-test-general-8-gpu-h20
          - nightly-test-general-8-gpu-b200
          - nightly-test-text-accuracy-2-gpu-runner
          - nightly-test-text-perf-2-gpu-runner
          - nightly-test-vlm-accuracy-2-gpu-runner
          - nightly-test-vlm-perf-2-gpu-runner
          - nightly-test-multimodal-server-1-gpu
          - nightly-test-multimodal-server-2-gpu
          - nightly-test-perf-4-gpu-b200
          - nightly-test-perf-8-gpu-b200

  # Reusable-workflow entry point; the caller may pass a ref and/or a job name.
  # Here job_filter is a free-form string rather than a choice.
  workflow_call:
    inputs:
      ref:
        description: 'Git ref (branch, tag, or SHA) to test. If not provided, uses the default branch.'
        required: false
        type: string
        default: ''
      job_filter:
        description: 'Select which job to run (leave empty or "all" to run all jobs)'
        required: false
        type: string
        default: all

# One concurrency group per tested ref: the explicit `ref` input when given,
# otherwise the ref that triggered the run.
concurrency:
  group: "nightly-test-nvidia-${{ inputs.ref || github.ref }}"
  # NOTE(review): when this workflow is invoked through `workflow_call`, the
  # `github` context is documented to be the caller's, so `github.event_name`
  # may never equal 'workflow_call' and this may always evaluate to true.
  # Confirm the guard behaves as intended.
  cancel-in-progress: "${{ github.event_name != 'workflow_call' }}"

# Environment variables exported to every job in this workflow.
# Values are written as explicit strings; Actions exports env values as strings.
env:
  SGLANG_IS_IN_CI: 'true'
  HF_HUB_DOWNLOAD_TIMEOUT: '300'
  HF_HUB_ETAG_TIMEOUT: '300'

jobs:
# General tests - 1 GPU
  nightly-test-general-1-gpu-runner:
    # Runs only in the upstream repository, and only when the job filter is
    # empty, 'all', or names this job.
    if: >-
      github.repository == 'sgl-project/sglang' &&
      (inputs.job_filter == '' ||
      inputs.job_filter == 'all' ||
      inputs.job_filter == 'nightly-test-general-1-gpu-runner')
    runs-on: 1-gpu-runner
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          # Use the `ref` input when supplied, otherwise the triggering ref.
          ref: ${{ inputs.ref || github.ref }}

      - name: Install dependencies
        run: bash scripts/ci/ci_install_dependency.sh

      - name: Run test
        timeout-minutes: 60
        run: |
          cd test
          python3 run_suite.py --hw cuda --suite nightly-1-gpu --nightly --continue-on-error

# General tests - 4 GPU H100
  nightly-test-general-4-gpu-h100:
    # Runs only in the upstream repository, and only when the job filter is
    # empty, 'all', or names this job.
    if: >-
      github.repository == 'sgl-project/sglang' &&
      (inputs.job_filter == '' ||
      inputs.job_filter == 'all' ||
      inputs.job_filter == 'nightly-test-general-4-gpu-h100')
    runs-on: 4-gpu-h100
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          # Use the `ref` input when supplied, otherwise the triggering ref.
          ref: ${{ inputs.ref || github.ref }}

      - name: Install dependencies
        run: bash scripts/ci/ci_install_dependency.sh

      - name: Run test
        timeout-minutes: 30
        run: |
          cd test
          python3 run_suite.py --hw cuda --suite nightly-4-gpu --nightly --continue-on-error

# General tests - 8 GPU H200
  nightly-test-general-8-gpu-h200:
    # Runs only in the upstream repository, and only when the job filter is
    # empty, 'all', or names this job.
    if: >-
      github.repository == 'sgl-project/sglang' &&
      (inputs.job_filter == '' ||
      inputs.job_filter == 'all' ||
      inputs.job_filter == 'nightly-test-general-8-gpu-h200')
    runs-on: 8-gpu-h200
    env:
      RUNNER_LABELS: 8-gpu-h200
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          # Use the `ref` input when supplied, otherwise the triggering ref.
          ref: ${{ inputs.ref || github.ref }}

      - name: Install dependencies
        run: bash scripts/ci/ci_install_dependency.sh

      # Suite shared with the 8-GPU B200 job (nightly-8-gpu-common).
      # `always()` lets this step run even if an earlier step failed.
      - name: Run common 8-GPU model tests
        if: always()
        timeout-minutes: 300
        env:
          TRACE_BASE_URL: https://raw.githubusercontent.com/sglang-bot/sglang-ci-data/main/traces/${{ github.run_id }}
          PERFETTO_RELAY_URL: ${{ vars.PERFETTO_RELAY_URL }}
          GPU_CONFIG: '8-gpu-h200'
          IS_H200: '1'
        run: |
          cd test
          python3 run_suite.py --hw cuda --suite nightly-8-gpu-common --nightly --timeout-per-file=18000 --continue-on-error

      # NOTE(review): unlike the step above, this one has no `if: always()`, so
      # it is skipped when the common suite step fails. Confirm that is intended.
      - name: Run test
        timeout-minutes: 30
        env:
          GPU_CONFIG: '8-gpu-h200'
        run: |
          cd test
          python3 run_suite.py --hw cuda --suite nightly-8-gpu-h200 --nightly --continue-on-error

# General tests - 8 GPU H20
  nightly-test-general-8-gpu-h20:
    # Runs only in the upstream repository, and only when the job filter is
    # empty, 'all', or names this job.
    if: >-
      github.repository == 'sgl-project/sglang' &&
      (inputs.job_filter == '' ||
      inputs.job_filter == 'all' ||
      inputs.job_filter == 'nightly-test-general-8-gpu-h20')
    runs-on: 8-gpu-h20
    env:
      # Comma-separated device list; presumably RDMA NICs on the H20 runner (verify).
      SGLANG_CI_RDMA_ALL_DEVICES: 'mlx5_1,mlx5_2,mlx5_3,mlx5_4'
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          # Use the `ref` input when supplied, otherwise the triggering ref.
          ref: ${{ inputs.ref || github.ref }}

      - name: Install dependencies
        run: bash scripts/ci/ci_install_dependency.sh

      - name: Run test
        timeout-minutes: 30
        env:
          GPU_CONFIG: '8-gpu-h20'
        run: |
          cd test
          python3 run_suite.py --hw cuda --suite nightly-8-gpu-h20 --nightly --continue-on-error

# General tests - 8 GPU B200
  nightly-test-general-8-gpu-b200:
    # Runs only in the upstream repository, and only when the job filter is
    # empty, 'all', or names this job. The filter must match this job's own
    # name (as offered in the workflow_dispatch options); it previously
    # compared against the H20 job name, so selecting the B200 job never ran
    # it and selecting the H20 job ran both.
    if: >-
      github.repository == 'sgl-project/sglang' &&
      (inputs.job_filter == '' ||
      inputs.job_filter == 'all' ||
      inputs.job_filter == 'nightly-test-general-8-gpu-b200')
    runs-on: 8-gpu-b200
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          # Use the `ref` input when supplied, otherwise the triggering ref.
          ref: ${{ inputs.ref || github.ref }}

      # IS_BLACKWELL=1 is passed to the install script for the B200 runner.
      - name: Install dependencies
        run: |
          IS_BLACKWELL=1 bash scripts/ci/ci_install_dependency.sh

      - name: Run test
        timeout-minutes: 120
        env:
          GPU_CONFIG: "8-gpu-b200"
        run: |
          cd test
          python3 run_suite.py --hw cuda --suite nightly-8-gpu-b200 --nightly --continue-on-error --timeout-per-file 2400

# Text model accuracy tests
  nightly-test-text-accuracy-2-gpu-runner:
    # Runs only in the upstream repository, and only when the job filter is
    # empty, 'all', or names this job.
    if: >-
      github.repository == 'sgl-project/sglang' &&
      (inputs.job_filter == '' ||
      inputs.job_filter == 'all' ||
      inputs.job_filter == 'nightly-test-text-accuracy-2-gpu-runner')
    runs-on: 2-gpu-runner
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          # Use the `ref` input when supplied, otherwise the triggering ref.
          ref: ${{ inputs.ref || github.ref }}

      - name: Install dependencies
        run: bash scripts/ci/ci_install_dependency.sh

      - name: Run eval test for text models
        timeout-minutes: 120
        run: |
          cd test
          python3 run_suite.py --hw cuda --suite nightly-eval-text-2-gpu --nightly --continue-on-error --timeout-per-file 4500

# Text model performance tests
  nightly-test-text-perf-2-gpu-runner:
    # Runs only in the upstream repository, and only when the job filter is
    # empty, 'all', or names this job.
    if: >-
      github.repository == 'sgl-project/sglang' &&
      (inputs.job_filter == '' ||
      inputs.job_filter == 'all' ||
      inputs.job_filter == 'nightly-test-text-perf-2-gpu-runner')
    runs-on: 2-gpu-runner
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          # Use the `ref` input when supplied, otherwise the triggering ref.
          ref: ${{ inputs.ref || github.ref }}

      - name: Install dependencies
        run: bash scripts/ci/ci_install_dependency.sh

      # Clears any previous profile directory before running the perf suite.
      - name: Run performance test for text models
        timeout-minutes: 180
        env:
          TRACE_BASE_URL: https://raw.githubusercontent.com/sglang-bot/sglang-ci-data/main/traces/${{ github.run_id }}
          PERFETTO_RELAY_URL: ${{ vars.PERFETTO_RELAY_URL }}
          GPU_CONFIG: '2-gpu-runner'
        run: |
          cd test
          rm -rf performance_profiles_text_models/
          python3 run_suite.py --hw cuda --suite nightly-perf-text-2-gpu --nightly --continue-on-error

      # Hands the profile directory written under test/ to the publish script.
      - name: Publish traces to storage repo
        env:
          GITHUB_TOKEN: ${{ secrets.GH_PAT_FOR_NIGHTLY_CI_DATA }}
          GITHUB_RUN_ID: ${{ github.run_id }}
          GITHUB_RUN_NUMBER: ${{ github.run_number }}
        run: python3 scripts/ci/publish_traces.py --traces-dir test/performance_profiles_text_models

# VLM accuracy tests
  nightly-test-vlm-accuracy-2-gpu-runner:
    # Runs only in the upstream repository, and only when the job filter is
    # empty, 'all', or names this job.
    if: >-
      github.repository == 'sgl-project/sglang' &&
      (inputs.job_filter == '' ||
      inputs.job_filter == 'all' ||
      inputs.job_filter == 'nightly-test-vlm-accuracy-2-gpu-runner')
    runs-on: 2-gpu-runner
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          # Use the `ref` input when supplied, otherwise the triggering ref.
          ref: ${{ inputs.ref || github.ref }}

      - name: Install dependencies
        run: bash scripts/ci/ci_install_dependency.sh

      - name: Run eval test for VLM models (fixed MMMU-100)
        timeout-minutes: 240
        run: |
          cd test
          python3 run_suite.py --hw cuda --suite nightly-eval-vlm-2-gpu --nightly --continue-on-error --timeout-per-file 9000

# VLM performance tests
  nightly-test-vlm-perf-2-gpu-runner:
    # Runs only in the upstream repository, and only when the job filter is
    # empty, 'all', or names this job.
    if: >-
      github.repository == 'sgl-project/sglang' &&
      (inputs.job_filter == '' ||
      inputs.job_filter == 'all' ||
      inputs.job_filter == 'nightly-test-vlm-perf-2-gpu-runner')
    runs-on: 2-gpu-runner
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          # Use the `ref` input when supplied, otherwise the triggering ref.
          ref: ${{ inputs.ref || github.ref }}

      - name: Install dependencies
        run: bash scripts/ci/ci_install_dependency.sh

      # Clears any previous profile directory before running the perf suite.
      - name: Run perf test for VLM models (MMMU)
        timeout-minutes: 240
        env:
          TRACE_BASE_URL: https://raw.githubusercontent.com/sglang-bot/sglang-ci-data/main/traces/${{ github.run_id }}
          PERFETTO_RELAY_URL: ${{ vars.PERFETTO_RELAY_URL }}
          GPU_CONFIG: '2-gpu-runner'
        run: |
          cd test
          rm -rf performance_profiles_vlms/
          python3 run_suite.py --hw cuda --suite nightly-perf-vlm-2-gpu --nightly --continue-on-error

      # Hands the profile directory written under test/ to the publish script.
      - name: Publish traces to storage repo
        env:
          GITHUB_TOKEN: ${{ secrets.GH_PAT_FOR_NIGHTLY_CI_DATA }}
          GITHUB_RUN_ID: ${{ github.run_id }}
          GITHUB_RUN_NUMBER: ${{ github.run_number }}
        run: python3 scripts/ci/publish_traces.py --traces-dir test/performance_profiles_vlms

# Diffusion server tests - 1 GPU
  nightly-test-multimodal-server-1-gpu:
    # Runs only in the upstream repository, and only when the job filter is
    # empty, 'all', or names this job.
    if: >-
      github.repository == 'sgl-project/sglang' &&
      (inputs.job_filter == '' ||
      inputs.job_filter == 'all' ||
      inputs.job_filter == 'nightly-test-multimodal-server-1-gpu')
    runs-on: 1-gpu-runner
    strategy:
      # Let the other partition finish even if one fails.
      fail-fast: false
      max-parallel: 5
      matrix:
        # One matrix entry per partition; must agree with --total-partitions below.
        part: [0, 1]
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          # Use the `ref` input when supplied, otherwise the triggering ref.
          ref: ${{ inputs.ref || github.ref }}

      - name: Install dependencies
        run: |
          bash scripts/ci/ci_install_dependency.sh diffusion
          pip install slack_sdk

      - name: Run diffusion server tests
        timeout-minutes: 60
        env:
          SGLANG_DIFFUSION_SLACK_TOKEN: ${{ secrets.SGLANG_DIFFUSION_SLACK_TOKEN }}
          GITHUB_RUN_ID: ${{ github.run_id }}
        run: |
          cd python
          python3 sglang/multimodal_gen/test/run_suite.py \
            --suite 1-gpu \
            --partition-id ${{ matrix.part }} \
            --total-partitions 2

# Diffusion server tests - 2 GPU
  nightly-test-multimodal-server-2-gpu:
    # Runs only in the upstream repository, and only when the job filter is
    # empty, 'all', or names this job.
    if: >-
      github.repository == 'sgl-project/sglang' &&
      (inputs.job_filter == '' ||
      inputs.job_filter == 'all' ||
      inputs.job_filter == 'nightly-test-multimodal-server-2-gpu')
    runs-on: 2-gpu-runner
    strategy:
      # Let the other partition finish even if one fails.
      fail-fast: false
      max-parallel: 5
      matrix:
        # One matrix entry per partition; must agree with --total-partitions below.
        part: [0, 1]
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          # Use the `ref` input when supplied, otherwise the triggering ref.
          ref: ${{ inputs.ref || github.ref }}

      - name: Install dependencies
        run: |
          bash scripts/ci/ci_install_dependency.sh diffusion
          pip install slack_sdk

      - name: Run diffusion server tests
        timeout-minutes: 60
        env:
          SGLANG_DIFFUSION_SLACK_TOKEN: ${{ secrets.SGLANG_DIFFUSION_SLACK_TOKEN }}
          GITHUB_RUN_ID: ${{ github.run_id }}
        run: |
          cd python
          python3 sglang/multimodal_gen/test/run_suite.py \
            --suite 2-gpu \
            --partition-id ${{ matrix.part }} \
            --total-partitions 2

# B200 Performance tests - 4 GPU
  nightly-test-perf-4-gpu-b200:
    # Runs only in the upstream repository, and only when the job filter is
    # empty, 'all', or names this job.
    if: >-
      github.repository == 'sgl-project/sglang' &&
      (inputs.job_filter == '' ||
      inputs.job_filter == 'all' ||
      inputs.job_filter == 'nightly-test-perf-4-gpu-b200')
    runs-on: 4-gpu-b200
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          # Use the `ref` input when supplied, otherwise the triggering ref.
          ref: ${{ inputs.ref || github.ref }}

      # IS_BLACKWELL=1 is passed to the install script for the B200 runner.
      - name: Install dependencies
        run: IS_BLACKWELL=1 bash scripts/ci/ci_install_dependency.sh

      - name: Run test
        timeout-minutes: 60
        run: |
          cd test
          python3 run_suite.py --hw cuda --suite nightly-4-gpu-b200 --nightly --continue-on-error

# B200 Performance tests - 8 GPU
  nightly-test-perf-8-gpu-b200:
    # Runs only in the upstream repository, and only when the job filter is
    # empty, 'all', or names this job.
    if: >-
      github.repository == 'sgl-project/sglang' &&
      (inputs.job_filter == '' ||
      inputs.job_filter == 'all' ||
      inputs.job_filter == 'nightly-test-perf-8-gpu-b200')
    runs-on: 8-gpu-b200
    env:
      RUNNER_LABELS: 8-gpu-b200
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          # Use the `ref` input when supplied, otherwise the triggering ref.
          ref: ${{ inputs.ref || github.ref }}

      # IS_BLACKWELL=1 is passed to the install script for the B200 runner.
      - name: Install dependencies
        run: IS_BLACKWELL=1 bash scripts/ci/ci_install_dependency.sh

      # Suite shared with the 8-GPU H200 job (nightly-8-gpu-common).
      # `always()` lets this step run even if an earlier step failed.
      - name: Run common 8-GPU model tests
        if: always()
        timeout-minutes: 300
        env:
          TRACE_BASE_URL: https://raw.githubusercontent.com/sglang-bot/sglang-ci-data/main/traces/${{ github.run_id }}
          PERFETTO_RELAY_URL: ${{ vars.PERFETTO_RELAY_URL }}
          GPU_CONFIG: '8-gpu-b200'
        run: |
          cd test
          IS_BLACKWELL=1 python3 run_suite.py --hw cuda --suite nightly-8-gpu-common --nightly --timeout-per-file=12000 --continue-on-error

# Final check job
  check-all-jobs:
    # `always()` keeps this gate running even when a needed job failed, was
    # cancelled, or was skipped by the job filter.
    if: github.repository == 'sgl-project/sglang' && always()
    runs-on: ubuntu-latest
    needs:
      - nightly-test-general-1-gpu-runner
      - nightly-test-general-4-gpu-h100
      - nightly-test-general-8-gpu-h200
      - nightly-test-general-8-gpu-h20
      - nightly-test-general-8-gpu-b200
      - nightly-test-text-accuracy-2-gpu-runner
      - nightly-test-text-perf-2-gpu-runner
      - nightly-test-vlm-accuracy-2-gpu-runner
      - nightly-test-vlm-perf-2-gpu-runner
      - nightly-test-multimodal-server-1-gpu
      - nightly-test-multimodal-server-2-gpu
      - nightly-test-perf-4-gpu-b200
      - nightly-test-perf-8-gpu-b200
    steps:
      # Fails the workflow if any needed job reported 'failure' or 'cancelled';
      # other results (e.g. 'skipped') do not fail the gate.
      - name: Check if any job failed
        run: |
          if [[ "${{ contains(needs.*.result, 'failure') }}" == "true" ]]; then
            echo "One or more nightly test jobs failed"
            exit 1
          fi
          if [[ "${{ contains(needs.*.result, 'cancelled') }}" == "true" ]]; then
            echo "One or more nightly test jobs were cancelled"
            exit 1
          fi
          echo "All nightly test jobs passed"