# Nightly Test (AMD)
# Nightly Test (AMD) #85

name: Nightly Test (AMD)

# Triggers:
#   - schedule:          daily cron ('0 0 * * *'; GitHub evaluates cron in UTC)
#   - push:              only when python/sglang/version.py changes on main
#   - workflow_dispatch: manual run, optionally restricted to a single job
#   - workflow_call:     invoked from another workflow, optionally on a given ref
on:
  schedule:
    - cron: '0 0 * * *'
  push:
    branches:
      - main
    paths:
      - "python/sglang/version.py"
  workflow_dispatch:
    inputs:
      job_filter:
        description: 'Select which job to run (leave empty or "all" to run all jobs)'
        required: false
        type: choice
        default: 'all'
        options:
          - 'all'
          # MI30x Unit Tests
          - 'nightly-test-1-gpu-unit'
          # MI30x Accuracy Tests (GSM8K / MMMU)
          - 'nightly-accuracy-2-gpu'
          - 'nightly-accuracy-2-gpu-vlm'
          - 'nightly-accuracy-8-gpu'
          - 'nightly-accuracy-8-gpu-deepseek-r1'
          # MI30x Accuracy + Performance Tests (combined)
          - 'nightly-8-gpu-grok1-int4'
          - 'nightly-8-gpu-grok2'
          - 'nightly-8-gpu-deepseek-v31'
          # MI35x jobs
          - 'nightly-test-1-gpu-mi35x'
          - 'nightly-accuracy-8-gpu-mi35x'
          - 'nightly-accuracy-8-gpu-mi35x-deepseek-r1'
          - 'nightly-8-gpu-mi35x-grok1-int4'
          - 'nightly-8-gpu-mi35x-grok2'
          - 'nightly-8-gpu-mi35x-deepseek-r1-mxfp4'
  workflow_call:
    inputs:
      ref:
        description: 'Git ref (branch, tag, or SHA) to test. If not provided, uses the default branch.'
        required: false
        type: string
        default: ''
      job_filter:
        description: 'Select which job to run (leave empty or "all" to run all jobs)'
        required: false
        type: string
        default: 'all'
      # Marker input that exists only on the workflow_call trigger. Inside a
      # called workflow the `github` context belongs to the caller, so
      # `github.event_name` is never 'workflow_call'; this defaulted input is
      # the reliable way to tell "called" from "triggered directly".
      called_as_reusable_workflow:
        description: 'Internal marker: true when invoked via workflow_call. Callers should not set this.'
        required: false
        type: boolean
        default: true

# One concurrency group per tested ref. Directly triggered runs (schedule,
# push, workflow_dispatch) cancel an older in-progress run of the same group;
# runs started through workflow_call do not. For the direct triggers the marker
# input is undefined, so the negation evaluates to true.
concurrency:
  group: nightly-test-amd-${{ inputs.ref || github.ref }}
  cancel-in-progress: ${{ !inputs.called_as_reusable_workflow }}
jobs:
# ============================================== MI30x Unit Tests ==============================================
# 1-GPU Unit Tests - LoRA, debug utils, scheduler, etc. (MI30x only)
nightly-test-1-gpu-unit:
  # Runs the `nightly-amd-1-gpu` suite on the `linux-mi325-gpu-1` runner.
  # Enabled for the upstream repository (or pull_request events) when
  # job_filter is empty, 'all', or names this job.
  runs-on: linux-mi325-gpu-1
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
    (inputs.job_filter == '' || inputs.job_filter == 'all' || inputs.job_filter == 'nightly-test-1-gpu-unit')
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        # Explicit `ref` input wins; otherwise the ref that triggered the run.
        ref: ${{ inputs.ref || github.ref }}

    - name: Setup docker
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        touch github_summary.md
        bash scripts/ci/amd_ci_start_container.sh

    - name: Install dependencies
      run: bash scripts/ci/amd_ci_install_dependency.sh

    # The suite's exit status is captured rather than aborting the script, so
    # the summary file is still appended to the step summary before the step
    # exits with that status.
    - name: Nightly Unit Test (1-GPU)
      timeout-minutes: 60
      run: |
        bash scripts/ci/amd_ci_exec.sh -w /sglang-checkout/test \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-amd-1-gpu --nightly --timeout-per-file 600 --continue-on-error || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
# ============================================== MI30x Accuracy Tests ==============================================
# 2-GPU Accuracy Tests - GSM8K eval (MI30x only)
nightly-accuracy-2-gpu:
  # Runs the `nightly-amd` suite on the `linux-mi325-gpu-2` runner, gated on
  # repository/event and on job_filter (empty, 'all', or this job's name).
  runs-on: linux-mi325-gpu-2
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
    (inputs.job_filter == '' || inputs.job_filter == 'all' || inputs.job_filter == 'nightly-accuracy-2-gpu')
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.ref || github.ref }}

    - name: Setup docker
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        touch github_summary.md
        bash scripts/ci/amd_ci_start_container.sh

    - name: Install dependencies
      run: bash scripts/ci/amd_ci_install_dependency.sh

    # NOTE(review): unlike the test steps of the other jobs in this workflow,
    # this step sets no `timeout-minutes`, so only GitHub's default limits
    # apply. Confirm whether that is intentional.
    - name: Nightly Test (2-GPU)
      run: |
        bash scripts/ci/amd_ci_exec.sh -w /sglang-checkout/test \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-amd --nightly --timeout-per-file 7200 || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
# 2-GPU VLM Accuracy Tests - Vision-Language Models MMMU evaluation
nightly-accuracy-2-gpu-vlm:
  # Runs the `nightly-amd-accuracy-2-gpu-vlm` suite on the `linux-mi325-gpu-2`
  # runner; selected when job_filter is empty, 'all', or this job's name.
  runs-on: linux-mi325-gpu-2
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
    (inputs.job_filter == '' || inputs.job_filter == 'all' || inputs.job_filter == 'nightly-accuracy-2-gpu-vlm')
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.ref || github.ref }}

    - name: Setup docker
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        touch github_summary.md
        bash scripts/ci/amd_ci_start_container.sh

    - name: Install dependencies
      run: bash scripts/ci/amd_ci_install_dependency.sh

    # Step is capped at 180 minutes; each test file is capped at 7200 seconds
    # by run_suite.py's --timeout-per-file.
    - name: Nightly Accuracy Test (2-GPU VLM MMMU)
      timeout-minutes: 180
      run: |
        bash scripts/ci/amd_ci_exec.sh -w /sglang-checkout/test \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-amd-accuracy-2-gpu-vlm --nightly --timeout-per-file 7200 || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
# 8-GPU Accuracy Tests - GPT-OSS, Grok1-FP8 (accuracy only)
nightly-accuracy-8-gpu:
  # Runs two accuracy suites back to back on the `linux-mi325-gpu-8` runner:
  # `nightly-amd-accuracy-8-gpu-gpt-oss`, then
  # `nightly-amd-accuracy-8-gpu-grok1-fp8`.
  # Selected when job_filter is empty, 'all', or this job's name.
  runs-on: linux-mi325-gpu-8
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
    (inputs.job_filter == '' || inputs.job_filter == 'all' || inputs.job_filter == 'nightly-accuracy-8-gpu')
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.ref || github.ref }}

    - name: Setup docker
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        touch github_summary.md
        bash scripts/ci/amd_ci_start_container.sh

    - name: Install dependencies
      run: bash scripts/ci/amd_ci_install_dependency.sh

    # Both test steps publish from the same github_summary.md. Each one
    # truncates the file first so a step only publishes its own output, as the
    # other multi-step jobs in this workflow do. Without this, the Grok1-FP8
    # step would republish whatever the GPT-OSS step left in the file.
    - name: Accuracy Test (8-GPU GPT-OSS)
      timeout-minutes: 180
      run: |
        > github_summary.md  # Clear summary file
        bash scripts/ci/amd_ci_exec.sh -w /sglang-checkout/test \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-gpt-oss --nightly --timeout-per-file 7200 || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> "$GITHUB_STEP_SUMMARY" || true
        exit ${TEST_EXIT_CODE:-0}

    - name: Accuracy Test (8-GPU Grok1-FP8)
      timeout-minutes: 60
      run: |
        > github_summary.md  # Clear summary file
        bash scripts/ci/amd_ci_exec.sh -w /sglang-checkout/test \
          -e RCCL_MSCCL_ENABLE=0 \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-grok1-fp8 --nightly --timeout-per-file 3600 || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> "$GITHUB_STEP_SUMMARY" || true
        exit ${TEST_EXIT_CODE:-0}
# 8-GPU DeepSeek-R1 Accuracy Test (separate job due to long loading time)
nightly-accuracy-8-gpu-deepseek-r1:
  # Runs the `nightly-amd-accuracy-8-gpu-deepseek-r1` suite on the
  # `linux-mi325-gpu-8` runner; selected when job_filter is empty, 'all', or
  # this job's name.
  runs-on: linux-mi325-gpu-8
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
    (inputs.job_filter == '' || inputs.job_filter == 'all' || inputs.job_filter == 'nightly-accuracy-8-gpu-deepseek-r1')
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.ref || github.ref }}

    - name: Setup docker
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        touch github_summary.md
        bash scripts/ci/amd_ci_start_container.sh

    - name: Install dependencies
      run: bash scripts/ci/amd_ci_install_dependency.sh

    # Step is capped at 240 minutes. The suite's exit status is held until the
    # summary has been appended, then returned as the step's result.
    - name: Accuracy Test (8-GPU DeepSeek-R1)
      timeout-minutes: 240
      run: |
        bash scripts/ci/amd_ci_exec.sh -w /sglang-checkout/test \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-deepseek-r1 --nightly --timeout-per-file 7200 || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
# ============================================== MI30x Combined Accuracy + Performance Tests ==============================================
# 8-GPU Grok1-INT4 (Accuracy + Performance combined)
nightly-8-gpu-grok1-int4:
  # Accuracy suite followed by performance suite for Grok1-INT4 on the
  # `linux-mi325-gpu-8` runner. Selected when job_filter is empty, 'all', or
  # this job's name.
  runs-on: linux-mi325-gpu-8
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
    (inputs.job_filter == '' || inputs.job_filter == 'all' || inputs.job_filter == 'nightly-8-gpu-grok1-int4')
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.ref || github.ref }}

    - name: Setup docker
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        touch github_summary.md
        bash scripts/ci/amd_ci_start_container.sh

    - name: Install dependencies
      run: bash scripts/ci/amd_ci_install_dependency.sh

    # Each test step truncates github_summary.md before running, so it only
    # publishes the output written during that step.
    - name: Accuracy Test (8-GPU Grok1-INT4)
      timeout-minutes: 60
      run: |
        > github_summary.md  # Clear summary file
        bash scripts/ci/amd_ci_exec.sh -w /sglang-checkout/test \
          -e RCCL_MSCCL_ENABLE=0 \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-grok1-int4 --nightly --timeout-per-file 3600 || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}

    - name: Performance Test (8-GPU Grok1-INT4)
      timeout-minutes: 60
      # Perf test failure doesn't fail the job if accuracy passed
      continue-on-error: true
      run: |
        > github_summary.md  # Clear summary file
        bash scripts/ci/amd_ci_exec.sh -w /sglang-checkout/test \
          -e RCCL_MSCCL_ENABLE=0 \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-perf-8-gpu-grok1-int4 --nightly --timeout-per-file 3600 || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
# 8-GPU Grok2 (Accuracy + Performance combined)
nightly-8-gpu-grok2:
  # Accuracy suite followed by performance suite for Grok2 on the
  # `linux-mi325-gpu-8` runner. Selected when job_filter is empty, 'all', or
  # this job's name.
  runs-on: linux-mi325-gpu-8
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
    (inputs.job_filter == '' || inputs.job_filter == 'all' || inputs.job_filter == 'nightly-8-gpu-grok2')
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.ref || github.ref }}

    - name: Setup docker
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        touch github_summary.md
        bash scripts/ci/amd_ci_start_container.sh

    - name: Install dependencies
      run: bash scripts/ci/amd_ci_install_dependency.sh

    # Both test steps pass RCCL_MSCCL_ENABLE=0 into the container and start
    # from an emptied summary file.
    - name: Accuracy Test (8-GPU Grok2)
      timeout-minutes: 60
      run: |
        > github_summary.md  # Clear summary file
        bash scripts/ci/amd_ci_exec.sh -w /sglang-checkout/test \
          -e RCCL_MSCCL_ENABLE=0 \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-grok2 --nightly --timeout-per-file 3600 || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}

    - name: Performance Test (8-GPU Grok2)
      timeout-minutes: 60
      # Perf test failure doesn't fail the job if accuracy passed
      continue-on-error: true
      run: |
        > github_summary.md  # Clear summary file
        bash scripts/ci/amd_ci_exec.sh -w /sglang-checkout/test \
          -e RCCL_MSCCL_ENABLE=0 \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-perf-8-gpu-grok2 --nightly --timeout-per-file 3600 || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
# 8-GPU DeepSeek-V3.1 (Accuracy + Performance combined)
nightly-8-gpu-deepseek-v31:
  # Accuracy suite followed by performance suite for DeepSeek-V3.1 on the
  # `linux-mi325-gpu-8` runner. Selected when job_filter is empty, 'all', or
  # this job's name.
  runs-on: linux-mi325-gpu-8
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
    (inputs.job_filter == '' || inputs.job_filter == 'all' || inputs.job_filter == 'nightly-8-gpu-deepseek-v31')
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.ref || github.ref }}

    - name: Setup docker
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        touch github_summary.md
        bash scripts/ci/amd_ci_start_container.sh

    - name: Install dependencies
      run: bash scripts/ci/amd_ci_install_dependency.sh

    # NOTE(review): the accuracy step passes SGLANG_USE_AITER=1 while the perf
    # step passes SGLANG_USE_ROCM700A=1. Confirm the difference is intended.
    - name: Accuracy Test (8-GPU DeepSeek-V3.1)
      timeout-minutes: 120
      run: |
        > github_summary.md  # Clear summary file
        bash scripts/ci/amd_ci_exec.sh -w /sglang-checkout/test \
          -e SGLANG_USE_AITER=1 \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-deepseek-v31 --nightly --timeout-per-file 3600 || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}

    # The 300-minute step limit equals the 18000-second per-file limit.
    - name: Performance Test (8-GPU DeepSeek-V3.1)
      timeout-minutes: 300
      # Perf test failure doesn't fail the job if accuracy passed
      continue-on-error: true
      run: |
        > github_summary.md  # Clear summary file
        bash scripts/ci/amd_ci_exec.sh -w /sglang-checkout/test \
          -e SGLANG_USE_ROCM700A=1 \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-perf-8-gpu-deepseek-v31 --nightly --timeout-per-file 18000 || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
# ============================================== MI35x Tests ==============================================
# MI35x 1-GPU tests - platform-agnostic tests that may work on CDNA4 (gfx950)
nightly-test-1-gpu-mi35x:
  # Runs the `nightly-amd-1-gpu-mi35x` suite on the `linux-mi35x-gpu-1`
  # runner; selected when job_filter is empty, 'all', or this job's name.
  runs-on: linux-mi35x-gpu-1
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
    (inputs.job_filter == '' || inputs.job_filter == 'all' || inputs.job_filter == 'nightly-test-1-gpu-mi35x')
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.ref || github.ref }}

    - name: Setup docker
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        touch github_summary.md
        bash scripts/ci/amd_ci_start_container.sh

    - name: Install dependencies
      run: |
        bash scripts/ci/amd_ci_install_dependency.sh
        # Install tabulate for run_suite.py (missing in MI35x container)
        bash scripts/ci/amd_ci_exec.sh pip install tabulate

    # --continue-on-error is passed to run_suite.py; the step still exits with
    # the suite's captured status after the summary is published.
    - name: Nightly Test MI35x (1-GPU)
      timeout-minutes: 60
      run: |
        bash scripts/ci/amd_ci_exec.sh -w /sglang-checkout/test \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-amd-1-gpu-mi35x --nightly --timeout-per-file 600 --continue-on-error || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
# MI35x 8-GPU Accuracy Tests - GPT-OSS (accuracy only)
nightly-accuracy-8-gpu-mi35x:
  # Runs the `nightly-amd-8-gpu-mi35x` suite on the `linux-mi35x-gpu-8`
  # runner; selected when job_filter is empty, 'all', or this job's name.
  runs-on: linux-mi35x-gpu-8
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
    (inputs.job_filter == '' || inputs.job_filter == 'all' || inputs.job_filter == 'nightly-accuracy-8-gpu-mi35x')
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.ref || github.ref }}

    - name: Setup docker
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        touch github_summary.md
        bash scripts/ci/amd_ci_start_container.sh

    - name: Install dependencies
      run: |
        bash scripts/ci/amd_ci_install_dependency.sh
        # Install tabulate for run_suite.py (missing in MI35x container)
        bash scripts/ci/amd_ci_exec.sh pip install tabulate

    # Step is capped at 180 minutes; each test file at 7200 seconds.
    - name: Accuracy Test MI35x (8-GPU GPT-OSS)
      timeout-minutes: 180
      run: |
        bash scripts/ci/amd_ci_exec.sh -w /sglang-checkout/test \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-amd-8-gpu-mi35x --nightly --timeout-per-file 7200 || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
# MI35x 8-GPU DeepSeek-R1-0528 Accuracy Test (separate job due to long loading time)
nightly-accuracy-8-gpu-mi35x-deepseek-r1:
  # Runs the `nightly-amd-accuracy-8-gpu-mi35x-deepseek-r1` suite on the
  # `linux-mi35x-gpu-8` runner; selected when job_filter is empty, 'all', or
  # this job's name.
  runs-on: linux-mi35x-gpu-8
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
    (inputs.job_filter == '' || inputs.job_filter == 'all' || inputs.job_filter == 'nightly-accuracy-8-gpu-mi35x-deepseek-r1')
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.ref || github.ref }}

    - name: Setup docker
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        touch github_summary.md
        bash scripts/ci/amd_ci_start_container.sh

    - name: Install dependencies
      run: |
        bash scripts/ci/amd_ci_install_dependency.sh
        # Install tabulate for run_suite.py (missing in MI35x container)
        bash scripts/ci/amd_ci_exec.sh pip install tabulate

    # Step is capped at 240 minutes; each test file at 7200 seconds.
    - name: Accuracy Test MI35x (8-GPU DeepSeek-R1-0528)
      timeout-minutes: 240
      run: |
        bash scripts/ci/amd_ci_exec.sh -w /sglang-checkout/test \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-mi35x-deepseek-r1 --nightly --timeout-per-file 7200 || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
# MI35x 8-GPU Grok1-INT4 (Accuracy + Performance combined)
nightly-8-gpu-mi35x-grok1-int4:
  # Accuracy suite followed by performance suite for Grok1-INT4 on the
  # `linux-mi35x-gpu-8` runner. Selected when job_filter is empty, 'all', or
  # this job's name.
  runs-on: linux-mi35x-gpu-8
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
    (inputs.job_filter == '' || inputs.job_filter == 'all' || inputs.job_filter == 'nightly-8-gpu-mi35x-grok1-int4')
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.ref || github.ref }}

    - name: Setup docker
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        touch github_summary.md
        bash scripts/ci/amd_ci_start_container.sh

    - name: Install dependencies
      run: |
        bash scripts/ci/amd_ci_install_dependency.sh
        # Install tabulate for run_suite.py (missing in MI35x container)
        bash scripts/ci/amd_ci_exec.sh pip install tabulate

    # Each test step empties github_summary.md first, so it publishes only its
    # own results.
    - name: Accuracy Test MI35x (8-GPU Grok1-INT4)
      timeout-minutes: 60
      run: |
        > github_summary.md  # Clear summary file
        bash scripts/ci/amd_ci_exec.sh -w /sglang-checkout/test \
          -e RCCL_MSCCL_ENABLE=0 \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-mi35x-grok1-int4 --nightly --timeout-per-file 3600 || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}

    - name: Performance Test MI35x (8-GPU Grok1-INT4)
      timeout-minutes: 60
      # Perf test failure doesn't fail the job if accuracy passed
      continue-on-error: true
      run: |
        > github_summary.md  # Clear summary file
        bash scripts/ci/amd_ci_exec.sh -w /sglang-checkout/test \
          -e RCCL_MSCCL_ENABLE=0 \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-perf-8-gpu-mi35x-grok1-int4 --nightly --timeout-per-file 3600 || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
# MI35x 8-GPU Grok2 (Accuracy + Performance combined)
nightly-8-gpu-mi35x-grok2:
  # Accuracy suite followed by performance suite for Grok2 on the
  # `linux-mi35x-gpu-8` runner. Selected when job_filter is empty, 'all', or
  # this job's name.
  runs-on: linux-mi35x-gpu-8
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
    (inputs.job_filter == '' || inputs.job_filter == 'all' || inputs.job_filter == 'nightly-8-gpu-mi35x-grok2')
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.ref || github.ref }}

    - name: Setup docker
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        touch github_summary.md
        bash scripts/ci/amd_ci_start_container.sh

    - name: Install dependencies
      run: |
        bash scripts/ci/amd_ci_install_dependency.sh
        # Install tabulate for run_suite.py (missing in MI35x container)
        bash scripts/ci/amd_ci_exec.sh pip install tabulate

    # Both test steps pass RCCL_MSCCL_ENABLE=0 into the container and start
    # from an emptied summary file.
    - name: Accuracy Test MI35x (8-GPU Grok2)
      timeout-minutes: 60
      run: |
        > github_summary.md  # Clear summary file
        bash scripts/ci/amd_ci_exec.sh -w /sglang-checkout/test \
          -e RCCL_MSCCL_ENABLE=0 \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-mi35x-grok2 --nightly --timeout-per-file 3600 || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}

    - name: Performance Test MI35x (8-GPU Grok2)
      timeout-minutes: 60
      # Perf test failure doesn't fail the job if accuracy passed
      continue-on-error: true
      run: |
        > github_summary.md  # Clear summary file
        bash scripts/ci/amd_ci_exec.sh -w /sglang-checkout/test \
          -e RCCL_MSCCL_ENABLE=0 \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-perf-8-gpu-mi35x-grok2 --nightly --timeout-per-file 3600 || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
# MI35x 8-GPU DeepSeek-R1-MXFP4 (Accuracy + Performance combined)
nightly-8-gpu-mi35x-deepseek-r1-mxfp4:
  # Accuracy suite followed by a performance test for DeepSeek-R1-MXFP4 on the
  # `linux-mi35x-gpu-8` runner. Selected when job_filter is empty, 'all', or
  # this job's name.
  runs-on: linux-mi35x-gpu-8
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
    (inputs.job_filter == '' || inputs.job_filter == 'all' || inputs.job_filter == 'nightly-8-gpu-mi35x-deepseek-r1-mxfp4')
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.ref || github.ref }}

    - name: Setup docker
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        touch github_summary.md
        bash scripts/ci/amd_ci_start_container.sh

    - name: Install dependencies
      run: |
        bash scripts/ci/amd_ci_install_dependency.sh
        # Install tabulate for run_suite.py (missing in MI35x container)
        bash scripts/ci/amd_ci_exec.sh pip install tabulate

    - name: Accuracy Test MI35x (8-GPU DeepSeek-R1-MXFP4)
      timeout-minutes: 180
      run: |
        > github_summary.md  # Clear summary file
        bash scripts/ci/amd_ci_exec.sh -w /sglang-checkout/test \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-amd-8-gpu-mi35x-deepseek-r1-mxfp4 --nightly --timeout-per-file 7200 || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}

    # NOTE(review): this perf step runs the test file directly instead of going
    # through run_suite.py like the other perf steps in this workflow, so no
    # --timeout-per-file applies; only the 300-minute step limit bounds it.
    - name: Performance Test MI35x (8-GPU DeepSeek-R1-MXFP4)
      timeout-minutes: 300
      # Perf test failure doesn't fail the job if accuracy passed
      continue-on-error: true
      run: |
        > github_summary.md  # Clear summary file
        bash scripts/ci/amd_ci_exec.sh -w /sglang-checkout/test \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 registered/amd/perf/mi35x/test_deepseek_r1_mxfp4_perf_mi35x.py || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
check-all-jobs:
  # Aggregation gate: runs even when upstream jobs fail or are skipped
  # (`always()`), and fails if any needed job reported 'failure' or
  # 'cancelled'. Skipped jobs do not fail the gate.
  runs-on: ubuntu-latest
  if: >-
    always() &&
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request' || github.event_name == 'workflow_dispatch')
  needs:
    # MI30x: unit tests
    - nightly-test-1-gpu-unit
    # MI30x: accuracy tests
    - nightly-accuracy-2-gpu
    - nightly-accuracy-2-gpu-vlm
    - nightly-accuracy-8-gpu
    - nightly-accuracy-8-gpu-deepseek-r1
    # MI30x: combined accuracy + performance tests
    - nightly-8-gpu-grok1-int4
    - nightly-8-gpu-grok2
    - nightly-8-gpu-deepseek-v31
    # MI35x jobs
    - nightly-test-1-gpu-mi35x
    - nightly-accuracy-8-gpu-mi35x
    - nightly-accuracy-8-gpu-mi35x-deepseek-r1
    - nightly-8-gpu-mi35x-grok1-int4
    - nightly-8-gpu-mi35x-grok2
    - nightly-8-gpu-mi35x-deepseek-r1-mxfp4
  steps:
    - name: Check if any job failed
      env:
        # Expression results reach the script as the strings "true"/"false".
        ANY_FAILED: ${{ contains(needs.*.result, 'failure') }}
        ANY_CANCELLED: ${{ contains(needs.*.result, 'cancelled') }}
      run: |
        if [[ "$ANY_FAILED" == "true" ]]; then
          echo "One or more nightly test jobs failed"
          exit 1
        fi
        if [[ "$ANY_CANCELLED" == "true" ]]; then
          echo "One or more nightly test jobs were cancelled"
          exit 1
        fi
        echo "All nightly test jobs passed"