Skip to content

Nightly Test (AMD)

Nightly Test (AMD) #79

name: Nightly Test (AMD)
on:
schedule:
- cron: '30 17 * * *'
push:
branches:
- main
paths:
- "python/sglang/version.py"
workflow_dispatch:
inputs:
aiter_ref:
description: 'Override AITER commit (optional, leave empty to use Dockerfile default)'
required: false
type: string
default: ''
continue_on_error:
description: 'Continue on error (do not fail the workflow on test failures)'
required: false
type: boolean
default: true
job_select:
description: 'Select a job to run from dropdown (choose "all" to run all jobs)'
required: false
type: choice
default: 'all'
options:
- 'all'
- nightly-test-1-gpu-unit
- nightly-accuracy-2-gpu
- nightly-accuracy-2-gpu-vlm
- nightly-perf-2-gpu-text
- nightly-perf-2-gpu-vlm
- nightly-4-gpu
- nightly-accuracy-8-gpu
- nightly-8-gpu-grok1-int4
- nightly-8-gpu-grok2
- nightly-8-gpu-deepseek-v31
- nightly-8-gpu-deepseek-v32
- nightly-8-gpu-deepseek-v32-mtp
- nightly-8-gpu-deepseek-v3-kv-fp8
- nightly-8-gpu-kimi-k25
- nightly-8-gpu-qwen3-235b
- nightly-8-gpu-qwen35
- nightly-8-gpu-glm5
- nightly-8-gpu-glm51
- nightly-8-gpu-glm51-mxfp4
- nightly-8-gpu-minimax-m27
- nightly-1-gpu-zimage-turbo
- nightly-test-1-gpu-mi35x
- nightly-accuracy-8-gpu-mi35x
- nightly-8-gpu-mi35x-grok1-int4
- nightly-8-gpu-mi35x-grok2
- nightly-8-gpu-mi35x-deepseek-r1-mxfp4
- nightly-8-gpu-mi35x-deepseek-r1-mxfp4-kv-fp8
- nightly-8-gpu-mi35x-deepseek-r1-mxfp4-ar-fusion
- nightly-accuracy-8-gpu-mi35x-deepseek-v32
- nightly-accuracy-8-gpu-mi35x-deepseek-v32-mtp
- nightly-perf-8-gpu-mi35x-deepseek-v32-basic
- nightly-perf-8-gpu-mi35x-deepseek-v32-mtp
- nightly-8-gpu-mi35x-kimi-k25
- nightly-8-gpu-mi35x-qwen3-235b-mxfp4
- nightly-8-gpu-mi35x-qwen35
- nightly-8-gpu-mi35x-glm5
- nightly-8-gpu-mi35x-glm51
- nightly-8-gpu-mi35x-glm51-mxfp4
job_filter:
description: 'Or type comma-separated job names (overrides dropdown if non-empty)'
required: false
type: string
default: ''
workflow_call:
inputs:
ref:
description: 'Git ref (branch, tag, or SHA) to test. If not provided, uses the default branch.'
required: false
type: string
default: ''
aiter_ref:
description: 'Override AITER commit (optional, leave empty to use Dockerfile default)'
required: false
type: string
default: ''
job_filter:
description: 'Select which job to run (leave empty or "all" to run all jobs)'
required: false
type: string
default: 'all'
continue_on_error:
description: 'Continue on error (do not fail the workflow on test failures)'
required: false
type: boolean
default: true
env:
AITER_COMMIT_OVERRIDE: ${{ inputs.aiter_ref }}
concurrency:
# When called via workflow_call with ref set, use a unique group per caller run to avoid
# collisions with direct schedule/push triggers. We use inputs.ref (not github.event_name)
# to detect this, because github.event_name inherits from the caller in workflow_call.
group: nightly-test-amd-${{ inputs.ref && format('caller-{0}', github.run_id) || github.ref }}
cancel-in-progress: ${{ !inputs.ref && github.event_name != 'workflow_call' }}
jobs:
# ============================================== MI30x Unit Tests ==============================================
# 1-GPU Unit Tests - LoRA, debug utils, scheduler, etc. (MI30x only)
nightly-test-1-gpu-unit:
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-test-1-gpu-unit,'))
runs-on: linux-mi325-1gpu-sglang
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
ref: ${{ inputs.ref || github.ref }}
- name: Setup docker
run: |
touch github_summary.md
bash scripts/ci/amd/amd_ci_start_container.sh
env:
GITHUB_WORKSPACE: ${{ github.workspace }}
- name: Install dependencies
run: bash scripts/ci/amd/amd_ci_install_dependency.sh
- name: Nightly Unit Test (1-GPU)
timeout-minutes: 90
run: |
bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
-e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
python3 run_suite.py --hw amd --suite nightly-amd-1-gpu --nightly --timeout-per-file 900 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
exit ${TEST_EXIT_CODE:-0}
# ============================================== MI30x Accuracy Tests ==============================================
# 2-GPU Accuracy Tests - GSM8K eval (MI30x only)
nightly-accuracy-2-gpu:
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-accuracy-2-gpu,'))
runs-on: linux-mi325-2gpu-sglang
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
ref: ${{ inputs.ref || github.ref }}
- name: Setup docker
run: |
touch github_summary.md
bash scripts/ci/amd/amd_ci_start_container.sh
env:
GITHUB_WORKSPACE: ${{ github.workspace }}
- name: Install dependencies
run: bash scripts/ci/amd/amd_ci_install_dependency.sh
- name: Nightly Test (2-GPU)
run: |
> github_summary.md # Clear summary file
bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
-e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
python3 run_suite.py --hw amd --suite nightly-amd --nightly --timeout-per-file 7200 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
exit ${TEST_EXIT_CODE:-0}
# 2-GPU VLM Accuracy Tests - Vision-Language Models MMMU evaluation
nightly-accuracy-2-gpu-vlm:
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-accuracy-2-gpu-vlm,'))
runs-on: linux-mi325-2gpu-sglang
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
ref: ${{ inputs.ref || github.ref }}
- name: Setup docker
run: |
touch github_summary.md
bash scripts/ci/amd/amd_ci_start_container.sh
env:
GITHUB_WORKSPACE: ${{ github.workspace }}
- name: Install dependencies
run: bash scripts/ci/amd/amd_ci_install_dependency.sh
- name: Nightly Accuracy Test (2-GPU VLM MMMU)
timeout-minutes: 180
run: |
> github_summary.md # Clear summary file
bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
-e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
python3 run_suite.py --hw amd --suite nightly-amd-accuracy-2-gpu-vlm --nightly --timeout-per-file 7200 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
exit ${TEST_EXIT_CODE:-0}
# 2-GPU Text Models Performance Tests
nightly-perf-2-gpu-text:
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-perf-2-gpu-text,'))
runs-on: linux-mi325-2gpu-sglang
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
ref: ${{ inputs.ref || github.ref }}
- name: Setup docker
run: |
touch github_summary.md
bash scripts/ci/amd/amd_ci_start_container.sh
env:
GITHUB_WORKSPACE: ${{ github.workspace }}
- name: Install dependencies
run: bash scripts/ci/amd/amd_ci_install_dependency.sh
- name: Performance Test (2-GPU Text Models)
timeout-minutes: 120
run: |
> github_summary.md # Clear summary file
bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
-e SGLANG_USE_AITER=1 \
-e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
python3 run_suite.py --hw amd --suite nightly-amd-perf-text-2-gpu --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
exit ${TEST_EXIT_CODE:-0}
# 2-GPU VLM Performance Tests
nightly-perf-2-gpu-vlm:
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-perf-2-gpu-vlm,'))
runs-on: linux-mi325-2gpu-sglang
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
ref: ${{ inputs.ref || github.ref }}
- name: Setup docker
run: |
touch github_summary.md
bash scripts/ci/amd/amd_ci_start_container.sh
env:
GITHUB_WORKSPACE: ${{ github.workspace }}
- name: Install dependencies
run: bash scripts/ci/amd/amd_ci_install_dependency.sh
- name: Performance Test (2-GPU VLM Models)
timeout-minutes: 180
run: |
> github_summary.md # Clear summary file
bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
-e SGLANG_USE_AITER=1 \
-e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
python3 run_suite.py --hw amd --suite nightly-amd-perf-vlm-2-gpu --nightly --timeout-per-file 7200 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
exit ${TEST_EXIT_CODE:-0}
# ============================================== MI30x 4-GPU Tests ==============================================
# 4-GPU Nightly Tests - Dumper/Comparator E2E, VLM Encoder DP
nightly-4-gpu:
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-4-gpu,'))
runs-on: linux-mi325-4gpu-sglang
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
ref: ${{ inputs.ref || github.ref }}
- name: Setup docker
run: |
touch github_summary.md
bash scripts/ci/amd/amd_ci_start_container.sh
env:
GITHUB_WORKSPACE: ${{ github.workspace }}
- name: Install dependencies
run: bash scripts/ci/amd/amd_ci_install_dependency.sh
- name: Nightly Test (4-GPU)
timeout-minutes: 120
run: |
> github_summary.md
bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
-e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
python3 run_suite.py --hw amd --suite nightly-amd-4-gpu --nightly --continue-on-error --timeout-per-file 3600 || TEST_EXIT_CODE=$?
echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
exit ${TEST_EXIT_CODE:-0}
# 8-GPU Accuracy Tests - GPT-OSS, Grok1-FP8 (accuracy only)
nightly-accuracy-8-gpu:
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-accuracy-8-gpu,'))
runs-on: linux-mi325-8gpu-sglang
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
ref: ${{ inputs.ref || github.ref }}
- name: Setup docker
run: |
touch github_summary.md
bash scripts/ci/amd/amd_ci_start_container.sh
env:
GITHUB_WORKSPACE: ${{ github.workspace }}
- name: Install dependencies
run: bash scripts/ci/amd/amd_ci_install_dependency.sh
- name: Accuracy Test (8-GPU GPT-OSS)
timeout-minutes: 180
run: |
bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
-e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-gpt-oss --nightly --timeout-per-file 7200 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
exit ${TEST_EXIT_CODE:-0}
- name: Accuracy Test (8-GPU Grok1-FP8)
timeout-minutes: 60
run: |
bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
-e RCCL_MSCCL_ENABLE=0 \
-e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-grok1-fp8 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
exit ${TEST_EXIT_CODE:-0}
# ============================================== MI30x Combined Accuracy + Performance Tests ==============================================
# 8-GPU Grok1-INT4 (Accuracy + Performance combined)
nightly-8-gpu-grok1-int4:
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-grok1-int4,'))
runs-on: linux-mi325-8gpu-sglang
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
ref: ${{ inputs.ref || github.ref }}
- name: Setup docker
run: |
touch github_summary.md
bash scripts/ci/amd/amd_ci_start_container.sh
env:
GITHUB_WORKSPACE: ${{ github.workspace }}
- name: Install dependencies
run: bash scripts/ci/amd/amd_ci_install_dependency.sh
- name: Accuracy Test (8-GPU Grok1-INT4)
timeout-minutes: 60
run: |
> github_summary.md # Clear summary file
bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
-e RCCL_MSCCL_ENABLE=0 \
-e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-grok1-int4 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
exit ${TEST_EXIT_CODE:-0}
- name: Performance Test (8-GPU Grok1-INT4)
timeout-minutes: 60
continue-on-error: true # Perf test failure doesn't fail the job if accuracy passed
run: |
> github_summary.md # Clear summary file
bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
-e RCCL_MSCCL_ENABLE=0 \
-e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
python3 run_suite.py --hw amd --suite nightly-perf-8-gpu-grok1-int4 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
exit ${TEST_EXIT_CODE:-0}
# 8-GPU Grok2 (Accuracy + Performance combined)
nightly-8-gpu-grok2:
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-grok2,'))
runs-on: linux-mi325-8gpu-sglang
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
ref: ${{ inputs.ref || github.ref }}
- name: Setup docker
run: |
touch github_summary.md
bash scripts/ci/amd/amd_ci_start_container.sh
env:
GITHUB_WORKSPACE: ${{ github.workspace }}
- name: Install dependencies
run: bash scripts/ci/amd/amd_ci_install_dependency.sh
- name: Accuracy Test (8-GPU Grok2)
timeout-minutes: 60
run: |
> github_summary.md # Clear summary file
bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
-e RCCL_MSCCL_ENABLE=0 \
-e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-grok2 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
exit ${TEST_EXIT_CODE:-0}
- name: Performance Test (8-GPU Grok2)
timeout-minutes: 60
continue-on-error: true # Perf test failure doesn't fail the job if accuracy passed
run: |
> github_summary.md # Clear summary file
bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
-e RCCL_MSCCL_ENABLE=0 \
-e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
python3 run_suite.py --hw amd --suite nightly-perf-8-gpu-grok2 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
exit ${TEST_EXIT_CODE:-0}
# 8-GPU DeepSeek-V3.1 (Accuracy + Performance combined)
nightly-8-gpu-deepseek-v31:
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-deepseek-v31,'))
runs-on: linux-mi325-8gpu-sglang
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
ref: ${{ inputs.ref || github.ref }}
- name: Setup docker
run: |
touch github_summary.md
bash scripts/ci/amd/amd_ci_start_container.sh
env:
GITHUB_WORKSPACE: ${{ github.workspace }}
- name: Install dependencies
run: bash scripts/ci/amd/amd_ci_install_dependency.sh
- name: Accuracy Test (8-GPU DeepSeek-V3.1)
timeout-minutes: 120
run: |
> github_summary.md # Clear summary file
bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
-e SGLANG_USE_AITER=1 \
-e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-deepseek-v31 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
exit ${TEST_EXIT_CODE:-0}
- name: Performance Test (8-GPU DeepSeek-V3.1)
timeout-minutes: 300
continue-on-error: true # Perf test failure doesn't fail the job if accuracy passed
run: |
> github_summary.md # Clear summary file
bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
-e SGLANG_USE_ROCM700A=1 \
-e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
python3 run_suite.py --hw amd --suite nightly-perf-8-gpu-deepseek-v31 --nightly --timeout-per-file 18000 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
exit ${TEST_EXIT_CODE:-0}
# 8-GPU DeepSeek-V3.2 (Basic Accuracy + Perf)
nightly-8-gpu-deepseek-v32:
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-deepseek-v32,'))
runs-on: linux-mi325-8gpu-sglang
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
ref: ${{ inputs.ref || github.ref }}
- name: Setup docker
run: |
touch github_summary.md
bash scripts/ci/amd/amd_ci_start_container.sh
env:
GITHUB_WORKSPACE: ${{ github.workspace }}
- name: Install dependencies
run: bash scripts/ci/amd/amd_ci_install_dependency.sh
- name: Accuracy Test (8-GPU DeepSeek-V3.2 Basic)
timeout-minutes: 120
run: |
> github_summary.md # Clear summary file
bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
-e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-deepseek-v32 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
exit ${TEST_EXIT_CODE:-0}
- name: Performance Test (8-GPU DeepSeek-V3.2 Basic)
timeout-minutes: 150
continue-on-error: true # Perf test failure doesn't fail the job if accuracy passed
run: |
> github_summary.md # Clear summary file
bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
-e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
python3 run_suite.py --hw amd --suite nightly-perf-8-gpu-deepseek-v32-basic --nightly --timeout-per-file 5400 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
exit ${TEST_EXIT_CODE:-0}
# 8-GPU DeepSeek-V3.2 MTP (MTP Accuracy + Perf)
nightly-8-gpu-deepseek-v32-mtp:
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-deepseek-v32-mtp,'))
runs-on: linux-mi325-8gpu-sglang
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
ref: ${{ inputs.ref || github.ref }}
- name: Setup docker
run: |
touch github_summary.md
bash scripts/ci/amd/amd_ci_start_container.sh
env:
GITHUB_WORKSPACE: ${{ github.workspace }}
- name: Install dependencies
run: bash scripts/ci/amd/amd_ci_install_dependency.sh
- name: Accuracy Test (8-GPU DeepSeek-V3.2 MTP)
timeout-minutes: 120
run: |
> github_summary.md # Clear summary file
bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
-e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-deepseek-v32-mtp --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
exit ${TEST_EXIT_CODE:-0}
- name: Performance Test (8-GPU DeepSeek-V3.2 MTP)
timeout-minutes: 180
continue-on-error: true # Perf test failure doesn't fail the job if accuracy passed
run: |
> github_summary.md # Clear summary file
bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
-e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
python3 run_suite.py --hw amd --suite nightly-perf-8-gpu-deepseek-v32-mtp --nightly --timeout-per-file 7200 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
exit ${TEST_EXIT_CODE:-0}
# 8-GPU DeepSeek-V3 KV FP8 (Basic + MTP with --kv-cache-dtype fp8_e4m3)
nightly-8-gpu-deepseek-v3-kv-fp8:
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-deepseek-v3-kv-fp8,'))
runs-on: linux-mi325-8gpu-sglang
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
ref: ${{ inputs.ref || github.ref }}
- name: Setup docker
run: |
touch github_summary.md
bash scripts/ci/amd/amd_ci_start_container.sh
env:
GITHUB_WORKSPACE: ${{ github.workspace }}
- name: Install dependencies
run: bash scripts/ci/amd/amd_ci_install_dependency.sh
- name: DeepSeek-V3 KV FP8 Test (8-GPU Basic + MTP)
timeout-minutes: 120
run: |
> github_summary.md # Clear summary file
bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
-e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
python3 run_suite.py --hw amd --suite nightly-amd-8-gpu-deepseek-v3-kv-fp8 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
exit ${TEST_EXIT_CODE:-0}
# 8-GPU Kimi-K2.5 (Accuracy)
nightly-8-gpu-kimi-k25:
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-kimi-k25,'))
runs-on: linux-mi325-8gpu-sglang
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
ref: ${{ inputs.ref || github.ref }}
- name: Setup docker
run: |
touch github_summary.md
bash scripts/ci/amd/amd_ci_start_container.sh
env:
GITHUB_WORKSPACE: ${{ github.workspace }}
- name: Install dependencies
run: bash scripts/ci/amd/amd_ci_install_dependency.sh
- name: Accuracy Test (8-GPU Kimi-K2.5)
timeout-minutes: 120
run: |
> github_summary.md # Clear summary file
bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
-e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-kimi-k25 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
exit ${TEST_EXIT_CODE:-0}
nightly-8-gpu-qwen3-235b:
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-qwen3-235b,'))
runs-on: linux-mi325-8gpu-sglang
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
ref: ${{ inputs.ref || github.ref }}
- name: Setup docker
run: |
touch github_summary.md
bash scripts/ci/amd/amd_ci_start_container.sh
env:
GITHUB_WORKSPACE: ${{ github.workspace }}
- name: Install dependencies
run: bash scripts/ci/amd/amd_ci_install_dependency.sh
- name: Accuracy Test + Performance Test (8-GPU Qwen3)
timeout-minutes: 120
run: |
> github_summary.md # Clear summary file
bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
-e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
python3 run_suite.py --hw amd --suite nightly-8-gpu-qwen3-235b --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
exit ${TEST_EXIT_CODE:-0}
# 8-GPU Qwen 3.5 (Accuracy + Performance combined)
nightly-8-gpu-qwen35:
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-qwen35,'))
runs-on: linux-mi325-8gpu-sglang
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
ref: ${{ inputs.ref || github.ref }}
- name: Setup docker
run: |
touch github_summary.md
bash scripts/ci/amd/amd_ci_start_container.sh
env:
GITHUB_WORKSPACE: ${{ github.workspace }}
- name: Install dependencies
run: |
bash scripts/ci/amd/amd_ci_install_dependency.sh
bash scripts/ci/amd/amd_ci_exec.sh pip install mistral-common "lm-eval[api]"
- name: Accuracy Test (8-GPU Qwen 3.5)
timeout-minutes: 120
run: |
> github_summary.md # Clear summary file
bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
-e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-qwen35 --nightly --timeout-per-file 3600 || TEST_EXIT_CODE=$?
echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
exit ${TEST_EXIT_CODE:-0}
- name: Performance Test (8-GPU Qwen 3.5 FP8)
timeout-minutes: 120
continue-on-error: true
run: |
> github_summary.md # Clear summary file
bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
-e SGLANG_USE_AITER=1 \
-e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
python3 run_suite.py --hw amd --suite nightly-perf-8-gpu-qwen35-fp8 --nightly --timeout-per-file 5400 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
exit ${TEST_EXIT_CODE:-0}
# 8-GPU GLM-5 (Accuracy + Performance combined)
nightly-8-gpu-glm5:
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-glm5,'))
runs-on: linux-mi325-8gpu-sglang
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
ref: ${{ inputs.ref || github.ref }}
- name: Setup docker
run: |
touch github_summary.md
bash scripts/ci/amd/amd_ci_start_container.sh
env:
GITHUB_WORKSPACE: ${{ github.workspace }}
- name: Install dependencies
run: |
bash scripts/ci/amd/amd_ci_install_dependency.sh
bash scripts/ci/amd/amd_ci_exec.sh pip install git+https://github.com/huggingface/transformers.git@96f807a33b75
- name: Accuracy Test (8-GPU GLM-5 NSA)
timeout-minutes: 120
run: |
> github_summary.md # Clear summary file
bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
-e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-glm5 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
exit ${TEST_EXIT_CODE:-0}
- name: Performance Test (8-GPU GLM-5)
timeout-minutes: 120
continue-on-error: true # Perf test failure doesn't fail the job if accuracy passed
run: |
> github_summary.md # Clear summary file
bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
-e SGLANG_USE_AITER=1 \
-e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
python3 run_suite.py --hw amd --suite nightly-perf-8-gpu-glm5 --nightly --timeout-per-file 5400 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
exit ${TEST_EXIT_CODE:-0}
# 8-GPU GLM-5.1 (Accuracy + Performance combined)
nightly-8-gpu-glm51:
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-glm51,'))
runs-on: linux-mi325-8gpu-sglang
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
ref: ${{ inputs.ref || github.ref }}
- name: Setup docker
run: |
touch github_summary.md
bash scripts/ci/amd/amd_ci_start_container.sh
env:
GITHUB_WORKSPACE: ${{ github.workspace }}
- name: Install dependencies
run: |
bash scripts/ci/amd/amd_ci_install_dependency.sh
bash scripts/ci/amd/amd_ci_exec.sh pip install git+https://github.com/huggingface/transformers.git@96f807a33b75
- name: Accuracy Test (8-GPU GLM-5.1 NSA)
timeout-minutes: 120
run: |
> github_summary.md # Clear summary file
bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
-e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-glm51 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
exit ${TEST_EXIT_CODE:-0}
- name: Performance Test (8-GPU GLM-5.1)
timeout-minutes: 120
continue-on-error: true
run: |
> github_summary.md # Clear summary file
bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
-e SGLANG_USE_AITER=1 \
-e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
python3 run_suite.py --hw amd --suite nightly-perf-8-gpu-glm51 --nightly --timeout-per-file 5400 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
exit ${TEST_EXIT_CODE:-0}
# 8-GPU MiniMax-M2.7 (Accuracy + Performance combined, replaces M2.5)
nightly-8-gpu-minimax-m27:
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-minimax-m27,'))
runs-on: linux-mi325-8gpu-sglang
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
ref: ${{ inputs.ref || github.ref }}
- name: Setup docker
run: |
touch github_summary.md
bash scripts/ci/amd/amd_ci_start_container.sh
env:
GITHUB_WORKSPACE: ${{ github.workspace }}
- name: Install dependencies
run: bash scripts/ci/amd/amd_ci_install_dependency.sh
- name: Accuracy Test (8-GPU MiniMax-M2.7)
timeout-minutes: 120
run: |
> github_summary.md # Clear summary file
bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
-e SGLANG_USE_AITER=1 \
-e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-minimax-m27 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
exit ${TEST_EXIT_CODE:-0}
- name: Performance Test (8-GPU MiniMax-M2.7)
timeout-minutes: 120
continue-on-error: true # Perf test failure doesn't fail the job if accuracy passed
run: |
> github_summary.md # Clear summary file
bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
-e SGLANG_USE_AITER=1 \
-e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
python3 run_suite.py --hw amd --suite nightly-perf-8-gpu-minimax-m27 --nightly --timeout-per-file 5400 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
exit ${TEST_EXIT_CODE:-0}
# ============================================== MI30x Diffusion Tests ==============================================
# 1-GPU Z-Image-Turbo (Diffusion T2I)
nightly-1-gpu-zimage-turbo:
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-1-gpu-zimage-turbo,'))
runs-on: linux-mi325-1gpu-sglang
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
ref: ${{ inputs.ref || github.ref }}
- name: Setup docker
run: |
touch github_summary.md
bash scripts/ci/amd/amd_ci_start_container.sh
env:
GITHUB_WORKSPACE: ${{ github.workspace }}
- name: Install dependencies
run: bash scripts/ci/amd/amd_ci_install_dependency.sh
- name: Z-Image-Turbo Diffusion Test (1-GPU)
timeout-minutes: 45
run: |
bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout \
-e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
-e SGLANG_DIFFUSION_ARTIFACT_DIR="/sglang-checkout/diffusion-artifacts" \
pytest test/registered/amd/test_zimage_turbo.py -v -s ${{ inputs.continue_on_error && '|| true' || '' }} || TEST_EXIT_CODE=$?
echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
exit ${TEST_EXIT_CODE:-0}
- name: Upload generated images
if: always()
uses: actions/upload-artifact@v4
with:
name: zimage-turbo-outputs
path: diffusion-artifacts/
if-no-files-found: ignore
retention-days: 30
# ============================================== MI35x Tests ==============================================
# MI35x 1-GPU tests - platform-agnostic tests that may work on CDNA4 (gfx950)
nightly-test-1-gpu-mi35x:
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-test-1-gpu-mi35x,'))
runs-on: linux-mi35x-gpu-1
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
ref: ${{ inputs.ref || github.ref }}
- name: Setup docker
run: |
touch github_summary.md
bash scripts/ci/amd/amd_ci_start_container.sh
env:
GITHUB_WORKSPACE: ${{ github.workspace }}
- name: Install dependencies
run: |
bash scripts/ci/amd/amd_ci_install_dependency.sh
# Install tabulate for run_suite.py (missing in MI35x container)
bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate
- name: Nightly Test MI35x (1-GPU)
timeout-minutes: 90
run: |
bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
-e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
python3 run_suite.py --hw amd --suite nightly-amd-1-gpu-mi35x --nightly --timeout-per-file 900 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
exit ${TEST_EXIT_CODE:-0}
# MI35x 8-GPU Accuracy Tests - GPT-OSS (accuracy only)
nightly-accuracy-8-gpu-mi35x:
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-accuracy-8-gpu-mi35x,'))
runs-on: linux-mi35x-gpu-8
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
ref: ${{ inputs.ref || github.ref }}
- name: Setup docker
run: |
touch github_summary.md
bash scripts/ci/amd/amd_ci_start_container.sh
env:
GITHUB_WORKSPACE: ${{ github.workspace }}
- name: Install dependencies
run: |
bash scripts/ci/amd/amd_ci_install_dependency.sh
# Install tabulate for run_suite.py (missing in MI35x container)
bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate
- name: Accuracy Test MI35x (8-GPU GPT-OSS)
timeout-minutes: 180
run: |
bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
-e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
python3 run_suite.py --hw amd --suite nightly-amd-8-gpu-mi35x --nightly --timeout-per-file 7200 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
exit ${TEST_EXIT_CODE:-0}
# MI35x 8-GPU Grok1-INT4 (Accuracy + Performance combined)
nightly-8-gpu-mi35x-grok1-int4:
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-mi35x-grok1-int4,'))
runs-on: linux-mi35x-gpu-8
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
ref: ${{ inputs.ref || github.ref }}
- name: Setup docker
run: |
touch github_summary.md
bash scripts/ci/amd/amd_ci_start_container.sh
env:
GITHUB_WORKSPACE: ${{ github.workspace }}
- name: Install dependencies
run: |
bash scripts/ci/amd/amd_ci_install_dependency.sh
# Install tabulate for run_suite.py (missing in MI35x container)
bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate
- name: Accuracy Test MI35x (8-GPU Grok1-INT4)
timeout-minutes: 90
run: |
> github_summary.md # Clear summary file
bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
-e RCCL_MSCCL_ENABLE=0 \
-e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-mi35x-grok1-int4 --nightly --timeout-per-file 5400 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
exit ${TEST_EXIT_CODE:-0}
- name: Performance Test MI35x (8-GPU Grok1-INT4)
timeout-minutes: 60
continue-on-error: true # Perf test failure doesn't fail the job if accuracy passed
run: |
> github_summary.md # Clear summary file
bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
-e RCCL_MSCCL_ENABLE=0 \
-e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
python3 run_suite.py --hw amd --suite nightly-perf-8-gpu-mi35x-grok1-int4 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
exit ${TEST_EXIT_CODE:-0}
# MI35x 8-GPU Grok2 (Accuracy + Performance combined)
nightly-8-gpu-mi35x-grok2:
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-mi35x-grok2,'))
runs-on: linux-mi35x-gpu-8
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
ref: ${{ inputs.ref || github.ref }}
- name: Setup docker
run: |
touch github_summary.md
bash scripts/ci/amd/amd_ci_start_container.sh
env:
GITHUB_WORKSPACE: ${{ github.workspace }}
- name: Install dependencies
run: |
bash scripts/ci/amd/amd_ci_install_dependency.sh
# Install tabulate for run_suite.py (missing in MI35x container)
bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate
- name: Accuracy Test MI35x (8-GPU Grok2)
timeout-minutes: 60
run: |
> github_summary.md # Clear summary file
bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
-e RCCL_MSCCL_ENABLE=0 \
-e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-mi35x-grok2 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
exit ${TEST_EXIT_CODE:-0}
- name: Performance Test MI35x (8-GPU Grok2)
timeout-minutes: 60
continue-on-error: true # Perf test failure doesn't fail the job if accuracy passed
run: |
> github_summary.md # Clear summary file
bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
-e RCCL_MSCCL_ENABLE=0 \
-e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
python3 run_suite.py --hw amd --suite nightly-perf-8-gpu-mi35x-grok2 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
exit ${TEST_EXIT_CODE:-0}
# MI35x 8-GPU DeepSeek-R1-MXFP4 (Accuracy + Performance combined)
nightly-8-gpu-mi35x-deepseek-r1-mxfp4:
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-mi35x-deepseek-r1-mxfp4,'))
runs-on: linux-mi35x-gpu-8
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
ref: ${{ inputs.ref || github.ref }}
- name: Setup docker
run: |
touch github_summary.md
bash scripts/ci/amd/amd_ci_start_container.sh
env:
GITHUB_WORKSPACE: ${{ github.workspace }}
- name: Install dependencies
run: |
bash scripts/ci/amd/amd_ci_install_dependency.sh
# Install tabulate for run_suite.py (missing in MI35x container)
bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate
- name: Accuracy Test MI35x (8-GPU DeepSeek-R1-MXFP4)
timeout-minutes: 180
run: |
> github_summary.md # Clear summary file
bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
-e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
python3 run_suite.py --hw amd --suite nightly-amd-8-gpu-mi35x-deepseek-r1-mxfp4 --nightly --timeout-per-file 7200 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
exit ${TEST_EXIT_CODE:-0}
- name: Performance Test MI35x (8-GPU DeepSeek-R1-MXFP4)
timeout-minutes: 300
continue-on-error: true # Perf test failure doesn't fail the job if accuracy passed
run: |
> github_summary.md # Clear summary file
bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
-e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
python3 registered/amd/perf/mi35x/test_deepseek_r1_mxfp4_perf_mi35x.py || TEST_EXIT_CODE=$?
echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
exit ${TEST_EXIT_CODE:-0}
# MI35x 8-GPU DeepSeek-R1-MXFP4 KV FP8 (Accuracy + Performance combined)
nightly-8-gpu-mi35x-deepseek-r1-mxfp4-kv-fp8:
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-mi35x-deepseek-r1-mxfp4-kv-fp8,'))
runs-on: linux-mi35x-gpu-8
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
ref: ${{ inputs.ref || github.ref }}
- name: Setup docker
run: |
touch github_summary.md
bash scripts/ci/amd/amd_ci_start_container.sh
env:
GITHUB_WORKSPACE: ${{ github.workspace }}
- name: Install dependencies
run: |
bash scripts/ci/amd/amd_ci_install_dependency.sh
# Install tabulate for run_suite.py (missing in MI35x container)
bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate
- name: Accuracy Test MI35x (8-GPU DeepSeek-R1-MXFP4 KV FP8)
timeout-minutes: 180
run: |
> github_summary.md # Clear summary file
bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
-e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
python3 run_suite.py --hw amd --suite nightly-amd-8-gpu-mi35x-deepseek-r1-mxfp4-kv-fp8 --nightly --timeout-per-file 7200 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
exit ${TEST_EXIT_CODE:-0}
- name: Performance Test MI35x (8-GPU DeepSeek-R1-MXFP4 KV FP8)
timeout-minutes: 300
continue-on-error: true # Perf test failure doesn't fail the job if accuracy passed
run: |
> github_summary.md # Clear summary file
bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
-e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
python3 registered/amd/perf/mi35x/test_deepseek_r1_mxfp4_kv_fp8_perf_mi35x.py || TEST_EXIT_CODE=$?
echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
exit ${TEST_EXIT_CODE:-0}
# MI35x 8-GPU DeepSeek-R1-MXFP4 AllReduce Fusion (Accuracy + Performance combined)
nightly-8-gpu-mi35x-deepseek-r1-mxfp4-ar-fusion:
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-mi35x-deepseek-r1-mxfp4-ar-fusion,'))
runs-on: linux-mi35x-gpu-8
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
ref: ${{ inputs.ref || github.ref }}
- name: Setup docker
run: |
touch github_summary.md
bash scripts/ci/amd/amd_ci_start_container.sh
env:
GITHUB_WORKSPACE: ${{ github.workspace }}
- name: Install dependencies
run: |
bash scripts/ci/amd/amd_ci_install_dependency.sh
# Install tabulate for run_suite.py (missing in MI35x container)
bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate
- name: Accuracy Test MI35x (8-GPU DeepSeek-R1-MXFP4 AllReduce Fusion)
timeout-minutes: 180
run: |
> github_summary.md # Clear summary file
bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
-e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
python3 run_suite.py --hw amd --suite nightly-amd-8-gpu-mi35x-deepseek-r1-mxfp4-ar-fusion --nightly --timeout-per-file 7200 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
exit ${TEST_EXIT_CODE:-0}
- name: Performance Test MI35x (8-GPU DeepSeek-R1-MXFP4 AllReduce Fusion)
timeout-minutes: 300
continue-on-error: true # Perf test failure doesn't fail the job if accuracy passed
run: |
> github_summary.md # Clear summary file
bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
-e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
python3 registered/amd/perf/mi35x/test_deepseek_r1_mxfp4_ar_fusion_perf_mi35x.py || TEST_EXIT_CODE=$?
echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
exit ${TEST_EXIT_CODE:-0}
# MI35x 8-GPU DeepSeek-V3.2 Accuracy Test
nightly-accuracy-8-gpu-mi35x-deepseek-v32:
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-accuracy-8-gpu-mi35x-deepseek-v32,'))
runs-on: linux-mi35x-gpu-8
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
ref: ${{ inputs.ref || github.ref }}
- name: Setup docker
run: |
touch github_summary.md
bash scripts/ci/amd/amd_ci_start_container.sh
env:
GITHUB_WORKSPACE: ${{ github.workspace }}
- name: Install dependencies
run: |
bash scripts/ci/amd/amd_ci_install_dependency.sh
# Install tabulate for run_suite.py (missing in MI35x container)
bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate
- name: Accuracy Test MI35x (8-GPU DeepSeek-V3.2)
timeout-minutes: 120
run: |
> github_summary.md # Clear summary file
bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
-e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
python3 run_suite.py --hw amd --suite nightly-amd-8-gpu-mi35x-deepseek-v32 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
exit ${TEST_EXIT_CODE:-0}
# MI35x 8-GPU DeepSeek-V3.2 TP+MTP Accuracy Test
nightly-accuracy-8-gpu-mi35x-deepseek-v32-mtp:
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-accuracy-8-gpu-mi35x-deepseek-v32-mtp,'))
runs-on: linux-mi35x-gpu-8
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
ref: ${{ inputs.ref || github.ref }}
- name: Setup docker
run: |
touch github_summary.md
bash scripts/ci/amd/amd_ci_start_container.sh
env:
GITHUB_WORKSPACE: ${{ github.workspace }}
- name: Install dependencies
run: |
bash scripts/ci/amd/amd_ci_install_dependency.sh
# Install tabulate for run_suite.py (missing in MI35x container)
bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate
- name: Accuracy Test MI35x (8-GPU DeepSeek-V3.2 TP+MTP)
timeout-minutes: 120
run: |
> github_summary.md # Clear summary file
bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
-e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-mi35x-deepseek-v32-mtp --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
exit ${TEST_EXIT_CODE:-0}
# MI35x 8-GPU DeepSeek-V3.2 Performance Test (Basic)
nightly-perf-8-gpu-mi35x-deepseek-v32-basic:
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-perf-8-gpu-mi35x-deepseek-v32-basic,'))
runs-on: linux-mi35x-gpu-8
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
ref: ${{ inputs.ref || github.ref }}
- name: Setup docker
run: |
touch github_summary.md
bash scripts/ci/amd/amd_ci_start_container.sh
env:
GITHUB_WORKSPACE: ${{ github.workspace }}
- name: Install dependencies
run: |
bash scripts/ci/amd/amd_ci_install_dependency.sh
# Install tabulate for run_suite.py (missing in MI35x container)
bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate
- name: Performance Test MI35x (8-GPU DeepSeek-V3.2 Basic)
timeout-minutes: 150
run: |
> github_summary.md # Clear summary file
bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
-e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
python3 run_suite.py --hw amd --suite nightly-perf-8-gpu-mi35x-deepseek-v32-basic --nightly --timeout-per-file 5400 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
exit ${TEST_EXIT_CODE:-0}
# MI35x 8-GPU Kimi-K2.5 (Accuracy)
nightly-8-gpu-mi35x-kimi-k25:
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-mi35x-kimi-k25,'))
runs-on: linux-mi35x-gpu-8
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
ref: ${{ inputs.ref || github.ref }}
- name: Setup docker
run: |
touch github_summary.md
bash scripts/ci/amd/amd_ci_start_container.sh
env:
GITHUB_WORKSPACE: ${{ github.workspace }}
- name: Install dependencies
run: |
bash scripts/ci/amd/amd_ci_install_dependency.sh
# Install tabulate for run_suite.py (missing in MI35x container)
bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate
- name: Accuracy Test MI35x (8-GPU Kimi-K2.5)
timeout-minutes: 180
run: |
> github_summary.md # Clear summary file
bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
-e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-mi35x-kimi-k25 --nightly --timeout-per-file 7200 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
exit ${TEST_EXIT_CODE:-0}
# MI35x 8-GPU Qwen3-235B-MXFP4 (Accuracy + Performance)
nightly-8-gpu-mi35x-qwen3-235b-mxfp4:
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-mi35x-qwen3-235b-mxfp4,'))
runs-on: linux-mi35x-gpu-8
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
ref: ${{ inputs.ref || github.ref }}
- name: Setup docker
run: |
touch github_summary.md
bash scripts/ci/amd/amd_ci_start_container.sh
env:
GITHUB_WORKSPACE: ${{ github.workspace }}
- name: Install dependencies
run: |
bash scripts/ci/amd/amd_ci_install_dependency.sh
# Install tabulate for run_suite.py (missing in MI35x container)
bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate
- name: Accuracy Test + Performance Test MI35x (8-GPU Qwen3-235B-MXFP4)
timeout-minutes: 120
run: |
> github_summary.md # Clear summary file
bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
-e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
python3 run_suite.py --hw amd --suite nightly-8-gpu-mi35x-qwen3-235b-mxfp4 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
exit ${TEST_EXIT_CODE:-0}
# MI35x 8-GPU Qwen 3.5 (Accuracy + Performance combined)
nightly-8-gpu-mi35x-qwen35:
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-mi35x-qwen35,'))
runs-on: linux-mi35x-gpu-8
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
ref: ${{ inputs.ref || github.ref }}
- name: Setup docker
run: |
touch github_summary.md
bash scripts/ci/amd/amd_ci_start_container.sh
env:
GITHUB_WORKSPACE: ${{ github.workspace }}
- name: Install dependencies
run: |
bash scripts/ci/amd/amd_ci_install_dependency.sh
bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate
bash scripts/ci/amd/amd_ci_exec.sh pip install mistral-common "lm-eval[api]"
- name: Accuracy Test MI35x (8-GPU Qwen 3.5)
timeout-minutes: 120
run: |
> github_summary.md # Clear summary file
bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
-e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-mi35x-qwen35 --nightly --timeout-per-file 3600 || TEST_EXIT_CODE=$?
echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
exit ${TEST_EXIT_CODE:-0}
- name: Performance Test MI35x (8-GPU Qwen 3.5 FP8)
timeout-minutes: 120
continue-on-error: true
run: |
> github_summary.md # Clear summary file
bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
-e SGLANG_USE_AITER=1 \
-e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
python3 run_suite.py --hw amd --suite nightly-perf-8-gpu-mi35x-qwen35-fp8 --nightly --timeout-per-file 5400 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
exit ${TEST_EXIT_CODE:-0}
# MI35x 8-GPU GLM-5 (Accuracy only)
nightly-8-gpu-mi35x-glm5:
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-mi35x-glm5,'))
runs-on: linux-mi35x-gpu-8
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
ref: ${{ inputs.ref || github.ref }}
- name: Setup docker
run: |
touch github_summary.md
bash scripts/ci/amd/amd_ci_start_container.sh
env:
GITHUB_WORKSPACE: ${{ github.workspace }}
- name: Install dependencies
run: |
bash scripts/ci/amd/amd_ci_install_dependency.sh
# Install tabulate for run_suite.py (missing in MI35x container)
bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate
bash scripts/ci/amd/amd_ci_exec.sh pip install git+https://github.com/huggingface/transformers.git@96f807a33b75
- name: Accuracy Test MI35x (8-GPU GLM-5 NSA)
timeout-minutes: 180
run: |
> github_summary.md # Clear summary file
bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
-e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
python3 run_suite.py --hw amd --suite nightly-amd-8-gpu-mi35x-glm5 --nightly --timeout-per-file 7200 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
exit ${TEST_EXIT_CODE:-0}
# MI35x 8-GPU GLM-5.1 (Accuracy + Performance combined)
nightly-8-gpu-mi35x-glm51:
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-mi35x-glm51,'))
runs-on: linux-mi35x-gpu-8
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
ref: ${{ inputs.ref || github.ref }}
- name: Setup docker
run: |
touch github_summary.md
bash scripts/ci/amd/amd_ci_start_container.sh
env:
GITHUB_WORKSPACE: ${{ github.workspace }}
- name: Install dependencies
run: |
bash scripts/ci/amd/amd_ci_install_dependency.sh
bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate
bash scripts/ci/amd/amd_ci_exec.sh pip install git+https://github.com/huggingface/transformers.git@96f807a33b75
- name: Accuracy Test MI35x (8-GPU GLM-5.1 NSA)
timeout-minutes: 180
run: |
> github_summary.md # Clear summary file
bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
-e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
python3 run_suite.py --hw amd --suite nightly-amd-8-gpu-mi35x-glm51 --nightly --timeout-per-file 7200 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
exit ${TEST_EXIT_CODE:-0}
- name: Performance Test MI35x (8-GPU GLM-5.1)
timeout-minutes: 120
continue-on-error: true
run: |
> github_summary.md # Clear summary file
bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
-e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
python3 run_suite.py --hw amd --suite nightly-perf-8-gpu-mi35x-glm51 --nightly --timeout-per-file 5400 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
exit ${TEST_EXIT_CODE:-0}
# MI35x 8-GPU DeepSeek-V3.2 Performance Test (MTP)
nightly-perf-8-gpu-mi35x-deepseek-v32-mtp:
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-perf-8-gpu-mi35x-deepseek-v32-mtp,'))
runs-on: linux-mi35x-gpu-8
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
ref: ${{ inputs.ref || github.ref }}
- name: Setup docker
run: |
touch github_summary.md
bash scripts/ci/amd/amd_ci_start_container.sh
env:
GITHUB_WORKSPACE: ${{ github.workspace }}
- name: Install dependencies
run: |
bash scripts/ci/amd/amd_ci_install_dependency.sh
# Install tabulate for run_suite.py (missing in MI35x container)
bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate
- name: Performance Test MI35x (8-GPU DeepSeek-V3.2 MTP)
timeout-minutes: 180
run: |
> github_summary.md # Clear summary file
bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
-e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
python3 run_suite.py --hw amd --suite nightly-perf-8-gpu-mi35x-deepseek-v32-mtp --nightly --timeout-per-file 7200 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
exit ${TEST_EXIT_CODE:-0}
check-all-jobs:
if: always() && (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request' || github.event_name == 'workflow_dispatch')
needs:
# MI30x Unit Tests
- nightly-test-1-gpu-unit
# MI30x Accuracy Tests
- nightly-accuracy-2-gpu
- nightly-accuracy-2-gpu-vlm
# MI30x 4-GPU Tests
- nightly-4-gpu
- nightly-accuracy-8-gpu
# MI30x Performance Tests - excluded from check (perf failures don't block CI)
# - nightly-perf-2-gpu-text
# - nightly-perf-2-gpu-vlm
# MI30x Combined Accuracy + Performance Tests
- nightly-8-gpu-grok1-int4
- nightly-8-gpu-grok2
- nightly-8-gpu-deepseek-v31
- nightly-8-gpu-deepseek-v32
- nightly-8-gpu-deepseek-v32-mtp
- nightly-8-gpu-deepseek-v3-kv-fp8
- nightly-8-gpu-kimi-k25
- nightly-8-gpu-qwen3-235b
- nightly-8-gpu-qwen35
- nightly-8-gpu-glm5
- nightly-8-gpu-glm51
- nightly-8-gpu-minimax-m27
# MI30x Diffusion Tests
- nightly-1-gpu-zimage-turbo
# MI35x jobs
- nightly-test-1-gpu-mi35x
- nightly-accuracy-8-gpu-mi35x
- nightly-8-gpu-mi35x-grok1-int4
- nightly-8-gpu-mi35x-grok2
- nightly-8-gpu-mi35x-deepseek-r1-mxfp4
- nightly-8-gpu-mi35x-deepseek-r1-mxfp4-kv-fp8
- nightly-8-gpu-mi35x-deepseek-r1-mxfp4-ar-fusion
- nightly-accuracy-8-gpu-mi35x-deepseek-v32
- nightly-accuracy-8-gpu-mi35x-deepseek-v32-mtp
- nightly-8-gpu-mi35x-kimi-k25
- nightly-8-gpu-mi35x-qwen3-235b-mxfp4
- nightly-8-gpu-mi35x-qwen35
- nightly-8-gpu-mi35x-glm5
- nightly-8-gpu-mi35x-glm51
# MI35x perf jobs excluded from check - perf failures don't block CI
# - nightly-perf-8-gpu-mi35x-deepseek-v32-basic
# - nightly-perf-8-gpu-mi35x-deepseek-v32-mtp
runs-on: ubuntu-latest
steps:
- name: Check if any job failed
run: |
if [[ "${{ contains(needs.*.result, 'failure') }}" == "true" ]]; then
echo "One or more nightly test jobs failed"
exit 1
fi
if [[ "${{ contains(needs.*.result, 'cancelled') }}" == "true" ]]; then
echo "One or more nightly test jobs were cancelled"
exit 1
fi
echo "All nightly test jobs passed"