Nightly Test (AMD) #79
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # Nightly AMD test workflow. Triggers: daily schedule, pushes to main that touch the version file, | |
| # manual dispatch (with job selection inputs), and invocation as a reusable workflow. | |
| name: Nightly Test (AMD) | |
| on: | |
| # Every day at 17:30 (GitHub Actions evaluates cron schedules in UTC). | |
| schedule: | |
| - cron: '30 17 * * *' | |
| # Pushes to main run the suite only when the version file changes. | |
| push: | |
| branches: | |
| - main | |
| paths: | |
| - "python/sglang/version.py" | |
| # Manual runs: optional AITER override, fail-soft toggle, and two ways to pick jobs. | |
| workflow_dispatch: | |
| inputs: | |
| aiter_ref: | |
| description: 'Override AITER commit (optional, leave empty to use Dockerfile default)' | |
| required: false | |
| type: string | |
| default: '' | |
| continue_on_error: | |
| description: 'Continue on error (do not fail the workflow on test failures)' | |
| required: false | |
| type: boolean | |
| default: true | |
| # Dropdown of job ids; each job's `if:` compares this value against its own name. | |
| job_select: | |
| description: 'Select a job to run from dropdown (choose "all" to run all jobs)' | |
| required: false | |
| type: choice | |
| default: 'all' | |
| options: | |
| - 'all' | |
| - nightly-test-1-gpu-unit | |
| - nightly-accuracy-2-gpu | |
| - nightly-accuracy-2-gpu-vlm | |
| - nightly-perf-2-gpu-text | |
| - nightly-perf-2-gpu-vlm | |
| - nightly-4-gpu | |
| - nightly-accuracy-8-gpu | |
| - nightly-8-gpu-grok1-int4 | |
| - nightly-8-gpu-grok2 | |
| - nightly-8-gpu-deepseek-v31 | |
| - nightly-8-gpu-deepseek-v32 | |
| - nightly-8-gpu-deepseek-v32-mtp | |
| - nightly-8-gpu-deepseek-v3-kv-fp8 | |
| - nightly-8-gpu-kimi-k25 | |
| - nightly-8-gpu-qwen3-235b | |
| - nightly-8-gpu-qwen35 | |
| - nightly-8-gpu-glm5 | |
| - nightly-8-gpu-glm51 | |
| - nightly-8-gpu-glm51-mxfp4 | |
| - nightly-8-gpu-minimax-m27 | |
| - nightly-1-gpu-zimage-turbo | |
| - nightly-test-1-gpu-mi35x | |
| - nightly-accuracy-8-gpu-mi35x | |
| - nightly-8-gpu-mi35x-grok1-int4 | |
| - nightly-8-gpu-mi35x-grok2 | |
| - nightly-8-gpu-mi35x-deepseek-r1-mxfp4 | |
| - nightly-8-gpu-mi35x-deepseek-r1-mxfp4-kv-fp8 | |
| - nightly-8-gpu-mi35x-deepseek-r1-mxfp4-ar-fusion | |
| - nightly-accuracy-8-gpu-mi35x-deepseek-v32 | |
| - nightly-accuracy-8-gpu-mi35x-deepseek-v32-mtp | |
| - nightly-perf-8-gpu-mi35x-deepseek-v32-basic | |
| - nightly-perf-8-gpu-mi35x-deepseek-v32-mtp | |
| - nightly-8-gpu-mi35x-kimi-k25 | |
| - nightly-8-gpu-mi35x-qwen3-235b-mxfp4 | |
| - nightly-8-gpu-mi35x-qwen35 | |
| - nightly-8-gpu-mi35x-glm5 | |
| - nightly-8-gpu-mi35x-glm51 | |
| - nightly-8-gpu-mi35x-glm51-mxfp4 | |
| # Free-text alternative to the dropdown; job conditions read `inputs.job_filter || inputs.job_select`, | |
| # so a non-empty value here wins. Names are matched as exact comma-delimited tokens. | |
| job_filter: | |
| description: 'Or type comma-separated job names (overrides dropdown if non-empty)' | |
| required: false | |
| type: string | |
| default: '' | |
| # Reusable-workflow entry point; adds `ref` so a caller can choose what to check out. | |
| workflow_call: | |
| inputs: | |
| ref: | |
| description: 'Git ref (branch, tag, or SHA) to test. If not provided, uses the default branch.' | |
| required: false | |
| type: string | |
| default: '' | |
| aiter_ref: | |
| description: 'Override AITER commit (optional, leave empty to use Dockerfile default)' | |
| required: false | |
| type: string | |
| default: '' | |
| job_filter: | |
| description: 'Select which job to run (leave empty or "all" to run all jobs)' | |
| required: false | |
| type: string | |
| default: 'all' | |
| continue_on_error: | |
| description: 'Continue on error (do not fail the workflow on test failures)' | |
| required: false | |
| type: boolean | |
| default: true | |
| # Workflow-level environment: passes the aiter_ref input through as AITER_COMMIT_OVERRIDE | |
| # (empty string when the input is not supplied). | |
| env: | |
| AITER_COMMIT_OVERRIDE: ${{ inputs.aiter_ref }} | |
| concurrency: | |
| # When called via workflow_call with ref set, use a unique group per caller run to avoid | |
| # collisions with direct schedule/push triggers. We use inputs.ref (not github.event_name) | |
| # to detect this, because github.event_name inherits from the caller in workflow_call. | |
| group: nightly-test-amd-${{ inputs.ref && format('caller-{0}', github.run_id) || github.ref }} | |
| # Cancel superseded runs only for direct triggers (no ref input). inputs.ref is the sole | |
| # discriminator: as noted above, github.event_name is never 'workflow_call' in a called | |
| # workflow, so the former `github.event_name != 'workflow_call'` clause was always true. | |
| cancel-in-progress: ${{ !inputs.ref }} | |
| jobs: | |
| # ============================================== MI30x Unit Tests ============================================== | |
| # 1-GPU Unit Tests - LoRA, debug utils, scheduler, etc. (MI30x only) | |
| nightly-test-1-gpu-unit: | |
| # Gate: upstream repo (or pull_request events) only; runs when no filter is set, the filter is 'all', | |
| # or this job's name appears in the comma-separated filter (job_filter takes precedence over job_select). | |
| if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-test-1-gpu-unit,')) | |
| runs-on: linux-mi325-1gpu-sglang | |
| steps: | |
| # Check out inputs.ref when provided, otherwise the ref that triggered the run. | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| with: | |
| ref: ${{ inputs.ref || github.ref }} | |
| # Create an empty summary file on the runner, then run the container start script. | |
| - name: Setup docker | |
| run: | | |
| touch github_summary.md | |
| bash scripts/ci/amd/amd_ci_start_container.sh | |
| env: | |
| GITHUB_WORKSPACE: ${{ github.workspace }} | |
| - name: Install dependencies | |
| run: bash scripts/ci/amd/amd_ci_install_dependency.sh | |
| # A failing suite records its exit code instead of aborting, so the summary file is still appended to | |
| # $GITHUB_STEP_SUMMARY before the step exits with that code. | |
| - name: Nightly Unit Test (1-GPU) | |
| timeout-minutes: 90 | |
| run: | | |
| bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \ | |
| -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \ | |
| python3 run_suite.py --hw amd --suite nightly-amd-1-gpu --nightly --timeout-per-file 900 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$? | |
| echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true | |
| exit ${TEST_EXIT_CODE:-0} | |
| # ============================================== MI30x Accuracy Tests ============================================== | |
| # 2-GPU Accuracy Tests - GSM8K eval (MI30x only) | |
| nightly-accuracy-2-gpu: | |
| # Gate: upstream repo (or pull_request events) only; runs when no filter is set, the filter is 'all', | |
| # or this job's name appears in the comma-separated filter (job_filter takes precedence over job_select). | |
| if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-accuracy-2-gpu,')) | |
| runs-on: linux-mi325-2gpu-sglang | |
| steps: | |
| # Check out inputs.ref when provided, otherwise the ref that triggered the run. | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| with: | |
| ref: ${{ inputs.ref || github.ref }} | |
| # Create an empty summary file on the runner, then run the container start script. | |
| - name: Setup docker | |
| run: | | |
| touch github_summary.md | |
| bash scripts/ci/amd/amd_ci_start_container.sh | |
| env: | |
| GITHUB_WORKSPACE: ${{ github.workspace }} | |
| - name: Install dependencies | |
| run: bash scripts/ci/amd/amd_ci_install_dependency.sh | |
| # A failing suite records its exit code instead of aborting, so the summary file is still appended to | |
| # $GITHUB_STEP_SUMMARY before the step exits with that code. | |
| # NOTE(review): unlike the sibling jobs, this step sets no timeout-minutes - confirm that is intended. | |
| - name: Nightly Test (2-GPU) | |
| run: | | |
| > github_summary.md # Clear summary file | |
| bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \ | |
| -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \ | |
| python3 run_suite.py --hw amd --suite nightly-amd --nightly --timeout-per-file 7200 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$? | |
| echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true | |
| exit ${TEST_EXIT_CODE:-0} | |
| # 2-GPU VLM Accuracy Tests - Vision-Language Models MMMU evaluation | |
| nightly-accuracy-2-gpu-vlm: | |
| # Gate: upstream repo (or pull_request events) only; runs when no filter is set, the filter is 'all', | |
| # or this job's name appears in the comma-separated filter (job_filter takes precedence over job_select). | |
| if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-accuracy-2-gpu-vlm,')) | |
| runs-on: linux-mi325-2gpu-sglang | |
| steps: | |
| # Check out inputs.ref when provided, otherwise the ref that triggered the run. | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| with: | |
| ref: ${{ inputs.ref || github.ref }} | |
| # Create an empty summary file on the runner, then run the container start script. | |
| - name: Setup docker | |
| run: | | |
| touch github_summary.md | |
| bash scripts/ci/amd/amd_ci_start_container.sh | |
| env: | |
| GITHUB_WORKSPACE: ${{ github.workspace }} | |
| - name: Install dependencies | |
| run: bash scripts/ci/amd/amd_ci_install_dependency.sh | |
| # A failing suite records its exit code instead of aborting, so the summary file is still appended to | |
| # $GITHUB_STEP_SUMMARY before the step exits with that code. | |
| - name: Nightly Accuracy Test (2-GPU VLM MMMU) | |
| timeout-minutes: 180 | |
| run: | | |
| > github_summary.md # Clear summary file | |
| bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \ | |
| -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \ | |
| python3 run_suite.py --hw amd --suite nightly-amd-accuracy-2-gpu-vlm --nightly --timeout-per-file 7200 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$? | |
| echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true | |
| exit ${TEST_EXIT_CODE:-0} | |
| # 2-GPU Text Models Performance Tests | |
| nightly-perf-2-gpu-text: | |
| # Gate: upstream repo (or pull_request events) only; runs when no filter is set, the filter is 'all', | |
| # or this job's name appears in the comma-separated filter (job_filter takes precedence over job_select). | |
| if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-perf-2-gpu-text,')) | |
| runs-on: linux-mi325-2gpu-sglang | |
| steps: | |
| # Check out inputs.ref when provided, otherwise the ref that triggered the run. | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| with: | |
| ref: ${{ inputs.ref || github.ref }} | |
| # Create an empty summary file on the runner, then run the container start script. | |
| - name: Setup docker | |
| run: | | |
| touch github_summary.md | |
| bash scripts/ci/amd/amd_ci_start_container.sh | |
| env: | |
| GITHUB_WORKSPACE: ${{ github.workspace }} | |
| - name: Install dependencies | |
| run: bash scripts/ci/amd/amd_ci_install_dependency.sh | |
| # Runs the suite with SGLANG_USE_AITER=1. A failing suite records its exit code instead of aborting, | |
| # so the summary file is still appended to $GITHUB_STEP_SUMMARY before the step exits with that code. | |
| - name: Performance Test (2-GPU Text Models) | |
| timeout-minutes: 120 | |
| run: | | |
| > github_summary.md # Clear summary file | |
| bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \ | |
| -e SGLANG_USE_AITER=1 \ | |
| -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \ | |
| python3 run_suite.py --hw amd --suite nightly-amd-perf-text-2-gpu --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$? | |
| echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true | |
| exit ${TEST_EXIT_CODE:-0} | |
| # 2-GPU VLM Performance Tests | |
| nightly-perf-2-gpu-vlm: | |
| # Gate: upstream repo (or pull_request events) only; runs when no filter is set, the filter is 'all', | |
| # or this job's name appears in the comma-separated filter (job_filter takes precedence over job_select). | |
| if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-perf-2-gpu-vlm,')) | |
| runs-on: linux-mi325-2gpu-sglang | |
| steps: | |
| # Check out inputs.ref when provided, otherwise the ref that triggered the run. | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| with: | |
| ref: ${{ inputs.ref || github.ref }} | |
| # Create an empty summary file on the runner, then run the container start script. | |
| - name: Setup docker | |
| run: | | |
| touch github_summary.md | |
| bash scripts/ci/amd/amd_ci_start_container.sh | |
| env: | |
| GITHUB_WORKSPACE: ${{ github.workspace }} | |
| - name: Install dependencies | |
| run: bash scripts/ci/amd/amd_ci_install_dependency.sh | |
| # Runs the suite with SGLANG_USE_AITER=1. A failing suite records its exit code instead of aborting, | |
| # so the summary file is still appended to $GITHUB_STEP_SUMMARY before the step exits with that code. | |
| - name: Performance Test (2-GPU VLM Models) | |
| timeout-minutes: 180 | |
| run: | | |
| > github_summary.md # Clear summary file | |
| bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \ | |
| -e SGLANG_USE_AITER=1 \ | |
| -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \ | |
| python3 run_suite.py --hw amd --suite nightly-amd-perf-vlm-2-gpu --nightly --timeout-per-file 7200 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$? | |
| echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true | |
| exit ${TEST_EXIT_CODE:-0} | |
| # ============================================== MI30x 4-GPU Tests ============================================== | |
| # 4-GPU Nightly Tests - Dumper/Comparator E2E, VLM Encoder DP | |
| nightly-4-gpu: | |
| # Gate: upstream repo (or pull_request events) only; runs when no filter is set, the filter is 'all', | |
| # or this job's name appears in the comma-separated filter (job_filter takes precedence over job_select). | |
| if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-4-gpu,')) | |
| runs-on: linux-mi325-4gpu-sglang | |
| steps: | |
| # Check out inputs.ref when provided, otherwise the ref that triggered the run. | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| with: | |
| ref: ${{ inputs.ref || github.ref }} | |
| # Create an empty summary file on the runner, then run the container start script. | |
| - name: Setup docker | |
| run: | | |
| touch github_summary.md | |
| bash scripts/ci/amd/amd_ci_start_container.sh | |
| env: | |
| GITHUB_WORKSPACE: ${{ github.workspace }} | |
| - name: Install dependencies | |
| run: bash scripts/ci/amd/amd_ci_install_dependency.sh | |
| # A failing suite records its exit code instead of aborting, so the summary file is still appended to | |
| # $GITHUB_STEP_SUMMARY before the step exits with that code. | |
| # NOTE(review): --continue-on-error is passed unconditionally here, whereas the other jobs derive it | |
| # from inputs.continue_on_error - confirm the hard-coded flag is deliberate. | |
| - name: Nightly Test (4-GPU) | |
| timeout-minutes: 120 | |
| run: | | |
| > github_summary.md | |
| bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \ | |
| -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \ | |
| python3 run_suite.py --hw amd --suite nightly-amd-4-gpu --nightly --continue-on-error --timeout-per-file 3600 || TEST_EXIT_CODE=$? | |
| echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true | |
| exit ${TEST_EXIT_CODE:-0} | |
| # 8-GPU Accuracy Tests - GPT-OSS, Grok1-FP8 (accuracy only) | |
| nightly-accuracy-8-gpu: | |
| # Gate: upstream repo (or pull_request events) only; runs when no filter is set, the filter is 'all', | |
| # or this job's name appears in the comma-separated filter (job_filter takes precedence over job_select). | |
| if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-accuracy-8-gpu,')) | |
| runs-on: linux-mi325-8gpu-sglang | |
| steps: | |
| # Check out inputs.ref when provided, otherwise the ref that triggered the run. | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| with: | |
| ref: ${{ inputs.ref || github.ref }} | |
| # Create an empty summary file on the runner, then run the container start script. | |
| - name: Setup docker | |
| run: | | |
| touch github_summary.md | |
| bash scripts/ci/amd/amd_ci_start_container.sh | |
| env: | |
| GITHUB_WORKSPACE: ${{ github.workspace }} | |
| - name: Install dependencies | |
| run: bash scripts/ci/amd/amd_ci_install_dependency.sh | |
| # Each test step truncates the shared summary file first, as the other multi-step jobs do, so a step | |
| # publishes only its own results. A failing suite records its exit code instead of aborting, so the | |
| # summary is still appended to $GITHUB_STEP_SUMMARY before the step exits with that code. | |
| - name: Accuracy Test (8-GPU GPT-OSS) | |
| timeout-minutes: 180 | |
| run: | | |
| > github_summary.md # Clear summary file | |
| bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \ | |
| -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \ | |
| python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-gpt-oss --nightly --timeout-per-file 7200 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$? | |
| echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true | |
| exit ${TEST_EXIT_CODE:-0} | |
| # Without the truncation below, this step would re-publish whatever the GPT-OSS step left in the file. | |
| - name: Accuracy Test (8-GPU Grok1-FP8) | |
| timeout-minutes: 60 | |
| run: | | |
| > github_summary.md # Clear summary file | |
| bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \ | |
| -e RCCL_MSCCL_ENABLE=0 \ | |
| -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \ | |
| python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-grok1-fp8 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$? | |
| echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true | |
| exit ${TEST_EXIT_CODE:-0} | |
| # ============================================== MI30x Combined Accuracy + Performance Tests ============================================== | |
| # 8-GPU Grok1-INT4 (Accuracy + Performance combined) | |
| nightly-8-gpu-grok1-int4: | |
| # Gate: upstream repo (or pull_request events) only; runs when no filter is set, the filter is 'all', | |
| # or this job's name appears in the comma-separated filter (job_filter takes precedence over job_select). | |
| if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-grok1-int4,')) | |
| runs-on: linux-mi325-8gpu-sglang | |
| steps: | |
| # Check out inputs.ref when provided, otherwise the ref that triggered the run. | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| with: | |
| ref: ${{ inputs.ref || github.ref }} | |
| # Create an empty summary file on the runner, then run the container start script. | |
| - name: Setup docker | |
| run: | | |
| touch github_summary.md | |
| bash scripts/ci/amd/amd_ci_start_container.sh | |
| env: | |
| GITHUB_WORKSPACE: ${{ github.workspace }} | |
| - name: Install dependencies | |
| run: bash scripts/ci/amd/amd_ci_install_dependency.sh | |
| # Both test steps run with RCCL_MSCCL_ENABLE=0 and clear the shared summary file first. A failing suite | |
| # records its exit code instead of aborting, so the summary is still appended before the step exits. | |
| - name: Accuracy Test (8-GPU Grok1-INT4) | |
| timeout-minutes: 60 | |
| run: | | |
| > github_summary.md # Clear summary file | |
| bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \ | |
| -e RCCL_MSCCL_ENABLE=0 \ | |
| -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \ | |
| python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-grok1-int4 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$? | |
| echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true | |
| exit ${TEST_EXIT_CODE:-0} | |
| - name: Performance Test (8-GPU Grok1-INT4) | |
| timeout-minutes: 60 | |
| continue-on-error: true # Perf test failure doesn't fail the job if accuracy passed | |
| run: | | |
| > github_summary.md # Clear summary file | |
| bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \ | |
| -e RCCL_MSCCL_ENABLE=0 \ | |
| -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \ | |
| python3 run_suite.py --hw amd --suite nightly-perf-8-gpu-grok1-int4 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$? | |
| echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true | |
| exit ${TEST_EXIT_CODE:-0} | |
| # 8-GPU Grok2 (Accuracy + Performance combined) | |
| nightly-8-gpu-grok2: | |
| # Gate: upstream repo (or pull_request events) only; runs when no filter is set, the filter is 'all', | |
| # or this job's name appears in the comma-separated filter (job_filter takes precedence over job_select). | |
| if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-grok2,')) | |
| runs-on: linux-mi325-8gpu-sglang | |
| steps: | |
| # Check out inputs.ref when provided, otherwise the ref that triggered the run. | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| with: | |
| ref: ${{ inputs.ref || github.ref }} | |
| # Create an empty summary file on the runner, then run the container start script. | |
| - name: Setup docker | |
| run: | | |
| touch github_summary.md | |
| bash scripts/ci/amd/amd_ci_start_container.sh | |
| env: | |
| GITHUB_WORKSPACE: ${{ github.workspace }} | |
| - name: Install dependencies | |
| run: bash scripts/ci/amd/amd_ci_install_dependency.sh | |
| # Both test steps run with RCCL_MSCCL_ENABLE=0 and clear the shared summary file first. A failing suite | |
| # records its exit code instead of aborting, so the summary is still appended before the step exits. | |
| - name: Accuracy Test (8-GPU Grok2) | |
| timeout-minutes: 60 | |
| run: | | |
| > github_summary.md # Clear summary file | |
| bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \ | |
| -e RCCL_MSCCL_ENABLE=0 \ | |
| -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \ | |
| python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-grok2 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$? | |
| echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true | |
| exit ${TEST_EXIT_CODE:-0} | |
| - name: Performance Test (8-GPU Grok2) | |
| timeout-minutes: 60 | |
| continue-on-error: true # Perf test failure doesn't fail the job if accuracy passed | |
| run: | | |
| > github_summary.md # Clear summary file | |
| bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \ | |
| -e RCCL_MSCCL_ENABLE=0 \ | |
| -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \ | |
| python3 run_suite.py --hw amd --suite nightly-perf-8-gpu-grok2 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$? | |
| echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true | |
| exit ${TEST_EXIT_CODE:-0} | |
| # 8-GPU DeepSeek-V3.1 (Accuracy + Performance combined) | |
| nightly-8-gpu-deepseek-v31: | |
| # Gate: upstream repo (or pull_request events) only; runs when no filter is set, the filter is 'all', | |
| # or this job's name appears in the comma-separated filter (job_filter takes precedence over job_select). | |
| if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-deepseek-v31,')) | |
| runs-on: linux-mi325-8gpu-sglang | |
| steps: | |
| # Check out inputs.ref when provided, otherwise the ref that triggered the run. | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| with: | |
| ref: ${{ inputs.ref || github.ref }} | |
| # Create an empty summary file on the runner, then run the container start script. | |
| - name: Setup docker | |
| run: | | |
| touch github_summary.md | |
| bash scripts/ci/amd/amd_ci_start_container.sh | |
| env: | |
| GITHUB_WORKSPACE: ${{ github.workspace }} | |
| - name: Install dependencies | |
| run: bash scripts/ci/amd/amd_ci_install_dependency.sh | |
| # The accuracy step sets SGLANG_USE_AITER=1; the performance step sets SGLANG_USE_ROCM700A=1 instead. | |
| # A failing suite records its exit code instead of aborting, so the summary is still appended before | |
| # the step exits with that code. | |
| - name: Accuracy Test (8-GPU DeepSeek-V3.1) | |
| timeout-minutes: 120 | |
| run: | | |
| > github_summary.md # Clear summary file | |
| bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \ | |
| -e SGLANG_USE_AITER=1 \ | |
| -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \ | |
| python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-deepseek-v31 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$? | |
| echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true | |
| exit ${TEST_EXIT_CODE:-0} | |
| - name: Performance Test (8-GPU DeepSeek-V3.1) | |
| timeout-minutes: 300 | |
| continue-on-error: true # Perf test failure doesn't fail the job if accuracy passed | |
| run: | | |
| > github_summary.md # Clear summary file | |
| bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \ | |
| -e SGLANG_USE_ROCM700A=1 \ | |
| -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \ | |
| python3 run_suite.py --hw amd --suite nightly-perf-8-gpu-deepseek-v31 --nightly --timeout-per-file 18000 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$? | |
| echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true | |
| exit ${TEST_EXIT_CODE:-0} | |
| # 8-GPU DeepSeek-V3.2 (Basic Accuracy + Perf) | |
| nightly-8-gpu-deepseek-v32: | |
| # Gate: upstream repo (or pull_request events) only; runs when no filter is set, the filter is 'all', | |
| # or this job's name appears in the comma-separated filter (job_filter takes precedence over job_select). | |
| if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-deepseek-v32,')) | |
| runs-on: linux-mi325-8gpu-sglang | |
| steps: | |
| # Check out inputs.ref when provided, otherwise the ref that triggered the run. | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| with: | |
| ref: ${{ inputs.ref || github.ref }} | |
| # Create an empty summary file on the runner, then run the container start script. | |
| - name: Setup docker | |
| run: | | |
| touch github_summary.md | |
| bash scripts/ci/amd/amd_ci_start_container.sh | |
| env: | |
| GITHUB_WORKSPACE: ${{ github.workspace }} | |
| - name: Install dependencies | |
| run: bash scripts/ci/amd/amd_ci_install_dependency.sh | |
| # Each test step clears the shared summary file first. A failing suite records its exit code instead | |
| # of aborting, so the summary is still appended to $GITHUB_STEP_SUMMARY before the step exits. | |
| - name: Accuracy Test (8-GPU DeepSeek-V3.2 Basic) | |
| timeout-minutes: 120 | |
| run: | | |
| > github_summary.md # Clear summary file | |
| bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \ | |
| -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \ | |
| python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-deepseek-v32 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$? | |
| echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true | |
| exit ${TEST_EXIT_CODE:-0} | |
| - name: Performance Test (8-GPU DeepSeek-V3.2 Basic) | |
| timeout-minutes: 150 | |
| continue-on-error: true # Perf test failure doesn't fail the job if accuracy passed | |
| run: | | |
| > github_summary.md # Clear summary file | |
| bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \ | |
| -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \ | |
| python3 run_suite.py --hw amd --suite nightly-perf-8-gpu-deepseek-v32-basic --nightly --timeout-per-file 5400 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$? | |
| echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true | |
| exit ${TEST_EXIT_CODE:-0} | |
| # 8-GPU DeepSeek-V3.2 MTP (MTP Accuracy + Perf) | |
| nightly-8-gpu-deepseek-v32-mtp: | |
| # Gate: upstream repo (or pull_request events) only; runs when no filter is set, the filter is 'all', | |
| # or this job's name appears in the comma-separated filter (job_filter takes precedence over job_select). | |
| if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-deepseek-v32-mtp,')) | |
| runs-on: linux-mi325-8gpu-sglang | |
| steps: | |
| # Check out inputs.ref when provided, otherwise the ref that triggered the run. | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| with: | |
| ref: ${{ inputs.ref || github.ref }} | |
| # Create an empty summary file on the runner, then run the container start script. | |
| - name: Setup docker | |
| run: | | |
| touch github_summary.md | |
| bash scripts/ci/amd/amd_ci_start_container.sh | |
| env: | |
| GITHUB_WORKSPACE: ${{ github.workspace }} | |
| - name: Install dependencies | |
| run: bash scripts/ci/amd/amd_ci_install_dependency.sh | |
| # Each test step clears the shared summary file first. A failing suite records its exit code instead | |
| # of aborting, so the summary is still appended to $GITHUB_STEP_SUMMARY before the step exits. | |
| - name: Accuracy Test (8-GPU DeepSeek-V3.2 MTP) | |
| timeout-minutes: 120 | |
| run: | | |
| > github_summary.md # Clear summary file | |
| bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \ | |
| -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \ | |
| python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-deepseek-v32-mtp --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$? | |
| echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true | |
| exit ${TEST_EXIT_CODE:-0} | |
| - name: Performance Test (8-GPU DeepSeek-V3.2 MTP) | |
| timeout-minutes: 180 | |
| continue-on-error: true # Perf test failure doesn't fail the job if accuracy passed | |
| run: | | |
| > github_summary.md # Clear summary file | |
| bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \ | |
| -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \ | |
| python3 run_suite.py --hw amd --suite nightly-perf-8-gpu-deepseek-v32-mtp --nightly --timeout-per-file 7200 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$? | |
| echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true | |
| exit ${TEST_EXIT_CODE:-0} | |
| # 8-GPU DeepSeek-V3 KV FP8 (Basic + MTP with --kv-cache-dtype fp8_e4m3) | |
| nightly-8-gpu-deepseek-v3-kv-fp8: | |
| # Gate: upstream repo (or pull_request events) only; runs when no filter is set, the filter is 'all', | |
| # or this job's name appears in the comma-separated filter (job_filter takes precedence over job_select). | |
| if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-deepseek-v3-kv-fp8,')) | |
| runs-on: linux-mi325-8gpu-sglang | |
| steps: | |
| # Check out inputs.ref when provided, otherwise the ref that triggered the run. | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| with: | |
| ref: ${{ inputs.ref || github.ref }} | |
| # Create an empty summary file on the runner, then run the container start script. | |
| - name: Setup docker | |
| run: | | |
| touch github_summary.md | |
| bash scripts/ci/amd/amd_ci_start_container.sh | |
| env: | |
| GITHUB_WORKSPACE: ${{ github.workspace }} | |
| - name: Install dependencies | |
| run: bash scripts/ci/amd/amd_ci_install_dependency.sh | |
| # A failing suite records its exit code instead of aborting, so the summary file is still appended to | |
| # $GITHUB_STEP_SUMMARY before the step exits with that code. | |
| - name: DeepSeek-V3 KV FP8 Test (8-GPU Basic + MTP) | |
| timeout-minutes: 120 | |
| run: | | |
| > github_summary.md # Clear summary file | |
| bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \ | |
| -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \ | |
| python3 run_suite.py --hw amd --suite nightly-amd-8-gpu-deepseek-v3-kv-fp8 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$? | |
| echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true | |
| exit ${TEST_EXIT_CODE:-0} | |
| # 8-GPU Kimi-K2.5 (Accuracy) | |
| nightly-8-gpu-kimi-k25: | |
| # Gate: upstream repo (or pull_request events) only; runs when no filter is set, the filter is 'all', | |
| # or this job's name appears in the comma-separated filter (job_filter takes precedence over job_select). | |
| if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-kimi-k25,')) | |
| runs-on: linux-mi325-8gpu-sglang | |
| steps: | |
| # Check out inputs.ref when provided, otherwise the ref that triggered the run. | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| with: | |
| ref: ${{ inputs.ref || github.ref }} | |
| # Create an empty summary file on the runner, then run the container start script. | |
| - name: Setup docker | |
| run: | | |
| touch github_summary.md | |
| bash scripts/ci/amd/amd_ci_start_container.sh | |
| env: | |
| GITHUB_WORKSPACE: ${{ github.workspace }} | |
| - name: Install dependencies | |
| run: bash scripts/ci/amd/amd_ci_install_dependency.sh | |
| # A failing suite records its exit code instead of aborting, so the summary file is still appended to | |
| # $GITHUB_STEP_SUMMARY before the step exits with that code. | |
| - name: Accuracy Test (8-GPU Kimi-K2.5) | |
| timeout-minutes: 120 | |
| run: | | |
| > github_summary.md # Clear summary file | |
| bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \ | |
| -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \ | |
| python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-kimi-k25 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$? | |
| echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true | |
| exit ${TEST_EXIT_CODE:-0} | |
| # 8-GPU Qwen3-235B (single step running the nightly-8-gpu-qwen3-235b suite) | |
| nightly-8-gpu-qwen3-235b: | |
| # Gate: upstream repo (or pull_request events) only; runs when no filter is set, the filter is 'all', | |
| # or this job's name appears in the comma-separated filter (job_filter takes precedence over job_select). | |
| if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-qwen3-235b,')) | |
| runs-on: linux-mi325-8gpu-sglang | |
| steps: | |
| # Check out inputs.ref when provided, otherwise the ref that triggered the run. | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| with: | |
| ref: ${{ inputs.ref || github.ref }} | |
| # Create an empty summary file on the runner, then run the container start script. | |
| - name: Setup docker | |
| run: | | |
| touch github_summary.md | |
| bash scripts/ci/amd/amd_ci_start_container.sh | |
| env: | |
| GITHUB_WORKSPACE: ${{ github.workspace }} | |
| - name: Install dependencies | |
| run: bash scripts/ci/amd/amd_ci_install_dependency.sh | |
| # A failing suite records its exit code instead of aborting, so the summary file is still appended to | |
| # $GITHUB_STEP_SUMMARY before the step exits with that code. | |
| - name: Accuracy Test + Performance Test (8-GPU Qwen3) | |
| timeout-minutes: 120 | |
| run: | | |
| > github_summary.md # Clear summary file | |
| bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \ | |
| -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \ | |
| python3 run_suite.py --hw amd --suite nightly-8-gpu-qwen3-235b --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$? | |
| echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true | |
| exit ${TEST_EXIT_CODE:-0} | |
| # 8-GPU Qwen 3.5 (Accuracy + Performance combined) | |
| nightly-8-gpu-qwen35: | |
| # Gate: upstream repo (or pull_request events) only; runs when no filter is set, the filter is 'all', | |
| # or this job's name appears in the comma-separated filter (job_filter takes precedence over job_select). | |
| if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-qwen35,')) | |
| runs-on: linux-mi325-8gpu-sglang | |
| steps: | |
| # Check out inputs.ref when provided, otherwise the ref that triggered the run. | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| with: | |
| ref: ${{ inputs.ref || github.ref }} | |
| # Create an empty summary file on the runner, then run the container start script. | |
| - name: Setup docker | |
| run: | | |
| touch github_summary.md | |
| bash scripts/ci/amd/amd_ci_start_container.sh | |
| env: | |
| GITHUB_WORKSPACE: ${{ github.workspace }} | |
| # Standard dependency install plus two extra pip packages installed through amd_ci_exec.sh. | |
| - name: Install dependencies | |
| run: | | |
| bash scripts/ci/amd/amd_ci_install_dependency.sh | |
| bash scripts/ci/amd/amd_ci_exec.sh pip install mistral-common "lm-eval[api]" | |
| # Honors the continue_on_error input like the sibling accuracy steps (the flag was previously omitted | |
| # here, so the input had no effect on this step). A failing suite records its exit code instead of | |
| # aborting, so the summary is still appended to $GITHUB_STEP_SUMMARY before the step exits. | |
| - name: Accuracy Test (8-GPU Qwen 3.5) | |
| timeout-minutes: 120 | |
| run: | | |
| > github_summary.md # Clear summary file | |
| bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \ | |
| -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \ | |
| python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-qwen35 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$? | |
| echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true | |
| exit ${TEST_EXIT_CODE:-0} | |
| - name: Performance Test (8-GPU Qwen 3.5 FP8) | |
| timeout-minutes: 120 | |
| continue-on-error: true # Perf test failure doesn't fail the job if accuracy passed | |
| run: | | |
| > github_summary.md # Clear summary file | |
| bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \ | |
| -e SGLANG_USE_AITER=1 \ | |
| -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \ | |
| python3 run_suite.py --hw amd --suite nightly-perf-8-gpu-qwen35-fp8 --nightly --timeout-per-file 5400 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$? | |
| echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true | |
| exit ${TEST_EXIT_CODE:-0} | |
| # 8-GPU GLM-5 (Accuracy + Performance combined) | |
| nightly-8-gpu-glm5: | |
| # Gate: upstream repo (or pull_request events) only; runs when no filter is set, the filter is 'all', | |
| # or this job's name appears in the comma-separated filter (job_filter takes precedence over job_select). | |
| if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-glm5,')) | |
| runs-on: linux-mi325-8gpu-sglang | |
| steps: | |
| # Check out inputs.ref when provided, otherwise the ref that triggered the run. | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| with: | |
| ref: ${{ inputs.ref || github.ref }} | |
| # Create an empty summary file on the runner, then run the container start script. | |
| - name: Setup docker | |
| run: | | |
| touch github_summary.md | |
| bash scripts/ci/amd/amd_ci_start_container.sh | |
| env: | |
| GITHUB_WORKSPACE: ${{ github.workspace }} | |
| # Standard dependency install, then transformers installed from GitHub at a pinned commit. | |
| - name: Install dependencies | |
| run: | | |
| bash scripts/ci/amd/amd_ci_install_dependency.sh | |
| bash scripts/ci/amd/amd_ci_exec.sh pip install git+https://github.com/huggingface/transformers.git@96f807a33b75 | |
| # Each test step clears the shared summary file first. A failing suite records its exit code instead | |
| # of aborting, so the summary is still appended to $GITHUB_STEP_SUMMARY before the step exits. | |
| - name: Accuracy Test (8-GPU GLM-5 NSA) | |
| timeout-minutes: 120 | |
| run: | | |
| > github_summary.md # Clear summary file | |
| bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \ | |
| -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \ | |
| python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-glm5 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$? | |
| echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true | |
| exit ${TEST_EXIT_CODE:-0} | |
| - name: Performance Test (8-GPU GLM-5) | |
| timeout-minutes: 120 | |
| continue-on-error: true # Perf test failure doesn't fail the job if accuracy passed | |
| run: | | |
| > github_summary.md # Clear summary file | |
| bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \ | |
| -e SGLANG_USE_AITER=1 \ | |
| -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \ | |
| python3 run_suite.py --hw amd --suite nightly-perf-8-gpu-glm5 --nightly --timeout-per-file 5400 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$? | |
| echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true | |
| exit ${TEST_EXIT_CODE:-0} | |
# 8-GPU GLM-5.1: accuracy suite, then a non-blocking performance suite
nightly-8-gpu-glm51:
  # Gate: upstream repo or a pull_request event, and either no job filter/selection,
  # the value 'all', or a comma-separated list that names this job.
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
    (!(inputs.job_filter || inputs.job_select) ||
    (inputs.job_filter || inputs.job_select) == 'all' ||
    contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-glm51,'))
  runs-on: linux-mi325-8gpu-sglang
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.ref || github.ref }}

    - name: Setup docker
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        # Pre-create the summary file that the test steps read back.
        touch github_summary.md
        bash scripts/ci/amd/amd_ci_start_container.sh

    - name: Install dependencies
      run: |
        bash scripts/ci/amd/amd_ci_install_dependency.sh
        bash scripts/ci/amd/amd_ci_exec.sh pip install git+https://github.com/huggingface/transformers.git@96f807a33b75

    - name: Accuracy Test (8-GPU GLM-5.1 NSA)
      timeout-minutes: 120
      run: |
        : > github_summary.md  # start from an empty summary file
        # A failing suite is recorded, not fatal, so the summary below is still published.
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-glm51 --nightly --timeout-per-file 3600 \
          ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}

    - name: Performance Test (8-GPU GLM-5.1)
      timeout-minutes: 120
      continue-on-error: true  # this step's failure does not fail the job
      run: |
        : > github_summary.md  # start from an empty summary file
        # A failing suite is recorded, not fatal, so the summary below is still published.
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e SGLANG_USE_AITER=1 \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-perf-8-gpu-glm51 --nightly --timeout-per-file 5400 \
          ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
# 8-GPU MiniMax-M2.7: accuracy suite, then a non-blocking performance suite (replaces M2.5)
nightly-8-gpu-minimax-m27:
  # Gate: upstream repo or a pull_request event, and either no job filter/selection,
  # the value 'all', or a comma-separated list that names this job.
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
    (!(inputs.job_filter || inputs.job_select) ||
    (inputs.job_filter || inputs.job_select) == 'all' ||
    contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-minimax-m27,'))
  runs-on: linux-mi325-8gpu-sglang
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.ref || github.ref }}

    - name: Setup docker
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        # Pre-create the summary file that the test steps read back.
        touch github_summary.md
        bash scripts/ci/amd/amd_ci_start_container.sh

    - name: Install dependencies
      run: |
        bash scripts/ci/amd/amd_ci_install_dependency.sh

    - name: Accuracy Test (8-GPU MiniMax-M2.7)
      timeout-minutes: 120
      run: |
        : > github_summary.md  # start from an empty summary file
        # A failing suite is recorded, not fatal, so the summary below is still published.
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e SGLANG_USE_AITER=1 \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-minimax-m27 --nightly --timeout-per-file 3600 \
          ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}

    - name: Performance Test (8-GPU MiniMax-M2.7)
      timeout-minutes: 120
      continue-on-error: true  # a perf failure must not fail the job
      run: |
        : > github_summary.md  # start from an empty summary file
        # A failing suite is recorded, not fatal, so the summary below is still published.
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e SGLANG_USE_AITER=1 \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-perf-8-gpu-minimax-m27 --nightly --timeout-per-file 5400 \
          ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
# ============================================== MI30x Diffusion Tests ==============================================
# 1-GPU Z-Image-Turbo (diffusion, text-to-image); generated images are uploaded as an artifact
nightly-1-gpu-zimage-turbo:
  # Gate: upstream repo or a pull_request event, and either no job filter/selection,
  # the value 'all', or a comma-separated list that names this job.
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
    (!(inputs.job_filter || inputs.job_select) ||
    (inputs.job_filter || inputs.job_select) == 'all' ||
    contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-1-gpu-zimage-turbo,'))
  runs-on: linux-mi325-1gpu-sglang
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.ref || github.ref }}

    - name: Setup docker
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        # Pre-create the summary file that the test step reads back.
        touch github_summary.md
        bash scripts/ci/amd/amd_ci_start_container.sh

    - name: Install dependencies
      run: |
        bash scripts/ci/amd/amd_ci_install_dependency.sh

    # When continue_on_error is set, the expression splices '|| true' into the
    # command line itself, so a pytest failure is ignored and the step exits 0.
    - name: Z-Image-Turbo Diffusion Test (1-GPU)
      timeout-minutes: 45
      run: |
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          -e SGLANG_DIFFUSION_ARTIFACT_DIR="/sglang-checkout/diffusion-artifacts" \
          pytest test/registered/amd/test_zimage_turbo.py -v -s \
          ${{ inputs.continue_on_error && '|| true' || '' }} || TEST_EXIT_CODE=$?
        # Publish whatever summary was written, then report the recorded exit status.
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}

    # always(): upload even when the test step failed; a missing directory is not an error.
    - name: Upload generated images
      if: always()
      uses: actions/upload-artifact@v4
      with:
        name: zimage-turbo-outputs
        path: diffusion-artifacts/
        retention-days: 30
        if-no-files-found: ignore
# ============================================== MI35x Tests ==============================================
# MI35x 1-GPU: platform-agnostic tests that may work on CDNA4 (gfx950)
nightly-test-1-gpu-mi35x:
  # Gate: upstream repo or a pull_request event, and either no job filter/selection,
  # the value 'all', or a comma-separated list that names this job.
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
    (!(inputs.job_filter || inputs.job_select) ||
    (inputs.job_filter || inputs.job_select) == 'all' ||
    contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-test-1-gpu-mi35x,'))
  runs-on: linux-mi35x-gpu-1
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.ref || github.ref }}

    - name: Setup docker
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        # Pre-create the summary file that the test step reads back.
        touch github_summary.md
        bash scripts/ci/amd/amd_ci_start_container.sh

    - name: Install dependencies
      run: |
        bash scripts/ci/amd/amd_ci_install_dependency.sh
        # run_suite.py needs tabulate, which is missing in the MI35x container
        bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate

    - name: Nightly Test MI35x (1-GPU)
      timeout-minutes: 90
      run: |
        # A failing suite is recorded, not fatal, so the summary below is still published.
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-amd-1-gpu-mi35x --nightly --timeout-per-file 900 \
          ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
# MI35x 8-GPU GPT-OSS: accuracy only
nightly-accuracy-8-gpu-mi35x:
  # Gate: upstream repo or a pull_request event, and either no job filter/selection,
  # the value 'all', or a comma-separated list that names this job.
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
    (!(inputs.job_filter || inputs.job_select) ||
    (inputs.job_filter || inputs.job_select) == 'all' ||
    contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-accuracy-8-gpu-mi35x,'))
  runs-on: linux-mi35x-gpu-8
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.ref || github.ref }}

    - name: Setup docker
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        # Pre-create the summary file that the test step reads back.
        touch github_summary.md
        bash scripts/ci/amd/amd_ci_start_container.sh

    - name: Install dependencies
      run: |
        bash scripts/ci/amd/amd_ci_install_dependency.sh
        # run_suite.py needs tabulate, which is missing in the MI35x container
        bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate

    - name: Accuracy Test MI35x (8-GPU GPT-OSS)
      timeout-minutes: 180
      run: |
        # A failing suite is recorded, not fatal, so the summary below is still published.
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-amd-8-gpu-mi35x --nightly --timeout-per-file 7200 \
          ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
# MI35x 8-GPU Grok1-INT4: accuracy suite, then a non-blocking performance suite
nightly-8-gpu-mi35x-grok1-int4:
  # Gate: upstream repo or a pull_request event, and either no job filter/selection,
  # the value 'all', or a comma-separated list that names this job.
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
    (!(inputs.job_filter || inputs.job_select) ||
    (inputs.job_filter || inputs.job_select) == 'all' ||
    contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-mi35x-grok1-int4,'))
  runs-on: linux-mi35x-gpu-8
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.ref || github.ref }}

    - name: Setup docker
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        # Pre-create the summary file that the test steps read back.
        touch github_summary.md
        bash scripts/ci/amd/amd_ci_start_container.sh

    - name: Install dependencies
      run: |
        bash scripts/ci/amd/amd_ci_install_dependency.sh
        # run_suite.py needs tabulate, which is missing in the MI35x container
        bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate

    # NOTE(review): the 90-minute step timeout equals --timeout-per-file 5400 s,
    # so the step may be cancelled before run_suite.py's own timeout - confirm intended.
    - name: Accuracy Test MI35x (8-GPU Grok1-INT4)
      timeout-minutes: 90
      run: |
        : > github_summary.md  # start from an empty summary file
        # A failing suite is recorded, not fatal, so the summary below is still published.
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e RCCL_MSCCL_ENABLE=0 \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-mi35x-grok1-int4 --nightly --timeout-per-file 5400 \
          ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}

    # NOTE(review): the 60-minute step timeout equals --timeout-per-file 3600 s - confirm intended.
    - name: Performance Test MI35x (8-GPU Grok1-INT4)
      timeout-minutes: 60
      continue-on-error: true  # a perf failure must not fail the job
      run: |
        : > github_summary.md  # start from an empty summary file
        # A failing suite is recorded, not fatal, so the summary below is still published.
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e RCCL_MSCCL_ENABLE=0 \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-perf-8-gpu-mi35x-grok1-int4 --nightly --timeout-per-file 3600 \
          ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
# MI35x 8-GPU Grok2: accuracy suite, then a non-blocking performance suite
nightly-8-gpu-mi35x-grok2:
  # Gate: upstream repo or a pull_request event, and either no job filter/selection,
  # the value 'all', or a comma-separated list that names this job.
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
    (!(inputs.job_filter || inputs.job_select) ||
    (inputs.job_filter || inputs.job_select) == 'all' ||
    contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-mi35x-grok2,'))
  runs-on: linux-mi35x-gpu-8
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.ref || github.ref }}

    - name: Setup docker
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        # Pre-create the summary file that the test steps read back.
        touch github_summary.md
        bash scripts/ci/amd/amd_ci_start_container.sh

    - name: Install dependencies
      run: |
        bash scripts/ci/amd/amd_ci_install_dependency.sh
        # run_suite.py needs tabulate, which is missing in the MI35x container
        bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate

    # NOTE(review): the 60-minute step timeout equals --timeout-per-file 3600 s,
    # so the step may be cancelled before run_suite.py's own timeout - confirm intended.
    - name: Accuracy Test MI35x (8-GPU Grok2)
      timeout-minutes: 60
      run: |
        : > github_summary.md  # start from an empty summary file
        # A failing suite is recorded, not fatal, so the summary below is still published.
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e RCCL_MSCCL_ENABLE=0 \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-mi35x-grok2 --nightly --timeout-per-file 3600 \
          ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}

    # NOTE(review): same 60 min vs 3600 s coincidence as the accuracy step - confirm intended.
    - name: Performance Test MI35x (8-GPU Grok2)
      timeout-minutes: 60
      continue-on-error: true  # a perf failure must not fail the job
      run: |
        : > github_summary.md  # start from an empty summary file
        # A failing suite is recorded, not fatal, so the summary below is still published.
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e RCCL_MSCCL_ENABLE=0 \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-perf-8-gpu-mi35x-grok2 --nightly --timeout-per-file 3600 \
          ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
# MI35x 8-GPU DeepSeek-R1-MXFP4: accuracy suite, then a non-blocking performance script
nightly-8-gpu-mi35x-deepseek-r1-mxfp4:
  # Gate: upstream repo or a pull_request event, and either no job filter/selection,
  # the value 'all', or a comma-separated list that names this job.
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
    (!(inputs.job_filter || inputs.job_select) ||
    (inputs.job_filter || inputs.job_select) == 'all' ||
    contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-mi35x-deepseek-r1-mxfp4,'))
  runs-on: linux-mi35x-gpu-8
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.ref || github.ref }}

    - name: Setup docker
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        # Pre-create the summary file that the test steps read back.
        touch github_summary.md
        bash scripts/ci/amd/amd_ci_start_container.sh

    - name: Install dependencies
      run: |
        bash scripts/ci/amd/amd_ci_install_dependency.sh
        # run_suite.py needs tabulate, which is missing in the MI35x container
        bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate

    - name: Accuracy Test MI35x (8-GPU DeepSeek-R1-MXFP4)
      timeout-minutes: 180
      run: |
        : > github_summary.md  # start from an empty summary file
        # A failing suite is recorded, not fatal, so the summary below is still published.
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-amd-8-gpu-mi35x-deepseek-r1-mxfp4 --nightly --timeout-per-file 7200 \
          ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}

    # Runs the perf test file directly rather than through run_suite.py.
    - name: Performance Test MI35x (8-GPU DeepSeek-R1-MXFP4)
      timeout-minutes: 300
      continue-on-error: true  # a perf failure must not fail the job
      run: |
        : > github_summary.md  # start from an empty summary file
        # A failing run is recorded, not fatal, so the summary below is still published.
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 registered/amd/perf/mi35x/test_deepseek_r1_mxfp4_perf_mi35x.py || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
# MI35x 8-GPU DeepSeek-R1-MXFP4 KV FP8: accuracy suite, then a non-blocking performance script
nightly-8-gpu-mi35x-deepseek-r1-mxfp4-kv-fp8:
  # Gate: upstream repo or a pull_request event, and either no job filter/selection,
  # the value 'all', or a comma-separated list that names this job.
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
    (!(inputs.job_filter || inputs.job_select) ||
    (inputs.job_filter || inputs.job_select) == 'all' ||
    contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-mi35x-deepseek-r1-mxfp4-kv-fp8,'))
  runs-on: linux-mi35x-gpu-8
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.ref || github.ref }}

    - name: Setup docker
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        # Pre-create the summary file that the test steps read back.
        touch github_summary.md
        bash scripts/ci/amd/amd_ci_start_container.sh

    - name: Install dependencies
      run: |
        bash scripts/ci/amd/amd_ci_install_dependency.sh
        # run_suite.py needs tabulate, which is missing in the MI35x container
        bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate

    - name: Accuracy Test MI35x (8-GPU DeepSeek-R1-MXFP4 KV FP8)
      timeout-minutes: 180
      run: |
        : > github_summary.md  # start from an empty summary file
        # A failing suite is recorded, not fatal, so the summary below is still published.
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-amd-8-gpu-mi35x-deepseek-r1-mxfp4-kv-fp8 --nightly --timeout-per-file 7200 \
          ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}

    # Runs the perf test file directly rather than through run_suite.py.
    - name: Performance Test MI35x (8-GPU DeepSeek-R1-MXFP4 KV FP8)
      timeout-minutes: 300
      continue-on-error: true  # a perf failure must not fail the job
      run: |
        : > github_summary.md  # start from an empty summary file
        # A failing run is recorded, not fatal, so the summary below is still published.
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 registered/amd/perf/mi35x/test_deepseek_r1_mxfp4_kv_fp8_perf_mi35x.py || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
# MI35x 8-GPU DeepSeek-R1-MXFP4 AllReduce Fusion: accuracy suite, then a non-blocking performance script
nightly-8-gpu-mi35x-deepseek-r1-mxfp4-ar-fusion:
  # Gate: upstream repo or a pull_request event, and either no job filter/selection,
  # the value 'all', or a comma-separated list that names this job.
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
    (!(inputs.job_filter || inputs.job_select) ||
    (inputs.job_filter || inputs.job_select) == 'all' ||
    contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-mi35x-deepseek-r1-mxfp4-ar-fusion,'))
  runs-on: linux-mi35x-gpu-8
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.ref || github.ref }}

    - name: Setup docker
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        # Pre-create the summary file that the test steps read back.
        touch github_summary.md
        bash scripts/ci/amd/amd_ci_start_container.sh

    - name: Install dependencies
      run: |
        bash scripts/ci/amd/amd_ci_install_dependency.sh
        # run_suite.py needs tabulate, which is missing in the MI35x container
        bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate

    - name: Accuracy Test MI35x (8-GPU DeepSeek-R1-MXFP4 AllReduce Fusion)
      timeout-minutes: 180
      run: |
        : > github_summary.md  # start from an empty summary file
        # A failing suite is recorded, not fatal, so the summary below is still published.
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-amd-8-gpu-mi35x-deepseek-r1-mxfp4-ar-fusion --nightly --timeout-per-file 7200 \
          ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}

    # Runs the perf test file directly rather than through run_suite.py.
    - name: Performance Test MI35x (8-GPU DeepSeek-R1-MXFP4 AllReduce Fusion)
      timeout-minutes: 300
      continue-on-error: true  # a perf failure must not fail the job
      run: |
        : > github_summary.md  # start from an empty summary file
        # A failing run is recorded, not fatal, so the summary below is still published.
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 registered/amd/perf/mi35x/test_deepseek_r1_mxfp4_ar_fusion_perf_mi35x.py || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
# MI35x 8-GPU DeepSeek-V3.2: accuracy only
nightly-accuracy-8-gpu-mi35x-deepseek-v32:
  # Gate: upstream repo or a pull_request event, and either no job filter/selection,
  # the value 'all', or a comma-separated list that names this job.
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
    (!(inputs.job_filter || inputs.job_select) ||
    (inputs.job_filter || inputs.job_select) == 'all' ||
    contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-accuracy-8-gpu-mi35x-deepseek-v32,'))
  runs-on: linux-mi35x-gpu-8
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.ref || github.ref }}

    - name: Setup docker
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        # Pre-create the summary file that the test step reads back.
        touch github_summary.md
        bash scripts/ci/amd/amd_ci_start_container.sh

    - name: Install dependencies
      run: |
        bash scripts/ci/amd/amd_ci_install_dependency.sh
        # run_suite.py needs tabulate, which is missing in the MI35x container
        bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate

    - name: Accuracy Test MI35x (8-GPU DeepSeek-V3.2)
      timeout-minutes: 120
      run: |
        : > github_summary.md  # start from an empty summary file
        # A failing suite is recorded, not fatal, so the summary below is still published.
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-amd-8-gpu-mi35x-deepseek-v32 --nightly --timeout-per-file 3600 \
          ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
# MI35x 8-GPU DeepSeek-V3.2 TP+MTP: accuracy only
nightly-accuracy-8-gpu-mi35x-deepseek-v32-mtp:
  # Gate: upstream repo or a pull_request event, and either no job filter/selection,
  # the value 'all', or a comma-separated list that names this job.
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
    (!(inputs.job_filter || inputs.job_select) ||
    (inputs.job_filter || inputs.job_select) == 'all' ||
    contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-accuracy-8-gpu-mi35x-deepseek-v32-mtp,'))
  runs-on: linux-mi35x-gpu-8
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.ref || github.ref }}

    - name: Setup docker
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        # Pre-create the summary file that the test step reads back.
        touch github_summary.md
        bash scripts/ci/amd/amd_ci_start_container.sh

    - name: Install dependencies
      run: |
        bash scripts/ci/amd/amd_ci_install_dependency.sh
        # run_suite.py needs tabulate, which is missing in the MI35x container
        bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate

    - name: Accuracy Test MI35x (8-GPU DeepSeek-V3.2 TP+MTP)
      timeout-minutes: 120
      run: |
        : > github_summary.md  # start from an empty summary file
        # A failing suite is recorded, not fatal, so the summary below is still published.
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-mi35x-deepseek-v32-mtp --nightly --timeout-per-file 3600 \
          ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
# MI35x 8-GPU DeepSeek-V3.2: basic performance test
nightly-perf-8-gpu-mi35x-deepseek-v32-basic:
  # Gate: upstream repo or a pull_request event, and either no job filter/selection,
  # the value 'all', or a comma-separated list that names this job.
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
    (!(inputs.job_filter || inputs.job_select) ||
    (inputs.job_filter || inputs.job_select) == 'all' ||
    contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-perf-8-gpu-mi35x-deepseek-v32-basic,'))
  runs-on: linux-mi35x-gpu-8
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.ref || github.ref }}

    - name: Setup docker
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        # Pre-create the summary file that the test step reads back.
        touch github_summary.md
        bash scripts/ci/amd/amd_ci_start_container.sh

    - name: Install dependencies
      run: |
        bash scripts/ci/amd/amd_ci_install_dependency.sh
        # run_suite.py needs tabulate, which is missing in the MI35x container
        bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate

    # Unlike the perf steps of the combined jobs, this step is not marked
    # continue-on-error, so a non-zero exit status here fails the job.
    - name: Performance Test MI35x (8-GPU DeepSeek-V3.2 Basic)
      timeout-minutes: 150
      run: |
        : > github_summary.md  # start from an empty summary file
        # A failing suite is recorded, not fatal, so the summary below is still published.
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-perf-8-gpu-mi35x-deepseek-v32-basic --nightly --timeout-per-file 5400 \
          ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
# MI35x 8-GPU Kimi-K2.5: accuracy only
nightly-8-gpu-mi35x-kimi-k25:
  # Gate: upstream repo or a pull_request event, and either no job filter/selection,
  # the value 'all', or a comma-separated list that names this job.
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
    (!(inputs.job_filter || inputs.job_select) ||
    (inputs.job_filter || inputs.job_select) == 'all' ||
    contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-mi35x-kimi-k25,'))
  runs-on: linux-mi35x-gpu-8
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.ref || github.ref }}

    - name: Setup docker
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        # Pre-create the summary file that the test step reads back.
        touch github_summary.md
        bash scripts/ci/amd/amd_ci_start_container.sh

    - name: Install dependencies
      run: |
        bash scripts/ci/amd/amd_ci_install_dependency.sh
        # run_suite.py needs tabulate, which is missing in the MI35x container
        bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate

    - name: Accuracy Test MI35x (8-GPU Kimi-K2.5)
      timeout-minutes: 180
      run: |
        : > github_summary.md  # start from an empty summary file
        # A failing suite is recorded, not fatal, so the summary below is still published.
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-mi35x-kimi-k25 --nightly --timeout-per-file 7200 \
          ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
# MI35x 8-GPU Qwen3-235B-MXFP4: accuracy and performance in a single suite run
nightly-8-gpu-mi35x-qwen3-235b-mxfp4:
  # Gate: upstream repo or a pull_request event, and either no job filter/selection,
  # the value 'all', or a comma-separated list that names this job.
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
    (!(inputs.job_filter || inputs.job_select) ||
    (inputs.job_filter || inputs.job_select) == 'all' ||
    contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-mi35x-qwen3-235b-mxfp4,'))
  runs-on: linux-mi35x-gpu-8
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.ref || github.ref }}

    - name: Setup docker
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        # Pre-create the summary file that the test step reads back.
        touch github_summary.md
        bash scripts/ci/amd/amd_ci_start_container.sh

    - name: Install dependencies
      run: |
        bash scripts/ci/amd/amd_ci_install_dependency.sh
        # run_suite.py needs tabulate, which is missing in the MI35x container
        bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate

    - name: Accuracy Test + Performance Test MI35x (8-GPU Qwen3-235B-MXFP4)
      timeout-minutes: 120
      run: |
        : > github_summary.md  # start from an empty summary file
        # A failing suite is recorded, not fatal, so the summary below is still published.
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-8-gpu-mi35x-qwen3-235b-mxfp4 --nightly --timeout-per-file 3600 \
          ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
# MI35x 8-GPU Qwen 3.5: accuracy suite, then a non-blocking FP8 performance suite
nightly-8-gpu-mi35x-qwen35:
  # Gate: upstream repo or a pull_request event, and either no job filter/selection,
  # the value 'all', or a comma-separated list that names this job.
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
    (!(inputs.job_filter || inputs.job_select) ||
    (inputs.job_filter || inputs.job_select) == 'all' ||
    contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-mi35x-qwen35,'))
  runs-on: linux-mi35x-gpu-8
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.ref || github.ref }}

    - name: Setup docker
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        # Pre-create the summary file that the test steps read back.
        touch github_summary.md
        bash scripts/ci/amd/amd_ci_start_container.sh

    - name: Install dependencies
      run: |
        bash scripts/ci/amd/amd_ci_install_dependency.sh
        # Extra packages installed into the container for this job.
        bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate
        bash scripts/ci/amd/amd_ci_exec.sh pip install mistral-common "lm-eval[api]"

    # The continue_on_error input is forwarded to run_suite.py here as well, matching
    # the other run_suite.py steps in this workflow (it was previously not passed).
    - name: Accuracy Test MI35x (8-GPU Qwen 3.5)
      timeout-minutes: 120
      run: |
        : > github_summary.md  # start from an empty summary file
        # A failing suite is recorded, not fatal, so the summary below is still published.
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-mi35x-qwen35 --nightly --timeout-per-file 3600 \
          ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}

    - name: Performance Test MI35x (8-GPU Qwen 3.5 FP8)
      timeout-minutes: 120
      continue-on-error: true  # this step's failure does not fail the job
      run: |
        : > github_summary.md  # start from an empty summary file
        # A failing suite is recorded, not fatal, so the summary below is still published.
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e SGLANG_USE_AITER=1 \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-perf-8-gpu-mi35x-qwen35-fp8 --nightly --timeout-per-file 5400 \
          ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
# MI35x 8-GPU GLM-5: accuracy only
nightly-8-gpu-mi35x-glm5:
  # Gate: upstream repo or a pull_request event, and either no job filter/selection,
  # the value 'all', or a comma-separated list that names this job.
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
    (!(inputs.job_filter || inputs.job_select) ||
    (inputs.job_filter || inputs.job_select) == 'all' ||
    contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-mi35x-glm5,'))
  runs-on: linux-mi35x-gpu-8
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.ref || github.ref }}

    - name: Setup docker
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        # Pre-create the summary file that the test step reads back.
        touch github_summary.md
        bash scripts/ci/amd/amd_ci_start_container.sh

    - name: Install dependencies
      run: |
        bash scripts/ci/amd/amd_ci_install_dependency.sh
        # run_suite.py needs tabulate, which is missing in the MI35x container
        bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate
        # transformers is installed from git at a pinned commit
        bash scripts/ci/amd/amd_ci_exec.sh pip install git+https://github.com/huggingface/transformers.git@96f807a33b75

    - name: Accuracy Test MI35x (8-GPU GLM-5 NSA)
      timeout-minutes: 180
      run: |
        : > github_summary.md  # start from an empty summary file
        # A failing suite is recorded, not fatal, so the summary below is still published.
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-amd-8-gpu-mi35x-glm5 --nightly --timeout-per-file 7200 \
          ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
# MI35x 8-GPU GLM-5.1 (Accuracy + Performance combined)
nightly-8-gpu-mi35x-glm51:
  # Runs only for the sgl-project/sglang repository or on pull_request events,
  # and only when no job filter is given, the filter is 'all', or this job's
  # name appears in the comma-separated filter.
  if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-mi35x-glm51,'))
  runs-on: linux-mi35x-gpu-8
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        # Use the caller-supplied ref when present, otherwise the triggering ref.
        ref: ${{ inputs.ref || github.ref }}

    - name: Setup docker
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        # Make sure the summary file exists before the container is started.
        touch github_summary.md
        bash scripts/ci/amd/amd_ci_start_container.sh

    - name: Install dependencies
      run: |
        bash scripts/ci/amd/amd_ci_install_dependency.sh
        # tabulate is installed separately, as in the other MI35x jobs.
        bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate
        # Install transformers from its git repository at a pinned commit.
        bash scripts/ci/amd/amd_ci_exec.sh pip install git+https://github.com/huggingface/transformers.git@96f807a33b75

    - name: Accuracy Test MI35x (8-GPU GLM-5.1 NSA)
      timeout-minutes: 180
      run: |
        # Clear summary file
        : > github_summary.md
        # Run the GLM-5.1 accuracy suite through the CI exec wrapper with the
        # step summary redirected to the local summary file.
        bash scripts/ci/amd/amd_ci_exec.sh \
          -w /sglang-checkout/test \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py \
            --hw amd \
            --suite nightly-amd-8-gpu-mi35x-glm51 \
            --nightly \
            --timeout-per-file 7200 \
            ${{ inputs.continue_on_error && '--continue-on-error' || '' }} \
          || TEST_EXIT_CODE=$?
        # Publish the collected report (best effort), then hand back the suite's status.
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}

    - name: Performance Test MI35x (8-GPU GLM-5.1)
      # A failure of this step does not fail the job.
      continue-on-error: true
      timeout-minutes: 120
      run: |
        # Clear summary file
        : > github_summary.md
        # Run the GLM-5.1 perf suite the same way as the accuracy suite above.
        bash scripts/ci/amd/amd_ci_exec.sh \
          -w /sglang-checkout/test \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py \
            --hw amd \
            --suite nightly-perf-8-gpu-mi35x-glm51 \
            --nightly \
            --timeout-per-file 5400 \
            ${{ inputs.continue_on_error && '--continue-on-error' || '' }} \
          || TEST_EXIT_CODE=$?
        # Publish the collected report (best effort), then hand back the suite's status.
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
# MI35x 8-GPU DeepSeek-V3.2 Performance Test (MTP)
nightly-perf-8-gpu-mi35x-deepseek-v32-mtp:
  # Runs only for the sgl-project/sglang repository or on pull_request events,
  # and only when no job filter is given, the filter is 'all', or this job's
  # name appears in the comma-separated filter.
  if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-perf-8-gpu-mi35x-deepseek-v32-mtp,'))
  runs-on: linux-mi35x-gpu-8
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        # Use the caller-supplied ref when present, otherwise the triggering ref.
        ref: ${{ inputs.ref || github.ref }}

    - name: Setup docker
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        # Make sure the summary file exists before the container is started.
        touch github_summary.md
        bash scripts/ci/amd/amd_ci_start_container.sh

    - name: Install dependencies
      run: |
        bash scripts/ci/amd/amd_ci_install_dependency.sh
        # Install tabulate for run_suite.py (missing in MI35x container)
        bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate

    - name: Performance Test MI35x (8-GPU DeepSeek-V3.2 MTP)
      timeout-minutes: 180
      run: |
        # Clear summary file
        : > github_summary.md
        # Run the DeepSeek-V3.2 MTP perf suite through the CI exec wrapper with
        # the step summary redirected to the local summary file.
        bash scripts/ci/amd/amd_ci_exec.sh \
          -w /sglang-checkout/test \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py \
            --hw amd \
            --suite nightly-perf-8-gpu-mi35x-deepseek-v32-mtp \
            --nightly \
            --timeout-per-file 7200 \
            ${{ inputs.continue_on_error && '--continue-on-error' || '' }} \
          || TEST_EXIT_CODE=$?
        # Publish the collected report (best effort), then hand back the suite's status.
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
# Final gate: fails when any job listed under `needs` failed or was cancelled.
check-all-jobs:
  # always() makes this job evaluate even when upstream jobs did not succeed.
  if: always() && (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request' || github.event_name == 'workflow_dispatch')
  runs-on: ubuntu-latest
  # NOTE(review): the workflow_dispatch `job_select` dropdown also offers
  # nightly-8-gpu-glm51-mxfp4, which is not listed below - confirm whether
  # leaving it out of this gate is intentional.
  needs:
    # MI30x Unit Tests
    - nightly-test-1-gpu-unit
    # MI30x Accuracy Tests
    - nightly-accuracy-2-gpu
    - nightly-accuracy-2-gpu-vlm
    # MI30x 4-GPU Tests
    - nightly-4-gpu
    - nightly-accuracy-8-gpu
    # MI30x Performance Tests - excluded from check (perf failures don't block CI)
    # - nightly-perf-2-gpu-text
    # - nightly-perf-2-gpu-vlm
    # MI30x Combined Accuracy + Performance Tests
    - nightly-8-gpu-grok1-int4
    - nightly-8-gpu-grok2
    - nightly-8-gpu-deepseek-v31
    - nightly-8-gpu-deepseek-v32
    - nightly-8-gpu-deepseek-v32-mtp
    - nightly-8-gpu-deepseek-v3-kv-fp8
    - nightly-8-gpu-kimi-k25
    - nightly-8-gpu-qwen3-235b
    - nightly-8-gpu-qwen35
    - nightly-8-gpu-glm5
    - nightly-8-gpu-glm51
    - nightly-8-gpu-minimax-m27
    # MI30x Diffusion Tests
    - nightly-1-gpu-zimage-turbo
    # MI35x jobs
    - nightly-test-1-gpu-mi35x
    - nightly-accuracy-8-gpu-mi35x
    - nightly-8-gpu-mi35x-grok1-int4
    - nightly-8-gpu-mi35x-grok2
    - nightly-8-gpu-mi35x-deepseek-r1-mxfp4
    - nightly-8-gpu-mi35x-deepseek-r1-mxfp4-kv-fp8
    - nightly-8-gpu-mi35x-deepseek-r1-mxfp4-ar-fusion
    - nightly-accuracy-8-gpu-mi35x-deepseek-v32
    - nightly-accuracy-8-gpu-mi35x-deepseek-v32-mtp
    - nightly-8-gpu-mi35x-kimi-k25
    - nightly-8-gpu-mi35x-qwen3-235b-mxfp4
    - nightly-8-gpu-mi35x-qwen35
    - nightly-8-gpu-mi35x-glm5
    - nightly-8-gpu-mi35x-glm51
    # MI35x perf jobs excluded from check - perf failures don't block CI
    # - nightly-perf-8-gpu-mi35x-deepseek-v32-basic
    # - nightly-perf-8-gpu-mi35x-deepseek-v32-mtp
  steps:
    - name: Check if any job failed
      env:
        # Each expression renders as the string "true" or "false".
        ANY_FAILED: ${{ contains(needs.*.result, 'failure') }}
        ANY_CANCELLED: ${{ contains(needs.*.result, 'cancelled') }}
      run: |
        # Failure is reported in preference to cancellation; either one fails the gate.
        if [[ "$ANY_FAILED" == "true" ]]; then
          echo "One or more nightly test jobs failed"
          exit 1
        elif [[ "$ANY_CANCELLED" == "true" ]]; then
          echo "One or more nightly test jobs were cancelled"
          exit 1
        fi
        echo "All nightly test jobs passed"