streaming session: spec v2 bonus accounting + comprehensive test matrix #86526

Workflow file for this run

.github/workflows/pr-test.yml at 6fd797f

	name: PR Test
	# Dynamic run-name for /rerun-stage commands to enable URL lookup
	# Format: "[stage-name] sha" for fork PRs, "[stage-name]" for non-fork, default for normal runs
	run-name: ${{ inputs.target_stage && (inputs.pr_head_sha && format('[{0}] {1}', inputs.target_stage, inputs.pr_head_sha) \|\| format('[{0}]', inputs.target_stage)) \|\| '' }}

	on:
	schedule:
	- cron: '0 /6 * *' # Run every 6 hours (UTC)
	pull_request:
	branches: [main]
	workflow_dispatch:
	inputs:
	target_stage:
	description: "Specific stage to run (optional, for quick testing)"
	required: false
	type: string
	default: ""
	force_continue_on_error:
	description: "Force continue-on-error (test scheduled CI behavior)"
	required: false
	type: boolean
	default: false
	pr_head_sha:
	description: "PR head SHA to checkout (for /rerun-stage on fork PRs)"
	required: false
	type: string
	default: ""
	test_parallel_dispatch:
	description: "Test parallel dispatch behavior (simulates scheduled run)"
	required: false
	type: boolean
	default: false
	workflow_call:
	inputs:
	git_ref:
	description: 'Git ref (branch, tag, or SHA) to test. If not provided, uses the default branch.'
	required: false
	type: string
	default: ''
	run_all_tests:
	description: "Run all tests (for releasing or testing purpose)"
	required: false
	type: boolean
	default: false
	skip_stage_health_check:
	description: "Skip stage health check fast-fail (e.g. for release branch cuts)"
	required: false
	type: boolean
	default: false

	concurrency:
	# Concurrency group structure: pr-test-{event}-{branch}-{pr_sha}-{stage}
	# - event_name prevents scheduled runs from colliding with fork PRs whose branch is named 'main'
	# (without it, both resolve the branch segment to 'main' and block each other)
	# - github.head_ref (pull_request) or github.ref_name (workflow_dispatch) normalizes to branch name
	# - pr_head_sha isolates /rerun-stage from main branch runs
	# - target_stage allows parallel stage dispatches to run independently
	group: pr-test-${{ github.event_name }}-${{ github.head_ref \|\| github.ref_name \|\| 'default' }}-${{ inputs.pr_head_sha \|\| 'current' }}-${{ inputs.target_stage \|\| inputs.git_ref \|\| 'all' }}
	cancel-in-progress: ${{ github.event_name != 'workflow_call' }}

	env:
	SGLANG_IS_IN_CI: true
	SGLANG_CUDA_COREDUMP: "1"
	SGLANG_JIT_DEEPGEMM_FAST_WARMUP: true
	SKIP_STAGE_HEALTH_CHECK: ${{ inputs.skip_stage_health_check == true && 'true' \|\| 'false' }}
	# Schedule / main-branch dispatch / workflow_call from main use refs/heads/main; PR events use refs/pull/*/merge
	SGLANG_PR_TEST_BYPASS_MAINTENANCE_ON_MAIN: ${{ github.ref == 'refs/heads/main' && 'true' \|\| 'false' }}

	permissions:
	actions: write
	contents: read
	issues: read
	pull-requests: read

	jobs:
	# =============================================== check changes ====================================================
	check-changes:
	runs-on: ubuntu-latest
	outputs:
	# Use API-based detection for target_stage mode (filter-api), otherwise use dorny/paths-filter (filter)
	main_package: ${{ steps.filter-api.outputs.main_package \|\| steps.filter.outputs.main_package \|\| steps.run-mode.outputs.run_all_tests }}
	# sgl_kernel is forced to false when target_stage is set, since sgl-kernel-build-wheels won't run
	# This prevents CUSTOM_BUILD_SGL_KERNEL=true when the wheel artifacts aren't available
	# Note: If PR has kernel changes AND target_stage is set, the validate-target-stage step will fail
	sgl_kernel: ${{ !inputs.target_stage && (steps.filter-api.outputs.sgl_kernel \|\| steps.filter.outputs.sgl_kernel) }}
	# Raw sgl_kernel value before target_stage override (used for validation)
	sgl_kernel_raw: ${{ steps.filter-api.outputs.sgl_kernel \|\| steps.filter.outputs.sgl_kernel }}
	jit_kernel: ${{ steps.filter-api.outputs.jit_kernel \|\| steps.filter.outputs.jit_kernel \|\| steps.run-mode.outputs.run_all_tests }}
	multimodal_gen: ${{ steps.filter-api.outputs.multimodal_gen \|\| steps.filter.outputs.multimodal_gen \|\| steps.run-mode.outputs.run_all_tests }}
	max_parallel: ${{ steps.set-parallel.outputs.max_parallel }}
	max_parallel_small: ${{ steps.set-parallel.outputs.max_parallel_small }}
	max_parallel_2gpu: ${{ steps.set-parallel.outputs.max_parallel_2gpu }}
	b200_runner: ${{ steps.set-runner.outputs.b200_runner }}
	enable_retry: ${{ steps.set-retry.outputs.enable_retry }}
	continue_on_error: ${{ steps.set-continue-on-error.outputs.continue_on_error }}
	steps:
	- name: Checkout code
	uses: actions/checkout@v4
	with:
	ref: ${{ inputs.pr_head_sha \|\| inputs.git_ref \|\| github.sha }}

	- uses: ./.github/actions/check-maintenance

	- name: Determine run mode
	id: run-mode
	run: \|
	# Run all tests for scheduled runs and workflow_call (when ref input is provided)
	# Note: github.event_name is inherited from caller, so we detect workflow_call by checking inputs.git_ref
	if [[ "${{ github.event_name }}" == "schedule" \|\| "${{ inputs.run_all_tests }}" == "true" ]]; then
	echo "run_all_tests=true" >> $GITHUB_OUTPUT
	echo "Run mode: ALL TESTS (schedule=${{ github.event_name == 'schedule' }}, run_all_tests=${{ inputs.run_all_tests }})"
	else
	echo "run_all_tests=false" >> $GITHUB_OUTPUT
	echo "Run mode: FILTERED (triggered by ${{ github.event_name }})"
	fi

	- name: Detect file changes
	id: filter
	uses: dorny/paths-filter@v3
	# Only use paths-filter for pull_request events (where it works correctly)
	# For workflow_dispatch with target_stage, we use GitHub API in the next step
	if: steps.run-mode.outputs.run_all_tests != 'true' && !inputs.target_stage
	with:
	filters: \|
	main_package:
	- ".github/workflows/pr-test.yml"
	- ".github/workflows/pr-gate.yml"
	- ".github/actions/**"
	- "python/pyproject.toml"
	- "python/sglang/!(multimodal_gen)/*/!(.md)"
	- "scripts/ci/cuda/*"
	- "scripts/ci/utils/*"
	- "test/*/!(.md)"
	multimodal_gen:
	- ".github/workflows/pr-test.yml"
	- ".github/workflows/pr-test-multimodal-gen.yml"
	- "python/pyproject.toml"
	- "python/sglang/multimodal_gen/*/.!(md\|ipynb)"
	- "python/sglang/jit_kernel/diffusion/**"
	- "python/sglang/jit_kernel/tests/diffusion/**"
	- "python/sglang/jit_kernel/benchmark/diffusion/**"
	- "python/sglang/cli/**"
	jit_kernel:
	- ".github/workflows/pr-test.yml"
	- ".github/workflows/pr-test-jit-kernel.yml"
	- "python/pyproject.toml"
	- "python/sglang/jit_kernel/**"
	sgl_kernel:
	- ".github/workflows/pr-test-sgl-kernel.yml"
	- "sgl-kernel/*/.!(md\|txt)"

	# For /rerun-stage (workflow_dispatch with target_stage), dorny/paths-filter doesn't work
	# correctly because it falls back to "last commit" detection which breaks for merge commits.
	# Instead, we use the GitHub API to compare the PR commit against main.
	- name: Detect file changes via API (for target_stage)
	id: filter-api
	if: inputs.target_stage && inputs.pr_head_sha
	env:
	GH_TOKEN: ${{ github.token }}
	run: \|
	echo "Detecting file changes via GitHub API for target_stage mode..."
	echo "PR head SHA: ${{ inputs.pr_head_sha }}"

	# Get the list of changed files by comparing PR commit against main
	# This correctly handles merge commits by looking at the actual PR diff
	CHANGED_FILES=$(gh api "repos/${{ github.repository }}/compare/main...${{ inputs.pr_head_sha }}" \
	--jq '[.files[].filename] \| .[]' 2>/dev/null \|\| echo "")

	if [ -z "$CHANGED_FILES" ]; then
	echo "Warning: Could not fetch changed files from API, assuming no changes"
	echo "sgl_kernel=false" >> $GITHUB_OUTPUT
	echo "main_package=false" >> $GITHUB_OUTPUT
	echo "jit_kernel=false" >> $GITHUB_OUTPUT
	echo "multimodal_gen=false" >> $GITHUB_OUTPUT
	exit 0
	fi

	echo "Changed files:"
	echo "$CHANGED_FILES" \| head -20
	echo "..."

	# Check for sgl-kernel changes
	if echo "$CHANGED_FILES" \| grep -qE "^(sgl-kernel/\|\.github/workflows/pr-test-sgl-kernel\.yml)"; then
	echo "sgl_kernel=true" >> $GITHUB_OUTPUT
	echo "Detected sgl-kernel changes"
	else
	echo "sgl_kernel=false" >> $GITHUB_OUTPUT
	fi

	# Check for main_package changes (excluding multimodal_gen)
	# Note: Need to filter out multimodal_gen before checking, not pipe grep -q output
	MAIN_PKG_FILES=$(echo "$CHANGED_FILES" \| grep -E "^(python/sglang/\|python/pyproject\.toml\|scripts/ci/cuda/\|scripts/ci/utils/\|test/\|\.github/workflows/pr-test\.yml\|\.github/workflows/pr-gate\.yml\|\.github/actions/)" \| grep -v "^python/sglang/multimodal_gen/" \|\| true)
	if [ -n "$MAIN_PKG_FILES" ]; then
	echo "main_package=true" >> $GITHUB_OUTPUT
	echo "Detected main_package changes"
	else
	echo "main_package=false" >> $GITHUB_OUTPUT
	fi

	# Check for jit_kernel changes
	if echo "$CHANGED_FILES" \| grep -qE "^(python/sglang/jit_kernel/\|python/pyproject\.toml\|\.github/workflows/pr-test\.yml\|\.github/workflows/pr-test-jit-kernel\.yml)"; then
	echo "jit_kernel=true" >> $GITHUB_OUTPUT
	echo "Detected jit_kernel changes"
	else
	echo "jit_kernel=false" >> $GITHUB_OUTPUT
	fi

	# Check for multimodal_gen changes, including diffusion-specific jit_kernel coverage
	if echo "$CHANGED_FILES" \| grep -qE "^(python/sglang/multimodal_gen/\|python/sglang/cli/\|python/sglang/jit_kernel/diffusion/\|python/sglang/jit_kernel/tests/diffusion/\|python/sglang/jit_kernel/benchmark/diffusion/\|python/pyproject\.toml\|\.github/workflows/pr-test\.yml\|\.github/workflows/pr-test-multimodal-gen\.yml)"; then
	echo "multimodal_gen=true" >> $GITHUB_OUTPUT
	echo "Detected multimodal_gen changes"
	else
	echo "multimodal_gen=false" >> $GITHUB_OUTPUT
	fi

	- name: Set max-parallel based on run type
	id: set-parallel
	env:
	GH_TOKEN: ${{ github.token }}
	run: \|
	# Determine if this run gets full parallelism (scheduled / high priority)
	FULL=false
	if [[ "${{ github.event_name }}" == "schedule" ]]; then
	FULL=true
	echo "Scheduled run detected, using full parallelism"
	elif [[ "${{ github.event_name }}" == "pull_request" && "${{ contains(github.event.pull_request.labels.*.name, 'high priority') }}" == "true" ]]; then
	FULL=true
	echo "High priority PR detected, using full parallelism"
	elif [[ -n "${{ inputs.target_stage }}" ]]; then
	# /rerun-stage (workflow_dispatch): query PR labels via GitHub API
	# Try SHA lookup first (fork PRs), fallback to branch name (non-fork PRs)
	LABELS=""
	PR_HEAD_SHA="${{ inputs.pr_head_sha }}"
	if [[ -n "$PR_HEAD_SHA" ]]; then
	LABELS=$(gh api "repos/${{ github.repository }}/commits/${PR_HEAD_SHA}/pulls" \
	--jq '.[0].labels[].name' 2>/dev/null \|\| true)
	fi
	if [[ -z "$LABELS" ]]; then
	LABELS=$(gh pr list --head "${{ github.ref_name }}" --repo "${{ github.repository }}" \
	--json labels --jq '.[0].labels[].name' 2>/dev/null \|\| true)
	fi
	echo "PR labels: ${LABELS:-"(none)"}"
	if echo "$LABELS" \| grep -Fxq "high priority"; then
	FULL=true
	echo "High priority PR detected via API (/rerun-stage), using full parallelism"
	fi
	fi

	# Set max-parallel for each runner type
	# 1-gpu-h100: 14 partitions, 1-gpu-5090: 8 partitions, 2-gpu-h100: 4 partitions
	if [[ "$FULL" == "true" ]]; then
	LEVEL=full
	echo "max_parallel=14" >> $GITHUB_OUTPUT
	echo "max_parallel_small=8" >> $GITHUB_OUTPUT
	echo "max_parallel_2gpu=4" >> $GITHUB_OUTPUT
	else
	LEVEL=low
	echo "max_parallel=3" >> $GITHUB_OUTPUT
	echo "max_parallel_small=3" >> $GITHUB_OUTPUT
	echo "max_parallel_2gpu=2" >> $GITHUB_OUTPUT
	fi
	echo "parallel_level=$LEVEL" >> $GITHUB_OUTPUT
	echo "Parallelism level: $LEVEL"

	- name: Set B200 runner tag
	id: set-runner
	run: \|
	# Use kernel-build runner only when sgl_kernel changes are detected AND we're not in target_stage mode
	# (target_stage skips wheel builds, so we can't use custom kernels)
	# Use API-based detection (filter-api) for target_stage mode, otherwise use dorny/paths-filter (filter)
	sgl_kernel="${{ steps.filter-api.outputs.sgl_kernel \|\| steps.filter.outputs.sgl_kernel }}"
	target_stage="${{ inputs.target_stage }}"
	if [[ "$sgl_kernel" == "true" && -z "$target_stage" ]]; then
	echo "b200_runner=4-gpu-b200-kernel" >> $GITHUB_OUTPUT
	else
	echo "b200_runner=4-gpu-b200" >> $GITHUB_OUTPUT
	fi

	- name: Enable retry for CI
	id: set-retry
	run: \|
	echo "enable_retry=true" >> $GITHUB_OUTPUT
	echo "Retry logic enabled for CI"

	- name: Set continue-on-error for full test runs
	id: set-continue-on-error
	run: \|
	if [[ "${{ steps.run-mode.outputs.run_all_tests }}" == "true" \|\| "${{ inputs.force_continue_on_error }}" == "true" ]]; then
	echo "continue_on_error=true" >> $GITHUB_OUTPUT
	echo "Full test run or force flag detected, enabling continue-on-error to run all tests"
	else
	echo "continue_on_error=false" >> $GITHUB_OUTPUT
	echo "Filtered run, continue-on-error disabled"
	fi

	- name: Validate target_stage with kernel changes
	# Use API-based detection (filter-api) for target_stage mode, otherwise use dorny/paths-filter (filter)
	if: inputs.target_stage && (steps.filter-api.outputs.sgl_kernel == 'true' \|\| steps.filter.outputs.sgl_kernel == 'true')
	run: \|
	echo "::error::Cannot use /rerun-stage when PR has sgl-kernel changes."
	echo "::error::The sgl-kernel-build-wheels job is skipped in target_stage mode, but this PR modifies sgl-kernel/ files."
	echo "::error::Please use /tag-and-rerun-ci to run the full workflow including kernel builds."
	echo ""
	echo "ERROR: Cannot use /rerun-stage when PR has sgl-kernel changes."
	echo ""
	echo "This PR modifies files in sgl-kernel/, which requires building custom kernel wheels."
	echo "The /rerun-stage command skips the wheel build job, so the test would run against"
	echo "the wrong (PyPI) version of sgl-kernel instead of your changes."
	echo ""
	echo "To properly test your kernel changes, use one of these commands instead:"
	echo " /tag-and-rerun-ci - Re-run the full workflow including kernel builds"
	echo " /rerun-ci - Re-run the full workflow"
	echo ""
	exit 1

	- name: Show filter results in summary (table)
	run: \|
	{
	echo "## Change Detection"
	echo ""
	echo "\| Component \| Changed \|"
	echo "\|-------------------\|---------\|"
	echo "\| main_package \| ${{ steps.filter-api.outputs.main_package \|\| steps.filter.outputs.main_package \|\| steps.run-mode.outputs.run_all_tests }} \|"
	echo "\| sgl_kernel (raw) \| ${{ steps.filter-api.outputs.sgl_kernel \|\| steps.filter.outputs.sgl_kernel }} \|"
	echo "\| sgl_kernel (used) \| ${{ !inputs.target_stage && (steps.filter-api.outputs.sgl_kernel \|\| steps.filter.outputs.sgl_kernel) }} \|"
	echo "\| jit_kernel \| ${{ steps.filter-api.outputs.jit_kernel \|\| steps.filter.outputs.jit_kernel \|\| steps.run-mode.outputs.run_all_tests }} \|"
	echo "\| multimodal_gen \| ${{ steps.filter-api.outputs.multimodal_gen \|\| steps.filter.outputs.multimodal_gen \|\| steps.run-mode.outputs.run_all_tests }} \|"
	echo "\| target_stage \| ${{ inputs.target_stage \|\| '(none)' }} \|"
	echo "\| detection_method \| ${{ inputs.target_stage && 'GitHub API' \|\| 'dorny/paths-filter' }} \|"
	echo "\| max_parallel \| ${{ steps.set-parallel.outputs.parallel_level }} (h100=${{ steps.set-parallel.outputs.max_parallel }}, 5090=${{ steps.set-parallel.outputs.max_parallel_small }}, 2gpu=${{ steps.set-parallel.outputs.max_parallel_2gpu }}) \|"
	echo "\| b200_runner \| ${{ steps.set-runner.outputs.b200_runner }} \|"
	echo "\| enable_retry \| ${{ steps.set-retry.outputs.enable_retry }} \|"
	echo "\| continue_on_error \| ${{ steps.set-continue-on-error.outputs.continue_on_error }} \|"
	} >> $GITHUB_STEP_SUMMARY

	# =============================================== Wait Jobs for Sequential PR Execution ====================================================
	# These jobs poll GitHub API to wait for previous stages to complete.
	# For PR runs: wait jobs run and enforce sequential execution via polling.
	# For scheduled runs: wait jobs are skipped, enabling parallel execution for easier retry.

	wait-for-stage-a:
	needs: [check-changes, call-gate]
	# Only run for PRs (not scheduled) and when not targeting a specific stage
	# Skip if call-gate failed (stage-a jobs will be skipped, nothing to wait for)
	# !cancelled() ensures this job respects workflow cancellation from concurrency group
	if: \|
	always() &&
	!cancelled() &&
	github.event_name == 'pull_request' &&
	!inputs.target_stage &&
	inputs.test_parallel_dispatch != true &&
	(needs.check-changes.outputs.main_package == 'true' \|\| needs.check-changes.outputs.sgl_kernel == 'true') &&
	(needs.call-gate.result == 'success' \|\| needs.call-gate.result == 'skipped')
	runs-on: ubuntu-latest
	outputs:
	stage_a_result: ${{ steps.wait.outputs.result }}
	steps:
	- uses: actions/checkout@v4

	- uses: ./.github/actions/check-maintenance

	- uses: ./.github/actions/wait-for-jobs
	id: wait
	with:
	stage-name: stage-a
	jobs: '["stage-a-test-1-gpu-small", "stage-a-test-cpu"]'
	max-wait-minutes: '240'

	wait-for-stage-b:
	needs: [check-changes, call-gate, wait-for-stage-a]
	# Only run for PRs (not scheduled) and when not targeting a specific stage
	# Skip if call-gate failed (stage-b jobs will be skipped, nothing to wait for)
	if: \|
	always() &&
	!cancelled() &&
	github.event_name == 'pull_request' &&
	!inputs.target_stage &&
	inputs.test_parallel_dispatch != true &&
	(needs.check-changes.outputs.main_package == 'true' \|\| needs.check-changes.outputs.sgl_kernel == 'true') &&
	(needs.wait-for-stage-a.result == 'success' \|\| needs.wait-for-stage-a.result == 'skipped') &&
	(needs.call-gate.result == 'success' \|\| needs.call-gate.result == 'skipped')
	runs-on: ubuntu-latest
	outputs:
	stage_b_result: ${{ steps.wait.outputs.result }}
	steps:
	- uses: actions/checkout@v4

	- uses: ./.github/actions/check-maintenance

	- uses: ./.github/actions/wait-for-jobs
	id: wait
	with:
	stage-name: stage-b
	jobs: \|
	[
	{"prefix": "stage-b-test-1-gpu-small", "expected_count": 8},
	{"prefix": "stage-b-test-1-gpu-large", "expected_count": 14},
	{"prefix": "stage-b-test-2-gpu-large", "expected_count": 4},
	{"prefix": "stage-b-test-4-gpu-b200", "expected_count": 1}
	]
	max-wait-minutes: '480'

	# =============================================== PR Gate ====================================================
	call-gate:
	needs: check-changes
	# Skip for scheduled runs (they run all tests) and when target_stage is specified
	if: \|
	github.event_name != 'schedule' &&
	inputs.test_parallel_dispatch != true &&
	!inputs.target_stage &&
	(
	needs.check-changes.outputs.main_package == 'true' \|\|
	needs.check-changes.outputs.sgl_kernel == 'true' \|\|
	needs.check-changes.outputs.jit_kernel == 'true' \|\|
	needs.check-changes.outputs.multimodal_gen == 'true'
	)
	uses: ./.github/workflows/pr-gate.yml
	secrets: inherit

	# =============================================== sgl-kernel ====================================================

	sgl-kernel-build-wheels:
	needs: [check-changes, call-gate]
	# Skip for scheduled runs (they run stages independently) and when target_stage is set
	if: github.event_name != 'schedule' && inputs.test_parallel_dispatch != true && !inputs.target_stage && needs.check-changes.outputs.sgl_kernel == 'true'
	runs-on: x64-kernel-build-node
	timeout-minutes: 240
	strategy:
	matrix:
	include:
	- python-version: "3.10"
	cuda-version: "13.0"
	name: Build Wheel
	steps:
	- name: Cleanup
	run: \|
	sudo rm -rf $GITHUB_WORKSPACE/* \|\| true

	- uses: actions/checkout@v4
	with:
	submodules: "recursive"
	ref: ${{ inputs.pr_head_sha \|\| inputs.git_ref \|\| github.sha }}

	- uses: ./.github/actions/check-maintenance

	- name: Set up Python ${{ matrix.python-version }}
	uses: actions/setup-python@v5
	with:
	python-version: ${{ matrix.python-version }}

	- name: Build wheel for Python ${{ matrix.python-version }} and CUDA ${{ matrix.cuda-version }}
	run: \|
	cd sgl-kernel
	./build.sh "${{ matrix.python-version }}" "${{ matrix.cuda-version }}"
	env:
	USE_CCACHE: 1

	- name: Verify wheel artifacts
	run: \|
	ls -alh sgl-kernel/dist
	ls -alh sgl-kernel/dist/*.whl

	- name: Upload artifacts
	uses: actions/upload-artifact@v4
	with:
	name: wheel-python${{ matrix.python-version }}-cuda${{ matrix.cuda-version }}
	path: sgl-kernel/dist/*
	if-no-files-found: error

	sgl-kernel-build-wheels-arm:
	needs: [check-changes, call-gate]
	# Skip for scheduled runs (they run stages independently) and when target_stage is set
	if: github.event_name != 'schedule' && inputs.test_parallel_dispatch != true && !inputs.target_stage && needs.check-changes.outputs.sgl_kernel == 'true'
	runs-on: arm-kernel-build-node
	timeout-minutes: 240
	strategy:
	matrix:
	include:
	- python-version: "3.10"
	cuda-version: "13.0"
	name: Build Wheel Arm
	steps:
	- name: Cleanup
	run: \|
	if [ -d "$GITHUB_WORKSPACE" ]; then
	sudo rm -rf "$GITHUB_WORKSPACE"/* \|\| true
	else
	echo "$GITHUB_WORKSPACE does not exist, nothing to clean"
	fi

	- uses: actions/checkout@v4
	with:
	submodules: "recursive"
	ref: ${{ inputs.pr_head_sha \|\| inputs.git_ref \|\| github.sha }}

	- uses: ./.github/actions/check-maintenance

	- name: Set up Python ${{ matrix.python-version }}
	uses: actions/setup-python@v5
	with:
	python-version: ${{ matrix.python-version }}

	- name: Build wheel for Python ${{ matrix.python-version }} and CUDA ${{ matrix.cuda-version }}
	run: \|
	cd sgl-kernel
	./build.sh "${{ matrix.python-version }}" "${{ matrix.cuda-version }}"
	env:
	USE_CCACHE: 1

	- name: Verify wheel artifacts
	run: \|
	ls -alh sgl-kernel/dist
	ls -alh sgl-kernel/dist/*.whl

	- name: Upload artifacts
	uses: actions/upload-artifact@v4
	with:
	name: wheel-python${{ matrix.python-version }}-cuda${{ matrix.cuda-version }}-aarch64
	path: sgl-kernel/dist/*
	if-no-files-found: error

	call-sgl-kernel-tests:
	needs: [check-changes, call-gate, sgl-kernel-build-wheels]
	if: \|
	github.event_name != 'schedule' &&
	inputs.test_parallel_dispatch != true &&
	!inputs.target_stage &&
	needs.check-changes.outputs.sgl_kernel == 'true'
	uses: ./.github/workflows/pr-test-sgl-kernel.yml
	with:
	sgl_kernel: ${{ needs.check-changes.outputs.sgl_kernel }}
	b200_runner: ${{ needs.check-changes.outputs.b200_runner }}
	pr_head_sha: ${{ inputs.pr_head_sha \|\| '' }}
	git_ref: ${{ inputs.git_ref \|\| '' }}
	skip_stage_health_check: ${{ inputs.skip_stage_health_check == true }}
	secrets: inherit

	# =============================================== jit-kernel ====================================================

	call-jit-kernel-tests:
	needs: [check-changes, call-gate]
	if: needs.check-changes.outputs.jit_kernel == 'true'
	uses: ./.github/workflows/pr-test-jit-kernel.yml
	with:
	jit_kernel: ${{ needs.check-changes.outputs.jit_kernel }}
	b200_runner: ${{ needs.check-changes.outputs.b200_runner }}
	pr_head_sha: ${{ inputs.pr_head_sha \|\| '' }}
	git_ref: ${{ inputs.git_ref \|\| '' }}
	target_stage: ${{ inputs.target_stage \|\| '' }}
	test_parallel_dispatch: ${{ inputs.test_parallel_dispatch == true && 'true' \|\| 'false' }}
	skip_stage_health_check: ${{ inputs.skip_stage_health_check == true }}
	secrets: inherit

	# =============================================== primary ====================================================

	# Runs on 5090 (32GB, SM120)
	stage-a-test-1-gpu-small:
	needs: [check-changes, call-gate, sgl-kernel-build-wheels]
	if: \|
	always() &&
	(
	(inputs.target_stage == 'stage-a-test-1-gpu-small') \|\|
	(
	!inputs.target_stage &&
	((github.event_name == 'schedule' \|\| inputs.test_parallel_dispatch == true) \|\| (!failure() && !cancelled())) &&
	((needs.check-changes.outputs.main_package == 'true') \|\| (needs.check-changes.outputs.sgl_kernel == 'true'))
	)
	)
	runs-on: 1-gpu-5090
	timeout-minutes: 240
	steps:
	- name: Checkout code
	uses: actions/checkout@v4
	with:
	ref: ${{ inputs.pr_head_sha \|\| inputs.git_ref \|\| github.sha }}

	- uses: ./.github/actions/check-stage-health

	- uses: ./.github/actions/check-maintenance

	- name: Download artifacts
	if: needs.check-changes.outputs.sgl_kernel == 'true'
	uses: actions/download-artifact@v4
	with:
	path: sgl-kernel/dist/
	merge-multiple: true
	pattern: wheel-python3.10-cuda13.0

	- name: Install dependencies
	timeout-minutes: 20
	run: \|
	CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/cuda/ci_install_dependency.sh

	- name: Run test
	timeout-minutes: 10
	env:
	CONTINUE_ON_ERROR_FLAG: ${{ needs.check-changes.outputs.continue_on_error == 'true' && '--continue-on-error' \|\| '' }}
	run: \|
	cd test/
	python3 run_suite.py --hw cuda --suite stage-a-test-1-gpu-small $CONTINUE_ON_ERROR_FLAG

	- uses: ./.github/actions/upload-cuda-coredumps
	if: failure()

	stage-a-test-cpu:
	needs: [check-changes, call-gate]
	if: \|
	always() &&
	(
	(inputs.target_stage == 'stage-a-test-cpu') \|\|
	(
	!inputs.target_stage &&
	((github.event_name == 'schedule' \|\| inputs.test_parallel_dispatch == true) \|\| (!failure() && !cancelled())) &&
	(needs.check-changes.outputs.main_package == 'true')
	)
	)
	runs-on: ubuntu-latest
	timeout-minutes: 240
	steps:
	- name: Free disk space
	run: \|
	sudo rm -rf /usr/share/dotnet /usr/local/lib/android /opt/ghc
	df -h

	- name: Checkout code
	uses: actions/checkout@v4
	with:
	ref: ${{ inputs.pr_head_sha \|\| inputs.git_ref \|\| github.sha }}

	- uses: ./.github/actions/check-stage-health

	- uses: ./.github/actions/check-maintenance

	- name: Set up Python
	uses: actions/setup-python@v5
	with:
	python-version: '3.10'

	- name: Install uv
	uses: astral-sh/setup-uv@v5

	# uv pip targets a venv by default; setup-python has no venv — install into that interpreter (see UV_SYSTEM_PYTHON in https://docs.astral.sh/uv/guides/integration/github/)
	- name: Install dependencies
	timeout-minutes: 20
	env:
	UV_SYSTEM_PYTHON: "1"
	run: \|
	uv pip install -e "python[dev]" --index-strategy unsafe-best-match --prerelease allow

	- name: Run test
	timeout-minutes: 10
	env:
	CONTINUE_ON_ERROR_FLAG: ${{ needs.check-changes.outputs.continue_on_error == 'true' && '--continue-on-error' \|\| '' }}
	run: \|
	cd test/
	python3 run_suite.py --hw cpu --suite stage-a-test-cpu $CONTINUE_ON_ERROR_FLAG

	# Runs on 5090 (32GB, SM120)
	stage-b-test-1-gpu-small:
	needs: [check-changes, call-gate, wait-for-stage-a, sgl-kernel-build-wheels]
	if: \|
	always() &&
	(
	(inputs.target_stage == 'stage-b-test-1-gpu-small') \|\|
	(
	!inputs.target_stage &&
	((github.event_name == 'schedule' \|\| inputs.test_parallel_dispatch == true) \|\| (!failure() && !cancelled())) &&
	((needs.check-changes.outputs.main_package == 'true') \|\| (needs.check-changes.outputs.sgl_kernel == 'true'))
	)
	)
	runs-on: 1-gpu-5090
	timeout-minutes: 240
	strategy:
	fail-fast: false
	max-parallel: ${{ fromJson(needs.check-changes.outputs.max_parallel_small) }}
	matrix:
	partition: [0, 1, 2, 3, 4, 5, 6, 7]
	steps:
	- name: Checkout code
	uses: actions/checkout@v4
	with:
	ref: ${{ inputs.pr_head_sha \|\| inputs.git_ref \|\| github.sha }}

	- uses: ./.github/actions/check-stage-health

	- uses: ./.github/actions/check-maintenance

	- name: Download artifacts
	if: needs.check-changes.outputs.sgl_kernel == 'true'
	uses: actions/download-artifact@v4
	with:
	path: sgl-kernel/dist/
	merge-multiple: true
	pattern: wheel-python3.10-cuda13.0

	- name: Install dependencies
	timeout-minutes: 20
	run: \|
	source /etc/profile.d/sglang-ci.sh
	CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/cuda/ci_install_dependency.sh

	- name: Run test
	timeout-minutes: 30
	env:
	CONTINUE_ON_ERROR_FLAG: ${{ needs.check-changes.outputs.continue_on_error == 'true' && '--continue-on-error' \|\| '' }}
	run: \|
	source /etc/profile.d/sglang-ci.sh
	cd test/
	python3 run_suite.py --hw cuda --suite stage-b-test-1-gpu-small --auto-partition-id ${{ matrix.partition }} --auto-partition-size 8 $CONTINUE_ON_ERROR_FLAG

	- uses: ./.github/actions/upload-cuda-coredumps
	if: failure()
	with:
	artifact-suffix: ${{ matrix.partition }}

	# Runs on H100 (80GB, SM90) - tests that don't pass on 5090 (FA3, FP8, high VRAM, etc.)
	stage-b-test-1-gpu-large:
	needs: [check-changes, call-gate, wait-for-stage-a, sgl-kernel-build-wheels]
	if: \|
	always() &&
	(
	(inputs.target_stage == 'stage-b-test-1-gpu-large') \|\|
	(
	!inputs.target_stage &&
	((github.event_name == 'schedule' \|\| inputs.test_parallel_dispatch == true) \|\| (!failure() && !cancelled())) &&
	((needs.check-changes.outputs.main_package == 'true') \|\| (needs.check-changes.outputs.sgl_kernel == 'true'))
	)
	)
	runs-on: 1-gpu-h100
	timeout-minutes: 240
	strategy:
	fail-fast: false
	max-parallel: ${{ fromJson(needs.check-changes.outputs.max_parallel) }}
	matrix:
	partition: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]
	steps:
	- name: Checkout code
	uses: actions/checkout@v4
	with:
	ref: ${{ inputs.pr_head_sha \|\| inputs.git_ref \|\| github.sha }}

	- uses: ./.github/actions/check-stage-health

	- uses: ./.github/actions/check-maintenance

	- name: Download artifacts
	if: needs.check-changes.outputs.sgl_kernel == 'true'
	uses: actions/download-artifact@v4
	with:
	path: sgl-kernel/dist/
	merge-multiple: true
	pattern: wheel-python3.10-cuda13.0

	- name: Install dependencies
	timeout-minutes: 20
	run: \|
	CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/cuda/ci_install_dependency.sh

	- name: Run test
	timeout-minutes: 30
	env:
	CONTINUE_ON_ERROR_FLAG: ${{ needs.check-changes.outputs.continue_on_error == 'true' && '--continue-on-error' \|\| '' }}
	run: \|
	cd test/
	python3 run_suite.py --hw cuda --suite stage-b-test-1-gpu-large --auto-partition-id ${{ matrix.partition }} --auto-partition-size 14 --timeout-per-file 1800 $CONTINUE_ON_ERROR_FLAG

	- uses: ./.github/actions/upload-cuda-coredumps
	if: failure()
	with:
	artifact-suffix: ${{ matrix.partition }}

	stage-b-test-2-gpu-large:
	needs: [check-changes, call-gate, wait-for-stage-a, sgl-kernel-build-wheels]
	if: \|
	always() &&
	(
	(inputs.target_stage == 'stage-b-test-2-gpu-large') \|\|
	(
	!inputs.target_stage &&
	((github.event_name == 'schedule' \|\| inputs.test_parallel_dispatch == true) \|\| (!failure() && !cancelled())) &&
	((needs.check-changes.outputs.main_package == 'true') \|\| (needs.check-changes.outputs.sgl_kernel == 'true'))
	)
	)
	runs-on: 2-gpu-h100
	timeout-minutes: 240
	strategy:
	fail-fast: false
	max-parallel: ${{ fromJson(needs.check-changes.outputs.max_parallel_2gpu) }}
	matrix:
	partition: [0, 1, 2, 3]
	steps:
	- name: Checkout code
	uses: actions/checkout@v4
	with:
	ref: ${{ inputs.pr_head_sha \|\| inputs.git_ref \|\| github.sha }}

	- uses: ./.github/actions/check-stage-health

	- uses: ./.github/actions/check-maintenance

	- name: Download artifacts
	if: needs.check-changes.outputs.sgl_kernel == 'true'
	uses: actions/download-artifact@v4
	with:
	path: sgl-kernel/dist/
	merge-multiple: true
	pattern: wheel-python3.10-cuda13.0

	- name: Install dependencies
	timeout-minutes: 20
	run: \|
	CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/cuda/ci_install_dependency.sh

	- name: Run test
	timeout-minutes: 30
	env:
	CONTINUE_ON_ERROR_FLAG: ${{ needs.check-changes.outputs.continue_on_error == 'true' && '--continue-on-error' \|\| '' }}
	run: \|
	cd test/
	python3 run_suite.py --hw cuda --suite stage-b-test-2-gpu-large --auto-partition-id ${{ matrix.partition }} --auto-partition-size 4 $CONTINUE_ON_ERROR_FLAG

	- uses: ./.github/actions/upload-cuda-coredumps
	if: failure()
	with:
	artifact-suffix: ${{ matrix.partition }}

	# Stage B (B200): runs the `stage-b-test-4-gpu-b200` suite followed by the
	# FA4 jit_kernel tests on the runner label published by check-changes.
	stage-b-test-4-gpu-b200:
	  needs: [check-changes, call-gate, wait-for-stage-a, sgl-kernel-build-wheels]
	  # Runs when this stage is requested through `target_stage`. With no target
	  # stage it requires a main_package or sgl_kernel change and either a
	  # scheduled / test_parallel_dispatch run or no failed/cancelled upstream job.
	  if: |
	    always() &&
	    (
	      (inputs.target_stage == 'stage-b-test-4-gpu-b200') ||
	      (
	        !inputs.target_stage &&
	        ((github.event_name == 'schedule' || inputs.test_parallel_dispatch == true) || (!failure() && !cancelled())) &&
	        ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
	      )
	    )
	  runs-on: ${{ needs.check-changes.outputs.b200_runner }}
	  timeout-minutes: 240
	  strategy:
	    fail-fast: false

	  steps:
	    - name: Checkout code
	      uses: actions/checkout@v4
	      with:
	        # Prefer the explicit PR head SHA, then the caller-supplied ref, then the event SHA.
	        ref: ${{ inputs.pr_head_sha || inputs.git_ref || github.sha }}

	    - uses: ./.github/actions/check-stage-health

	    - uses: ./.github/actions/check-maintenance

	    # Wheels are only fetched when check-changes reports an sgl_kernel change.
	    - name: Download artifacts
	      if: needs.check-changes.outputs.sgl_kernel == 'true'
	      uses: actions/download-artifact@v6
	      with:
	        path: sgl-kernel/dist/
	        merge-multiple: true
	        pattern: wheel-python3.10-cuda13.0

	    - name: Install dependencies
	      timeout-minutes: 20
	      run: |
	        CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/cuda/ci_install_dependency.sh

	    - name: Run test
	      timeout-minutes: 30
	      env:
	        # Expands to `--continue-on-error` or to an empty string.
	        CONTINUE_ON_ERROR_FLAG: ${{ needs.check-changes.outputs.continue_on_error == 'true' && '--continue-on-error' || '' }}
	      run: |
	        cd test
	        python3 run_suite.py --hw cuda --suite stage-b-test-4-gpu-b200 $CONTINUE_ON_ERROR_FLAG

	    - name: Run FA4 jit_kernel tests (SM100+)
	      timeout-minutes: 10
	      run: |
	        python3 -m pytest -q python/sglang/jit_kernel/tests/test_flash_attention_4.py

	    # Core dumps are uploaded only when an earlier step failed.
	    - uses: ./.github/actions/upload-cuda-coredumps
	      if: failure()

	# Delegates the multimodal-gen stages to the reusable workflow
	# `pr-test-multimodal-gen.yml`, forwarding check-changes outputs and the
	# caller's inputs.
	call-multimodal-gen-tests:
	  needs: [check-changes, call-gate, sgl-kernel-build-wheels]
	  # Never runs once the run is cancelled. Otherwise runs when one of the
	  # listed multimodal-gen stages is targeted, or, with no target stage, when
	  # multimodal_gen changed and the run is scheduled / test_parallel_dispatch
	  # or has no failed upstream job.
	  if: |
	    always() &&
	    !cancelled() &&
	    (
	      inputs.target_stage == 'multimodal-gen-test-1-gpu' ||
	      inputs.target_stage == 'multimodal-gen-test-2-gpu' ||
	      inputs.target_stage == 'multimodal-gen-component-accuracy-1-gpu' ||
	      inputs.target_stage == 'multimodal-gen-component-accuracy-2-gpu' ||
	      inputs.target_stage == 'multimodal-gen-test-1-b200' ||
	      inputs.target_stage == 'multimodal-gen-unit-test' ||
	      (
	        !inputs.target_stage &&
	        ((github.event_name == 'schedule' || inputs.test_parallel_dispatch == true) || (!failure() && !cancelled())) &&
	        needs.check-changes.outputs.multimodal_gen == 'true'
	      )
	    )
	  uses: ./.github/workflows/pr-test-multimodal-gen.yml
	  with:
	    multimodal_gen: ${{ needs.check-changes.outputs.multimodal_gen }}
	    sgl_kernel: ${{ needs.check-changes.outputs.sgl_kernel }}
	    b200_runner: ${{ needs.check-changes.outputs.b200_runner }}
	    continue_on_error: ${{ needs.check-changes.outputs.continue_on_error }}
	    pr_head_sha: ${{ inputs.pr_head_sha || '' }}
	    git_ref: ${{ inputs.git_ref || '' }}
	    target_stage: ${{ inputs.target_stage || '' }}
	    # Boolean inputs are forwarded as the strings 'true' / 'false'.
	    test_parallel_dispatch: ${{ inputs.test_parallel_dispatch == true && 'true' || 'false' }}
	    # 'true' when call-gate, sgl-kernel-build-wheels or check-changes failed.
	    caller_needs_failure: ${{ (needs.call-gate.result == 'failure' || needs.sgl-kernel-build-wheels.result == 'failure' || needs.check-changes.result == 'failure') && 'true' || 'false' }}
	    skip_stage_health_check: ${{ inputs.skip_stage_health_check == true && 'true' || 'false' }}
	  secrets: inherit

	# Stage C (4x H100): runs the `stage-c-test-4-gpu-h100` suite across three
	# matrix partitions on the `4-gpu-h100` runner.
	stage-c-test-4-gpu-h100:
	  needs: [check-changes, call-gate, wait-for-stage-b]
	  # Runs when this stage is requested through `target_stage`. With no target
	  # stage it requires a main_package or sgl_kernel change and either a
	  # scheduled / test_parallel_dispatch run or no failed/cancelled upstream job.
	  if: |
	    always() &&
	    (
	      (inputs.target_stage == 'stage-c-test-4-gpu-h100') ||
	      (
	        !inputs.target_stage &&
	        ((github.event_name == 'schedule' || inputs.test_parallel_dispatch == true) || (!failure() && !cancelled())) &&
	        ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
	      )
	    )
	  runs-on: 4-gpu-h100
	  timeout-minutes: 240
	  strategy:
	    fail-fast: false
	    matrix:
	      # Each entry is passed as --auto-partition-id below; --auto-partition-size
	      # is set to the number of entries (3), so keep the two in sync.
	      part: [0, 1, 2]
	  steps:
	    - name: Checkout code
	      uses: actions/checkout@v4
	      with:
	        # Prefer the explicit PR head SHA, then the caller-supplied ref, then the event SHA.
	        ref: ${{ inputs.pr_head_sha || inputs.git_ref || github.sha }}

	    - uses: ./.github/actions/check-stage-health

	    - uses: ./.github/actions/check-maintenance

	    # Wheels are only fetched when check-changes reports an sgl_kernel change.
	    - name: Download artifacts
	      if: needs.check-changes.outputs.sgl_kernel == 'true'
	      uses: actions/download-artifact@v4
	      with:
	        path: sgl-kernel/dist/
	        merge-multiple: true
	        pattern: wheel-python3.10-cuda13.0

	    - name: Install dependencies
	      timeout-minutes: 20
	      run: |
	        CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/cuda/ci_install_dependency.sh

	    - name: Run test
	      timeout-minutes: 30
	      env:
	        # Expands to `--continue-on-error` or to an empty string.
	        CONTINUE_ON_ERROR_FLAG: ${{ needs.check-changes.outputs.continue_on_error == 'true' && '--continue-on-error' || '' }}
	      run: |
	        cd test
	        python3 run_suite.py --hw cuda --suite stage-c-test-4-gpu-h100 --auto-partition-id ${{ matrix.part }} --auto-partition-size 3 $CONTINUE_ON_ERROR_FLAG

	    # On failure, upload core dumps with the partition index as artifact suffix.
	    - uses: ./.github/actions/upload-cuda-coredumps
	      if: failure()
	      with:
	        artifact-suffix: ${{ matrix.part }}

	# Stage C (8x H200): warms up DeepGEMM and server CUDA graphs, then runs the
	# `stage-c-test-8-gpu-h200` suite across four matrix partitions.
	stage-c-test-8-gpu-h200:
	  needs: [check-changes, call-gate, wait-for-stage-b]
	  # Runs when this stage is requested through `target_stage`. With no target
	  # stage it requires a main_package or sgl_kernel change and either a
	  # scheduled / test_parallel_dispatch run or no failed/cancelled upstream job.
	  if: |
	    always() &&
	    (
	      (inputs.target_stage == 'stage-c-test-8-gpu-h200') ||
	      (
	        !inputs.target_stage &&
	        ((github.event_name == 'schedule' || inputs.test_parallel_dispatch == true) || (!failure() && !cancelled())) &&
	        ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
	      )
	    )
	  runs-on: 8-gpu-h200
	  timeout-minutes: 240
	  strategy:
	    fail-fast: false
	    matrix:
	      # Each entry is passed as --auto-partition-id below; --auto-partition-size
	      # is set to the number of entries (4), so keep the two in sync.
	      part: [0, 1, 2, 3]
	  steps:
	    - name: Checkout code
	      uses: actions/checkout@v4
	      with:
	        # Prefer the explicit PR head SHA, then the caller-supplied ref, then the event SHA.
	        ref: ${{ inputs.pr_head_sha || inputs.git_ref || github.sha }}

	    - uses: ./.github/actions/check-stage-health

	    - uses: ./.github/actions/check-maintenance

	    # Wheels are only fetched when check-changes reports an sgl_kernel change.
	    - name: Download artifacts
	      if: needs.check-changes.outputs.sgl_kernel == 'true'
	      uses: actions/download-artifact@v4
	      with:
	        path: sgl-kernel/dist/
	        merge-multiple: true
	        pattern: wheel-python3.10-cuda13.0

	    - name: Install dependencies
	      timeout-minutes: 20
	      run: |
	        CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/cuda/ci_install_dependency.sh

	    # Arguments are `<model>:<n>` pairs; NOTE(review): `:8` presumably is the
	    # GPU / tensor-parallel count - confirm against the warmup script.
	    - name: Warmup DeepGEMM JIT Compilation
	      timeout-minutes: 25
	      run: |
	        python3 scripts/ci/cuda/warmup_deep_gemm.py \
	          deepseek-ai/DeepSeek-V3-0324:8 \
	          deepseek-ai/DeepSeek-V3.2-Exp:8

	    - name: Warmup Server CUDA Graphs
	      timeout-minutes: 25
	      run: |
	        python3 scripts/ci/cuda/warmup_server.py \
	          deepseek-ai/DeepSeek-V3-0324:8 \
	          inclusionAI/Ring-2.5-1T:8

	    - name: Run test
	      timeout-minutes: 30
	      env:
	        # Expands to `--continue-on-error` or to an empty string.
	        CONTINUE_ON_ERROR_FLAG: ${{ needs.check-changes.outputs.continue_on_error == 'true' && '--continue-on-error' || '' }}
	      run: |
	        cd test
	        python3 run_suite.py --hw cuda --suite stage-c-test-8-gpu-h200 --auto-partition-id ${{ matrix.part }} --auto-partition-size 4 $CONTINUE_ON_ERROR_FLAG

	    # On failure, upload core dumps with the partition index as artifact suffix.
	    - uses: ./.github/actions/upload-cuda-coredumps
	      if: failure()
	      with:
	        artifact-suffix: ${{ matrix.part }}

	# Stage C (8x H20): installs dependencies through the DeepEP install script
	# and runs the `stage-c-test-8-gpu-h20` suite across two matrix partitions.
	stage-c-test-8-gpu-h20:
	  needs: [check-changes, call-gate, wait-for-stage-b]
	  # Runs when this stage is requested through `target_stage`. With no target
	  # stage it requires a main_package or sgl_kernel change and either a
	  # scheduled / test_parallel_dispatch run or no failed/cancelled upstream job.
	  if: |
	    always() &&
	    (
	      (inputs.target_stage == 'stage-c-test-8-gpu-h20') ||
	      (
	        !inputs.target_stage &&
	        ((github.event_name == 'schedule' || inputs.test_parallel_dispatch == true) || (!failure() && !cancelled())) &&
	        ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
	      )
	    )
	  runs-on: 8-gpu-h20
	  timeout-minutes: 240
	  env:
	    # Job-wide list of RDMA device names exported to every step.
	    SGLANG_CI_RDMA_ALL_DEVICES: "mlx5_1,mlx5_2,mlx5_3,mlx5_4"
	  strategy:
	    fail-fast: false
	    matrix:
	      # Each entry is passed as --auto-partition-id below; --auto-partition-size
	      # is set to the number of entries (2), so keep the two in sync.
	      part: [0, 1]
	  steps:
	    - name: Checkout code
	      uses: actions/checkout@v4
	      with:
	        # Prefer the explicit PR head SHA, then the caller-supplied ref, then the event SHA.
	        ref: ${{ inputs.pr_head_sha || inputs.git_ref || github.sha }}

	    - uses: ./.github/actions/check-stage-health

	    - uses: ./.github/actions/check-maintenance

	    # Wheels are only fetched when check-changes reports an sgl_kernel change.
	    - name: Download artifacts
	      if: needs.check-changes.outputs.sgl_kernel == 'true'
	      uses: actions/download-artifact@v4
	      with:
	        path: sgl-kernel/dist/
	        merge-multiple: true
	        pattern: wheel-python3.10-cuda13.0

	    - name: Install dependencies
	      timeout-minutes: 20
	      run: |
	        CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/cuda/ci_install_deepep.sh

	    - name: Run test
	      timeout-minutes: 30
	      env:
	        # Expands to `--continue-on-error` or to an empty string.
	        CONTINUE_ON_ERROR_FLAG: ${{ needs.check-changes.outputs.continue_on_error == 'true' && '--continue-on-error' || '' }}
	      run: |
	        cd test
	        python3 run_suite.py --hw cuda --suite stage-c-test-8-gpu-h20 --auto-partition-id ${{ matrix.part }} --auto-partition-size 2 $CONTINUE_ON_ERROR_FLAG

	    # On failure, upload core dumps with the partition index as artifact suffix.
	    - uses: ./.github/actions/upload-cuda-coredumps
	      if: failure()
	      with:
	        artifact-suffix: ${{ matrix.part }}

	# Stage C (DeepEP, 4x H100): installs through the DeepEP install script, warms
	# up DeepGEMM and server CUDA graphs, then runs the
	# `stage-c-test-deepep-4-gpu-h100` suite as a single, unpartitioned job.
	stage-c-test-deepep-4-gpu-h100:
	  needs: [check-changes, call-gate, wait-for-stage-b]
	  # Runs when this stage is requested through `target_stage`. With no target
	  # stage it requires a main_package or sgl_kernel change and either a
	  # scheduled / test_parallel_dispatch run or no failed/cancelled upstream job.
	  if: |
	    always() &&
	    (
	      (inputs.target_stage == 'stage-c-test-deepep-4-gpu-h100') ||
	      (
	        !inputs.target_stage &&
	        ((github.event_name == 'schedule' || inputs.test_parallel_dispatch == true) || (!failure() && !cancelled())) &&
	        ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
	      )
	    )
	  runs-on: 4-gpu-h100
	  timeout-minutes: 240
	  steps:
	    - name: Checkout code
	      uses: actions/checkout@v4
	      with:
	        # Prefer the explicit PR head SHA, then the caller-supplied ref, then the event SHA.
	        ref: ${{ inputs.pr_head_sha || inputs.git_ref || github.sha }}

	    - uses: ./.github/actions/check-stage-health

	    - uses: ./.github/actions/check-maintenance

	    # Wheels are only fetched when check-changes reports an sgl_kernel change.
	    - name: Download artifacts
	      if: needs.check-changes.outputs.sgl_kernel == 'true'
	      uses: actions/download-artifact@v4
	      with:
	        path: sgl-kernel/dist/
	        merge-multiple: true
	        pattern: wheel-python3.10-cuda13.0

	    - name: Install dependencies
	      timeout-minutes: 20
	      run: |
	        CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/cuda/ci_install_deepep.sh

	    - name: Warmup DeepGEMM JIT Compilation
	      timeout-minutes: 25
	      run: |
	        python3 scripts/ci/cuda/warmup_deep_gemm.py \
	          lmsys/sglang-ci-dsv3-test:4

	    - name: Warmup Server CUDA Graphs
	      timeout-minutes: 25
	      run: |
	        python3 scripts/ci/cuda/warmup_server.py \
	          lmsys/sglang-ci-dsv3-test:4

	    - name: Run test
	      timeout-minutes: 30
	      env:
	        # Expands to `--continue-on-error` or to an empty string.
	        CONTINUE_ON_ERROR_FLAG: ${{ needs.check-changes.outputs.continue_on_error == 'true' && '--continue-on-error' || '' }}
	      run: |
	        cd test
	        python3 run_suite.py --hw cuda --suite stage-c-test-deepep-4-gpu-h100 $CONTINUE_ON_ERROR_FLAG

	    # Core dumps are uploaded only when an earlier step failed.
	    - uses: ./.github/actions/upload-cuda-coredumps
	      if: failure()

	# Stage C (DeepEP, 8x H200): installs through the DeepEP install script, warms
	# up DeepGEMM and server CUDA graphs, then runs the
	# `stage-c-test-deepep-8-gpu-h200` suite on the `8-gpu-h200-deepep` runner.
	stage-c-test-deepep-8-gpu-h200:
	  needs: [check-changes, call-gate, wait-for-stage-b]
	  # Runs when this stage is requested through `target_stage`. With no target
	  # stage it requires a main_package or sgl_kernel change and either a
	  # scheduled / test_parallel_dispatch run or no failed/cancelled upstream job.
	  if: |
	    always() &&
	    (
	      (inputs.target_stage == 'stage-c-test-deepep-8-gpu-h200') ||
	      (
	        !inputs.target_stage &&
	        ((github.event_name == 'schedule' || inputs.test_parallel_dispatch == true) || (!failure() && !cancelled())) &&
	        ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
	      )
	    )
	  runs-on: 8-gpu-h200-deepep
	  timeout-minutes: 240
	  steps:
	    - name: Checkout code
	      uses: actions/checkout@v4
	      with:
	        # Prefer the explicit PR head SHA, then the caller-supplied ref, then the event SHA.
	        ref: ${{ inputs.pr_head_sha || inputs.git_ref || github.sha }}

	    - uses: ./.github/actions/check-stage-health

	    - uses: ./.github/actions/check-maintenance

	    # Wheels are only fetched when check-changes reports an sgl_kernel change.
	    - name: Download artifacts
	      if: needs.check-changes.outputs.sgl_kernel == 'true'
	      uses: actions/download-artifact@v4
	      with:
	        path: sgl-kernel/dist/
	        merge-multiple: true
	        pattern: wheel-python3.10-cuda13.0

	    - name: Install dependencies
	      timeout-minutes: 20
	      run: |
	        CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/cuda/ci_install_deepep.sh

	    - name: Warmup DeepGEMM JIT Compilation
	      timeout-minutes: 25
	      run: |
	        python3 scripts/ci/cuda/warmup_deep_gemm.py \
	          deepseek-ai/DeepSeek-V3-0324:8 \
	          deepseek-ai/DeepSeek-V3.2-Exp:8

	    - name: Warmup Server CUDA Graphs
	      timeout-minutes: 25
	      run: |
	        python3 scripts/ci/cuda/warmup_server.py \
	          deepseek-ai/DeepSeek-V3-0324:8

	    # This stage allows 45 minutes for the suite (other stages here use 30).
	    - name: Run test
	      timeout-minutes: 45
	      env:
	        # Expands to `--continue-on-error` or to an empty string.
	        CONTINUE_ON_ERROR_FLAG: ${{ needs.check-changes.outputs.continue_on_error == 'true' && '--continue-on-error' || '' }}
	      run: |
	        cd test
	        python3 run_suite.py --hw cuda --suite stage-c-test-deepep-8-gpu-h200 $CONTINUE_ON_ERROR_FLAG

	    # Core dumps are uploaded only when an earlier step failed.
	    - uses: ./.github/actions/upload-cuda-coredumps
	      if: failure()

	# Stage C (B200): runs the `stage-c-test-4-gpu-b200` suite across five matrix
	# partitions on the runner label published by check-changes.
	stage-c-test-4-gpu-b200:
	  needs: [check-changes, call-gate, wait-for-stage-b]
	  # Runs when this stage is requested through `target_stage`. With no target
	  # stage it requires a main_package or sgl_kernel change and either a
	  # scheduled / test_parallel_dispatch run or no failed/cancelled upstream job.
	  if: |
	    always() &&
	    (
	      (inputs.target_stage == 'stage-c-test-4-gpu-b200') ||
	      (
	        !inputs.target_stage &&
	        ((github.event_name == 'schedule' || inputs.test_parallel_dispatch == true) || (!failure() && !cancelled())) &&
	        ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
	      )
	    )
	  runs-on: ${{ needs.check-changes.outputs.b200_runner }}
	  timeout-minutes: 240
	  strategy:
	    fail-fast: false
	    matrix:
	      # Each entry is passed as --auto-partition-id below; --auto-partition-size
	      # is set to the number of entries (5), so keep the two in sync.
	      part: [0, 1, 2, 3, 4]

	  steps:
	    - name: Checkout code
	      uses: actions/checkout@v4
	      with:
	        # Prefer the explicit PR head SHA, then the caller-supplied ref, then the event SHA.
	        ref: ${{ inputs.pr_head_sha || inputs.git_ref || github.sha }}

	    - uses: ./.github/actions/check-stage-health

	    - uses: ./.github/actions/check-maintenance

	    # Wheels are only fetched when check-changes reports an sgl_kernel change.
	    - name: Download artifacts
	      if: needs.check-changes.outputs.sgl_kernel == 'true'
	      uses: actions/download-artifact@v6
	      with:
	        path: sgl-kernel/dist/
	        merge-multiple: true
	        pattern: wheel-python3.10-cuda13.0

	    - name: Install dependencies
	      timeout-minutes: 20
	      run: |
	        CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/cuda/ci_install_dependency.sh

	    - name: Run test
	      timeout-minutes: 30
	      env:
	        # Expands to `--continue-on-error` or to an empty string.
	        CONTINUE_ON_ERROR_FLAG: ${{ needs.check-changes.outputs.continue_on_error == 'true' && '--continue-on-error' || '' }}
	      run: |
	        cd test
	        python3 run_suite.py --hw cuda --suite stage-c-test-4-gpu-b200 --auto-partition-id ${{ matrix.part }} --auto-partition-size 5 --timeout-per-file 1800 $CONTINUE_ON_ERROR_FLAG

	    # On failure, upload core dumps with the partition index as artifact suffix.
	    - uses: ./.github/actions/upload-cuda-coredumps
	      if: failure()
	      with:
	        artifact-suffix: ${{ matrix.part }}

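	# NOTE: GB200 stage temporarily disabled — no company-owned GB200 runner available yet.
	# Re-enable when a 4-gpu-gb200 runner is provisioned.
	# When re-enabling: run "Checkout code" before the local
	# `./.github/actions/check-maintenance` action (local actions are read from the
	# checked-out workspace), add `./.github/actions/check-stage-health` as the active
	# stages do, and restore the entry in the `pr-test-finish` needs list.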
	# stage-c-test-4-gpu-gb200:
	# needs: [check-changes, call-gate, wait-for-stage-b, sgl-kernel-build-wheels-arm]
	# if: \|
	# always() &&
	# (
	# (inputs.target_stage == 'stage-c-test-4-gpu-gb200') \|\|
	# (
	# !inputs.target_stage &&
	# ((github.event_name == 'schedule' \|\| inputs.test_parallel_dispatch == true) \|\| (!failure() && !cancelled())) &&
	# ((needs.check-changes.outputs.main_package == 'true') \|\| (needs.check-changes.outputs.sgl_kernel == 'true'))
	# )
	# )
	# runs-on: 4-gpu-gb200
	# timeout-minutes: 240
	# strategy:
	# fail-fast: false
	# steps:
	# - uses: ./.github/actions/check-maintenance
	# with:
	# github-token: ${{ github.token }}
	#
	# - name: Checkout code
	# uses: actions/checkout@v4
	# with:
	# ref: ${{ inputs.pr_head_sha \|\| inputs.git_ref \|\| github.sha }}
	#
	# - name: Download artifacts
	# if: needs.check-changes.outputs.sgl_kernel == 'true'
	# uses: actions/download-artifact@v4
	# with:
	# path: sgl-kernel/dist/
	# merge-multiple: true
	# pattern: wheel-python3.10-cuda13.0-aarch64
	#
	# - name: Install dependencies
	# timeout-minutes: 20
	# run: \|
	# CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} GRACE_BLACKWELL=1 bash scripts/ci/cuda/ci_install_deepep.sh
	#
	# - name: Run test
	# timeout-minutes: 45
	# env:
	# CONTINUE_ON_ERROR_FLAG: ${{ needs.check-changes.outputs.continue_on_error == 'true' && '--continue-on-error' \|\| '' }}
	# run: \|
	# cd test
	# python3 run_suite.py --hw cuda --suite stage-c-test-4-gpu-gb200 --timeout-per-file 3600 $CONTINUE_ON_ERROR_FLAG
	#
	# - uses: ./.github/actions/upload-cuda-coredumps
	# if: failure()

	# Final gate: prints the result of every job listed in `needs` and fails when
	# any of them failed or was cancelled. Skipped jobs do not fail the gate.
	pr-test-finish:
	  needs:
	    - call-gate
	    - check-changes

	    - sgl-kernel-build-wheels
	    - sgl-kernel-build-wheels-arm
	    - call-sgl-kernel-tests

	    - wait-for-stage-a
	    - wait-for-stage-b

	    - call-jit-kernel-tests

	    - call-multimodal-gen-tests

	    - stage-a-test-1-gpu-small
	    - stage-a-test-cpu
	    - stage-b-test-1-gpu-small
	    - stage-b-test-1-gpu-large
	    - stage-b-test-2-gpu-large
	    - stage-b-test-4-gpu-b200
	    - stage-c-test-4-gpu-h100
	    - stage-c-test-8-gpu-h20
	    - stage-c-test-8-gpu-h200
	    - stage-c-test-deepep-4-gpu-h100
	    - stage-c-test-deepep-8-gpu-h200
	    - stage-c-test-4-gpu-b200
	    # - stage-c-test-4-gpu-gb200  # Temporarily disabled — no GB200 runner
	  # always(): evaluate the gate even when upstream jobs failed or were skipped.
	  if: always()
	  runs-on: ubuntu-latest
	  steps:
	    - name: Check all dependent job statuses
	      env:
	        # The `needs` context (results and outputs of every job) is handed to
	        # the script through the environment instead of being spliced into the
	        # script text, so a quote character inside a job output cannot end the
	        # shell string early or inject commands.
	        NEEDS_JSON: ${{ toJson(needs) }}
	      run: |
	        # Names of all jobs present in the `needs` context
	        job_names=$(printf '%s' "$NEEDS_JSON" | jq -r 'keys_unsorted[]')

	        failed=0
	        for job in $job_names; do
	          # For each job, extract its result
	          result=$(printf '%s' "$NEEDS_JSON" | jq -r --arg j "$job" '.[$j].result')

	          # Print the job name and its result
	          echo "$job: $result"

	          # Remember failures/cancellations, but keep going so that every
	          # job's result appears in the log before the step fails.
	          if [[ "$result" == "failure" || "$result" == "cancelled" ]]; then
	            failed=1
	          fi
	        done

	        if [[ "$failed" -ne 0 ]]; then
	          echo "The above jobs failed."
	          exit 1
	        fi

	        # No job reported failure or cancellation
	        echo "All jobs completed successfully"
	        exit 0

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

streaming session: spec v2 bonus accounting + comprehensive test matrix #86526

Workflow file

streaming session: spec v2 bonus accounting + comprehensive test matrix #86526

Uh oh!

Workflow file for this run