# GitHub Actions workflow: Nightly Test (Nvidia)
# (page-chrome text from a web scrape removed; YAML definition follows)
name: Nightly Test (Nvidia)

on:
  # Daily run at midnight UTC.
  schedule:
    - cron: '0 0 * * *'
  # Manual trigger with an optional single-job filter.
  workflow_dispatch:
    inputs:
      job_filter:
        description: 'Select which job to run (leave empty or "all" to run all jobs)'
        required: false
        type: choice
        default: 'all'
        options:
          - 'all'
          - 'nightly-test-general-1-gpu-h100'
          - 'nightly-test-general-4-gpu-h100'
          - 'nightly-test-general-8-gpu-h200'
          - 'nightly-test-general-8-gpu-h20'
          - 'nightly-test-general-8-gpu-b200'
          - 'nightly-test-text-accuracy-2-gpu-h100'
          - 'nightly-test-text-perf-2-gpu-h100'
          - 'nightly-test-vlm-accuracy-2-gpu-h100'
          - 'nightly-test-vlm-perf-2-gpu-h100'
          - 'nightly-test-multimodal-server-1-gpu'
          - 'nightly-test-multimodal-server-2-gpu'
          - 'nightly-test-perf-4-gpu-b200'
          - 'nightly-test-perf-8-gpu-b200'
          - 'nightly-test-specialized-8-gpu-b200'
          - 'nightly-test-kernel-1-gpu-h100'
          - 'nightly-test-diffusion-comparison'
          - 'nightly-test-kernel-8-gpu-h200'
  # Reusable-workflow entry point; `type: choice` is not allowed here, so
  # job_filter is a plain string with the same semantics as above.
  workflow_call:
    inputs:
      ref:
        description: 'Git ref (branch, tag, or SHA) to test. If not provided, uses the default branch.'
        required: false
        type: string
        default: ''
      job_filter:
        description: 'Select which job to run (leave empty or "all" to run all jobs)'
        required: false
        type: string
        default: 'all'

concurrency:
  group: nightly-test-nvidia-${{ inputs.ref || github.ref }}
  # NOTE(review): for workflow_call runs, github.event_name reflects the
  # CALLER's trigger (e.g. "schedule"), not "workflow_call", so this may
  # still evaluate to true for called runs — confirm intended behavior.
  cancel-in-progress: ${{ github.event_name != 'workflow_call' }}

env:
  # Quote values so consumers always see strings, not YAML booleans/ints.
  SGLANG_IS_IN_CI: "true"
  SGLANG_CUDA_COREDUMP: "1"
  HF_HUB_DOWNLOAD_TIMEOUT: "300"
  HF_HUB_ETAG_TIMEOUT: "300"
jobs:
  # General tests - 1 GPU
  nightly-test-general-1-gpu-h100:
    if: github.repository == 'sgl-project/sglang' && (inputs.job_filter == '' || inputs.job_filter == 'all' || inputs.job_filter == 'nightly-test-general-1-gpu-h100')
    runs-on: 1-gpu-h100
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          # Test the caller-provided ref when invoked via workflow_call.
          ref: ${{ inputs.ref || github.ref }}
      - uses: ./.github/actions/check-maintenance
      - name: Install dependencies
        run: |
          bash scripts/ci/cuda/ci_install_dependency.sh
      - name: Run test
        timeout-minutes: 60
        run: |
          cd test
          python3 run_suite.py --hw cuda --suite nightly-1-gpu --nightly --continue-on-error
      - uses: ./.github/actions/upload-cuda-coredumps
        if: always()
# JIT kernel full unit tests (expanded parameter ranges via SGLANG_JIT_KERNEL_RUN_FULL_TESTS)
nightly-test-kernel-1-gpu-h100:
if: github.repository == 'sgl-project/sglang' && (inputs.job_filter == '' || inputs.job_filter == 'all' || inputs.job_filter == 'nightly-test-kernel-1-gpu-h100')
runs-on: 1-gpu-h100
timeout-minutes: 240
env:
# Full jit_kernel test grids (see sglang.jit_kernel.utils.should_run_full_tests)
SGLANG_JIT_KERNEL_RUN_FULL_TESTS: "1"
# Match pr-test-jit-kernel workflow for consistent JIT warmup behavior
SGLANG_JIT_DEEPGEMM_FAST_WARMUP: true
# Allow maintenance bypass on default branch (same semantics as PR JIT workflow)
SGLANG_PR_TEST_BYPASS_MAINTENANCE_ON_MAIN: ${{ github.ref == 'refs/heads/main' && 'true' || 'false' }}
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
ref: ${{ inputs.ref || github.ref }}
- uses: ./.github/actions/check-maintenance
- name: Install dependencies
timeout-minutes: 20
run: |
bash scripts/ci/cuda/ci_install_dependency.sh
- name: Run jit kernel nightly suite
timeout-minutes: 60
run: |
cd test
python3 run_suite.py --hw cuda --suite nightly-kernel-1-gpu --nightly --continue-on-error
- uses: ./.github/actions/upload-cuda-coredumps
if: always()
nightly-test-kernel-8-gpu-h200:
if: github.repository == 'sgl-project/sglang' && (inputs.job_filter == '' || inputs.job_filter == 'all' || inputs.job_filter == 'nightly-test-kernel-8-gpu-h200')
runs-on: 8-gpu-h200
timeout-minutes: 240
env:
SGLANG_JIT_KERNEL_RUN_FULL_TESTS: "1"
SGLANG_JIT_DEEPGEMM_FAST_WARMUP: true
SGLANG_PR_TEST_BYPASS_MAINTENANCE_ON_MAIN: ${{ github.ref == 'refs/heads/main' && 'true' || 'false' }}
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
ref: ${{ inputs.ref || github.ref }}
- uses: ./.github/actions/check-maintenance
- name: Install dependencies
timeout-minutes: 20
run: |
bash scripts/ci/cuda/ci_install_dependency.sh
- name: Run multi-GPU jit kernel nightly suite
timeout-minutes: 90
run: |
cd test
python3 run_suite.py --hw cuda --suite nightly-kernel-8-gpu-h200 --nightly --continue-on-error
- uses: ./.github/actions/upload-cuda-coredumps
if: always()
# General tests - 4 GPU H100
nightly-test-general-4-gpu-h100:
if: github.repository == 'sgl-project/sglang' && (inputs.job_filter == '' || inputs.job_filter == 'all' || inputs.job_filter == 'nightly-test-general-4-gpu-h100')
runs-on: 4-gpu-h100
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
ref: ${{ inputs.ref || github.ref }}
- uses: ./.github/actions/check-maintenance
- name: Install dependencies
run: |
bash scripts/ci/cuda/ci_install_dependency.sh
- name: Run test
timeout-minutes: 30
run: |
cd test
python3 run_suite.py --hw cuda --suite nightly-4-gpu --nightly --continue-on-error
- uses: ./.github/actions/upload-cuda-coredumps
if: always()
# General tests - 8 GPU H200
nightly-test-general-8-gpu-h200:
if: github.repository == 'sgl-project/sglang' && (inputs.job_filter == '' || inputs.job_filter == 'all' || inputs.job_filter == 'nightly-test-general-8-gpu-h200')
runs-on: 8-gpu-h200
strategy:
fail-fast: false
matrix:
partition: [0, 1, 2, 3]
env:
RUNNER_LABELS: 8-gpu-h200
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
ref: ${{ inputs.ref || github.ref }}
- uses: ./.github/actions/check-maintenance
- name: Install dependencies
run: |
bash scripts/ci/cuda/ci_install_dependency.sh
- name: Run common 8-GPU model tests
if: always()
timeout-minutes: 300
env:
TRACE_BASE_URL: https://raw.githubusercontent.com/sglang-bot/sglang-ci-data/main/traces/${{ github.run_id }}
PERFETTO_RELAY_URL: ${{ vars.PERFETTO_RELAY_URL }}
GPU_CONFIG: "8-gpu-h200"
IS_H200: "1"
run: |
cd test
python3 run_suite.py --hw cuda --suite nightly-8-gpu-common --nightly --timeout-per-file=18000 --continue-on-error --auto-partition-id=${{ matrix.partition }} --auto-partition-size=4
- name: Publish traces to storage repo
if: always()
continue-on-error: true
env:
GITHUB_TOKEN: ${{ secrets.GH_PAT_FOR_NIGHTLY_CI_DATA }}
GITHUB_RUN_ID: ${{ github.run_id }}
GITHUB_RUN_NUMBER: ${{ github.run_number }}
run: |
TRACE_ARGS=""
for dir in test/performance_profiles_*/; do
[ -d "$dir" ] && TRACE_ARGS="$TRACE_ARGS --traces-dir $dir"
done
if [ -n "$TRACE_ARGS" ]; then
python3 scripts/ci/utils/publish_traces.py $TRACE_ARGS
find test/performance_profiles_*/ -name '*.json.gz' -delete
else
echo "No trace directories found, skipping publish"
fi
- name: Run test
timeout-minutes: 30
env:
GPU_CONFIG: "8-gpu-h200"
run: |
cd test
python3 run_suite.py --hw cuda --suite nightly-8-gpu-h200 --nightly --continue-on-error
- name: Collect performance metrics
if: always()
run: |
python3 scripts/ci/utils/save_metrics.py \
--gpu-config 8-gpu-h200 \
--partition ${{ matrix.partition }} \
--run-id ${{ github.run_id }} \
--output test/metrics-8gpu-h200-partition-${{ matrix.partition }}.json \
--search-dir test/performance_profiles_8_gpu \
--search-dir test
- name: Upload partition metrics
if: always()
uses: actions/upload-artifact@v4
with:
name: metrics-8gpu-h200-partition-${{ matrix.partition }}
path: test/metrics-8gpu-h200-partition-${{ matrix.partition }}.json
retention-days: 5
if-no-files-found: ignore
- uses: ./.github/actions/upload-cuda-coredumps
if: always()
with:
artifact-suffix: ${{ matrix.partition }}
# General tests - 8 GPU H20
nightly-test-general-8-gpu-h20:
if: github.repository == 'sgl-project/sglang' && (inputs.job_filter == '' || inputs.job_filter == 'all' || inputs.job_filter == 'nightly-test-general-8-gpu-h20')
runs-on: 8-gpu-h20
env:
SGLANG_CI_RDMA_ALL_DEVICES: "mlx5_1,mlx5_2,mlx5_3,mlx5_4"
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
ref: ${{ inputs.ref || github.ref }}
- uses: ./.github/actions/check-maintenance
- name: Install dependencies
run: |
bash scripts/ci/cuda/ci_install_dependency.sh
- name: Run test
timeout-minutes: 30
env:
GPU_CONFIG: "8-gpu-h20"
run: |
cd test
python3 run_suite.py --hw cuda --suite nightly-8-gpu-h20 --nightly --continue-on-error
- uses: ./.github/actions/upload-cuda-coredumps
if: always()
# General tests - 8 GPU B200
nightly-test-general-8-gpu-b200:
if: github.repository == 'sgl-project/sglang' && (inputs.job_filter == '' || inputs.job_filter == 'all' || inputs.job_filter == 'nightly-test-general-8-gpu-b200')
runs-on: 8-gpu-b200
strategy:
fail-fast: false
matrix:
partition: [0, 1, 2, 3]
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
ref: ${{ inputs.ref || github.ref }}
- uses: ./.github/actions/check-maintenance
- name: Install dependencies
run: |
bash scripts/ci/cuda/ci_install_dependency.sh
- name: Run common 8-GPU model tests
if: always()
timeout-minutes: 300
env:
TRACE_BASE_URL: https://raw.githubusercontent.com/sglang-bot/sglang-ci-data/main/traces/${{ github.run_id }}
PERFETTO_RELAY_URL: ${{ vars.PERFETTO_RELAY_URL }}
GPU_CONFIG: "8-gpu-b200"
run: |
cd test
python3 run_suite.py --hw cuda --suite nightly-8-gpu-common --nightly --timeout-per-file=12000 --continue-on-error --auto-partition-id=${{ matrix.partition }} --auto-partition-size=4
- name: Publish traces to storage repo
if: always()
continue-on-error: true
env:
GITHUB_TOKEN: ${{ secrets.GH_PAT_FOR_NIGHTLY_CI_DATA }}
GITHUB_RUN_ID: ${{ github.run_id }}
GITHUB_RUN_NUMBER: ${{ github.run_number }}
run: |
TRACE_ARGS=""
for dir in test/performance_profiles_*/; do
[ -d "$dir" ] && TRACE_ARGS="$TRACE_ARGS --traces-dir $dir"
done
if [ -n "$TRACE_ARGS" ]; then
python3 scripts/ci/utils/publish_traces.py $TRACE_ARGS
find test/performance_profiles_*/ -name '*.json.gz' -delete
else
echo "No trace directories found, skipping publish"
fi
- name: Collect performance metrics
if: always()
run: |
python3 scripts/ci/utils/save_metrics.py \
--gpu-config 8-gpu-b200 \
--partition ${{ matrix.partition }} \
--run-id ${{ github.run_id }} \
--output test/metrics-8gpu-b200-partition-${{ matrix.partition }}.json \
--search-dir test/performance_profiles_8_gpu \
--search-dir test
- name: Upload partition metrics
if: always()
uses: actions/upload-artifact@v4
with:
name: metrics-8gpu-b200-partition-${{ matrix.partition }}
path: test/metrics-8gpu-b200-partition-${{ matrix.partition }}.json
retention-days: 5
if-no-files-found: ignore
- uses: ./.github/actions/upload-cuda-coredumps
if: always()
with:
artifact-suffix: ${{ matrix.partition }}
# Text model accuracy tests
nightly-test-text-accuracy-2-gpu-h100:
if: github.repository == 'sgl-project/sglang' && (inputs.job_filter == '' || inputs.job_filter == 'all' || inputs.job_filter == 'nightly-test-text-accuracy-2-gpu-h100')
runs-on: 2-gpu-h100
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
ref: ${{ inputs.ref || github.ref }}
- uses: ./.github/actions/check-maintenance
- name: Install dependencies
run: |
bash scripts/ci/cuda/ci_install_dependency.sh
- name: Run eval test for text models
timeout-minutes: 120
run: |
cd test
python3 run_suite.py --hw cuda --suite nightly-eval-text-2-gpu --nightly --continue-on-error --timeout-per-file 4500
- uses: ./.github/actions/upload-cuda-coredumps
if: always()
# Text model performance tests
nightly-test-text-perf-2-gpu-h100:
if: github.repository == 'sgl-project/sglang' && (inputs.job_filter == '' || inputs.job_filter == 'all' || inputs.job_filter == 'nightly-test-text-perf-2-gpu-h100')
runs-on: 2-gpu-h100
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
ref: ${{ inputs.ref || github.ref }}
- uses: ./.github/actions/check-maintenance
- name: Install dependencies
run: |
bash scripts/ci/cuda/ci_install_dependency.sh
- name: Run performance test for text models
timeout-minutes: 180
env:
TRACE_BASE_URL: https://raw.githubusercontent.com/sglang-bot/sglang-ci-data/main/traces/${{ github.run_id }}
PERFETTO_RELAY_URL: ${{ vars.PERFETTO_RELAY_URL }}
GPU_CONFIG: "2-gpu-h100"
run: |
cd test
rm -rf performance_profiles_text_models/
python3 run_suite.py --hw cuda --suite nightly-perf-text-2-gpu --nightly --continue-on-error --timeout-per-file 3600
- name: Publish traces to storage repo
env:
GITHUB_TOKEN: ${{ secrets.GH_PAT_FOR_NIGHTLY_CI_DATA }}
GITHUB_RUN_ID: ${{ github.run_id }}
GITHUB_RUN_NUMBER: ${{ github.run_number }}
run: |
python3 scripts/ci/utils/publish_traces.py --traces-dir test/performance_profiles_text_models
- uses: ./.github/actions/upload-cuda-coredumps
if: always()
# VLM accuracy tests
nightly-test-vlm-accuracy-2-gpu-h100:
if: github.repository == 'sgl-project/sglang' && (inputs.job_filter == '' || inputs.job_filter == 'all' || inputs.job_filter == 'nightly-test-vlm-accuracy-2-gpu-h100')
runs-on: 2-gpu-h100
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
ref: ${{ inputs.ref || github.ref }}
- uses: ./.github/actions/check-maintenance
- name: Install dependencies
run: |
bash scripts/ci/cuda/ci_install_dependency.sh
- name: Run eval test for VLM models (fixed MMMU-100)
timeout-minutes: 240
run: |
cd test
python3 run_suite.py --hw cuda --suite nightly-eval-vlm-2-gpu --nightly --continue-on-error --timeout-per-file 9000
- uses: ./.github/actions/upload-cuda-coredumps
if: always()
# VLM performance tests
nightly-test-vlm-perf-2-gpu-h100:
if: github.repository == 'sgl-project/sglang' && (inputs.job_filter == '' || inputs.job_filter == 'all' || inputs.job_filter == 'nightly-test-vlm-perf-2-gpu-h100')
runs-on: 2-gpu-h100
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
ref: ${{ inputs.ref || github.ref }}
- uses: ./.github/actions/check-maintenance
- name: Install dependencies
run: |
bash scripts/ci/cuda/ci_install_dependency.sh
- name: Run perf test for VLM models (MMMU)
timeout-minutes: 240
env:
TRACE_BASE_URL: https://raw.githubusercontent.com/sglang-bot/sglang-ci-data/main/traces/${{ github.run_id }}
PERFETTO_RELAY_URL: ${{ vars.PERFETTO_RELAY_URL }}
GPU_CONFIG: "2-gpu-h100"
run: |
cd test
rm -rf performance_profiles_vlms/
python3 run_suite.py --hw cuda --suite nightly-perf-vlm-2-gpu --nightly --continue-on-error --timeout-per-file 3600
- name: Publish traces to storage repo
env:
GITHUB_TOKEN: ${{ secrets.GH_PAT_FOR_NIGHTLY_CI_DATA }}
GITHUB_RUN_ID: ${{ github.run_id }}
GITHUB_RUN_NUMBER: ${{ github.run_number }}
run: |
python3 scripts/ci/utils/publish_traces.py --traces-dir test/performance_profiles_vlms
- uses: ./.github/actions/upload-cuda-coredumps
if: always()
# diffusion performance tests
nightly-test-multimodal-server-1-gpu:
if: github.repository == 'sgl-project/sglang' && (inputs.job_filter == '' || inputs.job_filter == 'all' || inputs.job_filter == 'nightly-test-multimodal-server-1-gpu')
runs-on: 1-gpu-h100
strategy:
fail-fast: false
max-parallel: 5
matrix:
part: [0, 1]
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
ref: ${{ inputs.ref || github.ref }}
- uses: ./.github/actions/check-maintenance
- name: Install dependencies
run: |
bash scripts/ci/cuda/ci_install_dependency.sh diffusion
pip install slack_sdk
- name: Run diffusion server tests
env:
SGLANG_DIFFUSION_SLACK_TOKEN: ${{ secrets.SGLANG_DIFFUSION_SLACK_TOKEN }}
GITHUB_RUN_ID: ${{ github.run_id }}
GPU_CONFIG: "1-gpu-h100"
timeout-minutes: 90
run: |
cd python
python3 sglang/multimodal_gen/test/run_suite.py \
--suite 1-gpu \
--partition-id ${{ matrix.part }} \
--total-partitions 2
- name: Collect diffusion performance metrics
if: always()
run: |
python3 scripts/ci/utils/diffusion/save_diffusion_metrics.py \
--gpu-config 1-gpu-h100 \
--run-id ${{ github.run_id }} \
--output python/diffusion-metrics-1gpu-partition-${{ matrix.part }}.json \
--results-json python/diffusion-results.json
- name: Upload diffusion metrics
if: always()
uses: actions/upload-artifact@v4
with:
name: diffusion-metrics-1gpu-partition-${{ matrix.part }}
path: python/diffusion-metrics-1gpu-partition-${{ matrix.part }}.json
retention-days: 90
if-no-files-found: ignore
- uses: ./.github/actions/upload-cuda-coredumps
if: always()
with:
artifact-suffix: ${{ matrix.part }}
nightly-test-multimodal-server-2-gpu:
if: github.repository == 'sgl-project/sglang' && (inputs.job_filter == '' || inputs.job_filter == 'all' || inputs.job_filter == 'nightly-test-multimodal-server-2-gpu')
runs-on: 2-gpu-h100
strategy:
fail-fast: false
max-parallel: 5
matrix:
part: [0, 1]
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
ref: ${{ inputs.ref || github.ref }}
- uses: ./.github/actions/check-maintenance
- name: Install dependencies
run: |
bash scripts/ci/cuda/ci_install_dependency.sh diffusion
pip install slack_sdk
- name: Run diffusion server tests
env:
SGLANG_DIFFUSION_SLACK_TOKEN: ${{ secrets.SGLANG_DIFFUSION_SLACK_TOKEN }}
GITHUB_RUN_ID: ${{ github.run_id }}
GPU_CONFIG: "2-gpu-h100"
timeout-minutes: 90
run: |
cd python
python3 sglang/multimodal_gen/test/run_suite.py \
--suite 2-gpu \
--partition-id ${{ matrix.part }} \
--total-partitions 2
- name: Collect diffusion performance metrics
if: always()
run: |
python3 scripts/ci/utils/diffusion/save_diffusion_metrics.py \
--gpu-config 2-gpu-h100 \
--run-id ${{ github.run_id }} \
--output python/diffusion-metrics-2gpu-partition-${{ matrix.part }}.json \
--results-json python/diffusion-results.json
- name: Upload diffusion metrics
if: always()
uses: actions/upload-artifact@v4
with:
name: diffusion-metrics-2gpu-partition-${{ matrix.part }}
path: python/diffusion-metrics-2gpu-partition-${{ matrix.part }}.json
retention-days: 90
if-no-files-found: ignore
- uses: ./.github/actions/upload-cuda-coredumps
if: always()
with:
artifact-suffix: ${{ matrix.part }}
# B200 Performance tests - 4 GPU
nightly-test-perf-4-gpu-b200:
if: github.repository == 'sgl-project/sglang' && (inputs.job_filter == '' || inputs.job_filter == 'all' || inputs.job_filter == 'nightly-test-perf-4-gpu-b200')
runs-on: 4-gpu-b200
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
ref: ${{ inputs.ref || github.ref }}
- uses: ./.github/actions/check-maintenance
- name: Install dependencies
run: |
bash scripts/ci/cuda/ci_install_dependency.sh
- name: Run test
timeout-minutes: 300
run: |
cd test
python3 run_suite.py --hw cuda --suite nightly-4-gpu-b200 --nightly --continue-on-error --timeout-per-file 12000
- uses: ./.github/actions/upload-cuda-coredumps
if: always()
# Specialized B200 tests - 8 GPU, for specific backends and configs
nightly-test-specialized-8-gpu-b200:
if: github.repository == 'sgl-project/sglang' && (inputs.job_filter == '' || inputs.job_filter == 'all' || inputs.job_filter == 'nightly-test-perf-8-gpu-b200' || inputs.job_filter == 'nightly-test-specialized-8-gpu-b200')
runs-on: 8-gpu-b200
env:
RUNNER_LABELS: 8-gpu-b200
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
ref: ${{ inputs.ref || github.ref }}
- uses: ./.github/actions/check-maintenance
- name: Install dependencies
run: |
bash scripts/ci/cuda/ci_install_dependency.sh
- name: Run test
timeout-minutes: 120
env:
GPU_CONFIG: "8-gpu-b200"
run: |
cd test
python3 run_suite.py --hw cuda --suite nightly-8-gpu-b200 --nightly --continue-on-error --timeout-per-file 2400
- uses: ./.github/actions/upload-cuda-coredumps
if: always()
# Diffusion cross-framework comparison
nightly-test-diffusion-comparison:
if: github.repository == 'sgl-project/sglang' && (inputs.job_filter == '' || inputs.job_filter == 'all' || inputs.job_filter == 'nightly-test-diffusion-comparison')
runs-on: 4-gpu-h100
timeout-minutes: 240
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
ref: ${{ inputs.ref || github.ref }}
- name: Install dependencies
run: |
bash scripts/ci/cuda/ci_install_dependency.sh diffusion
- name: Run cross-framework comparison
env:
GITHUB_SHA: ${{ github.sha }}
GITHUB_RUN_ID: ${{ github.run_id }}
PYTHONUNBUFFERED: "1"
timeout-minutes: 210
run: |
python3 -u scripts/ci/utils/diffusion/run_comparison.py \
--output comparison-results.json
- name: Generate dashboard
if: always()
env:
GH_PAT_FOR_NIGHTLY_CI_DATA: ${{ secrets.GH_PAT_FOR_NIGHTLY_CI_DATA }}
GH_TOKEN: ${{ github.token }}
run: |
python3 scripts/ci/utils/diffusion/generate_diffusion_dashboard.py \
--results comparison-results.json \
--output dashboard.md \
--charts-dir comparison-charts \
--fetch-history \
--step-summary
- name: Publish to sglang-ci-data
if: always()
env:
GH_PAT_FOR_NIGHTLY_CI_DATA: ${{ secrets.GH_PAT_FOR_NIGHTLY_CI_DATA }}
run: |
python3 scripts/ci/utils/diffusion/publish_comparison_results.py \
--results comparison-results.json \
--dashboard dashboard.md \
--charts-dir comparison-charts
- name: Upload comparison artifacts
if: always()
uses: actions/upload-artifact@v4
with:
name: diffusion-comparison-${{ github.run_id }}
path: |
comparison-results.json
dashboard.md
comparison-charts/
comparison-logs/
retention-days: 90
if-no-files-found: ignore
- uses: ./.github/actions/upload-cuda-coredumps
if: always()
# Consolidate performance metrics from all jobs
consolidate-metrics:
if: github.repository == 'sgl-project/sglang' && always()
needs:
- nightly-test-general-8-gpu-h200
- nightly-test-general-8-gpu-b200
- nightly-test-multimodal-server-1-gpu
- nightly-test-multimodal-server-2-gpu
runs-on: ubuntu-latest
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
ref: ${{ inputs.ref || github.ref }}
- name: Download all partition metrics
uses: actions/download-artifact@v4
with:
pattern: "*metrics-*"
path: metrics/
merge-multiple: true
- name: List downloaded metrics
run: |
echo "Downloaded metrics files:"
find metrics/ -name "*.json" -type f 2>/dev/null || echo "No metrics files found"
- name: Merge metrics
run: |
python3 scripts/ci/utils/merge_metrics.py \
--input-dir metrics/ \
--output consolidated-metrics-${{ github.run_id }}.json \
--run-id ${{ github.run_id }} \
--commit-sha ${{ github.sha }} \
--branch ${{ github.ref_name }}
- name: Upload consolidated metrics
uses: actions/upload-artifact@v4
with:
name: consolidated-metrics-${{ github.run_id }}
path: consolidated-metrics-${{ github.run_id }}.json
retention-days: 90
if-no-files-found: warn
# Final check job
check-all-jobs:
if: github.repository == 'sgl-project/sglang' && always()
needs:
- nightly-test-general-1-gpu-h100
- nightly-test-general-4-gpu-h100
- nightly-test-general-8-gpu-h200
- nightly-test-general-8-gpu-h20
- nightly-test-general-8-gpu-b200
- nightly-test-text-accuracy-2-gpu-h100
- nightly-test-text-perf-2-gpu-h100
- nightly-test-vlm-accuracy-2-gpu-h100
- nightly-test-vlm-perf-2-gpu-h100
- nightly-test-multimodal-server-1-gpu
- nightly-test-multimodal-server-2-gpu
- nightly-test-perf-4-gpu-b200
- nightly-test-specialized-8-gpu-b200
- nightly-test-diffusion-comparison
- consolidate-metrics
runs-on: ubuntu-latest
steps:
- name: Check if any job failed
run: |
if [[ "${{ contains(needs.*.result, 'failure') }}" == "true" ]]; then
echo "One or more nightly test jobs failed"
exit 1
fi
if [[ "${{ contains(needs.*.result, 'cancelled') }}" == "true" ]]; then
echo "One or more nightly test jobs were cancelled"
exit 1
fi
echo "All nightly test jobs passed"