Nightly Test (Nvidia) #37

Workflow file for this run

.github/workflows/nightly-test-nvidia.yml at 156d97b

	# Nightly test workflow for the Nvidia GPU runners.
	name: Nightly Test (Nvidia)

	# Triggers: daily schedule, selected pushes to main, manual dispatch, and reuse from another workflow.
	on:
	schedule:
	# Once a day at 00:00.
	- cron: '0 0 * * *'
	push:
	branches:
	- main
	# Only pushes that touch one of these files trigger a run.
	paths:
	- "python/sglang/version.py"
	- "test/run_suite.py"
	workflow_dispatch:
	inputs:
	# Every option other than 'all' is the id of a test job defined under `jobs`.
	job_filter:
	description: 'Select which job to run (leave empty or "all" to run all jobs)'
	required: false
	type: choice
	default: 'all'
	options:
	- 'all'
	- 'nightly-test-general-1-gpu-runner'
	- 'nightly-test-general-4-gpu-h100'
	- 'nightly-test-general-8-gpu-h200'
	- 'nightly-test-general-8-gpu-h20'
	- 'nightly-test-general-8-gpu-b200'
	- 'nightly-test-text-accuracy-2-gpu-runner'
	- 'nightly-test-text-perf-2-gpu-runner'
	- 'nightly-test-vlm-accuracy-2-gpu-runner'
	- 'nightly-test-vlm-perf-2-gpu-runner'
	- 'nightly-test-multimodal-server-1-gpu'
	- 'nightly-test-multimodal-server-2-gpu'
	- 'nightly-test-perf-4-gpu-b200'
	- 'nightly-test-perf-8-gpu-b200'
	workflow_call:
	inputs:
	# Optional ref to check out; every job falls back to github.ref when this is empty.
	ref:
	description: 'Git ref (branch, tag, or SHA) to test. If not provided, uses the default branch.'
	required: false
	type: string
	default: ''
	# Same meaning as the workflow_dispatch input, but a free-form string here.
	job_filter:
	description: 'Select which job to run (leave empty or "all" to run all jobs)'
	required: false
	type: string
	default: 'all'

	# One concurrency group per tested ref (inputs.ref when given, otherwise github.ref).
	concurrency:
	group: nightly-test-nvidia-${{ inputs.ref \|\| github.ref }}
	# NOTE(review): GitHub documents that a called workflow sees the caller's github context,
	# so event_name may never equal 'workflow_call' and this may always evaluate to true - confirm.
	cancel-in-progress: ${{ github.event_name != 'workflow_call' }}

	# Environment variables shared by every job in this workflow.
	env:
	SGLANG_IS_IN_CI: true
	HF_HUB_DOWNLOAD_TIMEOUT: 300
	HF_HUB_ETAG_TIMEOUT: 300

	# Each test job below is gated on the repository name and on inputs.job_filter.
	jobs:
	# General tests - 1 GPU
	# Runs the `nightly-1-gpu` suite of test/run_suite.py on the `1-gpu-runner` runner.
	nightly-test-general-1-gpu-runner:
	# Run only in sgl-project/sglang, and only when job_filter is empty, 'all', or this job's id.
	if: github.repository == 'sgl-project/sglang' && (inputs.job_filter == '' \|\| inputs.job_filter == 'all' \|\| inputs.job_filter == 'nightly-test-general-1-gpu-runner')
	runs-on: 1-gpu-runner
	steps:
	# Check out inputs.ref when given, otherwise the ref that triggered the run.
	- name: Checkout code
	uses: actions/checkout@v4
	with:
	ref: ${{ inputs.ref \|\| github.ref }}

	- name: Install dependencies
	run: \|
	bash scripts/ci/ci_install_dependency.sh

	# The test step is capped at 60 minutes.
	- name: Run test
	timeout-minutes: 60
	run: \|
	cd test
	python3 run_suite.py --hw cuda --suite nightly-1-gpu --nightly --continue-on-error

	# General tests - 4 GPU H100
	# Runs the `nightly-4-gpu` suite of test/run_suite.py on the `4-gpu-h100` runner.
	nightly-test-general-4-gpu-h100:
	# Run only in sgl-project/sglang, and only when job_filter is empty, 'all', or this job's id.
	if: github.repository == 'sgl-project/sglang' && (inputs.job_filter == '' \|\| inputs.job_filter == 'all' \|\| inputs.job_filter == 'nightly-test-general-4-gpu-h100')
	runs-on: 4-gpu-h100
	steps:
	# Check out inputs.ref when given, otherwise the ref that triggered the run.
	- name: Checkout code
	uses: actions/checkout@v4
	with:
	ref: ${{ inputs.ref \|\| github.ref }}

	- name: Install dependencies
	run: \|
	bash scripts/ci/ci_install_dependency.sh

	# The test step is capped at 30 minutes.
	- name: Run test
	timeout-minutes: 30
	run: \|
	cd test
	python3 run_suite.py --hw cuda --suite nightly-4-gpu --nightly --continue-on-error

	# General tests - 8 GPU H200
	# Runs two suites on the `8-gpu-h200` runner: `nightly-8-gpu-common`, then `nightly-8-gpu-h200`.
	nightly-test-general-8-gpu-h200:
	# Run only in sgl-project/sglang, and only when job_filter is empty, 'all', or this job's id.
	if: github.repository == 'sgl-project/sglang' && (inputs.job_filter == '' \|\| inputs.job_filter == 'all' \|\| inputs.job_filter == 'nightly-test-general-8-gpu-h200')
	runs-on: 8-gpu-h200
	# Job-level variable visible to every step of this job.
	env:
	RUNNER_LABELS: 8-gpu-h200
	steps:
	# Check out inputs.ref when given, otherwise the ref that triggered the run.
	- name: Checkout code
	uses: actions/checkout@v4
	with:
	ref: ${{ inputs.ref \|\| github.ref }}

	- name: Install dependencies
	run: \|
	bash scripts/ci/ci_install_dependency.sh

	# `always()` makes this step run even if an earlier step failed.
	# The per-file timeout of 18000 is presumably in seconds (18000 s = 300 min, the step limit) - confirm in run_suite.py.
	- name: Run common 8-GPU model tests
	if: always()
	timeout-minutes: 300
	env:
	TRACE_BASE_URL: https://raw.githubusercontent.com/sglang-bot/sglang-ci-data/main/traces/${{ github.run_id }}
	PERFETTO_RELAY_URL: ${{ vars.PERFETTO_RELAY_URL }}
	GPU_CONFIG: "8-gpu-h200"
	IS_H200: "1"
	run: \|
	cd test
	python3 run_suite.py --hw cuda --suite nightly-8-gpu-common --nightly --timeout-per-file=18000 --continue-on-error

	# H200-specific suite; has no `if`, so the default step condition applies. Capped at 30 minutes.
	- name: Run test
	timeout-minutes: 30
	env:
	GPU_CONFIG: "8-gpu-h200"
	run: \|
	cd test
	python3 run_suite.py --hw cuda --suite nightly-8-gpu-h200 --nightly --continue-on-error

	# General tests - 8 GPU H20
	# Runs the `nightly-8-gpu-h20` suite of test/run_suite.py on the `8-gpu-h20` runner.
	nightly-test-general-8-gpu-h20:
	# Run only in sgl-project/sglang, and only when job_filter is empty, 'all', or this job's id.
	if: github.repository == 'sgl-project/sglang' && (inputs.job_filter == '' \|\| inputs.job_filter == 'all' \|\| inputs.job_filter == 'nightly-test-general-8-gpu-h20')
	runs-on: 8-gpu-h20
	# Job-level variable listing four mlx5 device names; presumably read by the RDMA tests - confirm.
	env:
	SGLANG_CI_RDMA_ALL_DEVICES: "mlx5_1,mlx5_2,mlx5_3,mlx5_4"
	steps:
	# Check out inputs.ref when given, otherwise the ref that triggered the run.
	- name: Checkout code
	uses: actions/checkout@v4
	with:
	ref: ${{ inputs.ref \|\| github.ref }}

	- name: Install dependencies
	run: \|
	bash scripts/ci/ci_install_dependency.sh

	# The test step is capped at 30 minutes.
	- name: Run test
	timeout-minutes: 30
	env:
	GPU_CONFIG: "8-gpu-h20"
	run: \|
	cd test
	python3 run_suite.py --hw cuda --suite nightly-8-gpu-h20 --nightly --continue-on-error

	# General tests - 8 GPU B200
	# Runs the `nightly-8-gpu-b200` suite of test/run_suite.py on the `8-gpu-b200` runner.
	nightly-test-general-8-gpu-b200:
	# Run only in sgl-project/sglang, and only when job_filter is empty, 'all', or this job's own id.
	# The id compared here must match the job key above and the workflow_dispatch option of the same name.
	if: github.repository == 'sgl-project/sglang' && (inputs.job_filter == '' \|\| inputs.job_filter == 'all' \|\| inputs.job_filter == 'nightly-test-general-8-gpu-b200')
	runs-on: 8-gpu-b200
	steps:
	# Check out inputs.ref when given, otherwise the ref that triggered the run.
	- name: Checkout code
	uses: actions/checkout@v4
	with:
	ref: ${{ inputs.ref \|\| github.ref }}

	# IS_BLACKWELL=1 is set only for the install script's process.
	- name: Install dependencies
	run: \|
	IS_BLACKWELL=1 bash scripts/ci/ci_install_dependency.sh

	# The test step is capped at 120 minutes; each test file gets a --timeout-per-file of 2400.
	- name: Run test
	timeout-minutes: 120
	env:
	GPU_CONFIG: "8-gpu-b200"
	run: \|
	cd test
	python3 run_suite.py --hw cuda --suite nightly-8-gpu-b200 --nightly --continue-on-error --timeout-per-file 2400

	# Text model accuracy tests
	# Runs the `nightly-eval-text-2-gpu` suite of test/run_suite.py on the `2-gpu-runner` runner.
	nightly-test-text-accuracy-2-gpu-runner:
	# Run only in sgl-project/sglang, and only when job_filter is empty, 'all', or this job's id.
	if: github.repository == 'sgl-project/sglang' && (inputs.job_filter == '' \|\| inputs.job_filter == 'all' \|\| inputs.job_filter == 'nightly-test-text-accuracy-2-gpu-runner')
	runs-on: 2-gpu-runner
	steps:
	# Check out inputs.ref when given, otherwise the ref that triggered the run.
	- name: Checkout code
	uses: actions/checkout@v4
	with:
	ref: ${{ inputs.ref \|\| github.ref }}

	- name: Install dependencies
	run: \|
	bash scripts/ci/ci_install_dependency.sh

	# The eval step is capped at 120 minutes; each test file gets a --timeout-per-file of 4500.
	- name: Run eval test for text models
	timeout-minutes: 120
	run: \|
	cd test
	python3 run_suite.py --hw cuda --suite nightly-eval-text-2-gpu --nightly --continue-on-error --timeout-per-file 4500

	# Text model performance tests
	# Runs the `nightly-perf-text-2-gpu` suite on the `2-gpu-runner` runner, then publishes the trace directory.
	nightly-test-text-perf-2-gpu-runner:
	# Run only in sgl-project/sglang, and only when job_filter is empty, 'all', or this job's id.
	if: github.repository == 'sgl-project/sglang' && (inputs.job_filter == '' \|\| inputs.job_filter == 'all' \|\| inputs.job_filter == 'nightly-test-text-perf-2-gpu-runner')
	runs-on: 2-gpu-runner
	steps:
	# Check out inputs.ref when given, otherwise the ref that triggered the run.
	- name: Checkout code
	uses: actions/checkout@v4
	with:
	ref: ${{ inputs.ref \|\| github.ref }}

	- name: Install dependencies
	run: \|
	bash scripts/ci/ci_install_dependency.sh

	# Deletes test/performance_profiles_text_models/ before running the suite. Capped at 180 minutes.
	# TRACE_BASE_URL embeds this run's id.
	- name: Run performance test for text models
	timeout-minutes: 180
	env:
	TRACE_BASE_URL: https://raw.githubusercontent.com/sglang-bot/sglang-ci-data/main/traces/${{ github.run_id }}
	PERFETTO_RELAY_URL: ${{ vars.PERFETTO_RELAY_URL }}
	GPU_CONFIG: "2-gpu-runner"
	run: \|
	cd test
	rm -rf performance_profiles_text_models/
	python3 run_suite.py --hw cuda --suite nightly-perf-text-2-gpu --nightly --continue-on-error

	# Passes the trace directory to publish_traces.py, with a token taken from repository secrets.
	# No `if` is set, so the default step condition applies.
	- name: Publish traces to storage repo
	env:
	GITHUB_TOKEN: ${{ secrets.GH_PAT_FOR_NIGHTLY_CI_DATA }}
	GITHUB_RUN_ID: ${{ github.run_id }}
	GITHUB_RUN_NUMBER: ${{ github.run_number }}
	run: \|
	python3 scripts/ci/publish_traces.py --traces-dir test/performance_profiles_text_models

	# VLM accuracy tests
	# Runs the `nightly-eval-vlm-2-gpu` suite of test/run_suite.py on the `2-gpu-runner` runner.
	nightly-test-vlm-accuracy-2-gpu-runner:
	# Run only in sgl-project/sglang, and only when job_filter is empty, 'all', or this job's id.
	if: github.repository == 'sgl-project/sglang' && (inputs.job_filter == '' \|\| inputs.job_filter == 'all' \|\| inputs.job_filter == 'nightly-test-vlm-accuracy-2-gpu-runner')
	runs-on: 2-gpu-runner
	steps:
	# Check out inputs.ref when given, otherwise the ref that triggered the run.
	- name: Checkout code
	uses: actions/checkout@v4
	with:
	ref: ${{ inputs.ref \|\| github.ref }}

	- name: Install dependencies
	run: \|
	bash scripts/ci/ci_install_dependency.sh

	# The eval step is capped at 240 minutes; each test file gets a --timeout-per-file of 9000.
	- name: Run eval test for VLM models (fixed MMMU-100)
	timeout-minutes: 240
	run: \|
	cd test
	python3 run_suite.py --hw cuda --suite nightly-eval-vlm-2-gpu --nightly --continue-on-error --timeout-per-file 9000

	# VLM performance tests
	# Runs the `nightly-perf-vlm-2-gpu` suite on the `2-gpu-runner` runner, then publishes the trace directory.
	nightly-test-vlm-perf-2-gpu-runner:
	# Run only in sgl-project/sglang, and only when job_filter is empty, 'all', or this job's id.
	if: github.repository == 'sgl-project/sglang' && (inputs.job_filter == '' \|\| inputs.job_filter == 'all' \|\| inputs.job_filter == 'nightly-test-vlm-perf-2-gpu-runner')
	runs-on: 2-gpu-runner
	steps:
	# Check out inputs.ref when given, otherwise the ref that triggered the run.
	- name: Checkout code
	uses: actions/checkout@v4
	with:
	ref: ${{ inputs.ref \|\| github.ref }}

	- name: Install dependencies
	run: \|
	bash scripts/ci/ci_install_dependency.sh

	# Deletes test/performance_profiles_vlms/ before running the suite. Capped at 240 minutes.
	# TRACE_BASE_URL embeds this run's id.
	- name: Run perf test for VLM models (MMMU)
	timeout-minutes: 240
	env:
	TRACE_BASE_URL: https://raw.githubusercontent.com/sglang-bot/sglang-ci-data/main/traces/${{ github.run_id }}
	PERFETTO_RELAY_URL: ${{ vars.PERFETTO_RELAY_URL }}
	GPU_CONFIG: "2-gpu-runner"
	run: \|
	cd test
	rm -rf performance_profiles_vlms/
	python3 run_suite.py --hw cuda --suite nightly-perf-vlm-2-gpu --nightly --continue-on-error

	# Passes the trace directory to publish_traces.py, with a token taken from repository secrets.
	# No `if` is set, so the default step condition applies.
	- name: Publish traces to storage repo
	env:
	GITHUB_TOKEN: ${{ secrets.GH_PAT_FOR_NIGHTLY_CI_DATA }}
	GITHUB_RUN_ID: ${{ github.run_id }}
	GITHUB_RUN_NUMBER: ${{ github.run_number }}
	run: \|
	python3 scripts/ci/publish_traces.py --traces-dir test/performance_profiles_vlms

	# diffusion performance tests
	# Runs the `1-gpu` suite of python/sglang/multimodal_gen/test/run_suite.py on the `1-gpu-runner` runner,
	# split into two partitions through a job matrix.
	nightly-test-multimodal-server-1-gpu:
	# Run only in sgl-project/sglang, and only when job_filter is empty, 'all', or this job's id.
	if: github.repository == 'sgl-project/sglang' && (inputs.job_filter == '' \|\| inputs.job_filter == 'all' \|\| inputs.job_filter == 'nightly-test-multimodal-server-1-gpu')
	runs-on: 1-gpu-runner
	# One matrix job per partition; fail-fast is off, so one partition failing does not cancel the other.
	strategy:
	fail-fast: false
	max-parallel: 5
	matrix:
	part: [0, 1]
	steps:
	# Check out inputs.ref when given, otherwise the ref that triggered the run.
	- name: Checkout code
	uses: actions/checkout@v4
	with:
	ref: ${{ inputs.ref \|\| github.ref }}

	# Passes `diffusion` to the install script, then installs slack_sdk with pip.
	- name: Install dependencies
	run: \|
	bash scripts/ci/ci_install_dependency.sh diffusion
	pip install slack_sdk

	# matrix.part is passed as --partition-id; --total-partitions must equal the number of matrix entries (2).
	# The step is capped at 60 minutes.
	- name: Run diffusion server tests
	env:
	SGLANG_DIFFUSION_SLACK_TOKEN: ${{ secrets.SGLANG_DIFFUSION_SLACK_TOKEN }}
	GITHUB_RUN_ID: ${{ github.run_id }}

	timeout-minutes: 60
	run: \|
	cd python
	python3 sglang/multimodal_gen/test/run_suite.py \
	--suite 1-gpu \
	--partition-id ${{ matrix.part }} \
	--total-partitions 2


	# Runs the `2-gpu` suite of python/sglang/multimodal_gen/test/run_suite.py on the `2-gpu-runner` runner,
	# split into two partitions through a job matrix.
	nightly-test-multimodal-server-2-gpu:
	# Run only in sgl-project/sglang, and only when job_filter is empty, 'all', or this job's id.
	if: github.repository == 'sgl-project/sglang' && (inputs.job_filter == '' \|\| inputs.job_filter == 'all' \|\| inputs.job_filter == 'nightly-test-multimodal-server-2-gpu')
	runs-on: 2-gpu-runner
	# One matrix job per partition; fail-fast is off, so one partition failing does not cancel the other.
	strategy:
	fail-fast: false
	max-parallel: 5
	matrix:
	part: [0, 1]
	steps:
	# Check out inputs.ref when given, otherwise the ref that triggered the run.
	- name: Checkout code
	uses: actions/checkout@v4
	with:
	ref: ${{ inputs.ref \|\| github.ref }}

	# Passes `diffusion` to the install script, then installs slack_sdk with pip.
	- name: Install dependencies
	run: \|
	bash scripts/ci/ci_install_dependency.sh diffusion
	pip install slack_sdk

	# matrix.part is passed as --partition-id; --total-partitions must equal the number of matrix entries (2).
	# The step is capped at 60 minutes.
	- name: Run diffusion server tests
	env:
	SGLANG_DIFFUSION_SLACK_TOKEN: ${{ secrets.SGLANG_DIFFUSION_SLACK_TOKEN }}
	GITHUB_RUN_ID: ${{ github.run_id }}

	timeout-minutes: 60
	run: \|
	cd python
	python3 sglang/multimodal_gen/test/run_suite.py \
	--suite 2-gpu \
	--partition-id ${{ matrix.part }} \
	--total-partitions 2

	# B200 Performance tests - 4 GPU
	# Runs the `nightly-4-gpu-b200` suite of test/run_suite.py on the `4-gpu-b200` runner.
	nightly-test-perf-4-gpu-b200:
	# Run only in sgl-project/sglang, and only when job_filter is empty, 'all', or this job's id.
	if: github.repository == 'sgl-project/sglang' && (inputs.job_filter == '' \|\| inputs.job_filter == 'all' \|\| inputs.job_filter == 'nightly-test-perf-4-gpu-b200')
	runs-on: 4-gpu-b200
	steps:
	# Check out inputs.ref when given, otherwise the ref that triggered the run.
	- name: Checkout code
	uses: actions/checkout@v4
	with:
	ref: ${{ inputs.ref \|\| github.ref }}

	# IS_BLACKWELL=1 is set only for the install script's process.
	- name: Install dependencies
	run: \|
	IS_BLACKWELL=1 bash scripts/ci/ci_install_dependency.sh

	# The test step is capped at 60 minutes.
	- name: Run test
	timeout-minutes: 60
	run: \|
	cd test
	python3 run_suite.py --hw cuda --suite nightly-4-gpu-b200 --nightly --continue-on-error

	# B200 Performance tests - 8 GPU
	# Runs the shared `nightly-8-gpu-common` suite of test/run_suite.py on the `8-gpu-b200` runner.
	nightly-test-perf-8-gpu-b200:
	# Run only in sgl-project/sglang, and only when job_filter is empty, 'all', or this job's id.
	if: github.repository == 'sgl-project/sglang' && (inputs.job_filter == '' \|\| inputs.job_filter == 'all' \|\| inputs.job_filter == 'nightly-test-perf-8-gpu-b200')
	runs-on: 8-gpu-b200
	# Job-level variable visible to every step of this job.
	env:
	RUNNER_LABELS: 8-gpu-b200
	steps:
	# Check out inputs.ref when given, otherwise the ref that triggered the run.
	- name: Checkout code
	uses: actions/checkout@v4
	with:
	ref: ${{ inputs.ref \|\| github.ref }}

	# IS_BLACKWELL=1 is set only for the install script's process.
	- name: Install dependencies
	run: \|
	IS_BLACKWELL=1 bash scripts/ci/ci_install_dependency.sh

	# `always()` makes this step run even if an earlier step failed.
	# The step is capped at 300 minutes; each test file gets a --timeout-per-file of 12000.
	- name: Run common 8-GPU model tests
	if: always()
	timeout-minutes: 300
	env:
	TRACE_BASE_URL: https://raw.githubusercontent.com/sglang-bot/sglang-ci-data/main/traces/${{ github.run_id }}
	PERFETTO_RELAY_URL: ${{ vars.PERFETTO_RELAY_URL }}
	GPU_CONFIG: "8-gpu-b200"
	run: \|
	cd test
	IS_BLACKWELL=1 python3 run_suite.py --hw cuda --suite nightly-8-gpu-common --nightly --timeout-per-file=12000 --continue-on-error

	# Final check job
	# Aggregates the results of every test job into a single pass/fail status.
	check-all-jobs:
	# `always()` lets this job run even when some of the needed jobs failed or did not run.
	if: github.repository == 'sgl-project/sglang' && always()
	# Must list every test job defined in this workflow.
	needs:
	- nightly-test-general-1-gpu-runner
	- nightly-test-general-4-gpu-h100
	- nightly-test-general-8-gpu-h200
	- nightly-test-general-8-gpu-h20
	- nightly-test-general-8-gpu-b200
	- nightly-test-text-accuracy-2-gpu-runner
	- nightly-test-text-perf-2-gpu-runner
	- nightly-test-vlm-accuracy-2-gpu-runner
	- nightly-test-vlm-perf-2-gpu-runner
	- nightly-test-multimodal-server-1-gpu
	- nightly-test-multimodal-server-2-gpu
	- nightly-test-perf-4-gpu-b200
	- nightly-test-perf-8-gpu-b200
	runs-on: ubuntu-latest
	steps:
	# Exits 1 when any needed job's result is 'failure' or 'cancelled'; any other result passes.
	- name: Check if any job failed
	run: \|
	if [[ "${{ contains(needs.*.result, 'failure') }}" == "true" ]]; then
	echo "One or more nightly test jobs failed"
	exit 1
	fi
	if [[ "${{ contains(needs.*.result, 'cancelled') }}" == "true" ]]; then
	echo "One or more nightly test jobs were cancelled"
	exit 1
	fi
	echo "All nightly test jobs passed"

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Nightly Test (Nvidia) #37

Workflow file

Nightly Test (Nvidia) #37

Uh oh!

Workflow file for this run