# --- GitHub web-view header (scrape artifact), preserved as comments so the
# --- file parses as YAML ---
# PR #60119: [diffusion] refactor: separate runtime metadata from arch config
# Workflow file for this run

name: PR Test (NPU)

# Triggers: pushes/PRs against main, manual dispatch, and reuse from other
# workflows (workflow_call) with an optional ref + run-all-tests override.
on:
  push:
    branches: [ main ]
  pull_request:
    branches: [ main ]
  workflow_dispatch:
  workflow_call:
    inputs:
      ref:
        description: 'Git ref (branch, tag, or SHA) to test. If not provided, uses the default branch.'
        required: false
        type: string
        default: ''
      run_all_tests:
        description: "Run all tests (for releasing or testing purpose)"
        required: false
        type: boolean
        default: false

# One run per ref; newer runs cancel older ones except for reusable calls.
concurrency:
  group: pr-test-npu-${{ inputs.ref || github.ref }}
  # NOTE(review): for a called workflow, github.event_name is inherited from
  # the caller and is never 'workflow_call', so this is effectively always
  # true — confirm whether ${{ inputs.ref == '' }} was the intent.
  cancel-in-progress: ${{ github.event_name != 'workflow_call' }}
jobs:
  # ==================== Check Changes ==================== #
  # Decides which downstream test groups run: path filters on PR/push, or
  # everything when run_all_tests is requested via workflow_call.
  check-changes:
    runs-on: ubuntu-latest
    outputs:
      changes_exist: ${{ steps.filter.outputs.main_package == 'true' || steps.filter.outputs.multimodal_gen == 'true' || steps.run-mode.outputs.run_all_tests == 'true' }}
      main_package: ${{ steps.filter.outputs.main_package == 'true' || steps.run-mode.outputs.run_all_tests == 'true' }}
      multimodal_gen: ${{ steps.filter.outputs.multimodal_gen == 'true' || steps.run-mode.outputs.run_all_tests == 'true' }}
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}
      - name: Determine run mode
        id: run-mode
        run: |
          # Run all tests for workflow_call (when ref input is provided)
          # Note: github.event_name is inherited from caller, so we detect workflow_call by checking inputs.ref
          if [[ "${{ inputs.run_all_tests }}" == "true" ]]; then
            echo "run_all_tests=true" >> $GITHUB_OUTPUT
            echo "Run mode: ALL TESTS (run_all_tests=${{ inputs.run_all_tests }})"
          else
            echo "run_all_tests=false" >> $GITHUB_OUTPUT
            echo "Run mode: FILTERED (triggered by ${{ github.event_name }})"
          fi
      - name: Detect file changes
        id: filter
        uses: dorny/paths-filter@v3
        # Skipped in run-all mode; outputs then default empty and the job
        # outputs above fall through to run_all_tests.
        if: steps.run-mode.outputs.run_all_tests != 'true'
        with:
          filters: |
            main_package:
              - "python/sglang/!(multimodal_gen)/**/!(*.md)"
              - "python/pyproject_npu.toml"
              - "scripts/ci/npu/npu_ci_install_dependency.sh"
              - "test/srt/ascend/**"
              - ".github/workflows/pr-test-npu.yml"
            multimodal_gen:
              - "python/sglang/multimodal_gen/**/*.!(md|ipynb)"
              - "python/sglang/srt/**"
              - "python/pyproject_npu.toml"
              - "scripts/ci/npu/npu_ci_install_dependency.sh"
              - ".github/workflows/pr-test-npu.yml"

  # ==================== PR Gate ==================== #
  pr-gate:
    needs: check-changes
    if: needs.check-changes.outputs.changes_exist == 'true'
    uses: ./.github/workflows/pr-gate.yml
    secrets: inherit

  stage-b-test-1-npu-a2:
    needs: [check-changes, pr-gate]
    if: needs.check-changes.outputs.main_package == 'true'
    runs-on: linux-aarch64-a2-1
    strategy:
      fail-fast: false
      matrix:
        part: [ 0, 1 ]
    container:
      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.5.0-910b-ubuntu22.04-py3.11
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}
      - name: Mark repository safe
        run: |
          git config --system --add safe.directory ${GITHUB_WORKSPACE}
      - name: Install dependencies
        env:
          TORCH_CACHE_URL: "http://cache-service.nginx-pypi-cache.svc.cluster.local/whl/cpu"
          PYPI_CACHE_URL: "http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple"
          UV_INDEX_URL: "http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple"
          GITHUB_PROXY_URL: "https://gh-proxy.test.osinfra.cn/"
        run: |
          # speed up by using infra cache services
          CACHING_URL="cache-service.nginx-pypi-cache.svc.cluster.local"
          sed -Ei "s@(ports|archive).ubuntu.com@${CACHING_URL}:8081@g" /etc/apt/sources.list
          pip config set global.index-url http://${CACHING_URL}/pypi/simple
          pip config set global.trusted-host "${CACHING_URL}"
          bash scripts/ci/npu/npu_ci_install_dependency.sh 910b
          # copy required file from our daily cache
          cp ~/.cache/modelscope/hub/datasets/otavia/ShareGPT_Vicuna_unfiltered/ShareGPT_V3_unfiltered_cleaned_split.json /tmp
          # copy gsm8k dataset
          cp ~/.cache/modelscope/hub/datasets/tmp/test.jsonl /tmp
      - name: Run test
        timeout-minutes: 60
        env:
          SGLANG_USE_MODELSCOPE: "true"
          SGLANG_IS_IN_CI: "true"
          HF_ENDPOINT: https://hf-mirror.com
          TORCH_EXTENSIONS_DIR: /tmp/torch_extensions
          PYTORCH_NPU_ALLOC_CONF: "expandable_segments:True"
          STREAMS_PER_DEVICE: "32"
        run: |
          cd test
          python3 run_suite.py --hw npu --suite stage-b-test-1-npu-a2 --auto-partition-id ${{ matrix.part }} --auto-partition-size 2

  stage-b-test-2-npu-a2:
    needs: [check-changes, pr-gate]
    if: needs.check-changes.outputs.main_package == 'true'
    runs-on: linux-aarch64-a2-2
    strategy:
      # Keep both matrix partitions running even if one fails, matching
      # stage-b-test-1-npu-a2 (was inconsistently fail-fast: true).
      fail-fast: false
      matrix:
        part: [ 0, 1 ]
    container:
      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.5.0-910b-ubuntu22.04-py3.11
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}
      - name: Mark repository safe
        run: |
          git config --system --add safe.directory ${GITHUB_WORKSPACE}
      - name: Install dependencies
        env:
          TORCH_CACHE_URL: "http://cache-service.nginx-pypi-cache.svc.cluster.local/whl/cpu"
          PYPI_CACHE_URL: "http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple"
          UV_INDEX_URL: "http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple"
          GITHUB_PROXY_URL: "https://gh-proxy.test.osinfra.cn/"
        run: |
          # speed up by using infra cache services
          CACHING_URL="cache-service.nginx-pypi-cache.svc.cluster.local"
          sed -Ei "s@(ports|archive).ubuntu.com@${CACHING_URL}:8081@g" /etc/apt/sources.list
          pip config set global.index-url http://${CACHING_URL}/pypi/simple
          pip config set global.trusted-host "${CACHING_URL}"
          bash scripts/ci/npu/npu_ci_install_dependency.sh 910b
          # copy required file from our daily cache
          cp ~/.cache/modelscope/hub/datasets/otavia/ShareGPT_Vicuna_unfiltered/ShareGPT_V3_unfiltered_cleaned_split.json /tmp
          # copy gsm8k dataset
          cp ~/.cache/modelscope/hub/datasets/tmp/test.jsonl /tmp
      - name: Run test
        timeout-minutes: 60
        env:
          SGLANG_USE_MODELSCOPE: "true"
          SGLANG_IS_IN_CI: "true"
          HF_ENDPOINT: https://hf-mirror.com
          TORCH_EXTENSIONS_DIR: /tmp/torch_extensions
          PYTORCH_NPU_ALLOC_CONF: "expandable_segments:True"
          STREAMS_PER_DEVICE: "32"
        run: |
          cd test
          python3 run_suite.py --hw npu --suite stage-b-test-2-npu-a2 --auto-partition-id ${{ matrix.part }} --auto-partition-size 2

  stage-b-test-4-npu-a3:
    needs: [check-changes, pr-gate]
    if: needs.check-changes.outputs.main_package == 'true'
    runs-on: linux-aarch64-a3-4
    container:
      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.5.0-a3-ubuntu22.04-py3.11
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}
      - name: Mark repository safe
        run: |
          git config --system --add safe.directory ${GITHUB_WORKSPACE}
      - name: Install dependencies
        env:
          TORCH_CACHE_URL: "http://cache-service.nginx-pypi-cache.svc.cluster.local/whl/cpu"
          PYPI_CACHE_URL: "http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple"
          UV_INDEX_URL: "http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple"
          GITHUB_PROXY_URL: "https://gh-proxy.test.osinfra.cn/"
        run: |
          # speed up by using infra cache services
          CACHING_URL="cache-service.nginx-pypi-cache.svc.cluster.local"
          sed -Ei "s@(ports|archive).ubuntu.com@${CACHING_URL}:8081@g" /etc/apt/sources.list
          pip config set global.index-url http://${CACHING_URL}/pypi/simple
          pip config set global.trusted-host "${CACHING_URL}"
          bash scripts/ci/npu/npu_ci_install_dependency.sh a3
          # copy required file from our daily cache
          cp ~/.cache/modelscope/hub/datasets/otavia/ShareGPT_Vicuna_unfiltered/ShareGPT_V3_unfiltered_cleaned_split.json /tmp
          # copy gsm8k dataset
          cp ~/.cache/modelscope/hub/datasets/tmp/test.jsonl /tmp
      - name: Run test
        timeout-minutes: 60
        env:
          SGLANG_USE_MODELSCOPE: "true"
          SGLANG_IS_IN_CI: "true"
          HF_ENDPOINT: https://hf-mirror.com
          TORCH_EXTENSIONS_DIR: /tmp/torch_extensions
          PYTORCH_NPU_ALLOC_CONF: "expandable_segments:True"
          STREAMS_PER_DEVICE: "32"
        run: |
          cd test
          python3 run_suite.py --hw npu --suite stage-b-test-4-npu-a3 --timeout-per-file 3600

  stage-b-test-16-npu-a3:
    needs: [check-changes, pr-gate]
    if: needs.check-changes.outputs.main_package == 'true'
    runs-on: linux-aarch64-a3-16
    container:
      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.5.0-a3-ubuntu22.04-py3.11
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}
      - name: Mark repository safe
        run: |
          git config --system --add safe.directory ${GITHUB_WORKSPACE}
      - name: Install dependencies
        env:
          TORCH_CACHE_URL: "http://cache-service.nginx-pypi-cache.svc.cluster.local/whl/cpu"
          PYPI_CACHE_URL: "http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple"
          UV_INDEX_URL: "http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple"
          GITHUB_PROXY_URL: "https://gh-proxy.test.osinfra.cn/"
        run: |
          # speed up by using infra cache services
          CACHING_URL="cache-service.nginx-pypi-cache.svc.cluster.local"
          sed -Ei "s@(ports|archive).ubuntu.com@${CACHING_URL}:8081@g" /etc/apt/sources.list
          pip config set global.index-url http://${CACHING_URL}/pypi/simple
          pip config set global.trusted-host "${CACHING_URL}"
          bash scripts/ci/npu/npu_ci_install_dependency.sh a3
          # copy required file from our daily cache
          cp ~/.cache/modelscope/hub/datasets/otavia/ShareGPT_Vicuna_unfiltered/ShareGPT_V3_unfiltered_cleaned_split.json /tmp
          # copy gsm8k dataset
          cp ~/.cache/modelscope/hub/datasets/tmp/test.jsonl /tmp
      - name: Run test
        timeout-minutes: 60
        env:
          SGLANG_USE_MODELSCOPE: "true"
          SGLANG_IS_IN_CI: "true"
          HF_ENDPOINT: https://hf-mirror.com
          TORCH_EXTENSIONS_DIR: /tmp/torch_extensions
          PYTORCH_NPU_ALLOC_CONF: "expandable_segments:True"
          STREAMS_PER_DEVICE: "32"
        run: |
          cd test
          python3 run_suite.py --hw npu --suite stage-b-test-16-npu-a3 --timeout-per-file 3600

  multimodal-gen-test-1-npu-a3:
    needs: [check-changes, pr-gate]
    if: needs.check-changes.outputs.multimodal_gen == 'true'
    runs-on: linux-aarch64-a3-2
    container:
      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-a3-ubuntu22.04-py3.11
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          # Fix: previously omitted, so workflow_call with a ref input tested
          # the default branch here instead of the requested commit.
          ref: ${{ inputs.ref || github.ref }}
      - name: Mark repository safe
        run: |
          git config --system --add safe.directory ${GITHUB_WORKSPACE}
      - name: Install dependencies
        env:
          TORCH_CACHE_URL: "http://cache-service.nginx-pypi-cache.svc.cluster.local/whl/cpu"
          PYPI_CACHE_URL: "http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple"
          UV_INDEX_URL: "http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple"
          GITHUB_PROXY_URL: "https://gh-proxy.test.osinfra.cn/"
        run: |
          # speed up by using infra cache services
          CACHING_URL="cache-service.nginx-pypi-cache.svc.cluster.local"
          sed -Ei "s@(ports|archive).ubuntu.com@${CACHING_URL}:8081@g" /etc/apt/sources.list
          pip config set global.index-url http://${CACHING_URL}/pypi/simple
          pip config set global.trusted-host "${CACHING_URL}"
          bash scripts/ci/npu/npu_ci_install_dependency.sh a3 diffusion
          # copy required file from our daily cache
          cp ~/.cache/modelscope/hub/datasets/otavia/ShareGPT_Vicuna_unfiltered/ShareGPT_V3_unfiltered_cleaned_split.json /tmp
          # copy gsm8k dataset
          cp ~/.cache/modelscope/hub/datasets/tmp/test.jsonl /tmp
      - name: Run test
        timeout-minutes: 60
        env:
          SGLANG_USE_MODELSCOPE: "true"
          SGLANG_IS_IN_CI: "true"
          HF_ENDPOINT: https://hf-mirror.com
          TORCH_EXTENSIONS_DIR: /tmp/torch_extensions
          PYTORCH_NPU_ALLOC_CONF: "expandable_segments:True"
          STREAMS_PER_DEVICE: "32"
        run: |
          export PATH="/usr/local/Ascend/8.3.RC1/compiler/bishengir/bin:${PATH}"
          cd python
          python3 sglang/multimodal_gen/test/run_suite.py --suite 1-npu

  multimodal-gen-test-2-npu-a3:
    needs: [check-changes, pr-gate]
    if: needs.check-changes.outputs.multimodal_gen == 'true'
    runs-on: linux-aarch64-a3-16
    container:
      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-a3-ubuntu22.04-py3.11
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          # Fix: previously omitted, so workflow_call with a ref input tested
          # the default branch here instead of the requested commit.
          ref: ${{ inputs.ref || github.ref }}
      - name: Mark repository safe
        run: |
          git config --system --add safe.directory ${GITHUB_WORKSPACE}
      - name: Install dependencies
        env:
          TORCH_CACHE_URL: "http://cache-service.nginx-pypi-cache.svc.cluster.local/whl/cpu"
          PYPI_CACHE_URL: "http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple"
          UV_INDEX_URL: "http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple"
          GITHUB_PROXY_URL: "https://gh-proxy.test.osinfra.cn/"
        run: |
          # speed up by using infra cache services
          CACHING_URL="cache-service.nginx-pypi-cache.svc.cluster.local"
          sed -Ei "s@(ports|archive).ubuntu.com@${CACHING_URL}:8081@g" /etc/apt/sources.list
          pip config set global.index-url http://${CACHING_URL}/pypi/simple
          pip config set global.trusted-host "${CACHING_URL}"
          bash scripts/ci/npu/npu_ci_install_dependency.sh a3 diffusion
          # copy required file from our daily cache
          cp ~/.cache/modelscope/hub/datasets/otavia/ShareGPT_Vicuna_unfiltered/ShareGPT_V3_unfiltered_cleaned_split.json /tmp
          # copy gsm8k dataset
          cp ~/.cache/modelscope/hub/datasets/tmp/test.jsonl /tmp
      - name: Run test
        timeout-minutes: 60
        env:
          SGLANG_USE_MODELSCOPE: "true"
          SGLANG_IS_IN_CI: "true"
          HF_ENDPOINT: https://hf-mirror.com
          TORCH_EXTENSIONS_DIR: /tmp/torch_extensions
          PYTORCH_NPU_ALLOC_CONF: "expandable_segments:True"
          STREAMS_PER_DEVICE: "32"
        run: |
          export PATH="/usr/local/Ascend/8.3.RC1/compiler/bishengir/bin:${PATH}"
          cd python
          python3 sglang/multimodal_gen/test/run_suite.py --suite 2-npu

  multimodal-gen-test-8-npu-a3:
    needs: [check-changes, pr-gate]
    if: needs.check-changes.outputs.multimodal_gen == 'true'
    runs-on: linux-aarch64-a3-8
    container:
      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.5.0-a3-ubuntu22.04-py3.11
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          # Fix: previously omitted, so workflow_call with a ref input tested
          # the default branch here instead of the requested commit.
          ref: ${{ inputs.ref || github.ref }}
      - name: Mark repository safe
        run: |
          git config --system --add safe.directory ${GITHUB_WORKSPACE}
      - name: Install dependencies
        env:
          TORCH_CACHE_URL: "http://cache-service.nginx-pypi-cache.svc.cluster.local/whl/cpu"
          PYPI_CACHE_URL: "http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple"
          UV_INDEX_URL: "http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple"
          GITHUB_PROXY_URL: "https://gh-proxy.test.osinfra.cn/"
        run: |
          # speed up by using infra cache services
          CACHING_URL="cache-service.nginx-pypi-cache.svc.cluster.local"
          sed -Ei "s@(ports|archive).ubuntu.com@${CACHING_URL}:8081@g" /etc/apt/sources.list
          pip config set global.index-url http://${CACHING_URL}/pypi/simple
          pip config set global.trusted-host "${CACHING_URL}"
          bash scripts/ci/npu/npu_ci_install_dependency.sh a3 diffusion
          # copy required file from our daily cache
          cp ~/.cache/modelscope/hub/datasets/otavia/ShareGPT_Vicuna_unfiltered/ShareGPT_V3_unfiltered_cleaned_split.json /tmp
          # copy gsm8k dataset
          cp ~/.cache/modelscope/hub/datasets/tmp/test.jsonl /tmp
      - name: Run test
        timeout-minutes: 60
        env:
          SGLANG_USE_MODELSCOPE: "true"
          SGLANG_IS_IN_CI: "true"
          HF_ENDPOINT: https://hf-mirror.com
          TORCH_EXTENSIONS_DIR: /tmp/torch_extensions
          PYTORCH_NPU_ALLOC_CONF: "expandable_segments:True"
          STREAMS_PER_DEVICE: "32"
        run: |
          cd python
          python3 sglang/multimodal_gen/test/run_suite.py --suite 8-npu

  # Aggregation gate: fails if any dependent job failed or was cancelled,
  # passes when they all succeeded or were skipped by the change filter.
  pr-test-npu-finish:
    needs:
      [
        check-changes,
        stage-b-test-1-npu-a2,
        stage-b-test-2-npu-a2,
        stage-b-test-4-npu-a3,
        stage-b-test-16-npu-a3,
        multimodal-gen-test-1-npu-a3,
        multimodal-gen-test-2-npu-a3,
        multimodal-gen-test-8-npu-a3,
      ]
    if: always()
    runs-on: ubuntu-latest
    steps:
      - name: Check all dependent job statuses
        run: |
          # Convert the 'needs' context to a JSON string
          json_needs='${{ toJson(needs) }}'
          # Get a list of all job names from the JSON keys
          job_names=$(echo "$json_needs" | jq -r 'keys_unsorted[]')
          for job in $job_names; do
            # For each job, extract its result
            result=$(echo "$json_needs" | jq -r --arg j "$job" '.[$j].result')
            # Print the job name and its result
            echo "$job: $result"
            # Check for failure or cancellation and exit if found
            if [[ "$result" == "failure" || "$result" == "cancelled" ]]; then
              echo "The above jobs failed."
              exit 1
            fi
          done
          # If the loop completes, all jobs were successful
          echo "All jobs completed successfully"
          exit 0