[Future] [P/D] support hybrid attention for mooncake connector #21861

Workflow file for this run

.github/workflows/pr_test_light.yaml at e38e282

#
# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# This file is a part of the vllm-ascend project.
#

	name: E2E-Light

	on:
	pull_request:
	branches:
	- 'main'
	- '*-dev'
	- 'releases/v*'

	# Bash shells do not use ~/.profile or ~/.bashrc so these shells need to be explicitly
	# declared as "shell: bash -el {0}" on steps that need to be properly activated.
	# It's used to activate ascend-toolkit environment variables.
	defaults:
	run:
	shell: bash -el {0}

	# only cancel in-progress runs of the same workflow
	# and ignore the lint / 1 card / 4 cards test type
	concurrency:
	group: ${{ github.workflow }}-${{ github.ref }}
	cancel-in-progress: true

	jobs:
	lint:
	uses: ./.github/workflows/_pre_commit.yml
	with:
	vllm: 0d4d334eaa583b9c09aa4eb7538c22db99fd84b3
	changes:
	runs-on: linux-aarch64-a2b3-0
	container:
	image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/vllm-ascend:nightly-cpu
	outputs:
	e2e_tracker: ${{ steps.filter.outputs.e2e_tracker }}
	_310_tracker: ${{ steps.filter.outputs._310_tracker }}
	has_tests: ${{ steps.scope.outputs.has_tests }}
	test_groups: ${{ steps.scope.outputs.test_groups }}
	matched_modules: ${{ steps.scope.outputs.matched_modules }}
	steps:
	- uses: actions/checkout@v6
	with:
	fetch-depth: 0
	# For now, we still need to use paths-filter to determine whether the change is related to e2e test.
	# we will migrate to use a single script to determine all the test scope in the future.
	- uses: dorny/paths-filter@v4
	id: filter
	with:
	filters: \|
	e2e_tracker:
	- '.github/workflows/pr_test_light.yaml'
	- '.github/workflows/_e2e_test.yaml'
	- '.github/workflows/scripts/**'
	- '.github/workflows/_optional_smart_e2e.yaml'
	- 'vllm_ascend/**'
	- 'csrc/**'
	- 'cmake/**'
	- 'tests/e2e/**'
	- 'CMakeLists.txt'
	- 'setup.py'
	- 'requirements.txt'
	- 'requirements-dev.txt'
	- 'requirements-lint.txt'
	- 'packages.txt'
	ut_tracker:
	- 'tests/ut/**'
	_310_tracker:
	- 'vllm_ascend/_310p/**'
	- 'csrc/**'
	- 'tests/e2e/310p/**'
	- 'vllm_ascend/worker/**'
	- 'vllm_ascend/attention/attention_v1.py'
	- 'vllm_ascend/ops/fused_moe/**'
	- 'vllm_ascend/patch/**'
	- 'CMakeLists.txt'

	- name: Determine smart UT test scope
	if: ${{ steps.filter.outputs.ut_tracker == 'true' \|\| steps.filter.outputs.e2e_tracker == 'true' }}
	id: scope
	run: \|
	git config --global --add safe.directory /__w/vllm-ascend/vllm-ascend
	git fetch origin ${{ github.base_ref }}
	pip install pyyaml
	python3 .github/workflows/scripts/determine_smart_e2e_scope.py \
	--diff-base origin/${{ github.base_ref }} \
	--run-all-cpu

	- name: Post scope summary
	if: ${{ steps.scope.outputs.has_tests == 'true' }}
	env:
	TEST_GROUPS: ${{ steps.scope.outputs.test_groups }}
	MATCHED_MODULES: ${{ steps.scope.outputs.matched_modules }}
	run: \|
	python3 -c "
	import json, os, yaml
	from pathlib import Path

	groups = json.loads(os.environ['TEST_GROUPS'])
	modules = os.environ['MATCHED_MODULES']
	blacklist_path = Path('.github/workflows/scripts/ut_blacklist.yaml')
	blacklist = yaml.safe_load(blacklist_path.read_text()) if blacklist_path.exists() else []

	lines = ['## Smart UT Test Scope', '']
	lines.append(f'Matched modules: {modules}')
	lines.append('')

	if blacklist:
	lines.append('<details>')
	lines.append(f'<summary>Blacklisted ({len(blacklist)} tests)</summary>')
	lines.append('')
	for bl in sorted(blacklist):
	lines.append(f'- \`{bl}\`')
	lines.append('')
	lines.append('</details>')
	lines.append('')

	for g in groups:
	npu_type = g['npu_type']
	num_npus = g['num_npus']
	runner = g['runner']
	tests = g['tests'].split()
	if npu_type == 'cpu':
	header = f'### CPU ({len(tests)} tests) \u2192 \`{runner}\`'
	else:
	header = f'### {npu_type.upper()} x{num_npus} ({len(tests)} tests) \u2192 \`{runner}\`'
	lines.append(header)
	lines.append('')
	lines.append('\| # \| Test target \|')
	lines.append('\|---\|------------\|')
	for i, t in enumerate(tests, 1):
	lines.append(f'\| {i} \| \`{t}\` \|')
	lines.append('')

	print('\n'.join(lines))
	" >> $GITHUB_STEP_SUMMARY

	smart_e2e:
	needs: [lint, changes]
	name: smart test
	if: ${{ needs.lint.result == 'success' && needs.changes.outputs.has_tests == 'true' }}
	strategy:
	matrix:
	vllm_version: [0d4d334eaa583b9c09aa4eb7538c22db99fd84b3, v0.20.2]
	uses: ./.github/workflows/_optional_smart_e2e.yaml
	with:
	vllm: ${{ matrix.vllm_version }}
	test_groups: ${{ needs.changes.outputs.test_groups }}

	e2e-light:
	name: e2e-light
	strategy:
	matrix:
	vllm_version: [0d4d334eaa583b9c09aa4eb7538c22db99fd84b3, v0.20.2]
	# Note (yikun): If CI resource are limited we can split job into two chain jobs
	needs: [lint, changes]
	# only trigger e2e test after lint passed and the change is e2e related with pull request.
	if: ${{ github.event_name == 'pull_request' && needs.lint.result == 'success' && needs.changes.outputs.e2e_tracker == 'true' && !contains(github.event.pull_request.labels.*.name, 'ready') }}
	uses: ./.github/workflows/_e2e_test.yaml
	with:
	vllm: ${{ matrix.vllm_version }}
	image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:9.0.0-910b-ubuntu22.04-py3.11
	contains_310: ${{ needs.changes.outputs._310_tracker == 'true' }}
	type: light

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

[Future] [P/D] support hybrid attention for mooncake connector #21861

Workflow file

[Future] [P/D] support hybrid attention for mooncake connector #21861

Uh oh!

Workflow file for this run