Skip to content

[Future] [P/D] support hybrid attention for mooncake connector #21861

[Future] [P/D] support hybrid attention for mooncake connector

[Future] [P/D] support hybrid attention for mooncake connector #21861

Workflow file for this run

#
# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# This file is a part of the vllm-ascend project.
#
# Display name of this workflow.
name: E2E-Light

# Trigger: pull requests whose base branch matches one of these patterns.
on:
  pull_request:
    branches:
      - main
      - "*-dev"
      - "releases/v*"
# Bash does not read ~/.profile or ~/.bashrc on its own, so every `run` step
# defaults to the login-shell template "bash -el {0}". This is what activates
# the ascend-toolkit environment variables.
defaults:
  run:
    shell: "bash -el {0}"
# Runs are grouped by workflow name plus git ref, so a new run only cancels an
# in-progress run of this same workflow on the same ref. The group key has no
# test-type component (lint / 1 card / 4 cards).
concurrency:
  group: "${{ github.workflow }}-${{ github.ref }}"
  cancel-in-progress: true
jobs:
  # Calls the reusable workflow _pre_commit.yml, passing the pinned vLLM
  # commit as its `vllm` input.
  lint:
    uses: ./.github/workflows/_pre_commit.yml
    with:
      vllm: "0d4d334eaa583b9c09aa4eb7538c22db99fd84b3"

  # Computes the path-filter flags and the smart test scope, and exposes them
  # as job outputs that the `smart_e2e` and `e2e-light` jobs read.
  changes:
    runs-on: linux-aarch64-a2b3-0
    container:
      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/vllm-ascend:nightly-cpu
    outputs:
      e2e_tracker: ${{ steps.filter.outputs.e2e_tracker }}
      _310_tracker: ${{ steps.filter.outputs._310_tracker }}
      has_tests: ${{ steps.scope.outputs.has_tests }}
      test_groups: ${{ steps.scope.outputs.test_groups }}
      matched_modules: ${{ steps.scope.outputs.matched_modules }}
    steps:
      - uses: actions/checkout@v6
        with:
          # NOTE(review): presumably full history is needed so the scope script
          # can diff against the base branch - confirm.
          fetch-depth: 0

      # For now paths-filter still decides whether a change is e2e related.
      # The plan is to move all test-scope detection into a single script.
      - uses: dorny/paths-filter@v4
        id: filter
        with:
          filters: |
            e2e_tracker:
              - '.github/workflows/pr_test_light.yaml'
              - '.github/workflows/_e2e_test.yaml'
              - '.github/workflows/scripts/**'
              - '.github/workflows/_optional_smart_e2e.yaml'
              - 'vllm_ascend/**'
              - 'csrc/**'
              - 'cmake/**'
              - 'tests/e2e/**'
              - 'CMakeLists.txt'
              - 'setup.py'
              - 'requirements.txt'
              - 'requirements-dev.txt'
              - 'requirements-lint.txt'
              - 'packages.txt'
            ut_tracker:
              - 'tests/ut/**'
            _310_tracker:
              - 'vllm_ascend/_310p/**'
              - 'csrc/**'
              - 'tests/e2e/310p/**'
              - 'vllm_ascend/worker/**'
              - 'vllm_ascend/attention/attention_v1.py'
              - 'vllm_ascend/ops/fused_moe/**'
              - 'vllm_ascend/patch/**'
              - 'CMakeLists.txt'

      # Runs only when the UT or e2e path filter matched. Fetches the base
      # branch and invokes the scope script, whose step outputs (has_tests,
      # test_groups, matched_modules) are re-exported as job outputs above.
      - name: Determine smart UT test scope
        id: scope
        if: >-
          ${{ steps.filter.outputs.ut_tracker == 'true'
          || steps.filter.outputs.e2e_tracker == 'true' }}
        env:
          # The base branch name reaches the script through the environment
          # rather than being interpolated into the shell source, so unusual
          # characters in the ref cannot be interpreted by bash.
          BASE_REF: ${{ github.base_ref }}
        run: |
          git config --global --add safe.directory /__w/vllm-ascend/vllm-ascend
          git fetch origin "${BASE_REF}"
          pip install pyyaml
          python3 .github/workflows/scripts/determine_smart_e2e_scope.py \
            --diff-base "origin/${BASE_REF}" \
            --run-all-cpu

      # Appends a Markdown report to the step summary: the matched modules,
      # a collapsible list of blacklisted tests (when ut_blacklist.yaml exists
      # and is non-empty), and one table of test targets per test group.
      # The inline Python sits inside a double-quoted bash string, which is
      # why the backticks are backslash-escaped.
      - name: Post scope summary
        if: ${{ steps.scope.outputs.has_tests == 'true' }}
        env:
          TEST_GROUPS: ${{ steps.scope.outputs.test_groups }}
          MATCHED_MODULES: ${{ steps.scope.outputs.matched_modules }}
        run: |
          python3 -c "
          import json, os, yaml
          from pathlib import Path
          groups = json.loads(os.environ['TEST_GROUPS'])
          modules = os.environ['MATCHED_MODULES']
          blacklist_path = Path('.github/workflows/scripts/ut_blacklist.yaml')
          blacklist = yaml.safe_load(blacklist_path.read_text()) if blacklist_path.exists() else []
          lines = ['## Smart UT Test Scope', '']
          lines.append(f'**Matched modules:** {modules}')
          lines.append('')
          if blacklist:
              lines.append('<details>')
              lines.append(f'<summary>Blacklisted ({len(blacklist)} tests)</summary>')
              lines.append('')
              for bl in sorted(blacklist):
                  lines.append(f'- \`{bl}\`')
              lines.append('')
              lines.append('</details>')
              lines.append('')
          for g in groups:
              npu_type = g['npu_type']
              num_npus = g['num_npus']
              runner = g['runner']
              tests = g['tests'].split()
              if npu_type == 'cpu':
                  header = f'### CPU ({len(tests)} tests) \u2192 \`{runner}\`'
              else:
                  header = f'### {npu_type.upper()} x{num_npus} ({len(tests)} tests) \u2192 \`{runner}\`'
              lines.append(header)
              lines.append('')
              lines.append('| # | Test target |')
              lines.append('|---|------------|')
              for i, t in enumerate(tests, 1):
                  lines.append(f'| {i} | \`{t}\` |')
              lines.append('')
          print('\n'.join(lines))
          " >> "$GITHUB_STEP_SUMMARY"

  # Runs the reusable smart e2e workflow once per vLLM version in the matrix,
  # only when lint succeeded and the scope step reported tests to run.
  smart_e2e:
    name: smart test
    needs:
      - lint
      - changes
    if: ${{ needs.lint.result == 'success' && needs.changes.outputs.has_tests == 'true' }}
    strategy:
      matrix:
        vllm_version:
          - "0d4d334eaa583b9c09aa4eb7538c22db99fd84b3"
          - "v0.20.2"
    uses: ./.github/workflows/_optional_smart_e2e.yaml
    with:
      vllm: ${{ matrix.vllm_version }}
      test_groups: ${{ needs.changes.outputs.test_groups }}

  # Runs the reusable e2e workflow in `light` mode once per vLLM version.
  e2e-light:
    name: e2e-light
    strategy:
      matrix:
        vllm_version:
          - "0d4d334eaa583b9c09aa4eb7538c22db99fd84b3"
          - "v0.20.2"
    # Note (yikun): if CI resources are limited, this job can be split into
    # two chained jobs.
    needs:
      - lint
      - changes
    # Only triggered for pull requests, after lint passed, when the change is
    # e2e related and the pull request does not carry the 'ready' label.
    if: >-
      ${{ github.event_name == 'pull_request'
      && needs.lint.result == 'success'
      && needs.changes.outputs.e2e_tracker == 'true'
      && !contains(github.event.pull_request.labels.*.name, 'ready') }}
    uses: ./.github/workflows/_e2e_test.yaml
    with:
      vllm: ${{ matrix.vllm_version }}
      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:9.0.0-910b-ubuntu22.04-py3.11
      contains_310: ${{ needs.changes.outputs._310_tracker == 'true' }}
      type: light