-
Notifications
You must be signed in to change notification settings - Fork 1.5k
295 lines (269 loc) · 10.8 KB
/
Copy path_selected_tests.yaml
File metadata and controls
295 lines (269 loc) · 10.8 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
#
# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# This file is a part of the vllm-ascend project.
#
name: Selected Tests

# Reusable workflow: it is only started through `workflow_call`, and the
# caller supplies the inputs declared below.
on:
  workflow_call:
    inputs:
      # Passed as `ref` when checking out vllm-project/vllm.
      vllm:
        description: 'The vllm commit hash or tag to test.'
        required: true
        type: string
      # Parsed with fromJSON() into the job matrix (one job per group).
      test_groups:
        description: 'JSON array of test groups from select_tests.py'
        required: true
        type: string
      # When empty, the vllm-ascend checkout falls back to `github.ref`.
      ref:
        description: 'The vllm-ascend ref to test.'
        required: false
        type: string
        default: ''
      # Adds the timing flag to device runs and uploads the timing artifact.
      upload_timing:
        description: 'Upload test_timing_data.json as artifact'
        required: false
        type: boolean
        default: false
      # Forwarded to `continue-on-error` of the two test-running steps.
      continue_on_error:
        description: 'Continue running the job even if tests fail'
        required: false
        type: boolean
        default: false
      # Exported to the test steps as ENABLE_COVERAGE and gates the coverage
      # artifact upload.
      enable-coverage:
        description: 'Whether to run tests with coverage enabled.'
        required: false
        type: boolean
        default: false
# A plain (non-login) bash shell does not read ~/.profile or ~/.bashrc, so
# every `run` step defaults to a login shell via "bash -el {0}". That is what
# activates the ascend-toolkit environment variables for the step.
defaults:
  run:
    shell: 'bash -el {0}'
# Least-privilege token for this workflow:
#   contents: read       - repository checkouts.
#   pull-requests: read  - lets dorny/paths-filter list the files changed in a
#                          pull request. No step in this workflow comments on,
#                          labels or otherwise modifies a pull request, so
#                          write access is not requested.
permissions:
  contents: read
  pull-requests: read
jobs:
  # One job is created per entry of the `test_groups` JSON array. Each entry
  # (`matrix.group`) supplies the runner label, container image tag, NPU type,
  # NPU count, an optional partition and the tests to run.
  selected-tests:
    name: ${{ matrix.group.npu_type }}-${{ matrix.group.num_npus }} card-${{ matrix.group.partition && format('(part {0})', matrix.group.partition) || '' }}
    strategy:
      # A failing group must not cancel the other groups.
      fail-fast: false
      matrix:
        group: ${{ fromJSON(inputs.test_groups) }}
    runs-on: ${{ matrix.group.runner }}
    container:
      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:${{ matrix.group.image_tag }}
      env:
        VLLM_LOGGING_LEVEL: ERROR
        VLLM_USE_MODELSCOPE: True
        HF_HUB_OFFLINE: 1
        # Default build parallelism; the device install step overrides it.
        MAX_JOBS: 4
        # uv resolves packages through the in-cluster PyPI cache.
        UV_INDEX_URL: http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple
        UV_EXTRA_INDEX_URL: "https://repo.huaweicloud.com/ascend/repos/pypi http://cache-service.nginx-pypi-cache.svc.cluster.local/whl/cpu/"
        UV_INDEX_STRATEGY: unsafe-best-match
        UV_INSECURE_HOST: cache-service.nginx-pypi-cache.svc.cluster.local
        UV_HTTP_TIMEOUT: 120
        UV_NO_CACHE: 1
        UV_SYSTEM_PYTHON: 1
    steps:
      # Skipped for CPU-only groups.
      - name: Check NPU availability
        if: ${{ matrix.group.npu_type != 'cpu' }}
        run: |
          npu-smi info

      # Point apt and pip at the in-cluster cache, install the build tools,
      # mark the workspace as a safe git directory and install uv.
      - name: Install packages
        run: |
          sed -Ei 's@(ports|archive).ubuntu.com@cache-service.nginx-pypi-cache.svc.cluster.local:8081@g' /etc/apt/sources.list
          pip config set global.index-url http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple
          pip config set global.trusted-host cache-service.nginx-pypi-cache.svc.cluster.local
          apt-get update -y
          apt-get install -y python3-pip git vim wget net-tools gcc g++ cmake libnuma-dev curl gnupg2 zstd
          git config --global --add safe.directory /__w/vllm-ascend/vllm-ascend
          pip install uv

      - name: Checkout vllm-project/vllm repo
        uses: actions/checkout@v6
        with:
          repository: vllm-project/vllm
          ref: ${{ inputs.vllm }}
          path: ./vllm-empty

      # Install vllm with VLLM_TARGET_DEVICE=empty, then remove triton.
      - name: Install vllm-project/vllm from source
        working-directory: ./vllm-empty
        run: |
          VLLM_TARGET_DEVICE=empty uv pip install .
          pip uninstall -y triton

      # Full history (fetch-depth: 0) is fetched for the rebase below.
      - name: Checkout vllm-project/vllm-ascend repo
        uses: actions/checkout@v6
        with:
          ref: ${{ inputs.ref || github.ref }}
          fetch-depth: 0

      # Test the checked-out ref on top of the current origin/main.
      - name: Rebase on latest main
        run: |
          git config user.name "vllm-ascend-ci"
          git config user.email "vllm-ascend-ci@users.noreply.github.com"
          git fetch origin main
          git rebase origin/main

      # Hash every non-hidden file that feeds the native build; the digest is
      # part of the csrc cache key.
      - name: Get csrc hash
        id: get_csrc_hash
        run: |
          CSRC_HASH=$(find ./csrc ./setup.py ./CMakeLists.txt ./cmake \
            -type f -not -path '*/.*' | sort | xargs sha256sum | sha256sum | awk '{print $1}')
          echo "CSRC_HASH=$CSRC_HASH" >> "$GITHUB_OUTPUT"

      # Map `uname -m` to X64 / ARM64 (anything else is passed through) for
      # use in the cache key.
      - name: Get architecture
        id: get_arch
        run: |
          ARCH=$(uname -m)
          case "$ARCH" in
            x86_64) echo "arch=X64" >> "$GITHUB_OUTPUT" ;;
            aarch64) echo "arch=ARM64" >> "$GITHUB_OUTPUT" ;;
            *) echo "arch=$ARCH" >> "$GITHUB_OUTPUT" ;;
          esac

      # Reports `csrc == 'true'` when native-build inputs changed; the result
      # decides whether a freshly built cache entry is saved further down.
      - uses: dorny/paths-filter@v4
        id: csrc-filter
        if: ${{ matrix.group.npu_type != 'cpu' }}
        with:
          filters: |
            csrc:
              - 'csrc/**'
              - 'setup.py'
              - 'CMakeLists.txt'
              - 'cmake/**'

      - name: Restore vllm-ascend csrc cache
        id: cache-csrc
        if: ${{ matrix.group.npu_type != 'cpu' }}
        uses: runs-on/cache/restore@v5
        with:
          path: |
            vllm_ascend/_cann_ops_custom
            vllm_ascend/*.so
            vllm_ascend/lib
            vllm_ascend/include
          key: vllm-ascend-build-v1-${{ steps.get_arch.outputs.arch }}-${{ matrix.group.image_tag }}-${{ steps.get_csrc_hash.outputs.CSRC_HASH }}

      # NOTE(review): the wheel below is pinned to cp312 / aarch64, while the
      # "Get architecture" step also handles x86_64 - confirm that every
      # device runner is aarch64 with Python 3.12.
      - name: Install Mooncake wheel
        if: ${{ matrix.group.npu_type != 'cpu' }}
        run: |
          set -euxo pipefail
          apt-get install -y --no-install-recommends \
            libibverbs1 \
            ibverbs-providers \
            librdmacm1 \
            libnuma1 \
            libcurl4
          ldconfig
          MOONCAKE_WHEEL="mooncake_transfer_engine_ascend-0.3.9-cp312-cp312-manylinux_2_35_aarch64.whl"
          pip install --no-cache-dir --no-deps \
            "https://vllm-ascend.obs.cn-north-4.myhuaweicloud.com/vllm-ascend/${MOONCAKE_WHEEL}"
          pip show mooncake-transfer-engine-ascend || true

      # Kernel compilation is skipped when restored *.so files are already
      # present directly under vllm_ascend/; otherwise they are built here.
      - name: Install vllm-project/vllm-ascend with device
        if: ${{ matrix.group.npu_type != 'cpu' }}
        run: |
          export MAX_JOBS=$(( ${{ matrix.group.num_npus }} * 23 ))
          pip install uc-manager
          uv pip install -r requirements-dev.txt
          uv pip install --force-reinstall --no-deps triton-ascend==3.2.1
          if find vllm_ascend -maxdepth 1 -name '*.so' -type f 2>/dev/null | grep -q .; then
            echo "CSRC cache hit: .so files found, skip kernel compilation"
            COMPILE_CUSTOM_KERNELS=0 uv pip install -e .
          else
            echo "CSRC cache miss: no .so files found, compile kernels"
            uv pip install -e . --no-build-isolation
          fi

      # Saved only when native-build inputs changed and the restore step did
      # not report an exact cache hit.
      - name: Save vllm-ascend csrc cache
        if: ${{ matrix.group.npu_type != 'cpu' && steps.csrc-filter.outputs.csrc == 'true' && steps.cache-csrc.outputs.cache-hit != 'true' }}
        uses: runs-on/cache/save@v5
        with:
          path: |
            vllm_ascend/_cann_ops_custom
            vllm_ascend/*.so
            vllm_ascend/lib
            vllm_ascend/include
          key: vllm-ascend-build-v1-${{ steps.get_arch.outputs.arch }}-${{ matrix.group.image_tag }}-${{ steps.get_csrc_hash.outputs.CSRC_HASH }}

      # CPU-only groups install with custom kernel compilation disabled.
      - name: Install vllm-project/vllm-ascend no device
        if: ${{ matrix.group.npu_type == 'cpu' }}
        env:
          SOC_VERSION: ascend910b1
          COMPILE_CUSTOM_KERNELS: 0
        run: |
          pip install uc-manager
          uv pip install -r requirements-dev.txt
          uv pip install -e .

      # '310p' already excludes 'cpu', so a single equality test is enough.
      - name: Uninstall Triton for 310P tests
        if: ${{ matrix.group.npu_type == '310p' }}
        run: |
          pip uninstall -y triton-ascend triton

      # NOTE(review): the matrix values below are expanded into the script
      # text before bash parses it; they are assumed to be trusted output of
      # select_tests.py - confirm with the caller.
      - name: Run selected tests with device
        if: ${{ matrix.group.npu_type != 'cpu' }}
        continue-on-error: ${{ inputs.continue_on_error }}
        env:
          VLLM_WORKER_MULTIPROC_METHOD: spawn
          # Renders as "true" / "false"; no further export is needed in the
          # script.
          ENABLE_COVERAGE: ${{ inputs.enable-coverage }}
        run: |
          . /usr/local/Ascend/ascend-toolkit/set_env.sh
          TIMING_FLAG=""
          if [ "${{ inputs.upload_timing }}" = "true" ]; then
            TIMING_FLAG="--timing"
          fi
          # TIMING_FLAG and the test list are intentionally unquoted so that
          # they split into separate arguments (or vanish when empty).
          .github/workflows/scripts/run_selected_tests.sh \
            "${{ matrix.group.npu_type }}" \
            "${{ matrix.group.num_npus }}" \
            "with-device" \
            ${TIMING_FLAG} \
            ${{ matrix.group.tests }}

      - name: Run selected tests without device
        if: ${{ matrix.group.npu_type == 'cpu' }}
        continue-on-error: ${{ inputs.continue_on_error }}
        env:
          VLLM_WORKER_MULTIPROC_METHOD: spawn
          TORCH_DEVICE_BACKEND_AUTOLOAD: 0
          # Renders as "true" / "false"; no further export is needed in the
          # script.
          ENABLE_COVERAGE: ${{ inputs.enable-coverage }}
        run: |
          # The test list is intentionally unquoted so that it splits into
          # separate arguments.
          .github/workflows/scripts/run_selected_tests.sh \
            "${{ matrix.group.npu_type }}" \
            "${{ matrix.group.num_npus }}" \
            "without-device" \
            ${{ matrix.group.tests }}

      # No always() here, so this step is skipped once an earlier step has
      # failed the job.
      - name: Upload timing data
        if: ${{ inputs.upload_timing && matrix.group.npu_type != 'cpu' }}
        continue-on-error: true
        uses: actions/upload-artifact@v7
        with:
          name: timing-data-${{ matrix.group.npu_type }}-${{ matrix.group.num_npus }}card-${{ matrix.group.partition }}
          path: ${{ runner.temp }}/selected-tests-*/test_timing_data.json
          if-no-files-found: ignore
          retention-days: 7

      # Artifact names must be unique within a run. Partitioned groups share
      # npu_type and num_npus, so the partition is appended when it is set;
      # names of unpartitioned groups stay exactly as before.
      - name: Upload coverage data
        if: ${{ always() && inputs.enable-coverage }}
        continue-on-error: true
        uses: actions/upload-artifact@v7
        with:
          name: selected-test-coverage-vllm-${{ inputs.vllm }}-${{ matrix.group.npu_type }}-${{ matrix.group.num_npus }}card${{ matrix.group.partition && format('-part{0}', matrix.group.partition) || '' }}
          path: tests/outputs/**/covdata/**
          if-no-files-found: ignore
          retention-days: 14
          compression-level: 0

      # Logs are uploaded even when tests failed. The partition suffix keeps
      # the name unique per matrix job (see the coverage upload above).
      - name: Upload selected test logs
        if: always()
        continue-on-error: true
        uses: actions/upload-artifact@v7
        with:
          name: selected-test-logs-vllm-${{ inputs.vllm }}-${{ matrix.group.npu_type }}-${{ matrix.group.num_npus }}card${{ matrix.group.partition && format('-part{0}', matrix.group.partition) || '' }}
          path: ${{ runner.temp }}/selected-tests-*
          if-no-files-found: ignore
          retention-days: 14
          compression-level: 0