# Source: vllm-ascend/.github/workflows/_selected_tests.yaml at ea10c0c32655c3809c9224544ff1a7edc76c6576 (vllm-project/vllm-ascend)
#
# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# This file is a part of the vllm-ascend project.
#
name: Selected Tests

# Reusable workflow: it is only started by another workflow through
# `workflow_call`, which supplies the inputs declared below.
on:
  workflow_call:
    inputs:
      vllm:
        description: 'The vllm commit hash or tag to test.'
        type: string
        required: true
      test_groups:
        description: 'JSON array of test groups from select_tests.py'
        type: string
        required: true
      ref:
        description: 'The vllm-ascend ref to test.'
        type: string
        required: false
        default: ""
      upload_timing:
        description: 'Upload test_timing_data.json as artifact'
        type: boolean
        required: false
        default: false
      continue_on_error:
        description: 'Continue running the job even if tests fail'
        type: boolean
        required: false
        default: false
      enable-coverage:
        description: 'Whether to run tests with coverage enabled.'
        type: boolean
        required: false
        default: false

# Every `run:` step goes through a login shell (`-l`) that stops at the first
# failing command (`-e`). A non-login bash does not read ~/.profile or
# ~/.bashrc, so the login flag is what activates the ascend-toolkit environment
# variables for the step. Note that `pipefail` is not enabled by this default.
defaults:
  run:
    shell: bash -el {0}

# Token scopes requested for the jobs of this workflow.
# NOTE(review): no step visible in this file posts to a pull request, so
# `pull-requests: read` may be sufficient — confirm that
# run_selected_tests.sh does not rely on write access before tightening.
permissions:
  contents: read
  pull-requests: write

jobs:
  selected-tests:
    # One matrix job per entry of the `test_groups` input. The display name is
    # "<npu_type>-<num_npus> card-", followed by "(part N)" when the group
    # defines a partition.
    name: ${{ matrix.group.npu_type }}-${{ matrix.group.num_npus }} card-${{ matrix.group.partition && format('(part {0})', matrix.group.partition) || '' }}
    strategy:
      # Keep the remaining groups running when one group fails.
      fail-fast: false
      matrix:
        group: ${{ fromJSON(inputs.test_groups) }}
    # The runner label and the CANN image tag both come from the matrix group.
    runs-on: ${{ matrix.group.runner }}
    container:
      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:${{ matrix.group.image_tag }}
      env:
        VLLM_LOGGING_LEVEL: ERROR
        VLLM_USE_MODELSCOPE: True
        HF_HUB_OFFLINE: 1
        # Default build parallelism; the device install step exports its own
        # MAX_JOBS value.
        MAX_JOBS: 4
        # uv settings: resolve packages through the in-cluster cache, which is
        # addressed over plain HTTP (hence UV_INSECURE_HOST), plus the extra
        # indexes listed below, and install into the system Python with the uv
        # cache disabled.
        UV_INDEX_URL: http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple
        UV_EXTRA_INDEX_URL: "https://repo.huaweicloud.com/ascend/repos/pypi http://cache-service.nginx-pypi-cache.svc.cluster.local/whl/cpu/"
        UV_INDEX_STRATEGY: unsafe-best-match
        UV_INSECURE_HOST: cache-service.nginx-pypi-cache.svc.cluster.local
        UV_HTTP_TIMEOUT: 120
        UV_NO_CACHE: 1
        UV_SYSTEM_PYTHON: 1
    steps:
      # Device groups only: print the NPU status; the step fails if npu-smi does.
      - name: Check NPU availability
        if: matrix.group.npu_type != 'cpu'
        run: npu-smi info

      # Redirect apt and pip to the in-cluster mirror, install the system
      # toolchain, register /__w/vllm-ascend/vllm-ascend as a safe git
      # directory, and install uv for the later `uv pip` steps.
      - name: Install packages
        run: |
          sed -Ei 's@(ports|archive).ubuntu.com@cache-service.nginx-pypi-cache.svc.cluster.local:8081@g' /etc/apt/sources.list
          pip config set global.index-url http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple
          pip config set global.trusted-host cache-service.nginx-pypi-cache.svc.cluster.local
          apt-get update -y
          apt-get install -y python3-pip git vim wget net-tools gcc g++ cmake libnuma-dev curl gnupg2 zstd
          git config --global --add safe.directory /__w/vllm-ascend/vllm-ascend
          pip install uv

      # Fetch upstream vLLM at the requested commit or tag into ./vllm-empty.
      - name: Checkout vllm-project/vllm repo
        uses: actions/checkout@v6
        with:
          path: ./vllm-empty
          repository: vllm-project/vllm
          ref: ${{ inputs.vllm }}

      # Install the vLLM checkout with VLLM_TARGET_DEVICE=empty (set for the
      # install command only), then remove the triton package.
      - name: Install vllm-project/vllm from source
        working-directory: ./vllm-empty
        run: |
          VLLM_TARGET_DEVICE=empty uv pip install .
          pip uninstall -y triton

      # Check out vllm-ascend with full history. The `ref` input wins when it is
      # non-empty; otherwise the ref that triggered the run is used.
      - name: Checkout vllm-project/vllm-ascend repo
        uses: actions/checkout@v6
        with:
          fetch-depth: 0
          ref: ${{ inputs.ref || github.ref }}

      # Replay the checked-out ref on top of the freshly fetched origin/main so
      # the tests run against the current main. A committer identity is
      # configured first; under the default `bash -e` shell a failing rebase
      # fails the step.
      - name: Rebase on latest main
        run: |
          git config user.name "vllm-ascend-ci"
          git config user.email "vllm-ascend-ci@users.noreply.github.com"
          git fetch origin main
          git rebase origin/main

      # Fingerprint the custom-kernel build inputs (csrc/, cmake/, setup.py and
      # CMakeLists.txt, hidden paths excluded) and publish the digest as the
      # CSRC_HASH step output, which the csrc cache key is built from.
      # NOTE(review): the default shell is `bash -el` without pipefail, so a
      # failing `find` would not fail this step — confirm that is acceptable.
      - name: Get csrc hash
        id: get_csrc_hash
        run: |
          CSRC_HASH=$(find ./csrc ./setup.py ./CMakeLists.txt ./cmake \
            -type f -not -path '*/.*' | sort | xargs sha256sum | sha256sum | awk '{print $1}')
          echo "CSRC_HASH=$CSRC_HASH" >> $GITHUB_OUTPUT

      - name: Get architecture
        id: get_arch
        run: |
          # Translate the machine name into the label used in the cache key;
          # unrecognised machines are passed through unchanged.
          machine=$(uname -m)
          if [ "$machine" = "x86_64" ]; then
            label="X64"
          elif [ "$machine" = "aarch64" ]; then
            label="ARM64"
          else
            label="$machine"
          fi
          echo "arch=$label" >> $GITHUB_OUTPUT

      # Device groups only: report through the `csrc` output whether the change
      # set touches the custom-kernel sources or their build files. The
      # "Save vllm-ascend csrc cache" step is gated on this output.
      - uses: dorny/paths-filter@v4
        id: csrc-filter
        if: ${{ matrix.group.npu_type != 'cpu' }}
        with:
          filters: |
            csrc:
              - 'csrc/**'
              - 'setup.py'
              - 'CMakeLists.txt'
              - 'cmake/**'

      # Device groups only: try to restore previously built kernel artifacts.
      # The key combines CPU architecture, CANN image tag and the csrc hash.
      - name: Restore vllm-ascend csrc cache
        if: ${{ matrix.group.npu_type != 'cpu' }}
        id: cache-csrc
        uses: runs-on/cache/restore@v5
        with:
          key: vllm-ascend-build-v1-${{ steps.get_arch.outputs.arch }}-${{ matrix.group.image_tag }}-${{ steps.get_csrc_hash.outputs.CSRC_HASH }}
          path: |
            vllm_ascend/_cann_ops_custom
            vllm_ascend/*.so
            vllm_ascend/lib
            vllm_ascend/include

      # Device groups only: install the RDMA/NUMA/curl runtime libraries and
      # then the prebuilt Mooncake transfer-engine wheel (without dependencies).
      # The final `pip show` is informational and never fails the step.
      # NOTE(review): the wheel name is pinned to cp312 / aarch64, whereas the
      # "Get architecture" step also maps x86_64 — confirm device runners are
      # always aarch64 with Python 3.12.
      - name: Install Mooncake wheel
        if: ${{ matrix.group.npu_type != 'cpu' }}
        run: |
          set -euxo pipefail

          apt-get install -y --no-install-recommends \
            libibverbs1 \
            ibverbs-providers \
            librdmacm1 \
            libnuma1 \
            libcurl4
          ldconfig

          MOONCAKE_WHEEL="mooncake_transfer_engine_ascend-0.3.9-cp312-cp312-manylinux_2_35_aarch64.whl"
          pip install --no-cache-dir --no-deps \
            "https://vllm-ascend.obs.cn-north-4.myhuaweicloud.com/vllm-ascend/${MOONCAKE_WHEEL}"

          pip show mooncake-transfer-engine-ascend || true

      - name: Install vllm-project/vllm-ascend with device
        if: ${{ matrix.group.npu_type != 'cpu' }}
        run: |
          # Scale build parallelism with the number of cards in this group.
          export MAX_JOBS=$(( ${{ matrix.group.num_npus }} * 23 ))
          pip install uc-manager
          uv pip install -r requirements-dev.txt
          uv pip install --force-reinstall --no-deps triton-ascend==3.2.1
          # Compile the custom kernels only when no prebuilt .so sits directly
          # under vllm_ascend/ (for example after a cache restore).
          if ! find vllm_ascend -maxdepth 1 -type f -name '*.so' 2>/dev/null | grep -q .; then
            echo "CSRC cache miss: no .so files found, compile kernels"
            uv pip install -e . --no-build-isolation
          else
            echo "CSRC cache hit: .so files found, skip kernel compilation"
            COMPILE_CUSTOM_KERNELS=0 uv pip install -e .
          fi

      # Store the freshly built kernel artifacts, but only for device groups
      # whose change set touched the csrc inputs and whose restore step did not
      # report a cache hit. Key and paths mirror the restore step.
      - name: Save vllm-ascend csrc cache
        if: ${{ matrix.group.npu_type != 'cpu' && steps.csrc-filter.outputs.csrc == 'true' && steps.cache-csrc.outputs.cache-hit != 'true' }}
        uses: runs-on/cache/save@v5
        with:
          key: vllm-ascend-build-v1-${{ steps.get_arch.outputs.arch }}-${{ matrix.group.image_tag }}-${{ steps.get_csrc_hash.outputs.CSRC_HASH }}
          path: |
            vllm_ascend/_cann_ops_custom
            vllm_ascend/*.so
            vllm_ascend/lib
            vllm_ascend/include

      # CPU-only groups: install vllm-ascend in editable mode with
      # COMPILE_CUSTOM_KERNELS=0 and an explicit SOC_VERSION.
      # NOTE(review): SOC_VERSION is presumably pinned because no device is
      # present to detect it — confirm against setup.py.
      - name: Install vllm-project/vllm-ascend no device
        if: ${{ matrix.group.npu_type == 'cpu' }}
        env:
          SOC_VERSION: ascend910b1
          COMPILE_CUSTOM_KERNELS: 0
        run: |
          pip install uc-manager
          uv pip install -r requirements-dev.txt
          uv pip install -e .

      # Runs for 310p groups only; a group whose type is '310p' is by definition
      # not a 'cpu' group, so no separate cpu check is needed.
      - name: Uninstall Triton for 310P tests
        if: ${{ matrix.group.npu_type == '310p' }}
        run: pip uninstall -y triton-ascend triton

      - name: Run selected tests with device
        if: ${{ matrix.group.npu_type != 'cpu' }}
        continue-on-error: ${{ inputs.continue_on_error }}
        env:
          ENABLE_COVERAGE: ${{ inputs.enable-coverage}}
          VLLM_WORKER_MULTIPROC_METHOD: spawn
        run: |
          # Load the Ascend toolkit environment before anything else.
          . /usr/local/Ascend/ascend-toolkit/set_env.sh
          # Re-assert the coverage switch once the toolkit environment is loaded.
          case "${{ inputs.enable-coverage }}" in
            true) export ENABLE_COVERAGE=true ;;
          esac
          # The timing flag is passed only when the caller asked for timing data.
          timing_args=()
          if [ "${{ inputs.upload_timing }}" = "true" ]; then
            timing_args+=("--timing")
          fi
          # The selected tests are deliberately left unquoted so that each test
          # becomes its own argument.
          .github/workflows/scripts/run_selected_tests.sh \
            "${{ matrix.group.npu_type }}" \
            "${{ matrix.group.num_npus }}" \
            "with-device" \
            "${timing_args[@]}" \
            ${{ matrix.group.tests }}

      - name: Run selected tests without device
        if: ${{ matrix.group.npu_type == 'cpu' }}
        continue-on-error: ${{ inputs.continue_on_error }}
        env:
          ENABLE_COVERAGE: ${{ inputs.enable-coverage}}
          TORCH_DEVICE_BACKEND_AUTOLOAD: 0
          VLLM_WORKER_MULTIPROC_METHOD: spawn
        run: |
          # Re-assert the coverage switch when the caller enabled coverage.
          case "${{ inputs.enable-coverage }}" in
            true) export ENABLE_COVERAGE=true ;;
          esac
          # The selected tests are deliberately left unquoted so that each test
          # becomes its own argument.
          .github/workflows/scripts/run_selected_tests.sh \
            "${{ matrix.group.npu_type }}" \
            "${{ matrix.group.num_npus }}" \
            "without-device" \
            ${{ matrix.group.tests }}

      # Publish test_timing_data.json for device groups when the caller asked
      # for it. The condition has no status function, so the step is skipped
      # once an earlier step has failed the job. A failed upload never fails
      # the job.
      - name: Upload timing data
        if: ${{ inputs.upload_timing && matrix.group.npu_type != 'cpu' }}
        uses: actions/upload-artifact@v7
        continue-on-error: true
        with:
          name: timing-data-${{ matrix.group.npu_type }}-${{ matrix.group.num_npus }}card-${{ matrix.group.partition }}
          path: ${{ runner.temp }}/selected-tests-*/test_timing_data.json
          retention-days: 7
          if-no-files-found: ignore

      # Upload the coverage data even when earlier steps failed (`always()`),
      # provided coverage was enabled. A failed upload never fails the job.
      # The artifact name carries a "-part<N>" suffix when the matrix group has
      # a partition: upload-artifact rejects a second upload under an existing
      # name, so partitioned jobs that share NPU type and card count would
      # otherwise lose all but one upload. Groups without a partition keep the
      # previous name.
      # NOTE(review): consumers that download this artifact by exact name must
      # account for the suffix — confirm against the downstream coverage job.
      - name: Upload coverage data
        if: ${{ always() && inputs.enable-coverage }}
        continue-on-error: true
        uses: actions/upload-artifact@v7
        with:
          name: selected-test-coverage-vllm-${{ inputs.vllm }}-${{ matrix.group.npu_type }}-${{ matrix.group.num_npus }}card${{ matrix.group.partition && format('-part{0}', matrix.group.partition) || '' }}
          path: tests/outputs/**/covdata/**
          if-no-files-found: ignore
          retention-days: 14
          compression-level: 0

      # Always upload the per-run log directories, whatever the outcome of the
      # earlier steps. A failed upload never fails the job.
      # The artifact name carries a "-part<N>" suffix when the matrix group has
      # a partition: upload-artifact rejects a second upload under an existing
      # name, so partitioned jobs that share NPU type and card count would
      # otherwise lose all but one set of logs. Groups without a partition keep
      # the previous name.
      - name: Upload selected test logs
        if: always()
        continue-on-error: true
        uses: actions/upload-artifact@v7
        with:
          name: selected-test-logs-vllm-${{ inputs.vllm }}-${{ matrix.group.npu_type }}-${{ matrix.group.num_npus }}card${{ matrix.group.partition && format('-part{0}', matrix.group.partition) || '' }}
          path: ${{ runner.temp }}/selected-tests-*
          if-no-files-found: ignore
          retention-days: 14
          compression-level: 0