-
Notifications
You must be signed in to change notification settings - Fork 46
159 lines (141 loc) · 6.17 KB
/
ut.yaml
File metadata and controls
159 lines (141 loc) · 6.17 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
# Unit-test workflow for the XPU kernels: builds a CI image per runner
# label, runs pytest inside it, then cleans the runner workspace.
name: run unit tests on Intel GPU.

on:
  pull_request:
    branches:
      - main
  push:
    branches:
      - main
  schedule:
    # Nightly full-scope run at 02:00 UTC
    - cron: "0 2 * * *"
  workflow_dispatch:
    inputs:
      test_scope:
        description: "Test scope: full, ci, mini, ondemand:llama, ondemand:deepseek"
        required: false
        default: "ci"

permissions:
  contents: read

env:
  # Registry the build jobs push the CI image to.
  REGISTRY: "localhost:5000"
  # Scope resolution, in priority order:
  #   schedule (nightly) -> full
  #   workflow_dispatch  -> the `test_scope` input
  #   pull_request/push  -> ci
  XPU_KERNEL_TEST_SCOPE: ${{ github.event_name == 'schedule' && 'full' || github.event.inputs.test_scope || 'ci' }}

jobs:
build-docker-image-latest-pvc:
  # Builds the CI image from Dockerfile.xpu on the self-hosted-pvc runner and
  # pushes it to the registry named by the workflow-level REGISTRY variable.
  runs-on: self-hosted-pvc
  steps:
    - name: Clean workspace
      # Best effort: each command tolerates failure so a dirty or missing
      # checkout never blocks the build.
      run: |
        sudo chown -R "$(id -u):$(id -g)" . || true
        git clean -ffdx || true
        git reset --hard || true
    - name: Checkout
      uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1
      with:
        clean: true
        set-safe-directory: true
    - name: build docker image & push to local
      id: build-image
      run: |
        # REGISTRY is exported to the step shell by the workflow-level `env`.
        local_image="xpu-kernel-ci-image:latest"
        registry_image="${REGISTRY}/${local_image}"
        docker build -t "${local_image}" -f Dockerfile.xpu .
        docker tag "${local_image}" "${registry_image}"
        docker push "${registry_image}"
build-docker-image-latest-bmg:
  # Builds the CI image from Dockerfile.xpu on the self-hosted-bmg runner and
  # pushes it to the registry named by the workflow-level REGISTRY variable.
  runs-on: self-hosted-bmg
  steps:
    - name: Clean workspace
      # Best effort: each command tolerates failure so a dirty or missing
      # checkout never blocks the build.
      run: |
        sudo chown -R "$(id -u):$(id -g)" . || true
        git clean -ffdx || true
        git reset --hard || true
    - name: Checkout
      uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1
      with:
        clean: true
        set-safe-directory: true
    - name: build docker image & push to local
      id: build-image
      run: |
        # REGISTRY is exported to the step shell by the workflow-level `env`.
        local_image="xpu-kernel-ci-image:latest"
        registry_image="${REGISTRY}/${local_image}"
        docker build -t "${local_image}" -f Dockerfile.xpu .
        docker tag "${local_image}" "${registry_image}"
        docker push "${registry_image}"
run-unit-tests-pvc:
  # Builds the package inside the CI image produced by
  # build-docker-image-latest-pvc and runs the pytest suite on the
  # self-hosted-pvc runner.
  runs-on: self-hosted-pvc
  needs: build-docker-image-latest-pvc
  timeout-minutes: 40
  container:
    image: localhost:5000/xpu-kernel-ci-image:latest
    # Passes /dev/dri through to the container and mounts the `ccache`
    # volume at /root/.ccache (the CCACHE_DIR used by the build step).
    options: >-
      --device /dev/dri
      -v /dev/dri/by-path:/dev/dri/by-path
      --privileged
      -v ccache:/root/.ccache
      -e CCACHE_DIR=/root/.ccache
  steps:
    - name: Checkout
      uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1
      with:
        set-safe-directory: true
    - name: build & install wheel
      run: |
        git config --global --add safe.directory "${GITHUB_WORKSPACE}"
        export CCACHE_DIR=/root/.ccache
        export CCACHE_BASEDIR="${GITHUB_WORKSPACE}"
        export CCACHE_NOHASHDIR=1
        export CCACHE_COMPILERCHECK=content
        # Cache statistics/config are informational only; never fail on them.
        ccache -s || true
        ccache -p || true
        git submodule sync && git submodule update --init --recursive
        uv pip install -r requirements.txt
        MAX_JOBS=128 uv pip install --no-build-isolation -e . -v
        ccache -s || true
    - name: test
      # XPU_KERNEL_TEST_SCOPE is exported to every step by the workflow-level
      # `env`, so the script reads it from the environment rather than
      # expanding `${{ ... }}` into the script text. The value can come from
      # the free-text `test_scope` dispatch input, and inline expansion would
      # let that input be interpreted as shell code.
      run: |
        echo "Running tests with XPU_KERNEL_TEST_SCOPE=${XPU_KERNEL_TEST_SCOPE}"
        ZE_AFFINITY_MASK=0,1 SKIP_HANG_KERNEL=1 SKIP_ACC_ERROR_KERNEL=1 pytest -v -s tests/
        VLLM_XPU_FORCE_XE_DEFAULT_KERNEL=1 ZE_AFFINITY_MASK=0,1 pytest -v -s tests/fused_moe/test_grouped_gemm.py::test_grouped_gemm
clean-repo-pvc:
  # Resets the self-hosted-pvc runner workspace once the PVC test job is done.
  runs-on: self-hosted-pvc
  needs: run-unit-tests-pvc
  # Without this condition the job is skipped whenever the test job fails or
  # is skipped, which is exactly when the workspace still needs cleaning.
  if: ${{ always() }}
  steps:
    - name: Clean workspace
      # Best effort: each command tolerates failure.
      run: |
        sudo chown -R "$(id -u):$(id -g)" . || true
        git clean -ffdx || true
        git reset --hard || true
run-unit-tests-bmg:
  # Builds the package inside the CI image produced by
  # build-docker-image-latest-bmg and runs the pytest suite on the
  # self-hosted-bmg runner.
  runs-on: self-hosted-bmg
  needs: build-docker-image-latest-bmg
  timeout-minutes: 40
  container:
    image: localhost:5000/xpu-kernel-ci-image:latest
    # Passes /dev/dri through to the container and mounts the `ccache`
    # volume at /root/.ccache (the CCACHE_DIR used by the build step).
    options: >-
      --device /dev/dri
      -v /dev/dri/by-path:/dev/dri/by-path
      --privileged
      -v ccache:/root/.ccache
      -e CCACHE_DIR=/root/.ccache
  steps:
    - name: Checkout
      uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1
      with:
        set-safe-directory: true
    - name: build & install wheel
      run: |
        git config --global --add safe.directory "${GITHUB_WORKSPACE}"
        export CCACHE_DIR=/root/.ccache
        export CCACHE_BASEDIR="${GITHUB_WORKSPACE}"
        export CCACHE_NOHASHDIR=1
        export CCACHE_COMPILERCHECK=content
        # Cache statistics/config are informational only; never fail on them.
        ccache -s || true
        ccache -p || true
        git submodule sync && git submodule update --init --recursive
        uv pip install -r requirements.txt
        MAX_JOBS=80 uv pip install --no-build-isolation -e . -v
        ccache -s || true
    - name: test
      # XPU_KERNEL_TEST_SCOPE is exported to every step by the workflow-level
      # `env`, so the script reads it from the environment rather than
      # expanding `${{ ... }}` into the script text. The value can come from
      # the free-text `test_scope` dispatch input, and inline expansion would
      # let that input be interpreted as shell code.
      run: |
        echo "Running tests with XPU_KERNEL_TEST_SCOPE=${XPU_KERNEL_TEST_SCOPE}"
        # tests/test_moe_align_block_size.py and tests/test_moe_lora_align_sum.py
        # take much longer than expected; ignore them for now.
        # test_swap_blocks is excluded with --deselect because --ignore only
        # accepts paths and silently does nothing for a test node id.
        ZE_AFFINITY_MASK=0,1 pytest -v -s tests/ \
          --ignore=tests/test_lora_ops.py \
          --ignore=tests/test_fp8_quant.py \
          --ignore=tests/test_moe_align_block_size.py \
          --ignore=tests/test_moe_lora_align_sum.py \
          --ignore=tests/test_topk_per_row.py \
          --deselect=tests/test_cache.py::test_swap_blocks
        # FIXME: the LoRA unit tests run separately to avoid OOM when they run
        # together with the other tests.
        ZE_AFFINITY_MASK=0,1 pytest -v -s tests/test_lora_ops.py
        VLLM_XPU_FORCE_XE_DEFAULT_KERNEL=1 ZE_AFFINITY_MASK=0,1 pytest -v -s tests/fused_moe/test_grouped_gemm.py::test_grouped_gemm
clean-repo-bmg:
  # Resets the self-hosted-bmg runner workspace once the BMG test job is done.
  runs-on: self-hosted-bmg
  needs: run-unit-tests-bmg
  # Without this condition the job is skipped whenever the test job fails or
  # is skipped, which is exactly when the workspace still needs cleaning.
  if: ${{ always() }}
  steps:
    - name: Clean workspace
      # Best effort: each command tolerates failure.
      run: |
        sudo chown -R "$(id -u):$(id -g)" . || true
        git clean -ffdx || true
        git reset --hard || true