-
Notifications
You must be signed in to change notification settings - Fork 740
209 lines (193 loc) · 7.84 KB
/
_xpu_8cards_case_test.yml
File metadata and controls
209 lines (193 loc) · 7.84 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
# Reusable workflow (triggered only through `workflow_call`) that runs the
# XPU 8-card pytest cases against a FastDeploy wheel supplied by the caller.
name: xpu_8cards_case_test
on:
  workflow_call:
    inputs:
      # Container image used for both the cleanup and the test `docker run`.
      # NOTE(review): `required: true` combined with a `default` means the
      # default should never be used — confirm which one is intended.
      DOCKER_IMAGE:
        description: "Build Images"
        required: true
        type: string
        default: "ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/fastdeploy-xpu:ci"
      # Source archive fetched with wget; the job then unpacks a file named
      # FastDeploy.tar.gz, so the URL is expected to end in that file name.
      FASTDEPLOY_ARCHIVE_URL:
        description: "URL of the compressed FastDeploy code archive."
        required: true
        type: string
      # Wheel installed with pip inside the container; the test job is
      # skipped when this is an empty string.
      FASTDEPLOY_WHEEL_URL:
        description: "URL of the compressed FastDeploy whl ."
        required: true
        type: string
      # Not referenced by any job in this file.
      FD_VERSION:
        description: "FastDeploy Package Version"
        required: false
        type: string
        default: ""
      # The install script checks a variable of this name to pin
      # paddlepaddle-xpu to a stable release; it is only consulted when
      # PADDLE_WHL_URL is empty.
      PADDLEVERSION:
        description: "Paddle Version Build Use"
        required: false
        type: string
        default: ""
      # The install script checks a variable of this name first; when it is
      # non-empty the wheel at this URL is installed directly.
      PADDLE_WHL_URL:
        description: "Paddle Wheel Package URL"
        required: false
        type: string
        default: ""
      # Forwarded into the test container as the MODEL_PATH environment
      # variable.
      # NOTE(review): `required: true` with an empty default — confirm intent.
      MODEL_PATH:
        description: "MODEL Dir Use"
        required: true
        type: string
        default: ""
    secrets:
      # Passed through to the check-bypass reusable workflow.
      github-token:
        required: true
jobs:
  # Calls the reusable bypass check; its `can-skip` output gates the test job.
  check_bypass:
    uses: ./.github/workflows/check-bypass.yml
    secrets:
      github-token: ${{ secrets.github-token }}
    with:
      workflow-name: xpu_8cards_test

  # Runs tests/xpu_ci/8cards_cases inside the CI image on a self-hosted
  # runner labelled XPU-P800-8Cards.
  run_xpu_8cards_cases:
    runs-on: [self-hosted, XPU-P800-8Cards]
    needs: check_bypass
    # Run only when a wheel URL was supplied and the bypass check did not
    # report that the workflow can be skipped.
    if: ${{ inputs.FASTDEPLOY_WHEEL_URL != '' && needs.check_bypass.outputs.can-skip != 'true' }}
    timeout-minutes: 60
    steps:
      - name: Print current runner name
        run: |
          echo "Current runner name: ${{ runner.name }}"

      # Wipes any previous checkout from the workspace, then downloads and
      # unpacks the FastDeploy source archive.
      - name: Code Prepare
        shell: bash
        env:
          docker_image: ${{ inputs.DOCKER_IMAGE }}
          fd_archive_url: ${{ inputs.FASTDEPLOY_ARCHIVE_URL }}
          fd_wheel_url: ${{ inputs.FASTDEPLOY_WHEEL_URL }}
          model_path: ${{ inputs.MODEL_PATH }}
        run: |
          set -x
          FULL_REPO="${{ github.repository }}"
          # Keep only the part after the last slash (the repository name).
          REPO_NAME="${FULL_REPO##*/}"
          # A failed pull is tolerated; docker run then uses whatever image
          # is available locally.
          docker pull "${docker_image}" || true
          # Clean the repository directory before starting.
          # The removal runs inside the container; presumably earlier runs
          # leave files the runner user cannot delete - TODO confirm.
          docker run --rm --net=host -v "$(pwd):/workspace" -w /workspace \
            -e "REPO_NAME=${REPO_NAME}" \
            "${docker_image}" /bin/bash -c '
            CLEAN_RETRIES=3
            CLEAN_COUNT=0
            while [ $CLEAN_COUNT -lt $CLEAN_RETRIES ]; do
              echo "Attempt $((CLEAN_COUNT+1)) to remove ${REPO_NAME}* ..."
              rm -rf "${REPO_NAME}"* || true
              sleep 2
              # Check if anything matching ${REPO_NAME}* still exists
              if ! ls "${REPO_NAME}"* >/dev/null 2>&1; then
                echo "All ${REPO_NAME}* removed successfully"
                break
              fi
              CLEAN_COUNT=$((CLEAN_COUNT + 1))
            done
            # Fail the step if leftovers survived every attempt
            if ls "${REPO_NAME}"* >/dev/null 2>&1; then
              echo "ERROR: Failed to clean ${REPO_NAME}* after multiple attempts"
              ls -ld "${REPO_NAME}"*
              exit 1
            fi
          '
          wget -q --no-proxy "${fd_archive_url}"
          tar -xf FastDeploy.tar.gz
          rm -rf FastDeploy.tar.gz
          cd FastDeploy
          git config --global user.name "FastDeployCI"
          git config --global user.email "fastdeploy_ci@example.com"
          git log -n 3 --oneline

      # Installs Paddle and the FastDeploy wheel inside the container, then
      # runs the 8-card pytest suite.
      - name: Run CI unittest
        env:
          docker_image: ${{ inputs.DOCKER_IMAGE }}
          fd_archive_url: ${{ inputs.FASTDEPLOY_ARCHIVE_URL }}
          fd_wheel_url: ${{ inputs.FASTDEPLOY_WHEEL_URL }}
          model_path: ${{ inputs.MODEL_PATH }}
          # These two must be exported here: the docker command below
          # forwards ${PADDLEVERSION} and ${PADDLE_WHL_URL} from this shell,
          # and without them the container always saw empty strings and
          # installed the nightly Paddle build.
          PADDLEVERSION: ${{ inputs.PADDLEVERSION }}
          PADDLE_WHL_URL: ${{ inputs.PADDLE_WHL_URL }}
        run: |
          # NOTE(review): WORKSPACE is not set anywhere in this workflow, so
          # this prints "." unless the runner environment defines it.
          PARENT_DIR=$(dirname "$WORKSPACE")
          echo "PARENT_DIR:$PARENT_DIR"
          # The script passed to bash -c is single-quoted, so every variable
          # in it is expanded inside the container from the -e values below.
          docker run --rm --net=host --cap-add=SYS_PTRACE --privileged --shm-size=64G \
            -v "$(pwd):/workspace" -w /workspace \
            -v "/ssd3:/ssd3" \
            -e "MODEL_PATH=${model_path}" \
            -e "FASTDEPLOY_ARCHIVE_URL=${fd_archive_url}" \
            -e "FASTDEPLOY_WHEEL_URL=${fd_wheel_url}" \
            -e "PADDLEVERSION=${PADDLEVERSION}" \
            -e "PADDLE_WHL_URL=${PADDLE_WHL_URL}" \
            -e "http_proxy=$(git config --global --get http.proxy)" \
            -e "https_proxy=$(git config --global --get https.proxy)" \
            -e "no_proxy=bcebos.com,mirrors.tuna.tsinghua.edu.cn,127.0.0.1,localhost" \
            "${docker_image}" /bin/bash -c '
              echo "安装lsof工具..."
              apt install -y lsof
              # Set XPU_VISIBLE_DEVICES
              export XPU_VISIBLE_DEVICES="0,1,2,3,4,5,6,7"
              echo "XPU_VISIBLE_DEVICES=$XPU_VISIBLE_DEVICES"
              # Download and install xre (skipped when deps/xre already exists)
              echo "下载和安装xre..."
              mkdir -p /workspace/deps
              cd /workspace/deps
              if [ ! -d "xre" ]; then
                wget -q https://klx-sdk-release-public.su.bcebos.com/xre/kl3-release/5.0.21.21/xre-Linux-x86_64-5.0.21.21.tar.gz
                tar -zxf xre-Linux-x86_64-5.0.21.21.tar.gz && mv xre-Linux-x86_64-5.0.21.21 xre
              fi
              cd -
              export PATH=/workspace/deps/xre/bin:$PATH
              # Restart the XPU cards
              echo "重启XPU卡..."
              xpu-smi -r -i $XPU_VISIBLE_DEVICES
              xpu-smi
              # From here on any failing command aborts the script
              set -e
              git config --global --add safe.directory /workspace/FastDeploy
              cd FastDeploy
              python -m pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple
              python -m pip install -r requirements.txt
              echo "安装PaddlePaddle..."
              # Use a different PaddlePaddle package for different branches and tags:
              # explicit wheel URL first, then a pinned stable version, else nightly
              if [[ "${PADDLE_WHL_URL}" != "" ]];then
                python -m pip install ${PADDLE_WHL_URL}
              elif [[ "${PADDLEVERSION}" != "" ]];then
                python -m pip uninstall paddlepaddle-xpu fastdeploy-xpu -y
                python -m pip install paddlepaddle-xpu==${PADDLEVERSION} -i https://www.paddlepaddle.org.cn/packages/stable/xpu-p800/
              else
                python -m pip uninstall paddlepaddle-xpu fastdeploy-xpu -y
                python -m pip install --pre paddlepaddle-xpu -i https://www.paddlepaddle.org.cn/packages/nightly/xpu-p800/
              fi
              echo "安装上游任务编译的fastdeploy-xpu..."
              python -m pip install ${FASTDEPLOY_WHEEL_URL}
              # Replace the source-tree package directory with the contents of
              # the wheel, installed without dependencies into the checkout
              rm -rf fastdeploy
              python -m pip install ${FASTDEPLOY_WHEEL_URL} --no-deps --target=/workspace/FastDeploy
              echo "============================安装测试依赖============================"
              python -m pip install openai -U
              python -m pip install pytest
              python -m pip install pytest-timeout
              unset http_proxy
              unset https_proxy
              echo "============================开始运行pytest测试============================"
              export PYTHONPATH=/workspace/FastDeploy/
              export PYTHONPATH=$(pwd)/tests/xpu_ci:$PYTHONPATH
              mkdir -p case_logs
              # Capture the pytest status instead of aborting, so the log
              # permissions below are still fixed when tests fail
              set +e
              python -m pytest -v -s --tb=short tests/xpu_ci/8cards_cases/
              exit_code=$?
              set -e
              # Adjust case_logs permissions so the runner user outside Docker can read and upload them
              chmod -R a+rX case_logs/ 2>/dev/null || true
              if [ $exit_code -eq 0 ]; then
                echo "============================8卡cases测试通过!============================"
              else
                echo "============================8卡cases测试失败,请检查日志!============================"
                exit $exit_code
              fi
          '

      # Always publish the per-case logs, including after a failed test run.
      - name: Upload case logs
        if: always()
        uses: actions/upload-artifact@v6
        with:
          name: xpu-8cards-case-logs
          path: FastDeploy/case_logs/
          retention-days: 7
          if-no-files-found: ignore