-
Notifications
You must be signed in to change notification settings - Fork 3k
204 lines (193 loc) · 8.45 KB
/
Copy pathunittest-gpu.yml
File metadata and controls
204 lines (193 loc) · 8.45 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
# Disabled unittest GPU workflow - manually enabled when needed.
#
# The `on:` trigger block below is intentionally commented out. To re-enable
# the workflow, uncomment it and keep the nesting shown here.
# NOTE(review): GitHub Actions requires a top-level `on:` key, so while this
# block stays commented out the file is not a runnable workflow - confirm that
# this is the intended way to disable it.
name: Unittest GPU CI (DISABLED)
#on:
#  pull_request:
#    types: [opened, synchronize, reopened]
#    branches: [develop]
#  schedule:
#    - cron: "3 0 * * *"
#  workflow_call:
#    inputs:
#      run_downstream:
#        required: true
#        type: string
#      image_name:
#        required: true
#        type: string
# Allow one active run per pull request: a newer run for the same PR cancels
# the one still in progress. Runs that carry no pull request number (for
# example scheduled runs) fall back to the unique run id, so they no longer
# share a single "<workflow>-" group and cancel each other.
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.run_id }}
  cancel-in-progress: true
# Workflow-wide environment. Most of these variables are forwarded into the
# test container with `docker run -e` in the "Run Container" step.
env:
  # Pull request identity; the scripts below test PR_ID for emptiness to
  # detect runs that are not tied to a pull request.
  PR_ID: ${{ github.event.pull_request.number }}
  COMMIT_ID: ${{ github.event.pull_request.head.sha }}
  BRANCH: ${{ github.event.pull_request.base.ref }}
  AGILE_COMPILE_BRANCH: ${{ github.event.pull_request.base.ref }}

  # CI bookkeeping. TASK is the prefix of the container name.
  TASK: paddlenlp-CI-${{ github.event.pull_request.number }}-unit-gpu
  CI_name: unittest-gpu-ci
  ci_scripts: /workspace/PaddleNLP/scripts/unit_test
  # Value of the `run_downstream` input; steps skip their work when it is
  # the string "false".
  RUN_DOWNSTREAM: ${{ inputs.run_downstream }}

  # Network endpoints and proxy exclusions.
  no_proxy: "localhost,bj.bcebos.com,su.bcebos.com,bcebos.com,apiin.im.baidu.com,gitee.com,aliyun.com,.baidu.com,.tuna.tsinghua.edu.cn"
  HF_ENDPOINT: https://hf-mirror.com
  STUDIO_GIT_HOST: http://git.prod.idc-to-cloud.aistudio.baidu-int.com

  # Cache and data directories on the runner.
  PPNLP_HOME: /ssd1/paddlenlp
  HF_DATASETS_CACHE: /ssd1/paddlenlp/huggingface/datasets
  TRANSFORMERS_CACHE: /ssd1/paddlenlp/huggingface
  CCACHE_DIR: /home/data/gzcfs/.ccache/gpubox
# Every `run:` step in this workflow is executed with bash.
defaults:
  run:
    shell: bash
# The job header is commented out as part of disabling this workflow.
# NOTE(review): `runs-on:` and `steps:` below are still live, so they are no
# longer nested under a job. When re-enabling, uncomment the three header
# lines and nest `runs-on:`, `steps:` and every step under the job again.
#jobs:
#  unittest-gpu-ci:
#    name: unittest-gpu-ci (DISABLED)
runs-on: [self-hosted, ernie-8gpu]
steps:
# Pick the container image: the `image_name` input when it is non-empty,
# otherwise the default PaddleNLP CI image. The choice is exported through
# GITHUB_ENV so that later steps can read $IMAGE_NAME.
- name: Determine Image Name
  env:
    IMAGE_NAME: ${{ inputs.image_name }}
  run: |
    default_image="iregistry.baidu-int.com/paddlecloud/base-images:paddlecloud-ubuntu18.04-gcc8.2-cuda11.8-cudnn8.6-nccl2.15.5-paddlenlp-latest"
    # ${VAR:-fallback} yields the fallback when VAR is unset or empty.
    echo "IMAGE_NAME=${IMAGE_NAME:-${default_image}}" >> "$GITHUB_ENV"
# Start the detached GPU test container that all later steps `docker exec`
# into. The container name (TASK plus a timestamp) is exported through
# GITHUB_ENV even when the run is skipped, because the cleanup step reads it.
- name: Run Container
  env:
    work_dir: ${{ github.workspace }}
    CACHE_DIR: /home/data/cfs/.cache
    FLAGS_dynamic_static_unified_comm: "True"
    python_version: "3.10"
    paddle_whl: https://paddle-qa.bj.bcebos.com/paddle-pipeline/Develop-GpuSome-LinuxCentos-Gcc82-Cuda118-Cudnn86-Trt85-Py310-CINN-Compile/latest/paddlepaddle_gpu-0.0.0-cp310-cp310-linux_x86_64.whl
  run: |
    container_name=${TASK}-$(date +%Y%m%d-%H%M%S)
    echo "container_name=${container_name}" >> "$GITHUB_ENV"
    if [[ "$RUN_DOWNSTREAM" == "false" ]]; then
      echo "Not in a pull_request or test_build event. Skipping..."
    else
      # All expansions are quoted so that a path or name containing spaces or
      # glob characters is passed to docker as a single argument.
      # Mounts: the directory three levels above the workspace is mounted at
      # the same path (it holds the proxy file and upload tools used later),
      # and the workspace itself is mounted at /workspace.
      # `-e NAME` without a value forwards NAME from this step's environment.
      docker run -d -t --name "${container_name}" --net=host -v /dev/shm:/dev/shm --shm-size=32G \
        -v "$work_dir/../../..:$work_dir/../../.." \
        -v "$work_dir:/workspace" \
        -v /home/.cache/pip:/home/.cache/pip \
        -v /ssd1/paddlenlp:/ssd1/paddlenlp \
        -v /home/data/gzcfs/.ccache/gpubox:/home/data/gzcfs/.ccache/gpubox \
        -e BRANCH \
        -e AGILE_COMPILE_BRANCH \
        -e PR_ID \
        -e COMMIT_ID \
        -e work_dir \
        -e ci_scripts \
        -e no_proxy \
        -e CI_name \
        -e paddle_whl \
        -e HF_ENDPOINT \
        -e STUDIO_GIT_HOST \
        -e PPNLP_HOME \
        -e HF_DATASETS_CACHE \
        -e TRANSFORMERS_CACHE \
        -e CACHE_DIR \
        -e FLAGS_dynamic_static_unified_comm \
        -e python_version \
        -w /workspace --runtime=nvidia "$IMAGE_NAME"
    fi
# Populate /workspace inside the container with a fresh PaddleNLP checkout:
# wipe the directory, unpack the synced develop tarball, bring it up to date,
# and - for pull requests - check out the PR head merged with the base branch.
- name: Download Code
  env:
    work_dir: ${{ github.workspace }}
  run: |
    if [[ "$RUN_DOWNSTREAM" == "false" ]]; then
      echo "Not in a pull_request or test_build event. Skipping.."
    else
      docker exec -t "$container_name" /bin/bash -c '
        # Remove everything in the working directory, including dotfiles.
        rm -rf * .[^.]*
        echo "Downloading PaddleNLP.tar"
        wget -q --no-proxy https://paddle-qa.bj.bcebos.com/CodeSync/develop/PaddleNLP.tar --no-check-certificate
        echo "Extracting PaddleNLP.tar"
        tar xf PaddleNLP.tar && rm -rf PaddleNLP.tar
        source $work_dir/../../../proxy
        cd PaddleNLP || exit 1
        git config --global user.name "PaddleCI"
        git config --global user.email "paddle_ci@example.com"
        git pull
        git submodule update --init --recursive --force
        if [ -n "${PR_ID}" ]; then
          # Abort when the PR head cannot be fetched, checked out or merged.
          # Otherwise FETCH_HEAD may still point at the result of the earlier
          # "git pull" and the tests would run against code without the PR.
          git fetch origin pull/${PR_ID}/head || exit 1
          git checkout -b PR_${PR_ID} FETCH_HEAD || exit 1
          git remote add upstream https://github.com/PaddlePaddle/PaddleNLP.git
          git fetch upstream ${BRANCH}
          git merge ${BRANCH} --no-edit || exit 1
          # List the files that differ from the base branch.
          git diff --numstat ${BRANCH} -- | awk "{print \$NF}"
        else
          echo "Not in a pull_request event. Skipping PR-specific operations."
        fi
        git log --pretty=oneline -10
      '
    fi
# Placeholder step: at present it only prints a message from inside the
# container's PaddleNLP checkout.
- name: Skip For Bug
  run: |
    if [[ "$RUN_DOWNSTREAM" != "false" ]]; then
      docker exec -t $container_name /bin/bash -c '
        cd /workspace/PaddleNLP
        echo "no skip for bug"
      '
    else
      echo "Not in a pull_request or test_build event. Skipping..."
    fi
# Run the unit-test script inside the container with a 50 minute limit.
- name: Test
  run: |
    if [[ "$RUN_DOWNSTREAM" != "false" ]]; then
      docker exec -t $container_name /bin/bash -c '
        ldconfig
        # Point /usr/bin/python3 at the interpreter chosen by python_version.
        unlink /usr/bin/python3
        ln -sf $(which python${python_version}) /usr/bin/python3
        pip config set global.cache-dir "/home/.cache/pip"
        source $work_dir/../../../proxy
        # Fail fast from here on; the setup commands above run without it.
        set -e
        cd /workspace/PaddleNLP && git config --global --add safe.directory $PWD
        timeout 50m bash scripts/unit_test/ci_unit.sh ${paddle_whl}
      '
    else
      echo "Not in a pull_request or test_build event. Skipping..."
    fi
# Upload the unit-test logs and the generated Allure report to BOS. Runs even
# when earlier steps failed so that logs of failing runs are still published.
- name: Upload Allure-reports & Logs
  if: always()
  env:
    home_path: ${{ github.workspace }}/../../..
    bos_file: ${{ github.workspace }}/../../../bos/BosClient.py
    allure_file: ${{ github.workspace }}/../../../allure-2.19.0/bin/allure
  run: |
    if [[ "$RUN_DOWNSTREAM" == "false" ]]; then
      echo "Not in a pull_request or test_build event. Skipping..."
    else
      docker exec -t "$container_name" /bin/bash -c '
        unset http_proxy && unset https_proxy
        # Fetch the BOS upload client on first use.
        if [ ! -f "${{ env.bos_file }}" ]; then
          wget -q --no-proxy -O ${{ env.home_path }}/bos_new.tar.gz https://xly-devops.bj.bcebos.com/home/bos_new.tar.gz --no-check-certificate
          # -p: the directory may already exist from an incomplete earlier setup.
          mkdir -p ${{ env.home_path }}/bos
          tar xf ${{ env.home_path }}/bos_new.tar.gz -C ${{ env.home_path }}/bos
        fi
        # Fetch the Allure command line tool on first use.
        if [ ! -f "${{ env.allure_file }}" ]; then
          wget -q --no-proxy -O ${{ env.home_path }}/allure-2.19.0.zip https://xly-devops.bj.bcebos.com/tools/allure-2.19.0.zip --no-check-certificate
          unzip -q ${{ env.home_path }}/allure-2.19.0.zip -d ${{ env.home_path }}/
        fi
        # Destination prefix: per PR and commit, with a test_build suffix when
        # RUN_DOWNSTREAM is "true", or a dated schedule folder otherwise.
        if [[ "${{ env.RUN_DOWNSTREAM }}" == "" && -n "${PR_ID}" ]]; then
          bos_prefix="${PR_ID}/${COMMIT_ID}"
        elif [[ "${{ env.RUN_DOWNSTREAM }}" == "true" && -n "${PR_ID}" ]]; then
          bos_prefix="${PR_ID}/${COMMIT_ID}/test_build"
        else
          bos_prefix="schedule/$(date +%Y%m%d)"
        fi
        cd /workspace/PaddleNLP/unittest_logs
        for FILE in /workspace/PaddleNLP/unittest_logs/*; do
          # When the directory is missing or empty the glob stays literal;
          # skip it instead of passing "*" on to the upload command.
          [ -e "$FILE" ] || continue
          file=$(basename "$FILE")
          python ${{ env.bos_file }} "$file" paddle-github-action/PR/PaddleNLP/unittest-gpu/${bos_prefix}/logs
          echo "$file: https://paddle-github-action.bj.bcebos.com/PR/PaddleNLP/unittest-gpu/${bos_prefix}/logs/$file"
        done
        cd /workspace/PaddleNLP/
        ${{ env.allure_file }} generate result -o report
        tar -czf products.tar.gz report unittest_logs
        python ${{ env.bos_file }} products.tar.gz paddle-github-action/PR/PaddleNLP/unittest-gpu/${bos_prefix}/logs
        echo "report: https://paddle-github-action.bj.bcebos.com/PR/PaddleNLP/unittest-gpu/${bos_prefix}/logs/products.tar.gz"
      '
    fi
# Cleanup: force-remove the test container regardless of earlier step results.
- name: Terminate And Delete the Container
  if: always()
  run: |
    # Best effort: errors (for example when no container was started because
    # the run was skipped) are silenced and never fail the step.
    docker rm -f ${container_name} 2>/dev/null || true