forked from PaddlePaddle/PaddleFormers
-
Notifications
You must be signed in to change notification settings - Fork 0
167 lines (157 loc) · 6.9 KB
/
model-unittest-gpu.yml
File metadata and controls
167 lines (157 loc) · 6.9 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
name: Model Unittest GPU CI
on:
pull_request:
schedule:
- cron: "0 18 * * *"
workflow_call:
inputs:
runner:
required: false
type: string
image_name:
required: false
type: string
concurrency:
group: model-unittest-${{ github.workflow }}-${{ github.event.pull_request.number || github.run_id }}
cancel-in-progress: true
env:
PR_ID: ${{ github.event.pull_request.number || '0' }}
COMMIT_ID: ${{ github.event.pull_request.head.sha || github.sha }}
TASK: PaddleFormers-CI-${{ github.event.pull_request.number }}-model-unittest-gpu
CI_SCRIPTS_PATH: /workspace/PaddleFormers/scripts/ci_model_unittest.sh
BRANCH: ${{ github.event.pull_request.base.ref || github.ref_name }}
AGILE_COMPILE_BRANCH: ${{ github.event.pull_request.base.ref }}
CI_JOB_NAME: model-unittest-gpu-ci
no_proxy: "localhost,bj.bcebos.com,su.bcebos.com,bcebos.com,apiin.im.baidu.com,gitee.com,aliyun.com,.baidu.com,.tuna.tsinghua.edu.cn"
defaults:
run:
shell: bash
jobs:
model-unittest-gpu-ci:
name: model-unittest-gpu-ci
runs-on: ${{ inputs.runner || 'ernie-8gpu' || 'distrbute' }}
steps:
- name: Determine Image Name
env:
IMAGE_NAME: ${{ inputs.image_name }}
run: |
if [[ -n "${IMAGE_NAME}" ]]; then
echo "IMAGE_NAME=${IMAGE_NAME}" >> "$GITHUB_ENV"
else
echo "IMAGE_NAME=ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/paddle:cuda126-dev-latest" >> "$GITHUB_ENV"
fi
- name: Run Container
env:
work_dir: ${{ github.workspace }}
FLAGS_dynamic_static_unified_comm: "True"
python_version: "3.10"
PIP_CACHE_DIR: /root/.cache/pip
paddle_whl: https://paddle-qa.bj.bcebos.com/paddle-pipeline/Develop-TagBuild-Training-Linux-Gpu-Cuda12.6-Cudnn9.5-Trt10.5-Mkl-Avx-Gcc11-SelfBuiltPypiUse/latest/paddlepaddle_gpu-0.0.0-cp310-cp310-linux_x86_64.whl
run: |
container_name=${TASK}-$(date +%Y%m%d-%H%M%S)
echo "container_name=${container_name}" >> "$GITHUB_ENV"
echo "Workspace path: ${{ github.workspace }}"
DOCKER_VER=$(docker version --format '{{.Server.Version}}' | cut -d. -f1,2)
if (( $(echo "$DOCKER_VER < 19.03" | bc -l) )); then
GPU_OPTION="--runtime=nvidia"
else
GPU_OPTION="--gpus all"
fi
echo "DOCKER_VER=${DOCKER_VER}"
echo "GPU_OPTION=${GPU_OPTION}"
docker run -d -t ${GPU_OPTION} --name ${container_name} --net=host -v /dev/shm:/dev/shm --shm-size=32G \
-v ${work_dir}/../../..:${work_dir}/../../.. \
-v ${work_dir}:/workspace \
-v /home/.cache/:/root/.cache/ \
-v /home/paddle-1/models/:/home/models/ \
-e "BRANCH=$BRANCH" \
-e "AGILE_COMPILE_BRANCH=$AGILE_COMPILE_BRANCH" \
-e "PR_ID=$PR_ID" \
-e "COMMIT_ID=$COMMIT_ID" \
-e "work_dir=$work_dir" \
-e "CI_SCRIPTS_PATH=$CI_SCRIPTS_PATH" \
-e "no_proxy=$no_proxy" \
-e "CI_JOB_NAME=$CI_JOB_NAME" \
-e "paddle_whl=$paddle_whl" \
-e "FLAGS_dynamic_static_unified_comm=$FLAGS_dynamic_static_unified_comm" \
-e "python_version=$python_version" \
-e HF_PROXY_PATH=${work_dir}/../../../proxy_huggingface \
-e AISTUDIO_PROXY_PATH=${work_dir}/../../../proxy_aistudio \
-e PF_HOME=/home/models/ \
-e PIP_CACHE_DIR \
-w /workspace --privileged ${IMAGE_NAME}
- name: Download Code
run: |
docker exec -t $container_name /bin/bash -c '
rm -rf * .[^.]*
echo "Downloading PaddleFormers.tar"
wget -q --no-proxy https://paddle-qa.bj.bcebos.com/CodeSync/develop/PaddleFormers.tar --no-check-certificate
echo "Extracting PaddleFormers.tar"
rm -rf PaddleFormers
tar xf PaddleFormers.tar && rm -rf PaddleFormers.tar
echo "work_dir = ${work_dir}"
source ${work_dir}/../../../proxy
pip install uv
cd PaddleFormers
git config --global user.name "PaddleCI"
git config --global user.email "paddle_ci@example.com"
git pull
git submodule update --init --recursive --force
if [ -n "$PR_ID" ] && [ "$PR_ID" != "0" ]; then
git fetch origin pull/${PR_ID}/head
git checkout -b PR_${PR_ID} FETCH_HEAD
git remote add upstream https://github.com/PaddlePaddle/PaddleFormers.git
echo "Checking out ${BRANCH}..."
git fetch upstream ${BRANCH}:${BRANCH}
git merge ${BRANCH} --no-edit
git diff --numstat ${BRANCH} -- | awk "{print \$NF}"
else
echo "Not in a pull_request event. Skipping PR-specific operations."
fi
git log --pretty=oneline -10
'
- name: Test
run: |
docker exec -t $container_name /bin/bash -c '
ldconfig
mkdir -p /root/.cache/pip
pip cache dir
uv cache dir
set -e
rm -rf /root/.cache/aistudio/
cd /workspace/PaddleFormers && git config --global --add safe.directory $PWD
echo "work_dir = ${work_dir}"
cp -r ${work_dir}/../../../models ./models
echo "Check whether the local model file exists:"
ls -l ./models
timeout 35m bash -x scripts/regression/ci_model_unittest.sh ${paddle_whl} false ${AGILE_COMPILE_BRANCH}
'
- name: Upload Products
if: always()
env:
home_path: ${{ github.workspace }}/../../..
bos_file: ${{ github.workspace }}/../../../bos/BosClient.py
run: |
docker exec -t $container_name /bin/bash -c '
if [ ! -f "${{ env.bos_file }}" ]; then
wget -q --no-proxy -O ${{ env.home_path }}/bos_new.tar.gz https://xly-devops.bj.bcebos.com/home/bos_new.tar.gz --no-check-certificate
mkdir ${{ env.home_path }}/bos
tar xf ${{ env.home_path }}/bos_new.tar.gz -C ${{ env.home_path }}/bos
fi
if [ -n "$PR_ID" ] && [ "$PR_ID" != "0" ]; then
bos_prefix="${PR_ID}/${COMMIT_ID}"
else
bos_prefix="schedule/$(date +%Y%m%d)"
fi
# logs
cd /workspace/PaddleFormers/model_unittest_logs
for FILE in /workspace/PaddleFormers/model_unittest_logs/*; do
file=$(basename "$FILE")
python ${{ env.bos_file }} $file paddle-github-action/PR/PaddleFormers/model-unittest-gpu/${bos_prefix}/logs
echo "$file: https://paddle-github-action.bj.bcebos.com/PR/PaddleFormers/model-unittest-gpu/${bos_prefix}/logs/$file"
done
'
- name: Terminate And Delete the Container
if: always()
run: |
docker rm -f $container_name 2>/dev/null || true