# Unittest GPU CE
# Unittest GPU CE #30

name: Unittest GPU CE

# Triggers: a daily schedule plus manual dispatch with optional overrides.
on:
  schedule:
    # 01:00 UTC every day, which is 09:00 Beijing time (UTC+8).
    - cron: '0 1 * * *'
  # Allow the workflow to be triggered manually.
  workflow_dispatch:
    inputs:
      # URL of the Paddle wheel; forwarded to the job as `paddle_whl`.
      paddle_whl:
        description: 'paddle_whl'
        required: false
        default: 'https://paddle-qa.bj.bcebos.com/paddle-pipeline/Develop-GpuAll-LinuxCentos-Gcc11-Cuda126-Cudnn95-Trt105-Py310-Compile/latest/paddlepaddle_gpu-0.0.0-cp310-cp310-linux_x86_64.whl'
        type: string
      # Optional branch name; forwarded to the job as `repo_branch`.
      repo_branch:
        description: 'repo_branch'
        required: false
        type: string
# All runs of this workflow share one concurrency group; starting a new run
# cancels the one that is still in progress.
concurrency:
  group: unittest-ce-${{ github.workflow }}
  cancel-in-progress: true

# Workflow-wide environment, visible to every step of every job.
env:
  TASK: PaddleFormers-CE-unittest-gpu
  CE_name: unittest-gpu-ce
  ce_scripts: /workspace/PaddleFormers/scripts/unit_test
  # NOTE(review): the three values below read the `pull_request` event
  # payload, but the triggers declared in this file are `schedule` and
  # `workflow_dispatch` only, so these presumably expand to empty strings.
  # Confirm what the downstream scripts expect before changing them.
  COMMIT_ID: ${{ github.event.pull_request.head.sha }}
  BRANCH: ${{ github.event.pull_request.base.ref }}
  AGILE_COMPILE_BRANCH: ${{ github.event.pull_request.base.ref }}
  # Hosts that must be reached directly instead of through the proxy.
  no_proxy: 'localhost,bj.bcebos.com,su.bcebos.com,bcebos.com,apiin.im.baidu.com,gitee.com,aliyun.com,.baidu.com,.tuna.tsinghua.edu.cn'

# Every `run:` step uses bash unless it says otherwise.
defaults:
  run:
    shell: bash
jobs:
  # Runs the PaddleFormers GPU unit tests inside a Docker container on a
  # self-hosted runner, uploads the test logs, then removes the container.
  unittest-gpu-ce:
    name: unittest-gpu-ce
    runs-on: [self-hosted, ernie-8gpu]
    steps:
      # Publish the image name through GITHUB_ENV for the "Run Container" step.
      - name: Determine Image Name
        run: |
          echo "IMAGE_NAME=ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/paddle:cuda126-dev-latest" >> "$GITHUB_ENV"

      # Start a detached container with the runner workspace mounted at
      # /workspace and export its name so the later steps can `docker exec`
      # into it.
      - name: Run Container
        env:
          work_dir: ${{ github.workspace }}
          FLAGS_dynamic_static_unified_comm: "True"
          python_version: "3.10"
          # Scheduled runs carry no inputs, so fall back to the default wheel.
          paddle_whl: ${{ github.event.inputs.paddle_whl || 'https://paddle-qa.bj.bcebos.com/paddle-pipeline/Develop-GpuAll-LinuxCentos-Gcc11-Cuda126-Cudnn95-Trt105-Py310-Compile/latest/paddlepaddle_gpu-0.0.0-cp310-cp310-linux_x86_64.whl' }}
          repo_branch: ${{ github.event.inputs.repo_branch || '' }}
        run: |
          # Timestamped name keeps containers from different runs apart.
          container_name=${TASK}-$(date +%Y%m%d-%H%M%S)
          echo "container_name=${container_name}" >> "$GITHUB_ENV"
          echo "Workspace path: ${{ github.workspace }}"
          # Pick the GPU flag by Docker server version:
          # --runtime=nvidia below 19.03, --gpus all otherwise.
          DOCKER_VER=$(docker version --format '{{.Server.Version}}' | cut -d. -f1,2)
          if (( $(echo "$DOCKER_VER < 19.03" | bc -l) )); then
            GPU_OPTION="--runtime=nvidia"
          else
            GPU_OPTION="--gpus all"
          fi
          echo "DOCKER_VER=${DOCKER_VER}"
          echo "GPU_OPTION=${GPU_OPTION}"
          # GPU_OPTION is left unquoted on purpose: "--gpus all" must be split
          # into two arguments. The bare `-e NAME` flags forward the value of
          # NAME from this step's environment into the container.
          docker run -d -t --name "${container_name}" --net=host -v /dev/shm:/dev/shm --shm-size=32G \
            -v $work_dir/../../..:$work_dir/../../.. \
            -v $work_dir:/workspace \
            -v /home/.cache/pip:/home/.cache/pip \
            -v /home/paddle-1/models/:/home/models/ \
            -e PF_HOME=/home/models/ \
            -e BRANCH \
            -e AGILE_COMPILE_BRANCH \
            -e COMMIT_ID \
            -e work_dir \
            -e ce_scripts \
            -e no_proxy \
            -e CE_name \
            -e paddle_whl \
            -e repo_branch \
            -e FLAGS_dynamic_static_unified_comm \
            -e python_version \
            -e HF_PROXY_PATH=$work_dir/../../../proxy_huggingface \
            -e AISTUDIO_PROXY_PATH=$work_dir/../../../proxy_aistudio \
            -e "HF_DATASETS_CACHE=$work_dir/../../../paddlenlp/huggingface/datasets" \
            -e "TRANSFORMERS_CACHE=$work_dir/../../../paddlenlp/huggingface" \
            -w /workspace ${GPU_OPTION} --privileged $IMAGE_NAME

      # Fetch the PaddleFormers source tarball into the container workspace
      # and bring the checkout to the requested (or current) branch head.
      - name: Download Code
        run: |
          docker exec -t "$container_name" /bin/bash -c '
            # Wipe the container working directory (/workspace) first.
            rm -rf * .[^.]*
            # From here on stop at the first failing command, so that a broken
            # download, extraction or checkout fails this step instead of
            # letting later steps run against a missing or stale tree.
            set -e
            echo "Downloading PaddleFormers.tar"
            wget -q --no-proxy https://paddle-qa.bj.bcebos.com/CodeSync/develop/PaddleFormers.tar --no-check-certificate
            echo "Extracting PaddleFormers.tar"
            tar xf PaddleFormers.tar && rm -rf PaddleFormers.tar
            source $work_dir/../../../proxy
            cd PaddleFormers
            git config --global user.name "PaddleCE"
            # NOTE(review): the address below looks like an e-mail obfuscation
            # placeholder rather than a real value - confirm.
            git config --global user.email "[email protected]"
            if [ -n "$repo_branch" ]; then
              echo "Switching to branch: $repo_branch"
              git fetch origin $repo_branch
              # Use the local branch if it exists, otherwise create it from origin.
              git checkout $repo_branch || git checkout -b $repo_branch origin/$repo_branch
              git pull
            else
              echo "No repo_branch provided, just pulling latest changes"
              git pull
            fi
            git submodule update --init --recursive --force
            git log --pretty=oneline -10
          '

      # Run the unit-test script inside the container with a 40 minute limit.
      - name: Test
        run: |
          docker exec -t "$container_name" /bin/bash -c '
            ldconfig
            pip config set global.cache-dir "/home/.cache/pip"
            set -e
            rm -rf /root/.cache/aistudio/
            cd /workspace/PaddleFormers && git config --global --add safe.directory $PWD
            source $work_dir/../../../proxy
            source $work_dir/../../../AISTUDIO_ACCESS_TOKEN
            echo "work_dir = ${work_dir}"
            cp -r ${work_dir}/../../../models ./models
            echo "Check whether the local model file exists:"
            ls -l ./models
            timeout 40m bash scripts/unit_test/ci_unittest.sh ${paddle_whl} true
          '

      # Upload every file under unittest_logs with the BOS client. Runs even
      # when an earlier step failed, so it has to cope with a missing or
      # empty log directory.
      - name: Upload Allure-reports & Logs
        if: always()
        env:
          home_path: ${{ github.workspace }}/../../..
          bos_file: ${{ github.workspace }}/../../../bos/BosClient.py
          allure_file: ${{ github.workspace }}/../../../allure-2.19.0/bin/allure
        run: |
          docker exec -t "$container_name" /bin/bash -c '
            # Install the BOS client on first use.
            if [ ! -f "${{ env.bos_file }}" ]; then
              wget -q --no-proxy -O ${{ env.home_path }}/bos_new.tar.gz https://xly-devops.bj.bcebos.com/home/bos_new.tar.gz --no-check-certificate
              # -p: the directory may already exist without BosClient.py in it.
              mkdir -p ${{ env.home_path }}/bos
              tar xf ${{ env.home_path }}/bos_new.tar.gz -C ${{ env.home_path }}/bos
            fi
            # if [ ! -f "${{ env.allure_file }}" ]; then
            #     wget -q --no-proxy -O ${{ env.home_path }}/allure-2.19.0.zip https://xly-devops.bj.bcebos.com/tools/allure-2.19.0.zip --no-check-certificate
            #     unzip -q ${{ env.home_path }}/allure-2.19.0.zip
            # fi
            # Uploads are grouped under a per-day prefix.
            bos_prefix="schedule/github-ce-$(date +%Y%m%d)"
            # # coverage.xml
            # cd /workspace/PaddleFormers
            # python ${{ env.bos_file }} coverage.xml paddle-github-action/PR/PaddleFormers/unittest-gpu/${bos_prefix}/logs
            # echo "cov-report: https://paddle-github-action.bj.bcebos.com/PR/PaddleFormers/unittest-gpu/${bos_prefix}/logs/coverage.xml"
            # logs
            if [ -d /workspace/PaddleFormers/unittest_logs ]; then
              cd /workspace/PaddleFormers/unittest_logs
              for FILE in /workspace/PaddleFormers/unittest_logs/*; do
                # An empty directory leaves the pattern unexpanded; skip it.
                [ -e "$FILE" ] || continue
                file=$(basename "$FILE")
                python ${{ env.bos_file }} "$file" paddle-github-action/PR/PaddleFormers/unittest-gpu/${bos_prefix}/logs
                echo "$file: https://paddle-github-action.bj.bcebos.com/PR/PaddleFormers/unittest-gpu/${bos_prefix}/logs/$file"
              done
            else
              echo "No unittest_logs directory found, nothing to upload"
            fi
            # cd /workspace/PaddleFormers/
            # ${{ env.allure_file }} generate result -o report
            # tar -czf products.tar.gz report unittest_logs
            # python ${{ env.bos_file }} products.tar.gz paddle-github-action/PR/PaddleFormers/unittest-gpu/${bos_prefix}/logs
            # echo "report: https://paddle-github-action.bj.bcebos.com/PR/PaddleFormers/unittest-gpu/${bos_prefix}/logs/products.tar.gz"
          '

      # Always remove the container; errors (for example, the container was
      # never created) are ignored.
      - name: Terminate And Delete the Container
        if: always()
        run: |
          docker rm -f "$container_name" 2>/dev/null || true