Model Unittest GPU CI #137
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Model Unittest GPU CI
# Runs the PaddleFormers model unit tests inside a GPU Docker container.
# Triggered by pull requests, by a daily cron, and by other workflows
# through `workflow_call`.
name: Model Unittest GPU CI

on:
  pull_request:
  schedule:
    # Daily at 18:00 (GitHub evaluates cron schedules in UTC).
    - cron: "0 18 * * *"
  workflow_call:
    inputs:
      # Optional runner label override; the job falls back to its default.
      runner:
        required: false
        type: string
      # Optional Docker image override; the first step falls back to a default.
      image_name:
        required: false
        type: string

# One live run per pull request: a new push cancels the previous run.
# Runs without a PR number are keyed by run_id, so each gets its own group.
concurrency:
  group: model-unittest-${{ github.workflow }}-${{ github.event.pull_request.number || github.run_id }}
  cancel-in-progress: true

env:
  # '0' is the sentinel for "not a pull request"; later steps test for it.
  PR_ID: ${{ github.event.pull_request.number || '0' }}
  COMMIT_ID: ${{ github.event.pull_request.head.sha || github.sha }}
  # Prefix of the container name. Uses the same '0' fallback as PR_ID so that
  # non-PR runs do not produce "PaddleFormers-CI--model-unittest-gpu".
  TASK: PaddleFormers-CI-${{ github.event.pull_request.number || '0' }}-model-unittest-gpu
  # NOTE(review): the Test step runs scripts/regression/ci_model_unittest.sh,
  # not this path - confirm which consumer still reads CI_SCRIPTS_PATH.
  CI_SCRIPTS_PATH: /workspace/PaddleFormers/scripts/ci_model_unittest.sh
  BRANCH: ${{ github.event.pull_request.base.ref || github.ref_name }}
  # NOTE(review): unlike BRANCH this has no fallback, so it is empty on
  # non-PR events - confirm the test script expects that.
  AGILE_COMPILE_BRANCH: ${{ github.event.pull_request.base.ref }}
  CI_JOB_NAME: model-unittest-gpu-ci
  no_proxy: "localhost,bj.bcebos.com,su.bcebos.com,bcebos.com,apiin.im.baidu.com,gitee.com,aliyun.com,.baidu.com,.tuna.tsinghua.edu.cn"

defaults:
  run:
    shell: bash
jobs:
  # Single job: start a GPU container, fetch the code, run the model unit
  # tests, upload the logs, then remove the container.
  model-unittest-gpu-ci:
    name: model-unittest-gpu-ci
    # Caller-supplied runner label, otherwise 'ernie-8gpu'. The former third
    # operand ('distrbute') could never be selected because a non-empty string
    # literal is always truthy, so it was dead code.
    runs-on: ${{ inputs.runner || 'ernie-8gpu' }}
    steps:
# Step: choose the Docker image and export it as IMAGE_NAME for later steps.
      - name: Determine Image Name
        env:
          IMAGE_NAME: ${{ inputs.image_name }}
        run: |
          # Keep the caller-provided image when it is non-empty; otherwise use
          # the default CUDA 12.6 development image.
          default_image="ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/paddle:cuda126-dev-latest"
          echo "IMAGE_NAME=${IMAGE_NAME:-${default_image}}" >> "$GITHUB_ENV"
# Step: start the detached GPU container that all later steps `docker exec` into.
      - name: Run Container
        env:
          work_dir: ${{ github.workspace }}
          FLAGS_dynamic_static_unified_comm: "True"
          python_version: "3.10"
          PIP_CACHE_DIR: /root/.cache/pip
          paddle_whl: https://paddle-qa.bj.bcebos.com/paddle-pipeline/Develop-TagBuild-Training-Linux-Gpu-Cuda12.6-Cudnn9.5-Trt10.5-Mkl-Avx-Gcc11-SelfBuiltPypiUse/latest/paddlepaddle_gpu-0.0.0-cp310-cp310-linux_x86_64.whl
        run: |
          # A timestamp suffix keeps the container name unique per run; it is
          # exported so later steps can address the container.
          container_name="${TASK}-$(date +%Y%m%d-%H%M%S)"
          echo "container_name=${container_name}" >> "$GITHUB_ENV"
          # Read the workspace from the step env instead of inlining the
          # expression into the script text.
          echo "Workspace path: ${work_dir}"

          # Docker releases before 19.03 need the nvidia runtime flag; newer
          # ones use --gpus. The option is kept in an array so that
          # "--gpus all" stays two arguments without relying on word splitting.
          DOCKER_VER=$(docker version --format '{{.Server.Version}}' | cut -d. -f1,2)
          if (( $(echo "$DOCKER_VER < 19.03" | bc -l) )); then
            gpu_args=(--runtime=nvidia)
          else
            gpu_args=(--gpus all)
          fi
          echo "DOCKER_VER=${DOCKER_VER}"
          echo "GPU_OPTION=${gpu_args[*]}"

          # Mounts: the directory three levels above the workspace (at the same
          # path inside the container), the workspace as /workspace, the host
          # cache directory and the host model directory.
          # All path/name expansions are quoted so unusual characters in the
          # workspace path cannot split arguments.
          docker run -d -t "${gpu_args[@]}" --name "${container_name}" --net=host -v /dev/shm:/dev/shm --shm-size=32G \
            -v "${work_dir}/../../..:${work_dir}/../../.." \
            -v "${work_dir}:/workspace" \
            -v /home/.cache/:/root/.cache/ \
            -v /home/paddle-1/models/:/home/models/ \
            -e "BRANCH=$BRANCH" \
            -e "AGILE_COMPILE_BRANCH=$AGILE_COMPILE_BRANCH" \
            -e "PR_ID=$PR_ID" \
            -e "COMMIT_ID=$COMMIT_ID" \
            -e "work_dir=$work_dir" \
            -e "CI_SCRIPTS_PATH=$CI_SCRIPTS_PATH" \
            -e "no_proxy=$no_proxy" \
            -e "CI_JOB_NAME=$CI_JOB_NAME" \
            -e "paddle_whl=$paddle_whl" \
            -e "FLAGS_dynamic_static_unified_comm=$FLAGS_dynamic_static_unified_comm" \
            -e "python_version=$python_version" \
            -e "HF_PROXY_PATH=${work_dir}/../../../proxy_huggingface" \
            -e "AISTUDIO_PROXY_PATH=${work_dir}/../../../proxy_aistudio" \
            -e PF_HOME=/home/models/ \
            -e PIP_CACHE_DIR \
            -w /workspace --privileged "${IMAGE_NAME}"
# Step: fetch the PaddleFormers snapshot and, for pull requests, merge the PR onto its base branch.
      - name: Download Code
        run: |
          # The script is single-quoted, so every variable in it is expanded
          # inside the container (from the -e values given to `docker run`).
          docker exec -t "$container_name" /bin/bash -c '
          # Abort with a non-zero status so the step fails instead of letting
          # the Test step run against a missing or un-merged tree.
          die() { echo "ERROR: $*" >&2; exit 1; }

          # Wipe the working directory (/workspace), including dot-files.
          rm -rf * .[^.]*
          echo "Downloading PaddleFormers.tar"
          wget -q --no-proxy https://paddle-qa.bj.bcebos.com/CodeSync/develop/PaddleFormers.tar --no-check-certificate \
            || die "failed to download PaddleFormers.tar"
          echo "Extracting PaddleFormers.tar"
          rm -rf PaddleFormers
          tar xf PaddleFormers.tar || die "failed to extract PaddleFormers.tar"
          rm -rf PaddleFormers.tar
          echo "work_dir = ${work_dir}"
          # Proxy settings live three levels above the workspace on the host
          # (that directory is mounted at the same path in the container).
          source ${work_dir}/../../../proxy
          pip install uv
          cd PaddleFormers || die "PaddleFormers directory is missing after extraction"
          git config --global user.name "PaddleCI"
          git config --global user.email "paddle_ci@example.com"
          # Best-effort refresh of the snapshot; failures here are tolerated.
          git pull
          git submodule update --init --recursive --force
          if [ -n "$PR_ID" ] && [ "$PR_ID" != "0" ]; then
            git fetch origin pull/${PR_ID}/head || die "failed to fetch pull request ${PR_ID}"
            git checkout -b PR_${PR_ID} FETCH_HEAD || die "failed to check out pull request ${PR_ID}"
            git remote add upstream https://github.com/PaddlePaddle/PaddleFormers.git
            echo "Checking out ${BRANCH}..."
            git fetch upstream ${BRANCH}:${BRANCH}
            # A failed merge (e.g. conflicts) must stop the job; otherwise the
            # tests would run on a tree that is not the PR merged onto its base.
            git merge ${BRANCH} --no-edit || die "failed to merge ${BRANCH} into PR_${PR_ID}"
            # Print the paths that differ from the base branch.
            git diff --numstat ${BRANCH} -- | awk "{print \$NF}"
          else
            echo "Not in a pull_request event. Skipping PR-specific operations."
          fi
          git log --pretty=oneline -10
          '
# Step: run the model unit-test script inside the container.
      - name: Test
        run: |
          docker exec -t "$container_name" /bin/bash -c '
          # Environment preparation and diagnostics; these run before `set -e`
          # so a failure here does not abort the step.
          ldconfig
          mkdir -p /root/.cache/pip
          pip cache dir
          uv cache dir
          set -e
          rm -rf /root/.cache/aistudio/
          # `cd` is a separate command: as the left side of an `&&` list its
          # failure would not trigger `set -e`, and the remaining commands
          # would then run in the wrong directory.
          cd /workspace/PaddleFormers
          git config --global --add safe.directory "$PWD"
          echo "work_dir = ${work_dir}"
          # Copy the models directory found three levels above the workspace.
          cp -r ${work_dir}/../../../models ./models
          echo "Check whether the local model file exists:"
          ls -l ./models
          # Hard 35-minute limit on the test script. AGILE_COMPILE_BRANCH is
          # left unquoted on purpose so an empty value passes no third argument.
          timeout 35m bash -x scripts/regression/ci_model_unittest.sh ${paddle_whl} false ${AGILE_COMPILE_BRANCH}
          '
# Step: upload the unit-test logs to BOS; runs even when earlier steps failed.
      - name: Upload Products
        if: always()
        env:
          home_path: ${{ github.workspace }}/../../..
          bos_file: ${{ github.workspace }}/../../../bos/BosClient.py
        run: |
          # The env.* expressions are substituted into the script text by
          # GitHub before it runs; the step-level env vars themselves are not
          # forwarded into the container by `docker exec`.
          docker exec -t "$container_name" /bin/bash -c '
          # Install the BOS client on first use.
          if [ ! -f "${{ env.bos_file }}" ]; then
            wget -q --no-proxy -O ${{ env.home_path }}/bos_new.tar.gz https://xly-devops.bj.bcebos.com/home/bos_new.tar.gz --no-check-certificate
            # -p: the directory may already exist without BosClient.py in it.
            mkdir -p ${{ env.home_path }}/bos
            tar xf ${{ env.home_path }}/bos_new.tar.gz -C ${{ env.home_path }}/bos
          fi
          # Upload destination: per PR and commit, or per day for non-PR runs.
          if [ -n "$PR_ID" ] && [ "$PR_ID" != "0" ]; then
            bos_prefix="${PR_ID}/${COMMIT_ID}"
          else
            bos_prefix="schedule/$(date +%Y%m%d)"
          fi
          # logs
          log_dir=/workspace/PaddleFormers/model_unittest_logs
          # Without this guard a missing log directory leaves the shell in the
          # old working directory, the glob below stays literal, and the
          # unquoted "*" basename would upload whatever files are there.
          if ! cd "$log_dir"; then
            echo "Log directory $log_dir does not exist; nothing to upload."
            exit 0
          fi
          for FILE in "$log_dir"/*; do
            # Skip the literal pattern left behind by an empty directory.
            [ -e "$FILE" ] || continue
            file=$(basename "$FILE")
            python ${{ env.bos_file }} "$file" paddle-github-action/PR/PaddleFormers/model-unittest-gpu/${bos_prefix}/logs
            echo "$file: https://paddle-github-action.bj.bcebos.com/PR/PaddleFormers/model-unittest-gpu/${bos_prefix}/logs/$file"
          done
          '
# Step: force-remove the test container regardless of how earlier steps ended.
      - name: Terminate And Delete the Container
        if: ${{ always() }}
        run: |
          # Cleanup is best-effort: docker's error output is discarded and a
          # failed removal never fails the step.
          docker rm -f $container_name 2>/dev/null || :