@@ -17,7 +17,7 @@ concurrency:
1717 cancel-in-progress : true
1818
1919env :
20- CONDA_ENV : pr_test
20+ CONDA_ENV : pr_regression
2121 HF_DATASETS_OFFLINE : 1
2222 HF_EVALUATE_OFFLINE : 1
2323 TRANSFORMERS_OFFLINE : 1
2626 HF_HUB_OFFLINE : 1
2727 CONDA_PATH : /mnt/shared-storage-user/opencompass-shared/qa-llm-cicd/miniconda3
2828 REPORT_ROOT : /mnt/shared-storage-user/opencompass-shared/qa-llm-cicd/eval_report/prtest
29+ BASELINE_DIR : mock-api-baseline
2930 COMPASS_DATA_CACHE : /mnt/shared-storage-user/auto-eval-pipeline/opencompass/llmeval/compass_data_cache
30- HF_DATASETS_CACHE : /mnt/shared-storage-user/auto-eval-pipeline/opencompass/llmeval /hf_cache
31+ HF_DATASETS_CACHE : /mnt/shared-storage-user/opencompass-shared/qa-llm-cicd /hf_cache
3132 HF_HUB_CACHE : /mnt/shared-storage-gpfs2/gpfs2-shared-public/huggingface/hub
3233 KUBEBRAIN_CLUSTER_ENTRY : https://h.pjlab.org.cn
3334 KUBEBRAIN_NAMESPACE : ailab-opencompass
34- JOB_NAME : pr-test-${{ github.run_id }}-${{ github.run_attempt }}
3535
3636jobs :
37- pr_run_test :
37+ cmd_test :
3838 runs-on : yidian_cu12
3939 timeout-minutes : 45
40+ env :
41+ JOB_NAME : pr-test-${{ github.run_id }}-cmd-${{ github.run_attempt }}
4042 steps :
4143 - name : Checkout repository
42- uses : actions/checkout@v2
44+ uses : actions/checkout@v6
4345 - name : Prepare - Install opencompass
4446 run : |
4547 . ${{env.CONDA_PATH}}/bin/activate
@@ -54,13 +56,18 @@ jobs:
5456 . ${{env.CONDA_PATH}}/bin/activate
5557 conda activate ${{env.CONDA_ENV}}
5658 pip list
57- rjob submit --metadata-name=${{ env.JOB_NAME }} --charged-group=opencompass_gpu --private-machine=group --group=opencompass_gpu --gpu=2 --cpu=32 --memory=32568 --private-machine=group --image=registry.h.pjlab.org.cn/ailab-puyu/xpuyu:torch-2.6.0-45d96d5f-0607 --env=COMPASS_DATA_CACHE=/mnt/shared-storage-user/auto-eval-pipeline/opencompass/llmeval/compass_data_cache --env=TIKTOKEN_CACHE_DIR=/mnt/shared-storage-user/auto-eval-pipeline/opencompass/llmeval/share_tiktoken --env=HF_ENDPOINT=https://hf-mirror.com --env=HF_DATASETS_CACHE=/mnt/shared-storage-user/auto-eval-pipeline/opencompass/llmeval/hf_cache --env=HF_HUB_CACHE=/mnt/shared-storage-user/large-model-center-share-weights/hf_hub --env=CUDA_MODULE_LOADING=EAGER --env=HF_DATASETS_OFFLINE=1 --env=TRANSFORMERS_OFFLINE=1 --env=HF_EVALUATE_OFFLINE=1 --env=HF_HUB_OFFLINE=1 --mount=gpfs://gpfs1/qa-llm-cicd:/mnt/shared-storage-user/qa-llm-cicd --mount=gpfs://gpfs1/opencompass-shared:/mnt/shared-storage-user/opencompass-shared --mount=gpfs://gpfs1/auto-eval-pipeline:/mnt/shared-storage-user/auto-eval-pipeline --mount=gpfs://gpfs1/large-model-center-share-weights:/mnt/shared-storage-user/large-model-center-share-weights --host-network=True -- bash -exc '/mnt/shared-storage-user/opencompass-shared/qa-llm-cicd/pr_test.sh ${{env.REPORT_ROOT}}/${{ github.run_id }}'
59+ rjob submit --metadata-name=${{ env.JOB_NAME }} --charged-group=opencompass_gpu --private-machine=group --group=opencompass_gpu --gpu=2 --cpu=32 --memory=32568 --private-machine=group --image=registry.h.pjlab.org.cn/ailab-puyu/xpuyu:torch-2.6.0-45d96d5f-0607 --env=COMPASS_DATA_CACHE=/mnt/shared-storage-user/auto-eval-pipeline/opencompass/llmeval/compass_data_cache --env=TIKTOKEN_CACHE_DIR=/mnt/shared-storage-user/auto-eval-pipeline/opencompass/llmeval/share_tiktoken --env=HF_ENDPOINT=https://hf-mirror.com --env=HF_DATASETS_CACHE=${{env.HF_DATASETS_CACHE}} --env=HF_HUB_CACHE=/mnt/shared-storage-user/large-model-center-share-weights/hf_hub --env=CUDA_MODULE_LOADING=EAGER --env=HF_DATASETS_OFFLINE=1 --env=TRANSFORMERS_OFFLINE=1 --env=HF_EVALUATE_OFFLINE=1 --env=HF_HUB_OFFLINE=1 --mount=gpfs://gpfs1/qa-llm-cicd:/mnt/shared-storage-user/qa-llm-cicd --mount=gpfs://gpfs1/opencompass-shared:/mnt/shared-storage-user/opencompass-shared --mount=gpfs://gpfs1/auto-eval-pipeline:/mnt/shared-storage-user/auto-eval-pipeline --mount=gpfs://gpfs1/large-model-center-share-weights:/mnt/shared-storage-user/large-model-center-share-weights --host-network=True -- bash -exc '/mnt/shared-storage-user/opencompass-shared/qa-llm-cicd/pr_test_new.sh ${{env.REPORT_ROOT}}/${{ github.run_id }}'
5860
5961 for i in {1..300}; do
6062 current_status=$(rjob get ${{ env.JOB_NAME }} | grep -oP 'rjob [^:]+: \K[^ ]+')
61- if [[ $current_status == "Succeeded" || $current_status == "Failed" || $current_status == "Stopped" ]]; then
62- echo "Current status: $current_status, stop checking"
63- break
63+ if [[ $current_status == "Succeeded" ]]; then
64+ echo "Task succeeded"
65+ rjob logs job ${{ env.JOB_NAME }} -n 100
66+ exit 0
67+ elif [[ $current_status == "Failed" || $current_status == "Stopped" ]]; then
68+ echo "Task failed or stopped, fetching logs"
69+ rjob logs job ${{ env.JOB_NAME }} -n 100
70+ exit 1
6471 fi
6572 sleep 6
6673 done
@@ -96,12 +103,93 @@ jobs:
96103 conda info --envs
97104 rjob stop job ${{ env.JOB_NAME }}
98105
99- notify_to_feishu :
100- if : ${{ always() && !cancelled() && contains(needs.*.result, 'failure') && (github.ref_name == 'develop' || github.ref_name == 'main') }}
101- needs : [pr_run_test]
102- timeout-minutes : 5
103- runs-on : self-hosted
# Mock-API regression job.
#
# Each matrix leg submits one cluster job through `rjob`. That job starts
# mock_chat_api.py in the background, runs autotest/all/<func_type>.py with
# opencompass, and writes to REPORT_ROOT/<run_id>/<func_type>. The runner then
# compares predictions, results and summary with the baseline stored under
# REPORT_ROOT/BASELINE_DIR/<func_type>.
#
# NOTE(review): every leg installs into and uninstalls from the same conda env
# (env.CONDA_ENV). If legs are scheduled concurrently they may disturb each
# other -- confirm the runner serialises them, or give each leg its own env.
mock_api_test:
  runs-on: yidian_cu12
  timeout-minutes: 120
  strategy:
    # Keep the remaining legs running when one of them fails.
    fail-fast: false
    matrix:
      include:
        - func_type: chat_obj_fullbench_v1
          name: chat-v1
          memory: 32568
          cpu: 16
        - func_type: chat_obj_fullbench_v2
          name: chat-v2
          memory: 32568
          cpu: 16
        - func_type: chat_sub_fullbench
          name: chat-sub
          # memory: 3072
          memory: 32568
          cpu: 2
        - func_type: chat_longtext_fullbench
          name: chat-longtext
          memory: 65136
          cpu: 16
  env:
    # Cluster job name: built from the run id, the matrix leg name and the
    # attempt number.
    JOB_NAME: pr-test-${{ github.run_id }}-api-${{ matrix.name }}-${{ github.run_attempt }}
  steps:
    - name: Checkout repository
      uses: actions/checkout@v6
    - name: Prepare - Install opencompass
      run: |
        . ${{env.CONDA_PATH}}/bin/activate
        conda activate ${{env.CONDA_ENV}}
        python3 -m pip uninstall opencompass -y
        python3 -m pip install .[full]
        conda info --envs
        pip list
        lmdeploy check_env
    - name: Run test
      run: |
        . ${{env.CONDA_PATH}}/bin/activate
        conda activate ${{env.CONDA_ENV}}
        pip list

        # Submit the evaluation as a CPU-only cluster job (--gpu=0); CPU and
        # memory come from the matrix leg. The quoted payload is the script
        # executed inside the job and is kept on one line on purpose.
        # NOTE(review): --private-machine=group is passed twice; presumably
        # redundant -- confirm against the rjob CLI before removing one.
        rjob submit \
          --metadata-name=${{ env.JOB_NAME }} \
          --charged-group=opencompass_gpu \
          --private-machine=group \
          --group=opencompass_gpu \
          --gpu=0 \
          --cpu=${{ matrix.cpu }} \
          --memory=${{ matrix.memory }} \
          --private-machine=group \
          --image=registry.h.pjlab.org.cn/ailab-puyu-puyu_gpu/lmdeploy:v0.12.0-cu12.8 \
          --env=COMPASS_DATA_CACHE=/mnt/shared-storage-user/auto-eval-pipeline/opencompass/llmeval/compass_data_cache \
          --env=TIKTOKEN_CACHE_DIR=/mnt/shared-storage-user/auto-eval-pipeline/opencompass/llmeval/share_tiktoken \
          --env=HF_ENDPOINT=https://hf-mirror.com \
          --env=HF_DATASETS_CACHE=${{env.HF_DATASETS_CACHE}} \
          --env=HF_HUB_CACHE=/mnt/shared-storage-gpfs2/gpfs2-shared-public/huggingface/hub \
          --env=CUDA_MODULE_LOADING=EAGER \
          --env=HF_DATASETS_OFFLINE=1 \
          --env=TRANSFORMERS_OFFLINE=1 \
          --env=HF_EVALUATE_OFFLINE=1 \
          --env=HF_HUB_OFFLINE=1 \
          --env=REPORT_DIR=${{env.REPORT_ROOT}} \
          --env=CHAT_TYPE=${{matrix.func_type}} \
          --mount=gpfs://gpfs1/qa-llm-cicd:/mnt/shared-storage-user/qa-llm-cicd \
          --mount=gpfs://gpfs1/opencompass-shared:/mnt/shared-storage-user/opencompass-shared \
          --mount=gpfs://gpfs1/auto-eval-pipeline:/mnt/shared-storage-user/auto-eval-pipeline \
          --mount=gpfs://gpfs2/gpfs2-shared-public:/mnt/shared-storage-gpfs2/gpfs2-shared-public \
          --host-network=True \
          -- bash -exc 'source ${{env.CONDA_PATH}}/bin/activate; conda activate ${{env.CONDA_ENV}}; conda env list; cd ${{github.workspace}}; ln -s /mnt/shared-storage-user/auto-eval-pipeline/opencompass/llmeval/compass_data_cache/data .; python /mnt/shared-storage-user/opencompass-shared/qa-llm-cicd/mock_chat_api.py --type winrate --port 26333 > mock_${{matrix.name}}.log 2>&1 & sleep 3; opencompass autotest/all/${{matrix.func_type}}.py --work-dir ${{env.REPORT_ROOT}}/${{ github.run_id }}/${{matrix.func_type}} --reuse;'

        # Poll the cluster job every 6 s, at most 300 times (~30 minutes).
        for i in {1..300}; do
          # `|| true`: the step shell runs with errexit, so a grep that finds
          # no status would otherwise abort the step without any diagnostics.
          # An empty status simply means "poll again".
          current_status=$(rjob get ${{ env.JOB_NAME }} | grep -oP 'rjob [^:]+: \K[^ ]+' || true)
          if [[ $current_status == "Succeeded" ]]; then
            echo "Task succeeded"
            rjob logs job ${{ env.JOB_NAME }} -n 100
            exit 0
          elif [[ $current_status == "Failed" || $current_status == "Stopped" ]]; then
            echo "Task failed or stopped, fetching logs"
            rjob logs job ${{ env.JOB_NAME }} -n 100
            exit 1
          fi
          sleep 6
        done

        # The job never reached a terminal state within the polling budget.
        # Fail explicitly: without this the step would end on a successful
        # `sleep` and the comparison below would run on unfinished output.
        echo "Timed out waiting for ${{ env.JOB_NAME }} (last status: ${current_status:-unknown})"
        rjob logs job ${{ env.JOB_NAME }} -n 100 || true
        exit 1
    - name: Compare predictions with baseline
      run: |
        . ${{env.CONDA_PATH}}/bin/activate
        conda activate ${{env.CONDA_ENV}}
        CURRENT="${{env.REPORT_ROOT}}/${{ github.run_id }}/${{matrix.func_type}}"
        BASELINE="${{env.REPORT_ROOT}}/${{env.BASELINE_DIR}}/${{matrix.func_type}}"
        echo "Current run: $CURRENT"
        echo "Baseline: $BASELINE"
        # Both directories must exist before anything can be compared.
        if [[ ! -d "$CURRENT" ]]; then
          echo "Current run output not found: $CURRENT"
          exit 1
        fi
        if [[ ! -d "$BASELINE" ]]; then
          echo "Baseline not found: $BASELINE"
          echo "Upload golden predictions to REPORT_ROOT/BASELINE_DIR/<func_type> on shared storage."
          exit 1
        fi
        # One comparison pass per artefact kind: predictions, results, summary.
        python autotest/utils/compare_results.py compare_results \
          "$CURRENT" "$BASELINE" predictions
        python autotest/utils/compare_results.py compare_results \
          "$CURRENT" "$BASELINE" results
        python autotest/utils/compare_results.py compare_results \
          "$CURRENT" "$BASELINE" summary
    - name: Uninstall opencompass
      # Runs even when earlier steps failed, so the cluster job is stopped.
      if: always()
      run: |
        . ${{env.CONDA_PATH}}/bin/activate
        conda activate ${{env.CONDA_ENV}}
        python3 -m pip uninstall opencompass -y
        conda info --envs
        rjob stop job ${{ env.JOB_NAME }}
0 commit comments