# Nightly Test (NPU) #126
name: Nightly Test (NPU)

on:
  schedule:
    - cron: '0 18 * * *' # Execute at 2:00 a.m. Beijing Time every day
  pull_request:
    branches:
      - main
    paths:
      - ".github/workflows/nightly-test-npu.yml"
  workflow_dispatch:
  workflow_call:
    inputs:
      ref:
        description: 'Git ref (branch, tag, or SHA) to test. If not provided, uses the default branch.'
        required: false
        type: string
        default: ''
      job_filter:
        description: 'Select which job to run (leave empty or "all" to run all jobs).'
        required: false
        type: string
        default: 'all'
      image_a3:
        description: 'The a3 Docker image in which the test task runs.'
        required: false
        type: string
        default: 'swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.5.0-a3-ubuntu22.04-py3.11'
      skip_install_flag:
        description: 'Whether to skip the installation of sglang. Defaults to false.'
        required: false
        type: string
        default: 'false'
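
# Illustrative caller sketch (not part of this workflow): how another workflow
# in the same repository could invoke this one via workflow_call. The job name
# and ref value below are hypothetical; the path matches the one watched above.
#
#   jobs:
#     nightly-npu:
#       uses: ./.github/workflows/nightly-test-npu.yml
#       with:
#         ref: 'my-feature-branch'
#         job_filter: 'all'
#         skip_install_flag: 'false'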

concurrency:
  group: nightly-test-npu-${{ inputs.ref || github.ref }}
  cancel-in-progress: ${{ github.event_name != 'workflow_call' }}
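
# Note: runs for the same ref share one concurrency group, so a newer run
# cancels an in-progress one. The workflow_call exemption is best-effort:
# inside a reusable workflow, github.event_name reflects the caller's event
# rather than 'workflow_call', so callers should not assume they are immune
# to cancellation.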

jobs:
  set-image-config:
    runs-on: ubuntu-latest
    outputs:
      ref: ${{ steps.set-vars.outputs.ref }}
      job_filter: ${{ steps.set-vars.outputs.job_filter }}
      image_a3: ${{ steps.set-vars.outputs.image_a3 }}
      skip_install_flag: ${{ steps.set-vars.outputs.skip_install_flag }}
    steps:
      # When triggered by a PR, no input parameters are provided; the latest community code is tested by default.
      - name: Set image config
        id: set-vars
        run: |
          # Fall back to the documented defaults whenever an input is empty.
          if [ -z "${{ inputs.ref }}" ]; then
            echo "ref=" >> $GITHUB_OUTPUT
          else
            echo "ref=${{ inputs.ref }}" >> $GITHUB_OUTPUT
          fi
          if [ -z "${{ inputs.job_filter }}" ]; then
            echo "job_filter=all" >> $GITHUB_OUTPUT
          else
            echo "job_filter=${{ inputs.job_filter }}" >> $GITHUB_OUTPUT
          fi
          if [ -z "${{ inputs.image_a3 }}" ]; then
            echo "image_a3=swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.5.0-a3-ubuntu22.04-py3.11" >> $GITHUB_OUTPUT
          else
            echo "image_a3=${{ inputs.image_a3 }}" >> $GITHUB_OUTPUT
          fi
          if [ -z "${{ inputs.skip_install_flag }}" ]; then
            echo "skip_install_flag=false" >> $GITHUB_OUTPUT
          else
            echo "skip_install_flag=${{ inputs.skip_install_flag }}" >> $GITHUB_OUTPUT
          fi
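
  # The nightly-*-npu-a3 jobs below are intentionally parallel copies: they
  # differ only in the runner label, the suite name passed to run_suite.py,
  # and the partition count.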

  nightly-1-npu-a3:
    needs: [set-image-config]
    if: ${{ (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') }}
    runs-on: linux-aarch64-a3-2
    strategy:
      fail-fast: false
      matrix:
        part: [0, 1]
    container:
      image: ${{ needs.set-image-config.outputs.image_a3 }}
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ needs.set-image-config.outputs.ref || github.ref }}
      - name: Install dependencies
        env:
          TORCH_CACHE_URL: "http://cache-service.nginx-pypi-cache.svc.cluster.local/whl/cpu"
          PYPI_CACHE_URL: "http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple"
          GITHUB_PROXY_URL: "https://gh-proxy.test.osinfra.cn/"
        run: |
          # Speed up by using infra cache services
          CACHING_URL="cache-service.nginx-pypi-cache.svc.cluster.local"
          sed -Ei "s@(ports|archive).ubuntu.com@${CACHING_URL}:8081@g" /etc/apt/sources.list
          pip config set global.index-url http://${CACHING_URL}/pypi/simple
          pip config set global.trusted-host "${CACHING_URL}"
          if [ "${{ needs.set-image-config.outputs.skip_install_flag }}" != "true" ]; then
            bash scripts/ci/npu/npu_ci_install_dependency.sh a3
          fi
          # Copy the required ShareGPT file from our daily cache
          cp ~/.cache/modelscope/hub/datasets/otavia/ShareGPT_Vicuna_unfiltered/ShareGPT_V3_unfiltered_cleaned_split.json /tmp
          # Copy the GSM8K dataset
          cp ~/.cache/modelscope/hub/datasets/tmp/test.jsonl /tmp
      - name: Print Log Information
        run: |
          bash scripts/ci/npu/npu_log_print.sh
      - name: Run test
        timeout-minutes: 240
        env:
          SGLANG_USE_MODELSCOPE: true
          SGLANG_IS_IN_CI: true
          HF_ENDPOINT: https://hf-mirror.com
          TORCH_EXTENSIONS_DIR: /tmp/torch_extensions
          PYTORCH_NPU_ALLOC_CONF: "expandable_segments:True"
          STREAMS_PER_DEVICE: 32
        run: |
          pip install sglang_router
          hf download lmms-lab/MMMU --repo-type dataset
          pip install sentence_transformers torchaudio==2.8.0
          # Specifiers containing '>' are quoted so the shell does not treat them as redirections.
          pip install protobuf==6.31.1 zss pre-commit "wandb>=0.16.0" tenacity==8.3.0 loguru openpyxl latex2sympy2 zstandard transformers-stream-generator tqdm-multiprocess pycocoevalcap
          pip install yt-dlp sentencepiece==0.1.99 nltk av ftfy sqlitedict==2.1.0 "sacrebleu>=1.5.0" pytablewriter black==24.1.0 isort==5.13.2 "peft>=0.2.0" "accelerate>=0.29.1"
          pip install jsonlines httpx==0.25.0 "evaluate>=0.4.0" datasets==2.16.1 numexpr xgrammar==0.1.32 numpy==1.26.4 dotenv
          git clone --branch v0.3.3 --depth 1 https://github.com/EvolvingLMMs-Lab/lmms-eval.git
          cd ./lmms-eval
          # Install lmms-eval in the background, then also put its source tree on
          # PYTHONPATH so the tests can import it even if the install has not finished.
          nohup pip install . > lmmslog.txt 2>&1 &
          sleep 120
          export PYTHONPATH=$PYTHONPATH:$(pwd)
          cd ../
          cd test
          python3 run_suite.py --hw npu --suite nightly-1-npu-a3 --nightly --continue-on-error --timeout-per-file 3600 --auto-partition-id ${{ matrix.part }} --auto-partition-size 2
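
  # Partitioning: run_suite.py shards the suite's test files across
  # --auto-partition-size workers, and each matrix entry selects one shard via
  # --auto-partition-id. Suites with matrix part [0] run unsharded
  # (partition size 1).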

  nightly-2-npu-a3:
    needs: [set-image-config]
    if: ${{ (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') }}
    runs-on: linux-aarch64-a3-2
    strategy:
      fail-fast: false
      matrix:
        part: [0]
    container:
      image: ${{ needs.set-image-config.outputs.image_a3 }}
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ needs.set-image-config.outputs.ref || github.ref }}
      - name: Install dependencies
        env:
          TORCH_CACHE_URL: "http://cache-service.nginx-pypi-cache.svc.cluster.local/whl/cpu"
          PYPI_CACHE_URL: "http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple"
          GITHUB_PROXY_URL: "https://gh-proxy.test.osinfra.cn/"
        run: |
          # Speed up by using infra cache services
          CACHING_URL="cache-service.nginx-pypi-cache.svc.cluster.local"
          sed -Ei "s@(ports|archive).ubuntu.com@${CACHING_URL}:8081@g" /etc/apt/sources.list
          pip config set global.index-url http://${CACHING_URL}/pypi/simple
          pip config set global.trusted-host "${CACHING_URL}"
          if [ "${{ needs.set-image-config.outputs.skip_install_flag }}" != "true" ]; then
            bash scripts/ci/npu/npu_ci_install_dependency.sh a3
          fi
          # Copy the required ShareGPT file from our daily cache
          cp ~/.cache/modelscope/hub/datasets/otavia/ShareGPT_Vicuna_unfiltered/ShareGPT_V3_unfiltered_cleaned_split.json /tmp
          # Copy the GSM8K dataset
          cp ~/.cache/modelscope/hub/datasets/tmp/test.jsonl /tmp
      - name: Print Log Information
        run: |
          bash scripts/ci/npu/npu_log_print.sh
      - name: Run test
        timeout-minutes: 240
        env:
          SGLANG_USE_MODELSCOPE: true
          SGLANG_IS_IN_CI: true
          HF_ENDPOINT: https://hf-mirror.com
          TORCH_EXTENSIONS_DIR: /tmp/torch_extensions
          PYTORCH_NPU_ALLOC_CONF: "expandable_segments:True"
          STREAMS_PER_DEVICE: 32
        run: |
          pip install sglang_router
          hf download lmms-lab/MMMU --repo-type dataset
          pip install sentence_transformers torchaudio==2.8.0
          pip install protobuf==6.31.1 zss pre-commit "wandb>=0.16.0" tenacity==8.3.0 loguru openpyxl latex2sympy2 zstandard transformers-stream-generator tqdm-multiprocess pycocoevalcap
          pip install yt-dlp sentencepiece==0.1.99 nltk av ftfy sqlitedict==2.1.0 "sacrebleu>=1.5.0" pytablewriter black==24.1.0 isort==5.13.2 "peft>=0.2.0" "accelerate>=0.29.1"
          pip install jsonlines httpx==0.25.0 "evaluate>=0.4.0" datasets==2.16.1 numexpr xgrammar==0.1.32 numpy==1.26.4 dotenv
          git clone --branch v0.3.3 --depth 1 https://github.com/EvolvingLMMs-Lab/lmms-eval.git
          cd ./lmms-eval
          nohup pip install . > lmmslog.txt 2>&1 &
          sleep 120
          export PYTHONPATH=$PYTHONPATH:$(pwd)
          cd ../
          cd test
          python3 run_suite.py --hw npu --suite nightly-2-npu-a3 --nightly --continue-on-error --timeout-per-file 3600 --auto-partition-id ${{ matrix.part }} --auto-partition-size 1

  nightly-4-npu-a3:
    needs: [set-image-config]
    if: ${{ (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') }}
    runs-on: linux-aarch64-a3-4
    strategy:
      fail-fast: false
      matrix:
        part: [0]
    container:
      image: ${{ needs.set-image-config.outputs.image_a3 }}
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ needs.set-image-config.outputs.ref || github.ref }}
      - name: Install dependencies
        env:
          TORCH_CACHE_URL: "http://cache-service.nginx-pypi-cache.svc.cluster.local/whl/cpu"
          PYPI_CACHE_URL: "http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple"
          GITHUB_PROXY_URL: "https://gh-proxy.test.osinfra.cn/"
        run: |
          # Speed up by using infra cache services
          CACHING_URL="cache-service.nginx-pypi-cache.svc.cluster.local"
          sed -Ei "s@(ports|archive).ubuntu.com@${CACHING_URL}:8081@g" /etc/apt/sources.list
          pip config set global.index-url http://${CACHING_URL}/pypi/simple
          pip config set global.trusted-host "${CACHING_URL}"
          if [ "${{ needs.set-image-config.outputs.skip_install_flag }}" != "true" ]; then
            bash scripts/ci/npu/npu_ci_install_dependency.sh a3
          fi
          # Copy the required ShareGPT file from our daily cache
          cp ~/.cache/modelscope/hub/datasets/otavia/ShareGPT_Vicuna_unfiltered/ShareGPT_V3_unfiltered_cleaned_split.json /tmp
          # Copy the GSM8K dataset
          cp ~/.cache/modelscope/hub/datasets/tmp/test.jsonl /tmp
      - name: Print Log Information
        run: |
          bash scripts/ci/npu/npu_log_print.sh
      - name: Run test
        timeout-minutes: 240
        env:
          SGLANG_USE_MODELSCOPE: true
          SGLANG_IS_IN_CI: true
          HF_ENDPOINT: https://hf-mirror.com
          TORCH_EXTENSIONS_DIR: /tmp/torch_extensions
          PYTORCH_NPU_ALLOC_CONF: "expandable_segments:True"
          STREAMS_PER_DEVICE: 32
        run: |
          pip install sglang_router
          hf download lmms-lab/MMMU --repo-type dataset
          pip install sentence_transformers torchaudio==2.8.0
          pip install protobuf==6.31.1 zss pre-commit "wandb>=0.16.0" tenacity==8.3.0 loguru openpyxl latex2sympy2 zstandard transformers-stream-generator tqdm-multiprocess pycocoevalcap
          pip install yt-dlp sentencepiece==0.1.99 nltk av ftfy sqlitedict==2.1.0 "sacrebleu>=1.5.0" pytablewriter black==24.1.0 isort==5.13.2 "peft>=0.2.0" "accelerate>=0.29.1"
          pip install jsonlines httpx==0.25.0 "evaluate>=0.4.0" datasets==2.16.1 numexpr xgrammar==0.1.32 numpy==1.26.4 dotenv
          git clone --branch v0.3.3 --depth 1 https://github.com/EvolvingLMMs-Lab/lmms-eval.git
          cd ./lmms-eval
          nohup pip install . > lmmslog.txt 2>&1 &
          sleep 120
          export PYTHONPATH=$PYTHONPATH:$(pwd)
          cd ../
          cd test
          python3 run_suite.py --hw npu --suite nightly-4-npu-a3 --nightly --continue-on-error --timeout-per-file 3600 --auto-partition-id ${{ matrix.part }} --auto-partition-size 1

  nightly-8-npu-a3:
    needs: [set-image-config]
    if: ${{ (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') }}
    runs-on: linux-aarch64-a3-8
    strategy:
      fail-fast: false
      matrix:
        part: [0]
    container:
      image: ${{ needs.set-image-config.outputs.image_a3 }}
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ needs.set-image-config.outputs.ref || github.ref }}
      - name: Install dependencies
        env:
          TORCH_CACHE_URL: "http://cache-service.nginx-pypi-cache.svc.cluster.local/whl/cpu"
          PYPI_CACHE_URL: "http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple"
          GITHUB_PROXY_URL: "https://gh-proxy.test.osinfra.cn/"
        run: |
          # Speed up by using infra cache services
          CACHING_URL="cache-service.nginx-pypi-cache.svc.cluster.local"
          sed -Ei "s@(ports|archive).ubuntu.com@${CACHING_URL}:8081@g" /etc/apt/sources.list
          pip config set global.index-url http://${CACHING_URL}/pypi/simple
          pip config set global.trusted-host "${CACHING_URL}"
          if [ "${{ needs.set-image-config.outputs.skip_install_flag }}" != "true" ]; then
            bash scripts/ci/npu/npu_ci_install_dependency.sh a3
          fi
          # Copy the required ShareGPT file from our daily cache
          cp ~/.cache/modelscope/hub/datasets/otavia/ShareGPT_Vicuna_unfiltered/ShareGPT_V3_unfiltered_cleaned_split.json /tmp
          # Copy the GSM8K dataset
          cp ~/.cache/modelscope/hub/datasets/tmp/test.jsonl /tmp
      - name: Print Log Information
        run: |
          bash scripts/ci/npu/npu_log_print.sh
      - name: Run test
        timeout-minutes: 240
        env:
          SGLANG_USE_MODELSCOPE: true
          SGLANG_IS_IN_CI: true
          HF_ENDPOINT: https://hf-mirror.com
          TORCH_EXTENSIONS_DIR: /tmp/torch_extensions
          PYTORCH_NPU_ALLOC_CONF: "expandable_segments:True"
          STREAMS_PER_DEVICE: 32
        run: |
          pip install sglang_router
          hf download lmms-lab/MMMU --repo-type dataset
          pip install sentence_transformers torchaudio==2.8.0
          pip install protobuf==6.31.1 zss pre-commit "wandb>=0.16.0" tenacity==8.3.0 loguru openpyxl latex2sympy2 zstandard transformers-stream-generator tqdm-multiprocess pycocoevalcap
          pip install yt-dlp sentencepiece==0.1.99 nltk av ftfy sqlitedict==2.1.0 "sacrebleu>=1.5.0" pytablewriter black==24.1.0 isort==5.13.2 "peft>=0.2.0" "accelerate>=0.29.1"
          pip install jsonlines httpx==0.25.0 "evaluate>=0.4.0" datasets==2.16.1 numexpr xgrammar==0.1.32 numpy==1.26.4 dotenv
          git clone --branch v0.3.3 --depth 1 https://github.com/EvolvingLMMs-Lab/lmms-eval.git
          cd ./lmms-eval
          nohup pip install . > lmmslog.txt 2>&1 &
          sleep 120
          export PYTHONPATH=$PYTHONPATH:$(pwd)
          cd ../
          cd test
          python3 run_suite.py --hw npu --suite nightly-8-npu-a3 --nightly --continue-on-error --timeout-per-file 3600 --auto-partition-id ${{ matrix.part }} --auto-partition-size 1

  nightly-16-npu-a3:
    needs: [set-image-config]
    if: ${{ (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') }}
    runs-on: linux-aarch64-a3-16
    strategy:
      fail-fast: false
      matrix:
        part: [0, 1]
    container:
      image: ${{ needs.set-image-config.outputs.image_a3 }}
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ needs.set-image-config.outputs.ref || github.ref }}
      - name: Install dependencies
        env:
          TORCH_CACHE_URL: "http://cache-service.nginx-pypi-cache.svc.cluster.local/whl/cpu"
          PYPI_CACHE_URL: "http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple"
          GITHUB_PROXY_URL: "https://gh-proxy.test.osinfra.cn/"
        run: |
          # Speed up by using infra cache services
          CACHING_URL="cache-service.nginx-pypi-cache.svc.cluster.local"
          sed -Ei "s@(ports|archive).ubuntu.com@${CACHING_URL}:8081@g" /etc/apt/sources.list
          pip config set global.index-url http://${CACHING_URL}/pypi/simple
          pip config set global.trusted-host "${CACHING_URL}"
          if [ "${{ needs.set-image-config.outputs.skip_install_flag }}" != "true" ]; then
            bash scripts/ci/npu/npu_ci_install_dependency.sh a3
          fi
          # Copy the required ShareGPT file from our daily cache
          cp ~/.cache/modelscope/hub/datasets/otavia/ShareGPT_Vicuna_unfiltered/ShareGPT_V3_unfiltered_cleaned_split.json /tmp
          # Copy the GSM8K dataset
          cp ~/.cache/modelscope/hub/datasets/tmp/test.jsonl /tmp
      - name: Print Log Information
        run: |
          bash scripts/ci/npu/npu_log_print.sh
      - name: Run test
        timeout-minutes: 240
        env:
          SGLANG_USE_MODELSCOPE: true
          SGLANG_IS_IN_CI: true
          HF_ENDPOINT: https://hf-mirror.com
          TORCH_EXTENSIONS_DIR: /tmp/torch_extensions
          PYTORCH_NPU_ALLOC_CONF: "expandable_segments:True"
          STREAMS_PER_DEVICE: 32
        run: |
          pip install sglang_router
          hf download lmms-lab/MMMU --repo-type dataset
          pip install sentence_transformers torchaudio==2.8.0
          pip install protobuf==6.31.1 zss pre-commit "wandb>=0.16.0" tenacity==8.3.0 loguru openpyxl latex2sympy2 zstandard transformers-stream-generator tqdm-multiprocess pycocoevalcap
          pip install yt-dlp sentencepiece==0.1.99 nltk av ftfy sqlitedict==2.1.0 "sacrebleu>=1.5.0" pytablewriter black==24.1.0 isort==5.13.2 "peft>=0.2.0" "accelerate>=0.29.1"
          pip install jsonlines httpx==0.25.0 "evaluate>=0.4.0" datasets==2.16.1 numexpr xgrammar==0.1.32 numpy==1.26.4 dotenv
          git clone --branch v0.3.3 --depth 1 https://github.com/EvolvingLMMs-Lab/lmms-eval.git
          cd ./lmms-eval
          nohup pip install . > lmmslog.txt 2>&1 &
          sleep 120
          export PYTHONPATH=$PYTHONPATH:$(pwd)
          cd ../
          cd test
          python3 run_suite.py --hw npu --suite nightly-16-npu-a3 --nightly --continue-on-error --timeout-per-file 3600 --auto-partition-id ${{ matrix.part }} --auto-partition-size 2

  check-all-jobs:
    if: github.repository == 'sgl-project/sglang' && always()
    needs:
      - nightly-1-npu-a3
      - nightly-2-npu-a3
      - nightly-4-npu-a3
      - nightly-8-npu-a3
      - nightly-16-npu-a3
    runs-on: ubuntu-latest
    container:
      image: docker.m.daocloud.io/ubuntu:22.04
    steps:
      - name: Check if any job failed
        run: |
          if [[ "${{ contains(needs.*.result, 'failure') }}" == "true" ]]; then
            echo "One or more nightly test jobs failed"
            exit 1
          fi
          if [[ "${{ contains(needs.*.result, 'cancelled') }}" == "true" ]]; then
            echo "One or more nightly test jobs were cancelled"
            exit 1
          fi
          echo "All nightly test jobs passed"