[diffusion] refactor: separate runtime metadata from arch config #60119
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
name: PR Test (NPU)

on:
  push:
    branches: [ main ]
  pull_request:
    branches: [ main ]
  workflow_dispatch:
  workflow_call:
    inputs:
      ref:
        description: 'Git ref (branch, tag, or SHA) to test. If not provided, uses the default branch.'
        required: false
        type: string
        default: ''
      run_all_tests:
        description: "Run all tests (for releasing or testing purpose)"
        required: false
        type: boolean
        default: false

concurrency:
  group: pr-test-npu-${{ inputs.ref || github.ref }}
  # NOTE(review): when invoked via workflow_call, github.event_name is
  # inherited from the CALLER's event, so this expression cannot actually
  # detect workflow_call (see the run-mode step's comment in this file).
  # Consider guarding on `inputs.ref == ''` instead — TODO confirm intent.
  cancel-in-progress: ${{ github.event_name != 'workflow_call' }}
jobs:
  # ==================== Check Changes ==================== #
  # Decides which downstream test groups run: everything when run_all_tests
  # is set, otherwise only the groups whose watched paths changed.
  check-changes:
    runs-on: ubuntu-latest
    outputs:
      changes_exist: ${{ steps.filter.outputs.main_package == 'true' || steps.filter.outputs.multimodal_gen == 'true' || steps.run-mode.outputs.run_all_tests == 'true' }}
      main_package: ${{ steps.filter.outputs.main_package == 'true' || steps.run-mode.outputs.run_all_tests == 'true' }}
      multimodal_gen: ${{ steps.filter.outputs.multimodal_gen == 'true' || steps.run-mode.outputs.run_all_tests == 'true' }}
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}
      - name: Determine run mode
        id: run-mode
        run: |
          # Run ALL tests only when explicitly requested via the
          # run_all_tests input (release/verification runs); otherwise
          # fall through to path-based filtering below.
          if [[ "${{ inputs.run_all_tests }}" == "true" ]]; then
            echo "run_all_tests=true" >> $GITHUB_OUTPUT
            echo "Run mode: ALL TESTS (run_all_tests=${{ inputs.run_all_tests }})"
          else
            echo "run_all_tests=false" >> $GITHUB_OUTPUT
            echo "Run mode: FILTERED (triggered by ${{ github.event_name }})"
          fi
      - name: Detect file changes
        id: filter
        uses: dorny/paths-filter@v3
        # Skipped when running everything; its outputs then evaluate to ''
        # and the job-level `|| run_all_tests` fallbacks take over.
        if: steps.run-mode.outputs.run_all_tests != 'true'
        with:
          filters: |
            main_package:
              - "python/sglang/!(multimodal_gen)/**/!(*.md)"
              - "python/pyproject_npu.toml"
              - "scripts/ci/npu/npu_ci_install_dependency.sh"
              - "test/srt/ascend/**"
              - ".github/workflows/pr-test-npu.yml"
            multimodal_gen:
              - "python/sglang/multimodal_gen/**/*.!(md|ipynb)"
              - "python/sglang/srt/**"
              - "python/pyproject_npu.toml"
              - "scripts/ci/npu/npu_ci_install_dependency.sh"
              - ".github/workflows/pr-test-npu.yml"
| # ==================== PR Gate ==================== # | |
| pr-gate: | |
| needs: check-changes | |
| if: needs.check-changes.outputs.changes_exist == 'true' | |
| uses: ./.github/workflows/pr-gate.yml | |
| secrets: inherit | |
| stage-b-test-1-npu-a2: | |
| needs: [check-changes, pr-gate] | |
| if: needs.check-changes.outputs.main_package == 'true' | |
| runs-on: linux-aarch64-a2-1 | |
| strategy: | |
| fail-fast: false | |
| matrix: | |
| part: [ 0, 1 ] | |
| container: | |
| image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.5.0-910b-ubuntu22.04-py3.11 | |
| steps: | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| with: | |
| ref: ${{ inputs.ref || github.ref }} | |
| - name: Mark repository safe | |
| run: | | |
| git config --system --add safe.directory ${GITHUB_WORKSPACE} | |
| - name: Install dependencies | |
| env: | |
| TORCH_CACHE_URL: "http://cache-service.nginx-pypi-cache.svc.cluster.local/whl/cpu" | |
| PYPI_CACHE_URL: "http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple" | |
| UV_INDEX_URL: "http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple" | |
| GITHUB_PROXY_URL: "https://gh-proxy.test.osinfra.cn/" | |
| run: | | |
| # speed up by using infra cache services | |
| CACHING_URL="cache-service.nginx-pypi-cache.svc.cluster.local" | |
| sed -Ei "s@(ports|archive).ubuntu.com@${CACHING_URL}:8081@g" /etc/apt/sources.list | |
| pip config set global.index-url http://${CACHING_URL}/pypi/simple | |
| pip config set global.trusted-host "${CACHING_URL}" | |
| bash scripts/ci/npu/npu_ci_install_dependency.sh 910b | |
| # copy required file from our daily cache | |
| cp ~/.cache/modelscope/hub/datasets/otavia/ShareGPT_Vicuna_unfiltered/ShareGPT_V3_unfiltered_cleaned_split.json /tmp | |
| # copy gsm8k dataset | |
| cp ~/.cache/modelscope/hub/datasets/tmp/test.jsonl /tmp | |
| - name: Run test | |
| timeout-minutes: 60 | |
| env: | |
| SGLANG_USE_MODELSCOPE: true | |
| SGLANG_IS_IN_CI: true | |
| HF_ENDPOINT: https://hf-mirror.com | |
| TORCH_EXTENSIONS_DIR: /tmp/torch_extensions | |
| PYTORCH_NPU_ALLOC_CONF: "expandable_segments:True" | |
| STREAMS_PER_DEVICE: 32 | |
| run: | | |
| cd test | |
| python3 run_suite.py --hw npu --suite stage-b-test-1-npu-a2 --auto-partition-id ${{ matrix.part }} --auto-partition-size 2 | |
| stage-b-test-2-npu-a2: | |
| needs: [check-changes, pr-gate] | |
| if: needs.check-changes.outputs.main_package == 'true' | |
| runs-on: linux-aarch64-a2-2 | |
| strategy: | |
| fail-fast: true | |
| matrix: | |
| part: [0, 1] | |
| container: | |
| image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.5.0-910b-ubuntu22.04-py3.11 | |
| steps: | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| with: | |
| ref: ${{ inputs.ref || github.ref }} | |
| - name: Mark repository safe | |
| run: | | |
| git config --system --add safe.directory ${GITHUB_WORKSPACE} | |
| - name: Install dependencies | |
| env: | |
| TORCH_CACHE_URL: "http://cache-service.nginx-pypi-cache.svc.cluster.local/whl/cpu" | |
| PYPI_CACHE_URL: "http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple" | |
| UV_INDEX_URL: "http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple" | |
| GITHUB_PROXY_URL: "https://gh-proxy.test.osinfra.cn/" | |
| run: | | |
| # speed up by using infra cache services | |
| CACHING_URL="cache-service.nginx-pypi-cache.svc.cluster.local" | |
| sed -Ei "s@(ports|archive).ubuntu.com@${CACHING_URL}:8081@g" /etc/apt/sources.list | |
| pip config set global.index-url http://${CACHING_URL}/pypi/simple | |
| pip config set global.trusted-host "${CACHING_URL}" | |
| bash scripts/ci/npu/npu_ci_install_dependency.sh 910b | |
| # copy required file from our daily cache | |
| cp ~/.cache/modelscope/hub/datasets/otavia/ShareGPT_Vicuna_unfiltered/ShareGPT_V3_unfiltered_cleaned_split.json /tmp | |
| # copy gsm8k dataset | |
| cp ~/.cache/modelscope/hub/datasets/tmp/test.jsonl /tmp | |
| - name: Run test | |
| timeout-minutes: 60 | |
| env: | |
| SGLANG_USE_MODELSCOPE: true | |
| SGLANG_IS_IN_CI: true | |
| HF_ENDPOINT: https://hf-mirror.com | |
| TORCH_EXTENSIONS_DIR: /tmp/torch_extensions | |
| PYTORCH_NPU_ALLOC_CONF: "expandable_segments:True" | |
| STREAMS_PER_DEVICE: 32 | |
| run: | | |
| cd test | |
| python3 run_suite.py --hw npu --suite stage-b-test-2-npu-a2 --auto-partition-id ${{ matrix.part }} --auto-partition-size 2 | |
| stage-b-test-4-npu-a3: | |
| needs: [check-changes, pr-gate] | |
| if: needs.check-changes.outputs.main_package == 'true' | |
| runs-on: linux-aarch64-a3-4 | |
| container: | |
| image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.5.0-a3-ubuntu22.04-py3.11 | |
| steps: | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| with: | |
| ref: ${{ inputs.ref || github.ref }} | |
| - name: Mark repository safe | |
| run: | | |
| git config --system --add safe.directory ${GITHUB_WORKSPACE} | |
| - name: Install dependencies | |
| env: | |
| TORCH_CACHE_URL: "http://cache-service.nginx-pypi-cache.svc.cluster.local/whl/cpu" | |
| PYPI_CACHE_URL: "http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple" | |
| UV_INDEX_URL: "http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple" | |
| GITHUB_PROXY_URL: "https://gh-proxy.test.osinfra.cn/" | |
| run: | | |
| # speed up by using infra cache services | |
| CACHING_URL="cache-service.nginx-pypi-cache.svc.cluster.local" | |
| sed -Ei "s@(ports|archive).ubuntu.com@${CACHING_URL}:8081@g" /etc/apt/sources.list | |
| pip config set global.index-url http://${CACHING_URL}/pypi/simple | |
| pip config set global.trusted-host "${CACHING_URL}" | |
| bash scripts/ci/npu/npu_ci_install_dependency.sh a3 | |
| # copy required file from our daily cache | |
| cp ~/.cache/modelscope/hub/datasets/otavia/ShareGPT_Vicuna_unfiltered/ShareGPT_V3_unfiltered_cleaned_split.json /tmp | |
| # copy gsm8k dataset | |
| cp ~/.cache/modelscope/hub/datasets/tmp/test.jsonl /tmp | |
| - name: Run test | |
| timeout-minutes: 60 | |
| env: | |
| SGLANG_USE_MODELSCOPE: true | |
| SGLANG_IS_IN_CI: true | |
| HF_ENDPOINT: https://hf-mirror.com | |
| TORCH_EXTENSIONS_DIR: /tmp/torch_extensions | |
| PYTORCH_NPU_ALLOC_CONF: "expandable_segments:True" | |
| STREAMS_PER_DEVICE: 32 | |
| run: | | |
| cd test | |
| python3 run_suite.py --hw npu --suite stage-b-test-4-npu-a3 --timeout-per-file 3600 | |
| stage-b-test-16-npu-a3: | |
| needs: [check-changes, pr-gate] | |
| if: needs.check-changes.outputs.main_package == 'true' | |
| runs-on: linux-aarch64-a3-16 | |
| container: | |
| image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.5.0-a3-ubuntu22.04-py3.11 | |
| steps: | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| with: | |
| ref: ${{ inputs.ref || github.ref }} | |
| - name: Mark repository safe | |
| run: | | |
| git config --system --add safe.directory ${GITHUB_WORKSPACE} | |
| - name: Install dependencies | |
| env: | |
| TORCH_CACHE_URL: "http://cache-service.nginx-pypi-cache.svc.cluster.local/whl/cpu" | |
| PYPI_CACHE_URL: "http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple" | |
| UV_INDEX_URL: "http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple" | |
| GITHUB_PROXY_URL: "https://gh-proxy.test.osinfra.cn/" | |
| run: | | |
| # speed up by using infra cache services | |
| CACHING_URL="cache-service.nginx-pypi-cache.svc.cluster.local" | |
| sed -Ei "s@(ports|archive).ubuntu.com@${CACHING_URL}:8081@g" /etc/apt/sources.list | |
| pip config set global.index-url http://${CACHING_URL}/pypi/simple | |
| pip config set global.trusted-host "${CACHING_URL}" | |
| bash scripts/ci/npu/npu_ci_install_dependency.sh a3 | |
| # copy required file from our daily cache | |
| cp ~/.cache/modelscope/hub/datasets/otavia/ShareGPT_Vicuna_unfiltered/ShareGPT_V3_unfiltered_cleaned_split.json /tmp | |
| # copy gsm8k dataset | |
| cp ~/.cache/modelscope/hub/datasets/tmp/test.jsonl /tmp | |
| - name: Run test | |
| timeout-minutes: 60 | |
| env: | |
| SGLANG_USE_MODELSCOPE: true | |
| SGLANG_IS_IN_CI: true | |
| HF_ENDPOINT: https://hf-mirror.com | |
| TORCH_EXTENSIONS_DIR: /tmp/torch_extensions | |
| PYTORCH_NPU_ALLOC_CONF: "expandable_segments:True" | |
| STREAMS_PER_DEVICE: 32 | |
| run: | | |
| cd test | |
| python3 run_suite.py --hw npu --suite stage-b-test-16-npu-a3 --timeout-per-file 3600 | |
| multimodal-gen-test-1-npu-a3: | |
| needs: [check-changes, pr-gate] | |
| if: needs.check-changes.outputs.multimodal_gen == 'true' | |
| runs-on: linux-aarch64-a3-2 | |
| container: | |
| image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-a3-ubuntu22.04-py3.11 | |
| steps: | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| - name: Mark repository safe | |
| run: | | |
| git config --system --add safe.directory ${GITHUB_WORKSPACE} | |
| - name: Install dependencies | |
| env: | |
| TORCH_CACHE_URL: "http://cache-service.nginx-pypi-cache.svc.cluster.local/whl/cpu" | |
| PYPI_CACHE_URL: "http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple" | |
| UV_INDEX_URL: "http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple" | |
| GITHUB_PROXY_URL: "https://gh-proxy.test.osinfra.cn/" | |
| run: | | |
| # speed up by using infra cache services | |
| CACHING_URL="cache-service.nginx-pypi-cache.svc.cluster.local" | |
| sed -Ei "s@(ports|archive).ubuntu.com@${CACHING_URL}:8081@g" /etc/apt/sources.list | |
| pip config set global.index-url http://${CACHING_URL}/pypi/simple | |
| pip config set global.trusted-host "${CACHING_URL}" | |
| bash scripts/ci/npu/npu_ci_install_dependency.sh a3 diffusion | |
| # copy required file from our daily cache | |
| cp ~/.cache/modelscope/hub/datasets/otavia/ShareGPT_Vicuna_unfiltered/ShareGPT_V3_unfiltered_cleaned_split.json /tmp | |
| # copy gsm8k dataset | |
| cp ~/.cache/modelscope/hub/datasets/tmp/test.jsonl /tmp | |
| - name: Run test | |
| timeout-minutes: 60 | |
| env: | |
| SGLANG_USE_MODELSCOPE: true | |
| SGLANG_IS_IN_CI: true | |
| HF_ENDPOINT: https://hf-mirror.com | |
| TORCH_EXTENSIONS_DIR: /tmp/torch_extensions | |
| PYTORCH_NPU_ALLOC_CONF: "expandable_segments:True" | |
| STREAMS_PER_DEVICE: 32 | |
| run: | | |
| export PATH="/usr/local/Ascend/8.3.RC1/compiler/bishengir/bin:${PATH}" | |
| cd python | |
| python3 sglang/multimodal_gen/test/run_suite.py --suite 1-npu | |
| multimodal-gen-test-2-npu-a3: | |
| needs: [check-changes, pr-gate] | |
| if: needs.check-changes.outputs.multimodal_gen == 'true' | |
| runs-on: linux-aarch64-a3-16 | |
| container: | |
| image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-a3-ubuntu22.04-py3.11 | |
| steps: | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| - name: Mark repository safe | |
| run: | | |
| git config --system --add safe.directory ${GITHUB_WORKSPACE} | |
| - name: Install dependencies | |
| env: | |
| TORCH_CACHE_URL: "http://cache-service.nginx-pypi-cache.svc.cluster.local/whl/cpu" | |
| PYPI_CACHE_URL: "http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple" | |
| UV_INDEX_URL: "http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple" | |
| GITHUB_PROXY_URL: "https://gh-proxy.test.osinfra.cn/" | |
| run: | | |
| # speed up by using infra cache services | |
| CACHING_URL="cache-service.nginx-pypi-cache.svc.cluster.local" | |
| sed -Ei "s@(ports|archive).ubuntu.com@${CACHING_URL}:8081@g" /etc/apt/sources.list | |
| pip config set global.index-url http://${CACHING_URL}/pypi/simple | |
| pip config set global.trusted-host "${CACHING_URL}" | |
| bash scripts/ci/npu/npu_ci_install_dependency.sh a3 diffusion | |
| # copy required file from our daily cache | |
| cp ~/.cache/modelscope/hub/datasets/otavia/ShareGPT_Vicuna_unfiltered/ShareGPT_V3_unfiltered_cleaned_split.json /tmp | |
| # copy gsm8k dataset | |
| cp ~/.cache/modelscope/hub/datasets/tmp/test.jsonl /tmp | |
| - name: Run test | |
| timeout-minutes: 60 | |
| env: | |
| SGLANG_USE_MODELSCOPE: true | |
| SGLANG_IS_IN_CI: true | |
| HF_ENDPOINT: https://hf-mirror.com | |
| TORCH_EXTENSIONS_DIR: /tmp/torch_extensions | |
| PYTORCH_NPU_ALLOC_CONF: "expandable_segments:True" | |
| STREAMS_PER_DEVICE: 32 | |
| run: | | |
| export PATH="/usr/local/Ascend/8.3.RC1/compiler/bishengir/bin:${PATH}" | |
| cd python | |
| python3 sglang/multimodal_gen/test/run_suite.py --suite 2-npu | |
| multimodal-gen-test-8-npu-a3: | |
| needs: [check-changes, pr-gate] | |
| if: needs.check-changes.outputs.multimodal_gen == 'true' | |
| runs-on: linux-aarch64-a3-8 | |
| container: | |
| image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.5.0-a3-ubuntu22.04-py3.11 | |
| steps: | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| - name: Mark repository safe | |
| run: | | |
| git config --system --add safe.directory ${GITHUB_WORKSPACE} | |
| - name: Install dependencies | |
| env: | |
| TORCH_CACHE_URL: "http://cache-service.nginx-pypi-cache.svc.cluster.local/whl/cpu" | |
| PYPI_CACHE_URL: "http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple" | |
| UV_INDEX_URL: "http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple" | |
| GITHUB_PROXY_URL: "https://gh-proxy.test.osinfra.cn/" | |
| run: | | |
| # speed up by using infra cache services | |
| CACHING_URL="cache-service.nginx-pypi-cache.svc.cluster.local" | |
| sed -Ei "s@(ports|archive).ubuntu.com@${CACHING_URL}:8081@g" /etc/apt/sources.list | |
| pip config set global.index-url http://${CACHING_URL}/pypi/simple | |
| pip config set global.trusted-host "${CACHING_URL}" | |
| bash scripts/ci/npu/npu_ci_install_dependency.sh a3 diffusion | |
| # copy required file from our daily cache | |
| cp ~/.cache/modelscope/hub/datasets/otavia/ShareGPT_Vicuna_unfiltered/ShareGPT_V3_unfiltered_cleaned_split.json /tmp | |
| # copy gsm8k dataset | |
| cp ~/.cache/modelscope/hub/datasets/tmp/test.jsonl /tmp | |
| - name: Run test | |
| timeout-minutes: 60 | |
| env: | |
| SGLANG_USE_MODELSCOPE: true | |
| SGLANG_IS_IN_CI: true | |
| HF_ENDPOINT: https://hf-mirror.com | |
| TORCH_EXTENSIONS_DIR: /tmp/torch_extensions | |
| PYTORCH_NPU_ALLOC_CONF: "expandable_segments:True" | |
| STREAMS_PER_DEVICE: 32 | |
| run: | | |
| cd python | |
| python3 sglang/multimodal_gen/test/run_suite.py --suite 8-npu | |
| pr-test-npu-finish: | |
| needs: | |
| [ | |
| check-changes, | |
| stage-b-test-1-npu-a2, | |
| stage-b-test-2-npu-a2, | |
| stage-b-test-4-npu-a3, | |
| stage-b-test-16-npu-a3, | |
| multimodal-gen-test-1-npu-a3, | |
| multimodal-gen-test-2-npu-a3, | |
| multimodal-gen-test-8-npu-a3, | |
| ] | |
| if: always() | |
| runs-on: ubuntu-latest | |
| steps: | |
| - name: Check all dependent job statuses | |
| run: | | |
| # Convert the 'needs' context to a JSON string | |
| json_needs='${{ toJson(needs) }}' | |
| # Get a list of all job names from the JSON keys | |
| job_names=$(echo "$json_needs" | jq -r 'keys_unsorted[]') | |
| for job in $job_names; do | |
| # For each job, extract its result | |
| result=$(echo "$json_needs" | jq -r --arg j "$job" '.[$j].result') | |
| # Print the job name and its result | |
| echo "$job: $result" | |
| # Check for failure or cancellation and exit if found | |
| if [[ "$result" == "failure" || "$result" == "cancelled" ]]; then | |
| echo "The above jobs failed." | |
| exit 1 | |
| fi | |
| done | |
| # If the loop completes, all jobs were successful | |
| echo "All jobs completed successfully" | |
| exit 0 |