# --- GitHub web-view header (scrape artifact), preserved as comments so the
# --- file parses as YAML ---
# PR #60119: [diffusion] refactor: separate runtime metadata from arch config
# Workflow file for this run

name: PR Test (NPU)

# Triggers: pushes/PRs against main, manual dispatch, and reuse from other
# workflows (workflow_call) with an optional ref + run-all-tests override.
on:
  push:
    branches: [ main ]
  pull_request:
    branches: [ main ]
  workflow_dispatch:
  workflow_call:
    inputs:
      ref:
        description: 'Git ref (branch, tag, or SHA) to test. If not provided, uses the default branch.'
        required: false
        type: string
        default: ''
      run_all_tests:
        description: "Run all tests (for releasing or testing purpose)"
        required: false
        type: boolean
        default: false

# One run per ref; newer runs cancel older ones except for reusable calls.
concurrency:
  group: pr-test-npu-${{ inputs.ref || github.ref }}
  # NOTE(review): for a called workflow, github.event_name is inherited from
  # the caller and is never 'workflow_call', so this is effectively always
  # true — confirm whether ${{ inputs.ref == '' }} was the intent.
  cancel-in-progress: ${{ github.event_name != 'workflow_call' }}
jobs:
  # ==================== Check Changes ==================== #
  # Decides which downstream test groups run: path filters on PR/push, or
  # everything when run_all_tests is requested via workflow_call.
  check-changes:
    runs-on: ubuntu-latest
    outputs:
      changes_exist: ${{ steps.filter.outputs.main_package == 'true' || steps.filter.outputs.multimodal_gen == 'true' || steps.run-mode.outputs.run_all_tests == 'true' }}
      main_package: ${{ steps.filter.outputs.main_package == 'true' || steps.run-mode.outputs.run_all_tests == 'true' }}
      multimodal_gen: ${{ steps.filter.outputs.multimodal_gen == 'true' || steps.run-mode.outputs.run_all_tests == 'true' }}
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}
      - name: Determine run mode
        id: run-mode
        run: |
          # Run all tests for workflow_call (when ref input is provided)
          # Note: github.event_name is inherited from caller, so we detect workflow_call by checking inputs.ref
          if [[ "${{ inputs.run_all_tests }}" == "true" ]]; then
            echo "run_all_tests=true" >> $GITHUB_OUTPUT
            echo "Run mode: ALL TESTS (run_all_tests=${{ inputs.run_all_tests }})"
          else
            echo "run_all_tests=false" >> $GITHUB_OUTPUT
            echo "Run mode: FILTERED (triggered by ${{ github.event_name }})"
          fi
      - name: Detect file changes
        id: filter
        uses: dorny/paths-filter@v3
        # Skipped in run-all mode; outputs then default empty and the job
        # outputs above fall through to run_all_tests.
        if: steps.run-mode.outputs.run_all_tests != 'true'
        with:
          filters: |
            main_package:
              - "python/sglang/!(multimodal_gen)/**/!(*.md)"
              - "python/pyproject_npu.toml"
              - "scripts/ci/npu/npu_ci_install_dependency.sh"
              - "test/srt/ascend/**"
              - ".github/workflows/pr-test-npu.yml"
            multimodal_gen:
              - "python/sglang/multimodal_gen/**/*.!(md|ipynb)"
              - "python/sglang/srt/**"
              - "python/pyproject_npu.toml"
              - "scripts/ci/npu/npu_ci_install_dependency.sh"
              - ".github/workflows/pr-test-npu.yml"

  # ==================== PR Gate ==================== #
  pr-gate:
    needs: check-changes
    if: needs.check-changes.outputs.changes_exist == 'true'
    uses: ./.github/workflows/pr-gate.yml
    secrets: inherit

  stage-b-test-1-npu-a2:
    needs: [check-changes, pr-gate]
    if: needs.check-changes.outputs.main_package == 'true'
    runs-on: linux-aarch64-a2-1
    strategy:
      fail-fast: false
      matrix:
        part: [ 0, 1 ]
    container:
      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.5.0-910b-ubuntu22.04-py3.11
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}
      - name: Mark repository safe
        run: |
          git config --system --add safe.directory ${GITHUB_WORKSPACE}
      - name: Install dependencies
        env:
          TORCH_CACHE_URL: "http://cache-service.nginx-pypi-cache.svc.cluster.local/whl/cpu"
          PYPI_CACHE_URL: "http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple"
          UV_INDEX_URL: "http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple"
          GITHUB_PROXY_URL: "https://gh-proxy.test.osinfra.cn/"
        run: |
          # speed up by using infra cache services
          CACHING_URL="cache-service.nginx-pypi-cache.svc.cluster.local"
          sed -Ei "s@(ports|archive).ubuntu.com@${CACHING_URL}:8081@g" /etc/apt/sources.list
          pip config set global.index-url http://${CACHING_URL}/pypi/simple
          pip config set global.trusted-host "${CACHING_URL}"
          bash scripts/ci/npu/npu_ci_install_dependency.sh 910b
          # copy required file from our daily cache
          cp ~/.cache/modelscope/hub/datasets/otavia/ShareGPT_Vicuna_unfiltered/ShareGPT_V3_unfiltered_cleaned_split.json /tmp
          # copy gsm8k dataset
          cp ~/.cache/modelscope/hub/datasets/tmp/test.jsonl /tmp
      - name: Run test
        timeout-minutes: 60
        env:
          SGLANG_USE_MODELSCOPE: "true"
          SGLANG_IS_IN_CI: "true"
          HF_ENDPOINT: https://hf-mirror.com
          TORCH_EXTENSIONS_DIR: /tmp/torch_extensions
          PYTORCH_NPU_ALLOC_CONF: "expandable_segments:True"
          STREAMS_PER_DEVICE: "32"
        run: |
          cd test
          python3 run_suite.py --hw npu --suite stage-b-test-1-npu-a2 --auto-partition-id ${{ matrix.part }} --auto-partition-size 2

  stage-b-test-2-npu-a2:
    needs: [check-changes, pr-gate]
    if: needs.check-changes.outputs.main_package == 'true'
    runs-on: linux-aarch64-a2-2
    strategy:
      # Keep both matrix partitions running even if one fails, matching
      # stage-b-test-1-npu-a2 (was inconsistently fail-fast: true).
      fail-fast: false
      matrix:
        part: [ 0, 1 ]
    container:
      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.5.0-910b-ubuntu22.04-py3.11
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}
      - name: Mark repository safe
        run: |
          git config --system --add safe.directory ${GITHUB_WORKSPACE}
      - name: Install dependencies
        env:
          TORCH_CACHE_URL: "http://cache-service.nginx-pypi-cache.svc.cluster.local/whl/cpu"
          PYPI_CACHE_URL: "http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple"
          UV_INDEX_URL: "http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple"
          GITHUB_PROXY_URL: "https://gh-proxy.test.osinfra.cn/"
        run: |
          # speed up by using infra cache services
          CACHING_URL="cache-service.nginx-pypi-cache.svc.cluster.local"
          sed -Ei "s@(ports|archive).ubuntu.com@${CACHING_URL}:8081@g" /etc/apt/sources.list
          pip config set global.index-url http://${CACHING_URL}/pypi/simple
          pip config set global.trusted-host "${CACHING_URL}"
          bash scripts/ci/npu/npu_ci_install_dependency.sh 910b
          # copy required file from our daily cache
          cp ~/.cache/modelscope/hub/datasets/otavia/ShareGPT_Vicuna_unfiltered/ShareGPT_V3_unfiltered_cleaned_split.json /tmp
          # copy gsm8k dataset
          cp ~/.cache/modelscope/hub/datasets/tmp/test.jsonl /tmp
      - name: Run test
        timeout-minutes: 60
        env:
          SGLANG_USE_MODELSCOPE: "true"
          SGLANG_IS_IN_CI: "true"
          HF_ENDPOINT: https://hf-mirror.com
          TORCH_EXTENSIONS_DIR: /tmp/torch_extensions
          PYTORCH_NPU_ALLOC_CONF: "expandable_segments:True"
          STREAMS_PER_DEVICE: "32"
        run: |
          cd test
          python3 run_suite.py --hw npu --suite stage-b-test-2-npu-a2 --auto-partition-id ${{ matrix.part }} --auto-partition-size 2

  stage-b-test-4-npu-a3:
    needs: [check-changes, pr-gate]
    if: needs.check-changes.outputs.main_package == 'true'
    runs-on: linux-aarch64-a3-4
    container:
      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.5.0-a3-ubuntu22.04-py3.11
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}
      - name: Mark repository safe
        run: |
          git config --system --add safe.directory ${GITHUB_WORKSPACE}
      - name: Install dependencies
        env:
          TORCH_CACHE_URL: "http://cache-service.nginx-pypi-cache.svc.cluster.local/whl/cpu"
          PYPI_CACHE_URL: "http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple"
          UV_INDEX_URL: "http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple"
          GITHUB_PROXY_URL: "https://gh-proxy.test.osinfra.cn/"
        run: |
          # speed up by using infra cache services
          CACHING_URL="cache-service.nginx-pypi-cache.svc.cluster.local"
          sed -Ei "s@(ports|archive).ubuntu.com@${CACHING_URL}:8081@g" /etc/apt/sources.list
          pip config set global.index-url http://${CACHING_URL}/pypi/simple
          pip config set global.trusted-host "${CACHING_URL}"
          bash scripts/ci/npu/npu_ci_install_dependency.sh a3
          # copy required file from our daily cache
          cp ~/.cache/modelscope/hub/datasets/otavia/ShareGPT_Vicuna_unfiltered/ShareGPT_V3_unfiltered_cleaned_split.json /tmp
          # copy gsm8k dataset
          cp ~/.cache/modelscope/hub/datasets/tmp/test.jsonl /tmp
      - name: Run test
        timeout-minutes: 60
        env:
          SGLANG_USE_MODELSCOPE: "true"
          SGLANG_IS_IN_CI: "true"
          HF_ENDPOINT: https://hf-mirror.com
          TORCH_EXTENSIONS_DIR: /tmp/torch_extensions
          PYTORCH_NPU_ALLOC_CONF: "expandable_segments:True"
          STREAMS_PER_DEVICE: "32"
        run: |
          cd test
          python3 run_suite.py --hw npu --suite stage-b-test-4-npu-a3 --timeout-per-file 3600

  stage-b-test-16-npu-a3:
    needs: [check-changes, pr-gate]
    if: needs.check-changes.outputs.main_package == 'true'
    runs-on: linux-aarch64-a3-16
    container:
      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.5.0-a3-ubuntu22.04-py3.11
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}
      - name: Mark repository safe
        run: |
          git config --system --add safe.directory ${GITHUB_WORKSPACE}
      - name: Install dependencies
        env:
          TORCH_CACHE_URL: "http://cache-service.nginx-pypi-cache.svc.cluster.local/whl/cpu"
          PYPI_CACHE_URL: "http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple"
          UV_INDEX_URL: "http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple"
          GITHUB_PROXY_URL: "https://gh-proxy.test.osinfra.cn/"
        run: |
          # speed up by using infra cache services
          CACHING_URL="cache-service.nginx-pypi-cache.svc.cluster.local"
          sed -Ei "s@(ports|archive).ubuntu.com@${CACHING_URL}:8081@g" /etc/apt/sources.list
          pip config set global.index-url http://${CACHING_URL}/pypi/simple
          pip config set global.trusted-host "${CACHING_URL}"
          bash scripts/ci/npu/npu_ci_install_dependency.sh a3
          # copy required file from our daily cache
          cp ~/.cache/modelscope/hub/datasets/otavia/ShareGPT_Vicuna_unfiltered/ShareGPT_V3_unfiltered_cleaned_split.json /tmp
          # copy gsm8k dataset
          cp ~/.cache/modelscope/hub/datasets/tmp/test.jsonl /tmp
      - name: Run test
        timeout-minutes: 60
        env:
          SGLANG_USE_MODELSCOPE: "true"
          SGLANG_IS_IN_CI: "true"
          HF_ENDPOINT: https://hf-mirror.com
          TORCH_EXTENSIONS_DIR: /tmp/torch_extensions
          PYTORCH_NPU_ALLOC_CONF: "expandable_segments:True"
          STREAMS_PER_DEVICE: "32"
        run: |
          cd test
          python3 run_suite.py --hw npu --suite stage-b-test-16-npu-a3 --timeout-per-file 3600

  multimodal-gen-test-1-npu-a3:
    needs: [check-changes, pr-gate]
    if: needs.check-changes.outputs.multimodal_gen == 'true'
    runs-on: linux-aarch64-a3-2
    container:
      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-a3-ubuntu22.04-py3.11
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          # Fix: previously omitted, so workflow_call with a ref input tested
          # the default branch here instead of the requested commit.
          ref: ${{ inputs.ref || github.ref }}
      - name: Mark repository safe
        run: |
          git config --system --add safe.directory ${GITHUB_WORKSPACE}
      - name: Install dependencies
        env:
          TORCH_CACHE_URL: "http://cache-service.nginx-pypi-cache.svc.cluster.local/whl/cpu"
          PYPI_CACHE_URL: "http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple"
          UV_INDEX_URL: "http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple"
          GITHUB_PROXY_URL: "https://gh-proxy.test.osinfra.cn/"
        run: |
          # speed up by using infra cache services
          CACHING_URL="cache-service.nginx-pypi-cache.svc.cluster.local"
          sed -Ei "s@(ports|archive).ubuntu.com@${CACHING_URL}:8081@g" /etc/apt/sources.list
          pip config set global.index-url http://${CACHING_URL}/pypi/simple
          pip config set global.trusted-host "${CACHING_URL}"
          bash scripts/ci/npu/npu_ci_install_dependency.sh a3 diffusion
          # copy required file from our daily cache
          cp ~/.cache/modelscope/hub/datasets/otavia/ShareGPT_Vicuna_unfiltered/ShareGPT_V3_unfiltered_cleaned_split.json /tmp
          # copy gsm8k dataset
          cp ~/.cache/modelscope/hub/datasets/tmp/test.jsonl /tmp
      - name: Run test
        timeout-minutes: 60
        env:
          SGLANG_USE_MODELSCOPE: "true"
          SGLANG_IS_IN_CI: "true"
          HF_ENDPOINT: https://hf-mirror.com
          TORCH_EXTENSIONS_DIR: /tmp/torch_extensions
          PYTORCH_NPU_ALLOC_CONF: "expandable_segments:True"
          STREAMS_PER_DEVICE: "32"
        run: |
          export PATH="/usr/local/Ascend/8.3.RC1/compiler/bishengir/bin:${PATH}"
          cd python
          python3 sglang/multimodal_gen/test/run_suite.py --suite 1-npu

  multimodal-gen-test-2-npu-a3:
    needs: [check-changes, pr-gate]
    if: needs.check-changes.outputs.multimodal_gen == 'true'
    runs-on: linux-aarch64-a3-16
    container:
      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-a3-ubuntu22.04-py3.11
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          # Fix: previously omitted, so workflow_call with a ref input tested
          # the default branch here instead of the requested commit.
          ref: ${{ inputs.ref || github.ref }}
      - name: Mark repository safe
        run: |
          git config --system --add safe.directory ${GITHUB_WORKSPACE}
      - name: Install dependencies
        env:
          TORCH_CACHE_URL: "http://cache-service.nginx-pypi-cache.svc.cluster.local/whl/cpu"
          PYPI_CACHE_URL: "http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple"
          UV_INDEX_URL: "http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple"
          GITHUB_PROXY_URL: "https://gh-proxy.test.osinfra.cn/"
        run: |
          # speed up by using infra cache services
          CACHING_URL="cache-service.nginx-pypi-cache.svc.cluster.local"
          sed -Ei "s@(ports|archive).ubuntu.com@${CACHING_URL}:8081@g" /etc/apt/sources.list
          pip config set global.index-url http://${CACHING_URL}/pypi/simple
          pip config set global.trusted-host "${CACHING_URL}"
          bash scripts/ci/npu/npu_ci_install_dependency.sh a3 diffusion
          # copy required file from our daily cache
          cp ~/.cache/modelscope/hub/datasets/otavia/ShareGPT_Vicuna_unfiltered/ShareGPT_V3_unfiltered_cleaned_split.json /tmp
          # copy gsm8k dataset
          cp ~/.cache/modelscope/hub/datasets/tmp/test.jsonl /tmp
      - name: Run test
        timeout-minutes: 60
        env:
          SGLANG_USE_MODELSCOPE: "true"
          SGLANG_IS_IN_CI: "true"
          HF_ENDPOINT: https://hf-mirror.com
          TORCH_EXTENSIONS_DIR: /tmp/torch_extensions
          PYTORCH_NPU_ALLOC_CONF: "expandable_segments:True"
          STREAMS_PER_DEVICE: "32"
        run: |
          export PATH="/usr/local/Ascend/8.3.RC1/compiler/bishengir/bin:${PATH}"
          cd python
          python3 sglang/multimodal_gen/test/run_suite.py --suite 2-npu

  multimodal-gen-test-8-npu-a3:
    needs: [check-changes, pr-gate]
    if: needs.check-changes.outputs.multimodal_gen == 'true'
    runs-on: linux-aarch64-a3-8
    container:
      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.5.0-a3-ubuntu22.04-py3.11
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          # Fix: previously omitted, so workflow_call with a ref input tested
          # the default branch here instead of the requested commit.
          ref: ${{ inputs.ref || github.ref }}
      - name: Mark repository safe
        run: |
          git config --system --add safe.directory ${GITHUB_WORKSPACE}
      - name: Install dependencies
        env:
          TORCH_CACHE_URL: "http://cache-service.nginx-pypi-cache.svc.cluster.local/whl/cpu"
          PYPI_CACHE_URL: "http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple"
          UV_INDEX_URL: "http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple"
          GITHUB_PROXY_URL: "https://gh-proxy.test.osinfra.cn/"
        run: |
          # speed up by using infra cache services
          CACHING_URL="cache-service.nginx-pypi-cache.svc.cluster.local"
          sed -Ei "s@(ports|archive).ubuntu.com@${CACHING_URL}:8081@g" /etc/apt/sources.list
          pip config set global.index-url http://${CACHING_URL}/pypi/simple
          pip config set global.trusted-host "${CACHING_URL}"
          bash scripts/ci/npu/npu_ci_install_dependency.sh a3 diffusion
          # copy required file from our daily cache
          cp ~/.cache/modelscope/hub/datasets/otavia/ShareGPT_Vicuna_unfiltered/ShareGPT_V3_unfiltered_cleaned_split.json /tmp
          # copy gsm8k dataset
          cp ~/.cache/modelscope/hub/datasets/tmp/test.jsonl /tmp
      - name: Run test
        timeout-minutes: 60
        env:
          SGLANG_USE_MODELSCOPE: "true"
          SGLANG_IS_IN_CI: "true"
          HF_ENDPOINT: https://hf-mirror.com
          TORCH_EXTENSIONS_DIR: /tmp/torch_extensions
          PYTORCH_NPU_ALLOC_CONF: "expandable_segments:True"
          STREAMS_PER_DEVICE: "32"
        run: |
          cd python
          python3 sglang/multimodal_gen/test/run_suite.py --suite 8-npu

  # Aggregation gate: fails if any dependent job failed or was cancelled,
  # passes when they all succeeded or were skipped by the change filter.
  pr-test-npu-finish:
    needs:
      [
        check-changes,
        stage-b-test-1-npu-a2,
        stage-b-test-2-npu-a2,
        stage-b-test-4-npu-a3,
        stage-b-test-16-npu-a3,
        multimodal-gen-test-1-npu-a3,
        multimodal-gen-test-2-npu-a3,
        multimodal-gen-test-8-npu-a3,
      ]
    if: always()
    runs-on: ubuntu-latest
    steps:
      - name: Check all dependent job statuses
        run: |
          # Convert the 'needs' context to a JSON string
          json_needs='${{ toJson(needs) }}'
          # Get a list of all job names from the JSON keys
          job_names=$(echo "$json_needs" | jq -r 'keys_unsorted[]')
          for job in $job_names; do
            # For each job, extract its result
            result=$(echo "$json_needs" | jq -r --arg j "$job" '.[$j].result')
            # Print the job name and its result
            echo "$job: $result"
            # Check for failure or cancellation and exit if found
            if [[ "$result" == "failure" || "$result" == "cancelled" ]]; then
              echo "The above jobs failed."
              exit 1
            fi
          done
          # If the loop completes, all jobs were successful
          echo "All jobs completed successfully"
          exit 0