# Fix KeyError when logprobs=false in completions endpoint (#16095)
# NPU (Ascend) per-commit test workflow.
name: PR Test (NPU)

on:
  push:
    branches: [ main ]
  pull_request:
    branches: [ main ]
  workflow_dispatch:
  # Reusable entry point so other workflows (e.g. release pipelines) can
  # invoke these tests directly with an explicit ref.
  workflow_call:
    inputs:
      ref:
        description: 'Git ref (branch, tag, or SHA) to test. If not provided, uses the default branch.'
        required: false
        type: string
        default: ''
      run_all_tests:
        description: "Run all tests (for releasing or testing purpose)"
        required: false
        type: boolean
        default: false

concurrency:
  group: pr-test-npu-${{ inputs.ref || github.ref }}
  # NOTE(review): on workflow_call, github.event_name is inherited from the
  # caller and is never the literal 'workflow_call', so this expression is
  # effectively always true — confirm whether callers should be cancelable.
  cancel-in-progress: ${{ github.event_name != 'workflow_call' }}
jobs:
  # ==================== Check Changes ==================== #
  check-changes:
    runs-on: ubuntu-latest
    outputs:
      # 'true' when main-package files changed, or when all tests are forced
      # via the run_all_tests input (filter step is skipped in that case).
      main_package: ${{ steps.filter.outputs.main_package || steps.run-mode.outputs.run_all_tests }}
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}
      - name: Determine run mode
        id: run-mode
        run: |
          # Run all tests for workflow_call (when ref input is provided)
          # Note: github.event_name is inherited from caller, so we detect workflow_call by checking inputs.ref
          if [[ "${{ inputs.run_all_tests }}" == "true" ]]; then
            echo "run_all_tests=true" >> $GITHUB_OUTPUT
            echo "Run mode: ALL TESTS (run_all_tests=${{ inputs.run_all_tests }})"
          else
            echo "run_all_tests=false" >> $GITHUB_OUTPUT
            echo "Run mode: FILTERED (triggered by ${{ github.event_name }})"
          fi
      - name: Detect file changes
        id: filter
        uses: dorny/paths-filter@v3
        # Skip path filtering entirely when all tests were requested.
        if: steps.run-mode.outputs.run_all_tests != 'true'
        with:
          filters: |
            main_package:
              - "python/sglang/!(multimodal_gen)/**"
              - "python/*.toml"
              - "scripts/ci/npu_ci_install_dependency.sh"
              - "test/srt/ascend/**"
              - ".github/workflows/pr-test-npu.yml"
| # ==================== PR Gate ==================== # | |
| pr-gate: | |
| needs: check-changes | |
| if: needs.check-changes.outputs.main_package == 'true' | |
| uses: ./.github/workflows/pr-gate.yml | |
| secrets: inherit | |
| per-commit-1-npu-a2: | |
| needs: [check-changes, pr-gate] | |
| if: needs.check-changes.outputs.main_package == 'true' | |
| runs-on: linux-aarch64-a2-1 | |
| container: | |
| image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-910b-ubuntu22.04-py3.11 | |
| steps: | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| with: | |
| ref: ${{ inputs.ref || github.ref }} | |
| - name: Install dependencies | |
| run: | | |
| # speed up by using infra cache services | |
| CACHING_URL="cache-service.nginx-pypi-cache.svc.cluster.local" | |
| sed -Ei "s@(ports|archive).ubuntu.com@${CACHING_URL}:8081@g" /etc/apt/sources.list | |
| pip config set global.index-url http://${CACHING_URL}/pypi/simple | |
| pip config set global.extra-index-url "https://pypi.tuna.tsinghua.edu.cn/simple" | |
| pip config set global.trusted-host "${CACHING_URL} pypi.tuna.tsinghua.edu.cn" | |
| bash scripts/ci/npu_ci_install_dependency.sh 910b | |
| # copy required file from our daily cache | |
| cp ~/.cache/modelscope/hub/datasets/otavia/ShareGPT_Vicuna_unfiltered/ShareGPT_V3_unfiltered_cleaned_split.json /tmp | |
| # copy download through proxy | |
| curl -o /tmp/test.jsonl -L https://raw.githubusercontent.com/openai/grade-school-math/master/grade_school_math/data/test.jsonl | |
| - name: Run test | |
| timeout-minutes: 60 | |
| env: | |
| SGLANG_USE_MODELSCOPE: true | |
| SGLANG_IS_IN_CI: true | |
| HF_ENDPOINT: https://hf-mirror.com | |
| TORCH_EXTENSIONS_DIR: /tmp/torch_extensions | |
| PYTORCH_NPU_ALLOC_CONF: "expandable_segments:True" | |
| STREAMS_PER_DEVICE: 32 | |
| run: | | |
| export PATH="/usr/local/Ascend/8.3.RC1/compiler/bishengir/bin:${PATH}" | |
| cd test/srt | |
| python3 run_suite.py --suite per-commit-1-npu-a2 | |
| per-commit-2-npu-a2: | |
| needs: [check-changes, pr-gate] | |
| if: needs.check-changes.outputs.main_package == 'true' | |
| runs-on: linux-aarch64-a2-2 | |
| strategy: | |
| fail-fast: true | |
| matrix: | |
| part: [0, 1, 2] | |
| container: | |
| image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-910b-ubuntu22.04-py3.11 | |
| steps: | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| with: | |
| ref: ${{ inputs.ref || github.ref }} | |
| - name: Install dependencies | |
| run: | | |
| # speed up by using infra cache services | |
| CACHING_URL="cache-service.nginx-pypi-cache.svc.cluster.local" | |
| sed -Ei "s@(ports|archive).ubuntu.com@${CACHING_URL}:8081@g" /etc/apt/sources.list | |
| pip config set global.index-url http://${CACHING_URL}/pypi/simple | |
| pip config set global.extra-index-url "https://pypi.tuna.tsinghua.edu.cn/simple" | |
| pip config set global.trusted-host "${CACHING_URL} pypi.tuna.tsinghua.edu.cn" | |
| bash scripts/ci/npu_ci_install_dependency.sh 910b | |
| # copy required file from our daily cache | |
| cp ~/.cache/modelscope/hub/datasets/otavia/ShareGPT_Vicuna_unfiltered/ShareGPT_V3_unfiltered_cleaned_split.json /tmp | |
| # copy download through proxy | |
| curl -o /tmp/test.jsonl -L https://raw.githubusercontent.com/openai/grade-school-math/master/grade_school_math/data/test.jsonl | |
| - name: Run test | |
| timeout-minutes: 60 | |
| env: | |
| SGLANG_USE_MODELSCOPE: true | |
| SGLANG_IS_IN_CI: true | |
| HF_ENDPOINT: https://hf-mirror.com | |
| TORCH_EXTENSIONS_DIR: /tmp/torch_extensions | |
| PYTORCH_NPU_ALLOC_CONF: "expandable_segments:True" | |
| STREAMS_PER_DEVICE: 32 | |
| run: | | |
| export PATH="/usr/local/Ascend/8.3.RC1/compiler/bishengir/bin:${PATH}" | |
| cd test/srt | |
| python3 run_suite.py --suite per-commit-2-npu-a2 --auto-partition-id ${{ matrix.part }} --auto-partition-size 3 | |
| per-commit-4-npu-a2: | |
| needs: [check-changes, pr-gate] | |
| if: needs.check-changes.outputs.main_package == 'true' | |
| runs-on: linux-aarch64-a2-4 | |
| container: | |
| image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-910b-ubuntu22.04-py3.11 | |
| steps: | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| with: | |
| ref: ${{ inputs.ref || github.ref }} | |
| - name: Install dependencies | |
| run: | | |
| # speed up by using infra cache services | |
| CACHING_URL="cache-service.nginx-pypi-cache.svc.cluster.local" | |
| sed -Ei "s@(ports|archive).ubuntu.com@${CACHING_URL}:8081@g" /etc/apt/sources.list | |
| pip config set global.index-url http://${CACHING_URL}/pypi/simple | |
| pip config set global.extra-index-url "https://pypi.tuna.tsinghua.edu.cn/simple" | |
| pip config set global.trusted-host "${CACHING_URL} pypi.tuna.tsinghua.edu.cn" | |
| bash scripts/ci/npu_ci_install_dependency.sh 910b | |
| # copy required file from our daily cache | |
| cp ~/.cache/modelscope/hub/datasets/otavia/ShareGPT_Vicuna_unfiltered/ShareGPT_V3_unfiltered_cleaned_split.json /tmp | |
| # copy download through proxy | |
| curl -o /tmp/test.jsonl -L https://raw.githubusercontent.com/openai/grade-school-math/master/grade_school_math/data/test.jsonl | |
| - name: Run test | |
| timeout-minutes: 60 | |
| env: | |
| SGLANG_USE_MODELSCOPE: true | |
| SGLANG_IS_IN_CI: true | |
| HF_ENDPOINT: https://hf-mirror.com | |
| TORCH_EXTENSIONS_DIR: /tmp/torch_extensions | |
| PYTORCH_NPU_ALLOC_CONF: "expandable_segments:True" | |
| STREAMS_PER_DEVICE: 32 | |
| run: | | |
| export PATH="/usr/local/Ascend/8.3.RC1/compiler/bishengir/bin:${PATH}" | |
| cd test/srt | |
| python3 run_suite.py --suite per-commit-4-npu-a2 --timeout-per-file 3600 | |
| per-commit-16-npu-a3: | |
| needs: [check-changes, pr-gate] | |
| if: needs.check-changes.outputs.main_package == 'true' | |
| runs-on: linux-aarch64-a3-16 | |
| strategy: | |
| fail-fast: true | |
| matrix: | |
| part: [0, 1] | |
| container: | |
| image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-a3-ubuntu22.04-py3.11 | |
| steps: | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| with: | |
| ref: ${{ inputs.ref || github.ref }} | |
| - name: Install dependencies | |
| run: | | |
| # speed up by using infra cache services | |
| CACHING_URL="cache-service.nginx-pypi-cache.svc.cluster.local" | |
| sed -Ei "s@(ports|archive).ubuntu.com@${CACHING_URL}:8081@g" /etc/apt/sources.list | |
| pip config set global.index-url http://${CACHING_URL}/pypi/simple | |
| pip config set global.extra-index-url "https://pypi.tuna.tsinghua.edu.cn/simple" | |
| pip config set global.trusted-host "${CACHING_URL} pypi.tuna.tsinghua.edu.cn" | |
| bash scripts/ci/npu_ci_install_dependency.sh a3 | |
| # copy required file from our daily cache | |
| cp ~/.cache/modelscope/hub/datasets/otavia/ShareGPT_Vicuna_unfiltered/ShareGPT_V3_unfiltered_cleaned_split.json /tmp | |
| # copy download through proxy | |
| curl -o /tmp/test.jsonl -L https://gh-proxy.test.osinfra.cn/https://raw.githubusercontent.com/openai/grade-school-math/master/grade_school_math/data/test.jsonl | |
| - name: Run test | |
| timeout-minutes: 60 | |
| env: | |
| SGLANG_USE_MODELSCOPE: true | |
| SGLANG_IS_IN_CI: true | |
| HF_ENDPOINT: https://hf-mirror.com | |
| TORCH_EXTENSIONS_DIR: /tmp/torch_extensions | |
| PYTORCH_NPU_ALLOC_CONF: "expandable_segments:True" | |
| STREAMS_PER_DEVICE: 32 | |
| run: | | |
| export PATH="/usr/local/Ascend/8.3.RC1/compiler/bishengir/bin:${PATH}" | |
| cd test/srt | |
| python3 run_suite.py --suite per-commit-16-npu-a3 --timeout-per-file 3600 --auto-partition-id ${{ matrix.part }} --auto-partition-size 2 |