# Fix KeyError when logprobs=false in completions endpoint (#16095)
# NPU (Ascend) per-commit test workflow.
name: PR Test (NPU)

on:
  push:
    branches: [ main ]
  pull_request:
    branches: [ main ]
  workflow_dispatch:
  # Reusable entry point so other workflows (e.g. release pipelines) can
  # invoke these tests directly with an explicit ref.
  workflow_call:
    inputs:
      ref:
        description: 'Git ref (branch, tag, or SHA) to test. If not provided, uses the default branch.'
        required: false
        type: string
        default: ''
      run_all_tests:
        description: "Run all tests (for releasing or testing purpose)"
        required: false
        type: boolean
        default: false

concurrency:
  group: pr-test-npu-${{ inputs.ref || github.ref }}
  # NOTE(review): on workflow_call, github.event_name is inherited from the
  # caller and is never the literal 'workflow_call', so this expression is
  # effectively always true — confirm whether callers should be cancelable.
  cancel-in-progress: ${{ github.event_name != 'workflow_call' }}
jobs:
  # ==================== Check Changes ==================== #
  check-changes:
    runs-on: ubuntu-latest
    outputs:
      # 'true' when main-package files changed, or when all tests are forced
      # via the run_all_tests input (filter step is skipped in that case).
      main_package: ${{ steps.filter.outputs.main_package || steps.run-mode.outputs.run_all_tests }}
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}
      - name: Determine run mode
        id: run-mode
        run: |
          # Run all tests for workflow_call (when ref input is provided)
          # Note: github.event_name is inherited from caller, so we detect workflow_call by checking inputs.ref
          if [[ "${{ inputs.run_all_tests }}" == "true" ]]; then
            echo "run_all_tests=true" >> $GITHUB_OUTPUT
            echo "Run mode: ALL TESTS (run_all_tests=${{ inputs.run_all_tests }})"
          else
            echo "run_all_tests=false" >> $GITHUB_OUTPUT
            echo "Run mode: FILTERED (triggered by ${{ github.event_name }})"
          fi
      - name: Detect file changes
        id: filter
        uses: dorny/paths-filter@v3
        # Skip path filtering entirely when all tests were requested.
        if: steps.run-mode.outputs.run_all_tests != 'true'
        with:
          filters: |
            main_package:
              - "python/sglang/!(multimodal_gen)/**"
              - "python/*.toml"
              - "scripts/ci/npu_ci_install_dependency.sh"
              - "test/srt/ascend/**"
              - ".github/workflows/pr-test-npu.yml"
| # ==================== PR Gate ==================== # | |
| pr-gate: | |
| needs: check-changes | |
| if: needs.check-changes.outputs.main_package == 'true' | |
| uses: ./.github/workflows/pr-gate.yml | |
| secrets: inherit | |
| per-commit-1-npu-a2: | |
| needs: [check-changes, pr-gate] | |
| if: needs.check-changes.outputs.main_package == 'true' | |
| runs-on: linux-aarch64-a2-1 | |
| container: | |
| image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-910b-ubuntu22.04-py3.11 | |
| steps: | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| with: | |
| ref: ${{ inputs.ref || github.ref }} | |
| - name: Install dependencies | |
| run: | | |
| # speed up by using infra cache services | |
| CACHING_URL="cache-service.nginx-pypi-cache.svc.cluster.local" | |
| sed -Ei "s@(ports|archive).ubuntu.com@${CACHING_URL}:8081@g" /etc/apt/sources.list | |
| pip config set global.index-url http://${CACHING_URL}/pypi/simple | |
| pip config set global.extra-index-url "https://pypi.tuna.tsinghua.edu.cn/simple" | |
| pip config set global.trusted-host "${CACHING_URL} pypi.tuna.tsinghua.edu.cn" | |
| bash scripts/ci/npu_ci_install_dependency.sh 910b | |
| # copy required file from our daily cache | |
| cp ~/.cache/modelscope/hub/datasets/otavia/ShareGPT_Vicuna_unfiltered/ShareGPT_V3_unfiltered_cleaned_split.json /tmp | |
| # copy download through proxy | |
| curl -o /tmp/test.jsonl -L https://raw.githubusercontent.com/openai/grade-school-math/master/grade_school_math/data/test.jsonl | |
| - name: Run test | |
| timeout-minutes: 60 | |
| env: | |
| SGLANG_USE_MODELSCOPE: true | |
| SGLANG_IS_IN_CI: true | |
| HF_ENDPOINT: https://hf-mirror.com | |
| TORCH_EXTENSIONS_DIR: /tmp/torch_extensions | |
| PYTORCH_NPU_ALLOC_CONF: "expandable_segments:True" | |
| STREAMS_PER_DEVICE: 32 | |
| run: | | |
| export PATH="/usr/local/Ascend/8.3.RC1/compiler/bishengir/bin:${PATH}" | |
| cd test/srt | |
| python3 run_suite.py --suite per-commit-1-npu-a2 | |
| per-commit-2-npu-a2: | |
| needs: [check-changes, pr-gate] | |
| if: needs.check-changes.outputs.main_package == 'true' | |
| runs-on: linux-aarch64-a2-2 | |
| strategy: | |
| fail-fast: true | |
| matrix: | |
| part: [0, 1, 2] | |
| container: | |
| image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-910b-ubuntu22.04-py3.11 | |
| steps: | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| with: | |
| ref: ${{ inputs.ref || github.ref }} | |
| - name: Install dependencies | |
| run: | | |
| # speed up by using infra cache services | |
| CACHING_URL="cache-service.nginx-pypi-cache.svc.cluster.local" | |
| sed -Ei "s@(ports|archive).ubuntu.com@${CACHING_URL}:8081@g" /etc/apt/sources.list | |
| pip config set global.index-url http://${CACHING_URL}/pypi/simple | |
| pip config set global.extra-index-url "https://pypi.tuna.tsinghua.edu.cn/simple" | |
| pip config set global.trusted-host "${CACHING_URL} pypi.tuna.tsinghua.edu.cn" | |
| bash scripts/ci/npu_ci_install_dependency.sh 910b | |
| # copy required file from our daily cache | |
| cp ~/.cache/modelscope/hub/datasets/otavia/ShareGPT_Vicuna_unfiltered/ShareGPT_V3_unfiltered_cleaned_split.json /tmp | |
| # copy download through proxy | |
| curl -o /tmp/test.jsonl -L https://raw.githubusercontent.com/openai/grade-school-math/master/grade_school_math/data/test.jsonl | |
| - name: Run test | |
| timeout-minutes: 60 | |
| env: | |
| SGLANG_USE_MODELSCOPE: true | |
| SGLANG_IS_IN_CI: true | |
| HF_ENDPOINT: https://hf-mirror.com | |
| TORCH_EXTENSIONS_DIR: /tmp/torch_extensions | |
| PYTORCH_NPU_ALLOC_CONF: "expandable_segments:True" | |
| STREAMS_PER_DEVICE: 32 | |
| run: | | |
| export PATH="/usr/local/Ascend/8.3.RC1/compiler/bishengir/bin:${PATH}" | |
| cd test/srt | |
| python3 run_suite.py --suite per-commit-2-npu-a2 --auto-partition-id ${{ matrix.part }} --auto-partition-size 3 | |
| per-commit-4-npu-a2: | |
| needs: [check-changes, pr-gate] | |
| if: needs.check-changes.outputs.main_package == 'true' | |
| runs-on: linux-aarch64-a2-4 | |
| container: | |
| image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-910b-ubuntu22.04-py3.11 | |
| steps: | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| with: | |
| ref: ${{ inputs.ref || github.ref }} | |
| - name: Install dependencies | |
| run: | | |
| # speed up by using infra cache services | |
| CACHING_URL="cache-service.nginx-pypi-cache.svc.cluster.local" | |
| sed -Ei "s@(ports|archive).ubuntu.com@${CACHING_URL}:8081@g" /etc/apt/sources.list | |
| pip config set global.index-url http://${CACHING_URL}/pypi/simple | |
| pip config set global.extra-index-url "https://pypi.tuna.tsinghua.edu.cn/simple" | |
| pip config set global.trusted-host "${CACHING_URL} pypi.tuna.tsinghua.edu.cn" | |
| bash scripts/ci/npu_ci_install_dependency.sh 910b | |
| # copy required file from our daily cache | |
| cp ~/.cache/modelscope/hub/datasets/otavia/ShareGPT_Vicuna_unfiltered/ShareGPT_V3_unfiltered_cleaned_split.json /tmp | |
| # copy download through proxy | |
| curl -o /tmp/test.jsonl -L https://raw.githubusercontent.com/openai/grade-school-math/master/grade_school_math/data/test.jsonl | |
| - name: Run test | |
| timeout-minutes: 60 | |
| env: | |
| SGLANG_USE_MODELSCOPE: true | |
| SGLANG_IS_IN_CI: true | |
| HF_ENDPOINT: https://hf-mirror.com | |
| TORCH_EXTENSIONS_DIR: /tmp/torch_extensions | |
| PYTORCH_NPU_ALLOC_CONF: "expandable_segments:True" | |
| STREAMS_PER_DEVICE: 32 | |
| run: | | |
| export PATH="/usr/local/Ascend/8.3.RC1/compiler/bishengir/bin:${PATH}" | |
| cd test/srt | |
| python3 run_suite.py --suite per-commit-4-npu-a2 --timeout-per-file 3600 | |
| per-commit-16-npu-a3: | |
| needs: [check-changes, pr-gate] | |
| if: needs.check-changes.outputs.main_package == 'true' | |
| runs-on: linux-aarch64-a3-16 | |
| strategy: | |
| fail-fast: true | |
| matrix: | |
| part: [0, 1] | |
| container: | |
| image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-a3-ubuntu22.04-py3.11 | |
| steps: | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| with: | |
| ref: ${{ inputs.ref || github.ref }} | |
| - name: Install dependencies | |
| run: | | |
| # speed up by using infra cache services | |
| CACHING_URL="cache-service.nginx-pypi-cache.svc.cluster.local" | |
| sed -Ei "s@(ports|archive).ubuntu.com@${CACHING_URL}:8081@g" /etc/apt/sources.list | |
| pip config set global.index-url http://${CACHING_URL}/pypi/simple | |
| pip config set global.extra-index-url "https://pypi.tuna.tsinghua.edu.cn/simple" | |
| pip config set global.trusted-host "${CACHING_URL} pypi.tuna.tsinghua.edu.cn" | |
| bash scripts/ci/npu_ci_install_dependency.sh a3 | |
| # copy required file from our daily cache | |
| cp ~/.cache/modelscope/hub/datasets/otavia/ShareGPT_Vicuna_unfiltered/ShareGPT_V3_unfiltered_cleaned_split.json /tmp | |
| # copy download through proxy | |
| curl -o /tmp/test.jsonl -L https://gh-proxy.test.osinfra.cn/https://raw.githubusercontent.com/openai/grade-school-math/master/grade_school_math/data/test.jsonl | |
| - name: Run test | |
| timeout-minutes: 60 | |
| env: | |
| SGLANG_USE_MODELSCOPE: true | |
| SGLANG_IS_IN_CI: true | |
| HF_ENDPOINT: https://hf-mirror.com | |
| TORCH_EXTENSIONS_DIR: /tmp/torch_extensions | |
| PYTORCH_NPU_ALLOC_CONF: "expandable_segments:True" | |
| STREAMS_PER_DEVICE: 32 | |
| run: | | |
| export PATH="/usr/local/Ascend/8.3.RC1/compiler/bishengir/bin:${PATH}" | |
| cd test/srt | |
| python3 run_suite.py --suite per-commit-16-npu-a3 --timeout-per-file 3600 --auto-partition-id ${{ matrix.part }} --auto-partition-size 2 |