Skip to content

Commit 24e0084

Browse files
committed
fix: dsv3.2 sometimes sets the first token to None
Signed-off-by: mondaylord <[email protected]>
1 parent ed586e7 commit 24e0084

File tree

1 file changed

+28
-32
lines changed

1 file changed

+28
-32
lines changed

vllm/entrypoints/openai/serving_chat.py

Lines changed: 28 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -964,21 +964,9 @@ async def chat_completion_stream_generator(
964964
assert reasoning_end_arr is not None
965965
output_token_ids = as_list(output.token_ids)
966966
if not reasoning_end_arr[i]:
967-
delta_message = (
968-
reasoning_parser.extract_reasoning_streaming(
969-
previous_text,
970-
current_text,
971-
delta_text,
972-
previous_token_ids,
973-
current_token_ids,
974-
output_token_ids,
975-
)
976-
)
977967
# When encountering think end id in prompt_token_ids
978968
# i.e {"enable_thinking": False},
979969
# set reasoning status to end.
980-
# Remove the text and token ids related
981-
# to 'reasoning'.
982970
if (
983971
res.prompt_token_ids
984972
and reasoning_parser.is_reasoning_end(
@@ -987,30 +975,38 @@ async def chat_completion_stream_generator(
987975
):
988976
reasoning_end_arr[i] = True
989977
current_token_ids = output_token_ids
990-
if delta_message and delta_message.content:
991-
current_text = delta_message.content
992-
delta_message.content = None
993-
else:
994-
current_text = ""
995-
# When encountering think end id in delta_token_ids,
996-
# set reasoning status to end.
997-
# Remove the text and token ids related
998-
# to 'reasoning'.
999-
if reasoning_parser.is_reasoning_end(output_token_ids):
1000-
reasoning_end_arr[i] = True
1001-
current_token_ids = (
1002-
reasoning_parser.extract_content_ids(
1003-
output_token_ids
978+
# Don't update current_text, keep it as is from delta
979+
else:
980+
delta_message = (
981+
reasoning_parser.extract_reasoning_streaming(
982+
previous_text,
983+
current_text,
984+
delta_text,
985+
previous_token_ids,
986+
current_token_ids,
987+
output_token_ids,
1004988
)
1005989
)
1006-
if delta_message and delta_message.content:
1007-
current_text = delta_message.content
1008-
delta_message.content = None
1009-
else:
1010-
current_text = ""
990+
991+
# When encountering think end id in delta_token_ids,
992+
# set reasoning status to end.
993+
# Remove the text and token ids related
994+
# to 'reasoning'.
995+
if reasoning_parser.is_reasoning_end(output_token_ids):
996+
reasoning_end_arr[i] = True
997+
current_token_ids = (
998+
reasoning_parser.extract_content_ids(
999+
output_token_ids
1000+
)
1001+
)
1002+
if delta_message and delta_message.content:
1003+
current_text = delta_message.content
1004+
delta_message.content = None
1005+
else:
1006+
current_text = ""
10111007

10121008
# handle tool calls only after reasoning is done,
1013-
else:
1009+
if reasoning_end_arr[i]:
10141010
delta_token_ids = output_token_ids
10151011
# First time to tool call,
10161012
# add the remaining text and token ids

0 commit comments

Comments
 (0)