@@ -964,21 +964,9 @@ async def chat_completion_stream_generator(
964964 assert reasoning_end_arr is not None
965965 output_token_ids = as_list (output .token_ids )
966966 if not reasoning_end_arr [i ]:
967- delta_message = (
968- reasoning_parser .extract_reasoning_streaming (
969- previous_text ,
970- current_text ,
971- delta_text ,
972- previous_token_ids ,
973- current_token_ids ,
974- output_token_ids ,
975- )
976- )
977967 # When encountering think end id in prompt_token_ids
978968 # i.e {"enable_thinking": False},
979969 # set reasoning status to end.
980- # Remove the text and token ids related
981- # to 'reasoning'.
982970 if (
983971 res .prompt_token_ids
984972 and reasoning_parser .is_reasoning_end (
@@ -987,30 +975,38 @@ async def chat_completion_stream_generator(
987975 ):
988976 reasoning_end_arr [i ] = True
989977 current_token_ids = output_token_ids
990- if delta_message and delta_message .content :
991- current_text = delta_message .content
992- delta_message .content = None
993- else :
994- current_text = ""
995- # When encountering think end id in delta_token_ids,
996- # set reasoning status to end.
997- # Remove the text and token ids related
998- # to 'reasoning'.
999- if reasoning_parser .is_reasoning_end (output_token_ids ):
1000- reasoning_end_arr [i ] = True
1001- current_token_ids = (
1002- reasoning_parser .extract_content_ids (
1003- output_token_ids
978+ # Don't update current_text, keep it as is from delta
979+ else :
980+ delta_message = (
981+ reasoning_parser .extract_reasoning_streaming (
982+ previous_text ,
983+ current_text ,
984+ delta_text ,
985+ previous_token_ids ,
986+ current_token_ids ,
987+ output_token_ids ,
1004988 )
1005989 )
1006- if delta_message and delta_message .content :
1007- current_text = delta_message .content
1008- delta_message .content = None
1009- else :
1010- current_text = ""
990+
991+ # When encountering think end id in delta_token_ids,
992+ # set reasoning status to end.
993+ # Remove the text and token ids related
994+ # to 'reasoning'.
995+ if reasoning_parser .is_reasoning_end (output_token_ids ):
996+ reasoning_end_arr [i ] = True
997+ current_token_ids = (
998+ reasoning_parser .extract_content_ids (
999+ output_token_ids
1000+ )
1001+ )
1002+ if delta_message and delta_message .content :
1003+ current_text = delta_message .content
1004+ delta_message .content = None
1005+ else :
1006+ current_text = ""
10111007
10121008 # handle tool calls only after reasoning is done,
1013- else :
1009+ if reasoning_end_arr [ i ] :
10141010 delta_token_ids = output_token_ids
10151011 # First time to tool call,
10161012 # add the remaining text and token ids
0 commit comments