Skip to content

feat: Add handling for OpenAI full tool call in a single stream chunk (WIP) #559

Draft
noahlwest wants to merge 1 commit into GoogleCloudPlatform:main from
noahlwest:changes
Draft

feat: Add handling for OpenAI full tool call in a single stream chunk (WIP) #559
noahlwest wants to merge 1 commit into GoogleCloudPlatform:main from
noahlwest:changes

Conversation

@noahlwest
Copy link
Collaborator

@noahlwest noahlwest commented Oct 4, 2025

I was experimenting with Z.ai's GLM 4.6 model, served through an OpenAI-compatible endpoint from vLLM, and noticed that it streams its output in a slightly different format from other models.

This change makes GLM 4.6 tool calls work, though I'm not sure this is the best way to implement it.

Here's an example of the last couple of streaming chunks from a GLM tool request. Note that the complete tool call (id, name, and full arguments) arrives in a single chunk:

    data: {"id":"chatcmpl-2bdf923fc05d4720b236bf42b780c095","object":"chat.completion.chunk","created":1759514500,"model":"zai-org/GLM-4.6","choices":[{"index":0,"delta":{"content":"","tool_calls":[{"id":"chatcmpl-tool-9e3c12e00d494ab49bd59359e9c231bb","type":"function","index":0,"function":{"name":"kubectl","arguments":"{\"command\": \"kubectl get deployments --all-namespaces\", \"modifies_resource\": \"no\"}"}}]},"logprobs":null,"finish_reason":null,"token_ids":null}]}

    data: {"id":"chatcmpl-2bdf923fc05d4720b236bf42b780c095","object":"chat.completion.chunk","created":1759514500,"model":"zai-org/GLM-4.6","choices":[{"index":0,"delta":{"content":""},"logprobs":null,"finish_reason":"tool_calls","stop_reason":151338,"token_ids":null}]}

Here's an example from a different open model (Qwen3-Next-80B-A3B-Instruct), also served with vLLM, that already works with our existing code. It sends the tool name in the first chunk and then streams the arguments in fragments:

    data: {"id":"chatcmpl-2e4752429ee14412a83d57080fffa77c","object":"chat.completion.chunk","created":1759533916,"model":"Qwen/Qwen3-Next-80B-A3B-Instruct","choices":[{"index":0,"delta":{"tool_calls":[{"id":"chatcmpl-tool-4ff5f0260d154d29b924b158eff38e64","type":"function","index":0,"function":{"name":"kubectl"}}]},"logprobs":null,"finish_reason":null,"token_ids":null}]}

    data: {"id":"chatcmpl-2e4752429ee14412a83d57080fffa77c","object":"chat.completion.chunk","created":1759533916,"model":"Qwen/Qwen3-Next-80B-A3B-Instruct","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"{\"command\": \""}}]},"logprobs":null,"finish_reason":null,"token_ids":null}]}

    data: {"id":"chatcmpl-2e4752429ee14412a83d57080fffa77c","object":"chat.completion.chunk","created":1759533916,"model":"Qwen/Qwen3-Next-80B-A3B-Instruct","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"kubectl"}}]},"logprobs":null,"finish_reason":null,"token_ids":null}]}

    data: {"id":"chatcmpl-2e4752429ee14412a83d57080fffa77c","object":"chat.completion.chunk","created":1759533916,"model":"Qwen/Qwen3-Next-80B-A3B-Instruct","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":" get"}}]},"logprobs":null,"finish_reason":null,"token_ids":null}]}

    data: {"id":"chatcmpl-2e4752429ee14412a83d57080fffa77c","object":"chat.completion.chunk","created":1759533916,"model":"Qwen/Qwen3-Next-80B-A3B-Instruct","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":" namespaces"}}]},"logprobs":null,"finish_reason":null,"token_ids":null}]}

    data: {"id":"chatcmpl-2e4752429ee14412a83d57080fffa77c","object":"chat.completion.chunk","created":1759533916,"model":"Qwen/Qwen3-Next-80B-A3B-Instruct","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":" create"}}]},"logprobs":null,"finish_reason":null,"token_ids":null}]}

    data: {"id":"chatcmpl-2e4752429ee14412a83d57080fffa77c","object":"chat.completion.chunk","created":1759533916,"model":"Qwen/Qwen3-Next-80B-A3B-Instruct","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"-simple"}}]},"logprobs":null,"finish_reason":null,"token_ids":null}]}

    data: {"id":"chatcmpl-2e4752429ee14412a83d57080fffa77c","object":"chat.completion.chunk","created":1759533916,"model":"Qwen/Qwen3-Next-80B-A3B-Instruct","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"-r"}}]},"logprobs":null,"finish_reason":null,"token_ids":null}]}

    data: {"id":"chatcmpl-2e4752429ee14412a83d57080fffa77c","object":"chat.completion.chunk","created":1759533916,"model":"Qwen/Qwen3-Next-80B-A3B-Instruct","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"bac"}}]},"logprobs":null,"finish_reason":null,"token_ids":null}]}

    data: {"id":"chatcmpl-2e4752429ee14412a83d57080fffa77c","object":"chat.completion.chunk","created":1759533916,"model":"Qwen/Qwen3-Next-80B-A3B-Instruct","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"\","}}]},"logprobs":null,"finish_reason":null,"token_ids":null}]}

    data: {"id":"chatcmpl-2e4752429ee14412a83d57080fffa77c","object":"chat.completion.chunk","created":1759533916,"model":"Qwen/Qwen3-Next-80B-A3B-Instruct","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":" \""}}]},"logprobs":null,"finish_reason":null,"token_ids":null}]}

    data: {"id":"chatcmpl-2e4752429ee14412a83d57080fffa77c","object":"chat.completion.chunk","created":1759533916,"model":"Qwen/Qwen3-Next-80B-A3B-Instruct","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"mod"}}]},"logprobs":null,"finish_reason":null,"token_ids":null}]}

    data: {"id":"chatcmpl-2e4752429ee14412a83d57080fffa77c","object":"chat.completion.chunk","created":1759533916,"model":"Qwen/Qwen3-Next-80B-A3B-Instruct","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"ifies"}}]},"logprobs":null,"finish_reason":null,"token_ids":null}]}

    data: {"id":"chatcmpl-2e4752429ee14412a83d57080fffa77c","object":"chat.completion.chunk","created":1759533916,"model":"Qwen/Qwen3-Next-80B-A3B-Instruct","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"_resource"}}]},"logprobs":null,"finish_reason":null,"token_ids":null}]}

    data: {"id":"chatcmpl-2e4752429ee14412a83d57080fffa77c","object":"chat.completion.chunk","created":1759533916,"model":"Qwen/Qwen3-Next-80B-A3B-Instruct","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"\":"}}]},"logprobs":null,"finish_reason":null,"token_ids":null}]}

    data: {"id":"chatcmpl-2e4752429ee14412a83d57080fffa77c","object":"chat.completion.chunk","created":1759533916,"model":"Qwen/Qwen3-Next-80B-A3B-Instruct","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":" \""}}]},"logprobs":null,"finish_reason":null,"token_ids":null}]}

    data: {"id":"chatcmpl-2e4752429ee14412a83d57080fffa77c","object":"chat.completion.chunk","created":1759533916,"model":"Qwen/Qwen3-Next-80B-A3B-Instruct","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"no"}}]},"logprobs":null,"finish_reason":null,"token_ids":null}]}

    data: {"id":"chatcmpl-2e4752429ee14412a83d57080fffa77c","object":"chat.completion.chunk","created":1759533916,"model":"Qwen/Qwen3-Next-80B-A3B-Instruct","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"\"}"}}]},"logprobs":null,"finish_reason":null,"token_ids":null}]}

    data: {"id":"chatcmpl-2e4752429ee14412a83d57080fffa77c","object":"chat.completion.chunk","created":1759533916,"model":"Qwen/Qwen3-Next-80B-A3B-Instruct","choices":[{"index":0,"delta":{"content":""},"logprobs":null,"finish_reason":"tool_calls","stop_reason":null,"token_ids":null}]}

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

None yet

Projects

None yet

Development

Successfully merging this pull request may close these issues.

1 participant