System Info / 系統信息
cuda:cuda_12.8
transformers:4.57.3
python:3.12
vllm:0.17
Who can help? / 谁可以帮助到您?
No response
Information / 问题信息
Reproduction / 复现过程
使用vllm部署glm-4.7-fp8,部署参数如下:
python3 -m vllm.entrypoints.openai.api_server --model /ai/models/glm-4.7-fp8 --served-model-name glm-4.7 --tensor-parallel-size 8 --speculative-config.method mtp --speculative-config.num_speculative_tokens 1 --tool-call-parser glm47 --reasoning-parser glm45 --enable-auto-tool-choice --port 8004
启动后,使用如下请求参数测试:
{ "top_p": 0.9, "temperature": 0, "stream": true, "messages": [{"content": "你是一个有用的助手", "role": "system"}, {"role": "user", "name": "user", "content": "北京今天天气怎么样"}], "tools":[ { "type" : "function", "function" : { "name" : "get_weather", "description" : "用于获取指定城市温度的工具", "parameters" : { "type" : "object", "properties" : { "city" : { "description" : "city", "title" : "City", "type" : "string" } }, "required" : [ "city" ], "additionalProperties" : false }, "strict" : false } } ] }
响应结果:
`data: {"id":"chatcmpl-ae623d37888a3754","object":"chat.completion.chunk","created":1774886002,"model":"glm-4.7","choices":[{"index":0,"delta":{"role":"assistant","content":""},"logprobs":null,"finish_reason":null}],"prompt_token_ids":null}
data: {"id":"chatcmpl-ae623d37888a3754","object":"chat.completion.chunk","created":1774886002,"model":"glm-4.7","choices":[{"index":0,"delta":{"reasoning":"用户"},"logprobs":null,"finish_reason":null,"token_ids":null}]}
data: {"id":"chatcmpl-ae623d37888a3754","object":"chat.completion.chunk","created":1774886002,"model":"glm-4.7","choices":[{"index":0,"delta":{"reasoning":"询问北京"},"logprobs":null,"finish_reason":null,"token_ids":null}]}
data: {"id":"chatcmpl-ae623d37888a3754","object":"chat.completion.chunk","created":1774886002,"model":"glm-4.7","choices":[{"index":0,"delta":{"reasoning":"今天的天气"},"logprobs":null,"finish_reason":null,"token_ids":null}]}
data: {"id":"chatcmpl-ae623d37888a3754","object":"chat.completion.chunk","created":1774886002,"model":"glm-4.7","choices":[{"index":0,"delta":{"reasoning":"情况。"},"logprobs":null,"finish_reason":null,"token_ids":null}]}
data: {"id":"chatcmpl-ae623d37888a3754","object":"chat.completion.chunk","created":1774886002,"model":"glm-4.7","choices":[{"index":0,"delta":{"reasoning":"我需要"},"logprobs":null,"finish_reason":null,"token_ids":null}]}
data: {"id":"chatcmpl-ae623d37888a3754","object":"chat.completion.chunk","created":1774886002,"model":"glm-4.7","choices":[{"index":0,"delta":{"reasoning":"使用get"},"logprobs":null,"finish_reason":null,"token_ids":null}]}
data: {"id":"chatcmpl-ae623d37888a3754","object":"chat.completion.chunk","created":1774886002,"model":"glm-4.7","choices":[{"index":0,"delta":{"reasoning":"_weather工具"},"logprobs":null,"finish_reason":null,"token_ids":null}]}
data: {"id":"chatcmpl-ae623d37888a3754","object":"chat.completion.chunk","created":1774886002,"model":"glm-4.7","choices":[{"index":0,"delta":{"reasoning":"来获取"},"logprobs":null,"finish_reason":null,"token_ids":null}]}
data: {"id":"chatcmpl-ae623d37888a3754","object":"chat.completion.chunk","created":1774886002,"model":"glm-4.7","choices":[{"index":0,"delta":{"reasoning":"北京的天气"},"logprobs":null,"finish_reason":null,"token_ids":null}]}
data: {"id":"chatcmpl-ae623d37888a3754","object":"chat.completion.chunk","created":1774886002,"model":"glm-4.7","choices":[{"index":0,"delta":{"reasoning":"信息。"},"logprobs":null,"finish_reason":null,"token_ids":null}]}
data: {"id":"chatcmpl-ae623d37888a3754","object":"chat.completion.chunk","created":1774886002,"model":"glm-4.7","choices":[{"index":0,"delta":{"reasoning":"工具需要一个"},"logprobs":null,"finish_reason":null,"token_ids":null}]}
data: {"id":"chatcmpl-ae623d37888a3754","object":"chat.completion.chunk","created":1774886002,"model":"glm-4.7","choices":[{"index":0,"delta":{"reasoning":"city参数"},"logprobs":null,"finish_reason":null,"token_ids":null}]}
data: {"id":"chatcmpl-ae623d37888a3754","object":"chat.completion.chunk","created":1774886002,"model":"glm-4.7","choices":[{"index":0,"delta":{"reasoning":",用户"},"logprobs":null,"finish_reason":null,"token_ids":null}]}
data: {"id":"chatcmpl-ae623d37888a3754","object":"chat.completion.chunk","created":1774886002,"model":"glm-4.7","choices":[{"index":0,"delta":{"reasoning":"已经明确"},"logprobs":null,"finish_reason":null,"token_ids":null}]}
data: {"id":"chatcmpl-ae623d37888a3754","object":"chat.completion.chunk","created":1774886002,"model":"glm-4.7","choices":[{"index":0,"delta":{"reasoning":"提到了""},"logprobs":null,"finish_reason":null,"token_ids":null}]}
data: {"id":"chatcmpl-ae623d37888a3754","object":"chat.completion.chunk","created":1774886002,"model":"glm-4.7","choices":[{"index":0,"delta":{"reasoning":"北京","},"logprobs":null,"finish_reason":null,"token_ids":null}]}
data: {"id":"chatcmpl-ae623d37888a3754","object":"chat.completion.chunk","created":1774886002,"model":"glm-4.7","choices":[{"index":0,"delta":{"reasoning":"所以我可以"},"logprobs":null,"finish_reason":null,"token_ids":null}]}
data: {"id":"chatcmpl-ae623d37888a3754","object":"chat.completion.chunk","created":1774886002,"model":"glm-4.7","choices":[{"index":0,"delta":{"reasoning":"直接调用"},"logprobs":null,"finish_reason":null,"token_ids":null}]}
data: {"id":"chatcmpl-ae623d37888a3754","object":"chat.completion.chunk","created":1774886002,"model":"glm-4.7","choices":[{"index":0,"delta":{"reasoning":"这个工具"},"logprobs":null,"finish_reason":null,"token_ids":null}]}
data: {"id":"chatcmpl-ae623d37888a3754","object":"chat.completion.chunk","created":1774886002,"model":"glm-4.7","choices":[{"index":0,"delta":{"content":"我来帮"},"logprobs":null,"finish_reason":null,"token_ids":null}]}
data: {"id":"chatcmpl-ae623d37888a3754","object":"chat.completion.chunk","created":1774886002,"model":"glm-4.7","choices":[{"index":0,"delta":{"content":"您查询"},"logprobs":null,"finish_reason":null,"token_ids":null}]}
data: {"id":"chatcmpl-ae623d37888a3754","object":"chat.completion.chunk","created":1774886002,"model":"glm-4.7","choices":[{"index":0,"delta":{"content":"北京今天的"},"logprobs":null,"finish_reason":null,"token_ids":null}]}
data: {"id":"chatcmpl-ae623d37888a3754","object":"chat.completion.chunk","created":1774886002,"model":"glm-4.7","choices":[{"index":0,"delta":{"content":"天气情况"},"logprobs":null,"finish_reason":null,"token_ids":null}]}
data: {"id":"chatcmpl-ae623d37888a3754","object":"chat.completion.chunk","created":1774886002,"model":"glm-4.7","choices":[{"index":0,"delta":{"content":"。"},"logprobs":null,"finish_reason":null,"token_ids":null}]}
data: {"id":"chatcmpl-ae623d37888a3754","object":"chat.completion.chunk","created":1774886002,"model":"glm-4.7","choices":[{"index":0,"delta":{"tool_calls":[{"id":"chatcmpl-tool-af87121be7f2a60f","type":"function","index":0,"function":{"name":"get_weather","arguments":""}}]},"logprobs":null,"finish_reason":null,"token_ids":null}]}
data: {"id":"chatcmpl-ae623d37888a3754","object":"chat.completion.chunk","created":1774886002,"model":"glm-4.7","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"{\"city\":\""}}]},"logprobs":null,"finish_reason":null,"token_ids":null}]}
data: {"id":"chatcmpl-ae623d37888a3754","object":"chat.completion.chunk","created":1774886002,"model":"glm-4.7","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"北京\""}}]},"logprobs":null,"finish_reason":null,"token_ids":null}]}
data: {"id":"chatcmpl-ae623d37888a3754","object":"chat.completion.chunk","created":1774886002,"model":"glm-4.7","choices":[{"index":0,"delta":{"tool_calls":[{"id":null,"type":null,"index":0,"function":{"name":null,"arguments":"{\"city\": \"北京\"}"}}]},"logprobs":null,"finish_reason":"tool_calls","stop_reason":151338,"token_ids":null}]}
data: [DONE]`
工具调用的前几个流式输出块拼接后,参数为 {"city":"北京,缺少结尾的 "}";而最后一个流式输出块(finish_reason 为 tool_calls)又重复输出了一份完整的参数 {"city": "北京"}。
Cherry Studio 会把最后一个流式输出块中的完整工具参数也拼接到前面的参数之后,导致工具参数解析报错。
Expected behavior / 期待表现
工具调用的流式输出块拼接后是正确的参数
System Info / 系統信息
cuda:cuda_12.8
transformers:4.57.3
python:3.12
vllm:0.17
Who can help? / 谁可以帮助到您?
No response
Information / 问题信息
Reproduction / 复现过程
使用vllm部署glm-4.7-fp8,部署参数如下:
python3 -m vllm.entrypoints.openai.api_server --model /ai/models/glm-4.7-fp8 --served-model-name glm-4.7 --tensor-parallel-size 8 --speculative-config.method mtp --speculative-config.num_speculative_tokens 1 --tool-call-parser glm47 --reasoning-parser glm45 --enable-auto-tool-choice --port 8004
启动后,使用如下请求参数测试:
{ "top_p": 0.9, "temperature": 0, "stream": true, "messages": [{"content": "你是一个有用的助手", "role": "system"}, {"role": "user", "name": "user", "content": "北京今天天气怎么样"}], "tools":[ { "type" : "function", "function" : { "name" : "get_weather", "description" : "用于获取指定城市温度的工具", "parameters" : { "type" : "object", "properties" : { "city" : { "description" : "city", "title" : "City", "type" : "string" } }, "required" : [ "city" ], "additionalProperties" : false }, "strict" : false } } ] }
响应结果:
`data: {"id":"chatcmpl-ae623d37888a3754","object":"chat.completion.chunk","created":1774886002,"model":"glm-4.7","choices":[{"index":0,"delta":{"role":"assistant","content":""},"logprobs":null,"finish_reason":null}],"prompt_token_ids":null}
data: {"id":"chatcmpl-ae623d37888a3754","object":"chat.completion.chunk","created":1774886002,"model":"glm-4.7","choices":[{"index":0,"delta":{"reasoning":"用户"},"logprobs":null,"finish_reason":null,"token_ids":null}]}
data: {"id":"chatcmpl-ae623d37888a3754","object":"chat.completion.chunk","created":1774886002,"model":"glm-4.7","choices":[{"index":0,"delta":{"reasoning":"询问北京"},"logprobs":null,"finish_reason":null,"token_ids":null}]}
data: {"id":"chatcmpl-ae623d37888a3754","object":"chat.completion.chunk","created":1774886002,"model":"glm-4.7","choices":[{"index":0,"delta":{"reasoning":"今天的天气"},"logprobs":null,"finish_reason":null,"token_ids":null}]}
data: {"id":"chatcmpl-ae623d37888a3754","object":"chat.completion.chunk","created":1774886002,"model":"glm-4.7","choices":[{"index":0,"delta":{"reasoning":"情况。"},"logprobs":null,"finish_reason":null,"token_ids":null}]}
data: {"id":"chatcmpl-ae623d37888a3754","object":"chat.completion.chunk","created":1774886002,"model":"glm-4.7","choices":[{"index":0,"delta":{"reasoning":"我需要"},"logprobs":null,"finish_reason":null,"token_ids":null}]}
data: {"id":"chatcmpl-ae623d37888a3754","object":"chat.completion.chunk","created":1774886002,"model":"glm-4.7","choices":[{"index":0,"delta":{"reasoning":"使用get"},"logprobs":null,"finish_reason":null,"token_ids":null}]}
data: {"id":"chatcmpl-ae623d37888a3754","object":"chat.completion.chunk","created":1774886002,"model":"glm-4.7","choices":[{"index":0,"delta":{"reasoning":"_weather工具"},"logprobs":null,"finish_reason":null,"token_ids":null}]}
data: {"id":"chatcmpl-ae623d37888a3754","object":"chat.completion.chunk","created":1774886002,"model":"glm-4.7","choices":[{"index":0,"delta":{"reasoning":"来获取"},"logprobs":null,"finish_reason":null,"token_ids":null}]}
data: {"id":"chatcmpl-ae623d37888a3754","object":"chat.completion.chunk","created":1774886002,"model":"glm-4.7","choices":[{"index":0,"delta":{"reasoning":"北京的天气"},"logprobs":null,"finish_reason":null,"token_ids":null}]}
data: {"id":"chatcmpl-ae623d37888a3754","object":"chat.completion.chunk","created":1774886002,"model":"glm-4.7","choices":[{"index":0,"delta":{"reasoning":"信息。"},"logprobs":null,"finish_reason":null,"token_ids":null}]}
data: {"id":"chatcmpl-ae623d37888a3754","object":"chat.completion.chunk","created":1774886002,"model":"glm-4.7","choices":[{"index":0,"delta":{"reasoning":"工具需要一个"},"logprobs":null,"finish_reason":null,"token_ids":null}]}
data: {"id":"chatcmpl-ae623d37888a3754","object":"chat.completion.chunk","created":1774886002,"model":"glm-4.7","choices":[{"index":0,"delta":{"reasoning":"city参数"},"logprobs":null,"finish_reason":null,"token_ids":null}]}
data: {"id":"chatcmpl-ae623d37888a3754","object":"chat.completion.chunk","created":1774886002,"model":"glm-4.7","choices":[{"index":0,"delta":{"reasoning":",用户"},"logprobs":null,"finish_reason":null,"token_ids":null}]}
data: {"id":"chatcmpl-ae623d37888a3754","object":"chat.completion.chunk","created":1774886002,"model":"glm-4.7","choices":[{"index":0,"delta":{"reasoning":"已经明确"},"logprobs":null,"finish_reason":null,"token_ids":null}]}
data: {"id":"chatcmpl-ae623d37888a3754","object":"chat.completion.chunk","created":1774886002,"model":"glm-4.7","choices":[{"index":0,"delta":{"reasoning":"提到了""},"logprobs":null,"finish_reason":null,"token_ids":null}]}
data: {"id":"chatcmpl-ae623d37888a3754","object":"chat.completion.chunk","created":1774886002,"model":"glm-4.7","choices":[{"index":0,"delta":{"reasoning":"北京","},"logprobs":null,"finish_reason":null,"token_ids":null}]}
data: {"id":"chatcmpl-ae623d37888a3754","object":"chat.completion.chunk","created":1774886002,"model":"glm-4.7","choices":[{"index":0,"delta":{"reasoning":"所以我可以"},"logprobs":null,"finish_reason":null,"token_ids":null}]}
data: {"id":"chatcmpl-ae623d37888a3754","object":"chat.completion.chunk","created":1774886002,"model":"glm-4.7","choices":[{"index":0,"delta":{"reasoning":"直接调用"},"logprobs":null,"finish_reason":null,"token_ids":null}]}
data: {"id":"chatcmpl-ae623d37888a3754","object":"chat.completion.chunk","created":1774886002,"model":"glm-4.7","choices":[{"index":0,"delta":{"reasoning":"这个工具"},"logprobs":null,"finish_reason":null,"token_ids":null}]}
data: {"id":"chatcmpl-ae623d37888a3754","object":"chat.completion.chunk","created":1774886002,"model":"glm-4.7","choices":[{"index":0,"delta":{"content":"我来帮"},"logprobs":null,"finish_reason":null,"token_ids":null}]}
data: {"id":"chatcmpl-ae623d37888a3754","object":"chat.completion.chunk","created":1774886002,"model":"glm-4.7","choices":[{"index":0,"delta":{"content":"您查询"},"logprobs":null,"finish_reason":null,"token_ids":null}]}
data: {"id":"chatcmpl-ae623d37888a3754","object":"chat.completion.chunk","created":1774886002,"model":"glm-4.7","choices":[{"index":0,"delta":{"content":"北京今天的"},"logprobs":null,"finish_reason":null,"token_ids":null}]}
data: {"id":"chatcmpl-ae623d37888a3754","object":"chat.completion.chunk","created":1774886002,"model":"glm-4.7","choices":[{"index":0,"delta":{"content":"天气情况"},"logprobs":null,"finish_reason":null,"token_ids":null}]}
data: {"id":"chatcmpl-ae623d37888a3754","object":"chat.completion.chunk","created":1774886002,"model":"glm-4.7","choices":[{"index":0,"delta":{"content":"。"},"logprobs":null,"finish_reason":null,"token_ids":null}]}
data: {"id":"chatcmpl-ae623d37888a3754","object":"chat.completion.chunk","created":1774886002,"model":"glm-4.7","choices":[{"index":0,"delta":{"tool_calls":[{"id":"chatcmpl-tool-af87121be7f2a60f","type":"function","index":0,"function":{"name":"get_weather","arguments":""}}]},"logprobs":null,"finish_reason":null,"token_ids":null}]}
data: {"id":"chatcmpl-ae623d37888a3754","object":"chat.completion.chunk","created":1774886002,"model":"glm-4.7","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"{\"city\":\""}}]},"logprobs":null,"finish_reason":null,"token_ids":null}]}
data: {"id":"chatcmpl-ae623d37888a3754","object":"chat.completion.chunk","created":1774886002,"model":"glm-4.7","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"北京\""}}]},"logprobs":null,"finish_reason":null,"token_ids":null}]}
data: {"id":"chatcmpl-ae623d37888a3754","object":"chat.completion.chunk","created":1774886002,"model":"glm-4.7","choices":[{"index":0,"delta":{"tool_calls":[{"id":null,"type":null,"index":0,"function":{"name":null,"arguments":"{\"city\": \"北京\"}"}}]},"logprobs":null,"finish_reason":"tool_calls","stop_reason":151338,"token_ids":null}]}
data: [DONE]`
工具调用的前几个流式输出块拼接后,参数为 {"city":"北京,缺少结尾的 "}";而最后一个流式输出块(finish_reason 为 tool_calls)又重复输出了一份完整的参数 {"city": "北京"}。
Cherry Studio 会把最后一个流式输出块中的完整工具参数也拼接到前面的参数之后,导致工具参数解析报错。
Expected behavior / 期待表现
工具调用的流式输出块拼接后是正确的参数