Skip to content

Commit f0fc0ef

Browse files
stebbins authored and claude committed
fix: add provider-aware message splitting to fix Moonshot thinking model errors
Moonshot's kimi-k2-thinking model requires a single assistant message with all tool_calls and reasoning_content together. The previous code always split thinking and tool calls into separate messages (needed by Gemini), which broke Moonshot with "reasoning_content is missing" errors. Add `should_split_tool_messages()` to the Provider trait (default false) and override it to true only for Google/Gemini/GcpVertexAI providers. When false, the agent keeps the original response message intact and appends tool responses after it.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 2719b6d commit f0fc0ef

File tree

5 files changed

+74
-29
lines changed

5 files changed

+74
-29
lines changed

crates/goose/src/agents/agent.rs

Lines changed: 49 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1417,36 +1417,56 @@ impl Agent {
14171417
}
14181418
}
14191419

1420-
// Preserve thinking content from the original response
1421-
// Gemini (and other thinking models) require thinking to be echoed back
1422-
let thinking_content: Vec<MessageContent> = response.content.iter()
1423-
.filter(|c| matches!(c, MessageContent::Thinking(_)))
1424-
.cloned()
1425-
.collect();
1426-
if !thinking_content.is_empty() {
1427-
let thinking_msg = Message::new(
1428-
response.role.clone(),
1429-
response.created,
1430-
thinking_content,
1431-
).with_id(format!("msg_{}", Uuid::new_v4()));
1432-
messages_to_add.push(thinking_msg);
1433-
}
1420+
let should_split = self.provider().await
1421+
.map(|p| p.should_split_tool_messages())
1422+
.unwrap_or(false);
1423+
1424+
if should_split {
1425+
// Split mode (Google/Gemini): separate thinking and individual
1426+
// tool call messages. Gemini requires thinking to be echoed back
1427+
// as a separate message, and each tool call in its own message.
1428+
let thinking_content: Vec<MessageContent> = response.content.iter()
1429+
.filter(|c| matches!(c, MessageContent::Thinking(_)))
1430+
.cloned()
1431+
.collect();
1432+
if !thinking_content.is_empty() {
1433+
let thinking_msg = Message::new(
1434+
response.role.clone(),
1435+
response.created,
1436+
thinking_content,
1437+
).with_id(format!("msg_{}", Uuid::new_v4()));
1438+
messages_to_add.push(thinking_msg);
1439+
}
14341440

1435-
for (idx, request) in frontend_requests.iter().chain(remaining_requests.iter()).enumerate() {
1436-
if request.tool_call.is_ok() {
1437-
let request_msg = Message::assistant()
1438-
.with_id(format!("msg_{}", Uuid::new_v4()))
1439-
.with_tool_request_with_metadata(
1440-
request.id.clone(),
1441-
request.tool_call.clone(),
1442-
request.metadata.as_ref(),
1443-
request.tool_meta.clone(),
1444-
);
1445-
messages_to_add.push(request_msg);
1446-
let final_response = tool_response_messages[idx]
1447-
.lock().await.clone();
1448-
yield AgentEvent::Message(final_response.clone());
1449-
messages_to_add.push(final_response);
1441+
for (idx, request) in frontend_requests.iter().chain(remaining_requests.iter()).enumerate() {
1442+
if request.tool_call.is_ok() {
1443+
let request_msg = Message::assistant()
1444+
.with_id(format!("msg_{}", Uuid::new_v4()))
1445+
.with_tool_request_with_metadata(
1446+
request.id.clone(),
1447+
request.tool_call.clone(),
1448+
request.metadata.as_ref(),
1449+
request.tool_meta.clone(),
1450+
);
1451+
messages_to_add.push(request_msg);
1452+
let final_response = tool_response_messages[idx]
1453+
.lock().await.clone();
1454+
yield AgentEvent::Message(final_response.clone());
1455+
messages_to_add.push(final_response);
1456+
}
1457+
}
1458+
} else {
1459+
// Non-split mode (OpenAI-compatible providers including Moonshot):
1460+
// Keep the original response message intact with all tool_calls
1461+
// and reasoning_content together, then append tool responses.
1462+
messages_to_add.push(response.clone());
1463+
for (idx, request) in frontend_requests.iter().chain(remaining_requests.iter()).enumerate() {
1464+
if request.tool_call.is_ok() {
1465+
let final_response = tool_response_messages[idx]
1466+
.lock().await.clone();
1467+
yield AgentEvent::Message(final_response.clone());
1468+
messages_to_add.push(final_response);
1469+
}
14501470
}
14511471
}
14521472

crates/goose/src/providers/base.rs

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -563,6 +563,19 @@ pub trait Provider: Send + Sync {
563563
false
564564
}
565565

566+
/// Whether tool call messages should be split into individual messages.
567+
///
568+
/// When true (Google/Gemini providers), the agent splits a response with thinking + multiple
569+
/// tool calls into separate messages: one thinking message and one message per tool call.
570+
/// This is required because Gemini expects thinking content to be echoed back separately.
571+
///
572+
/// When false (default, OpenAI-compatible providers), the original response message is kept
573+
/// intact with all tool calls and reasoning content together. This is required by providers
574+
/// like Moonshot that expect a single assistant message with all tool_calls and reasoning_content.
575+
fn should_split_tool_messages(&self) -> bool {
576+
false
577+
}
578+
566579
/// Get the currently active model name
567580
/// For regular providers, this returns the configured model
568581
/// For LeadWorkerProvider, this returns the currently active model (lead or worker)

crates/goose/src/providers/gcpvertexai.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -648,6 +648,10 @@ impl Provider for GcpVertexAIProvider {
648648
true
649649
}
650650

651+
fn should_split_tool_messages(&self) -> bool {
652+
true
653+
}
654+
651655
async fn stream(
652656
&self,
653657
session_id: &str,

crates/goose/src/providers/gemini_cli.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -268,6 +268,10 @@ impl Provider for GeminiCliProvider {
268268
self.model.clone()
269269
}
270270

271+
fn should_split_tool_messages(&self) -> bool {
272+
true
273+
}
274+
271275
#[tracing::instrument(
272276
skip(self, _model_config, system, messages, tools),
273277
fields(model_config, input, output, input_tokens, output_tokens, total_tokens)

crates/goose/src/providers/google.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -211,6 +211,10 @@ impl Provider for GoogleProvider {
211211
true
212212
}
213213

214+
fn should_split_tool_messages(&self) -> bool {
215+
true
216+
}
217+
214218
async fn stream(
215219
&self,
216220
session_id: &str,

0 commit comments

Comments (0)