aaif-goose · stebbins · Feb 14, 2026
diff --git a/crates/goose/src/agents/agent.rs b/crates/goose/src/agents/agent.rs
@@ -1346,8 +1346,9 @@ impl Agent {
                                     }
                                 }
 
-                                // Preserve thinking content from the original response
+                                // Preserve thinking/reasoning content from the original response
                                 // Gemini (and other thinking models) require thinking to be echoed back
+                                // Kimi/DeepSeek require reasoning_content on assistant tool call messages
                                 let thinking_content: Vec<MessageContent> = response.content.iter()
                                     .filter(|c| matches!(c, MessageContent::Thinking(_)))
                                     .cloned()
@@ -1361,10 +1362,25 @@ impl Agent {
                                     messages_to_add.push(thinking_msg);
                                 }
 
+                                // Collect reasoning content to attach to tool request messages
+                                let reasoning_content: Vec<MessageContent> = response.content.iter()
+                                    .filter(|c| matches!(c, MessageContent::Reasoning(_)))
+                                    .cloned()
+                                    .collect();
+
                                 for (idx, request) in frontend_requests.iter().chain(remaining_requests.iter()).enumerate() {
                                     if request.tool_call.is_ok() {
-                                        let request_msg = Message::assistant()
-                                            .with_id(format!("msg_{}", Uuid::new_v4()))
+                                        let mut request_msg = Message::assistant()
+                                            .with_id(format!("msg_{}", Uuid::new_v4()));
+
+                                        // Attach reasoning content to EVERY split tool request message.
+                                        // Providers like Kimi require reasoning_content on all assistant
+                                        // messages with tool_calls when thinking mode is enabled.
+                                        for rc in &reasoning_content {
+                                            request_msg = request_msg.with_content(rc.clone());
+                                        }
+
+                                        request_msg = request_msg
                                             .with_tool_request_with_metadata(
                                                 request.id.clone(),
                                                 request.tool_call.clone(),

diff --git a/crates/goose/src/model.rs b/crates/goose/src/model.rs
@@ -98,8 +98,11 @@ static MODEL_SPECIFIC_LIMITS: Lazy<Vec<(&'static str, usize)>> = Lazy::new(|| {
         ("grok-4", 256_000),
         ("grok-code-fast-1", 256_000),
         ("grok", 131_072),
-        // other
-        ("kimi-k2", 131_072),
+        // moonshot/kimi
+        ("kimi-k2-0711", 131_072),
+        ("kimi-k2", 262_144),
+        ("kimi-for-coding", 262_144),
+        ("kimi-code", 262_144),
     ]
 });
 

diff --git a/crates/goose/src/providers/declarative/kimi.json b/crates/goose/src/providers/declarative/kimi.json
@@ -0,0 +1,29 @@
+{
+  "name": "kimi",
+  "engine": "openai",
+  "display_name": "Kimi Code",
+  "description": "Kimi Code subscription models (powered by Kimi K2.5)",
+  "api_key_env": "KIMI_API_KEY",
+  "base_url": "https://api.kimi.com/coding/v1/chat/completions",
+  "models": [
+    {
+      "name": "kimi-for-coding",
+      "context_limit": 262144,
+      "input_token_cost": null,
+      "output_token_cost": null,
+      "currency": null,
+      "supports_cache_control": null
+    },
+    {
+      "name": "kimi-code",
+      "context_limit": 262144,
+      "input_token_cost": null,
+      "output_token_cost": null,
+      "currency": null,
+      "supports_cache_control": null
+    }
+  ],
+  "headers": null,
+  "timeout_seconds": null,
+  "supports_streaming": true
+}
diff --git a/crates/goose/src/providers/formats/openai.rs b/crates/goose/src/providers/formats/openai.rs
@@ -82,7 +82,7 @@ pub fn format_messages(messages: &[Message], image_format: &ImageFormat) -> Vec<
         let mut output = Vec::new();
         let mut content_array = Vec::new();
         let mut text_array = Vec::new();
-        let mut reasoning_text: Option<String> = None;
+        let mut reasoning_text = String::new();
 
         for content in &message.content {
             match content {
@@ -116,7 +116,7 @@ pub fn format_messages(messages: &[Message], image_format: &ImageFormat) -> Vec<
                     continue;
                 }
                 MessageContent::Reasoning(r) => {
-                    reasoning_text = Some(r.text.clone());
+                    reasoning_text.push_str(&r.text);
                 }
                 MessageContent::ToolRequest(request) => match &request.tool_call {
                     Ok(tool_call) => {
@@ -278,15 +278,11 @@ pub fn format_messages(messages: &[Message], image_format: &ImageFormat) -> Vec<
             converted["content"] = json!(null);
         }
 
-        // DeepSeek requires reasoning_content field when tool_calls are present
-        // Set it to the captured reasoning text, or empty string if not present
-        if converted.get("tool_calls").is_some() {
-            let reasoning = reasoning_text.unwrap_or_default();
-            converted["reasoning_content"] = json!(reasoning);
-        } else if let Some(reasoning) = reasoning_text {
-            if !reasoning.is_empty() {
-                converted["reasoning_content"] = json!(reasoning);
-            }
+        // Include reasoning_content only when non-empty: Kimi rejects an empty
+        // string (""), so the field is omitted when there is no reasoning to send.
+        // NOTE(review): DeepSeek previously always got this field with tool_calls — confirm.
+        if !reasoning_text.is_empty() {
+            converted["reasoning_content"] = json!(reasoning_text);
         }
 
         if converted.get("content").is_some() || converted.get("tool_calls").is_some() {
@@ -542,6 +538,7 @@ where
         use futures::StreamExt;
 
         let mut accumulated_reasoning: Vec<Value> = Vec::new();
+        let mut accumulated_reasoning_content = String::new();
 
         'outer: while let Some(response) = stream.next().await {
             if response.as_ref().is_ok_and(|s| s == "data: [DONE]") {
@@ -562,6 +559,9 @@ where
                 if let Some(details) = &chunk.choices[0].delta.reasoning_details {
                     accumulated_reasoning.extend(details.iter().cloned());
                 }
+                if let Some(rc) = &chunk.choices[0].delta.reasoning_content {
+                    accumulated_reasoning_content.push_str(rc);
+                }
             }
 
             let mut usage = extract_usage_with_output_tokens(&chunk);
@@ -602,6 +602,9 @@ where
                                     if let Some(details) = &tool_chunk.choices[0].delta.reasoning_details {
                                         accumulated_reasoning.extend(details.iter().cloned());
                                     }
+                                    if let Some(rc) = &tool_chunk.choices[0].delta.reasoning_content {
+                                        accumulated_reasoning_content.push_str(rc);
+                                    }
                                     if let Some(delta_tool_calls) = &tool_chunk.choices[0].delta.tool_calls {
                                         for delta_call in delta_tool_calls {
                                             if let Some(index) = delta_call.index {
@@ -642,6 +645,10 @@ where
                 };
 
                 let mut contents = Vec::new();
+                if !accumulated_reasoning_content.is_empty() {
+                    contents.push(MessageContent::reasoning(&accumulated_reasoning_content));
+                    accumulated_reasoning_content.clear();
+                }
                 let mut sorted_indices: Vec<_> = tool_call_data.keys().cloned().collect();
                 sorted_indices.sort();
 
@@ -835,6 +842,15 @@ pub fn create_request(
         payload["stream_options"] = json!({"include_usage": true});
     }
 
+    // Merge provider request params (e.g., reasoning_effort, thinking config), overwriting same-named keys
+    if let Some(ref params) = model_config.request_params {
+        if let Some(payload_obj) = payload.as_object_mut() {
+            for (key, value) in params {
+                payload_obj.insert(key.clone(), value.clone());
+            }
+        }
+    }
+
     Ok(payload)
 }