Commit diff — crates/rustyclaw-core/src: 2 files changed (+8, −6 lines)

@@ -957,6 +957,7 @@ pub async fn call_openai_with_tools(
957957 let mut body = json ! ( {
958958 "model" : req. model,
959959 "messages" : messages,
960+ "max_tokens" : 16384 ,
960961 "stream" : true ,
961962 "stream_options" : { "include_usage" : true } ,
962963 } ) ;
@@ -1107,8 +1108,8 @@ pub async fn call_anthropic_with_tools(
11071108 // Use streaming when we have a writer to forward chunks to
11081109 let use_streaming = writer. is_some ( ) ;
11091110
1110- // Increase max_tokens when streaming to allow for longer responses
1111- let max_tokens = if use_streaming { 16384 } else { 4096 } ;
1111+ // Allow generous output length to avoid truncation on long responses
1112+ let max_tokens = 16384 ;
11121113
11131114 let mut body = json ! ( {
11141115 "model" : req. model,
@@ -78,6 +78,7 @@ pub async fn call_openai_streaming(
7878 let mut body = json ! ( {
7979 "model" : req. model,
8080 "messages" : messages,
81+ "max_tokens" : 16384 ,
8182 "stream" : true ,
8283 } ) ;
8384
@@ -226,12 +227,12 @@ pub async fn call_anthropic_streaming(
226227 } )
227228 . collect ( ) ;
228229
229- // Determine max_tokens based on whether thinking is enabled
230- // Extended thinking requires higher max_tokens to accommodate thinking + response
230+ // Allow generous output length to avoid truncation on long responses.
231+ // Extended thinking needs even more room, but 16384 is a good baseline.
231232 let max_tokens = if req. thinking_budget . is_some ( ) {
232- 16384 // Allow room for thinking + response
233+ 32768
233234 } else {
234- 4096
235+ 16384
235236 } ;
236237
237238 let mut body = json ! ( {
0 commit comments