Skip to content

Commit 4ea6b3d

Browse files
henrypark133claude
authored and committed
fix(llm): invert reasoning default — unknown models skip think/final tags (nearai#1952)
* fix(llm): invert reasoning default — unknown models skip <think>/<final> injection When NEAR AI model="auto" resolves server-side to Qwen 3.5, the system prompt injected <think>/<final> tags because "auto" didn't match any known native-thinking pattern. This caused empty responses: 1. Qwen 3.5's native thinking puts reasoning in a `reasoning` field (not `reasoning_content`) — silently dropped due to field name mismatch 2. Content contained only <think> tags or <tool_call> XML, which clean_response() stripped to empty → "I'm not sure how to respond" Three fixes: - Invert the default: new requires_think_final_tags() with empty allowlist means unknown/alias models get the safe direct-answer prompt - Add #[serde(alias = "reasoning")] so vLLM's field name is accepted - Update active_model from API response.model so capability checks use the resolved model name after the first call Confirmed via direct API testing against NEAR AI staging with Qwen/Qwen3.5-122B-A10B. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * remove model alias resolution from nearai_chat auto should stay as the active model name — no reason to overwrite it with the resolved model since requires_think_final_tags() returns false for both "auto" and the resolved name. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * fix wording: remove native-thinking assumption from direct-answer prompt The direct-answer prompt is now the default for all models, not just native-thinking ones. Remove misleading "handled natively" language. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> --------- Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com> (cherry picked from commit 0588dd1)
1 parent 10fe3e6 commit 4ea6b3d

File tree

3 files changed

+229
-85
lines changed

3 files changed

+229
-85
lines changed

src/llm/nearai_chat.rs

Lines changed: 91 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1044,9 +1044,10 @@ struct ChatCompletionResponseMessage {
10441044
#[allow(dead_code)]
10451045
role: String,
10461046
content: Option<String>,
1047-
/// Some models (e.g. GLM-5) return chain-of-thought reasoning here
1048-
/// instead of in `content`.
1049-
#[serde(default)]
1047+
/// Some models return chain-of-thought reasoning here instead of in
1048+
/// `content`. vLLM/SGLang backends (used by NEAR AI) return the field
1049+
/// as `reasoning`; other APIs (GLM-5, DeepSeek) use `reasoning_content`.
1050+
#[serde(default, alias = "reasoning")]
10501051
reasoning_content: Option<String>,
10511052
tool_calls: Option<Vec<ChatCompletionToolCall>>,
10521053
}
@@ -1535,6 +1536,93 @@ mod tests {
15351536
assert!(tool_calls.is_empty());
15361537
}
15371538

1539+
/// The vLLM/SGLang API returns `reasoning` (not `reasoning_content`).
1540+
/// Verify that the serde alias deserializes it correctly.
1541+
#[test]
1542+
fn test_reasoning_field_alias_accepted() {
1543+
let response: ChatCompletionResponse = serde_json::from_value(serde_json::json!({
1544+
"id": "chatcmpl-test",
1545+
"model": "Qwen/Qwen3.5-122B-A10B",
1546+
"choices": [{
1547+
"message": {
1548+
"role": "assistant",
1549+
"content": null,
1550+
"reasoning": "The answer is 42."
1551+
},
1552+
"finish_reason": "stop"
1553+
}],
1554+
"usage": { "prompt_tokens": 50, "completion_tokens": 20 }
1555+
}))
1556+
.unwrap();
1557+
1558+
let choice = response.choices.into_iter().next().unwrap();
1559+
let content = choice.message.content.or(choice.message.reasoning_content);
1560+
1561+
assert_eq!(
1562+
content,
1563+
Some("The answer is 42.".to_string()),
1564+
"reasoning field (vLLM alias) should deserialize into reasoning_content"
1565+
);
1566+
}
1567+
1568+
/// Verify that `reasoning` field does NOT leak into tool-call responses
1569+
/// (same logic as reasoning_content — only used for text fallback).
1570+
#[test]
1571+
fn test_reasoning_alias_not_leaked_into_tool_calls() {
1572+
let response: ChatCompletionResponse = serde_json::from_value(serde_json::json!({
1573+
"id": "chatcmpl-test",
1574+
"model": "Qwen/Qwen3.5-122B-A10B",
1575+
"choices": [{
1576+
"message": {
1577+
"role": "assistant",
1578+
"content": null,
1579+
"reasoning": "Let me think about which tool to call...",
1580+
"tool_calls": [{
1581+
"id": "call_xyz",
1582+
"type": "function",
1583+
"function": {
1584+
"name": "web_search",
1585+
"arguments": "{\"query\":\"test\"}"
1586+
}
1587+
}]
1588+
},
1589+
"finish_reason": "tool_calls"
1590+
}],
1591+
"usage": { "prompt_tokens": 100, "completion_tokens": 50 }
1592+
}))
1593+
.unwrap();
1594+
1595+
let choice = response.choices.into_iter().next().unwrap();
1596+
let tool_calls: Vec<ToolCall> = choice
1597+
.message
1598+
.tool_calls
1599+
.unwrap_or_default()
1600+
.into_iter()
1601+
.map(|tc| {
1602+
let arguments = serde_json::from_str(&tc.function.arguments)
1603+
.unwrap_or(serde_json::Value::Object(Default::default()));
1604+
ToolCall {
1605+
id: tc.id,
1606+
name: tc.function.name,
1607+
arguments,
1608+
reasoning: None,
1609+
}
1610+
})
1611+
.collect();
1612+
1613+
let content = if tool_calls.is_empty() {
1614+
choice.message.content.or(choice.message.reasoning_content)
1615+
} else {
1616+
choice.message.content
1617+
};
1618+
1619+
assert!(
1620+
content.is_none(),
1621+
"reasoning (alias) should NOT leak into tool-call responses"
1622+
);
1623+
assert_eq!(tool_calls.len(), 1);
1624+
}
1625+
15381626
#[tokio::test]
15391627
async fn test_resolve_bearer_token_config_api_key() {
15401628
// When config.api_key is set, it takes top priority.

src/llm/reasoning.rs

Lines changed: 45 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -984,21 +984,16 @@ Respond with a JSON plan in this format:
984984
.to_string()
985985
};
986986

987-
// Models with native thinking (Qwen3, DeepSeek-R1, etc.) produce their
988-
// own <think> tags or reasoning_content. Injecting our <think>/<final>
989-
// format collides with their native behavior, causing thinking-only
990-
// responses that clean to empty strings. See issue #789.
991-
let has_native_thinking = self
987+
// Default: direct-answer format. Only inject <think>/<final> tags for
988+
// models explicitly known to require them. Unknown models, aliases like
989+
// "auto", and native-thinking models all get the safe direct-answer
990+
// format. See issue #789.
991+
let needs_tags = self
992992
.model_name
993993
.as_ref()
994-
.is_some_and(|n| crate::llm::reasoning_models::has_native_thinking(n));
994+
.is_some_and(|n| crate::llm::reasoning_models::requires_think_final_tags(n));
995995

996-
let response_format = if has_native_thinking {
997-
r#"## Response Format
998-
999-
Respond directly with your answer. Do not wrap your response in any special tags.
1000-
Your reasoning process is handled natively — just provide the final user-facing answer."#
1001-
} else {
996+
let response_format = if needs_tags {
1002997
r#"## Response Format — CRITICAL
1003998
1004999
ALL internal reasoning MUST be inside <think>...</think> tags.
@@ -1010,6 +1005,10 @@ Only text inside <final> is shown to the user; everything else is discarded.
10101005
Example:
10111006
<think>The user is asking about X.</think>
10121007
<final>Here is the answer about X.</final>"#
1008+
} else {
1009+
r#"## Response Format
1010+
1011+
Respond directly with your final answer. Do not wrap your response in any special tags."#
10131012
};
10141013

10151014
format!(
@@ -3007,41 +3006,61 @@ That's my plan."#;
30073006
}
30083007

30093008
#[test]
3010-
fn test_system_prompt_skips_think_final_for_native_thinking() {
3009+
fn test_system_prompt_direct_answer_for_native_thinking_model() {
30113010
let reasoning = make_reasoning_with_model("qwen3-8b");
30123011
let prompt = reasoning.build_system_prompt_with_tools(&[]);
30133012
assert!(
30143013
!prompt.contains("<think>"),
30153014
"Native thinking model should NOT have <think> in system prompt"
30163015
);
3017-
assert!(prompt.contains("Respond directly with your answer"));
3016+
assert!(prompt.contains("Respond directly"));
30183017
}
30193018

30203019
#[test]
3021-
fn test_system_prompt_includes_think_final_for_regular_model() {
3020+
fn test_system_prompt_direct_answer_for_regular_model() {
3021+
// Regular models also get direct-answer format by default (inverted default)
30223022
let reasoning = make_reasoning_with_model("llama-3.1-70b");
30233023
let prompt = reasoning.build_system_prompt_with_tools(&[]);
3024-
assert!(prompt.contains("<think>"));
3025-
assert!(prompt.contains("<final>"));
3024+
assert!(!prompt.contains("<think>"));
3025+
assert!(!prompt.contains("<final>"));
3026+
assert!(prompt.contains("Respond directly"));
30263027
}
30273028

30283029
#[test]
3029-
fn test_system_prompt_defaults_to_think_final_when_no_model() {
3030+
fn test_system_prompt_defaults_to_direct_answer_when_no_model() {
30303031
use crate::testing::StubLlm;
30313032
let reasoning = Reasoning::new(Arc::new(StubLlm::new("test")));
30323033
let prompt = reasoning.build_system_prompt_with_tools(&[]);
3033-
assert!(prompt.contains("<think>"));
3034-
assert!(prompt.contains("<final>"));
3034+
// No model name → safe default → direct-answer (no tags)
3035+
assert!(!prompt.contains("<think>"));
3036+
assert!(!prompt.contains("<final>"));
3037+
assert!(prompt.contains("Respond directly"));
30353038
}
30363039

30373040
#[test]
3038-
fn test_system_prompt_deepseek_r1_skips_think_final() {
3041+
fn test_system_prompt_direct_answer_for_deepseek_r1() {
30393042
let reasoning = make_reasoning_with_model("deepseek-r1-distill-qwen-32b");
30403043
let prompt = reasoning.build_system_prompt_with_tools(&[]);
30413044
assert!(!prompt.contains("CRITICAL"));
30423045
assert!(prompt.contains("Respond directly"));
30433046
}
30443047

3048+
#[test]
3049+
fn test_system_prompt_direct_answer_for_auto_alias() {
3050+
let reasoning = make_reasoning_with_model("auto");
3051+
let prompt = reasoning.build_system_prompt_with_tools(&[]);
3052+
assert!(!prompt.contains("<think>"));
3053+
assert!(prompt.contains("Respond directly"));
3054+
}
3055+
3056+
#[test]
3057+
fn test_system_prompt_direct_answer_for_resolved_qwen() {
3058+
let reasoning = make_reasoning_with_model("Qwen/Qwen3.5-122B-A10B");
3059+
let prompt = reasoning.build_system_prompt_with_tools(&[]);
3060+
assert!(!prompt.contains("<think>"));
3061+
assert!(prompt.contains("Respond directly"));
3062+
}
3063+
30453064
// ---- Issue #789: additional edge case tests for truncate_at_tool_tags ----
30463065

30473066
#[test]
@@ -3332,13 +3351,15 @@ That's my plan."#;
33323351
);
33333352
assert!(prompt.contains("Respond directly"));
33343353

3335-
// Now create reasoning WITHOUT with_model_name — should get default prompt
3354+
// Now create reasoning WITHOUT with_model_name — should get direct-answer
3355+
// default (inverted default: unknown models are native-thinking-safe)
33363356
let reasoning_no_model = Reasoning::new(llm);
33373357
let prompt2 = reasoning_no_model.build_system_prompt_with_tools(&[]);
33383358
assert!(
3339-
prompt2.contains("<think>"),
3340-
"Without model name, should get default think/final prompt"
3359+
!prompt2.contains("<think>"),
3360+
"Without model name, should get direct-answer prompt (safe default)"
33413361
);
3362+
assert!(prompt2.contains("Respond directly"));
33423363
}
33433364

33443365
// ---- Issue #789: case-insensitive truncation ----

0 commit comments

Comments (0)