Skip to content

Commit 5b07a59

Browse files
wehosHongzhi Wenclaude
authored
fix(llm): 非流式收口剥离 Qwen3.5/3.6 泄漏进 content 的思考链 (#1529)
qwen3-vl-* 会把推理放进 reasoning_content(content 干净),但 Qwen3.5/3.6 混合模型走 OpenAI 兼容端点时 reasoning_content 恒空,整段思考直接落在 content 里,只有一个无开标签的孤立 </think> 分隔答案——普通成对正则抓不到。 新增共享 strip_thinking_segments():先删成对 <think>…</think>,再把剩下的无头 </think> 之前的内容(必为思考)连同该标签整段切掉;无标签的干净回复原样透传。 挂在 ChatOpenAI.ainvoke/invoke 这个非流式唯一收口处(astream 读 reasoning_content 的对偶位置),一处覆盖 proactive、memory 等所有非流式调用。 openclaw 原来的 _strip_reasoning_trace 去重复用该函数,顺带获得无头 </think> 处理能力。流式不动(思考逐 token 无分隔,无可靠切点)。 Co-authored-by: Hongzhi Wen <cartabio.coder1@gmail.com> Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent aa3548f commit 5b07a59

3 files changed

Lines changed: 108 additions & 6 deletions

File tree

brain/openclaw_adapter.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020

2121
from config import OPENCLAW_MAGIC_INTENT_MAX_TOKENS
2222
from utils.file_utils import robust_json_loads
23-
from utils.llm_client import create_chat_llm
23+
from utils.llm_client import create_chat_llm, strip_thinking_segments
2424
from utils.config_manager import get_config_manager
2525
from utils.logger_config import get_module_logger
2626

@@ -458,7 +458,10 @@ async def stop_running(
458458

459459
@staticmethod
460460
def _strip_reasoning_trace(text: str) -> str:
461-
cleaned = re.sub(r"<think>.*?</think>", "", str(text or ""), flags=re.IGNORECASE | re.DOTALL).strip()
461+
# Shared stripper handles both paired <think>...</think> and the
462+
# Qwen3.5/3.6 dangling-</think> leak shape; ReAct line filtering below
463+
# is openclaw-specific and stays here.
464+
cleaned = strip_thinking_segments(text)
462465
if not cleaned:
463466
return ""
464467

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
# -*- coding: utf-8 -*-
2+
"""``strip_thinking_segments`` — defensive chain-of-thought removal for
3+
non-streaming replies.
4+
5+
Background: qwen3-vl-* route reasoning to the ``reasoning_content`` field
6+
(``content`` stays clean), but the Qwen3.5/3.6 hybrid models never populate
7+
``reasoning_content`` over the OpenAI-compatible endpoint — the whole
8+
chain-of-thought lands in ``content`` with only a *dangling* ``</think>`` (no
9+
opening tag) before the real answer. A paired-tag regex can't catch that;
10+
these cases pin the dangling-close behavior plus the well-formed and
11+
passthrough cases.
12+
"""
13+
import os
14+
import sys
15+
16+
import pytest
17+
18+
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "../../")))
19+
20+
from utils.llm_client import strip_thinking_segments
21+
22+
23+
@pytest.mark.parametrize(
    ("raw", "expected"),
    [
        # 1) Qwen3.5 leak shape: reasoning with no opening tag, a lone
        #    </think>, then the real answer.
        (
            "用户让我描述图片。草稿2更准确简洁。\n</think>\n\n这张图片包含一个红色的矩形。",
            "这张图片包含一个红色的矩形。",
        ),
        # 2) A single well-formed <think>...</think> block.
        ("<think>reason here</think>final answer", "final answer"),
        # 3) The long-form <thinking>...</thinking> spelling, paired.
        ("<thinking>step 1\nstep 2</thinking>\nDone.", "Done."),
        # 4) Several paired blocks interleaved with answer text.
        ("<think>a</think>X<think>b</think>Y", "XY"),
        # 5) A reply with no tags at all (qwen3-vl path) comes back unchanged.
        (
            "图中左侧是一个红色矩形,右侧是一个蓝色圆形。",
            "图中左侧是一个红色矩形,右侧是一个蓝色圆形。",
        ),
        # 6) Multi-line reasoning ahead of the dangling close tag.
        ("1. 识别主体\n2. 组织语言\n精简一下:\n</think>\n\n答案在这里", "答案在这里"),
        # 7) The close tag is matched case-insensitively.
        ("thinking...</THINK>answer", "answer"),
        # 8) Empty string and None both normalize to "".
        ("", ""),
        (None, ""),
    ],
)
def test_strip(raw, expected):
    """Each raw reply must reduce to exactly the expected visible answer."""
    actual = strip_thinking_segments(raw)
    assert actual == expected
49+
50+
51+
def test_no_answer_after_dangling_close_yields_empty():
    """A reply that is all reasoning and ends on the close tag strips to ""."""
    reasoning_only = "just reasoning, no answer\n</think>"
    assert strip_thinking_segments(reasoning_only) == ""
54+
55+
56+
def test_plain_text_with_no_tags_is_identity():
    """Tag-free text with no surrounding whitespace is returned as-is."""
    plain = "这是一段普通回复,没有任何思考标签,应原样返回。"
    result = strip_thinking_segments(plain)
    assert result == plain

utils/llm_client.py

Lines changed: 45 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,12 +12,53 @@
1212

1313
import contextvars
1414
import json as _json
15+
import re
1516
from dataclasses import dataclass, field
1617
from typing import Any, AsyncIterator, Union
1718

1819
from openai import AsyncOpenAI, OpenAI
1920

2021

22+
# ────────────────────────────────────────────────────────────────
23+
# Reasoning-trace stripping (non-streaming defensive cleanup)
24+
# ────────────────────────────────────────────────────────────────
25+
# Well-formed <think>...</think> / <thinking>...</thinking> blocks.
26+
_THINK_PAIRED_RE = re.compile(r"<think(?:ing)?\s*>.*?</think(?:ing)?\s*>", re.IGNORECASE | re.DOTALL)
27+
# A *dangling* close tag with no matching open. This is the Qwen3.5/3.6
28+
# OpenAI-compat leak shape: unlike qwen3-vl-* (which route reasoning to the
29+
# ``reasoning_content`` field), the 3.5/3.6 hybrid models never populate
30+
# ``reasoning_content`` — the whole chain-of-thought lands in ``content`` with
31+
# only a lone ``</think>`` (implicit open) separating it from the real answer.
32+
# A paired-tag regex alone can't catch this; we strip everything up to and
33+
# including the first unmatched close tag.
34+
_THINK_DANGLING_CLOSE_RE = re.compile(r"^.*?</think(?:ing)?\s*>", re.IGNORECASE | re.DOTALL)
35+
_THINK_ANY_CLOSE_RE = re.compile(r"</think(?:ing)?\s*>", re.IGNORECASE)
36+
37+
38+
def strip_thinking_segments(text: str | None) -> str:
39+
"""Remove leaked chain-of-thought from a *non-streaming* model reply.
40+
41+
Handles two shapes:
42+
1. Well-formed ``<think>...</think>`` blocks (any count).
43+
2. Qwen3.5/3.6 leak: reasoning dumped into ``content`` with only a
44+
dangling ``</think>`` (no opening tag) before the answer.
45+
46+
Conservative — only acts when a think tag is present, so clean replies
47+
(qwen3-vl-*, gpt, claude, etc.) pass through untouched. Streaming is *not*
48+
covered here on purpose: when the chain-of-thought arrives token-by-token
49+
in ``delta.content`` with no delimiter there's nothing reliable to strip.
50+
"""
51+
if not text:
52+
return text or ""
53+
s = str(text)
54+
# 1) drop well-formed blocks first
55+
s = _THINK_PAIRED_RE.sub("", s)
56+
# 2) any close tag still present is unmatched → preceding text is thinking
57+
if _THINK_ANY_CLOSE_RE.search(s):
58+
s = _THINK_DANGLING_CLOSE_RE.sub("", s, count=1)
59+
return s.strip()
60+
61+
2162
# ────────────────────────────────────────────────────────────────
2263
# Active-character context — used by ChatOpenAI._params to substitute
2364
# ``{MASTER_NAME}`` / ``{LANLAN_NAME}`` placeholders that originated from
@@ -475,18 +516,18 @@ async def ainvoke(self, messages: Any, **overrides: Any) -> LLMResponse:
475516
# message=None 的合法响应,直接 .message.content 会 NoneType 崩溃。
476517
choice = resp.choices[0] if resp.choices else None
477518
msg = choice.message if choice else None
478-
content = getattr(msg, "content", None)
519+
content = strip_thinking_segments(getattr(msg, "content", None))
479520
usage_dict = resp.usage.model_dump() if resp.usage else {}
480-
return LLMResponse(content=content or "", response_metadata={"token_usage": usage_dict})
521+
return LLMResponse(content=content, response_metadata={"token_usage": usage_dict})
481522

482523
def invoke(self, messages: Any, **overrides: Any) -> LLMResponse:
    """Synchronous counterpart of ``ainvoke``; ``overrides`` is documented there."""
    completion = self._client.chat.completions.create(**self._params(messages, **overrides))
    # Tolerate a response with no choices: fall back to None instead of
    # indexing an empty list; getattr() below then yields None content.
    first_choice = completion.choices[0] if completion.choices else None
    message = first_choice.message if first_choice else None
    # strip_thinking_segments always returns a str, so no ``or ""`` is needed.
    cleaned = strip_thinking_segments(getattr(message, "content", None))
    token_usage = completion.usage.model_dump() if completion.usage else {}
    return LLMResponse(content=cleaned, response_metadata={"token_usage": token_usage})
490531

491532
# --- raw-resp invoke (for callers needing reasoning_content / raw choices) ---
492533

0 commit comments

Comments
 (0)