Skip to content

Commit 8d8ce60

Browse files
authored
feat: Preserve thinking content in Tinker server chat templates (#685)
1 parent 48488ea commit 8d8ce60

1 file changed

Lines changed: 12 additions & 17 deletions

File tree

src/art/tinker/server.py

Lines changed: 12 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -113,10 +113,6 @@ def _normalize_qwen3_dot_messages(
113113
return normalized_messages
114114

115115

116-
def _chat_template_disables_thinking(base_model: str) -> bool:
117-
return is_qwen3_dot_family_model(base_model)
118-
119-
120116
@dataclass
121117
class OpenAICompatibleTinkerServer:
122118
host: str | None = None
@@ -556,19 +552,18 @@ async def prompt_tokens(
556552
) -> list[int]:
557553
normalized_messages = _normalize_qwen3_dot_messages(base_model, messages)
558554
tokenizer = self._get_renderer(base_model).tokenizer
559-
if _chat_template_disables_thinking(base_model):
560-
encoding = tokenizer.apply_chat_template(
561-
cast(Any, normalized_messages),
562-
tools=cast(Any, tools),
563-
add_generation_prompt=True,
564-
enable_thinking=False,
565-
)
566-
else:
567-
encoding = tokenizer.apply_chat_template(
568-
cast(Any, normalized_messages),
569-
tools=cast(Any, tools),
570-
add_generation_prompt=True,
571-
)
555+
chat_template_kwargs = {}
556+
if isinstance(tokenizer.chat_template, str):
557+
if "enable_thinking" in tokenizer.chat_template:
558+
chat_template_kwargs["enable_thinking"] = False
559+
if "preserve_thinking" in tokenizer.chat_template:
560+
chat_template_kwargs["preserve_thinking"] = True
561+
encoding = tokenizer.apply_chat_template(
562+
cast(Any, normalized_messages),
563+
tools=cast(Any, tools),
564+
add_generation_prompt=True,
565+
**chat_template_kwargs,
566+
)
572567
if isinstance(encoding, BatchEncoding):
573568
return encoding.input_ids
574569
else:

0 commit comments

Comments
 (0)