@@ -113,10 +113,6 @@ def _normalize_qwen3_dot_messages(
113113 return normalized_messages
114114
115115
116- def _chat_template_disables_thinking (base_model : str ) -> bool :
117- return is_qwen3_dot_family_model (base_model )
118-
119-
120116@dataclass
121117class OpenAICompatibleTinkerServer :
122118 host : str | None = None
@@ -556,19 +552,18 @@ async def prompt_tokens(
556552 ) -> list [int ]:
557553 normalized_messages = _normalize_qwen3_dot_messages (base_model , messages )
558554 tokenizer = self ._get_renderer (base_model ).tokenizer
559- if _chat_template_disables_thinking (base_model ):
560- encoding = tokenizer .apply_chat_template (
561- cast (Any , normalized_messages ),
562- tools = cast (Any , tools ),
563- add_generation_prompt = True ,
564- enable_thinking = False ,
565- )
566- else :
567- encoding = tokenizer .apply_chat_template (
568- cast (Any , normalized_messages ),
569- tools = cast (Any , tools ),
570- add_generation_prompt = True ,
571- )
555+ chat_template_kwargs = {}
556+ if isinstance (tokenizer .chat_template , str ):
557+ if "enable_thinking" in tokenizer .chat_template :
558+ chat_template_kwargs ["enable_thinking" ] = False
559+ if "preserve_thinking" in tokenizer .chat_template :
560+ chat_template_kwargs ["preserve_thinking" ] = True
561+ encoding = tokenizer .apply_chat_template (
562+ cast (Any , normalized_messages ),
563+ tools = cast (Any , tools ),
564+ add_generation_prompt = True ,
565+ ** chat_template_kwargs ,
566+ )
572567 if isinstance (encoding , BatchEncoding ):
573568 return encoding .input_ids
574569 else :
0 commit comments