@@ -1979,6 +1979,146 @@ static common_chat_params common_chat_params_init_deepseek_v3_2(const common_cha
19791979 return data;
19801980}
19811981
1982+ // Cohere2 MoE (a.k.a. "North Code") parser.
1983+ //
1984+ // The assistant turn is fully marker-wrapped:
1985+ // <|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>
1986+ // <|START_THINKING|>{reasoning}<|END_THINKING|>
1987+ // then EITHER content: <|START_TEXT|>{content}<|END_TEXT|>
1988+ // OR tool calls: <|START_ACTION|>[
1989+ // {"tool_call_id": "0", "tool_name": "f", "parameters": {...}}, ...
1990+ // ]<|END_ACTION|>
1991+ // <|END_OF_TURN_TOKEN|>
1992+ //
1993+ // The generation prompt forces a leading <|START_THINKING|> (when reasoning is enabled, which is
1994+ // the template default), so the model's output continues from *inside* the thinking block. The
1995+ // parser literal therefore only covers the stable <|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|> prefix
1996+ // and the reasoning rule consumes the <|START_THINKING|> ... <|END_THINKING|> markers itself,
1997+ // regardless of whether they came from the generation prompt or the generated text.
1998+ static common_chat_params common_chat_params_init_cohere2moe (const common_chat_template & tmpl,
1999+ const autoparser::generation_params & inputs) {
2000+ common_chat_params data;
2001+
2002+ const std::string TURN_START = " <|START_OF_TURN_TOKEN|>" ;
2003+ const std::string TURN_END = " <|END_OF_TURN_TOKEN|>" ;
2004+ const std::string CHATBOT = " <|CHATBOT_TOKEN|>" ;
2005+ const std::string USER = " <|USER_TOKEN|>" ;
2006+ const std::string SYSTEM = " <|SYSTEM_TOKEN|>" ;
2007+ const std::string THINK_START = " <|START_THINKING|>" ;
2008+ const std::string THINK_END = " <|END_THINKING|>" ;
2009+ const std::string TEXT_START = " <|START_TEXT|>" ;
2010+ const std::string TEXT_END = " <|END_TEXT|>" ;
2011+ const std::string ACTION_START = " <|START_ACTION|>" ;
2012+ const std::string ACTION_END = " <|END_ACTION|>" ;
2013+ const std::string RESULT_START = " <|START_TOOL_RESULT|>" ;
2014+ const std::string RESULT_END = " <|END_TOOL_RESULT|>" ;
2015+
2016+ // Stable prefix of the generation prompt that precedes the (forced) <|START_THINKING|> marker.
2017+ const std::string GEN_PREFIX = TURN_START + CHATBOT ;
2018+
2019+ data.prompt = common_chat_template_direct_apply_impl (tmpl, inputs);
2020+ data.generation_prompt = common_chat_template_generation_prompt_impl (tmpl, inputs);
2021+ data.format = COMMON_CHAT_FORMAT_PEG_NATIVE ;
2022+ data.supports_thinking = true ;
2023+ data.thinking_start_tag = THINK_START ;
2024+ data.thinking_end_tag = THINK_END ;
2025+ data.preserved_tokens = {
2026+ TURN_START , TURN_END , CHATBOT , USER , SYSTEM ,
2027+ THINK_START , THINK_END ,
2028+ TEXT_START , TEXT_END ,
2029+ ACTION_START , ACTION_END ,
2030+ RESULT_START , RESULT_END ,
2031+ };
2032+
2033+ // Split the rendered prompt into per-role message spans. Tool results are rendered with the
2034+ // system token followed by <|START_TOOL_RESULT|>, so the "tool" delimiter must be listed before
2035+ // the plain "system" one (it is a strict superset, and the role split tries delimiters in order).
2036+ data.message_spans = common_chat_split_by_role (data.prompt , {
2037+ { " assistant" , GEN_PREFIX },
2038+ { " user" , TURN_START + USER },
2039+ { " tool" , TURN_START + SYSTEM + RESULT_START },
2040+ { " system" , TURN_START + SYSTEM },
2041+ });
2042+
2043+ auto has_tools = inputs.tools .is_array () && !inputs.tools .empty ();
2044+ auto extract_reasoning = inputs.reasoning_format != COMMON_REASONING_FORMAT_NONE ;
2045+ auto include_grammar = has_tools && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE ;
2046+
2047+ if (inputs.has_continuation ()) {
2048+ const auto & msg = inputs.continue_msg ;
2049+
2050+ data.generation_prompt = GEN_PREFIX + THINK_START + msg.reasoning_content ;
2051+ if (inputs.continue_final_message == COMMON_CHAT_CONTINUATION_CONTENT ) {
2052+ data.generation_prompt += THINK_END + TEXT_START + msg.render_content ();
2053+ }
2054+
2055+ data.prompt += data.generation_prompt ;
2056+ }
2057+
2058+ auto parser = build_chat_peg_parser ([&](common_chat_peg_builder & p) {
2059+ auto generation_prompt = p.literal (GEN_PREFIX );
2060+ auto end = p.end ();
2061+
2062+ // The thinking block is always present (the generation prompt forces <|START_THINKING|>).
2063+ // When extracting reasoning, capture its body; otherwise keep the whole block (markers
2064+ // included) inline as content, matching reasoning_format=NONE conventions.
2065+ common_peg_parser reasoning = p.eps ();
2066+ if (extract_reasoning) {
2067+ reasoning = p.optional (p.literal (THINK_START ) +
2068+ p.reasoning (p.until_one_of ({ THINK_END , TEXT_START , ACTION_START })) +
2069+ p.optional (p.literal (THINK_END )));
2070+ } else {
2071+ reasoning = p.optional (p.content (p.literal (THINK_START ) +
2072+ p.until_one_of ({ THINK_END , TEXT_START , ACTION_START }) +
2073+ p.optional (p.literal (THINK_END ))));
2074+ }
2075+
2076+ auto text_content = p.literal (TEXT_START ) + p.content (p.until (TEXT_END )) + p.optional (p.literal (TEXT_END ));
2077+
2078+ if (!has_tools || inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_NONE ) {
2079+ return generation_prompt + reasoning + text_content + p.optional (p.literal (TURN_END )) + end;
2080+ }
2081+
2082+ auto require_tools = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED ;
2083+
2084+ // <|START_ACTION|>[ {"tool_call_id": "0", "tool_name": "f", "parameters": {...}}, ... ]<|END_ACTION|>
2085+ auto tool_calls = p.standard_json_tools (ACTION_START , ACTION_END , inputs.tools , inputs.parallel_tool_calls ,
2086+ /* force_tool_calls = */ true ,
2087+ /* name_key = */ " tool_name" ,
2088+ /* args_key = */ " parameters" ,
2089+ /* array_wrapped = */ true ,
2090+ /* function_is_key = */ false ,
2091+ /* call_id_key = */ " " ,
2092+ /* gen_call_id_key = */ " tool_call_id" ,
2093+ /* parameters_order = */ { " tool_call_id" , " tool_name" , " parameters" });
2094+
2095+ // Content and tool calls are mutually exclusive in this format.
2096+ common_peg_parser body = require_tools ? tool_calls : p.choice ({ tool_calls, text_content });
2097+
2098+ return generation_prompt + reasoning + body + p.optional (p.literal (TURN_END )) + end;
2099+ });
2100+
2101+ data.parser = parser.save ();
2102+
2103+ if (include_grammar) {
2104+ data.grammar_lazy = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_AUTO ;
2105+ data.grammar = build_grammar ([&](const common_grammar_builder & builder) {
2106+ foreach_function (inputs.tools , [&](const json & tool) {
2107+ const auto & function = tool.at (" function" );
2108+ auto schema = function.at (" parameters" );
2109+ builder.resolve_refs (schema);
2110+ });
2111+ parser.build_grammar (builder, data.grammar_lazy );
2112+ });
2113+
2114+ data.grammar_triggers = {
2115+ { COMMON_GRAMMAR_TRIGGER_TYPE_WORD , ACTION_START }
2116+ };
2117+ }
2118+
2119+ return data;
2120+ }
2121+
19822122namespace workaround {
19832123
19842124static void map_developer_role_to_system (json & messages) {
@@ -2227,6 +2367,15 @@ std::optional<common_chat_params> common_chat_try_specialized_template(
22272367 return common_chat_params_init_kimi_k2 (tmpl, params);
22282368 }
22292369
2370+ // Cohere2 MoE / North Code - marker-wrapped format with <|START_TEXT|> content and
2371+ // <|START_ACTION|> JSON tool calls. <|START_TEXT|> is unique to this template (the older
2372+ // Command-R templates use <|START_RESPONSE|>).
2373+ if (src.find (" <|START_TEXT|>" ) != std::string::npos &&
2374+ src.find (" <|START_ACTION|>" ) != std::string::npos) {
2375+ LOG_DBG (" Using specialized template: Cohere2 MoE\n " );
2376+ return common_chat_params_init_cohere2moe (tmpl, params);
2377+ }
2378+
22302379 if (is_lfm2_template (src)) {
22312380 LOG_DBG (" Using specialized template: LFM2\n " );
22322381 return common_chat_params_init_lfm2 (tmpl, params, /* tool_list_tokens = */ true );
0 commit comments