Skip to content

Commit aedb2a5

Browse files
authored
chat: add dedicated Cohere2MoE (North Code) parser (#24615)
* chat: add dedicated Cohere2MoE (North Code) parser * Some renames to make @CISC happy :>
1 parent 8edaca9 commit aedb2a5

3 files changed

Lines changed: 506 additions & 0 deletions

File tree

common/chat.cpp

Lines changed: 149 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1979,6 +1979,146 @@ static common_chat_params common_chat_params_init_deepseek_v3_2(const common_cha
19791979
return data;
19801980
}
19811981

1982+
// Cohere2 MoE (a.k.a. "North Code") parser.
1983+
//
1984+
// The assistant turn is fully marker-wrapped:
1985+
// <|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>
1986+
// <|START_THINKING|>{reasoning}<|END_THINKING|>
1987+
// then EITHER content: <|START_TEXT|>{content}<|END_TEXT|>
1988+
// OR tool calls: <|START_ACTION|>[
1989+
// {"tool_call_id": "0", "tool_name": "f", "parameters": {...}}, ...
1990+
// ]<|END_ACTION|>
1991+
// <|END_OF_TURN_TOKEN|>
1992+
//
1993+
// The generation prompt forces a leading <|START_THINKING|> (when reasoning is enabled, which is
1994+
// the template default), so the model's output continues from *inside* the thinking block. The
1995+
// parser literal therefore only covers the stable <|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|> prefix
1996+
// and the reasoning rule consumes the <|START_THINKING|> ... <|END_THINKING|> markers itself,
1997+
// regardless of whether they came from the generation prompt or the generated text.
1998+
static common_chat_params common_chat_params_init_cohere2moe(const common_chat_template & tmpl,
1999+
const autoparser::generation_params & inputs) {
2000+
common_chat_params data;
2001+
2002+
const std::string TURN_START = "<|START_OF_TURN_TOKEN|>";
2003+
const std::string TURN_END = "<|END_OF_TURN_TOKEN|>";
2004+
const std::string CHATBOT = "<|CHATBOT_TOKEN|>";
2005+
const std::string USER = "<|USER_TOKEN|>";
2006+
const std::string SYSTEM = "<|SYSTEM_TOKEN|>";
2007+
const std::string THINK_START = "<|START_THINKING|>";
2008+
const std::string THINK_END = "<|END_THINKING|>";
2009+
const std::string TEXT_START = "<|START_TEXT|>";
2010+
const std::string TEXT_END = "<|END_TEXT|>";
2011+
const std::string ACTION_START = "<|START_ACTION|>";
2012+
const std::string ACTION_END = "<|END_ACTION|>";
2013+
const std::string RESULT_START = "<|START_TOOL_RESULT|>";
2014+
const std::string RESULT_END = "<|END_TOOL_RESULT|>";
2015+
2016+
// Stable prefix of the generation prompt that precedes the (forced) <|START_THINKING|> marker.
2017+
const std::string GEN_PREFIX = TURN_START + CHATBOT;
2018+
2019+
data.prompt = common_chat_template_direct_apply_impl(tmpl, inputs);
2020+
data.generation_prompt = common_chat_template_generation_prompt_impl(tmpl, inputs);
2021+
data.format = COMMON_CHAT_FORMAT_PEG_NATIVE;
2022+
data.supports_thinking = true;
2023+
data.thinking_start_tag = THINK_START;
2024+
data.thinking_end_tag = THINK_END;
2025+
data.preserved_tokens = {
2026+
TURN_START, TURN_END, CHATBOT, USER, SYSTEM,
2027+
THINK_START, THINK_END,
2028+
TEXT_START, TEXT_END,
2029+
ACTION_START, ACTION_END,
2030+
RESULT_START, RESULT_END,
2031+
};
2032+
2033+
// Split the rendered prompt into per-role message spans. Tool results are rendered with the
2034+
// system token followed by <|START_TOOL_RESULT|>, so the "tool" delimiter must be listed before
2035+
// the plain "system" one (it is a strict superset, and the role split tries delimiters in order).
2036+
data.message_spans = common_chat_split_by_role(data.prompt, {
2037+
{ "assistant", GEN_PREFIX },
2038+
{ "user", TURN_START + USER },
2039+
{ "tool", TURN_START + SYSTEM + RESULT_START },
2040+
{ "system", TURN_START + SYSTEM },
2041+
});
2042+
2043+
auto has_tools = inputs.tools.is_array() && !inputs.tools.empty();
2044+
auto extract_reasoning = inputs.reasoning_format != COMMON_REASONING_FORMAT_NONE;
2045+
auto include_grammar = has_tools && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE;
2046+
2047+
if (inputs.has_continuation()) {
2048+
const auto & msg = inputs.continue_msg;
2049+
2050+
data.generation_prompt = GEN_PREFIX + THINK_START + msg.reasoning_content;
2051+
if (inputs.continue_final_message == COMMON_CHAT_CONTINUATION_CONTENT) {
2052+
data.generation_prompt += THINK_END + TEXT_START + msg.render_content();
2053+
}
2054+
2055+
data.prompt += data.generation_prompt;
2056+
}
2057+
2058+
auto parser = build_chat_peg_parser([&](common_chat_peg_builder & p) {
2059+
auto generation_prompt = p.literal(GEN_PREFIX);
2060+
auto end = p.end();
2061+
2062+
// The thinking block is always present (the generation prompt forces <|START_THINKING|>).
2063+
// When extracting reasoning, capture its body; otherwise keep the whole block (markers
2064+
// included) inline as content, matching reasoning_format=NONE conventions.
2065+
common_peg_parser reasoning = p.eps();
2066+
if (extract_reasoning) {
2067+
reasoning = p.optional(p.literal(THINK_START) +
2068+
p.reasoning(p.until_one_of({ THINK_END, TEXT_START, ACTION_START })) +
2069+
p.optional(p.literal(THINK_END)));
2070+
} else {
2071+
reasoning = p.optional(p.content(p.literal(THINK_START) +
2072+
p.until_one_of({ THINK_END, TEXT_START, ACTION_START }) +
2073+
p.optional(p.literal(THINK_END))));
2074+
}
2075+
2076+
auto text_content = p.literal(TEXT_START) + p.content(p.until(TEXT_END)) + p.optional(p.literal(TEXT_END));
2077+
2078+
if (!has_tools || inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_NONE) {
2079+
return generation_prompt + reasoning + text_content + p.optional(p.literal(TURN_END)) + end;
2080+
}
2081+
2082+
auto require_tools = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED;
2083+
2084+
// <|START_ACTION|>[ {"tool_call_id": "0", "tool_name": "f", "parameters": {...}}, ... ]<|END_ACTION|>
2085+
auto tool_calls = p.standard_json_tools(ACTION_START, ACTION_END, inputs.tools, inputs.parallel_tool_calls,
2086+
/* force_tool_calls = */ true,
2087+
/* name_key = */ "tool_name",
2088+
/* args_key = */ "parameters",
2089+
/* array_wrapped = */ true,
2090+
/* function_is_key = */ false,
2091+
/* call_id_key = */ "",
2092+
/* gen_call_id_key = */ "tool_call_id",
2093+
/* parameters_order = */ { "tool_call_id", "tool_name", "parameters" });
2094+
2095+
// Content and tool calls are mutually exclusive in this format.
2096+
common_peg_parser body = require_tools ? tool_calls : p.choice({ tool_calls, text_content });
2097+
2098+
return generation_prompt + reasoning + body + p.optional(p.literal(TURN_END)) + end;
2099+
});
2100+
2101+
data.parser = parser.save();
2102+
2103+
if (include_grammar) {
2104+
data.grammar_lazy = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_AUTO;
2105+
data.grammar = build_grammar([&](const common_grammar_builder & builder) {
2106+
foreach_function(inputs.tools, [&](const json & tool) {
2107+
const auto & function = tool.at("function");
2108+
auto schema = function.at("parameters");
2109+
builder.resolve_refs(schema);
2110+
});
2111+
parser.build_grammar(builder, data.grammar_lazy);
2112+
});
2113+
2114+
data.grammar_triggers = {
2115+
{ COMMON_GRAMMAR_TRIGGER_TYPE_WORD, ACTION_START }
2116+
};
2117+
}
2118+
2119+
return data;
2120+
}
2121+
19822122
namespace workaround {
19832123

19842124
static void map_developer_role_to_system(json & messages) {
@@ -2227,6 +2367,15 @@ std::optional<common_chat_params> common_chat_try_specialized_template(
22272367
return common_chat_params_init_kimi_k2(tmpl, params);
22282368
}
22292369

2370+
// Cohere2 MoE / North Code - marker-wrapped format with <|START_TEXT|> content and
2371+
// <|START_ACTION|> JSON tool calls. <|START_TEXT|> is unique to this template (the older
2372+
// Command-R templates use <|START_RESPONSE|>).
2373+
if (src.find("<|START_TEXT|>") != std::string::npos &&
2374+
src.find("<|START_ACTION|>") != std::string::npos) {
2375+
LOG_DBG("Using specialized template: Cohere2 MoE\n");
2376+
return common_chat_params_init_cohere2moe(tmpl, params);
2377+
}
2378+
22302379
if (is_lfm2_template(src)) {
22312380
LOG_DBG("Using specialized template: LFM2\n");
22322381
return common_chat_params_init_lfm2(tmpl, params, /* tool_list_tokens = */ true);

0 commit comments

Comments
 (0)