Skip to content

Commit 65d1ecb

Browse files
Merge pull request #483 from janhq/update-dev-from-master-2026-04-12-00-57
Sync master with upstream release b8763
2 parents b9898e6 + ff5ef82 commit 65d1ecb

66 files changed

Lines changed: 3369 additions & 2759 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

cmake/arm64-linux-clang.cmake

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
# CMake toolchain file: build for Linux on arm64 with clang/clang++ using the aarch64-linux-gnu target triple.
1+
set( CMAKE_SYSTEM_NAME Linux )
2+
set( CMAKE_SYSTEM_PROCESSOR arm64 )
3+
# Target triple, reused below for both the C and the C++ compiler.
4+
set( target aarch64-linux-gnu )
5+
# Use clang as the compiler for both languages.
6+
set( CMAKE_C_COMPILER clang )
7+
set( CMAKE_CXX_COMPILER clang++ )
8+
# Hand the triple to CMake so it can pass it on to clang as the compilation target.
9+
set( CMAKE_C_COMPILER_TARGET ${target} )
10+
set( CMAKE_CXX_COMPILER_TARGET ${target} )
11+
# arch_c_flags: ISA level, vectorization and floating-point model; warn_c_flags: warnings silenced for this toolchain.
12+
set( arch_c_flags "-march=armv8.7-a -fvectorize -ffp-model=fast -fno-finite-math-only" )
13+
set( warn_c_flags "-Wno-format -Wno-unused-variable -Wno-unused-function -Wno-gnu-zero-variadic-macro-arguments" )
14+
# Seed the initial C and C++ flags with the same combined architecture + warning flag set.
15+
set( CMAKE_C_FLAGS_INIT "${arch_c_flags} ${warn_c_flags}" )
16+
set( CMAKE_CXX_FLAGS_INIT "${arch_c_flags} ${warn_c_flags}" )
17+

common/arg.cpp

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -291,14 +291,16 @@ static bool common_params_handle_remote_preset(common_params & params, llama_exa
291291
hf_tag = "default";
292292
}
293293

294-
const bool offline = params.offline;
295294
std::string model_endpoint = get_model_endpoint();
296295
auto preset_url = model_endpoint + hf_repo + "/resolve/main/preset.ini";
297296

298297
// prepare local path for caching
299298
auto preset_fname = clean_file_name(hf_repo + "_preset.ini");
300299
auto preset_path = fs_get_cache_file(preset_fname);
301-
const int status = common_download_file_single(preset_url, preset_path, params.hf_token, offline);
300+
common_download_opts opts;
301+
opts.bearer_token = params.hf_token;
302+
opts.offline = params.offline;
303+
const int status = common_download_file_single(preset_url, preset_path, opts);
302304
const bool has_preset = status >= 200 && status < 400;
303305

304306
// remote preset is optional, so we don't error out if not found
@@ -341,10 +343,10 @@ static handle_model_result common_params_handle_model(struct common_params_model
341343
model.hf_file = model.path;
342344
model.path = "";
343345
}
344-
common_download_model_opts opts;
345-
opts.download_mmproj = true;
346+
common_download_opts opts;
347+
opts.bearer_token = bearer_token;
346348
opts.offline = offline;
347-
auto download_result = common_download_model(model, bearer_token, opts);
349+
auto download_result = common_download_model(model, opts, true);
348350

349351
if (download_result.model_path.empty()) {
350352
LOG_ERR("error: failed to download model from Hugging Face\n");
@@ -365,9 +367,10 @@ static handle_model_result common_params_handle_model(struct common_params_model
365367
model.path = fs_get_cache_file(string_split<std::string>(f, '/').back());
366368
}
367369

368-
common_download_model_opts opts;
370+
common_download_opts opts;
371+
opts.bearer_token = bearer_token;
369372
opts.offline = offline;
370-
auto download_result = common_download_model(model, bearer_token, opts);
373+
auto download_result = common_download_model(model, opts);
371374
if (download_result.model_path.empty()) {
372375
LOG_ERR("error: failed to download model from %s\n", model.url.c_str());
373376
exit(1);

common/chat-auto-parser-generator.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,10 @@ common_chat_params peg_generator::generate_parser(const common_chat_template &
6969
auto schema = function.contains("parameters") ? function.at("parameters") : json::object();
7070
builder.resolve_refs(schema);
7171
});
72+
if (has_response_format) {
73+
auto schema = inputs.json_schema;
74+
builder.resolve_refs(schema);
75+
}
7276
parser.build_grammar(builder, data.grammar_lazy);
7377
});
7478

common/chat.cpp

Lines changed: 23 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -865,9 +865,10 @@ static common_chat_params common_chat_params_init_ministral_3(const common_chat_
865865
adjusted_messages.push_back(adjusted);
866866
}
867867

868-
auto has_tools = inputs.tools.is_array() && !inputs.tools.empty();
869-
auto extract_reasoning = inputs.reasoning_format != COMMON_REASONING_FORMAT_NONE;
870-
auto include_grammar = true;
868+
auto has_tools = inputs.tools.is_array() && !inputs.tools.empty();
869+
auto has_response_format = inputs.json_schema.is_object() && !inputs.json_schema.empty();
870+
auto extract_reasoning = inputs.reasoning_format != COMMON_REASONING_FORMAT_NONE;
871+
auto include_grammar = true;
871872

872873
data.supports_thinking = true;
873874
data.thinking_start_tag = "[THINK]";
@@ -887,7 +888,7 @@ static common_chat_params common_chat_params_init_ministral_3(const common_chat_
887888
extract_reasoning ? p.optional("[THINK]" + p.reasoning(p.until("[/THINK]")) + "[/THINK]") : p.eps();
888889

889890
// Response format parser
890-
if (inputs.json_schema.is_object() && !inputs.json_schema.empty()) {
891+
if (has_response_format) {
891892
// Ministral wants to emit json surrounded by code fences
892893
return generation_prompt + (reasoning << "```json" << p.content(p.schema(p.json(), "response-format", inputs.json_schema)) << "```");
893894
}
@@ -928,6 +929,10 @@ static common_chat_params common_chat_params_init_ministral_3(const common_chat_
928929
auto schema = function.at("parameters");
929930
builder.resolve_refs(schema);
930931
});
932+
if (has_response_format) {
933+
auto schema = inputs.json_schema;
934+
builder.resolve_refs(schema);
935+
}
931936
parser.build_grammar(builder, data.grammar_lazy);
932937
});
933938

@@ -1063,6 +1068,10 @@ static common_chat_params common_chat_params_init_gpt_oss(const common_chat_temp
10631068
auto schema = function.at("parameters");
10641069
builder.resolve_refs(schema);
10651070
});
1071+
if (has_response_format) {
1072+
auto schema = inputs.json_schema;
1073+
builder.resolve_refs(schema);
1074+
}
10661075
parser.build_grammar(builder, data.grammar_lazy);
10671076
});
10681077

@@ -1193,6 +1202,10 @@ static common_chat_params common_chat_params_init_gemma4(const common_chat_templ
11931202
auto schema = function.at("parameters");
11941203
builder.resolve_refs(schema);
11951204
});
1205+
if (has_response_format) {
1206+
auto schema = inputs.json_schema;
1207+
builder.resolve_refs(schema);
1208+
}
11961209
parser.build_grammar(builder, data.grammar_lazy);
11971210
});
11981211

@@ -1916,7 +1929,12 @@ std::optional<common_chat_params> common_chat_try_specialized_template(
19161929

19171930
// Gemma4 format detection
19181931
if (src.find("'<|tool_call>call:'") != std::string::npos) {
1919-
workaround::convert_tool_responses_gemma4(params.messages);
1932+
if (src.find("{#- OpenAI Chat Completions:") == std::string::npos) {
1933+
// apply workarounds if using the older gemma4 templates
1934+
LOG_WRN("%s: detected an outdated gemma4 chat template, applying compatibility workarounds. "
1935+
"Consider updating to the official template.\n", __func__);
1936+
workaround::convert_tool_responses_gemma4(params.messages);
1937+
}
19201938
return common_chat_params_init_gemma4(tmpl, params);
19211939
}
19221940

0 commit comments

Comments
 (0)