Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
5473949
webgpu : Query for adapter support when registering WebGPU backend (#…
reeselevine Apr 8, 2026
3ba12fe
kv-cache : extend cache quantization checks (#21586)
Green-Sky Apr 8, 2026
e9fd962
Propose fix a couple of typos (#21581)
jeis4wpi Apr 8, 2026
4a05e0c
webui : send both backend_sampling == false/true (#18781)
ggerganov Apr 8, 2026
d9a12c8
vocab : remove </s> eog token if gemma4 (#21492)
aldehir Apr 8, 2026
6606000
server: respect the ignore eos flag (#21203)
ykhrustalev Apr 8, 2026
2dcb7f7
fix: free ctx_copy in ggml_opt_free to plug per-training-session leak…
RealOrko Apr 8, 2026
d12cc3d
CUDA: also store `node->src->data` ptrs for equality check (#21635)
am17an Apr 8, 2026
4293919
common : skip non-primary GGUF split files when selecting model (#21633)
angt Apr 9, 2026
8a132fa
vulkan: unify type macros to use Vx instead of _VECx (#21605)
0cc4m Apr 9, 2026
8a65a7a
ci: drop v5 `all:` composition from labeler.yml (#21627)
Marxist-Leninist Apr 9, 2026
b54cb2e
sycl : add flash-attn support for head size 512 (#21654)
qnixsynapse Apr 9, 2026
75511a8
webui: Add option to pre-encode conversation for faster next turns (#…
allozaur Apr 9, 2026
3ee9da0
server : fix grammar commandline args (#21543)
AUTOMATIC1111 Apr 9, 2026
9949ad0
fix: Model Selector choice sync (#21628)
allozaur Apr 9, 2026
5e9c635
metal : add missing mm-id specializations for q1_0 (#21662)
ggerganov Apr 9, 2026
243532e
jinja : support ensure_ascii=true, string repetition and int/float se…
kwajiehao Apr 9, 2026
0ec191e
vocab: add gemma4 tokenizer tests, fix edge case (#21534)
pwilkin Apr 9, 2026
501aeed
mtmd: support dots.ocr (#17575)
ngxson Apr 9, 2026
057dba3
model: fix multimodal padding token for gemma3n/gemma4 (#21625)
ngxson Apr 9, 2026
2622975
common : simplify autoparser tagged parser rules (#21216)
aldehir Apr 9, 2026
ddf03c6
common : fix ambiguous grammar rule in gemma4 (#21661)
aldehir Apr 9, 2026
4ef9301
webui: add "Send message on Enter" setting (#21577)
mourix Apr 9, 2026
c8ac02f
requirements : update transformers to 5.5.1 (#21617)
danbev Apr 9, 2026
009a113
ggml : check return value of CUB calls used in argsort and top-k (the…
fairydreaming Apr 9, 2026
d6f3030
ggml: backend-agnostic tensor parallelism (experimental) (#19378)
JohannesGaessler Apr 9, 2026
d132f22
HIP: add CDNA4 (gfx950) architecture support for MI350X/MI355X (#21570)
andyluo7 Apr 9, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 5 additions & 13 deletions .github/labeler.yml
Original file line number Diff line number Diff line change
Expand Up @@ -75,21 +75,13 @@ android:
- examples/llama.android/**
server/webui:
- changed-files:
- all:
- any-glob-to-any-file:
- tools/server/webui/**
- tools/server/public/**
- all-globs-to-all-files:
- '!tools/server/webui/**'
- '!tools/server/public/**'
- any-glob-to-any-file:
- tools/server/webui/**
- tools/server/public/**
server:
- changed-files:
- all:
- any-glob-to-any-file:
- tools/server/**
- all-globs-to-all-files:
- '!tools/server/webui/**'
- '!tools/server/public/**'
- any-glob-to-any-file:
- tools/server/**



Expand Down
16 changes: 9 additions & 7 deletions common/arg.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2348,19 +2348,21 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
}
).set_env("LLAMA_ARG_N_GPU_LAYERS"));
add_opt(common_arg(
{"-sm", "--split-mode"}, "{none,layer,row}",
{"-sm", "--split-mode"}, "{none,layer,row,tensor}",
"how to split the model across multiple GPUs, one of:\n"
"- none: use one GPU only\n"
"- layer (default): split layers and KV across GPUs\n"
"- row: split rows across GPUs",
"- layer (default): split layers and KV across GPUs (pipelined)\n"
"- row: split weight across GPUs by rows (parallelized)\n"
"- tensor: split weights and KV across GPUs (parallelized)",
[](common_params & params, const std::string & value) {
std::string arg_next = value;
if (arg_next == "none") {
if (value == "none") {
params.split_mode = LLAMA_SPLIT_MODE_NONE;
} else if (arg_next == "layer") {
} else if (value == "layer") {
params.split_mode = LLAMA_SPLIT_MODE_LAYER;
} else if (arg_next == "row") {
} else if (value == "row") {
params.split_mode = LLAMA_SPLIT_MODE_ROW;
} else if (value == "tensor") {
params.split_mode = LLAMA_SPLIT_MODE_TENSOR;
} else {
throw std::invalid_argument("invalid value");
}
Expand Down
50 changes: 14 additions & 36 deletions common/chat-auto-parser-generator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -332,58 +332,36 @@ common_peg_parser analyze_tools::build_tool_parser_tag_tagged(parser_build_conte
const auto & inputs = ctx.inputs;
bool force_tools = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED;

auto until_suffix = p.rule("until-suffix", p.until(arguments.value_suffix));

common_peg_parser tool_choice = p.choice();

foreach_function(inputs.tools, [&](const json & tool) {
const auto & func = tool.at("function");
std::string name = func.at("name");
const auto & params = func.contains("parameters") ? func.at("parameters") : json::object();
auto params = func.contains("parameters") ? func.at("parameters") : json::object();
const auto & properties = params.contains("properties") ? params.at("properties") : json::object();

std::set<std::string> required;
if (params.contains("required")) {
params.at("required").get_to(required);
}

auto schema_info = common_schema_info();
schema_info.resolve_refs(params);

// Build parser for each argument, separating required and optional
std::vector<common_peg_parser> required_parsers;
std::vector<common_peg_parser> optional_parsers;
for (const auto & [param_name, param_schema] : properties.items()) {
bool is_required = required.find(param_name) != required.end();
std::string type = "object";
if (param_schema.contains("type")) {
const auto & type_obj = param_schema.at("type");
if (type_obj.is_string()) {
type_obj.get_to(type);
} else if (type_obj.is_array()) {
// Handle nullable types like ["string", "null"]
for (const auto & t : type_obj) {
if (t.is_string() && t.get<std::string>() != "null") {
type = t.get<std::string>();
break;
}
}
} else if (type_obj.is_object()) {
if (type_obj.contains("type") && type_obj.at("type").is_string()) {
type_obj.at("type").get_to(type);
}
}
}
// Infer string type from enum values when type is unspecified
if (type == "object" && param_schema.contains("enum")) {
const auto & enum_vals = param_schema.at("enum");
if (enum_vals.is_array()) {
for (const auto & v : enum_vals) {
if (v.is_string()) {
type = "string";
break;
}
}
}
}
bool is_required = required.find(param_name) != required.end();

auto arg =
p.tool_arg(p.tool_arg_open(arguments.name_prefix + p.tool_arg_name(p.literal(param_name)) +
arguments.name_suffix) +
arguments.value_prefix +
(type == "string" ?
p.tool_arg_string_value(p.schema(p.until(arguments.value_suffix),
(schema_info.resolves_to_string(param_schema) ?
p.tool_arg_string_value(p.schema(until_suffix,
"tool-" + name + "-arg-" + param_name + "-schema",
param_schema, true)) :
p.tool_arg_json_value(p.schema(
Expand Down Expand Up @@ -414,7 +392,7 @@ common_peg_parser analyze_tools::build_tool_parser_tag_tagged(parser_build_conte
for (const auto & opt : optional_parsers) {
any_opt |= opt;
}
args_seq = args_seq + p.repeat(p.space() + any_opt, 0, (int) optional_parsers.size());
args_seq = args_seq + p.repeat(p.space() + any_opt, 0, -1);
}

if (!arguments.start.empty()) {
Expand Down
2 changes: 1 addition & 1 deletion common/chat.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1124,7 +1124,7 @@ static common_chat_params common_chat_params_init_gemma4(const common_chat_templ
p.rule("gemma4-bool", p.json_bool());
p.rule("gemma4-null", p.json_null());
p.rule("gemma4-number", p.json_number());
p.rule("gemma4-dict-key", p.rule("gemma4-dict-key-name", p.until(":")) + p.literal(":"));
p.rule("gemma4-dict-key", p.rule("gemma4-dict-key-name", p.chars("[^:}]", 1, -1)) + p.literal(":"));
p.rule("gemma4-dict-kv", p.ref("gemma4-dict-key") + p.space() + p.ref("gemma4-value"));
p.rule("gemma4-dict", [&]() {
auto ws = p.space();
Expand Down
12 changes: 11 additions & 1 deletion common/download.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -591,6 +591,10 @@ static hf_cache::hf_file find_best_model(const hf_cache::hf_files & files,
for (const auto & f : files) {
if (gguf_filename_is_model(f.path) &&
std::regex_search(f.path, pattern)) {
auto split = get_gguf_split_info(f.path);
if (split.count > 1 && split.index != 1) {
continue;
}
return f;
}
}
Expand All @@ -600,6 +604,10 @@ static hf_cache::hf_file find_best_model(const hf_cache::hf_files & files,
if (tag.empty()) {
for (const auto & f : files) {
if (gguf_filename_is_model(f.path)) {
auto split = get_gguf_split_info(f.path);
if (split.count > 1 && split.index != 1) {
continue;
}
return f;
}
}
Expand All @@ -618,6 +626,7 @@ static void list_available_gguf_files(const hf_cache::hf_files & files) {
}

struct hf_plan {
hf_cache::hf_file primary;
hf_cache::hf_files model_files;
hf_cache::hf_file mmproj;
};
Expand Down Expand Up @@ -663,6 +672,7 @@ static hf_plan get_hf_plan(const common_params_model & model,
}
}

plan.primary = primary;
plan.model_files = get_split_files(all, primary);

if (opts.download_mmproj) {
Expand Down Expand Up @@ -749,7 +759,7 @@ common_download_model_result common_download_model(const common_params_model
for (const auto & f : hf.model_files) {
hf_cache::finalize_file(f);
}
result.model_path = hf.model_files[0].final_path;
result.model_path = hf.primary.final_path;

if (!hf.mmproj.path.empty()) {
result.mmproj_path = hf_cache::finalize_file(hf.mmproj);
Expand Down
17 changes: 17 additions & 0 deletions common/jinja/runtime.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -251,6 +251,23 @@ value binary_expression::execute_impl(context & ctx) {
return res;
}

// Python-style string repetition
// TODO: support array/tuple repetition (e.g., [1, 2] * 3 → [1, 2, 1, 2, 1, 2])
if (op.value == "*" &&
((is_val<value_string>(left_val) && is_val<value_int>(right_val)) ||
(is_val<value_int>(left_val) && is_val<value_string>(right_val)))) {
const auto & str = is_val<value_string>(left_val) ? left_val->as_string() : right_val->as_string();
const int64_t repeat = is_val<value_int>(right_val) ? right_val->as_int() : left_val->as_int();
auto res = mk_val<value_string>();
if (repeat <= 0) {
return res;
}
for (int64_t i = 0; i < repeat; ++i) {
res->val_str = res->val_str.append(str);
}
return res;
}

// String membership
if (is_val<value_string>(left_val) && is_val<value_string>(right_val)) {
// case: "a" in "abc"
Expand Down
93 changes: 90 additions & 3 deletions common/jinja/value.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#include "runtime.h"
#include "unicode.h"
#include "value.h"

// for converting from JSON to jinja values
Expand Down Expand Up @@ -154,6 +155,83 @@ static value test_compare_fn(const func_args & args) {
return mk_val<value_bool>(value_compare(args.get_pos(0), args.get_pos(1), op));
}

// Appends `codepoint` to `out` as ASCII-only JSON escape text.
//
// Each emitted unit has the form `\uXXXX` (backslash, 'u', four lowercase hex
// digits). Code points up to 0xFFFF produce a single unit; larger code points
// are reduced by 0x10000 and split into a high (0xD800-based) and a low
// (0xDC00-based) surrogate, producing two units.
static void append_codepoint_as_ascii_json_escape(std::string & out, uint32_t codepoint) {
    uint32_t units[2];
    int      n_units;

    if (codepoint > 0xFFFF) {
        // outside the 16-bit range: encode as a surrogate pair
        const uint32_t offset = codepoint - 0x10000;
        units[0] = 0xD800 + ((offset >> 10) & 0x3FF);
        units[1] = 0xDC00 + (offset & 0x3FF);
        n_units  = 2;
    } else {
        units[0] = codepoint;
        n_units  = 1;
    }

    for (int i = 0; i < n_units; ++i) {
        // "\uXXXX" is 6 characters + terminating NUL
        char escape[8];
        snprintf(escape, sizeof(escape), "\\u%04x", static_cast<unsigned int>(units[i]));
        out += escape;
    }
}

// Returns a copy of `json_str` in which non-ASCII bytes that appear inside
// double-quoted strings are replaced by `\uXXXX` escapes; all other bytes
// (structure, whitespace, existing escapes) are copied through unchanged.
//
// The input is scanned once while tracking two flags: whether the cursor is
// inside a string, and whether the previous byte inside that string was an
// unconsumed backslash. A byte that directly follows a backslash is always
// copied verbatim. When a non-ASCII byte cannot be decoded by
// common_parse_utf8_codepoint, the text `\ufffd` is emitted and the scan
// advances by a single byte.
static std::string json_ensure_ascii_preserving_format(const std::string & json_str) {
    std::string result;
    result.reserve(json_str.size());

    bool inside_string   = false;
    bool after_backslash = false;

    size_t i = 0;
    while (i < json_str.size()) {
        const char c        = json_str[i];
        const bool is_ascii = static_cast<unsigned char>(c) < 0x80;

        // Only unescaped, non-ASCII bytes within a string are transcoded.
        if (inside_string && !after_backslash && !is_ascii) {
            auto decoded = common_parse_utf8_codepoint(json_str, i);
            if (decoded.status == utf8_parse_result::SUCCESS) {
                append_codepoint_as_ascii_json_escape(result, decoded.codepoint);
                i += decoded.bytes_consumed;
            } else {
                result += "\\ufffd";
                ++i;
            }
            continue;
        }

        // Everything else is copied as-is; then update the scanner state.
        result.push_back(c);
        ++i;

        if (!inside_string) {
            if (c == '"') {
                inside_string = true;
            }
        } else if (after_backslash) {
            after_backslash = false;
        } else if (c == '\\') {
            after_backslash = true;
        } else if (c == '"') {
            inside_string = false;
        }
    }

    return result;
}

static value tojson(const func_args & args) {
args.ensure_count(1, 5);
value val_ascii = args.get_kwarg_or_pos("ensure_ascii", 1);
Expand All @@ -169,16 +247,17 @@ static value tojson(const func_args & args) {
if (is_val<value_int>(val_indent)) {
indent = static_cast<int>(val_indent->as_int());
}
if (val_ascii->as_bool()) { // undefined == false
throw not_implemented_exception("tojson ensure_ascii=true not implemented");
}
if (val_sort->as_bool()) { // undefined == false
throw not_implemented_exception("tojson sort_keys=true not implemented");
}
const bool ensure_ascii = val_ascii->as_bool(); // undefined == false
auto separators = (is_val<value_array>(val_separators) ? val_separators : mk_val<value_array>())->as_array();
std::string item_sep = separators.size() > 0 ? separators[0]->as_string().str() : (indent < 0 ? ", " : ",");
std::string key_sep = separators.size() > 1 ? separators[1]->as_string().str() : ": ";
std::string json_str = value_to_json(args.get_pos(0), indent, item_sep, key_sep);
if (ensure_ascii) {
json_str = json_ensure_ascii_preserving_format(json_str);
}
return mk_val<value_string>(json_str);
}

Expand Down Expand Up @@ -460,6 +539,10 @@ const func_builtins & value_int_t::get_builtins() const {
int64_t val = args.get_pos(0)->as_int();
return mk_val<value_int>(val < 0 ? -val : val);
}},
{"int", [](const func_args & args) -> value {
args.ensure_vals<value_int>();
return mk_val<value_int>(args.get_pos(0)->as_int());
}},
{"float", [](const func_args & args) -> value {
args.ensure_vals<value_int>();
double val = static_cast<double>(args.get_pos(0)->as_int());
Expand All @@ -486,6 +569,10 @@ const func_builtins & value_float_t::get_builtins() const {
int64_t val = static_cast<int64_t>(args.get_pos(0)->as_float());
return mk_val<value_int>(val);
}},
{"float", [](const func_args & args) -> value {
args.ensure_vals<value_float>();
return mk_val<value_float>(args.get_pos(0)->as_float());
}},
{"safe", tojson},
{"string", tojson},
{"tojson", tojson},
Expand Down
Loading