janhq · jan-service-account · Apr 10, 2026 · Apr 8, 2026 · Apr 8, 2026 · Apr 8, 2026
diff --git a/.github/labeler.yml b/.github/labeler.yml
@@ -75,21 +75,13 @@ android:
             - examples/llama.android/**
 server/webui:
     - changed-files:
-        - all:
-            - any-glob-to-any-file:
-                - tools/server/webui/**
-                - tools/server/public/**
-            - all-globs-to-all-files:
-                - '!tools/server/webui/**'
-                - '!tools/server/public/**'
+        - any-glob-to-any-file:
+            - tools/server/webui/**
+            - tools/server/public/**
 server:
     - changed-files:
-        - all:
-            - any-glob-to-any-file:
-                - tools/server/**
-            - all-globs-to-all-files:
-                - '!tools/server/webui/**'
-                - '!tools/server/public/**'
+        - any-glob-to-any-file:
+            - tools/server/**
 
 
 

diff --git a/common/arg.cpp b/common/arg.cpp
@@ -2348,19 +2348,21 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
         }
     ).set_env("LLAMA_ARG_N_GPU_LAYERS"));
     add_opt(common_arg(
-        {"-sm", "--split-mode"}, "{none,layer,row}",
+        {"-sm", "--split-mode"}, "{none,layer,row,tensor}",
         "how to split the model across multiple GPUs, one of:\n"
         "- none: use one GPU only\n"
-        "- layer (default): split layers and KV across GPUs\n"
-        "- row: split rows across GPUs",
+        "- layer (default): split layers and KV across GPUs (pipelined)\n"
+        "- row: split weight across GPUs by rows (parallelized)\n"
+        "- tensor: split weights and KV across GPUs (parallelized)",
         [](common_params & params, const std::string & value) {
-            std::string arg_next = value;
-            if (arg_next == "none") {
+            if (value == "none") {
                 params.split_mode = LLAMA_SPLIT_MODE_NONE;
-            } else if (arg_next == "layer") {
+            } else if (value == "layer") {
                 params.split_mode = LLAMA_SPLIT_MODE_LAYER;
-            } else if (arg_next == "row") {
+            } else if (value == "row") {
                 params.split_mode = LLAMA_SPLIT_MODE_ROW;
+            } else if (value == "tensor") {
+                params.split_mode = LLAMA_SPLIT_MODE_TENSOR;
             } else {
                 throw std::invalid_argument("invalid value");
             }

diff --git a/common/chat-auto-parser-generator.cpp b/common/chat-auto-parser-generator.cpp
@@ -332,58 +332,36 @@ common_peg_parser analyze_tools::build_tool_parser_tag_tagged(parser_build_conte
     const auto & inputs      = ctx.inputs;
     bool         force_tools = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED;
 
+    auto until_suffix = p.rule("until-suffix", p.until(arguments.value_suffix));
+
     common_peg_parser tool_choice = p.choice();
 
     foreach_function(inputs.tools, [&](const json & tool) {
         const auto &          func       = tool.at("function");
         std::string           name       = func.at("name");
-        const auto &          params     = func.contains("parameters") ? func.at("parameters") : json::object();
+        auto                  params     = func.contains("parameters") ? func.at("parameters") : json::object();
         const auto &          properties = params.contains("properties") ? params.at("properties") : json::object();
+
         std::set<std::string> required;
+        if (params.contains("required")) {
+            params.at("required").get_to(required);
+        }
+
+        auto schema_info = common_schema_info();
+        schema_info.resolve_refs(params);
 
         // Build parser for each argument, separating required and optional
         std::vector<common_peg_parser> required_parsers;
         std::vector<common_peg_parser> optional_parsers;
         for (const auto & [param_name, param_schema] : properties.items()) {
-            bool        is_required = required.find(param_name) != required.end();
-            std::string type        = "object";
-            if (param_schema.contains("type")) {
-                const auto & type_obj = param_schema.at("type");
-                if (type_obj.is_string()) {
-                    type_obj.get_to(type);
-                } else if (type_obj.is_array()) {
-                    // Handle nullable types like ["string", "null"]
-                    for (const auto & t : type_obj) {
-                        if (t.is_string() && t.get<std::string>() != "null") {
-                            type = t.get<std::string>();
-                            break;
-                        }
-                    }
-                } else if (type_obj.is_object()) {
-                    if (type_obj.contains("type") && type_obj.at("type").is_string()) {
-                        type_obj.at("type").get_to(type);
-                    }
-                }
-            }
-            // Infer string type from enum values when type is unspecified
-            if (type == "object" && param_schema.contains("enum")) {
-                const auto & enum_vals = param_schema.at("enum");
-                if (enum_vals.is_array()) {
-                    for (const auto & v : enum_vals) {
-                        if (v.is_string()) {
-                            type = "string";
-                            break;
-                        }
-                    }
-                }
-            }
+            bool is_required = required.find(param_name) != required.end();
 
             auto arg =
                 p.tool_arg(p.tool_arg_open(arguments.name_prefix + p.tool_arg_name(p.literal(param_name)) +
                                            arguments.name_suffix) +
                            arguments.value_prefix +
-                           (type == "string" ?
-                                p.tool_arg_string_value(p.schema(p.until(arguments.value_suffix),
+                           (schema_info.resolves_to_string(param_schema) ?
+                                p.tool_arg_string_value(p.schema(until_suffix,
                                                                  "tool-" + name + "-arg-" + param_name + "-schema",
                                                                  param_schema, true)) :
                                 p.tool_arg_json_value(p.schema(
@@ -414,7 +392,7 @@ common_peg_parser analyze_tools::build_tool_parser_tag_tagged(parser_build_conte
             for (const auto & opt : optional_parsers) {
                 any_opt |= opt;
             }
-            args_seq = args_seq + p.repeat(p.space() + any_opt, 0, (int) optional_parsers.size());
+            args_seq = args_seq + p.repeat(p.space() + any_opt, 0, -1);
         }
 
         if (!arguments.start.empty()) {

diff --git a/common/chat.cpp b/common/chat.cpp
@@ -1124,7 +1124,7 @@ static common_chat_params common_chat_params_init_gemma4(const common_chat_templ
             p.rule("gemma4-bool", p.json_bool());
             p.rule("gemma4-null", p.json_null());
             p.rule("gemma4-number", p.json_number());
-            p.rule("gemma4-dict-key", p.rule("gemma4-dict-key-name", p.until(":")) + p.literal(":"));
+            p.rule("gemma4-dict-key", p.rule("gemma4-dict-key-name", p.chars("[^:}]", 1, -1)) + p.literal(":"));
             p.rule("gemma4-dict-kv", p.ref("gemma4-dict-key") + p.space() + p.ref("gemma4-value"));
             p.rule("gemma4-dict", [&]() {
                 auto ws = p.space();

diff --git a/common/download.cpp b/common/download.cpp
@@ -591,6 +591,10 @@ static hf_cache::hf_file find_best_model(const hf_cache::hf_files & files,
         for (const auto & f : files) {
             if (gguf_filename_is_model(f.path) &&
                 std::regex_search(f.path, pattern)) {
+                auto split = get_gguf_split_info(f.path);
+                if (split.count > 1 && split.index != 1) {
+                    continue;
+                }
                 return f;
             }
         }
@@ -600,6 +604,10 @@ static hf_cache::hf_file find_best_model(const hf_cache::hf_files & files,
     if (tag.empty()) {
         for (const auto & f : files) {
             if (gguf_filename_is_model(f.path)) {
+                auto split = get_gguf_split_info(f.path);
+                if (split.count > 1 && split.index != 1) {
+                    continue;
+                }
                 return f;
             }
         }
@@ -618,6 +626,7 @@ static void list_available_gguf_files(const hf_cache::hf_files & files) {
 }
 
 struct hf_plan {
+    hf_cache::hf_file primary;
     hf_cache::hf_files model_files;
     hf_cache::hf_file mmproj;
 };
@@ -663,6 +672,7 @@ static hf_plan get_hf_plan(const common_params_model        & model,
         }
     }
 
+    plan.primary = primary;
     plan.model_files = get_split_files(all, primary);
 
     if (opts.download_mmproj) {
@@ -749,7 +759,7 @@ common_download_model_result common_download_model(const common_params_model
         for (const auto & f : hf.model_files) {
             hf_cache::finalize_file(f);
         }
-        result.model_path = hf.model_files[0].final_path;
+        result.model_path = hf.primary.final_path;
 
         if (!hf.mmproj.path.empty()) {
             result.mmproj_path = hf_cache::finalize_file(hf.mmproj);

diff --git a/common/jinja/runtime.cpp b/common/jinja/runtime.cpp
@@ -251,6 +251,23 @@ value binary_expression::execute_impl(context & ctx) {
         return res;
     }
 
+    // Python-style string repetition
+    // TODO: support array/tuple repetition (e.g., [1, 2] * 3 → [1, 2, 1, 2, 1, 2])
+    if (op.value == "*" &&
+            ((is_val<value_string>(left_val) && is_val<value_int>(right_val)) ||
+             (is_val<value_int>(left_val) && is_val<value_string>(right_val)))) {
+        const auto & str = is_val<value_string>(left_val) ? left_val->as_string() : right_val->as_string();
+        const int64_t repeat = is_val<value_int>(right_val) ? right_val->as_int() : left_val->as_int();
+        auto res = mk_val<value_string>();
+        if (repeat <= 0) {
+            return res;
+        }
+        for (int64_t i = 0; i < repeat; ++i) {
+            res->val_str = res->val_str.append(str);
+        }
+        return res;
+    }
+
     // String membership
     if (is_val<value_string>(left_val) && is_val<value_string>(right_val)) {
         // case: "a" in "abc"

diff --git a/common/jinja/value.cpp b/common/jinja/value.cpp
@@ -1,4 +1,5 @@
 #include "runtime.h"
+#include "unicode.h"
 #include "value.h"
 
 // for converting from JSON to jinja values
@@ -154,6 +155,83 @@ static value test_compare_fn(const func_args & args) {
     return mk_val<value_bool>(value_compare(args.get_pos(0), args.get_pos(1), op));
 }
 
+static void append_codepoint_as_ascii_json_escape(std::string & out, uint32_t codepoint) { // appends `codepoint` to `out` as one or two lowercase-hex "\uXXXX" escapes
+    auto append_u16 = [&out](uint32_t value) { // formats a single 16-bit unit as "\uXXXX" and appends it to `out`
+        char buf[8]; // "\uXXXX" is 6 characters plus the terminating NUL for values up to 0xFFFF
+        snprintf(buf, sizeof(buf), "\\u%04x", static_cast<unsigned int>(value));
+        out += buf;
+    };
+
+    if (codepoint <= 0xFFFF) { // fits in one 16-bit unit: a single escape is enough
+        append_u16(codepoint);
+        return;
+    }
+
+    codepoint -= 0x10000; // above 0xFFFF: split the offset past 0x10000 into a surrogate pair
+    append_u16(0xD800 + ((codepoint >> 10) & 0x3FF)); // high surrogate carries the upper 10 bits
+    append_u16(0xDC00 + (codepoint & 0x3FF)); // low surrogate carries the lower 10 bits
+}
+
+static std::string json_ensure_ascii_preserving_format(const std::string & json_str) { // returns a copy of `json_str` with bytes >= 0x80 inside string literals replaced by "\uXXXX" escapes
+    std::string output;
+    output.reserve(json_str.size()); // lower bound only: each escaped codepoint grows the output
+
+    bool in_string = false; // true while scanning between an opening and closing double quote
+    bool escaped = false; // true when the previous byte inside a string was a backslash
+
+    for (size_t pos = 0; pos < json_str.size();) { // pos is advanced inside the body, by 1 or by a whole UTF-8 sequence
+        const char ch = json_str[pos];
+        if (!in_string) { // outside string literals every byte is copied unchanged
+            output.push_back(ch);
+            if (ch == '"') {
+                in_string = true;
+            }
+            ++pos;
+            continue;
+        }
+
+        if (escaped) { // byte following a backslash is copied verbatim, so an escaped quote does not end the string
+            output.push_back(ch);
+            escaped = false;
+            ++pos;
+            continue;
+        }
+
+        if (ch == '\\') { // start of an existing escape sequence: keep it as written
+            output.push_back(ch);
+            escaped = true;
+            ++pos;
+            continue;
+        }
+
+        if (ch == '"') { // unescaped quote closes the string literal
+            output.push_back(ch);
+            in_string = false;
+            ++pos;
+            continue;
+        }
+
+        const unsigned char uch = static_cast<unsigned char>(ch); // compare as unsigned so bytes >= 0x80 are not seen as negative
+        if (uch < 0x80) { // plain ASCII inside a string is copied unchanged
+            output.push_back(ch);
+            ++pos;
+            continue;
+        }
+
+        auto parsed = common_parse_utf8_codepoint(json_str, pos); // NOTE(review): presumably decodes the UTF-8 sequence starting at pos; helper is defined outside this view
+        if (parsed.status != utf8_parse_result::SUCCESS) { // undecodable byte: emit the U+FFFD escape and skip exactly one byte
+            output += "\\ufffd";
+            ++pos;
+            continue;
+        }
+
+        append_codepoint_as_ascii_json_escape(output, parsed.codepoint); // write the decoded codepoint as "\uXXXX" escape(s)
+        pos += parsed.bytes_consumed; // step over the bytes the parser reported consuming
+    }
+
+    return output;
+}
+
 static value tojson(const func_args & args) {
     args.ensure_count(1, 5);
     value val_ascii      = args.get_kwarg_or_pos("ensure_ascii", 1);
@@ -169,16 +247,17 @@ static value tojson(const func_args & args) {
     if (is_val<value_int>(val_indent)) {
         indent = static_cast<int>(val_indent->as_int());
     }
-    if (val_ascii->as_bool()) { // undefined == false
-        throw not_implemented_exception("tojson ensure_ascii=true not implemented");
-    }
     if (val_sort->as_bool()) { // undefined == false
         throw not_implemented_exception("tojson sort_keys=true not implemented");
     }
+    const bool ensure_ascii = val_ascii->as_bool(); // undefined == false
     auto separators = (is_val<value_array>(val_separators) ? val_separators : mk_val<value_array>())->as_array();
     std::string item_sep = separators.size() > 0 ? separators[0]->as_string().str() : (indent < 0 ? ", " : ",");
     std::string key_sep = separators.size() > 1 ? separators[1]->as_string().str() : ": ";
     std::string json_str = value_to_json(args.get_pos(0), indent, item_sep, key_sep);
+    if (ensure_ascii) {
+        json_str = json_ensure_ascii_preserving_format(json_str);
+    }
     return mk_val<value_string>(json_str);
 }
 
@@ -460,6 +539,10 @@ const func_builtins & value_int_t::get_builtins() const {
             int64_t val = args.get_pos(0)->as_int();
             return mk_val<value_int>(val < 0 ? -val : val);
         }},
+        {"int", [](const func_args & args) -> value {
+            args.ensure_vals<value_int>();
+            return mk_val<value_int>(args.get_pos(0)->as_int());
+        }},
         {"float", [](const func_args & args) -> value {
             args.ensure_vals<value_int>();
             double val = static_cast<double>(args.get_pos(0)->as_int());
@@ -486,6 +569,10 @@ const func_builtins & value_float_t::get_builtins() const {
             int64_t val = static_cast<int64_t>(args.get_pos(0)->as_float());
             return mk_val<value_int>(val);
         }},
+        {"float", [](const func_args & args) -> value {
+            args.ensure_vals<value_float>();
+            return mk_val<value_float>(args.get_pos(0)->as_float());
+        }},
         {"safe", tojson},
         {"string", tojson},
         {"tojson", tojson},