Skip to content

Commit 8168daf

Browse files
Merge pull request #519 from janhq/update-dev-from-master-2026-05-18-01-12
Sync master with upstream release b9204
2 parents c1531b9 + 726704a commit 8168daf

75 files changed

Lines changed: 1739 additions & 337 deletions

File tree

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

CMakeLists.txt

Lines changed: 2 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -108,20 +108,15 @@ option(LLAMA_BUILD_TESTS "llama: build tests"
108108
option(LLAMA_BUILD_TOOLS "llama: build tools" ${LLAMA_STANDALONE})
109109
option(LLAMA_BUILD_EXAMPLES "llama: build examples" ${LLAMA_STANDALONE})
110110
option(LLAMA_BUILD_SERVER "llama: build server example" ${LLAMA_STANDALONE})
111-
# Deprecated: use LLAMA_BUILD_UI instead (kept for backward compat)
112-
option(LLAMA_BUILD_WEBUI "llama: build the embedded Web UI for server (deprecated: use LLAMA_BUILD_UI)" ON)
113-
option(LLAMA_USE_PREBUILT_WEBUI "llama: use prebuilt WebUI from HF Bucket when available (deprecated: use LLAMA_USE_PREBUILT_UI)" ON)
114-
115-
# New option names
116111
option(LLAMA_BUILD_UI "llama: build the embedded Web UI for server" ON)
117112
option(LLAMA_USE_PREBUILT_UI "llama: use prebuilt UI from HF Bucket when available (requires LLAMA_BUILD_UI=ON)" ON)
118113

119114
# Backward compat: when a deprecated variable is set, forward its value to the new variable
120-
if(DEFINED LLAMA_BUILD_WEBUI AND NOT DEFINED LLAMA_BUILD_UI)
115+
if(DEFINED LLAMA_BUILD_WEBUI)
121116
set(LLAMA_BUILD_UI ${LLAMA_BUILD_WEBUI})
122117
message(DEPRECATION "LLAMA_BUILD_WEBUI is deprecated, use LLAMA_BUILD_UI instead")
123118
endif()
124-
if(DEFINED LLAMA_USE_PREBUILT_WEBUI AND NOT DEFINED LLAMA_USE_PREBUILT_UI)
119+
if(DEFINED LLAMA_USE_PREBUILT_WEBUI)
125120
set(LLAMA_USE_PREBUILT_UI ${LLAMA_USE_PREBUILT_WEBUI})
126121
message(DEPRECATION "LLAMA_USE_PREBUILT_WEBUI is deprecated, use LLAMA_USE_PREBUILT_UI instead")
127122
endif()
@@ -286,18 +281,6 @@ install(FILES ${CMAKE_CURRENT_BINARY_DIR}/llama-config.cmake
286281
${CMAKE_CURRENT_BINARY_DIR}/llama-version.cmake
287282
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/llama)
288283

289-
install(
290-
FILES convert_hf_to_gguf.py
291-
PERMISSIONS
292-
OWNER_READ
293-
OWNER_WRITE
294-
OWNER_EXECUTE
295-
GROUP_READ
296-
GROUP_EXECUTE
297-
WORLD_READ
298-
WORLD_EXECUTE
299-
DESTINATION ${CMAKE_INSTALL_BINDIR})
300-
301284
configure_file(cmake/llama.pc.in
302285
"${CMAKE_CURRENT_BINARY_DIR}/llama.pc"
303286
@ONLY)

ci/run.sh

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,12 @@ if [ ! -z ${GG_BUILD_VULKAN} ]; then
117117
# if on Mac, disable METAL
118118
if [[ "$OSTYPE" == "darwin"* ]]; then
119119
CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_METAL=OFF -DGGML_BLAS=OFF"
120+
121+
MACOS_RUNNER_CUSTOM_VULKAN_CMAKE_LOCATION="/usr/local/lib/cmake/vulkan"
122+
MACOS_RUNNER_CUSTOM_SPIRV_HEADERS_LOCATION="${MACOS_RUNNER_CUSTOM_VULKAN_CMAKE_LOCATION}/SPIRV-Headers/SPIRV-HeadersConfig.cmake"
123+
if [[ -f "${MACOS_RUNNER_CUSTOM_SPIRV_HEADERS_LOCATION}" || -h "${MACOS_RUNNER_CUSTOM_SPIRV_HEADERS_LOCATION}" ]]; then
124+
CMAKE_EXTRA="${CMAKE_EXTRA} -DSPIRV-Headers_DIR=${MACOS_RUNNER_CUSTOM_VULKAN_CMAKE_LOCATION}/SPIRV-Headers"
125+
fi
120126
fi
121127

122128
# Build shared libs on Windows

common/arg.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2808,7 +2808,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
28082808
[](common_params & params, int value) {
28092809
params.embd_normalize = value;
28102810
}
2811-
).set_examples({LLAMA_EXAMPLE_EMBEDDING, LLAMA_EXAMPLE_DEBUG}));
2811+
).set_examples({LLAMA_EXAMPLE_EMBEDDING, LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_DEBUG}));
28122812
add_opt(common_arg(
28132813
{"--embd-output-format"}, "FORMAT",
28142814
"empty = default, \"array\" = [[],[]...], \"json\" = openai style, \"json+\" = same \"json\" + cosine similarity matrix, \"raw\" = plain whitespace-delimited output (one embedding per line)",

common/chat-auto-parser-generator.cpp

Lines changed: 28 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -43,11 +43,33 @@ common_chat_params peg_generator::generate_parser(const common_chat_template &
4343
const autoparser & autoparser) {
4444
// Create the result structure
4545
common_chat_params data;
46-
data.prompt = common_chat_template_direct_apply(tmpl, inputs);
47-
data.format = COMMON_CHAT_FORMAT_PEG_NATIVE;
48-
data.preserved_tokens = autoparser.preserved_tokens;
46+
data.prompt = common_chat_template_direct_apply(tmpl, inputs);
47+
data.generation_prompt = common_chat_template_generation_prompt(tmpl, inputs);
48+
data.format = COMMON_CHAT_FORMAT_PEG_NATIVE;
49+
data.preserved_tokens = autoparser.preserved_tokens;
50+
51+
std::string parser_generation_prompt = data.generation_prompt;
52+
53+
if (inputs.continue_final_message != COMMON_CHAT_CONTINUATION_NONE && !inputs.continue_msg.empty()) {
54+
// Build up generation prompt manually
55+
const auto & msg = inputs.continue_msg;
56+
57+
if (!autoparser.reasoning.start.empty()) {
58+
data.generation_prompt = data.generation_prompt.substr(0, data.generation_prompt.find(autoparser.reasoning.start));
59+
data.generation_prompt += autoparser.reasoning.start + msg.reasoning_content;
60+
if (inputs.continue_final_message == COMMON_CHAT_CONTINUATION_CONTENT) {
61+
data.generation_prompt += autoparser.reasoning.end;
62+
}
63+
}
64+
65+
if (inputs.continue_final_message == COMMON_CHAT_CONTINUATION_CONTENT) {
66+
data.generation_prompt += msg.render_content();
67+
}
68+
69+
data.prompt += data.generation_prompt;
70+
}
4971

50-
auto parser = autoparser.build_parser(inputs);
72+
auto parser = autoparser.build_parser(inputs, parser_generation_prompt);
5173
data.parser = parser.save();
5274

5375
// Build grammar if tools are present
@@ -87,7 +109,7 @@ common_chat_params peg_generator::generate_parser(const common_chat_template &
87109
return data;
88110
}
89111

90-
common_peg_arena autoparser::build_parser(const generation_params & inputs) const {
112+
common_peg_arena autoparser::build_parser(const generation_params & inputs, const std::string & generation_prompt) const {
91113
if (!analysis_complete) {
92114
throw std::invalid_argument("Cannot call build_parser on autoparser without performing analysis first, call analyze_template(...)");
93115
}
@@ -121,7 +143,7 @@ common_peg_arena autoparser::build_parser(const generation_params & inputs) cons
121143
} else {
122144
parser = content.build_parser(ctx);
123145
}
124-
return pure_content ? p.prefix(inputs.generation_prompt, reasoning.start) + parser : p.prefix(inputs.generation_prompt, reasoning.start) << parser;
146+
return pure_content ? p.prefix(generation_prompt, reasoning.start) + parser : p.prefix(generation_prompt, reasoning.start) << parser;
125147
});
126148
}
127149

common/chat-auto-parser.h

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -60,16 +60,21 @@ struct generation_params {
6060
common_reasoning_format reasoning_format = COMMON_REASONING_FORMAT_AUTO;
6161
bool stream = true;
6262
std::string grammar;
63-
bool add_generation_prompt = false;
64-
bool enable_thinking = true;
65-
std::chrono::system_clock::time_point now = std::chrono::system_clock::now();
66-
std::string generation_prompt;
63+
bool add_generation_prompt = false;
64+
common_chat_continuation continue_final_message = COMMON_CHAT_CONTINUATION_NONE;
65+
common_chat_msg continue_msg;
66+
bool enable_thinking = true;
67+
std::chrono::system_clock::time_point now = std::chrono::system_clock::now();
6768
json extra_context;
6869
bool add_bos = false;
6970
bool add_eos = false;
7071
bool is_inference = true;
7172
bool add_inference = false;
7273
bool mark_input = true; // whether to mark input strings in the jinja context
74+
75+
bool has_continuation() const {
76+
return continue_final_message != COMMON_CHAT_CONTINUATION_NONE && !continue_msg.empty();
77+
}
7378
};
7479

7580
// ============================================================================
@@ -386,7 +391,7 @@ struct autoparser {
386391
void analyze_template(const common_chat_template & tmpl);
387392

388393
// Build the PEG parser for this template
389-
common_peg_arena build_parser(const generation_params & inputs) const;
394+
common_peg_arena build_parser(const generation_params & inputs, const std::string & generation_prompt) const;
390395

391396
private:
392397
// Collect tokens from entire analysis to preserve

common/chat-peg-parser.cpp

Lines changed: 2 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -358,35 +358,7 @@ void common_chat_peg_mapper::map(const common_peg_ast_node & node) {
358358
if (is_potential_container) {
359359
value_content = normalize_container_value(value_content);
360360
}
361-
362-
// Try to parse as JSON value (number, bool, null, object, array)
363-
try {
364-
ordered_json parsed = ordered_json::parse(value_content);
365-
if (parsed.is_string()) {
366-
// Don't add closing quote yet (added by arg_close) for monotonic streaming
367-
std::string escaped = parsed.dump();
368-
if (!escaped.empty() && escaped.back() == '"') {
369-
escaped.pop_back();
370-
}
371-
value_to_add = escaped;
372-
closing_quote_pending = true;
373-
} else {
374-
// Non-string values: use raw content to preserve whitespace for monotonicity
375-
value_to_add = value_content;
376-
}
377-
} catch (...) {
378-
if (node.is_partial && is_potential_container) {
379-
// Partial container: pass through the already-normalized content
380-
value_to_add = value_content;
381-
} else {
382-
// Not valid JSON - treat as string value
383-
if (!closing_quote_pending) {
384-
value_to_add = "\"";
385-
closing_quote_pending = true;
386-
}
387-
value_to_add += escape_json_string_inner(value_content);
388-
}
389-
}
361+
value_to_add += value_content;
390362
}
391363

392364
args_target() += value_to_add;
@@ -813,7 +785,7 @@ common_peg_parser common_chat_peg_builder::prefix(const std::string & s, const s
813785
if (delimiter.empty()) {
814786
return literal(s);
815787
}
816-
return literal(s.substr(0, s.rfind(delimiter)));
788+
return literal(s.substr(0, s.find(delimiter)));
817789
}
818790

819791
common_peg_parser common_chat_peg_builder::optspace(const std::string & tag) {

common/chat-peg-parser.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,7 @@ class common_chat_peg_builder : public common_peg_parser_builder {
9090

9191
// Use for schema-declared string types - won't be treated as potential JSON container
9292
common_peg_parser tool_arg_string_value(const common_peg_parser & p) { return tag(TOOL_ARG_STRING_VALUE, p); }
93-
common_peg_parser tool_arg_json_value(const common_peg_parser & p) { return atomic(tag(TOOL_ARG_VALUE, p)); }
93+
common_peg_parser tool_arg_json_value(const common_peg_parser & p) { return tag(TOOL_ARG_VALUE, p); }
9494

9595

9696
// Return a parser that parses the prefix of a string, up to a given delimiter.

0 commit comments

Comments
 (0)