Skip to content

Commit a90adc3

Browse files
Merge pull request #460 from janhq/update-dev-from-master-2026-03-21-00-44
Sync master with upstream release b8459
2 parents 0094172 + e6ec21e commit a90adc3

85 files changed

Lines changed: 20391 additions & 26408 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.github/workflows/ai-issues.yml

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
name: AI review (issues)
2+
3+
on:
4+
issues:
5+
types: [opened]
6+
7+
jobs:
8+
find-related:
9+
if: github.event.action == 'opened'
10+
runs-on: [self-hosted, opencode]
11+
12+
permissions:
13+
contents: read
14+
issues: write
15+
16+
steps:
17+
- name: Checkout repository
18+
uses: actions/checkout@v6
19+
with:
20+
fetch-depth: 1
21+
22+
- name: Find related
23+
env:
24+
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
25+
OPENCODE_PERMISSION: |
26+
{
27+
"bash": {
28+
"*": "deny",
29+
"gh issue*": "allow",
30+
"gh search issues*": "allow"
31+
},
32+
"webfetch": "deny"
33+
}
34+
run: |
35+
rm AGENTS.md
36+
rm CLAUDE.md
37+
38+
timeout 5m opencode run -m llama.cpp-dgx/ai-review-issues-find-similar --thinking "A new issue has been created:
39+
40+
Issue number: ${{ github.event.issue.number }}
41+
42+
Lookup the contents of the issue using the following 'gh' command:
43+
44+
gh issue view ${{ github.event.issue.number }} --json title,body,url,number
45+
46+
Next, perform the following task and then post a SINGLE comment (if needed).
47+
48+
---
49+
50+
TASK : FIND RELATED ISSUES
51+
52+
Using the 'gh' CLI tool, search through existing issues on GitHub.
53+
Find related or similar issues to the newly created one and list them.
54+
Do not list the new issue itself (it is #${{ github.event.issue.number }}).
55+
56+
Consider:
57+
1. Similar titles or descriptions
58+
2. Same error messages or symptoms
59+
3. Related functionality or components
60+
4. Similar feature requests
61+
62+
---
63+
64+
POSTING YOUR COMMENT:
65+
66+
Based on your findings, post a SINGLE comment on issue #${{ github.event.issue.number }}. Build the comment as follows:
67+
68+
- If no related issues were found, do NOT comment at all.
69+
- If related issues were found, include a section listing them with links using the following format:
70+
71+
[comment]
72+
This issue might be similar or related to the following issue(s):
73+
74+
- #[related_issue_number]: [brief description of how they are related]
75+
- #[related_issue_number]: [brief description of how they are related]
76+
...
77+
78+
_This comment was auto-generated locally using **$GA_ENGINE** on **$GA_MACHINE**_
79+
[/comment]
80+
81+
Remember:
82+
- Do not include the comment tags in your actual comment.
83+
- Post at most ONE comment combining all findings.
84+
- If you didn't find issues that are related enough, post nothing.
85+
- You have access only to the 'gh' CLI tool - don't try to use other tools.
86+
- If the output from a tool call is too long, try to narrow down the search.
87+
"
.github/workflows/hip-quality-check.yml

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
name: HIP quality check
2+
3+
on:
4+
workflow_dispatch: # allows manual triggering
5+
push:
6+
branches:
7+
- master
8+
paths: [
9+
'.github/workflows/hip-quality-check.yml',
10+
'**/*.cu',
11+
'**/*.cuh'
12+
]
13+
14+
pull_request:
15+
types: [opened, synchronize, reopened]
16+
paths: [
17+
'.github/workflows/hip-quality-check.yml',
18+
'**/*.cu',
19+
'**/*.cuh'
20+
]
21+
22+
concurrency:
23+
group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
24+
cancel-in-progress: true
25+
26+
env:
27+
GGML_NLOOP: 3
28+
GGML_N_THREADS: 1
29+
LLAMA_LOG_COLORS: 1
30+
LLAMA_LOG_PREFIX: 1
31+
LLAMA_LOG_TIMESTAMPS: 1
32+
33+
jobs:
34+
ubuntu-22-hip-quality-check:
35+
runs-on: ubuntu-22.04
36+
container: rocm/dev-ubuntu-22.04:7.2
37+
steps:
38+
- name: Clone
39+
id: checkout
40+
uses: actions/checkout@v6
41+
42+
- name: Dependencies
43+
id: depends
44+
run: |
45+
sudo apt-get update
46+
sudo apt-get install -y build-essential git cmake rocblas-dev hipblas-dev libssl-dev python3
47+
48+
- name: ccache
49+
uses: ggml-org/ccache-action@v1.2.21
50+
with:
51+
key: ubuntu-22-hip-quality-check
52+
evict-old-files: 1d
53+
save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }}
54+
55+
- name: Build with Werror
56+
id: cmake_build
57+
run: |
58+
cmake -B build -S . \
59+
-DCMAKE_HIP_COMPILER="$(hipconfig -l)/clang" \
60+
-DGPU_TARGETS=gfx908 \
61+
-DGGML_HIP=ON \
62+
-DGGML_HIP_EXPORT_METRICS=Off \
63+
-DCMAKE_HIP_FLAGS="-Werror -Wno-tautological-compare" \
64+
-DCMAKE_BUILD_TYPE=Release
65+
cd build
66+
make -j $(nproc)
67+
68+
- name: Check for major VGPR spills
69+
id: vgpr_check
70+
run: |
71+
cmake -B build -S . \
72+
-DCMAKE_HIP_COMPILER="$(hipconfig -l)/clang" \
73+
-DGPU_TARGETS=gfx908 \
74+
-DGGML_HIP=ON \
75+
-DGGML_HIP_EXPORT_METRICS=On \
76+
-DCMAKE_HIP_FLAGS="" \
77+
-DCMAKE_BUILD_TYPE=Release
78+
cd build
79+
make -j $(nproc) 2>&1 | tee metrics.log | grep -v 'Rpass-analysis=kernel-resource-usage\|remark:\|^$'
80+
python3 ../scripts/hip/gcn-cdna-vgpr-check.py metrics.log

CONTRIBUTING.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -178,6 +178,8 @@ Maintainers reserve the right to decline review or close pull requests for any r
178178
- New code should follow the guidelines (coding, naming, etc.) outlined in this document. Exceptions are allowed in isolated, backend-specific parts of the code that do not interface directly with the `ggml` interfaces.
179179
_(NOTE: for legacy reasons, existing code is not required to follow this guideline)_
180180
181+
- For changes to the server, please refer to the [server development documentation](./tools/server/README-dev.md).
182+
181183
# Documentation
182184
183185
- Documentation is a community effort

common/arg.cpp

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1830,23 +1830,23 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
18301830
).set_sparam());
18311831
add_opt(common_arg(
18321832
{"--grammar"}, "GRAMMAR",
1833-
string_format("BNF-like grammar to constrain generations (see samples in grammars/ dir) (default: '%s')", params.sampling.grammar.c_str()),
1833+
"BNF-like grammar to constrain generations (see samples in grammars/ dir)",
18341834
[](common_params & params, const std::string & value) {
1835-
params.sampling.grammar = value;
1835+
params.sampling.grammar = {COMMON_GRAMMAR_TYPE_USER, value};
18361836
}
18371837
).set_sparam());
18381838
add_opt(common_arg(
18391839
{"--grammar-file"}, "FNAME",
18401840
"file to read grammar from",
18411841
[](common_params & params, const std::string & value) {
1842-
params.sampling.grammar = read_file(value);
1842+
params.sampling.grammar = {COMMON_GRAMMAR_TYPE_USER, read_file(value)};
18431843
}
18441844
).set_sparam());
18451845
add_opt(common_arg(
18461846
{"-j", "--json-schema"}, "SCHEMA",
18471847
"JSON schema to constrain generations (https://json-schema.org/), e.g. `{}` for any JSON object\nFor schemas w/ external $refs, use --grammar + example/json_schema_to_grammar.py instead",
18481848
[](common_params & params, const std::string & value) {
1849-
params.sampling.grammar = json_schema_to_grammar(json::parse(value));
1849+
params.sampling.grammar = {COMMON_GRAMMAR_TYPE_OUTPUT_FORMAT, json_schema_to_grammar(json::parse(value))};
18501850
}
18511851
).set_sparam());
18521852
add_opt(common_arg(
@@ -1863,7 +1863,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
18631863
std::istreambuf_iterator<char>(),
18641864
std::back_inserter(schema)
18651865
);
1866-
params.sampling.grammar = json_schema_to_grammar(json::parse(schema));
1866+
params.sampling.grammar = {COMMON_GRAMMAR_TYPE_OUTPUT_FORMAT, json_schema_to_grammar(json::parse(schema))};
18671867
}
18681868
).set_sparam());
18691869
add_opt(common_arg(
@@ -3494,7 +3494,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
34943494
throw std::invalid_argument("unknown speculative decoding type without draft model");
34953495
}
34963496
}
3497-
).set_examples({LLAMA_EXAMPLE_SERVER}));
3497+
).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_SPEC_TYPE"));
34983498
add_opt(common_arg(
34993499
{"--spec-ngram-size-n"}, "N",
35003500
string_format("ngram size N for ngram-simple/ngram-map speculative decoding, length of lookup n-gram (default: %d)", params.speculative.ngram_size_n),

common/chat-auto-parser-generator.cpp

Lines changed: 29 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
#include "chat-auto-parser-helpers.h"
12
#include "chat-auto-parser.h"
23
#include "chat-peg-parser.h"
34
#include "chat.h"
@@ -23,31 +24,30 @@ static void foreach_function(const json & tools, const std::function<void(const
2324

2425
namespace autoparser {
2526

26-
parser_build_context::parser_build_context(common_chat_peg_builder & p, const templates_params & inputs) :
27+
parser_build_context::parser_build_context(common_chat_peg_builder & p, const generation_params & inputs) :
2728
p(p),
2829
inputs(inputs),
2930
reasoning_parser(p.eps()) {}
3031

3132
common_chat_params peg_generator::generate_parser(const common_chat_template & tmpl,
32-
const struct templates_params & inputs) {
33+
const struct generation_params & inputs) {
3334
// Run differential analysis to extract template structure
3435
struct autoparser autoparser;
3536
autoparser.analyze_template(tmpl);
3637
return generate_parser(tmpl, inputs, autoparser);
3738
}
3839

3940
common_chat_params peg_generator::generate_parser(const common_chat_template & tmpl,
40-
const struct templates_params & inputs,
41+
const struct generation_params & inputs,
4142
const autoparser & autoparser) {
42-
// Build the parser using the analysis results
43-
auto parser = autoparser.build_parser(inputs);
44-
4543
// Create the result structure
4644
common_chat_params data;
4745
data.prompt = common_chat_template_direct_apply(tmpl, inputs);
4846
data.format = COMMON_CHAT_FORMAT_PEG_NATIVE;
4947
data.preserved_tokens = autoparser.preserved_tokens;
50-
data.parser = parser.save();
48+
49+
auto parser = autoparser.build_parser(inputs);
50+
data.parser = parser.save();
5151

5252
// Build grammar if tools are present
5353
bool has_tools =
@@ -82,44 +82,38 @@ common_chat_params peg_generator::generate_parser(const common_chat_template &
8282
return data;
8383
}
8484

85-
common_peg_arena autoparser::build_parser(const templates_params & inputs) const {
85+
common_peg_arena autoparser::build_parser(const generation_params & inputs) const {
8686
if (!analysis_complete) {
8787
throw std::invalid_argument("Cannot call build_parser on autoparser without performing analysis first, call analyze_template(...)");
8888
}
8989
return build_chat_peg_parser([&](common_chat_peg_builder & p) {
90-
// If the template uses Python dict format (single-quoted strings in JSON structures),
91-
// pre-register a json-string rule that accepts both quote styles. This must happen
92-
// before any call to p.json() so that all JSON parsing inherits the flexible rule.
93-
if (tools.format.uses_python_dicts) {
94-
p.rule("json-string", p.quoted_string());
95-
}
96-
9790
parser_build_context ctx(p, inputs);
9891
bool extract_reasoning = inputs.reasoning_format != COMMON_REASONING_FORMAT_NONE;
99-
bool enable_thinking = inputs.enable_thinking;
10092

101-
ctx.extracting_reasoning = extract_reasoning && enable_thinking && reasoning.mode != reasoning_mode::NONE;
93+
ctx.extracting_reasoning = extract_reasoning && reasoning.mode != reasoning_mode::NONE;
10294
ctx.content = &content;
10395

10496
// Build reasoning parser
10597
ctx.reasoning_parser = reasoning.build_parser(ctx);
10698

99+
auto parser = p.eps();
100+
107101
bool has_tools = inputs.tools.is_array() && !inputs.tools.empty();
108102
bool has_response_format = inputs.json_schema.is_object() && !inputs.json_schema.empty();
109103

110104
if (has_response_format) {
111105
auto response_format = p.rule("response-format", p.content(p.schema(p.json(), "response-format-schema", inputs.json_schema)));
112-
return ctx.reasoning_parser + p.space() + p.choice({
106+
parser = ctx.reasoning_parser + p.space() + p.choice({
113107
p.literal("```json") + p.space() + response_format + p.space() + p.literal("```"),
114108
response_format
115109
}) + p.end();
110+
} else if (has_tools && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE && jinja_caps.supports_tool_calls) {
111+
parser = tools.build_parser(ctx);
112+
} else {
113+
parser = content.build_parser(ctx);
116114
}
117-
118-
if (has_tools && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE && jinja_caps.supports_tool_calls) {
119-
return tools.build_parser(ctx);
120-
}
121-
122-
return content.build_parser(ctx);
115+
parser = wrap_for_generation_prompt(p, parser, inputs, reasoning.start);
116+
return parser;
123117
});
124118
}
125119

@@ -130,24 +124,15 @@ common_peg_parser analyze_reasoning::build_parser(parser_build_context & ctx) co
130124
return p.eps();
131125
}
132126

133-
bool thinking_forced_open = (mode == reasoning_mode::FORCED_OPEN);
134-
bool thinking_forced_closed = (mode == reasoning_mode::FORCED_CLOSED);
135-
136-
if (thinking_forced_open || thinking_forced_closed) {
137-
// Thinking is forced open OR forced closed with enable_thinking=true
138-
// In both cases, expect only the closing tag (opening was in template)
139-
// However, since we might have incorrectly detected the open/close pattern,
140-
// we admit an optional starting marker
141-
return p.optional(p.literal(start)) + p.reasoning(p.until(end)) + end;
142-
}
143127
if (mode == reasoning_mode::TAG_BASED || mode == reasoning_mode::TOOLS_ONLY) {
144-
// Standard tag-based reasoning OR tools-only mode (reasoning appears with tools)
145-
// Both use the same tag-based pattern if markers are available
146-
if (!start.empty() && !end.empty()) {
147-
return p.optional(start + p.reasoning(p.until(end)) + end);
128+
if (!end.empty()) {
129+
if (!start.empty()) {
130+
// Standard tag-based: optional(<think>reasoning</think>)
131+
return p.optional(start + p.reasoning(p.until(end)) + end + p.space());
132+
}
133+
// Delimiter-style (empty start)
134+
return p.optional(p.reasoning(p.until(end)) + end + p.space());
148135
}
149-
} else if (mode == reasoning_mode::DELIMITER) {
150-
return p.optional(p.reasoning(p.until(end)) + end);
151136
}
152137

153138
return p.eps();
@@ -335,7 +320,7 @@ common_peg_parser analyze_tools::build_tool_parser_tag_tagged(parser_build_conte
335320
"tool-" + name + "-arg-" + param_name + "-schema",
336321
param_schema, true)) :
337322
p.tool_arg_json_value(p.schema(
338-
p.json(), "tool-" + name + "-arg-" + param_name + "-schema", param_schema, format.uses_python_dicts)) +
323+
p.json(), "tool-" + name + "-arg-" + param_name + "-schema", param_schema, false)) +
339324
p.space()) +
340325
p.tool_arg_close(p.literal(arguments.value_suffix)));
341326

@@ -384,7 +369,9 @@ common_peg_parser analyze_tools::build_tool_parser_tag_tagged(parser_build_conte
384369
func_parser = p.atomic(p.tool_open(function.name_prefix + p.tool_name(p.literal(name)) + function.name_suffix) +
385370
call_id_section) + p.space() + args_seq;
386371
matched_atomic = true;
387-
} else if (!arguments.name_prefix.empty() && properties.size() > 0) {
372+
} else if (!arguments.name_prefix.empty() && !required_parsers.empty()) {
373+
// Only peek for an arg tag when there are required args that must follow.
374+
// When all args are optional, the model may emit no arg tags at all (#20650).
388375
func_parser = p.atomic(p.tool_open(function.name_prefix + p.tool_name(p.literal(name)) + function.name_suffix) +
389376
call_id_section + p.space() + p.peek(p.literal(arguments.name_prefix))) + args_seq;
390377
matched_atomic = true;

0 commit comments

Comments
 (0)