janhq
diff --git a/‎.github/workflows/ai-issues.yml‎
Lines changed: 87 additions & 0 deletions b/‎.github/workflows/ai-issues.yml‎
Lines changed: 87 additions & 0 deletions
diff --git a/‎.github/workflows/hip-quality-check.yml‎
Lines changed: 80 additions & 0 deletions b/‎.github/workflows/hip-quality-check.yml‎
Lines changed: 80 additions & 0 deletions
diff --git a/‎CONTRIBUTING.md‎
Lines changed: 2 additions & 0 deletions b/‎CONTRIBUTING.md‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎common/arg.cpp‎
Lines changed: 6 additions & 6 deletions b/‎common/arg.cpp‎
Lines changed: 6 additions & 6 deletions
diff --git a/‎common/chat-auto-parser-generator.cpp‎
Lines changed: 29 additions & 42 deletions b/‎common/chat-auto-parser-generator.cpp‎
Lines changed: 29 additions & 42 deletions
@@ -0,0 +1,87 @@
+# Runs an AI agent on each newly opened issue to look for related issues.
+name: AI review (issues)
+
+on:
+  issues:
+    types:
+      - opened
+
+jobs:
+  find-related:
+    # Guard on the event action; the trigger above is already limited to 'opened'.
+    if: github.event.action == 'opened'
+    runs-on:
+      - self-hosted
+      - opencode
+
+    # Token scope: read-only repository contents, write access to issues.
+    permissions:
+      contents: read
+      issues: write
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v6
+        with:
+          # Shallow clone: only the most recent commit is fetched.
+          fetch-depth: 1
+
+      # Asks opencode to search existing issues with the 'gh' CLI and, if any
+      # are related to the new issue, post a single comment listing them.
+      - name: Find related
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          # Tool permissions handed to opencode: every shell command is denied
+          # except those matching 'gh issue*' and 'gh search issues*'; web
+          # fetching is denied.
+          # NOTE(review): 'gh issue*' presumably also matches mutating
+          # subcommands (e.g. 'gh issue close', 'gh issue edit'); consider
+          # narrowing it - confirm against opencode's pattern semantics.
+          OPENCODE_PERMISSION: |
+            {
+              "bash": {
+                "*": "deny",
+                "gh issue*": "allow",
+                "gh search issues*": "allow"
+              },
+              "webfetch": "deny"
+            }
+        run: |
+          # Remove agent instruction files from the checkout (presumably so the
+          # agent does not pick them up). '-f' keeps the step from aborting
+          # under 'set -e' if a file has been renamed or removed.
+          rm -f AGENTS.md
+          rm -f CLAUDE.md
+
+          # NOTE(review): $GA_ENGINE and $GA_MACHINE below are expanded by the
+          # shell inside the double-quoted prompt; presumably they are exported
+          # on the self-hosted runner - confirm.
+          timeout 5m opencode run -m llama.cpp-dgx/ai-review-issues-find-similar --thinking "A new issue has been created:
+
+          Issue number: ${{ github.event.issue.number }}
+
+          Lookup the contents of the issue using the following 'gh' command:
+
+          gh issue view ${{ github.event.issue.number }} --json title,body,url,number
+
+          Next, perform the following task and then post a SINGLE comment (if needed).
+
+          ---
+
+          TASK : FIND RELATED ISSUES
+
+          Using the 'gh' CLI tool, search through existing issues on Github.
+          Find related or similar issues to the newly created one and list them.
+          Do not list the new issue itself (it is #${{ github.event.issue.number }}).
+
+          Consider:
+          1. Similar titles or descriptions
+          2. Same error messages or symptoms
+          3. Related functionality or components
+          4. Similar feature requests
+
+          ---
+
+          POSTING YOUR COMMENT:
+
+          Based on your findings, post a SINGLE comment on issue #${{ github.event.issue.number }}. Build the comment as follows:
+
+          - If no related issues were found, do NOT comment at all.
+          - If related issues were found, include a section listing them with links using the following format:
+
+          [comment]
+          This issue might be similar or related to the following issue(s):
+
+            - #[related_issue_number]: [brief description of how they are related]
+            - #[related_issue_number]: [brief description of how they are related]
+            ...
+
+          _This comment was auto-generated locally using **$GA_ENGINE** on **$GA_MACHINE**_
+          [/comment]
+
+          Remember:
+            - Do not include the comment tags in your actual comment.
+            - Post at most ONE comment combining all findings.
+            - If you didn't find issues that are related enough, post nothing.
+            - You have access only to the 'gh' CLI tool - don't try to use other tools.
+            - If the output from a tool call is too long, try to limit down the search.
+          "
@@ -0,0 +1,80 @@
+# Builds the HIP backend with warnings treated as errors and checks the
+# compiled kernels for major VGPR spills.
+name: HIP quality check
+
+on:
+  # allows manual triggering
+  workflow_dispatch:
+  push:
+    branches:
+      - master
+    paths:
+      - '.github/workflows/hip-quality-check.yml'
+      - '**/*.cu'
+      - '**/*.cuh'
+
+  pull_request:
+    types:
+      - opened
+      - synchronize
+      - reopened
+    paths:
+      - '.github/workflows/hip-quality-check.yml'
+      - '**/*.cu'
+      - '**/*.cuh'
+
+# When head_ref is set (pull requests) runs are grouped by ref, so a newer run
+# cancels the one in progress; otherwise the group is unique per run_id.
+concurrency:
+  group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
+  cancel-in-progress: true
+
+env:
+  GGML_NLOOP: 3
+  GGML_N_THREADS: 1
+  LLAMA_LOG_COLORS: 1
+  LLAMA_LOG_PREFIX: 1
+  LLAMA_LOG_TIMESTAMPS: 1
+
+jobs:
+  # Two-pass HIP build inside the ROCm container: first with -Werror, then
+  # with metrics export enabled so the build log can be scanned for VGPR spills.
+  ubuntu-22-hip-quality-check:
+    runs-on: ubuntu-22.04
+    container: rocm/dev-ubuntu-22.04:7.2
+    steps:
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v6
+
+      - name: Dependencies
+        id: depends
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y build-essential git cmake rocblas-dev hipblas-dev libssl-dev python3
+
+      - name: ccache
+        uses: ggml-org/ccache-action@v1.2.21
+        with:
+          key: ubuntu-22-hip-quality-check
+          evict-old-files: 1d
+          # Only pushes to master save the cache.
+          save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }}
+
+      # Pass 1: compile for gfx908 with warnings promoted to errors
+      # (tautological-compare warnings are disabled).
+      - name: Build with Werror
+        id: cmake_build
+        run: |
+          cmake -B build -S . \
+            -DCMAKE_HIP_COMPILER="$(hipconfig -l)/clang" \
+            -DGPU_TARGETS=gfx908 \
+            -DGGML_HIP=ON \
+            -DGGML_HIP_EXPORT_METRICS=Off \
+            -DCMAKE_HIP_FLAGS="-Werror -Wno-tautological-compare" \
+            -DCMAKE_BUILD_TYPE=Release
+          cd build
+          make -j $(nproc)
+
+      # Pass 2: reconfigure with metrics export on, capture the full build
+      # output in build/metrics.log (remark lines are filtered from the console
+      # only), then run the VGPR check script on the log.
+      - name: Check for major VGPR spills
+        id: vgpr_check
+        # Explicit bash runs the script with '-eo pipefail', so a failing
+        # 'make' fails the step instead of being masked by the exit status of
+        # the trailing 'tee | grep' in the pipeline.
+        shell: bash
+        run: |
+          cmake -B build -S . \
+            -DCMAKE_HIP_COMPILER="$(hipconfig -l)/clang" \
+            -DGPU_TARGETS=gfx908 \
+            -DGGML_HIP=ON \
+            -DGGML_HIP_EXPORT_METRICS=On \
+            -DCMAKE_HIP_FLAGS="" \
+            -DCMAKE_BUILD_TYPE=Release
+          cd build
+          make -j $(nproc) 2>&1 | tee metrics.log | grep -v 'Rpass-analysis=kernel-resource-usage\|remark:\|^$'
+          python3 ../scripts/hip/gcn-cdna-vgpr-check.py metrics.log
@@ -178,6 +178,8 @@ Maintainers reserve the right to decline review or close pull requests for any r
 - New code should follow the guidelines (coding, naming, etc.) outlined in this document. Exceptions are allowed in isolated, backend-specific parts of the code that do not interface directly with the `ggml` interfaces.
   _(NOTE: for legacy reasons, existing code is not required to follow this guideline)_
 
+- For changes to the server, please refer to the [server development documentation](./tools/server/README-dev.md).
+
 # Documentation
 
 - Documentation is a community effort
 
@@ -1830,23 +1830,23 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
     ).set_sparam());
     add_opt(common_arg(
         {"--grammar"}, "GRAMMAR",
-        string_format("BNF-like grammar to constrain generations (see samples in grammars/ dir) (default: '%s')", params.sampling.grammar.c_str()),
+        "BNF-like grammar to constrain generations (see samples in grammars/ dir)",
         [](common_params & params, const std::string & value) {
-            params.sampling.grammar = value;
+            params.sampling.grammar = {COMMON_GRAMMAR_TYPE_USER, value};
         }
     ).set_sparam());
     add_opt(common_arg(
         {"--grammar-file"}, "FNAME",
         "file to read grammar from",
         [](common_params & params, const std::string & value) {
-            params.sampling.grammar = read_file(value);
+            params.sampling.grammar = {COMMON_GRAMMAR_TYPE_USER, read_file(value)};
         }
     ).set_sparam());
     add_opt(common_arg(
         {"-j", "--json-schema"}, "SCHEMA",
         "JSON schema to constrain generations (https://json-schema.org/), e.g. `{}` for any JSON object\nFor schemas w/ external $refs, use --grammar + example/json_schema_to_grammar.py instead",
         [](common_params & params, const std::string & value) {
-            params.sampling.grammar = json_schema_to_grammar(json::parse(value));
+            params.sampling.grammar = {COMMON_GRAMMAR_TYPE_OUTPUT_FORMAT, json_schema_to_grammar(json::parse(value))};
         }
     ).set_sparam());
     add_opt(common_arg(
@@ -1863,7 +1863,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
                 std::istreambuf_iterator<char>(),
                 std::back_inserter(schema)
             );
-            params.sampling.grammar = json_schema_to_grammar(json::parse(schema));
+            params.sampling.grammar = {COMMON_GRAMMAR_TYPE_OUTPUT_FORMAT, json_schema_to_grammar(json::parse(schema))};
         }
     ).set_sparam());
     add_opt(common_arg(
@@ -3494,7 +3494,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
                 throw std::invalid_argument("unknown speculative decoding type without draft model");
             }
         }
-    ).set_examples({LLAMA_EXAMPLE_SERVER}));
+    ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_SPEC_TYPE"));
     add_opt(common_arg(
         {"--spec-ngram-size-n"}, "N",
         string_format("ngram size N for ngram-simple/ngram-map speculative decoding, length of lookup n-gram (default: %d)", params.speculative.ngram_size_n),
 
@@ -1,3 +1,4 @@
+#include "chat-auto-parser-helpers.h"
 #include "chat-auto-parser.h"
 #include "chat-peg-parser.h"
 #include "chat.h"
@@ -23,31 +24,30 @@ static void foreach_function(const json & tools, const std::function<void(const
 
 namespace autoparser {
 
-parser_build_context::parser_build_context(common_chat_peg_builder & p, const templates_params & inputs) :
+parser_build_context::parser_build_context(common_chat_peg_builder & p, const generation_params & inputs) :
     p(p),
     inputs(inputs),
     reasoning_parser(p.eps()) {}
 
 common_chat_params peg_generator::generate_parser(const common_chat_template &    tmpl,
-                                                  const struct templates_params & inputs) {
+                                                  const struct generation_params & inputs) {
     // Run differential analysis to extract template structure
     struct autoparser autoparser;
     autoparser.analyze_template(tmpl);
     return generate_parser(tmpl, inputs, autoparser);
 }
 
 common_chat_params peg_generator::generate_parser(const common_chat_template &    tmpl,
-                                                  const struct templates_params & inputs,
+                                                  const struct generation_params & inputs,
                                                   const autoparser &              autoparser) {
-    // Build the parser using the analysis results
-    auto parser = autoparser.build_parser(inputs);
-
     // Create the result structure
     common_chat_params data;
     data.prompt           = common_chat_template_direct_apply(tmpl, inputs);
     data.format           = COMMON_CHAT_FORMAT_PEG_NATIVE;
     data.preserved_tokens = autoparser.preserved_tokens;
-    data.parser           = parser.save();
+
+    auto parser = autoparser.build_parser(inputs);
+    data.parser = parser.save();
 
     // Build grammar if tools are present
     bool has_tools =
@@ -82,44 +82,38 @@ common_chat_params peg_generator::generate_parser(const common_chat_template &
     return data;
 }
 
-common_peg_arena autoparser::build_parser(const templates_params & inputs) const {
+common_peg_arena autoparser::build_parser(const generation_params & inputs) const {
     if (!analysis_complete) {
         throw std::invalid_argument("Cannot call build_parser on autoparser without performing analysis first, call analyze_template(...)");
     }
     return build_chat_peg_parser([&](common_chat_peg_builder & p) {
-        // If the template uses Python dict format (single-quoted strings in JSON structures),
-        // pre-register a json-string rule that accepts both quote styles. This must happen
-        // before any call to p.json() so that all JSON parsing inherits the flexible rule.
-        if (tools.format.uses_python_dicts) {
-            p.rule("json-string", p.quoted_string());
-        }
-
         parser_build_context ctx(p, inputs);
         bool                 extract_reasoning = inputs.reasoning_format != COMMON_REASONING_FORMAT_NONE;
-        bool                 enable_thinking   = inputs.enable_thinking;
 
-        ctx.extracting_reasoning = extract_reasoning && enable_thinking && reasoning.mode != reasoning_mode::NONE;
+        ctx.extracting_reasoning = extract_reasoning && reasoning.mode != reasoning_mode::NONE;
         ctx.content              = &content;
 
         // Build reasoning parser
         ctx.reasoning_parser = reasoning.build_parser(ctx);
 
+        auto parser = p.eps();
+
         bool has_tools           = inputs.tools.is_array() && !inputs.tools.empty();
         bool has_response_format = inputs.json_schema.is_object() && !inputs.json_schema.empty();
 
         if (has_response_format) {
             auto response_format = p.rule("response-format", p.content(p.schema(p.json(), "response-format-schema", inputs.json_schema)));
-            return ctx.reasoning_parser + p.space() + p.choice({
+            parser = ctx.reasoning_parser + p.space() + p.choice({
                 p.literal("```json") + p.space() + response_format + p.space() + p.literal("```"),
                 response_format
             }) + p.end();
+        } else if (has_tools && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE && jinja_caps.supports_tool_calls) {
+            parser = tools.build_parser(ctx);
+        } else {
+            parser = content.build_parser(ctx);
         }
-
-        if (has_tools && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE && jinja_caps.supports_tool_calls) {
-            return tools.build_parser(ctx);
-        }
-
-        return content.build_parser(ctx);
+        parser = wrap_for_generation_prompt(p, parser, inputs, reasoning.start);
+        return parser;
     });
 }
 
@@ -130,24 +124,15 @@ common_peg_parser analyze_reasoning::build_parser(parser_build_context & ctx) co
         return p.eps();
     }
 
-    bool thinking_forced_open   = (mode == reasoning_mode::FORCED_OPEN);
-    bool thinking_forced_closed = (mode == reasoning_mode::FORCED_CLOSED);
-
-    if (thinking_forced_open || thinking_forced_closed) {
-        // Thinking is forced open OR forced closed with enable_thinking=true
-        // In both cases, expect only the closing tag (opening was in template)
-        // However, since we might have incorrectly detected the open/close pattern,
-        // we admit an optional starting marker
-        return p.optional(p.literal(start)) + p.reasoning(p.until(end)) + end;
-    }
     if (mode == reasoning_mode::TAG_BASED || mode == reasoning_mode::TOOLS_ONLY) {
-        // Standard tag-based reasoning OR tools-only mode (reasoning appears with tools)
-        // Both use the same tag-based pattern if markers are available
-        if (!start.empty() && !end.empty()) {
-            return p.optional(start + p.reasoning(p.until(end)) + end);
+        if (!end.empty()) {
+            if (!start.empty()) {
+                // Standard tag-based: optional(<think>reasoning</think>)
+                return p.optional(start + p.reasoning(p.until(end)) + end + p.space());
+            }
+            // Delimiter-style (empty start)
+            return p.optional(p.reasoning(p.until(end)) + end + p.space());
         }
-    } else if (mode == reasoning_mode::DELIMITER) {
-        return p.optional(p.reasoning(p.until(end)) + end);
     }
 
     return p.eps();
@@ -335,7 +320,7 @@ common_peg_parser analyze_tools::build_tool_parser_tag_tagged(parser_build_conte
                                                                      "tool-" + name + "-arg-" + param_name + "-schema",
                                                                      param_schema, true)) :
                                     p.tool_arg_json_value(p.schema(
-                                        p.json(), "tool-" + name + "-arg-" + param_name + "-schema", param_schema, format.uses_python_dicts)) +
+                                        p.json(), "tool-" + name + "-arg-" + param_name + "-schema", param_schema, false)) +
                                         p.space()) +
                 p.tool_arg_close(p.literal(arguments.value_suffix)));
 
@@ -384,7 +369,9 @@ common_peg_parser analyze_tools::build_tool_parser_tag_tagged(parser_build_conte
             func_parser = p.atomic(p.tool_open(function.name_prefix + p.tool_name(p.literal(name)) + function.name_suffix) +
                 call_id_section) + p.space() + args_seq;
             matched_atomic = true;
-        } else if (!arguments.name_prefix.empty() && properties.size() > 0) {
+        } else if (!arguments.name_prefix.empty() && !required_parsers.empty()) {
+            // Only peek for an arg tag when there are required args that must follow.
+            // When all args are optional, the model may emit no arg tags at all (#20650).
             func_parser = p.atomic(p.tool_open(function.name_prefix + p.tool_name(p.literal(name)) + function.name_suffix) +
                 call_id_section + p.space() + p.peek(p.literal(arguments.name_prefix))) + args_seq;
             matched_atomic = true;