janhq
diff --git a/‎.devops/cann.Dockerfile‎
Lines changed: 1 addition & 1 deletion b/‎.devops/cann.Dockerfile‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎.devops/llama-cli-cann.Dockerfile‎
Lines changed: 1 addition & 1 deletion b/‎.devops/llama-cli-cann.Dockerfile‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎.devops/nix/package.nix‎
Lines changed: 2 additions & 0 deletions b/‎.devops/nix/package.nix‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎.github/workflows/build-cann.yml‎
Lines changed: 1 addition & 1 deletion b/‎.github/workflows/build-cann.yml‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎.github/workflows/release.yml‎
Lines changed: 1 addition & 1 deletion b/‎.github/workflows/release.yml‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎CMakeLists.txt‎
Lines changed: 1 addition & 0 deletions b/‎CMakeLists.txt‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎common/arg.cpp‎
Lines changed: 10 additions & 1 deletion b/‎common/arg.cpp‎
Lines changed: 10 additions & 1 deletion
diff --git a/‎common/common.h‎
Lines changed: 3 additions & 0 deletions b/‎common/common.h‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎common/reasoning-budget.cpp‎
Lines changed: 12 additions & 11 deletions b/‎common/reasoning-budget.cpp‎
Lines changed: 12 additions & 11 deletions
diff --git a/‎common/reasoning-budget.h‎
Lines changed: 2 additions & 0 deletions b/‎common/reasoning-budget.h‎
Lines changed: 2 additions & 0 deletions
@@ -4,7 +4,7 @@
 
 # Define the CANN base image for easier version updates later
 ARG CHIP_TYPE=910b
-ARG CANN_BASE_IMAGE=quay.io/ascend/cann:8.3.rc2-${CHIP_TYPE}-openeuler24.03-py3.11
+ARG CANN_BASE_IMAGE=quay.io/ascend/cann:8.5.0-${CHIP_TYPE}-openeuler24.03-py3.11
 
 # ==============================================================================
 # BUILD STAGE
 
@@ -1,4 +1,4 @@
-ARG ASCEND_VERSION=8.1.RC1.alpha001-910b-openeuler22.03-py3.10
+ARG ASCEND_VERSION=8.5.0-910b-openeuler22.03-py3.10
 
 FROM ascendai/cann:$ASCEND_VERSION AS build
 
 
@@ -41,6 +41,7 @@
   effectiveStdenv ? if useCuda then cudaPackages.backendStdenv else stdenv,
   enableStatic ? effectiveStdenv.hostPlatform.isStatic,
   precompileMetalShaders ? false,
+  useWebUi ? true,
 }:
 
 let
@@ -164,6 +165,7 @@ effectiveStdenv.mkDerivation (finalAttrs: {
   cmakeFlags =
     [
       (cmakeBool "LLAMA_BUILD_SERVER" true)
+      (cmakeBool "LLAMA_BUILD_WEBUI" useWebUi)
       (cmakeBool "BUILD_SHARED_LIBS" (!enableStatic))
       (cmakeBool "CMAKE_SKIP_BUILD_RPATH" true)
       (cmakeBool "GGML_NATIVE" false)
 
@@ -63,7 +63,7 @@ jobs:
       - name: Set container image
         id: cann-image
         run: |
-          image="ascendai/cann:${{ matrix.chip_type == '910b' &&  '8.3.rc2-910b-openeuler24.03-py3.11' || '8.3.rc2-310p-openeuler24.03-py3.11' }}"
+          image="ascendai/cann:${{ matrix.chip_type == '910b' &&  '8.5.0-910b-openeuler24.03-py3.11' || '8.5.0-310p-openeuler24.03-py3.11' }}"
           echo "image=${image}" >> "${GITHUB_OUTPUT}"
 
       - name: Pull container image
 
@@ -907,7 +907,7 @@ jobs:
       - name: Set container image
         id: cann-image
         run: |
-          image="ascendai/cann:${{ matrix.chip_type == '910b' &&  '8.3.rc2-910b-openeuler24.03-py3.11' || '8.3.rc2-310p-openeuler24.03-py3.11' }}"
+          image="ascendai/cann:${{ matrix.chip_type == '910b' &&  '8.5.0-910b-openeuler24.03-py3.11' || '8.5.0-310p-openeuler24.03-py3.11' }}"
           echo "image=${image}" >> "${GITHUB_OUTPUT}"
 
       - name: Pull container image
 
@@ -108,6 +108,7 @@ option(LLAMA_BUILD_TESTS    "llama: build tests"          ${LLAMA_STANDALONE})
 option(LLAMA_BUILD_TOOLS    "llama: build tools"          ${LLAMA_STANDALONE})
 option(LLAMA_BUILD_EXAMPLES "llama: build examples"       ${LLAMA_STANDALONE})
 option(LLAMA_BUILD_SERVER   "llama: build server example" ${LLAMA_STANDALONE})
+option(LLAMA_BUILD_WEBUI    "llama: build the embedded Web UI for server"  ON)
 option(LLAMA_TOOLS_INSTALL  "llama: install tools"        ${LLAMA_TOOLS_INSTALL_DEFAULT})
 option(LLAMA_TESTS_INSTALL  "llama: install tests"        ON)
 
 
@@ -1079,7 +1079,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
         [](common_params & params) {
             params.verbose_prompt = true;
         }
-    ));
+    ).set_examples({LLAMA_EXAMPLE_COMPLETION, LLAMA_EXAMPLE_CLI, LLAMA_EXAMPLE_EMBEDDING, LLAMA_EXAMPLE_RETRIEVAL}));
     add_opt(common_arg(
         {"--display-prompt"},
         {"--no-display-prompt"},
@@ -2843,6 +2843,15 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
             params.webui_mcp_proxy = value;
         }
     ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_WEBUI_MCP_PROXY"));
+    add_opt(common_arg(
+        {"--tools"}, "TOOL1,TOOL2,...",
+        "experimental: whether to enable built-in tools for AI agents - do not enable in untrusted environments (default: no tools)\n"
+        "specify \"all\" to enable all tools\n"
+        "available tools: read_file, file_glob_search, grep_search, exec_shell_command, write_file, edit_file, apply_diff",
+        [](common_params & params, const std::string & value) {
+            params.server_tools = parse_csv_row(value);
+        }
+    ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_TOOLS"));
     add_opt(common_arg(
         {"--webui"},
         {"--no-webui"},
 
@@ -613,6 +613,9 @@ struct common_params {
     bool endpoint_props   = false; // only control POST requests, not GET
     bool endpoint_metrics = false;
 
+    // enable built-in tools
+    std::vector<std::string> server_tools;
+
     // router server configs
     std::string models_dir    = ""; // directory containing models for the router server
     std::string models_preset = ""; // directory containing model presets for the router server
 
@@ -115,9 +115,11 @@ static void common_reasoning_budget_accept(struct llama_sampler * smpl, llama_to
             break;
         }
         case REASONING_BUDGET_FORCING:
-            // force_pos is advanced in apply(), not here.
-            // This ensures the first forced token isn't skipped when the sampler
-            // is initialized directly in FORCING state (e.g. COUNTING + budget=0)
+            ctx->force_pos++;
+            if (ctx->force_pos >= ctx->forced_tokens.size()) {
+                ctx->state = REASONING_BUDGET_DONE;
+                LOG_INF("reasoning-budget: forced sequence complete, done\n");
+            }
             break;
         case REASONING_BUDGET_DONE:
             break;
@@ -144,14 +146,6 @@ static void common_reasoning_budget_apply(struct llama_sampler * smpl, llama_tok
             cur_p->data[i].logit = -INFINITY;
         }
     }
-
-    // advance to next forced token (done here rather than in accept so that
-    // the first forced token isn't skipped when starting in FORCING state)
-    ctx->force_pos++;
-    if (ctx->force_pos >= ctx->forced_tokens.size()) {
-        ctx->state = REASONING_BUDGET_DONE;
-        LOG_INF("reasoning-budget: forced sequence complete, done\n");
-    }
 }
 
 static void common_reasoning_budget_reset(struct llama_sampler * smpl) {
@@ -261,3 +255,10 @@ struct llama_sampler * common_reasoning_budget_init(
         common_reasoning_budget_state    initial_state) {
     return common_reasoning_budget_init_state(vocab, start_tokens, end_tokens, forced_tokens, budget, initial_state);
 }
+
+common_reasoning_budget_state common_reasoning_budget_get_state(const struct llama_sampler * smpl) {
+    if (!smpl) {
+        return REASONING_BUDGET_IDLE;
+    }
+    return ((const common_reasoning_budget_ctx *)smpl->ctx)->state;
+}
@@ -51,3 +51,5 @@ struct llama_sampler * common_reasoning_budget_init(
         const std::vector<llama_token> & forced_tokens,
         int32_t                          budget,
         common_reasoning_budget_state    initial_state);
+
+common_reasoning_budget_state common_reasoning_budget_get_state(const struct llama_sampler * smpl);
Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,4 @@`
`1`		`-ARG ASCEND_VERSION=8.1.RC1.alpha001-910b-openeuler22.03-py3.10`
	`1`	`+ARG ASCEND_VERSION=8.5.0-910b-openeuler22.03-py3.10`
`2`	`2`
`3`	`3`	`FROM ascendai/cann:$ASCEND_VERSION AS build`
`4`	`4`