Skip to content

Commit 0923e47

Browse files
Merge pull request #468 from janhq/update-dev-from-master-2026-03-28-00-48
Sync master with upstream release b8559
2 parents (a487817 + 59d8402), commit 0923e47

58 files changed

Lines changed: 2516 additions & 313 deletions

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

.devops/cann.Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,7 @@
44

55
# Define the CANN base image for easier version updates later
66
ARG CHIP_TYPE=910b
7-
ARG CANN_BASE_IMAGE=quay.io/ascend/cann:8.3.rc2-${CHIP_TYPE}-openeuler24.03-py3.11
7+
ARG CANN_BASE_IMAGE=quay.io/ascend/cann:8.5.0-${CHIP_TYPE}-openeuler24.03-py3.11
88

99
# ==============================================================================
1010
# BUILD STAGE

.devops/llama-cli-cann.Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
ARG ASCEND_VERSION=8.1.RC1.alpha001-910b-openeuler22.03-py3.10
1+
ARG ASCEND_VERSION=8.5.0-910b-openeuler22.03-py3.10
22

33
FROM ascendai/cann:$ASCEND_VERSION AS build
44

.devops/nix/package.nix

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -41,6 +41,7 @@
4141
effectiveStdenv ? if useCuda then cudaPackages.backendStdenv else stdenv,
4242
enableStatic ? effectiveStdenv.hostPlatform.isStatic,
4343
precompileMetalShaders ? false,
44+
useWebUi ? true,
4445
}:
4546

4647
let
@@ -164,6 +165,7 @@ effectiveStdenv.mkDerivation (finalAttrs: {
164165
cmakeFlags =
165166
[
166167
(cmakeBool "LLAMA_BUILD_SERVER" true)
168+
(cmakeBool "LLAMA_BUILD_WEBUI" useWebUi)
167169
(cmakeBool "BUILD_SHARED_LIBS" (!enableStatic))
168170
(cmakeBool "CMAKE_SKIP_BUILD_RPATH" true)
169171
(cmakeBool "GGML_NATIVE" false)

.github/workflows/build-cann.yml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -63,7 +63,7 @@ jobs:
6363
- name: Set container image
6464
id: cann-image
6565
run: |
66-
image="ascendai/cann:${{ matrix.chip_type == '910b' && '8.3.rc2-910b-openeuler24.03-py3.11' || '8.3.rc2-310p-openeuler24.03-py3.11' }}"
66+
image="ascendai/cann:${{ matrix.chip_type == '910b' && '8.5.0-910b-openeuler24.03-py3.11' || '8.5.0-310p-openeuler24.03-py3.11' }}"
6767
echo "image=${image}" >> "${GITHUB_OUTPUT}"
6868
6969
- name: Pull container image

.github/workflows/release.yml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -907,7 +907,7 @@ jobs:
907907
- name: Set container image
908908
id: cann-image
909909
run: |
910-
image="ascendai/cann:${{ matrix.chip_type == '910b' && '8.3.rc2-910b-openeuler24.03-py3.11' || '8.3.rc2-310p-openeuler24.03-py3.11' }}"
910+
image="ascendai/cann:${{ matrix.chip_type == '910b' && '8.5.0-910b-openeuler24.03-py3.11' || '8.5.0-310p-openeuler24.03-py3.11' }}"
911911
echo "image=${image}" >> "${GITHUB_OUTPUT}"
912912
913913
- name: Pull container image

CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -108,6 +108,7 @@ option(LLAMA_BUILD_TESTS "llama: build tests" ${LLAMA_STANDALONE})
108108
option(LLAMA_BUILD_TOOLS "llama: build tools" ${LLAMA_STANDALONE})
109109
option(LLAMA_BUILD_EXAMPLES "llama: build examples" ${LLAMA_STANDALONE})
110110
option(LLAMA_BUILD_SERVER "llama: build server example" ${LLAMA_STANDALONE})
111+
option(LLAMA_BUILD_WEBUI "llama: build the embedded Web UI for server" ON)
111112
option(LLAMA_TOOLS_INSTALL "llama: install tools" ${LLAMA_TOOLS_INSTALL_DEFAULT})
112113
option(LLAMA_TESTS_INSTALL "llama: install tests" ON)
113114

common/arg.cpp

Lines changed: 10 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1079,7 +1079,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
10791079
[](common_params & params) {
10801080
params.verbose_prompt = true;
10811081
}
1082-
));
1082+
).set_examples({LLAMA_EXAMPLE_COMPLETION, LLAMA_EXAMPLE_CLI, LLAMA_EXAMPLE_EMBEDDING, LLAMA_EXAMPLE_RETRIEVAL}));
10831083
add_opt(common_arg(
10841084
{"--display-prompt"},
10851085
{"--no-display-prompt"},
@@ -2843,6 +2843,15 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
28432843
params.webui_mcp_proxy = value;
28442844
}
28452845
).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_WEBUI_MCP_PROXY"));
2846+
add_opt(common_arg(
2847+
{"--tools"}, "TOOL1,TOOL2,...",
2848+
"experimental: whether to enable built-in tools for AI agents - do not enable in untrusted environments (default: no tools)\n"
2849+
"specify \"all\" to enable all tools\n"
2850+
"available tools: read_file, file_glob_search, grep_search, exec_shell_command, write_file, edit_file, apply_diff",
2851+
[](common_params & params, const std::string & value) {
2852+
params.server_tools = parse_csv_row(value);
2853+
}
2854+
).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_TOOLS"));
28462855
add_opt(common_arg(
28472856
{"--webui"},
28482857
{"--no-webui"},

common/common.h

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -613,6 +613,9 @@ struct common_params {
613613
bool endpoint_props = false; // only control POST requests, not GET
614614
bool endpoint_metrics = false;
615615

616+
// enable built-in tools
617+
std::vector<std::string> server_tools;
618+
616619
// router server configs
617620
std::string models_dir = ""; // directory containing models for the router server
618621
std::string models_preset = ""; // directory containing model presets for the router server

common/reasoning-budget.cpp

Lines changed: 12 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -115,9 +115,11 @@ static void common_reasoning_budget_accept(struct llama_sampler * smpl, llama_to
115115
break;
116116
}
117117
case REASONING_BUDGET_FORCING:
118-
// force_pos is advanced in apply(), not here.
119-
// This ensures the first forced token isn't skipped when the sampler
120-
// is initialized directly in FORCING state (e.g. COUNTING + budget=0)
118+
ctx->force_pos++;
119+
if (ctx->force_pos >= ctx->forced_tokens.size()) {
120+
ctx->state = REASONING_BUDGET_DONE;
121+
LOG_INF("reasoning-budget: forced sequence complete, done\n");
122+
}
121123
break;
122124
case REASONING_BUDGET_DONE:
123125
break;
@@ -144,14 +146,6 @@ static void common_reasoning_budget_apply(struct llama_sampler * smpl, llama_tok
144146
cur_p->data[i].logit = -INFINITY;
145147
}
146148
}
147-
148-
// advance to next forced token (done here rather than in accept so that
149-
// the first forced token isn't skipped when starting in FORCING state)
150-
ctx->force_pos++;
151-
if (ctx->force_pos >= ctx->forced_tokens.size()) {
152-
ctx->state = REASONING_BUDGET_DONE;
153-
LOG_INF("reasoning-budget: forced sequence complete, done\n");
154-
}
155149
}
156150

157151
static void common_reasoning_budget_reset(struct llama_sampler * smpl) {
@@ -261,3 +255,10 @@ struct llama_sampler * common_reasoning_budget_init(
261255
common_reasoning_budget_state initial_state) {
262256
return common_reasoning_budget_init_state(vocab, start_tokens, end_tokens, forced_tokens, budget, initial_state);
263257
}
258+
259+
common_reasoning_budget_state common_reasoning_budget_get_state(const struct llama_sampler * smpl) {
260+
if (!smpl) {
261+
return REASONING_BUDGET_IDLE;
262+
}
263+
return ((const common_reasoning_budget_ctx *)smpl->ctx)->state;
264+
}

common/reasoning-budget.h

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -51,3 +51,5 @@ struct llama_sampler * common_reasoning_budget_init(
5151
const std::vector<llama_token> & forced_tokens,
5252
int32_t budget,
5353
common_reasoning_budget_state initial_state);
54+
55+
common_reasoning_budget_state common_reasoning_budget_get_state(const struct llama_sampler * smpl);

0 commit comments

Comments
 (0)