Skip to content

Commit 17771a6

Browse files
authored
feat(llamacpp): expand server flags + wire llamacpp-only sampling params (#8099)
Extension-level (settings.json + LlamacppConfig + args.rs):
- Adds --cache-ram, --cache-reuse, --swa-full, --keep flags
- Removes dead mirostat/grammar_file/json_schema_file entries that had no readers anywhere

Per-model (predefined.ts modelSettings):
- Adds reasoning dropdown (auto/on/off) → --reasoning, restart-on-change
- ctx_len defaults to empty (auto-fit when --fit is on, falls back to 4096 in args.rs when fit is off to avoid OOM at n_ctx_train)
- v11→v13 storage migrations backfill reasoning, strip the stale provider-level entry, and reset the prior 8192 ctx_len default

llamacpp-only sampling (predefinedParams.ts assistant params):
- Adds mirostat, mirostat_tau, mirostat_eta, grammar, json_schema
- Gates LLAMACPP_ONLY_PARAM_KEYS in createCustomFetch / MLX / Foundation Models so they only reach the local llama-server

Platform-aware --fit default:
- Synchronous default flips fit on for non-macOS in onLoad
- migrateFitPlatformDefault probes GPUs via getSystemInfo and only upgrades the v1 auto-default (false → true) when a discrete GPU is present (NVIDIA or Vulkan DiscreteGpu); explicit user values are preserved, probe failure retries on next launch

Misc:
- guest-js types/normalizer kept in sync with the Rust struct; asBool gains an optional default
- ModelSetting renders dropdown rows full-width so titles aren't squeezed; reasoning is hoisted to the top of the panel
- jan-cli's LlamacppConfig literal updated for the new fields
- New args.rs unit tests covering each flag's default-omits and custom-value paths plus ctx_size fit interactions
1 parent 691af63 commit 17771a6

11 files changed

Lines changed: 451 additions & 57 deletions

File tree

extensions/llamacpp-extension/settings.json

Lines changed: 24 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -344,67 +344,49 @@
344344
}
345345
},
346346
{
347-
"key": "mirostat",
348-
"title": "Mirostat Mode",
349-
"description": "Use Mirostat sampling (0: disabled, 1: Mirostat V1, 2: Mirostat V2).",
350-
"controllerType": "dropdown",
351-
"controllerProps": {
352-
"value": 0,
353-
"options": [
354-
{ "value": 0, "name": "Disabled" },
355-
{ "value": 1, "name": "Mirostat V1" },
356-
{ "value": 2, "name": "Mirostat V2" }
357-
]
358-
}
359-
},
360-
{
361-
"key": "mirostat_lr",
362-
"title": "Mirostat Learning Rate",
363-
"description": "Mirostat learning rate (eta).",
347+
"key": "cache_ram",
348+
"title": "Prompt Cache RAM (MiB)",
349+
"description": "Maximum prompt cache size in MiB (--cache-ram). -1 = unlimited, 0 = disabled.",
364350
"controllerType": "input",
365351
"controllerProps": {
366-
"value": 0.1,
367-
"placeholder": "0.1",
352+
"value": -1,
353+
"placeholder": "-1",
368354
"type": "number",
369-
"textAlign": "right",
370-
"min": 0,
371-
"step": 0.01
355+
"textAlign": "right"
372356
}
373357
},
374358
{
375-
"key": "mirostat_ent",
376-
"title": "Mirostat Target Entropy",
377-
"description": "Mirostat target entropy (tau).",
359+
"key": "cache_reuse",
360+
"title": "Cache Reuse",
361+
"description": "Min chunk size of matching prefix tokens to reuse from cache (--cache-reuse). 0 = disabled.",
378362
"controllerType": "input",
379363
"controllerProps": {
380-
"value": 5.0,
381-
"placeholder": "5.0",
364+
"value": 0,
365+
"placeholder": "0",
382366
"type": "number",
383367
"textAlign": "right",
384-
"min": 0,
385-
"step": 0.01
368+
"min": 0
386369
}
387370
},
388371
{
389-
"key": "grammar_file",
390-
"title": "Grammar File",
391-
"description": "Path to a BNF-like grammar file to constrain generations.",
392-
"controllerType": "input",
372+
"key": "swa_full",
373+
"title": "Full SWA Cache",
374+
"description": "Use full-size SWA cache (--swa-full). May improve quality at the cost of memory.",
375+
"controllerType": "checkbox",
393376
"controllerProps": {
394-
"value": "",
395-
"placeholder": "path/to/grammar.gbnf",
396-
"type": "text"
377+
"value": false
397378
}
398379
},
399380
{
400-
"key": "json_schema_file",
401-
"title": "JSON Schema File",
402-
"description": "Path to a JSON schema file to constrain generations.",
381+
"key": "keep",
382+
"title": "Keep First N Tokens",
383+
"description": "Number of tokens from the initial prompt to keep when context is shifted (--keep). -1 = keep all.",
403384
"controllerType": "input",
404385
"controllerProps": {
405-
"value": "",
406-
"placeholder": "path/to/schema.json",
407-
"type": "text"
386+
"value": 0,
387+
"placeholder": "0",
388+
"type": "number",
389+
"textAlign": "right"
408390
}
409391
}
410392
]

extensions/llamacpp-extension/src/index.ts

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,11 @@ export default class llamacpp_extension extends AIEngine {
135135

136136
let settings = structuredClone(SETTINGS) // Clone to modify settings definition before registration
137137

138+
if (!IS_MAC) {
139+
const fitItem = settings.find((s) => s.key === 'fit')
140+
if (fitItem) fitItem.controllerProps.value = true
141+
}
142+
138143
// This makes the settings (including the backend options and initial value) available to the Jan UI.
139144
this.registerSettings(settings)
140145

@@ -155,6 +160,9 @@ export default class llamacpp_extension extends AIEngine {
155160
// Migration v2: disable fit by default
156161
await this.migrateFitDefault()
157162

163+
// Migration v3: enable fit on Windows/Linux with a discrete GPU
164+
await this.migrateFitPlatformDefault()
165+
158166
this.autoUnload = this.config.auto_unload
159167
this.timeout = this.config.timeout
160168
this.llamacpp_env = this.config.llamacpp_env
@@ -256,6 +264,47 @@ export default class llamacpp_extension extends AIEngine {
256264
localStorage.setItem(MIGRATION_KEY, '1')
257265
}
258266

267+
private async migrateFitPlatformDefault(): Promise<void> {
268+
const MIGRATION_KEY = 'llamacpp_fit_platform_v2'
269+
if (localStorage.getItem(MIGRATION_KEY)) return
270+
271+
if (IS_MAC) {
272+
localStorage.setItem(MIGRATION_KEY, '1')
273+
return
274+
}
275+
276+
let hasDiscreteGpu = false
277+
try {
278+
const sysInfo = await getSystemInfo()
279+
hasDiscreteGpu = (sysInfo?.gpus ?? []).some(
280+
(g: any) =>
281+
g?.nvidia_info != null ||
282+
g?.vulkan_info?.device_type === 'DiscreteGpu'
283+
)
284+
} catch (error) {
285+
// Skip writing the migration key so a transient probe failure retries.
286+
logger.warn('Failed to probe GPU info for fit migration:', error)
287+
return
288+
}
289+
290+
// Only upgrade the v1 auto-default; preserve any explicit user override.
291+
if (this.config.fit === false && hasDiscreteGpu) {
292+
const settings = await this.getSettings()
293+
await this.updateSettings(
294+
settings.map((item) => {
295+
if (item.key === 'fit') {
296+
item.controllerProps.value = true
297+
}
298+
return item
299+
})
300+
)
301+
this.config.fit = true
302+
logger.info('Migrated fit setting: enabled (discrete GPU detected)')
303+
}
304+
305+
localStorage.setItem(MIGRATION_KEY, '1')
306+
}
307+
259308
async configureBackends(): Promise<void> {
260309
if (this.isConfiguringBackends) {
261310
logger.info(

src-tauri/plugins/tauri-plugin-llamacpp/guest-js/index.ts

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,8 +31,8 @@ function asI32(v: any, defaultValue = 0): number {
3131
return n
3232
}
3333

34-
function asBool(v: any): boolean {
35-
if (v === '' || v === null || v === undefined) return false
34+
function asBool(v: any, defaultValue = false): boolean {
35+
if (v === '' || v === null || v === undefined) return defaultValue
3636
return v === true || v === 'true' || v === 1 || v === '1'
3737
}
3838

@@ -92,6 +92,12 @@ export function normalizeLlamacppConfig(config: any): LlamacppConfig {
9292

9393
ctx_shift: asBool(config.ctx_shift),
9494
parallel: asI32(config.parallel, 1),
95+
96+
reasoning: asString(config.reasoning, 'auto'),
97+
cache_ram: asI32(config.cache_ram, -1),
98+
cache_reuse: asI32(config.cache_reuse, 0),
99+
swa_full: asBool(config.swa_full),
100+
keep: asI32(config.keep, 0),
95101
}
96102
}
97103

src-tauri/plugins/tauri-plugin-llamacpp/guest-js/types.ts

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,11 @@ export type LlamacppConfig = {
6666
rope_freq_scale: number
6767
ctx_shift: boolean
6868
parallel: number
69+
reasoning: string
70+
cache_ram: number
71+
cache_reuse: number
72+
swa_full: boolean
73+
keep: number
6974
}
7075

7176
export type ModelPlan = {

0 commit comments

Comments
 (0)