janhq
diff --git a/extensions/llamacpp-extension/settings.json b/extensions/llamacpp-extension/settings.json
Lines changed: 24 additions & 42 deletions
diff --git a/extensions/llamacpp-extension/src/index.ts b/extensions/llamacpp-extension/src/index.ts
Lines changed: 49 additions & 0 deletions
diff --git a/src-tauri/plugins/tauri-plugin-llamacpp/guest-js/index.ts b/src-tauri/plugins/tauri-plugin-llamacpp/guest-js/index.ts
Lines changed: 8 additions & 2 deletions
diff --git a/src-tauri/plugins/tauri-plugin-llamacpp/guest-js/types.ts b/src-tauri/plugins/tauri-plugin-llamacpp/guest-js/types.ts
Lines changed: 5 additions & 0 deletions
@@ -344,67 +344,49 @@
     }
   },
   {
-    "key": "mirostat",
-    "title": "Mirostat Mode",
-    "description": "Use Mirostat sampling (0: disabled, 1: Mirostat V1, 2: Mirostat V2).",
-    "controllerType": "dropdown",
-    "controllerProps": {
-      "value": 0,
-      "options": [
-        { "value": 0, "name": "Disabled" },
-        { "value": 1, "name": "Mirostat V1" },
-        { "value": 2, "name": "Mirostat V2" }
-      ]
-    }
-  },
-  {
-    "key": "mirostat_lr",
-    "title": "Mirostat Learning Rate",
-    "description": "Mirostat learning rate (eta).",
+    "key": "cache_ram",
+    "title": "Prompt Cache RAM (MiB)",
+    "description": "Maximum prompt cache size in MiB (--cache-ram). -1 = unlimited, 0 = disabled.",
     "controllerType": "input",
     "controllerProps": {
-      "value": 0.1,
-      "placeholder": "0.1",
+      "value": -1,
+      "placeholder": "-1",
       "type": "number",
-      "textAlign": "right",
-      "min": 0,
-      "step": 0.01
+      "textAlign": "right"
     }
   },
   {
-    "key": "mirostat_ent",
-    "title": "Mirostat Target Entropy",
-    "description": "Mirostat target entropy (tau).",
+    "key": "cache_reuse",
+    "title": "Cache Reuse",
+    "description": "Min chunk size of matching prefix tokens to reuse from cache (--cache-reuse). 0 = disabled.",
     "controllerType": "input",
     "controllerProps": {
-      "value": 5.0,
-      "placeholder": "5.0",
+      "value": 0,
+      "placeholder": "0",
       "type": "number",
       "textAlign": "right",
-      "min": 0,
-      "step": 0.01
+      "min": 0
     }
   },
   {
-    "key": "grammar_file",
-    "title": "Grammar File",
-    "description": "Path to a BNF-like grammar file to constrain generations.",
-    "controllerType": "input",
+    "key": "swa_full",
+    "title": "Full SWA Cache",
+    "description": "Use full-size SWA cache (--swa-full). May improve quality at the cost of memory.",
+    "controllerType": "checkbox",
     "controllerProps": {
-      "value": "",
-      "placeholder": "path/to/grammar.gbnf",
-      "type": "text"
+      "value": false
     }
   },
   {
-    "key": "json_schema_file",
-    "title": "JSON Schema File",
-    "description": "Path to a JSON schema file to constrain generations.",
+    "key": "keep",
+    "title": "Keep First N Tokens",
+    "description": "Number of tokens from the initial prompt to keep when context is shifted (--keep). -1 = keep all.",
     "controllerType": "input",
     "controllerProps": {
-      "value": "",
-      "placeholder": "path/to/schema.json",
-      "type": "text"
+      "value": 0,
+      "placeholder": "0",
+      "type": "number",
+      "textAlign": "right"
     }
   }
 ]
@@ -135,6 +135,11 @@ export default class llamacpp_extension extends AIEngine {
 
     let settings = structuredClone(SETTINGS) // Clone to modify settings definition before registration
 
+    if (!IS_MAC) {
+      const fitItem = settings.find((s) => s.key === 'fit')
+      if (fitItem) fitItem.controllerProps.value = true
+    }
+
     // This makes the settings (including the backend options and initial value) available to the Jan UI.
     this.registerSettings(settings)
 
@@ -155,6 +160,9 @@ export default class llamacpp_extension extends AIEngine {
     // Migration v2: disable fit by default
     await this.migrateFitDefault()
 
+    // Migration v3: enable fit on Windows/Linux with a discrete GPU
+    await this.migrateFitPlatformDefault()
+
     this.autoUnload = this.config.auto_unload
     this.timeout = this.config.timeout
     this.llamacpp_env = this.config.llamacpp_env
@@ -256,6 +264,47 @@ export default class llamacpp_extension extends AIEngine {
     localStorage.setItem(MIGRATION_KEY, '1')
   }
 
+  private async migrateFitPlatformDefault(): Promise<void> { // One-time migration: switches the `fit` setting on when IS_MAC is false and a discrete GPU is reported.
+    const MIGRATION_KEY = 'llamacpp_fit_platform_v2' // localStorage flag; any stored value means this migration already ran
+    if (localStorage.getItem(MIGRATION_KEY)) return
+    // When IS_MAC is set, `fit` is left untouched, but the flag is still written so this is not re-evaluated.
+    if (IS_MAC) {
+      localStorage.setItem(MIGRATION_KEY, '1')
+      return
+    }
+    // Probe the GPU list: an entry counts as discrete if it has `nvidia_info` or its Vulkan device type is 'DiscreteGpu'.
+    let hasDiscreteGpu = false
+    try {
+      const sysInfo = await getSystemInfo()
+      hasDiscreteGpu = (sysInfo?.gpus ?? []).some( // a missing `gpus` list is treated as "no GPUs"
+        (g: any) =>
+          g?.nvidia_info != null ||
+          g?.vulkan_info?.device_type === 'DiscreteGpu'
+      )
+    } catch (error) {
+      // Skip writing the migration key so a transient probe failure retries on a later call.
+      logger.warn('Failed to probe GPU info for fit migration:', error)
+      return
+    }
+    // NOTE(review): the check below only sees `fit === false`, so an explicit user "off" looks the same as the old default — confirm this is acceptable.
+    // Only upgrade the v1 auto-default; preserve any explicit user override.
+    if (this.config.fit === false && hasDiscreteGpu) {
+      const settings = await this.getSettings()
+      await this.updateSettings(
+        settings.map((item) => {
+          if (item.key === 'fit') {
+            item.controllerProps.value = true // mutates the fetched settings item in place before it is written back
+          }
+          return item
+        })
+      )
+      this.config.fit = true // keep the in-memory config in step with the stored setting
+      logger.info('Migrated fit setting: enabled (discrete GPU detected)')
+    }
+    // Record completion whether or not the setting was changed.
+    localStorage.setItem(MIGRATION_KEY, '1')
+  }
+
   async configureBackends(): Promise<void> {
     if (this.isConfiguringBackends) {
       logger.info(
 
@@ -31,8 +31,8 @@ function asI32(v: any, defaultValue = 0): number {
   return n
 }
 
-function asBool(v: any): boolean {
-  if (v === '' || v === null || v === undefined) return false
+function asBool(v: any, defaultValue = false): boolean { // Coerces a loosely-typed value to boolean: only true, 'true', 1 and '1' yield true; any other set value yields false.
+  if (v === '' || v === null || v === undefined) return defaultValue // '', null and undefined count as "unset" and fall back to defaultValue
   return v === true || v === 'true' || v === 1 || v === '1'
 }
 
@@ -92,6 +92,12 @@ export function normalizeLlamacppConfig(config: any): LlamacppConfig {
 
     ctx_shift: asBool(config.ctx_shift),
     parallel: asI32(config.parallel, 1),
+
+    reasoning: asString(config.reasoning, 'auto'),
+    cache_ram: asI32(config.cache_ram, -1),
+    cache_reuse: asI32(config.cache_reuse, 0),
+    swa_full: asBool(config.swa_full),
+    keep: asI32(config.keep, 0),
   }
 }
 
 
@@ -66,6 +66,11 @@ export type LlamacppConfig = {
   rope_freq_scale: number
   ctx_shift: boolean
   parallel: number
+  reasoning: string
+  cache_ram: number
+  cache_reuse: number
+  swa_full: boolean
+  keep: number
 }
 
 export type ModelPlan = {
Original file line number	Diff line number	Diff line change
`@@ -31,8 +31,8 @@ function asI32(v: any, defaultValue = 0): number {`
`31`	`31`	`return n`
`32`	`32`	`}`
`33`	`33`
`34`		`-function asBool(v: any): boolean {`
`35`		`- if (v === '' \|\| v === null \|\| v === undefined) return false`
	`34`	`+function asBool(v: any, defaultValue = false): boolean {`
	`35`	`+ if (v === '' \|\| v === null \|\| v === undefined) return defaultValue`
`36`	`36`	`return v === true \|\| v === 'true' \|\| v === 1 \|\| v === '1'`
`37`	`37`	`}`
`38`	`38`
`@@ -92,6 +92,12 @@ export function normalizeLlamacppConfig(config: any): LlamacppConfig {`
`92`	`92`
`93`	`93`	`ctx_shift: asBool(config.ctx_shift),`
`94`	`94`	`parallel: asI32(config.parallel, 1),`
	`95`	`+`
	`96`	`+ reasoning: asString(config.reasoning, 'auto'),`
	`97`	`+ cache_ram: asI32(config.cache_ram, -1),`
	`98`	`+ cache_reuse: asI32(config.cache_reuse, 0),`
	`99`	`+ swa_full: asBool(config.swa_full),`
	`100`	`+ keep: asI32(config.keep, 0),`
`95`	`101`	`}`
`96`	`102`	`}`
`97`	`103`