
Commit ead5673

PurpleDoubleD and claude committed

fix: model unload, GPU offloading, error logging (v2.2.1)

- Fix broken model unloading (add missing `prompt` field to /generate call)
- Enable GPU offloading by default (num_gpu: 99 on all chat endpoints)
- Replace silent .catch(() => {}) with console.warn for debugging
- Bump version to 2.2.1 across all configs, docs, and landing page

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

1 parent e6ce15c · commit ead5673

9 files changed: 34 additions & 22 deletions

CHANGELOG.md — 7 additions & 0 deletions

@@ -2,6 +2,13 @@
 
 All notable changes to Locally Uncensored are documented here.
 
+## [2.2.1] - 2026-04-04
+
+### Fixed
+- **Model unloading broken** — unload button and automatic unload on model switch silently failed (missing `prompt` field in Ollama `/generate` call), causing models to stay in RAM indefinitely
+- **No GPU offloading** — models ran entirely on CPU/RAM instead of GPU; added `num_gpu: 99` to all Ollama chat calls so layers are offloaded to GPU automatically (Ollama splits between GPU and CPU if VRAM is insufficient)
+- **Silent error swallowing** — unload errors were caught and discarded with `.catch(() => {})`; now logged to console for debugging
+
 ## [1.9.0] - 2026-04-03
 
 ### Added
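
The first changelog bullet has a concrete shape worth spelling out: Ollama's `/generate` endpoint only evicts a model when given a valid generate request with `keep_alive: 0`, and a valid request must include a `prompt` field (an empty string suffices). A minimal TypeScript sketch of the corrected request body — the helper name `buildUnloadBody` is illustrative, not part of this codebase:

```typescript
// Hypothetical helper sketching the fixed /generate unload body.
// `prompt: ""` makes the request valid; `keep_alive: 0` asks Ollama to
// evict the model immediately after the (empty) generation completes.
type UnloadBody = { model: string; prompt: string; keep_alive: number }

function buildUnloadBody(model: string): UnloadBody {
  return { model, prompt: "", keep_alive: 0 }
}

// Before the fix the body lacked `prompt`, so the call silently failed
// and the model stayed resident in RAM indefinitely.
console.log(JSON.stringify(buildUnloadBody("llama3")))
```

In the app, a body of this shape is POSTed to Ollama's `/generate` endpoint via `localFetch`, as the `src/api/ollama.ts` diff in this commit shows.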

README.md — 3 additions & 3 deletions

@@ -34,11 +34,11 @@ No cloud. No censorship. No data collection. Your AI, your rules.
 
 ---
 
-## v2.2 — Latest Release
+## v2.2.1 — Latest Release (Hotfix)
 
-**New:** Custom dark titlebar (no more Windows chrome), branded NSIS installer, Qwen3-Coder integration, Download Manager with multi-pull + pause/resume, auto-unload on model switch, redesigned Model Selector, Update Checker
+**Fixed:** Model unloading now works correctly, GPU offloading enabled by default (`num_gpu: 99`), silent error swallowing replaced with console logging. Resolves high CPU/RAM usage on systems with dedicated GPUs.
 
-See the full [Release Notes](https://github.com/PurpleDoubleD/locally-uncensored/releases/tag/v2.2.0).
+See the full [Release Notes](https://github.com/PurpleDoubleD/locally-uncensored/releases/tag/v2.2.1).
 
 ---

docs/index.html — 5 additions & 5 deletions

@@ -27,7 +27,7 @@
 <!-- Setup: docker compose up -d on your server, then replace YOUR-DOMAIN below -->
 <script defer data-domain="YOUR-DOMAIN.com" src="https://plausible.YOUR-DOMAIN.com/js/script.js"></script>
 <script type="application/ld+json">
-{"@context":"https://schema.org","@type":"SoftwareApplication","name":"Locally Uncensored","applicationCategory":"DeveloperApplication","applicationSubCategory":"Artificial Intelligence","operatingSystem":"Windows, macOS, Linux","description":"All-in-one local AI desktop app for uncensored chat, image generation, and video creation. Run Llama, Mistral, Qwen, FLUX, Stable Diffusion, and Wan 2.1 models locally with zero cloud dependency.","url":"https://locallyuncensored.com/","downloadUrl":"https://github.com/PurpleDoubleD/locally-uncensored","softwareVersion":"2.2.0","license":"https://opensource.org/licenses/MIT","author":{"@type":"Person","name":"PurpleDoubleD","url":"https://github.com/PurpleDoubleD"},"offers":{"@type":"Offer","price":"0","priceCurrency":"USD"},"featureList":["Uncensored AI Chat via Ollama","Image Generation via ComfyUI and Stable Diffusion","Video Generation with Wan 2.1 and AnimateDiff","25+ Built-in AI Personas","One-Click Setup on Windows, macOS, Linux","Model Auto-Detection","100% Offline and Private","No Docker Required"],"screenshot":"https://raw.githubusercontent.com/PurpleDoubleD/locally-uncensored/master/docs/screenshots/marketing_03_chat_personas.png","softwareRequirements":"Node.js 18+, Ollama","memoryRequirements":"8 GB RAM minimum","storageRequirements":"6 GB for default model"}
+{"@context":"https://schema.org","@type":"SoftwareApplication","name":"Locally Uncensored","applicationCategory":"DeveloperApplication","applicationSubCategory":"Artificial Intelligence","operatingSystem":"Windows, macOS, Linux","description":"All-in-one local AI desktop app for uncensored chat, image generation, and video creation. Run Llama, Mistral, Qwen, FLUX, Stable Diffusion, and Wan 2.1 models locally with zero cloud dependency.","url":"https://locallyuncensored.com/","downloadUrl":"https://github.com/PurpleDoubleD/locally-uncensored","softwareVersion":"2.2.1","license":"https://opensource.org/licenses/MIT","author":{"@type":"Person","name":"PurpleDoubleD","url":"https://github.com/PurpleDoubleD"},"offers":{"@type":"Offer","price":"0","priceCurrency":"USD"},"featureList":["Uncensored AI Chat via Ollama","Image Generation via ComfyUI and Stable Diffusion","Video Generation with Wan 2.1 and AnimateDiff","25+ Built-in AI Personas","One-Click Setup on Windows, macOS, Linux","Model Auto-Detection","100% Offline and Private","No Docker Required"],"screenshot":"https://raw.githubusercontent.com/PurpleDoubleD/locally-uncensored/master/docs/screenshots/marketing_03_chat_personas.png","softwareRequirements":"Node.js 18+, Ollama","memoryRequirements":"8 GB RAM minimum","storageRequirements":"6 GB for default model"}
 </script>
 <script type="application/ld+json">
 {"@context":"https://schema.org","@type":"Organization","name":"PurpleDoubleD","url":"https://github.com/PurpleDoubleD","sameAs":["https://github.com/PurpleDoubleD","https://reddit.com/user/GroundbreakingMall54"]}

@@ -283,11 +283,11 @@ <h1>Run uncensored AI locally.<br>Chat. Images. Video.</h1>
 </section>
 
 <section class="install reveal" id="install">
-  <p style="margin-bottom:1.5rem;font-size:1.1rem;color:#ededed;font-weight:600">Download v2.2.0</p>
+  <p style="margin-bottom:1.5rem;font-size:1.1rem;color:#ededed;font-weight:600">Download v2.2.1</p>
   <div style="display:flex;gap:1rem;justify-content:center;margin-bottom:2rem;flex-wrap:wrap">
-    <a href="https://github.com/PurpleDoubleD/locally-uncensored/releases/tag/v2.2.0" class="btn btn-primary" style="font-size:.9rem;padding:.6rem 1.5rem">Windows (.exe)</a>
-    <a href="https://github.com/PurpleDoubleD/locally-uncensored/releases/tag/v2.2.0" class="btn btn-primary" style="font-size:.9rem;padding:.6rem 1.5rem">Linux (.AppImage)</a>
-    <a href="https://github.com/PurpleDoubleD/locally-uncensored/releases/tag/v2.2.0" class="btn" style="font-size:.9rem;padding:.6rem 1.5rem">All Downloads</a>
+    <a href="https://github.com/PurpleDoubleD/locally-uncensored/releases/tag/v2.2.1" class="btn btn-primary" style="font-size:.9rem;padding:.6rem 1.5rem">Windows (.exe)</a>
+    <a href="https://github.com/PurpleDoubleD/locally-uncensored/releases/tag/v2.2.1" class="btn btn-primary" style="font-size:.9rem;padding:.6rem 1.5rem">Linux (.AppImage)</a>
+    <a href="https://github.com/PurpleDoubleD/locally-uncensored/releases/tag/v2.2.1" class="btn" style="font-size:.9rem;padding:.6rem 1.5rem">All Downloads</a>
   </div>
   <p style="color:var(--text-tertiary);font-size:.8rem;margin-bottom:2rem">macOS: <a href="https://github.com/PurpleDoubleD/locally-uncensored#-build-from-source-all-platforms">Build from source</a></p>
   <div class="install-cmd" onclick="navigator.clipboard.writeText('git clone https://github.com/PurpleDoubleD/locally-uncensored.git && cd locally-uncensored && setup.bat')">

package.json — 1 addition & 1 deletion

@@ -1,6 +1,6 @@
 {
   "name": "locally-uncensored",
-  "version": "2.2.0",
+  "version": "2.2.1",
   "private": false,
   "description": "Private, local AI chat & image/video generation. No cloud, no censorship, no data collection. Powered by Ollama & ComfyUI.",
   "license": "MIT",

src-tauri/Cargo.toml — 1 addition & 1 deletion

@@ -1,6 +1,6 @@
 [package]
 name = "locally-uncensored"
-version = "2.2.0"
+version = "2.2.1"
 description = "Private, local AI chat & image/video generation"
 authors = ["purpledoubled"]
 edition = "2021"

src-tauri/tauri.conf.json — 1 addition & 1 deletion

@@ -1,7 +1,7 @@
 {
   "$schema": "https://raw.githubusercontent.com/tauri-apps/tauri/dev/crates/tauri-cli/schema.json",
   "productName": "Locally Uncensored",
-  "version": "2.2.0",
+  "version": "2.2.1",
   "identifier": "com.purpledoubled.locally-uncensored",
   "build": {
     "beforeBuildCommand": "npm run build",

src/api/ollama.ts — 11 additions & 6 deletions

@@ -36,9 +36,10 @@ export async function chatStream(
   options: { temperature?: number; top_p?: number; top_k?: number; num_predict?: number } = {},
   signal?: AbortSignal
 ): Promise<Response> {
+  const opts = { num_gpu: 99, ...options }
   const res = await localFetchStream(ollamaUrl("/chat"), {
     method: "POST",
-    body: JSON.stringify({ model, messages, options, stream: true }),
+    body: JSON.stringify({ model, messages, options: opts, stream: true }),
   })
   if (!res.ok) throw new Error("Failed to start chat")
   return res

@@ -52,9 +53,10 @@ export async function chatStreamWithTools(
   options: { temperature?: number; top_p?: number; top_k?: number; num_predict?: number } = {},
   signal?: AbortSignal
 ): Promise<Response> {
+  const opts = { num_gpu: 99, ...options }
   const res = await localFetchStream(ollamaUrl("/chat"), {
     method: "POST",
-    body: JSON.stringify({ model, messages, tools, options, stream: true }),
+    body: JSON.stringify({ model, messages, tools, options: opts, stream: true }),
   })
   if (!res.ok) {
     // Try to extract Ollama's error message

@@ -79,7 +81,7 @@ export async function chatWithTools(
   const res = await localFetch(ollamaUrl("/chat"), {
     method: "POST",
     headers: { "Content-Type": "application/json" },
-    body: JSON.stringify({ model, messages, tools, options, stream: false }),
+    body: JSON.stringify({ model, messages, tools, options: { num_gpu: 99, ...options }, stream: false }),
   })
   if (!res.ok) {
     try {

@@ -170,16 +172,19 @@ export async function listRunningModels(): Promise<string[]> {
 }
 
 export async function unloadModel(name: string): Promise<void> {
-  await localFetch(ollamaUrl("/generate"), {
+  const res = await localFetch(ollamaUrl("/generate"), {
     method: "POST",
-    body: JSON.stringify({ model: name, keep_alive: 0 }),
+    body: JSON.stringify({ model: name, prompt: "", keep_alive: 0 }),
   })
+  if (!res.ok) {
+    console.warn(`[ollama] failed to unload model "${name}":`, res.status)
+  }
 }
 
 export async function unloadAllModels(): Promise<number> {
   const running = await listRunningModels()
   for (const name of running) {
-    try { await unloadModel(name) } catch { /* continue */ }
+    try { await unloadModel(name) } catch (e) { console.warn(`[ollama] unloadAll: failed for "${name}":`, e) }
   }
   return running.length
 }
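
One design note on the `{ num_gpu: 99, ...options }` merge used throughout this file: because the caller's options are spread after the default, an explicit `num_gpu` supplied by a caller still wins over the default. A small self-contained sketch of that precedence — the helper name `withGpuDefault` is illustrative, not part of the codebase:

```typescript
// Defaults-first object spread: later properties win, so a caller-supplied
// num_gpu overrides the default, while num_gpu: 99 fills the gap otherwise.
type ChatOptions = { temperature?: number; num_gpu?: number }

function withGpuDefault(options: ChatOptions = {}): ChatOptions {
  return { num_gpu: 99, ...options }
}

console.log(withGpuDefault({ temperature: 0.7 }).num_gpu) // 99 (default applied)
console.log(withGpuDefault({ num_gpu: 0 }).num_gpu)       // 0 (caller override wins)
```

Writing the default the other way around (`{ ...options, num_gpu: 99 }`) would silently clobber any caller-chosen value, so the order here matters.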

src/api/providers/ollama-provider.ts — 4 additions & 4 deletions

@@ -66,12 +66,12 @@ export class OllamaProvider implements ProviderClient {
       stream: true,
     }
 
-    const ollamaOptions: Record<string, any> = {}
+    const ollamaOptions: Record<string, any> = { num_gpu: 99 }
     if (options?.temperature !== undefined) ollamaOptions.temperature = options.temperature
     if (options?.topP !== undefined) ollamaOptions.top_p = options.topP
     if (options?.topK !== undefined) ollamaOptions.top_k = options.topK
     if (options?.maxTokens) ollamaOptions.num_predict = options.maxTokens
-    if (Object.keys(ollamaOptions).length > 0) body.options = ollamaOptions
+    body.options = ollamaOptions
 
     const res = await localFetchStream(this.apiUrl('/chat'), {
       method: 'POST',

@@ -119,12 +119,12 @@ export class OllamaProvider implements ProviderClient {
       stream: false,
     }
 
-    const ollamaOptions: Record<string, any> = {}
+    const ollamaOptions: Record<string, any> = { num_gpu: 99 }
     if (options?.temperature !== undefined) ollamaOptions.temperature = options.temperature
     if (options?.topP !== undefined) ollamaOptions.top_p = options.topP
     if (options?.topK !== undefined) ollamaOptions.top_k = options.topK
     if (options?.maxTokens) ollamaOptions.num_predict = options.maxTokens
-    if (Object.keys(ollamaOptions).length > 0) body.options = ollamaOptions
+    body.options = ollamaOptions
 
     const res = await localFetch(this.apiUrl('/chat'), {
       method: 'POST',

src/stores/modelStore.ts — 1 addition & 1 deletion

@@ -46,7 +46,7 @@ export const useModelStore = create<ModelState>()(
       const prev = get().activeModel
       set({ activeModel: name })
       if (prev && prev !== name && !prev.includes('::')) {
-        unloadModel(prev).catch(() => {})
+        unloadModel(prev).catch((e) => console.warn('[modelStore] failed to unload previous model:', prev, e))
       }
     },
