From 328eb033804855f5efb3d0f16c4cd6f76655b6a5 Mon Sep 17 00:00:00 2001
From: Chris Sherwood <chris@crosstalksolutions.com>
Date: Fri, 10 Apr 2026 10:18:11 -0700
Subject: [PATCH] feat(ai-chat): allow cancelling in-progress model downloads

Adds a cancel button to in-progress Ollama model downloads and unifies
the Active Model Downloads card layout with the Active Downloads card
used for ZIMs, maps, and pmtiles (byte counts, progress bar, live speed,
status indicator).

Closes #676.
---
 admin/app/jobs/download_model_job.ts          | 136 ++++++++---
 admin/app/services/download_service.ts        | 150 ++++++++----
 admin/app/services/ollama_service.ts          | 121 +++++++++-
 .../components/ActiveModelDownloads.tsx       | 228 +++++++++++++++---
 .../inertia/hooks/useOllamaModelDownloads.ts  |  46 +++-
 5 files changed, 563 insertions(+), 118 deletions(-)
diff --git a/admin/app/jobs/download_model_job.ts b/admin/app/jobs/download_model_job.ts
index 4e27a498..f1890215 100644
--- a/admin/app/jobs/download_model_job.ts
+++ b/admin/app/jobs/download_model_job.ts
@@ -21,6 +21,25 @@ export class DownloadModelJob {
     return createHash('sha256').update(modelName).digest('hex').slice(0, 16)
   }
 
+  /** In-memory registry of abort controllers for active model download jobs */
+  static abortControllers: Map<string, AbortController> = new Map()
+
+  /**
+   * Returns the Redis key (`nomad:download:model-cancel:<jobId>`) used to signal cancellation of one
+   * model download job across processes; distinct from content download keys (`nomad:download:cancel:*`).
+   */
+  static cancelKey(jobId: string): string {
+    return `nomad:download:model-cancel:${jobId}`
+  }
+
+  /** Signal cancellation via Redis: sets cancelKey(jobId) to '1' so a worker polling that key (possibly in another process) can abort the download */
+  static async signalCancel(jobId: string): Promise<void> {
+    const queueService = new QueueService()
+    const queue = queueService.getQueue(this.queue)
+    const client = await queue.client
+    await client.set(this.cancelKey(jobId), '1', 'EX', 300) // 5 min TTL — NOTE(review): if no poll consumes the key it outlives the job; confirm a re-queued job with the same id is not cancelled by it
+  }
+
   async handle(job: Job) {
     const { modelName } = job.data as DownloadModelJobParams
 
@@ -41,43 +60,96 @@ export class DownloadModelJob {
       `[DownloadModelJob] Ollama service is ready. Initiating download for ${modelName}`
     )
 
-    // Services are ready, initiate the download with progress tracking
-    const result = await ollamaService.downloadModel(modelName, (progressPercent) => {
-      if (progressPercent) {
-        job.updateProgress(Math.floor(progressPercent)).catch((err) => {
-          if (err?.code !== -1) throw err
-        })
-        logger.info(
-          `[DownloadModelJob] Model ${modelName}: ${progressPercent}%`
-        )
-      }
+    // Register abort controller for this job — used both by in-process cancels (same process
+    // as the API server) and as the target of the Redis poll loop below.
+    const abortController = new AbortController()
+    DownloadModelJob.abortControllers.set(job.id!, abortController)
 
-      // Store detailed progress in job data for clients to query
-      job.updateData({
-        ...job.data,
-        status: 'downloading',
-        progress: progressPercent,
-        progress_timestamp: new Date().toISOString(),
-      }).catch((err) => {
-        if (err?.code !== -1) throw err
-      })
-    })
+    // Get Redis client for checking cancel signals from the API process
+    const queueService = new QueueService()
+    const cancelRedis = await queueService.getQueue(DownloadModelJob.queue).client
+
+    // Track whether cancellation was explicitly requested by the user. Only user-initiated
+    // cancels become UnrecoverableError — other failures (e.g., transient network errors)
+    // should still benefit from BullMQ's retry logic.
+    let userCancelled = false
+
+    // Poll Redis for cancel signal every 2s — independent of progress events so cancellation
+    // works even when the pull is mid-blob and not emitting progress updates.
+    let cancelPollInterval: ReturnType<typeof setInterval> | null = setInterval(async () => {
+      try {
+        const val = await cancelRedis.get(DownloadModelJob.cancelKey(job.id!))
+        if (val) {
+          await cancelRedis.del(DownloadModelJob.cancelKey(job.id!))
+          userCancelled = true
+          abortController.abort('user-cancel')
+        }
+      } catch {
+        // Redis errors are non-fatal; in-process AbortController covers same-process cancels
+      }
+    }, 2000)
 
-    if (!result.success) {
-      logger.error(
-        `[DownloadModelJob] Failed to initiate download for model ${modelName}: ${result.message}`
+    try {
+      // Services are ready, initiate the download with progress tracking
+      const result = await ollamaService.downloadModel(
+        modelName,
+        (progressPercent, bytes) => {
+          if (progressPercent) {
+            job.updateProgress(Math.floor(progressPercent)).catch((err) => {
+              if (err?.code !== -1) throw err
+            })
+          }
+
+          // Store detailed progress in job data for clients to query
+          job.updateData({
+            ...job.data,
+            status: 'downloading',
+            progress: progressPercent,
+            downloadedBytes: bytes?.downloadedBytes,
+            totalBytes: bytes?.totalBytes,
+            progress_timestamp: new Date().toISOString(),
+          }).catch((err) => {
+            if (err?.code !== -1) throw err
+          })
+        },
+        abortController.signal,
+        job.id!
       )
-      // Don't retry errors that will never succeed (e.g., Ollama version too old)
-      if (result.retryable === false) {
-        throw new UnrecoverableError(result.message)
+
+      if (!result.success) {
+        logger.error(
+          `[DownloadModelJob] Failed to initiate download for model ${modelName}: ${result.message}`
+        )
+        // User-initiated cancel — must be unrecoverable to avoid the 40-attempt retry storm.
+        // The downloadModel() catch block returns retryable: false for cancels, so this branch
+        // catches both Ollama version mismatches (existing) AND user cancels (new).
+        if (result.retryable === false) {
+          throw new UnrecoverableError(result.message)
+        }
+        throw new Error(`Failed to initiate download for model: ${result.message}`)
       }
-      throw new Error(`Failed to initiate download for model: ${result.message}`)
-    }
 
-    logger.info(`[DownloadModelJob] Successfully completed download for model ${modelName}`)
-    return {
-      modelName,
-      message: result.message,
+      logger.info(`[DownloadModelJob] Successfully completed download for model ${modelName}`)
+      return {
+        modelName,
+        message: result.message,
+      }
+    } catch (error: any) {
+      // Belt-and-suspenders: if downloadModel didn't recognize the cancel (e.g., the abort
+      // fired after the response stream completed but before our code returned), the cancel
+      // flag tells us this was a user action and should be unrecoverable.
+      if (userCancelled || abortController.signal.reason === 'user-cancel') {
+        if (!(error instanceof UnrecoverableError)) {
+          throw new UnrecoverableError(`Model download cancelled: ${error.message ?? error}`)
+        }
+      }
+      throw error
+    } finally {
+      if (cancelPollInterval !== null) {
+        clearInterval(cancelPollInterval)
+        cancelPollInterval = null
+      }
+      DownloadModelJob.abortControllers.delete(job.id!)
     }
   }
 
diff --git a/admin/app/services/download_service.ts b/admin/app/services/download_service.ts
index ac9d02dc..bd9076c6 100644
--- a/admin/app/services/download_service.ts
+++ b/admin/app/services/download_service.ts
@@ -5,6 +5,8 @@ import { DownloadModelJob } from '#jobs/download_model_job'
 import { DownloadJobWithProgress, DownloadProgressData } from '../../types/downloads.js'
 import { normalize } from 'path'
 import { deleteFileIfExists } from '../utils/fs.js'
+import transmit from '@adonisjs/transmit/services/main'
+import { BROADCAST_CHANNELS } from '../../constants/broadcast.js'
 
 @inject()
 export class DownloadService {
@@ -111,14 +113,32 @@ export class DownloadService {
   }
 
   async cancelJob(jobId: string): Promise<{ success: boolean; message: string }> {
+    // Try the file download queue first (the original PR #554 path)
     const queue = this.queueService.getQueue(RunDownloadJob.queue)
     const job = await queue.getJob(jobId)
 
-    if (!job) {
-      // Job already completed (removeOnComplete: true) or doesn't exist
-      return { success: true, message: 'Job not found (may have already completed)' }
+    if (job) {
+      return await this._cancelFileDownloadJob(jobId, job, queue)
     }
 
+    // Fall through to the model download queue
+    const modelQueue = this.queueService.getQueue(DownloadModelJob.queue)
+    const modelJob = await modelQueue.getJob(jobId)
+
+    if (modelJob) {
+      return await this._cancelModelDownloadJob(jobId, modelJob, modelQueue)
+    }
+
+    // Not found in either queue
+    return { success: true, message: 'Job not found (may have already completed)' }
+  }
+
+  /** Cancel a content download (zim, map, pmtiles, etc.) — original PR #554 logic */
+  private async _cancelFileDownloadJob(
+    jobId: string,
+    job: any,
+    queue: any
+  ): Promise<{ success: boolean; message: string }> {
     const filepath = job.data.filepath
 
     // Signal the worker process to abort the download via Redis
@@ -128,45 +148,8 @@ export class DownloadService {
     RunDownloadJob.abortControllers.get(jobId)?.abort('user-cancel')
     RunDownloadJob.abortControllers.delete(jobId)
 
-    // Poll for terminal state (up to 4s at 250ms intervals) — cooperates with BullMQ's lifecycle
-    // instead of force-removing an active job and losing the worker's failure/cleanup path.
-    const POLL_INTERVAL_MS = 250
-    const POLL_TIMEOUT_MS = 4000
-    const deadline = Date.now() + POLL_TIMEOUT_MS
-    let reachedTerminal = false
-
-    while (Date.now() < deadline) {
-      await new Promise((resolve) => setTimeout(resolve, POLL_INTERVAL_MS))
-      try {
-        const state = await job.getState()
-        if (state === 'failed' || state === 'completed' || state === 'unknown') {
-          reachedTerminal = true
-          break
-        }
-      } catch {
-        reachedTerminal = true // getState() throws if job is already gone
-        break
-      }
-    }
-
-    if (!reachedTerminal) {
-      console.warn(`[DownloadService] cancelJob: job ${jobId} did not reach terminal state within timeout, removing anyway`)
-    }
-
-    // Remove the BullMQ job
-    try {
-      await job.remove()
-    } catch {
-      // Lock contention fallback: clear lock and retry once
-      try {
-        const client = await queue.client
-        await client.del(`bull:${RunDownloadJob.queue}:${jobId}:lock`)
-        const updatedJob = await queue.getJob(jobId)
-        if (updatedJob) await updatedJob.remove()
-      } catch {
-        // Best effort - job will be cleaned up on next dismiss attempt
-      }
-    }
+    await this._pollForTerminalState(job, jobId)
+    await this._removeJobWithLockFallback(job, queue, RunDownloadJob.queue, jobId)
 
     // Delete the partial file from disk
     if (filepath) {
@@ -195,4 +178,87 @@ export class DownloadService {
 
     return { success: true, message: 'Download cancelled and partial file deleted' }
   }
+
+  /** Cancel an Ollama model download: signal abort (Redis + in-memory), wait for the job to settle, remove it, broadcast 'cancelled'. Skips file cleanup; always resolves with success: true */
+  private async _cancelModelDownloadJob(
+    jobId: string,
+    job: any,
+    queue: any
+  ): Promise<{ success: boolean; message: string }> {
+    const modelName: string = job.data?.modelName ?? 'unknown' // only used as the `model` field of the broadcast below
+
+    // Signal the worker process to abort the pull via Redis
+    await DownloadModelJob.signalCancel(jobId)
+
+    // Also try in-memory abort (works if worker is in same process)
+    DownloadModelJob.abortControllers.get(jobId)?.abort('user-cancel')
+    DownloadModelJob.abortControllers.delete(jobId)
+
+    await this._pollForTerminalState(job, jobId)
+    await this._removeJobWithLockFallback(job, queue, DownloadModelJob.queue, jobId)
+
+    // Broadcast a cancelled event so the frontend hook clears the entry. We use percent: -2
+    // (distinct from -1 = error) so the hook can route it to a 2s auto-clear instead of the
+    // 15s error display. The frontend ALSO removes the entry optimistically as soon as the
+    // cancel API call returns, so this is belt-and-suspenders for when the SSE arrives first.
+    transmit.broadcast(BROADCAST_CHANNELS.OLLAMA_MODEL_DOWNLOAD, {
+      model: modelName,
+      jobId,
+      percent: -2,
+      status: 'cancelled',
+      timestamp: new Date().toISOString(),
+    })
+
+    // Note on partial blob cleanup: Ollama manages model blobs internally at
+    // /root/.ollama/models/blobs/. We deliberately do NOT call /api/delete here — Ollama's
+    // expected behavior is to retain partial blobs so a re-pull resumes from where it left
+    // off. If the user wants to reclaim that space, they can re-pull and let it complete,
+    // or delete the partially-downloaded model from the AI Settings page.
+    return { success: true, message: 'Model download cancelled' }
+  }
+
+  /** Poll job.getState() every 250ms for up to 4s; return once it is failed/completed/unknown or getState() throws, otherwise log a warning on timeout and return */
+  private async _pollForTerminalState(job: any, jobId: string): Promise<void> {
+    const POLL_INTERVAL_MS = 250
+    const POLL_TIMEOUT_MS = 4000
+    const deadline = Date.now() + POLL_TIMEOUT_MS
+
+    while (Date.now() < deadline) {
+      await new Promise((resolve) => setTimeout(resolve, POLL_INTERVAL_MS)) // sleep first: the earliest state check is ~250ms after the call
+      try {
+        const state = await job.getState()
+        if (state === 'failed' || state === 'completed' || state === 'unknown') {
+          return
+        }
+      } catch {
+        return // getState() throws if job is already gone
+      }
+    }
+
+    console.warn(
+      `[DownloadService] cancelJob: job ${jobId} did not reach terminal state within timeout, removing anyway`
+    )
+  }
+
+  /** Remove a BullMQ job; if remove() throws, delete `bull:<queueName>:<jobId>:lock`, re-fetch the job and retry once. Errors in the fallback are swallowed (best effort) */
+  private async _removeJobWithLockFallback(
+    job: any,
+    queue: any,
+    queueName: string,
+    jobId: string
+  ): Promise<void> {
+    try {
+      await job.remove()
+    } catch {
+      // Lock contention fallback: clear lock and retry once
+      try {
+        const client = await queue.client
+        await client.del(`bull:${queueName}:${jobId}:lock`)
+        const updatedJob = await queue.getJob(jobId)
+        if (updatedJob) await updatedJob.remove()
+      } catch {
+        // Best effort - job will be cleaned up on next dismiss attempt
+      }
+    }
+  }
 }
diff --git a/admin/app/services/ollama_service.ts b/admin/app/services/ollama_service.ts
index dacf1312..752af906 100644
--- a/admin/app/services/ollama_service.ts
+++ b/admin/app/services/ollama_service.ts
@@ -91,10 +91,21 @@ export class OllamaService {
   /**
    * Downloads a model from Ollama with progress tracking. Only works with Ollama backends.
    * Use dispatchModelDownload() for background job processing where possible.
+   *
+   * @param signal Optional AbortSignal — when triggered, the underlying axios stream is cancelled
+   *               and the method returns a non-retryable failure so callers can mark the job
+   *               unrecoverable in BullMQ and avoid the 40-attempt retry storm.
+   * @param jobId Optional BullMQ job id — included in progress broadcasts so the frontend can
+   *              correlate Transmit events to a cancellable job.
    */
   async downloadModel(
     model: string,
-    progressCallback?: (percent: number) => void
+    progressCallback?: (
+      percent: number,
+      bytes?: { downloadedBytes: number; totalBytes: number }
+    ) => void,
+    signal?: AbortSignal,
+    jobId?: string
   ): Promise<{ success: boolean; message: string; retryable?: boolean }> {
     await this._ensureDependencies()
     if (!this.baseUrl) {
@@ -121,15 +132,45 @@ export class OllamaService {
         }
       }
 
-      // Stream pull via Ollama native API
+      // Stream pull via Ollama native API. axios supports `signal` natively for AbortController
+      // integration — when triggered, the request errors with code 'ERR_CANCELED' which we detect
+      // in the catch block below to return a non-retryable cancel result.
       const pullResponse = await axios.post(
         `${this.baseUrl}/api/pull`,
         { model, stream: true },
-        { responseType: 'stream', timeout: 0 }
+        { responseType: 'stream', timeout: 0, signal }
       )
 
+      // Ollama's pull API reports progress per-digest (each blob). A single model can contain
+      // multiple blobs (weights, tokenizer, template, etc.) and each is reported in turn.
+      // Aggregate across all digests so the UI shows a single monotonically-increasing total,
+      // matching the behavior of the content download progress (Active Downloads section).
+      const digestProgress = new Map<string, { completed: number; total: number }>()
+
+      // Throttle broadcasts to once per BROADCAST_THROTTLE_MS — Ollama can emit hundreds of
+      // progress events per second for fast connections, which would flood the Transmit SSE
+      // channel and cause jittery speed calculations on the frontend.
+      const BROADCAST_THROTTLE_MS = 500
+      let lastBroadcastAt = 0
+
       await new Promise<void>((resolve, reject) => {
         let buffer = ''
+        // If the abort fires after headers are received but mid-stream, axios's signal handling
+        // destroys the stream which surfaces as an 'error' event — wire the signal listener so
+        // the promise rejects promptly with a recognizable cancel reason.
+        const onAbort = () => {
+          const err: any = new Error('Download cancelled')
+          err.code = 'ERR_CANCELED'
+          pullResponse.data.destroy(err)
+        }
+        if (signal) {
+          if (signal.aborted) {
+            onAbort()
+            return
+          }
+          signal.addEventListener('abort', onAbort, { once: true })
+        }
+
         pullResponse.data.on('data', (chunk: Buffer) => {
           buffer += chunk.toString()
           const lines = buffer.split('\n')
@@ -138,23 +179,74 @@ export class OllamaService {
             if (!line.trim()) continue
             try {
               const parsed = JSON.parse(line)
-              if (parsed.completed && parsed.total) {
-                const percent = parseFloat(((parsed.completed / parsed.total) * 100).toFixed(2))
-                this.broadcastDownloadProgress(model, percent)
-                if (progressCallback) progressCallback(percent)
+              if (parsed.completed && parsed.total && parsed.digest) {
+                // Update this digest's progress — take the max seen value so transient
+                // out-of-order updates don't make the aggregate jump backwards.
+                const existing = digestProgress.get(parsed.digest)
+                digestProgress.set(parsed.digest, {
+                  completed: Math.max(existing?.completed ?? 0, parsed.completed),
+                  total: Math.max(existing?.total ?? 0, parsed.total),
+                })
+
+                // Compute aggregate across all known blobs
+                let aggCompleted = 0
+                let aggTotal = 0
+                for (const { completed, total } of digestProgress.values()) {
+                  aggCompleted += completed
+                  aggTotal += total
+                }
+
+                const percent = aggTotal > 0
+                  ? parseFloat(((aggCompleted / aggTotal) * 100).toFixed(2))
+                  : 0
+
+                // Throttle broadcasts. Always call the progressCallback though — the worker
+                // uses it to update job state in Redis, which should reflect the latest view.
+                const now = Date.now()
+                if (now - lastBroadcastAt >= BROADCAST_THROTTLE_MS) {
+                  lastBroadcastAt = now
+                  this.broadcastDownloadProgress(model, percent, jobId, {
+                    downloadedBytes: aggCompleted,
+                    totalBytes: aggTotal,
+                  })
+                }
+                if (progressCallback) {
+                  progressCallback(percent, {
+                    downloadedBytes: aggCompleted,
+                    totalBytes: aggTotal,
+                  })
+                }
               }
             } catch {
               // ignore parse errors on partial lines
             }
           }
         })
-        pullResponse.data.on('end', resolve)
-        pullResponse.data.on('error', reject)
+        pullResponse.data.on('end', () => {
+          if (signal) signal.removeEventListener('abort', onAbort)
+          resolve()
+        })
+        pullResponse.data.on('error', (err: any) => {
+          if (signal) signal.removeEventListener('abort', onAbort)
+          reject(err)
+        })
       })
 
       logger.info(`[OllamaService] Model "${model}" downloaded successfully.`)
       return { success: true, message: 'Model downloaded successfully.' }
     } catch (error) {
+      // Detect axios cancel (signal-triggered abort). Don't broadcast an error event for
+      // user-initiated cancels — the cancel handler in DownloadService already broadcasts
+      // a cancelled state. Returning retryable: false prevents BullMQ retries.
+      const isCancelled =
+        axios.isCancel(error) ||
+        (error as any)?.code === 'ERR_CANCELED' ||
+        (error as any)?.name === 'CanceledError'
+      if (isCancelled) {
+        logger.info(`[OllamaService] Model "${model}" download cancelled by user.`)
+        return { success: false, message: 'Download cancelled', retryable: false }
+      }
+
       const errorMessage = error instanceof Error ? error.message : String(error)
       logger.error(
         `[OllamaService] Failed to download model "${model}": ${errorMessage}`
@@ -628,10 +720,19 @@ export class OllamaService {
     })
   }
 
-  private broadcastDownloadProgress(model: string, percent: number) {
+  private broadcastDownloadProgress(
+    model: string,
+    percent: number,
+    jobId?: string,
+    bytes?: { downloadedBytes: number; totalBytes: number }
+  ) {
+    // Conditional spread on jobId/bytes — Transmit's Broadcastable type rejects fields whose
+    // value is `undefined`, so we omit each key entirely when its value isn't available.
     transmit.broadcast(BROADCAST_CHANNELS.OLLAMA_MODEL_DOWNLOAD, {
       model,
       percent,
+      ...(jobId ? { jobId } : {}),
+      ...(bytes ? { downloadedBytes: bytes.downloadedBytes, totalBytes: bytes.totalBytes } : {}),
       timestamp: new Date().toISOString(),
     })
     logger.info(`[OllamaService] Download progress for model "${model}": ${percent}%`)
diff --git a/admin/inertia/components/ActiveModelDownloads.tsx b/admin/inertia/components/ActiveModelDownloads.tsx
index c927126d..d9640ca7 100644
--- a/admin/inertia/components/ActiveModelDownloads.tsx
+++ b/admin/inertia/components/ActiveModelDownloads.tsx
@@ -1,50 +1,214 @@
+import { useCallback, useRef, useState } from 'react'
 import useOllamaModelDownloads from '~/hooks/useOllamaModelDownloads'
-import HorizontalBarChart from './HorizontalBarChart'
 import StyledSectionHeader from './StyledSectionHeader'
-import { IconAlertTriangle } from '@tabler/icons-react'
+import StyledModal from './StyledModal'
+import { IconAlertTriangle, IconLoader2, IconX } from '@tabler/icons-react'
+import api from '~/lib/api'
+import { useModals } from '~/context/ModalContext'
+import { formatBytes } from '~/lib/util'
 
 interface ActiveModelDownloadsProps {
     withHeader?: boolean
 }
 
+function formatSpeed(bytesPerSec: number): string { // formats a byte rate as B/s, KB/s or MB/s (1024-based); values <= 0 render as '0 B/s', and there is no tier above MB/s
+    if (bytesPerSec <= 0) return '0 B/s'
+    if (bytesPerSec < 1024) return `${Math.round(bytesPerSec)} B/s`
+    if (bytesPerSec < 1024 * 1024) return `${(bytesPerSec / 1024).toFixed(1)} KB/s`
+    return `${(bytesPerSec / (1024 * 1024)).toFixed(1)} MB/s`
+}
+
 const ActiveModelDownloads = ({ withHeader = false }: ActiveModelDownloadsProps) => {
-    const { downloads } = useOllamaModelDownloads()
+    const { downloads, removeDownload } = useOllamaModelDownloads()
+    const { openModal, closeAllModals } = useModals()
+    const [cancellingModels, setCancellingModels] = useState<Set<string>>(new Set())
+
+    // Track previous downloadedBytes for speed calculation — mirrors the approach in
+    // ActiveDownloads.tsx so content + model downloads feel identical.
+    const prevBytesRef = useRef<Map<string, { bytes: number; time: number }>>(new Map())
+    const speedRef = useRef<Map<string, number[]>>(new Map())
+
+    const getSpeed = useCallback((model: string, currentBytes?: number): number => {
+        if (!currentBytes || currentBytes <= 0) return 0
+
+        const prev = prevBytesRef.current.get(model)
+        const now = Date.now()
+
+        if (prev && prev.bytes > 0 && currentBytes > prev.bytes) {
+            const deltaBytes = currentBytes - prev.bytes
+            const deltaSec = (now - prev.time) / 1000
+            if (deltaSec > 0) {
+                const instantSpeed = deltaBytes / deltaSec
+
+                // Simple moving average (last 5 samples)
+                const samples = speedRef.current.get(model) || []
+                samples.push(instantSpeed)
+                if (samples.length > 5) samples.shift()
+                speedRef.current.set(model, samples)
+
+                const avg = samples.reduce((a, b) => a + b, 0) / samples.length
+                prevBytesRef.current.set(model, { bytes: currentBytes, time: now })
+                return avg
+            }
+        }
+
+        // Only set initial observation; never advance timestamp when bytes unchanged
+        if (!prev) {
+            prevBytesRef.current.set(model, { bytes: currentBytes, time: now })
+        }
+        return speedRef.current.get(model)?.at(-1) || 0
+    }, [])
+
+    const runCancel = async (download: { model: string; jobId?: string }) => {
+        // Defensive guard: stale broadcasts during a hot upgrade may not include jobId.
+        // Without it we have nothing to call the cancel API with.
+        if (!download.jobId) return
+
+        setCancellingModels((prev) => new Set(prev).add(download.model))
+        try {
+            await api.cancelDownloadJob(download.jobId)
+            // Optimistically clear the entry — the Transmit cancelled broadcast usually
+            // arrives within a second but we don't want to leave the row hanging if it doesn't.
+            removeDownload(download.model)
+            // Clean up speed tracking refs for this model
+            prevBytesRef.current.delete(download.model)
+            speedRef.current.delete(download.model)
+        } finally {
+            setCancellingModels((prev) => {
+                const next = new Set(prev)
+                next.delete(download.model)
+                return next
+            })
+        }
+    }
+
+    const confirmCancel = (download: { model: string; jobId?: string }) => {
+        if (!download.jobId) return
+
+        openModal(
+            <StyledModal
+                title="Cancel Download?"
+                onConfirm={() => {
+                    closeAllModals()
+                    runCancel(download)
+                }}
+                onCancel={closeAllModals}
+                open={true}
+                confirmText="Cancel Download"
+                cancelText="Keep Downloading"
+            >
+                <div className="space-y-3 text-text-primary">
+                    <p>
+                        Stop downloading <span className="font-mono font-semibold">{download.model}</span>?
+                    </p>
+                    <p className="text-sm text-text-muted">
+                        Any data already downloaded will remain on disk. If you re-download
+                        this model later, it will resume from where it left off rather than
+                        starting over.
+                    </p>
+                </div>
+            </StyledModal>,
+            'confirm-cancel-model-download-modal'
+        )
+    }
 
     return (
         <>
             {withHeader && <StyledSectionHeader title="Active Model Downloads" className="mt-12 mb-4" />}
             <div className="space-y-4">
                 {downloads && downloads.length > 0 ? (
-                    downloads.map((download) => (
-                        <div
-                            key={download.model}
-                            className={`bg-desert-white rounded-lg p-4 border shadow-sm hover:shadow-lg transition-shadow ${
-                                download.error ? 'border-red-400' : 'border-desert-stone-light'
-                            }`}
-                        >
-                            {download.error ? (
-                                <div className="flex items-start gap-3">
-                                    <IconAlertTriangle className="text-red-500 flex-shrink-0 mt-0.5" size={20} />
-                                    <div>
-                                        <p className="font-medium text-text-primary">{download.model}</p>
-                                        <p className="text-sm text-red-600 mt-1">{download.error}</p>
+                    downloads.map((download) => {
+                        const isCancelling = cancellingModels.has(download.model)
+                        const canCancel = !!download.jobId && !download.error
+                        const speed = getSpeed(download.model, download.downloadedBytes)
+                        const hasBytes = !!(download.downloadedBytes && download.totalBytes)
+
+                        return (
+                            <div
+                                key={download.model}
+                                className={`rounded-lg p-4 border shadow-sm hover:shadow-lg transition-shadow ${
+                                    download.error
+                                        ? 'bg-surface-primary border-red-300'
+                                        : 'bg-surface-primary border-default'
+                                }`}
+                            >
+                                {download.error ? (
+                                    <div className="flex items-center gap-2">
+                                        <IconAlertTriangle className="w-5 h-5 text-red-500 flex-shrink-0" />
+                                        <div className="flex-1 min-w-0">
+                                            <p className="text-sm font-medium text-text-primary truncate">
+                                                {download.model}
+                                            </p>
+                                            <p className="text-xs text-red-600 mt-0.5">{download.error}</p>
+                                        </div>
+                                    </div>
+                                ) : (
+                                    <div className="space-y-2">
+                                        {/* Title + Cancel button row */}
+                                        <div className="flex items-start justify-between gap-2">
+                                            <div className="flex-1 min-w-0">
+                                                <p className="font-semibold text-desert-green truncate">
+                                                    {download.model}
+                                                </p>
+                                                <span className="text-xs px-1.5 py-0.5 rounded bg-desert-stone-lighter text-desert-stone-dark font-mono">
+                                                    ollama
+                                                </span>
+                                            </div>
+                                            {canCancel && (
+                                                isCancelling ? (
+                                                    <IconLoader2 className="w-4 h-4 text-text-muted animate-spin flex-shrink-0" />
+                                                ) : (
+                                                    <button
+                                                        onClick={() => confirmCancel(download)}
+                                                        className="flex-shrink-0 p-1 rounded hover:bg-red-100 transition-colors"
+                                                        title="Cancel download"
+                                                    >
+                                                        <IconX className="w-4 h-4 text-text-muted hover:text-red-500" />
+                                                    </button>
+                                                )
+                                            )}
+                                        </div>
+
+                                        {/* Size info */}
+                                        <div className="flex justify-between items-baseline text-sm text-text-muted font-mono">
+                                            <span>
+                                                {hasBytes
+                                                    ? `${formatBytes(download.downloadedBytes!, 1)} / ${formatBytes(download.totalBytes!, 1)}`
+                                                    : `${download.percent.toFixed(1)}% / 100%`}
+                                            </span>
+                                        </div>
+
+                                        {/* Progress bar */}
+                                        <div className="relative">
+                                            <div className="h-6 bg-desert-green-lighter bg-opacity-20 rounded-lg border border-default overflow-hidden">
+                                                <div
+                                                    className="h-full rounded-lg transition-all duration-1000 ease-out bg-desert-green"
+                                                    style={{ width: `${download.percent}%` }}
+                                                />
+                                            </div>
+                                            <div
+                                                className={`absolute top-1/2 -translate-y-1/2 font-bold text-xs ${
+                                                    download.percent > 15
+                                                        ? 'left-2 text-white drop-shadow-md'
+                                                        : 'right-2 text-desert-green'
+                                                }`}
+                                            >
+                                                {Math.round(download.percent)}%
+                                            </div>
+                                        </div>
+
+                                        {/* Status indicator */}
+                                        <div className="flex items-center gap-2">
+                                            <div className="w-2 h-2 rounded-full bg-green-500 animate-pulse" />
+                                            <span className="text-xs text-text-muted">
+                                                Downloading...{speed > 0 ? ` ${formatSpeed(speed)}` : ''}
+                                            </span>
+                                        </div>
                                     </div>
-                                </div>
-                            ) : (
-                                <HorizontalBarChart
-                                    items={[
-                                        {
-                                            label: download.model,
-                                            value: download.percent,
-                                            total: '100%',
-                                            used: `${download.percent.toFixed(1)}%`,
-                                            type: 'ollama-model',
-                                        },
-                                    ]}
-                                />
-                            )}
-                        </div>
-                    ))
+                                )}
+                            </div>
+                        )
+                    })
                 ) : (
                     <p className="text-text-muted">No active model downloads</p>
                 )}
diff --git a/admin/inertia/hooks/useOllamaModelDownloads.ts b/admin/inertia/hooks/useOllamaModelDownloads.ts
index 8fc54606..d99e708b 100644
--- a/admin/inertia/hooks/useOllamaModelDownloads.ts
+++ b/admin/inertia/hooks/useOllamaModelDownloads.ts
@@ -1,11 +1,25 @@
-import { useEffect, useRef, useState } from 'react'
+import { useCallback, useEffect, useRef, useState } from 'react'
 import { useTransmit } from 'react-adonis-transmit'
 
 export type OllamaModelDownload = {
     model: string
     percent: number
     timestamp: string
+    /**
+     * BullMQ job id — included on progress events from v1.32+ so the frontend can
+     * call the cancel API. Optional for backward compat with stale broadcasts during
+     * a hot upgrade.
+     */
+    jobId?: string
+    /**
+     * Aggregate bytes across all blobs in the model pull, summed from Ollama's
+     * per-digest progress events on the backend. Optional for backward compat.
+     */
+    downloadedBytes?: number
+    totalBytes?: number
     error?: string
+    /** Set to 'cancelled' alongside percent === -2 when the user cancels the download */
+    status?: 'cancelled'
 }
 
 export default function useOllamaModelDownloads() {
@@ -13,6 +27,19 @@ export default function useOllamaModelDownloads() {
     const [downloads, setDownloads] = useState<Map<string, OllamaModelDownload>>(new Map())
     const timeoutsRef = useRef<Set<ReturnType<typeof setTimeout>>>(new Set())
 
+    /**
+     * Optimistically remove a download from local state — used by the cancel UI to clear
+     * the entry immediately on a successful API call, in case the Transmit cancelled
+     * broadcast arrives late or the SSE connection drops at exactly the wrong moment.
+     */
+    const removeDownload = useCallback((model: string) => {
+        setDownloads((current) => {
+            const next = new Map(current)
+            next.delete(model)
+            return next
+        })
+    }, [])
+
     useEffect(() => {
         const unsubscribe = subscribe('ollama-model-download', (data: OllamaModelDownload) => {
             setDownloads((prev) => {
@@ -30,6 +57,21 @@ export default function useOllamaModelDownloads() {
                         })
                     }, 15000)
                     timeoutsRef.current.add(errorTimeout)
+                } else if (data.percent === -2) {
+                    // Download cancelled — the entry is removed from `updated` immediately (below).
+                    // The 2s timer (matches the completion TTL) just repeats that removal as a
+                    // fallback. Component-level optimistic removal usually beats this branch; it's
+                    // a safety net for cases where the cancel comes from another tab or client.
+                    const cancelTimeout = setTimeout(() => {
+                        timeoutsRef.current.delete(cancelTimeout)
+                        setDownloads((current) => {
+                            const next = new Map(current)
+                            next.delete(data.model)
+                            return next
+                        })
+                    }, 2000)
+                    timeoutsRef.current.add(cancelTimeout)
+                    updated.delete(data.model)
                 } else if (data.percent >= 100) {
                     // If download is complete, keep it for a short time before removing to allow UI to show 100% progress
                     updated.set(data.model, data)
@@ -60,5 +102,5 @@ export default function useOllamaModelDownloads() {
 
     const downloadsArray = Array.from(downloads.values())
 
-    return { downloads: downloadsArray, activeCount: downloads.size }
+    return { downloads: downloadsArray, activeCount: downloads.size, removeDownload }
 }