chore: fix Sendable violations, deprecations, and unused variables for CI stabilization

Aegis AI Assistant · Aegis AI Assistant · commit fa64cf6bdf6a · 2026-04-12T16:05:19.000-07:00
diff --git a/Libraries/MLXLLM/Models/Qwen35.swift b/Libraries/MLXLLM/Models/Qwen35.swift
@@ -7,6 +7,8 @@
 //  Port of https://github.com/ml-explore/mlx-lm/blob/main/mlx_lm/models/qwen3_5.py
 //
 
+@preconcurrency import AVFoundation
+@preconcurrency import CoreImage.CIFilterBuiltins
 import Foundation
 import MLX
 import MLXLMCommon
@@ -550,7 +552,7 @@ final class Qwen35DecoderLayer: Module {
         //    (blocking the CPU). Ensuring the previous GPU work is committed and completed
         //    means the expert GEMM executes on an isolated, empty Metal Command Buffer.
         // ─────────────────────────────────────────────────────────────────────
-        if let moeBlock = self.mlp as? Qwen35SparseMoeBlock {
+        if let _ = self.mlp as? Qwen35SparseMoeBlock {
             if let cacheState = cache {
                 eval([h] + cacheState.innerState())
             } else {
@@ -561,7 +563,7 @@ final class Qwen35DecoderLayer: Module {
         
         let mlpOutput = (self.mlp as! UnaryLayer)(postAttentionLayerNorm(h))
         let finalH = h + mlpOutput
-        if let moeBlock = self.mlp as? Qwen35SparseMoeBlock {
+        if let _ = self.mlp as? Qwen35SparseMoeBlock {
             eval(finalH)
             Stream.gpu.synchronize()
         }
diff --git a/Libraries/MLXLMCommon/LayerPartitioning.swift b/Libraries/MLXLMCommon/LayerPartitioning.swift
@@ -152,7 +152,7 @@ public func partitionedLayerCall<T>(
         }
         
         // 2. Clear MLX's internal Metal buffer pool.
-        GPU.clearCache()
+        Memory.clearCache()
     }
     
     return result
diff --git a/Libraries/MLXLMCommon/SwitchLayers.swift b/Libraries/MLXLMCommon/SwitchLayers.swift
@@ -1,5 +1,4 @@
-import Foundation
-import MLX
+@preconcurrency import MLX
 import MLXNN
 
 // Port of https://github.com/ml-explore/mlx-examples/blob/main/llms/mlx_lm/models/switch_layers.py
@@ -27,15 +26,15 @@ public func scatterUnsort(x: MLXArray, invOrder: MLXArray, shape: [Int]? = nil)
 
 
 // Shared struct for expert range tracking across projections
-public struct ExpertRange {
+public struct ExpertRange: Sendable {
     public let id: Int
     public let start: Int
     public let end: Int
 }
 
 // MARK: - SwitchGLU
 
-public class SwitchGLU: Module {
+public class SwitchGLU: Module, @unchecked Sendable {
     @ModuleInfo(key: "gate_proj") public var gateProj: SwitchLinear
     @ModuleInfo(key: "up_proj") public var upProj: SwitchLinear
     @ModuleInfo(key: "down_proj") public var downProj: SwitchLinear
@@ -654,9 +653,8 @@ public class QuantizedSwitchLinear: SwitchLinear, Quantized {
                 outShape[outShape.count - 1] = self.outputDims
                 return MLXArray.zeros(outShape).asType(.float16)
             }
-
             // PAPPS Heuristic: Prefetch exactly these experts so they are in cache for the N+1 token.
-            if let info = ssdInfo {
+            if let _ = ssdInfo {
                 let uniqueIndices = Set(cpuIndices)
                 for _ in uniqueIndices {
                     // MLXFast.pappsPrefetch(
@@ -804,8 +802,8 @@ public final class SSDStreamMetrics: @unchecked Sendable {
         let now = DispatchTime.now().uptimeNanoseconds
         if now - lastLogTimeNs >= 1_000_000_000 {
             let count = readCount
-            let bytes = totalBytes
-            let ns = totalTimeNs
+            _ = totalBytes
+            _ = totalTimeNs
             
             self.readCount = 0
             self.totalBytes = 0
diff --git a/Libraries/MLXVLM/MediaProcessing.swift b/Libraries/MLXVLM/MediaProcessing.swift
@@ -1,7 +1,7 @@
 // Copyright © 2024 Apple Inc.
 
 @preconcurrency import AVFoundation
-import CoreImage.CIFilterBuiltins
+@preconcurrency import CoreImage.CIFilterBuiltins
 import MLX
 import MLXLMCommon
 

Original file line number	Diff line number	Diff line change
`@@ -152,7 +152,7 @@ public func partitionedLayerCall<T>(`
`152`	`152`	`}`
`153`	`153`
`154`	`154`	`// 2. Clear MLX's internal Metal buffer pool.`
`155`		`- GPU.clearCache()`
	`155`	`+ Memory.clearCache()`
`156`	`156`	`}`
`157`	`157`
`158`	`158`	`return result`