Fix Swift compiler warnings and refine MTP output2D scatter logic

Aegis-AI · Aegis-AI · commit c66df128a08f · 2026-05-13T10:12:13.000-07:00
diff --git a/Libraries/MLXLLM/Models/Gemma4Text.swift b/Libraries/MLXLLM/Models/Gemma4Text.swift
@@ -1116,7 +1116,7 @@ public class Gemma4AssistantModel: Module, LLMModel, DualModelMTP, KVCacheDimens
         // Use mlx scatter via the __setitem__ approach:
         let scatterIdx2D = selectedCanonicalShaped.reshaped([B * S, totalCandidates]).asType(.int32)
         let selectedLogits2D = selectedLogits.reshaped([B * S, totalCandidates])
-        var output2D = output.reshaped([B * S, vocabSize])
+        let output2D = output.reshaped([B * S, vocabSize])
         let rowIndices = MLXArray.arange(B * S).asType(.int32).reshaped([B * S, 1])
         output2D[rowIndices, scatterIdx2D] = selectedLogits2D
         output = output2D.reshaped([B, S, vocabSize])
diff --git a/Libraries/MLXLMCommon/Load.swift b/Libraries/MLXLMCommon/Load.swift
@@ -126,12 +126,10 @@ public func loadWeights(
                     let allPrefixes = ["", "model.", "language_model.", "model.language_model."]
                     let candidates = [expert0Name, stripped0Name, strippedMtpName] + allPrefixes.map { $0 + stripped0Name } + allPrefixes.map { $0 + strippedMtpName }
                     var foundUnstacked = false
-                    var matchedCandidate = ""
                     
                     for candidate in candidates {
                         if ExpertStreamerManager.shared?.getFile(for: candidate) != nil {
                             foundUnstacked = true
-                            matchedCandidate = candidate
                             var map = [Int: (path: String, tensorName: String)]()
                             for i in 0 ..< sl.numExperts {
                                 let c = candidate.replacingOccurrences(of: ".experts.0.", with: ".experts.\(i).")
diff --git a/Libraries/MLXLMCommon/SwitchLayers.swift b/Libraries/MLXLMCommon/SwitchLayers.swift
@@ -316,10 +316,7 @@ public class SwitchGLU: Module, @unchecked Sendable {
             var outShape = x.shape
             outShape[outShape.count - 1] = downProj.outputDims
             let result = MLXArray.zeros(outShape).asType(.float16)
-            if doSort {
-                return MLX.squeezed(scatterUnsort(x: result, invOrder: inverseOrder, shape: indices.shape), axis: -2)
-            }
-            return MLX.squeezed(result, axis: -2)
+            return MLX.squeezed(scatterUnsort(x: result, invOrder: inverseOrder, shape: indices.shape), axis: -2)
         }
 
         // Parse routing — `idx.asArray()` is the actual sync point on GPU.
diff --git a/test_array_init.swift b/test_array_init.swift
@@ -0,0 +1,7 @@
+import Foundation
+import MLX
+MLX.GPU.set(cacheLimit: 10 * 1024 * 1024)
+
+let size: Int = 10
+let arr = MLXArray(0 ..< size).asType(.int32)
+print(arr)
diff --git a/test_scatter.swift b/test_scatter.swift
@@ -0,0 +1,13 @@
+import Foundation
+import MLX
+
+MLX.GPU.set(cacheLimit: 10 * 1024 * 1024)
+
+var out = MLXArray.zeros([4, 10])
+let rows = MLXArray(0 ..< Int32(4)).reshaped([4, 1])
+let cols = MLXArray([1, 2, 0, 4, 3, 5, 2, 9]).reshaped([4, 2])
+let vals = MLXArray([10, 20, 30, 40, 50, 60, 70, 80]).reshaped([4, 2])
+
+out[rows, cols] = vals
+MLX.eval(out)
+print(out)