Merge pull request #28 from SharpAI/fix/gemma4-pad-eos-token

solderzzc · web-flow · commit d1ff6d428576 · 2026-04-21T13:50:02.000-07:00
fix(Gemma4): add pad token (ID=0) to eosTokenIds to prevent infinite …
diff --git a/Libraries/MLXLLM/LLMModelFactory.swift b/Libraries/MLXLLM/LLMModelFactory.swift
@@ -208,13 +208,29 @@ public class LLMRegistry: AbstractModelRegistry, @unchecked Sendable {
     static public let gemma4_e4b_it_4bit = ModelConfiguration(
         id: "mlx-community/gemma-4-e4b-it-4bit",
         defaultPrompt: "What is the difference between a fruit and a vegetable?",
-        extraEOSTokens: ["<turn|>"]
+        extraEOSTokens: ["<turn|>", "<pad>"],
+        eosTokenIds: [0]
     )
 
     static public let gemma4_e2b_it_4bit = ModelConfiguration(
         id: "mlx-community/gemma-4-e2b-it-4bit",
         defaultPrompt: "What is the difference between a fruit and a vegetable?",
-        extraEOSTokens: ["<turn|>"]
+        extraEOSTokens: ["<turn|>", "<pad>"],
+        eosTokenIds: [0]
+    )
+
+    static public let gemma4_26BA4B_it_4bit = ModelConfiguration(
+        id: "mlx-community/gemma-4-26b-a4b-it-4bit",
+        defaultPrompt: "What is the difference between a fruit and a vegetable?",
+        extraEOSTokens: ["<turn|>", "<pad>"],
+        eosTokenIds: [0]
+    )
+
+    static public let gemma4_31B_it_4bit = ModelConfiguration(
+        id: "mlx-community/gemma-4-31b-it-4bit",
+        defaultPrompt: "What is the difference between a fruit and a vegetable?",
+        extraEOSTokens: ["<turn|>", "<pad>"],
+        eosTokenIds: [0]
     )
 
     static public let qwen205b4bit = ModelConfiguration(
@@ -400,6 +416,8 @@ public class LLMRegistry: AbstractModelRegistry, @unchecked Sendable {
             gemma3n_E2B_it_lm_4bit,
             gemma4_e4b_it_4bit,
             gemma4_e2b_it_4bit,
+            gemma4_26BA4B_it_4bit,
+            gemma4_31B_it_4bit,
             granite3_3_2b_4bit,
             granite_4_0_h_tiny_4bit_dwq,
             llama3_1_8B_4bit,
@@ -556,6 +574,7 @@ public final class LLMModelFactory: ModelFactory {
 
         // Build a ModelConfiguration with loaded EOS token IDs and tool call format
         var mutableConfiguration = configuration
+        eosTokenIds.formUnion(configuration.eosTokenIds)
         mutableConfiguration.eosTokenIds = eosTokenIds
         if mutableConfiguration.toolCallFormat == nil {
             mutableConfiguration.toolCallFormat = ToolCallFormat.infer(
diff --git a/Libraries/MLXLMCommon/ModelConfiguration.swift b/Libraries/MLXLMCommon/ModelConfiguration.swift
@@ -101,7 +101,12 @@ public struct ModelConfiguration: Sendable {
     /// Additional tokens to use for end of string (specified as strings, converted to IDs at runtime)
     public var extraEOSTokens: Set<String>
 
-    /// EOS token IDs loaded from config.json/generation_config.json
+    /// EOS token IDs used during generation.
+    ///
+    /// At load time this set is populated by merging:
+    /// - IDs from the model's `config.json` / `generation_config.json` (loaded at runtime)
+    /// - Any additional IDs provided by the registry / caller at registration time
+    ///   (e.g. `eosTokenIds: [0]` in ``LLMRegistry`` as the Gemma-4 pad-token workaround)
     public var eosTokenIds: Set<Int> = []
 
     /// Tool call format for this model (nil = default JSON format)
@@ -115,6 +120,7 @@ public struct ModelConfiguration: Sendable {
         tokenizerSource: TokenizerSource? = nil,
         defaultPrompt: String = "",
         extraEOSTokens: Set<String> = [],
+        eosTokenIds: Set<Int> = [],
         toolCallFormat: ToolCallFormat? = nil,
         preparePrompt: (@Sendable (String) -> String)? = nil,
         lazyLoad: Bool = false
@@ -123,6 +129,7 @@ public struct ModelConfiguration: Sendable {
         self.tokenizerSource = tokenizerSource
         self.defaultPrompt = defaultPrompt
         self.extraEOSTokens = extraEOSTokens
+        self.eosTokenIds = eosTokenIds
         self.toolCallFormat = toolCallFormat
         self.lazyLoad = lazyLoad
     }
diff --git a/Libraries/MLXVLM/VLMModelFactory.swift b/Libraries/MLXVLM/VLMModelFactory.swift
@@ -212,25 +212,29 @@ public class VLMRegistry: AbstractModelRegistry, @unchecked Sendable {
     static public let gemma4_E2B_it_4bit = ModelConfiguration(
         id: "mlx-community/gemma-4-e2b-it-4bit",
         defaultPrompt: "Describe the image in English",
-        extraEOSTokens: ["<end_of_turn>"]
+        extraEOSTokens: ["<turn|>", "<pad>"],
+        eosTokenIds: [0]
     )
 
     static public let gemma4_E4B_it_4bit = ModelConfiguration(
         id: "mlx-community/gemma-4-e4b-it-4bit",
         defaultPrompt: "Describe the image in English",
-        extraEOSTokens: ["<end_of_turn>"]
+        extraEOSTokens: ["<turn|>", "<pad>"],
+        eosTokenIds: [0]
     )
 
     static public let gemma4_31B_it_4bit = ModelConfiguration(
         id: "mlx-community/gemma-4-31b-it-4bit",
         defaultPrompt: "Describe the image in English",
-        extraEOSTokens: ["<end_of_turn>"]
+        extraEOSTokens: ["<turn|>", "<pad>"],
+        eosTokenIds: [0]
     )
 
     static public let gemma4_26BA4B_it_4bit = ModelConfiguration(
         id: "mlx-community/gemma-4-26b-a4b-it-4bit",
         defaultPrompt: "Describe the image in English",
-        extraEOSTokens: ["<end_of_turn>"]
+        extraEOSTokens: ["<turn|>", "<pad>"],
+        eosTokenIds: [0]
     )
 
     static public let smolvlm = ModelConfiguration(
@@ -357,6 +361,7 @@ public final class VLMModelFactory: ModelFactory {
         }
 
         var mutableConfiguration = configuration
+        eosTokenIds.formUnion(configuration.eosTokenIds)
         mutableConfiguration.eosTokenIds = eosTokenIds
 
         // Auto-detect tool call format from model type if not explicitly set