Skip to content

Commit d1ff6d4

Browse files
authored
Merge pull request #28 from SharpAI/fix/gemma4-pad-eos-token
fix(Gemma4): add pad token (ID=0) to eosTokenIds to prevent infinite …
2 parents 50c3732 + 71a77e0 commit d1ff6d4

3 files changed

Lines changed: 38 additions & 7 deletions

File tree

Libraries/MLXLLM/LLMModelFactory.swift

Lines changed: 21 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -208,13 +208,29 @@ public class LLMRegistry: AbstractModelRegistry, @unchecked Sendable {
208208
static public let gemma4_e4b_it_4bit = ModelConfiguration(
209209
id: "mlx-community/gemma-4-e4b-it-4bit",
210210
defaultPrompt: "What is the difference between a fruit and a vegetable?",
211-
extraEOSTokens: ["<turn|>"]
211+
extraEOSTokens: ["<turn|>", "<pad>"],
212+
eosTokenIds: [0]
212213
)
213214

214215
static public let gemma4_e2b_it_4bit = ModelConfiguration(
215216
id: "mlx-community/gemma-4-e2b-it-4bit",
216217
defaultPrompt: "What is the difference between a fruit and a vegetable?",
217-
extraEOSTokens: ["<turn|>"]
218+
extraEOSTokens: ["<turn|>", "<pad>"],
219+
eosTokenIds: [0]
220+
)
221+
222+
static public let gemma4_26BA4B_it_4bit = ModelConfiguration(
223+
id: "mlx-community/gemma-4-26b-a4b-it-4bit",
224+
defaultPrompt: "What is the difference between a fruit and a vegetable?",
225+
extraEOSTokens: ["<turn|>", "<pad>"],
226+
eosTokenIds: [0]
227+
)
228+
229+
static public let gemma4_31B_it_4bit = ModelConfiguration(
230+
id: "mlx-community/gemma-4-31b-it-4bit",
231+
defaultPrompt: "What is the difference between a fruit and a vegetable?",
232+
extraEOSTokens: ["<turn|>", "<pad>"],
233+
eosTokenIds: [0]
218234
)
219235

220236
static public let qwen205b4bit = ModelConfiguration(
@@ -400,6 +416,8 @@ public class LLMRegistry: AbstractModelRegistry, @unchecked Sendable {
400416
gemma3n_E2B_it_lm_4bit,
401417
gemma4_e4b_it_4bit,
402418
gemma4_e2b_it_4bit,
419+
gemma4_26BA4B_it_4bit,
420+
gemma4_31B_it_4bit,
403421
granite3_3_2b_4bit,
404422
granite_4_0_h_tiny_4bit_dwq,
405423
llama3_1_8B_4bit,
@@ -556,6 +574,7 @@ public final class LLMModelFactory: ModelFactory {
556574

557575
// Build a ModelConfiguration with loaded EOS token IDs and tool call format
558576
var mutableConfiguration = configuration
577+
eosTokenIds.formUnion(configuration.eosTokenIds)
559578
mutableConfiguration.eosTokenIds = eosTokenIds
560579
if mutableConfiguration.toolCallFormat == nil {
561580
mutableConfiguration.toolCallFormat = ToolCallFormat.infer(

Libraries/MLXLMCommon/ModelConfiguration.swift

Lines changed: 8 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -101,7 +101,12 @@ public struct ModelConfiguration: Sendable {
101101
/// Additional tokens to use for end of string (specified as strings, converted to IDs at runtime)
102102
public var extraEOSTokens: Set<String>
103103

104-
/// EOS token IDs loaded from config.json/generation_config.json
104+
/// EOS token IDs used during generation.
105+
///
106+
/// At load time this set is populated by merging:
107+
/// - IDs from the model's `config.json` / `generation_config.json` (loaded at runtime)
108+
/// - Any additional IDs provided by the registry / caller at registration time
109+
/// (e.g. `eosTokenIds: [0]` in ``LLMRegistry`` for Gemma-4 pad-token workaround)
105110
public var eosTokenIds: Set<Int> = []
106111

107112
/// Tool call format for this model (nil = default JSON format)
@@ -115,6 +120,7 @@ public struct ModelConfiguration: Sendable {
115120
tokenizerSource: TokenizerSource? = nil,
116121
defaultPrompt: String = "",
117122
extraEOSTokens: Set<String> = [],
123+
eosTokenIds: Set<Int> = [],
118124
toolCallFormat: ToolCallFormat? = nil,
119125
preparePrompt: (@Sendable (String) -> String)? = nil,
120126
lazyLoad: Bool = false
@@ -123,6 +129,7 @@ public struct ModelConfiguration: Sendable {
123129
self.tokenizerSource = tokenizerSource
124130
self.defaultPrompt = defaultPrompt
125131
self.extraEOSTokens = extraEOSTokens
132+
self.eosTokenIds = eosTokenIds
126133
self.toolCallFormat = toolCallFormat
127134
self.lazyLoad = lazyLoad
128135
}

Libraries/MLXVLM/VLMModelFactory.swift

Lines changed: 9 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -212,25 +212,29 @@ public class VLMRegistry: AbstractModelRegistry, @unchecked Sendable {
212212
static public let gemma4_E2B_it_4bit = ModelConfiguration(
213213
id: "mlx-community/gemma-4-e2b-it-4bit",
214214
defaultPrompt: "Describe the image in English",
215-
extraEOSTokens: ["<end_of_turn>"]
215+
extraEOSTokens: ["<turn|>", "<pad>"],
216+
eosTokenIds: [0]
216217
)
217218

218219
static public let gemma4_E4B_it_4bit = ModelConfiguration(
219220
id: "mlx-community/gemma-4-e4b-it-4bit",
220221
defaultPrompt: "Describe the image in English",
221-
extraEOSTokens: ["<end_of_turn>"]
222+
extraEOSTokens: ["<turn|>", "<pad>"],
223+
eosTokenIds: [0]
222224
)
223225

224226
static public let gemma4_31B_it_4bit = ModelConfiguration(
225227
id: "mlx-community/gemma-4-31b-it-4bit",
226228
defaultPrompt: "Describe the image in English",
227-
extraEOSTokens: ["<end_of_turn>"]
229+
extraEOSTokens: ["<turn|>", "<pad>"],
230+
eosTokenIds: [0]
228231
)
229232

230233
static public let gemma4_26BA4B_it_4bit = ModelConfiguration(
231234
id: "mlx-community/gemma-4-26b-a4b-it-4bit",
232235
defaultPrompt: "Describe the image in English",
233-
extraEOSTokens: ["<end_of_turn>"]
236+
extraEOSTokens: ["<turn|>", "<pad>"],
237+
eosTokenIds: [0]
234238
)
235239

236240
static public let smolvlm = ModelConfiguration(
@@ -357,6 +361,7 @@ public final class VLMModelFactory: ModelFactory {
357361
}
358362

359363
var mutableConfiguration = configuration
364+
eosTokenIds.formUnion(configuration.eosTokenIds)
360365
mutableConfiguration.eosTokenIds = eosTokenIds
361366

362367
// Auto-detect tool call format from model type if not explicitly set

0 commit comments

Comments
 (0)