@@ -201,6 +201,8 @@ class ModelType(Enum):
201201
202202 PenguMoE = 0x2600
203203
204+ SmolLM3 = 0x2700
205+
204206 BCE_Embedding = 0x10000100
205207 BCE_ReRanker = 0x10000101
206208 BGE_M3 = 0x10000102
@@ -1880,6 +1882,31 @@ def get_weight_names(config):
18801882 r = Llama3Converter .get_weight_names (config )
18811883 return r [:- 1 ]
18821884
class SmolLM3Converter(BaseConverter):
    """Converter for SmolLM3 (`SmolLM3ForCausalLM`) checkpoints.

    SmolLM3 is Llama-3-like; on top of the common Llama-like header it
    serializes `num_key_value_heads`, `no_rope_layer_interval` (NoPE) and a
    tied-embeddings flag, followed by `rope_theta`.
    """
    MODEL_TYPE = ModelType.SmolLM3

    # Set by dump_config() and read later by get_weight_names();
    # dump_config() runs first during conversion, so the flag is
    # populated before the weight-name list is requested.
    tie_word_embeddings = True

    @staticmethod
    def dump_config(f, config, ggml_type):
        """Validate the HF config and write SmolLM3 hyper-parameters to `f`.

        Layout: Llama-like header, then three int32 values
        (num_key_value_heads, no_rope_layer_interval, tie flag),
        then rope_theta as little-endian float32.
        """
        assert config.rope_scaling is None
        # Sliding-window attention is unsupported: it must be disabled/unset,
        # or every layer must use full attention. Test `use_sliding_window`
        # FIRST so a config with a missing `layer_types` (None) short-circuits
        # instead of raising AttributeError on `.count`.
        assert (config.use_sliding_window is None) or (not config.use_sliding_window) or \
            (config.layer_types.count('full_attention') == config.num_hidden_layers)
        # Default to tied embeddings when the field is absent (None).
        SmolLM3Converter.tie_word_embeddings = (config.tie_word_embeddings is None) or (config.tie_word_embeddings)

        dump_llama_like_config(f, config, ggml_type)
        config_values = [
            config.num_key_value_heads,
            config.no_rope_layer_interval,
            1 if SmolLM3Converter.tie_word_embeddings else 0,
        ]
        f.write(struct.pack("i" * len(config_values), *config_values))
        f.write(struct.pack("<f", config.rope_theta))

    @staticmethod
    def get_weight_names(config):
        """Same weight list as Llama-3; drop the trailing `lm_head` entry
        when the word embeddings are tied."""
        r = Llama3Converter.get_weight_names(config)
        return r[:-1] if SmolLM3Converter.tie_word_embeddings else r
18831910class SmolVLMConverter (BaseConverter ):
18841911 MODEL_TYPE = ModelType .SmolVLM
18851912
@@ -7404,6 +7431,8 @@ def main():
74047431 Llama3Converter .convert (config , model_files , vocab , ggml_type , args .save_path )
74057432 elif arch == 'smollm' :
74067433 SmolLMConverter .convert (config , model_files , vocab , ggml_type , args .save_path )
7434+ elif arch == 'SmolLM3ForCausalLM' :
7435+ SmolLM3Converter .convert (config , model_files , vocab , ggml_type , args .save_path )
74077436 elif arch == 'SmolVLMForConditionalGeneration' :
74087437 SmolVLMConverter .convert (config , model_files , vocab , ggml_type , args .save_path )
74097438 elif arch == 'XverseForCausalLM' :
0 commit comments