@@ -203,6 +203,8 @@ class ModelType(Enum):
 
     SmolLM3 = 0x2700
 
+    Exaone4 = 0x2800
+
     BCE_Embedding = 0x10000100
     BCE_ReRanker = 0x10000101
     BGE_M3 = 0x10000102
@@ -2985,6 +2987,72 @@ def get_weight_names(config):
 
         return weight_names
 
+class Exaone4Converter(BaseConverter):
+    MODEL_TYPE = ModelType.Exaone4
+
+    @staticmethod
+    def dump_config(f, config, ggml_type):
+        MAX_LAYERS = 128
+        assert config.num_hidden_layers < MAX_LAYERS
+        assert config.rope_scaling['rope_type'] == 'llama3'
+        assert not config.attention_bias, "attention_bias must be False"
+        assert config.head_dim == config.hidden_size // config.num_attention_heads
+
+        dump_llama_like_config(f, config, ggml_type)
+
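+        # three int32 fields after the shared llama-style header:
+        # num_key_value_heads, sliding_window (-1 when unused), tie_word_embeddings flag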
+        config_values = [
+            config.num_key_value_heads,
+            config.sliding_window if config.sliding_window is not None else -1,
+            1 if config.tie_word_embeddings else 0,
+        ]
+        f.write(struct.pack("<" + "i" * len(config_values), *config_values))
+
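+        # llama3-style RoPE scaling parameters; "<fifff" packs a float32 theta,
+        # an int32 original context length, then three float32 scaling factors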
+        config_values = [
+            config.rope_theta,
+            config.rope_scaling['original_max_position_embeddings'],
+            config.rope_scaling['factor'],
+            config.rope_scaling['low_freq_factor'],
+            config.rope_scaling['high_freq_factor'],
+        ]
+        f.write(struct.pack("<fifff", *config_values))
+
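+        # classify each layer as sliding-window ("local") or full ("global") attention,
+        # based on whichever field the HF config provides (layer_types or sliding_window_pattern)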
+        def check_is_sliding(config, layer_idx):
+            if config.sliding_window is None:
+                return False
+            if config.layer_types is not None:
+                return config.layer_types[layer_idx] == "sliding_attention"
+            if isinstance(config.sliding_window_pattern, int):
+                return ((layer_idx + 1) % config.sliding_window_pattern) != 0
+            elif isinstance(config.sliding_window_pattern, str):
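+                # pattern string (e.g. "LLLG"): "L" marks a local (sliding) layer;
+                # the final layer is always treated as global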
+                assert isinstance(config.sliding_window, int), (
+                    f"Sliding window must be a positive integer, but got {config.sliding_window}"
+                )
+                return (
+                    layer_idx != config.num_hidden_layers - 1
+                    and config.sliding_window_pattern[layer_idx % len(config.sliding_window_pattern)] == "L"
+                )
+            return False
+
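+        # fixed-size table of MAX_LAYERS int32 flags (1 = sliding-window layer),
+        # so the binary layout stays independent of the actual layer count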
+        config_values = [0] * MAX_LAYERS
+        for i in range(config.num_hidden_layers):
+            if check_is_sliding(config, i):
+                config_values[i] = 1
+        f.write(struct.pack("<" + "i" * len(config_values), *config_values))
+
+    @staticmethod
+    def get_weight_names(config):
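+        # same weight list as OLMo2; drop the trailing name (the output head)
+        # when word embeddings are tied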
+        weight_names = OLMo2Converter.get_weight_names(config)
+        if config.tie_word_embeddings:
+            weight_names = weight_names[:-1]
+
+        return weight_names
+
 class InstellaConverter(BaseConverter):
     MODEL_TYPE = ModelType.Instella
 
@@ -7663,6 +7727,8 @@ def main():
         GraniteMoEConverter.convert(config, model_files, vocab, ggml_type, args.save_path)
     elif arch == 'ExaoneForCausalLM':
         ExaoneConverter.convert(config, model_files, vocab, ggml_type, args.save_path)
+    elif arch == 'Exaone4ForCausalLM':
+        Exaone4Converter.convert(config, model_files, vocab, ggml_type, args.save_path)
     elif arch == 'TeleChat2ForCausalLM':
         TeleChat2Converter.convert(config, model_files, vocab, ggml_type, args.save_path)
     elif arch == 'HunYuanForCausalLM':