@@ -74,6 +74,7 @@ def create_model_class(
     low_cpu_mem_usage=False,
     model_type=convert_hf_to_gguf.ModelType.TEXT,
     device="cpu",
+    quant_nontext_module: bool = False,
 ):
     tmp_work_dir = model.name_or_path
     os.makedirs(output_dir, exist_ok=True)
@@ -118,7 +119,12 @@ def create_model_class(
         small_first_shard=False,
     )
     model_instance = wrapper_model_instance(
-        model_instance, model=model, layer_config=layer_config, low_cpu_mem_usage=low_cpu_mem_usage, device=device
+        model_instance,
+        model=model,
+        layer_config=layer_config,
+        low_cpu_mem_usage=low_cpu_mem_usage,
+        device=device,
+        quant_nontext_module=quant_nontext_module,
     )
     model_instance = handle_special_model(model_instance, model_architecture)
     return model_instance
@@ -136,6 +142,7 @@ def pack_gguf_layer(
     image_processor=None,
     model_type=convert_hf_to_gguf.ModelType.TEXT,
     device="cpu",
+    quant_nontext_module=False,
 ):
     """Export the model to gguf format."""
     global gguf_model_instance_global
@@ -153,6 +160,7 @@ def pack_gguf_layer(
                 low_cpu_mem_usage=True,
                 model_type=convert_hf_to_gguf.ModelType.TEXT,
                 device=device,
+                quant_nontext_module=quant_nontext_module,
             )
         ]
         if model_type == convert_hf_to_gguf.ModelType.MMPROJ:
@@ -165,6 +173,7 @@ def pack_gguf_layer(
                     low_cpu_mem_usage=True,
                     model_type=convert_hf_to_gguf.ModelType.MMPROJ,
                     device=device,
+                    quant_nontext_module=quant_nontext_module,
                 )
             )
 
@@ -215,7 +224,14 @@ def pack_gguf_layer(
 
 @torch.inference_mode()
 def save_quantized_as_gguf(
-    output_dir, model=None, backend="gguf:q4_0", layer_config=None, mllm=False, device="cpu", **kwargs
+    output_dir,
+    model=None,
+    backend="gguf:q4_0",
+    layer_config=None,
+    mllm=False,
+    device="cpu",
+    quant_nontext_module=False,
+    **kwargs,
 ):
     """Export the model to gguf format."""
     st = time.time()
@@ -224,7 +240,13 @@ def save_quantized_as_gguf(
224240 if "gguf_model_instance_global" not in globals ():
225241 gguf_model_instance_global = [
226242 create_model_class (
227- output_dir , model , layer_config , backend , model_type = convert_hf_to_gguf .ModelType .TEXT , device = device
243+ output_dir ,
244+ model ,
245+ layer_config ,
246+ backend ,
247+ model_type = convert_hf_to_gguf .ModelType .TEXT ,
248+ device = device ,
249+ quant_nontext_module = quant_nontext_module ,
228250 )
229251 ]
230252 if mllm :
@@ -236,6 +258,7 @@ def save_quantized_as_gguf(
                     backend,
                     model_type=convert_hf_to_gguf.ModelType.MMPROJ,
                     device=device,
+                    quant_nontext_module=quant_nontext_module,
                 )
             )
 
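
For context, a minimal usage sketch of the new flag as seen from the public entry point. Only the save_quantized_as_gguf signature comes from this diff; the checkpoint name, the way the model is loaded, and the meaning attributed to the flag are assumptions for illustration, and layer_config would normally be supplied by the quantization flow rather than passed by hand:

# Hypothetical sketch: pass quant_nontext_module at the top level so it is
# threaded down to create_model_class / pack_gguf_layer as wired up above.
from transformers import AutoModelForCausalLM  # assumed loading path, not part of this diff

model = AutoModelForCausalLM.from_pretrained("path/to/quantized-mllm")  # hypothetical checkpoint
save_quantized_as_gguf(
    output_dir="./gguf_out",
    model=model,
    backend="gguf:q4_0",
    layer_config=None,          # normally produced by the quantization flow
    mllm=True,                  # also export the MMPROJ part for multimodal models
    device="cpu",
    quant_nontext_module=True,  # new flag: presumably also quantize non-text (e.g. vision) modules
)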