Skip to content

Commit cce09f0

Browse files
authored
convert : fix Pixtral 12B --mistral-format conversion (3 bugs) (ggml-org#22981)
1 parent dded58b commit cce09f0

1 file changed

Lines changed: 11 additions & 3 deletions

File tree

convert_hf_to_gguf.py

Lines changed: 11 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -2865,8 +2865,12 @@ def __init__(self, *args, **kwargs):
28652865
# fix for SmolVLM2, missing `num_attention_heads` in config.json
28662866
if self.hf_arch == "VLlama3ForCausalLM":
28672867
self.hparams["num_attention_heads"] = self.hparams.get("num_attention_heads", 32)
2868-
hparams = ModelBase.load_hparams(self.dir_model, is_mistral_format=False)
2869-
self.origin_hf_arch = hparams.get('architectures', [None])[0]
2868+
# Mistral consolidated format has no config.json; origin_hf_arch is HF-only.
2869+
if self.is_mistral_format:
2870+
self.origin_hf_arch = None
2871+
else:
2872+
hparams = ModelBase.load_hparams(self.dir_model, is_mistral_format=False)
2873+
self.origin_hf_arch = hparams.get('architectures', [None])[0]
28702874

28712875
def set_vocab(self):
28722876
if self.origin_hf_arch == "GlmasrModel":
@@ -13409,16 +13413,20 @@ def set_gguf_parameters(self):
1340913413
self.gguf_writer.add_vision_use_silu(True)
1341013414

1341113415
# spatial_merge_size
13412-
if self.find_vparam(["mm_projector_id"]) == "patch_merge":
13416+
if self.find_vparam(["mm_projector_id"], optional=True) == "patch_merge":
1341313417
self.gguf_writer.add_vision_spatial_merge_size(
1341413418
self.find_vparam(["spatial_merge_size"])
1341513419
)
1341613420

1341713421
def map_tensor_name(self, name: str, try_suffixes: Sequence[str] = (".weight", ".bias")) -> str:
1341813422
if name == "vision_language_adapter.w_in.weight":
1341913423
return "mm.1.weight"
13424+
elif name == "vision_language_adapter.w_in.bias":
13425+
return "mm.1.bias"
1342013426
elif name == "vision_language_adapter.w_out.weight":
1342113427
return "mm.2.weight"
13428+
elif name == "vision_language_adapter.w_out.bias":
13429+
return "mm.2.bias"
1342213430
return super().map_tensor_name(name, try_suffixes)
1342313431

1342413432

0 commit comments

Comments
 (0)