@@ -2865,8 +2865,12 @@ def __init__(self, *args, **kwargs):
28652865 # fix for SmolVLM2, missing `num_attention_heads` in config.json
28662866 if self.hf_arch == "VLlama3ForCausalLM":
28672867 self.hparams["num_attention_heads"] = self.hparams.get("num_attention_heads", 32)
2868- hparams = ModelBase.load_hparams(self.dir_model, is_mistral_format=False)
2869- self.origin_hf_arch = hparams.get('architectures', [None])[0]
2868+ # Mistral consolidated format has no config.json; origin_hf_arch is HF-only.
2869+ if self.is_mistral_format:
2870+ self.origin_hf_arch = None
2871+ else:
2872+ hparams = ModelBase.load_hparams(self.dir_model, is_mistral_format=False)
2873+ self.origin_hf_arch = hparams.get('architectures', [None])[0]
28702874
28712875 def set_vocab(self):
28722876 if self.origin_hf_arch == "GlmasrModel":
@@ -13409,16 +13413,20 @@ def set_gguf_parameters(self):
1340913413 self.gguf_writer.add_vision_use_silu(True)
1341013414
1341113415 # spatial_merge_size
13412- if self.find_vparam(["mm_projector_id"]) == "patch_merge":
13416+ if self.find_vparam(["mm_projector_id"], optional=True ) == "patch_merge":
1341313417 self.gguf_writer.add_vision_spatial_merge_size(
1341413418 self.find_vparam(["spatial_merge_size"])
1341513419 )
1341613420
def map_tensor_name(self, name: str, try_suffixes: Sequence[str] = (".weight", ".bias")) -> str:
    """Translate a source tensor name into its target tensor name.

    The weight and bias tensors of the ``vision_language_adapter`` layers
    ``w_in`` and ``w_out`` are renamed to the fixed ``mm.1.*`` and
    ``mm.2.*`` names. Any other name is handed to the parent class
    implementation together with ``try_suffixes``.
    """
    # Fixed renames for the adapter tensors; everything else is delegated.
    adapter_renames = {
        "vision_language_adapter.w_in.weight": "mm.1.weight",
        "vision_language_adapter.w_in.bias": "mm.1.bias",
        "vision_language_adapter.w_out.weight": "mm.2.weight",
        "vision_language_adapter.w_out.bias": "mm.2.bias",
    }
    if name in adapter_renames:
        return adapter_renames[name]
    return super().map_tensor_name(name, try_suffixes)
1342313431
1342413432
0 commit comments