convert : fix conversion for Mistral-Medium-3.5-128B (#24268)

dfriehs · web-flow · commit 8a963fc10ee0 · 2026-06-07T21:41:39.000+02:00
Mistral explicitly sets `moe` and `llama_4_scaling` to `null` in
params.json, so the keys are present with a `None` value and `key in dict`
checks wrongly succeed during conversion. Replace them with
`dict.get(key) is not None` where this matters.

Fixes `convert_hf_to_gguf.py --mistral-format Mistral-Medium-3.5-128B`
diff --git a/conversion/mistral.py b/conversion/mistral.py
@@ -105,8 +105,9 @@ def set_mistral_config(gguf_writer: gguf.GGUFWriter, hparams: dict):
             gguf_writer.add_rope_scaling_yarn_log_mul(mscale_all_dim)
             gguf_writer.add_rope_scaling_orig_ctx_len(yarn_params["original_max_position_embeddings"])
 
-        if "llama_4_scaling" in hparams:
-            gguf_writer.add_attn_temperature_scale(hparams["llama_4_scaling"]["beta"])
+        llama_4_scaling = hparams.get("llama_4_scaling")
+        if llama_4_scaling is not None:
+            gguf_writer.add_attn_temperature_scale(llama_4_scaling["beta"])
 
 
 class MistralMoeModel(DeepseekV2Model):
diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py
@@ -238,7 +238,7 @@ def main() -> None:
             assert hparams.get("vision_encoder") is not None, "This model does not support multimodal"
             from conversion.pixtral import PixtralModel
             model_class = PixtralModel
-        elif "moe" in hparams:
+        elif hparams.get("moe") is not None:
             from conversion.mistral import MistralMoeModel
             model_class = MistralMoeModel
         else: