vllm-project · alex-jw-brooks · Jan 14, 2026 · gkumbhat · Jan 21, 2026
@@ -350,6 +350,9 @@ def __init__(
         elif self.config.model_type == "gpt_bigcode":
             self.kv_cache_specs["num_layers"] = self.config.n_layer
             self.kv_cache_specs["head_dim"] = self.config.n_embd // self.config.n_head
+        elif self.config.model_type == "pixtral":
+            self.kv_cache_specs["num_layers"] = self.config.text_config.num_hidden_layers
+            self.kv_cache_specs["head_dim"] = self.config.text_config.head_dim
         else:
             raise NotImplementedError(
                 f"[SpyreCausalLM] model type {self.config.model_type} "

@@ -354,6 +354,12 @@ def build_input_batch(self) -> SamplingInputBatch:
 
     @property
     def vocab_size(self) -> int:
+        cfg = self.model.model.model.config
+        # Mistral3 multimodal configs (only the text path runs today) carry a
+        # nested text_config; when present, read src_vocab_size from it.
+        # TODO (Alex): move this to utils after granite vision is merged.
+        if hasattr(cfg, "text_config"):
+            return cfg.text_config.src_vocab_size
         return self.model.model.model.config.src_vocab_size
 
     def pad_input_ids(