Skip to content

Commit d27c5ce

Browse files
janhilgard and claude committed
style: black formatting
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent cfcf32a commit d27c5ce

File tree

2 files changed

+5
-11
lines changed

2 files changed

+5
-11
lines changed

vllm_mlx/engine_core.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -154,9 +154,7 @@ async def _engine_loop(self) -> None:
154154
# Emergency memory pressure threshold — dynamic based on gpu_memory_utilization
155155
_gpu_mem_util = self.config.gpu_memory_utilization
156156
try:
157-
_device_mem = mx.device_info().get(
158-
"memory_size", 200 * 1024 * 1024 * 1024
159-
)
157+
_device_mem = mx.device_info().get("memory_size", 200 * 1024 * 1024 * 1024)
160158
_memory_pressure_threshold = int(
161159
_device_mem * min(_gpu_mem_util + 0.05, 0.99)
162160
)
@@ -249,6 +247,7 @@ async def _engine_loop(self) -> None:
249247
break
250248
except Exception as e:
251249
import traceback
250+
252251
logger.error(f"Engine loop error: {e}\n{traceback.format_exc()}")
253252
await asyncio.sleep(0.1)
254253

vllm_mlx/patches/qwen3_next_mtp.py

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -52,9 +52,7 @@ def inject_mtp_support(model: Any, model_path, config: dict) -> bool:
5252
model_path = Path(model_path)
5353
mtp_file = model_path / "model-mtp.safetensors"
5454
if not mtp_file.exists():
55-
logger.warning(
56-
f"[MTP inject] model-mtp.safetensors not found in {model_path}"
57-
)
55+
logger.warning(f"[MTP inject] model-mtp.safetensors not found in {model_path}")
5856
return False
5957

6058
args = model.args
@@ -76,14 +74,11 @@ def __init__(self, args, n_layers):
7674
self.pre_fc_norm_embedding = nn.RMSNorm(
7775
args.hidden_size, eps=args.rms_norm_eps
7876
)
79-
self.fc = nn.Linear(
80-
args.hidden_size * 2, args.hidden_size, bias=False
81-
)
77+
self.fc = nn.Linear(args.hidden_size * 2, args.hidden_size, bias=False)
8278
# MTP decoder uses full attention (not linear/delta-net)
8379
fa_idx = args.full_attention_interval - 1
8480
self.layers = [
85-
Qwen3NextDecoderLayer(args, layer_idx=fa_idx)
86-
for _ in range(n_layers)
81+
Qwen3NextDecoderLayer(args, layer_idx=fa_idx) for _ in range(n_layers)
8782
]
8883
self.norm = nn.RMSNorm(args.hidden_size, eps=args.rms_norm_eps)
8984

0 commit comments

Comments (0)