Skip to content

Commit d27c5ce

Browse files
janhilgard and claude committed
style: black formatting
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent cfcf32a commit d27c5ce

File tree

2 files changed

+5
-11
lines changed

2 files changed

+5
-11
lines changed

vllm_mlx/engine_core.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -154,9 +154,7 @@ async def _engine_loop(self) -> None:
154154
# Emergency memory pressure threshold — dynamic based on gpu_memory_utilization
155155
_gpu_mem_util = self.config.gpu_memory_utilization
156156
try:
157-
_device_mem = mx.device_info().get(
158-
"memory_size", 200 * 1024 * 1024 * 1024
159-
)
157+
_device_mem = mx.device_info().get("memory_size", 200 * 1024 * 1024 * 1024)
160158
_memory_pressure_threshold = int(
161159
_device_mem * min(_gpu_mem_util + 0.05, 0.99)
162160
)
@@ -249,6 +247,7 @@ async def _engine_loop(self) -> None:
249247
break
250248
except Exception as e:
251249
import traceback
250+
252251
logger.error(f"Engine loop error: {e}\n{traceback.format_exc()}")
253252
await asyncio.sleep(0.1)
254253

vllm_mlx/patches/qwen3_next_mtp.py

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -52,9 +52,7 @@ def inject_mtp_support(model: Any, model_path, config: dict) -> bool:
5252
model_path = Path(model_path)
5353
mtp_file = model_path / "model-mtp.safetensors"
5454
if not mtp_file.exists():
55-
logger.warning(
56-
f"[MTP inject] model-mtp.safetensors not found in {model_path}"
57-
)
55+
logger.warning(f"[MTP inject] model-mtp.safetensors not found in {model_path}")
5856
return False
5957

6058
args = model.args
@@ -76,14 +74,11 @@ def __init__(self, args, n_layers):
7674
self.pre_fc_norm_embedding = nn.RMSNorm(
7775
args.hidden_size, eps=args.rms_norm_eps
7876
)
79-
self.fc = nn.Linear(
80-
args.hidden_size * 2, args.hidden_size, bias=False
81-
)
77+
self.fc = nn.Linear(args.hidden_size * 2, args.hidden_size, bias=False)
8278
# MTP decoder uses full attention (not linear/delta-net)
8379
fa_idx = args.full_attention_interval - 1
8480
self.layers = [
85-
Qwen3NextDecoderLayer(args, layer_idx=fa_idx)
86-
for _ in range(n_layers)
81+
Qwen3NextDecoderLayer(args, layer_idx=fa_idx) for _ in range(n_layers)
8782
]
8883
self.norm = nn.RMSNorm(args.hidden_size, eps=args.rms_norm_eps)
8984

0 commit comments

Comments (0)