We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent aca20c8 commit 2c5e0a2Copy full SHA for 2c5e0a2
2 files changed
src/parallax/server/executor/mlx_executor.py
@@ -199,7 +199,9 @@ def __init__(
199
if chunked_prefill_size == 0:
200
chunked_prefill_size = None
201
elif not enable_prefix_cache:
202
- raise ValueError("Chunked prefill is not supported when disable prefix cache, please enable prefix cache or set --chunked-prefill-size 0")
+ raise ValueError(
203
+ "Chunked prefill is not supported when disable prefix cache, please enable prefix cache or set --chunked-prefill-size 0"
204
+ )
205
206
self.cache_manager = CacheManager(
207
num_layers=self.num_shard_layers,
src/parallax/server/shard_loader.py
@@ -39,6 +39,7 @@
39
"GlmMoeDsaForCausalLM": "DeepseekV32ForCausalLM",
40
}
41
42
+
43
class MLXModelLoader:
44
"""
45
Handles downloading model assets from Hugging Face (if needed) and loading
0 commit comments