GradientHQ
diff --git a/pyproject.toml b/pyproject.toml
Lines changed: 7 additions & 7 deletions
diff --git a/src/parallax/utils/utils.py b/src/parallax/utils/utils.py
Lines changed: 2 additions & 5 deletions
diff --git a/src/parallax_extensions/kernels/paged_attention.cpp b/src/parallax_extensions/kernels/paged_attention.cpp
Lines changed: 1 addition & 1 deletion
diff --git a/src/parallax_extensions/kernels/reshape_and_cache.cpp b/src/parallax_extensions/kernels/reshape_and_cache.cpp
Lines changed: 3 additions & 3 deletions
diff --git a/src/parallax_extensions/kernels/reshape_and_cache.h b/src/parallax_extensions/kernels/reshape_and_cache.h
Lines changed: 2 additions & 2 deletions
diff --git a/src/parallax_extensions/lib/_ext.cpython-311-darwin.so b/src/parallax_extensions/lib/_ext.cpython-311-darwin.so
240 Bytes
diff --git a/src/parallax_extensions/lib/_ext.cpython-312-darwin.so b/src/parallax_extensions/lib/_ext.cpython-312-darwin.so
224 Bytes
diff --git a/src/parallax_extensions/lib/_ext.cpython-313-darwin.so b/src/parallax_extensions/lib/_ext.cpython-313-darwin.so
240 Bytes
diff --git a/src/parallax_extensions/lib/libparallax_ext.dylib b/src/parallax_extensions/lib/libparallax_ext.dylib
80 Bytes
diff --git a/src/parallax_extensions/lib/parallax_ext.metallib b/src/parallax_extensions/lib/parallax_ext.metallib
8.58 KB
@@ -43,23 +43,23 @@ parallax = "parallax.cli:main"
 [project.optional-dependencies]
 
 mac = [
-  "nanobind==2.10.2",
+  "nanobind==2.12.0",
   "torch==2.8.0",
-  "mlx-lm==0.30.6",
-  "mlx==0.30.4",
+  "mlx-lm==0.31.3",
+  "mlx==0.31.2",
 ]
 
 gpu = [
   "sglang[all]==0.5.12",
   "accelerate",
-  "mlx-lm==0.28.4",
-  "mlx[cpu]==0.30.0",
+  "mlx-lm==0.31.3",
+  "mlx[cpu]==0.31.2",
 ]
 
 vllm = [
   "vllm==0.14.0",
-  "mlx-lm==0.28.4",
-  "mlx[cpu]==0.30.0",
+  "mlx-lm==0.31.3",
+  "mlx[cpu]==0.31.2",
 ]
 
 benchmark = [
 
@@ -27,10 +27,7 @@ def is_mps_available():
 def is_metal_available():
     """Check if MLX Metal backend is available"""
     try:
-        import mlx.core as mx
-
-        mx.metal.device_info()
-        return True
+        return mx.metal.is_available()
     except (RuntimeError, AttributeError, ImportError):
         return False
 
@@ -43,7 +40,7 @@ def get_current_device():
     device = "cpu"
     if is_cuda_available():
         device = "cuda"
-    if is_mps_available():
+    if is_metal_available():
         device = "mlx"
     return device
 
 
@@ -120,7 +120,7 @@ void PagedAttentionV1::eval_gpu(
     auto kernel = d.get_kernel(kname, lib, hash_name, func_consts);
 
     // Prepare to encode kernel
-    auto& compute_encoder = d.get_command_encoder(s.index);
+    auto& compute_encoder = mx::metal::get_command_encoder(s);
     compute_encoder.set_compute_pipeline_state(kernel);
 
     // Shared Memory
 
@@ -15,8 +15,8 @@ namespace parallax_ext {
 mx::array reshape_and_cache(
     const mx::array& key,          // [num_tokens, num_heads, head_size]
     const mx::array& value,        // [num_tokens, num_heads, head_size]
-    mx::array& key_cache,          // [num_blocks, num_heads, head_size/x, block_size, x]
-    mx::array& value_cache,        // [num_blocks, num_heads, head_size/x, block_size]
+    const mx::array& key_cache,    // [num_blocks, num_heads, head_size/x, block_size, x]
+    const mx::array& value_cache,  // [num_blocks, num_heads, head_size/x, block_size]
     const mx::array& slot_mapping, // [num_tokens]
     mx::StreamOrDevice s /* = {} */ // Stream on which to schedule the operation
 ) {
@@ -88,7 +88,7 @@ void ReshapeAndCache::eval_gpu(
     auto kernel = d.get_kernel(kname, lib, hash_name, func_consts);
 
     // Prepare to encode kernel
-    auto& compute_encoder = d.get_command_encoder(s.index);
+    auto& compute_encoder = mx::metal::get_command_encoder(s);
     compute_encoder.set_compute_pipeline_state(kernel);
 
     // Calculate parameters
 
@@ -8,8 +8,8 @@ namespace parallax_ext {
 mx::array reshape_and_cache(
     const mx::array& key,           // [num_tokens, num_heads, head_size]
     const mx::array& value,         // [num_tokens, num_heads, head_size]
-    mx::array& key_cache,           // [num_blocks, num_heads, head_size/x, block_size, x]
-    mx::array& value_cache,         // [num_blocks, num_heads, head_size/x, block_size]
+    const mx::array& key_cache,     // [num_blocks, num_heads, head_size/x, block_size, x]
+    const mx::array& value_cache,   // [num_blocks, num_heads, head_size/x, block_size]
     const mx::array& slot_mapping,  // [num_tokens]
     mx::StreamOrDevice s /* = {} */ // Stream on which to schedule the operation
 );