print unquantized

sfc-gh-mhidayetoglu · sfc-gh-mhidayetoglu · commit b56ba3f0b4c3 · 2025-04-24T23:58:29.000-07:00
diff --git a/vllm/model_executor/layers/linear.py b/vllm/model_executor/layers/linear.py
@@ -194,7 +194,8 @@ def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
             # row parallel linear
             assert layer.weight.shape[1] % sp_size == 0
             chunk_size = layer.weight.shape[1] // sp_size
-            weight = layer.weight.split(chunk_size, dim=1)[sp_rank]
+            weight = layer.weight.split(chunk_size,
+                                        dim=1)[sp_rank].contiguous()
         else:
             # column parallel linear
             assert layer.weight.shape[0] % sp_size == 0