Fix Qwen3.5 LoRA slicing for fused GDN projections

hallerite · hallerite · commit e38dbbdb76ba · 2026-03-11T22:03:30.000Z
Signed-off-by: hallerite <git@hallerite.com>
diff --git a/vllm/lora/layers/column_parallel_linear.py b/vllm/lora/layers/column_parallel_linear.py
@@ -285,7 +285,7 @@ def can_replace_layer(
     ) -> bool:
         return (
             type(source_layer) is MergedColumnParallelLinear
-            and len(packed_modules_list) == 2
+            and len(packed_modules_list) == len(source_layer.output_sizes)
         )
 
 
diff --git a/vllm/model_executor/models/qwen3_5.py b/vllm/model_executor/models/qwen3_5.py
@@ -528,8 +528,9 @@ class Qwen3_5ForCausalLMBase(
             "v_proj",
         ],
         "gate_up_proj": ["gate_proj", "up_proj"],
-        # GDN fused projections.
-        "in_proj_qkvz": ["in_proj_qkv", "in_proj_z"],
+        # GDN fused projections — 4 packed modules to match 4 output_sizes
+        # in create_qkvz_proj for correct per-slice TP sharding with LoRA.
+        "in_proj_qkvz": ["in_proj_q", "in_proj_k", "in_proj_v", "in_proj_z"],
         "in_proj_ba": ["in_proj_b", "in_proj_a"],
     }
 
@@ -632,7 +633,7 @@ class Qwen3_5ForConditionalGeneration(Qwen3VLForConditionalGeneration, IsHybrid)
     supports_multimodal_pruning = False
 
     packed_modules_mapping = Qwen3VLForConditionalGeneration.packed_modules_mapping | {
-        "in_proj_qkvz": ["in_proj_qkv", "in_proj_z"],
+        "in_proj_qkvz": ["in_proj_q", "in_proj_k", "in_proj_v", "in_proj_z"],
         "in_proj_ba": ["in_proj_b", "in_proj_a"],
     }
 

Original file line number	Diff line number	Diff line change
`@@ -285,7 +285,7 @@ def can_replace_layer(`
`285`	`285`	`) -> bool:`
`286`	`286`	`return (`
`287`	`287`	`type(source_layer) is MergedColumnParallelLinear`
`288`		`- and len(packed_modules_list) == 2`
	`288`	`+ and len(packed_modules_list) == len(source_layer.output_sizes)`
`289`	`289`	`)`
`290`	`290`
`291`	`291`