Skip to content

Commit e38dbbd

Browse files
committed
fix qwen3.5 lora slicing
Signed-off-by: hallerite <git@hallerite.com>
1 parent 483463f commit e38dbbd

File tree

2 files changed: +5 −4 lines changed

vllm/lora/layers/column_parallel_linear.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -285,7 +285,7 @@ def can_replace_layer(
285 285       ) -> bool:
286 286           return (
287 287               type(source_layer) is MergedColumnParallelLinear
288     -             and len(packed_modules_list) == 2
    288 +             and len(packed_modules_list) == len(source_layer.output_sizes)
289 289           )
290 290
291 291

vllm/model_executor/models/qwen3_5.py

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -528,8 +528,9 @@ class Qwen3_5ForCausalLMBase(
528 528               "v_proj",
529 529           ],
530 530           "gate_up_proj": ["gate_proj", "up_proj"],
531     -         # GDN fused projections.
532     -         "in_proj_qkvz": ["in_proj_qkv", "in_proj_z"],
    531 +         # GDN fused projections — 4 packed modules to match 4 output_sizes
    532 +         # in create_qkvz_proj for correct per-slice TP sharding with LoRA.
    533 +         "in_proj_qkvz": ["in_proj_q", "in_proj_k", "in_proj_v", "in_proj_z"],
533 534           "in_proj_ba": ["in_proj_b", "in_proj_a"],
534 535       }
535 536
@@ -632,7 +633,7 @@ class Qwen3_5ForConditionalGeneration(Qwen3VLForConditionalGeneration, IsHybrid)
632 633       supports_multimodal_pruning = False
633 634
634 635       packed_modules_mapping = Qwen3VLForConditionalGeneration.packed_modules_mapping | {
635     -         "in_proj_qkvz": ["in_proj_qkv", "in_proj_z"],
    636 +         "in_proj_qkvz": ["in_proj_q", "in_proj_k", "in_proj_v", "in_proj_z"],
636 637           "in_proj_ba": ["in_proj_b", "in_proj_a"],
637 638       }
638 639
0 commit comments

Comments
 (0)