Skip to content

Commit ed90690

Browse files
authored
Merge pull request #50 from liwei109/quant
[refactor] remove redundant code in linear
2 parents 75d0bda + 383eb54 commit ed90690

8 files changed

Lines changed: 37 additions & 1570 deletions

File tree

vllm_kunlun/models/llama.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -38,7 +38,7 @@
3838
from vllm.distributed import get_pp_group, get_tensor_model_parallel_world_size
3939
from vllm_kunlun.ops.activation import SiluAndMul
4040
from vllm.model_executor.layers.layernorm import RMSNorm
41-
from vllm_kunlun.ops.linear import (MergedColumnParallelLinear,
41+
from vllm.model_executor.layers.linear import (MergedColumnParallelLinear,
4242
QKVParallelLinear,
4343
RowParallelLinear)
4444
from vllm.model_executor.layers.logits_processor import LogitsProcessor

vllm_kunlun/models/qwen2.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -40,7 +40,7 @@
4040
from vllm.distributed import get_pp_group, get_tensor_model_parallel_world_size
4141
from vllm_kunlun.ops.activation import SiluAndMul
4242
from vllm.model_executor.layers.layernorm import RMSNorm
43-
from vllm_kunlun.ops.linear import (MergedColumnParallelLinear,
43+
from vllm.model_executor.layers.linear import (MergedColumnParallelLinear,
4444
QKVParallelLinear,
4545
RowParallelLinear)
4646
from vllm.model_executor.layers.logits_processor import LogitsProcessor

vllm_kunlun/models/qwen3.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -38,7 +38,7 @@
3838
from vllm.logger import init_logger
3939
from vllm.model_executor.layers.layernorm import RMSNorm
4040

41-
from vllm_kunlun.ops.linear import (QKVParallelLinear,
41+
from vllm.model_executor.layers.linear import (QKVParallelLinear,
4242
RowParallelLinear)
4343
from vllm.model_executor.layers.logits_processor import LogitsProcessor
4444
from vllm.model_executor.layers.quantization import QuantizationConfig

vllm_kunlun/models/qwen3_moe.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -40,7 +40,7 @@
4040
from vllm_kunlun.ops.activation import SiluAndMul
4141
from vllm_kunlun.ops.fused_moe.layer import FusedMoE
4242
from vllm.model_executor.layers.layernorm import RMSNorm
43-
from vllm_kunlun.ops.linear import (MergedColumnParallelLinear,
43+
from vllm.model_executor.layers.linear import (MergedColumnParallelLinear,
4444
QKVParallelLinear,
4545
RowParallelLinear,
4646
ReplicatedLinear)

vllm_kunlun/models/qwen3_next.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -33,11 +33,11 @@
3333
from vllm.model_executor.layers.layernorm import (
3434
GemmaRMSNorm as Qwen3NextRMSNorm)
3535
# yapf: enable
36-
from vllm_kunlun.ops.linear import (ColumnParallelLinear,
36+
from vllm.model_executor.layers.linear import (ColumnParallelLinear,
3737
MergedColumnParallelLinear,
3838
QKVParallelLinear,
3939
RowParallelLinear)
40-
from vllm_kunlun.ops.linear import ReplicatedLinear
40+
from vllm.model_executor.layers.linear import ReplicatedLinear
4141
from vllm.model_executor.layers.logits_processor import LogitsProcessor
4242
from vllm.model_executor.layers.mamba.abstract import MambaBase
4343
from vllm.model_executor.layers.mamba.mamba_mixer2 import (

vllm_kunlun/models/qwen3_omni_moe_thinker.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -92,7 +92,7 @@
9292
from vllm.platforms.interface import _Backend
9393
from vllm.sequence import IntermediateTensors
9494

95-
from vllm_kunlun.ops.linear import (
95+
from vllm.model_executor.layers.linear import (
9696
ColumnParallelLinear,
9797
MergedColumnParallelLinear,
9898
QKVParallelLinear,

vllm_kunlun/models/qwen3_vl.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -50,7 +50,7 @@
5050
from vllm.logger import init_logger
5151
from vllm.model_executor.layers.activation import _ACTIVATION_REGISTRY
5252

53-
from vllm_kunlun.ops.linear import (ColumnParallelLinear,
53+
from vllm.model_executor.layers.linear import (ColumnParallelLinear,
5454
RowParallelLinear)
5555
from vllm.model_executor.layers.logits_processor import LogitsProcessor
5656
from vllm.model_executor.layers.quantization import QuantizationConfig

0 commit comments

Comments
 (0)