Skip to content

Commit 656dcc1

Browse files
authored
Remove fp8 monkey patch (#2960)
1 parent 8af7048 commit 656dcc1

File tree

1 file changed: +0 additions, -20 deletions

python/sglang/srt/layers/quantization/__init__.py

Lines changed: 0 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -56,25 +56,6 @@ def get_quantization_config(quantization: str) -> Type[QuantizationConfig]:
5656
return QUANTIZATION_METHODS[quantization]
5757

5858

59-
def fp8_get_quant_method(self, layer, prefix):
60-
"""Enhanced get_quant_method for FP8 config."""
61-
from vllm.model_executor.layers.quantization.utils.quant_utils import (
62-
is_layer_skipped,
63-
)
64-
65-
from sglang.srt.layers.linear import LinearBase, UnquantizedLinearMethod
66-
from sglang.srt.layers.moe.fused_moe_triton.layer import FusedMoE
67-
from sglang.srt.layers.quantization.fp8 import Fp8LinearMethod, Fp8MoEMethod
68-
69-
if isinstance(layer, LinearBase):
70-
if is_layer_skipped(prefix, self.ignored_layers):
71-
return UnquantizedLinearMethod()
72-
return Fp8LinearMethod(self)
73-
elif isinstance(layer, FusedMoE):
74-
return Fp8MoEMethod(self)
75-
return None
76-
77-
7859
def gptq_get_quant_method(self, layer, prefix):
7960
from vllm.model_executor.layers.quantization.gptq_marlin import (
8061
GPTQMarlinLinearMethod,
@@ -126,7 +107,6 @@ def patched_isinstance(obj, classinfo):
126107

127108
def apply_monkey_patches():
128109
"""Apply all monkey patches in one place."""
129-
setattr(Fp8Config, "get_quant_method", fp8_get_quant_method)
130110
setattr(GPTQMarlinConfig, "get_quant_method", gptq_get_quant_method)
131111
setattr(AWQMarlinConfig, "get_quant_method", awq_get_quant_method)
132112

0 commit comments

Comments
 (0)