@@ -56,25 +56,6 @@ def get_quantization_config(quantization: str) -> Type[QuantizationConfig]:
56
56
return QUANTIZATION_METHODS [quantization ]
57
57
58
58
59
def fp8_get_quant_method(self, layer, prefix):
    """Pick the quantization method object for ``layer`` under this FP8 config.

    Written as a replacement for ``Fp8Config.get_quant_method`` (it is
    attached with ``setattr`` elsewhere in this module), so ``self`` is the
    config instance.

    Args:
        self: The config instance; its ``ignored_layers`` attribute is
            passed to ``is_layer_skipped``.
        layer: The layer object to select a method for.
        prefix: Layer name prefix passed to ``is_layer_skipped``.

    Returns:
        ``UnquantizedLinearMethod()`` for a ``LinearBase`` layer that
        ``is_layer_skipped`` reports as skipped, ``Fp8LinearMethod(self)``
        for any other ``LinearBase`` layer, ``Fp8MoEMethod(self)`` for a
        ``FusedMoE`` layer, and ``None`` for everything else.
    """
    # Dependencies are resolved at call time, in the same order as before.
    from vllm.model_executor.layers.quantization.utils.quant_utils import (
        is_layer_skipped,
    )

    from sglang.srt.layers.linear import LinearBase, UnquantizedLinearMethod
    from sglang.srt.layers.moe.fused_moe_triton.layer import FusedMoE
    from sglang.srt.layers.quantization.fp8 import Fp8LinearMethod, Fp8MoEMethod

    # The LinearBase check stays first so a layer matching both types is
    # still handled as linear.
    if not isinstance(layer, LinearBase):
        return Fp8MoEMethod(self) if isinstance(layer, FusedMoE) else None

    skip_quantization = is_layer_skipped(prefix, self.ignored_layers)
    return UnquantizedLinearMethod() if skip_quantization else Fp8LinearMethod(self)
76
-
77
-
78
59
def gptq_get_quant_method (self , layer , prefix ):
79
60
from vllm .model_executor .layers .quantization .gptq_marlin import (
80
61
GPTQMarlinLinearMethod ,
@@ -126,7 +107,6 @@ def patched_isinstance(obj, classinfo):
126
107
127
108
def apply_monkey_patches ():
128
109
"""Apply all monkey patches in one place."""
129
- setattr (Fp8Config , "get_quant_method" , fp8_get_quant_method )
130
110
setattr (GPTQMarlinConfig , "get_quant_method" , gptq_get_quant_method )
131
111
setattr (AWQMarlinConfig , "get_quant_method" , awq_get_quant_method )
132
112
0 commit comments