Skip to content

Commit

Permalink
Remove fp8 monkey patch (#2960)
Browse files Browse the repository at this point in the history
  • Loading branch information
ispobock authored Jan 18, 2025
1 parent 8af7048 commit 656dcc1
Showing 1 changed file with 0 additions and 20 deletions.
20 changes: 0 additions & 20 deletions python/sglang/srt/layers/quantization/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,25 +56,6 @@ def get_quantization_config(quantization: str) -> Type[QuantizationConfig]:
return QUANTIZATION_METHODS[quantization]


def fp8_get_quant_method(self, layer, prefix):
    """Enhanced get_quant_method for FP8 config.

    Picks the quantization method object to use for ``layer``.

    Args:
        self: The config instance; its ``ignored_layers`` attribute is
            passed to ``is_layer_skipped`` for linear layers, and the
            instance itself is handed to the FP8 method constructors.
        layer: The layer object whose type decides the method.
        prefix: Layer name prefix, checked against ``self.ignored_layers``.

    Returns:
        ``UnquantizedLinearMethod()`` for a ``LinearBase`` layer that
        ``is_layer_skipped`` reports as skipped, ``Fp8LinearMethod(self)``
        for any other ``LinearBase`` layer, ``Fp8MoEMethod(self)`` for a
        ``FusedMoE`` layer, and ``None`` for every other layer type.
    """
    # Imports are kept function-local, in the original order.
    # NOTE(review): presumably this avoids import cycles between the
    # quantization package and the layer modules -- confirm before hoisting.
    from vllm.model_executor.layers.quantization.utils.quant_utils import (
        is_layer_skipped,
    )

    from sglang.srt.layers.linear import LinearBase, UnquantizedLinearMethod
    from sglang.srt.layers.moe.fused_moe_triton.layer import FusedMoE
    from sglang.srt.layers.quantization.fp8 import Fp8LinearMethod, Fp8MoEMethod

    if isinstance(layer, LinearBase):
        # Layers listed as ignored stay unquantized.
        if is_layer_skipped(prefix, self.ignored_layers):
            return UnquantizedLinearMethod()
        return Fp8LinearMethod(self)

    if isinstance(layer, FusedMoE):
        return Fp8MoEMethod(self)

    # Any other layer type has no FP8 method.
    return None


def gptq_get_quant_method(self, layer, prefix):
from vllm.model_executor.layers.quantization.gptq_marlin import (
GPTQMarlinLinearMethod,
Expand Down Expand Up @@ -126,7 +107,6 @@ def patched_isinstance(obj, classinfo):

def apply_monkey_patches():
"""Apply all monkey patches in one place."""
setattr(Fp8Config, "get_quant_method", fp8_get_quant_method)
setattr(GPTQMarlinConfig, "get_quant_method", gptq_get_quant_method)
setattr(AWQMarlinConfig, "get_quant_method", awq_get_quant_method)

Expand Down

0 comments on commit 656dcc1

Please sign in to comment.