[quantization] Fix QuantGELU to preserve nn.GELU approximate mode

mhs4670go · mhs4670go · commit 04d1ffb2da8a · 2026-05-27T21:26:28.000+09:00
QuantGELU previously called _gelu(x) without forwarding the original
nn.GELU.approximate setting, so nn.GELU(approximate="tanh") was executed
as exact GELU. Store the original approximate mode and use it in forward.
Add NO_QUANT parity coverage for tanh GELU.

TICO-DCO-1.0-Signed-off-by: seongwoo <mhs4670go@naver.com>
diff --git a/test/quantization/wrapq/wrappers/test_quant_elementwise.py b/test/quantization/wrapq/wrappers/test_quant_elementwise.py
@@ -54,6 +54,11 @@
     (torch.nn.Tanh(), torch.tanh, QuantTanh),
     (torch.nn.ReLU(), torch.relu, QuantReLU),
     (torch.nn.GELU(), torch.nn.functional.gelu, QuantGELU),
+    (
+        torch.nn.GELU(approximate="tanh"),
+        partial(torch.nn.functional.gelu, approximate="tanh"),
+        QuantGELU,
+    ),
 ]
 
 try:
@@ -77,6 +82,22 @@ def _calibrate(self, qw, x):
         _ = qw(x)
         qw.freeze_qparams()
 
+    # ------------------------------------------------------------------
+    def test_gelu_approximate_tanh_no_quant_parity(self):
+        x = torch.linspace(-6.0, 6.0, steps=257).reshape(-1, 1)
+        fp32_mod = torch.nn.GELU(approximate="tanh")
+        qw = PTQWrapper(fp32_mod)
+
+        self.assertIs(qw._mode, Mode.NO_QUANT)
+
+        with torch.no_grad():
+            q_out = qw(x)
+            fp_out = fp32_mod(x)
+            wrong_out = torch.nn.functional.gelu(x, approximate="none")
+
+        torch.testing.assert_close(q_out, fp_out, rtol=0, atol=0)
+        self.assertGreater((wrong_out - fp_out).abs().max().item(), 1e-6)
+
     # ------------------------------------------------------------------
     def test_registry_and_factory(self):
         for fp32_mod, _, quant_cls in ACTIVATIONS:
diff --git a/tico/quantization/wrapq/wrappers/quant_elementwise.py b/tico/quantization/wrapq/wrappers/quant_elementwise.py
@@ -146,10 +146,26 @@ def FUNC(x: torch.Tensor) -> torch.Tensor:
 
 @register(nn.GELU)
 class QuantGELU(QuantElementwise):
+    def __init__(
+        self,
+        fp_module: nn.Module,
+        *,
+        qcfg: Optional[PTQConfig] = None,
+        fp_name: Optional[str] = None,
+    ):
+        super().__init__(fp_module, qcfg=qcfg, fp_name=fp_name)
+        self.approximate = getattr(fp_module, "approximate", "none")
+
     @staticmethod
     def FUNC(x: torch.Tensor) -> torch.Tensor:
         return _gelu(x)
 
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        x_q = self._fq(x, self.act_in_obs)
+        y = _gelu(x_q, approximate=self.approximate)
+        y_q = self._fq(y, self.act_out_obs)
+        return y_q
+
 
 @try_register("transformers.activations.GELUTanh")
 class QuantGELUTanh(QuantElementwise):