Samsung
diff --git a/‎test/quantization/wrapq/wrappers/gemma4/test_quant_vision_pooler.py‎
Lines changed: 497 additions & 0 deletions b/‎test/quantization/wrapq/wrappers/gemma4/test_quant_vision_pooler.py‎
Lines changed: 497 additions & 0 deletions
diff --git a/‎test/quantization/wrapq/wrappers/gemma4/test_quantize_vision_pooler.py‎
Lines changed: 199 additions & 0 deletions b/‎test/quantization/wrapq/wrappers/gemma4/test_quantize_vision_pooler.py‎
Lines changed: 199 additions & 0 deletions
diff --git a/‎tico/quantization/recipes/debug/wrapper_smoke/cases/gemma4.py‎
Lines changed: 123 additions & 0 deletions b/‎tico/quantization/recipes/debug/wrapper_smoke/cases/gemma4.py‎
Lines changed: 123 additions & 0 deletions
diff --git a/‎tico/quantization/wrapq/examples/gemma4/__init__.py‎
Lines changed: 1 addition & 0 deletions b/‎tico/quantization/wrapq/examples/gemma4/__init__.py‎
Lines changed: 1 addition & 0 deletions
@@ -0,0 +1,199 @@
+# Copyright (c) 2026 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Smoke tests for Gemma4 vision pooler prepare-calibrate-convert flow."""
+
+import copy
+import os
+import unittest
+
+import torch
+
+from tico.quantization import convert, prepare
+from tico.quantization.config.ptq import PTQConfig
+from tico.quantization.wrapq.mode import Mode
+from tico.quantization.wrapq.wrappers.ptq_wrapper import PTQWrapper
+
+
+# Internal smoke tests are opt-in: enabled only when RUN_INTERNAL_TESTS is "1".
+IS_INTERNAL_TEST = os.getenv("RUN_INTERNAL_TESTS", "0") == "1"
+# Skip reason shown when the Gemma4 classes cannot be imported from transformers.
+_SKIP_MSG = "required transformers Gemma4 modules are not installed"
+
+
+def _has_gemma4() -> bool:
+    """Report whether the Gemma4 vision classes can be imported.
+
+    Returns:
+        ``True`` when both ``Gemma4VisionConfig`` and ``Gemma4VisionPooler``
+        import cleanly from ``transformers``; ``False`` when either import
+        raises any ``Exception``.
+    """
+    try:
+        from transformers.models.gemma4.configuration_gemma4 import (  # noqa: F401
+            Gemma4VisionConfig,
+        )
+        from transformers.models.gemma4.modeling_gemma4 import (  # noqa: F401
+            Gemma4VisionPooler,
+        )
+    except Exception:
+        # Any import-time failure is treated as "Gemma4 is unavailable".
+        available = False
+    else:
+        available = True
+    return available
+
+
+def _make_vision_config():
+    """Create a tiny Gemma4 vision config for synthetic smoke tests.
+
+    Returns:
+        A ``Gemma4VisionConfig`` with small dimensions (hidden size 32, one
+        hidden layer, four attention heads) whose ``_attn_implementation``
+        is set to ``"eager"``.
+    """
+    from transformers.models.gemma4.configuration_gemma4 import Gemma4VisionConfig
+
+    cfg = Gemma4VisionConfig(
+        hidden_size=32,
+        intermediate_size=64,
+        num_hidden_layers=1,
+        num_attention_heads=4,
+        num_key_value_heads=2,
+        head_dim=8,
+        attention_dropout=0.0,
+        max_position_embeddings=128,
+        rms_norm_eps=1e-6,
+        use_clipped_linears=False,
+        rope_parameters={"rope_type": "default", "rope_theta": 100.0},
+    )
+    # A plain assignment both creates and overwrites the attribute, so no
+    # hasattr() check is needed to cover the "missing" and "present" cases.
+    cfg._attn_implementation = "eager"
+    return cfg
+
+
+def _pixel_position_ids(batch_size: int, seq_len: int) -> torch.Tensor:
+    """Build deterministic ``(x, y)`` position ids for a flat patch sequence.
+
+    Patch ``i`` is placed at column ``i % side`` and row ``i // side`` where
+    ``side = int(seq_len ** 0.5)``.
+
+    Args:
+        batch_size: Number of batch entries; every entry shares the same grid.
+        seq_len: Number of patches in the sequence.
+
+    Returns:
+        An int64 tensor of shape ``(batch_size, seq_len, 2)``.
+    """
+    side = int(seq_len**0.5)
+    patch_index = torch.arange(seq_len)
+    columns = patch_index % side
+    rows = patch_index // side
+    grid = torch.stack((columns, rows), dim=-1)
+    # Broadcast the single grid across the batch dimension without copying.
+    batched = grid.unsqueeze(0).expand(batch_size, -1, -1)
+    return batched.long()
+
+
+def _padding_positions(batch_size: int, seq_len: int) -> torch.Tensor:
+    """Return a boolean ``(batch_size, seq_len)`` mask that is ``False`` everywhere.
+
+    An all-``False`` mask marks no position as padding.
+    """
+    mask_shape = (batch_size, seq_len)
+    return torch.full(mask_shape, False, dtype=torch.bool)
+
+
+@unittest.skipIf(
+    not IS_INTERNAL_TEST,
+    "Internal smoke test — set RUN_INTERNAL_TESTS=1 to enable it.",
+)
+@unittest.skipUnless(_has_gemma4(), _SKIP_MSG)
+class TestGemma4VisionPoolerSmoke(unittest.TestCase):
+    """Smoke-check the Gemma4 vision pooler wrapper and its PTQ pipeline."""
+
+    def setUp(self):
+        """Build a seeded tiny pooler plus an independent floating-point copy."""
+        from transformers.models.gemma4.modeling_gemma4 import Gemma4VisionPooler
+
+        # 16 patches pooled to 4 outputs: 16 / 4 = 4 patches per output,
+        # which corresponds to a pooling factor k = sqrt(4) = 2.
+        self.seq_len = 16
+        self.output_length = 4
+
+        # Seed the global RNG before the module and random samples are created.
+        torch.manual_seed(2026)
+        self.cfg = _make_vision_config()
+        pooler = Gemma4VisionPooler(self.cfg).eval()
+        self.fp_pooler = pooler
+        # Deep copy kept aside as the floating-point reference for comparisons.
+        self.fp_ref = copy.deepcopy(pooler).eval()
+
+    def _sample(self):
+        """Return keyword arguments for one synthetic pooler forward call."""
+        batch_size = 1
+        hidden = torch.randn(batch_size, self.seq_len, self.cfg.hidden_size)
+        position_ids = _pixel_position_ids(batch_size, self.seq_len)
+        padding = _padding_positions(batch_size, self.seq_len)
+        return {
+            "hidden_states": hidden,
+            "pixel_position_ids": position_ids,
+            "padding_positions": padding,
+            "output_length": self.output_length,
+        }
+
+    def _calibrate(self, prepared, num_batches=3):
+        """Run ``num_batches`` fresh synthetic samples through ``prepared``."""
+        with torch.no_grad():
+            for _ in range(num_batches):
+                prepared(**self._sample())
+
+    def test_no_quant_vision_pooler_matches_reference(self):
+        """An un-calibrated wrapper must reproduce the floating-point output."""
+        from tico.quantization.wrapq.wrappers.gemma4.quant_vision_pooler import (
+            QuantGemma4VisionPooler,
+        )
+
+        wrapper = QuantGemma4VisionPooler(self.fp_pooler, qcfg=PTQConfig())
+        wrapper = wrapper.eval()
+        inputs = self._sample()
+
+        with torch.no_grad():
+            quant_out = wrapper(**inputs)
+            fp_out = self.fp_ref(**inputs)
+
+        # Both modules return a tuple: (pooled_features, updated_padding).
+        self.assertIsInstance(quant_out, tuple)
+        self.assertIsInstance(fp_out, tuple)
+
+        quant_features, fp_features = quant_out[0], fp_out[0]
+        self.assertEqual(quant_features.shape, fp_features.shape)
+        features_close = torch.allclose(
+            quant_features, fp_features, atol=1e-5, rtol=1e-5
+        )
+        self.assertTrue(features_close)
+        self.assertTrue(torch.equal(quant_out[1], fp_out[1]))
+
+    def test_prepare_convert_vision_pooler_flow(self):
+        """Run prepare, calibrate and convert, then sanity-check one output."""
+        from tico.quantization.wrapq.wrappers.gemma4.quant_vision_pooler import (
+            QuantGemma4VisionPooler,
+        )
+
+        prepared = prepare(self.fp_pooler, PTQConfig())
+        self.assertIsInstance(prepared, PTQWrapper)
+        self.assertIsInstance(prepared.wrapped, QuantGemma4VisionPooler)
+
+        self._calibrate(prepared)
+
+        quantized = convert(prepared)
+        self.assertIs(quantized._mode, Mode.QUANT)
+
+        inputs = self._sample()
+        with torch.no_grad():
+            quant_out = quantized(**inputs)
+            fp_out = self.fp_ref(**inputs)
+
+        self.assertIsInstance(quant_out, tuple)
+        quant_features = quant_out[0]
+        # Only shape and finiteness are checked here, not numerical closeness.
+        self.assertEqual(quant_features.shape, fp_out[0].shape)
+        self.assertTrue(torch.isfinite(quant_features).all())
+
+    def test_as_export_module_flow(self):
+        """Check that ``as_export_module`` yields a working prefill adapter."""
+        from tico.quantization.wrapq.wrappers.gemma4.export_adapters import (
+            Gemma4VisionPoolerPrefillExportAdapter,
+        )
+
+        prepared = prepare(self.fp_pooler, PTQConfig())
+        self._calibrate(prepared)
+        quantized = convert(prepared)
+
+        grid_ids = _pixel_position_ids(1, self.seq_len)
+        adapter = quantized.wrapped.as_export_module(
+            output_length=self.output_length,
+            pixel_position_ids=grid_ids,
+        )
+
+        self.assertIsInstance(adapter, Gemma4VisionPoolerPrefillExportAdapter)
+
+        # The adapter is called without ``output_length``; only the three
+        # tensor inputs are forwarded.
+        inputs = self._sample()
+        adapter_keys = ("hidden_states", "pixel_position_ids", "padding_positions")
+        adapter_kwargs = {key: inputs[key] for key in adapter_keys}
+        with torch.no_grad():
+            out = adapter(**adapter_kwargs)
+
+        self.assertIsInstance(out, tuple)
+        self.assertEqual(len(out), 2)
+
+
+# Allow this test module to be executed directly as a script.
+if __name__ == "__main__":
+    unittest.main()
@@ -583,6 +583,26 @@ def _vision_position_ids(batch_size: int, seq_len: int) -> torch.Tensor:
     return xy.unsqueeze(0).expand(batch_size, -1, -1).long()
 
 
+def _pixel_position_ids(batch_size: int, seq_len: int) -> torch.Tensor:
+    """Build deterministic 2-D pixel position ids for a tiny patch sequence.
+
+    The result has shape ``(B, S, 2)`` with the last dimension holding the
+    ``(x, y)`` coordinates of each patch.  Patch ``i`` sits at column
+    ``i % side`` and row ``i // side`` with ``side = int(seq_len ** 0.5)``,
+    i.e. a square grid when ``seq_len`` is a perfect square.  The pooler
+    cases pick ``seq_len = output_length * k^2`` where ``k`` is the pooling
+    factor.
+
+    Args:
+        batch_size: Number of batch entries; every entry shares the same grid.
+        seq_len: Number of patches in the sequence.
+
+    Returns:
+        An int64 tensor of shape ``(batch_size, seq_len, 2)``.
+    """
+    side = int(seq_len**0.5)
+    patch_index = torch.arange(seq_len)
+    columns = patch_index % side
+    rows = patch_index // side
+    grid = torch.stack((columns, rows), dim=-1)
+    # Broadcast the single grid across the batch dimension without copying.
+    batched = grid.unsqueeze(0).expand(batch_size, -1, -1)
+    return batched.long()
+
+
+def _padding_positions(batch_size: int, seq_len: int) -> torch.Tensor:
+    """Return a boolean ``(batch_size, seq_len)`` mask that is ``False`` everywhere.
+
+    An all-``False`` mask marks no position as padding.
+    """
+    mask_shape = (batch_size, seq_len)
+    return torch.full(mask_shape, False, dtype=torch.bool)
+
+
 class Gemma4VisionAttentionCase(Gemma4BaseCase):
     """Smoke case for one tiny Gemma4 vision attention module."""
 
@@ -693,6 +713,108 @@ def eval_input(
         return self._sample()
 
 
+class Gemma4VisionPoolerCase(Gemma4BaseCase):
+    """Wrapper smoke case covering a single tiny Gemma4 vision pooler."""
+
+    name = "gemma4_vision_pooler"
+    description = "Quantize one tiny Gemma4 vision pooler module."
+    tags = ("gemma4", "e2b", "vision", "pooler")
+    max_mean_abs_diff = 2.0
+    # 16 patches pooled to 4 outputs: 16 / 4 = 4 patches per output, which
+    # corresponds to a pooling factor k = sqrt(4) = 2.
+    seq_len = 16
+    output_length = 4
+
+    def build(self, cfg: Mapping[str, Any]) -> tuple[torch.nn.Module, torch.nn.Module]:
+        """Construct the seeded pooler under test together with a cloned reference.
+
+        The created vision config is stored on ``self.vision_cfg`` because
+        ``_sample`` reads its ``hidden_size``.
+        """
+        from transformers.models.gemma4.modeling_gemma4 import Gemma4VisionPooler
+
+        torch.manual_seed(123)
+        self.vision_cfg = _make_vision_config()
+        pooler = Gemma4VisionPooler(self.vision_cfg).eval()
+        reference = clone_module(pooler)
+        return pooler, reference
+
+    def _sample(self) -> ForwardInput:
+        """Produce one keyword-only synthetic input for the pooler."""
+        batch_size = 1
+        hidden = torch.randn(batch_size, self.seq_len, self.vision_cfg.hidden_size)
+        position_ids = _pixel_position_ids(batch_size, self.seq_len)
+        padding = _padding_positions(batch_size, self.seq_len)
+        pooler_kwargs = {
+            "hidden_states": hidden,
+            "pixel_position_ids": position_ids,
+            "padding_positions": padding,
+            "output_length": self.output_length,
+        }
+        return ForwardInput((), pooler_kwargs)
+
+    def forward(self, module: torch.nn.Module, sample: ForwardInput) -> Any:
+        """Call ``module`` on a cloned ``sample`` and return the pooled features.
+
+        When the module returns a tuple only its first element is returned;
+        any other return value is passed through unchanged.
+        """
+        inputs = _clone_forward_input(sample)
+        result = module(*inputs.args, **dict(inputs.kwargs))
+        if isinstance(result, tuple):
+            # Keep just the pooled features so outputs can be compared directly.
+            return result[0]
+        return result
+
+    def reference_forward(
+        self, reference: torch.nn.Module, sample: ForwardInput
+    ) -> Any:
+        """Call the original pooler on a cloned ``sample``.
+
+        Applies the same output selection as ``forward``: the first element
+        of a tuple result, otherwise the result itself.
+        """
+        inputs = _clone_forward_input(sample)
+        result = reference(*inputs.args, **dict(inputs.kwargs))
+        if isinstance(result, tuple):
+            return result[0]
+        return result
+
+    def calibration_inputs(
+        self,
+        prepared: torch.nn.Module,
+        cfg: Mapping[str, Any],
+    ) -> list[ForwardInput]:
+        """Return three freshly generated samples for calibration."""
+        num_samples = 3
+        return [self._sample() for _ in range(num_samples)]
+
+    def eval_input(
+        self,
+        prepared: torch.nn.Module,
+        cfg: Mapping[str, Any],
+    ) -> ForwardInput:
+        """Return one freshly generated sample for evaluation."""
+        return self._sample()
+
+    def export_module(
+        self, quantized: torch.nn.Module, cfg: Mapping[str, Any]
+    ) -> torch.nn.Module:
+        """Return the prefill export adapter for the quantized pooler.
+
+        The wrapped module is asked for an export module in ``"prefill"``
+        mode with this case's fixed ``output_length`` and position ids.
+        Supplying ``pixel_position_ids`` is intended to let the adapter
+        precompute its pooling weight matrix and output mask at construction
+        time, so the dynamic ``F.one_hot`` and ``torch.div`` operations
+        become a static ``matmul`` (the adapter itself is defined elsewhere).
+
+        When the wrapped module has no ``as_export_module`` attribute,
+        ``quantized`` is returned unchanged.
+        """
+        wrapped = getattr(quantized, "wrapped", quantized)
+        if not hasattr(wrapped, "as_export_module"):
+            return quantized
+
+        grid_ids = _pixel_position_ids(1, self.seq_len)
+        adapter = wrapped.as_export_module(
+            mode="prefill",
+            output_length=self.output_length,
+            pixel_position_ids=grid_ids,
+        )
+        return adapter.eval()
+
+    def export_input(
+        self, eval_sample: ForwardInput, cfg: Mapping[str, Any]
+    ) -> ForwardInput:
+        """Convert the evaluation sample into positional export inputs.
+
+        ``output_length`` is dropped because the export adapter receives it
+        at construction time; the remaining three tensors are passed
+        positionally in the order hidden states, pixel position ids,
+        padding positions.
+        """
+        kwargs = dict(_clone_forward_input(eval_sample).kwargs)
+        ordered_keys = ("hidden_states", "pixel_position_ids", "padding_positions")
+        positional = tuple(kwargs[key] for key in ordered_keys)
+        return ForwardInput(positional, {})
+
+
 GEMMA4_CASES = (
     Gemma4TextMLPCase(),
     Gemma4TextAttentionCase(),
@@ -704,4 +826,5 @@ def eval_input(
     Gemma4TextDecoderLayerSharedKVCase(),
     Gemma4VisionAttentionCase(),
     Gemma4VisionEncoderLayerCase(),
+    Gemma4VisionPoolerCase(),
 )
@@ -0,0 +1 @@
+# DO NOT REMOVE THIS FILE