From 51d366a6363eb5a0166d01f3df48a51fd15362e8 Mon Sep 17 00:00:00 2001 From: Evgenii Maltsev Date: Fri, 22 May 2026 18:01:15 +0000 Subject: [PATCH] [quantization] Microscaling (MX) Quantization for LayerNorm in Qwen3-VL Vision Model Evaluation of microscaling (MX) Quantization for LayerNorm in Qwen3-VL Vision Model TICO-DCO-1.0-Signed-off-by: Evgenii Maltsev --- tico/quantization/config/builders.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/tico/quantization/config/builders.py b/tico/quantization/config/builders.py index 26d37bb3..a6092943 100644 --- a/tico/quantization/config/builders.py +++ b/tico/quantization/config/builders.py @@ -25,6 +25,7 @@ from tico.quantization.wrapq.dtypes import DType from tico.quantization.wrapq.observers.base import ObserverBase from tico.quantization.wrapq.observers.minmax import MinMaxObserver +from tico.quantization.wrapq.observers.mx import MXObserver from tico.quantization.wrapq.qscheme import QScheme @@ -533,6 +534,22 @@ def _build_qwen3_vl_norm_override( ]: override[obs_name] = {"qscheme": norm_qscheme} + # LayerNorm observers that benefit from microscaling + mx_observers = [ + "inv_std", + "act_in", + "centered", + "square", + "act_out", + "norm", + ] + for obs_name in mx_observers: + override[obs_name] = { + "observer": MXObserver, + "elem_format": "int8", + "axis": 1, + } + if norm_weight_dtype is not None: weight_qscheme = auto_qscheme_for(norm_weight_dtype, "weight") override["weight"] = {