Skip to content

Commit 51d366a

Browse files
committed
[quantization] Microscaling (MX) Quantization for LayerNorm in Qwen3-VL Vision Model
Evaluation of microscaling (MX) Quantization for LayerNorm in Qwen3-VL Vision Model.

TICO-DCO-1.0-Signed-off-by: Evgenii Maltsev <e.maltsev@samsung.com>
1 parent 3df302e commit 51d366a

1 file changed

Lines changed: 17 additions & 0 deletions

File tree

tico/quantization/config/builders.py

Lines changed: 17 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -25,6 +25,7 @@
2525
from tico.quantization.wrapq.dtypes import DType
2626
from tico.quantization.wrapq.observers.base import ObserverBase
2727
from tico.quantization.wrapq.observers.minmax import MinMaxObserver
28+
from tico.quantization.wrapq.observers.mx import MXObserver
2829
from tico.quantization.wrapq.qscheme import QScheme
2930

3031

@@ -533,6 +534,22 @@ def _build_qwen3_vl_norm_override(
533534
]:
534535
override[obs_name] = {"qscheme": norm_qscheme}
535536

537+
# LayerNorm observers that benefit from microscaling
538+
mx_observers = [
539+
"inv_std",
540+
"act_in",
541+
"centered",
542+
"square",
543+
"act_out",
544+
"norm",
545+
]
546+
for obs_name in mx_observers:
547+
override[obs_name] = {
548+
"observer": MXObserver,
549+
"elem_format": "int8",
550+
"axis": 1,
551+
}
552+
536553
if norm_weight_dtype is not None:
537554
weight_qscheme = auto_qscheme_for(norm_weight_dtype, "weight")
538555
override["weight"] = {

0 commit comments

Comments
 (0)