From 9dd119c7871e59998f9f270375fa1ecda3435b45 Mon Sep 17 00:00:00 2001 From: anzr299 Date: Thu, 8 Jan 2026 20:43:16 +0400 Subject: [PATCH 01/12] init --- tests/executorch/test_ptq.py | 479 +++++++++++++++++++++++++++++++++++ 1 file changed, 479 insertions(+) create mode 100644 tests/executorch/test_ptq.py diff --git a/tests/executorch/test_ptq.py b/tests/executorch/test_ptq.py new file mode 100644 index 00000000000..e699d185ae6 --- /dev/null +++ b/tests/executorch/test_ptq.py @@ -0,0 +1,479 @@ +# Copyright (c) 2026 Intel Corporation +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import json +from dataclasses import dataclass +from functools import partial +from typing import Any, Callable + +import pytest +import torch +import torch.fx +import torch.nn.parallel +import torch.optim +import torch.utils.data +import torch.utils.data.distributed +import torchvision.models as models +from executorch.backends.openvino.quantizer.quantizer import OpenVINOQuantizer +from torch.ao.quantization.pt2e.utils import _fuse_conv_bn_ +from torch.ao.quantization.quantizer import xnnpack_quantizer +from torch.ao.quantization.quantizer.quantizer import QuantizationAnnotation +from torch.ao.quantization.quantizer.quantizer import QuantizationSpec as TorchAOQuantizationSpec +from torch.ao.quantization.quantizer.quantizer import Quantizer +from torch.ao.quantization.quantizer.quantizer import Quantizer as TorchAOQuantizer +from torch.ao.quantization.quantizer.quantizer import SharedQuantizationSpec as TorchAOSharedQuantizationSpec +from torch.ao.quantization.quantizer.x86_inductor_quantizer import X86InductorQuantizer +from torch.ao.quantization.quantizer.x86_inductor_quantizer import get_default_x86_inductor_quantization_config +from torchao.quantization.pt2e.quantize_pt2e import convert_pt2e +from torchao.quantization.pt2e.quantize_pt2e import prepare_pt2e + +import nncf +from nncf.common.graph import NNCFGraph +from nncf.common.utils.os import safe_open +from nncf.experimental.torch.fx import quantize_pt2e +from nncf.experimental.torch.fx.nncf_graph_builder import GraphConverter +from nncf.experimental.torch.fx.node_utils import get_graph_node_by_name +from nncf.experimental.torch.fx.quantization.quantizer.openvino_adapter import OpenVINOQuantizerAdapter +from nncf.experimental.torch.fx.quantization.quantizer.torch_ao_adapter import TorchAOQuantizerAdapter +from nncf.experimental.torch.fx.quantization.quantizer.torch_ao_adapter import _get_edge_or_node_to_qspec +from nncf.tensor.definitions import TensorDataType +from tests.cross_fw.shared.nx_graph import compare_nx_graph_with_reference +from tests.cross_fw.shared.paths import TEST_ROOT +from tests.torch import test_models +from tests.torch.fx.helpers import get_torch_fx_model +from tests.torch.test_models.synthetic import ConcatModelWithTwoOutputs +from tests.torch.test_models.synthetic import LinearModel +from tests.torch.test_models.synthetic import ShortTransformer +from tests.torch.test_models.synthetic import SimpleConcatModel +from 
tests.torch.test_models.synthetic import YOLO11N_SDPABlock + +FX_QUANTIZED_DIR_NAME = TEST_ROOT / "torch2" / "data" / "fx" + + +@dataclass +class ModelCase: + model_builder: Callable[[], torch.nn.Module] + model_id: str + input_shape: tuple[int] + + +def torchvision_model_case(model_id: str, input_shape: tuple[int,]): + model = getattr(models, model_id) + return ModelCase(partial(model, weights=None), model_id, input_shape) + + +def get_dot_filename(model_name: str) -> str: + return model_name + ".dot" + + +def get_qconf_filename(model_name: str) -> str: + return model_name + "_ref_qconfig.json" + + +def get_x86_quantizer(*args, **kwarsg) -> X86InductorQuantizer: + quantizer = X86InductorQuantizer() + quantizer.set_global(get_default_x86_inductor_quantization_config()) + return quantizer + + +def get_xnnpack_quantizer(*args, **kwargs) -> xnnpack_quantizer.XNNPACKQuantizer: + quantizer = xnnpack_quantizer.XNNPACKQuantizer() + quantizer.set_global(xnnpack_quantizer.get_symmetric_quantization_config()) + return quantizer + + +def get_openvino_quantizer(*args, **kwargs) -> OpenVINOQuantizer: + return OpenVINOQuantizer(*args, **kwargs) + + +TEST_MODELS_QUANIZED = ( + (ModelCase(test_models.UNet, "unet", [1, 3, 224, 224]), {}, {}), + (torchvision_model_case("resnet18", (1, 3, 224, 224)), {}, {}), + (torchvision_model_case("mobilenet_v3_small", (1, 3, 224, 224)), {}, {}), + ( + torchvision_model_case("vit_b_16", (1, 3, 224, 224)), + {"model_type": nncf.ModelType.TRANSFORMER}, + {"smooth_quant": True}, + ), + ( + torchvision_model_case("swin_v2_t", (1, 3, 224, 224)), + {"model_type": nncf.ModelType.TRANSFORMER}, + {"smooth_quant": True}, + ), + ( + ModelCase(partial(ShortTransformer, 5, 10), "synthetic_transformer", [5]), + {"model_type": nncf.ModelType.TRANSFORMER}, + {"smooth_quant": True}, + ), + ( + ModelCase(YOLO11N_SDPABlock, "yolo11n_sdpa_block", YOLO11N_SDPABlock.INPUT_SIZE), + {"model_type": nncf.ModelType.TRANSFORMER}, + {"smooth_quant": True}, + ), +) + + +def _build_torch_fx_model(model_case: ModelCase) -> tuple[torch.fx.GraphModule, torch.Tensor]: + model = model_case.model_builder() + dtype = torch.int32 if model_case.model_id == "synthetic_transformer" else torch.float32 + example_input = torch.ones(model_case.input_shape, dtype=dtype) + fx_model = get_torch_fx_model(model, example_input) + return fx_model, example_input + + +def _get_calibration_dataset(example_input: torch.Tensor) -> nncf.Dataset: + def transform_fn(data_item): + return data_item.to("cpu") + + return nncf.Dataset([example_input], transform_fn) + + +@pytest.mark.parametrize( + ("model_case", "quantizer_params", "pt2e_params"), + TEST_MODELS_QUANIZED, + ids=[m[0].model_id for m in TEST_MODELS_QUANIZED], +) +@pytest.mark.parametrize( + "quantizer_builder", + [ + get_xnnpack_quantizer, + get_x86_quantizer, + get_openvino_quantizer, + ], + ids=["XNNPACKQuantizer", "X86InductorQuantizer", "OpenVINOQuantizer"], +) +def test_quantized_model( + quantizer_builder: Callable[[tuple[Any, ...]], Quantizer], + model_case: ModelCase, + quantizer_params, + pt2e_params, +): + fx_model, example_input = _build_torch_fx_model(model_case) + calibration_dataset = _get_calibration_dataset(example_input) + + quantizer = quantizer_builder(**quantizer_params) + quantized_model = quantize_pt2e( + fx_model, + quantizer, + calibration_dataset=calibration_dataset, + fast_bias_correction=None, # BC is disabled + fold_quantize=True, + do_copy=True, + **pt2e_params, + ) + + # Uncomment to visualize torch fx graph + # from tests.torch2.fx.helpers 
import visualize_fx_model + # visualize_fx_model(quantized_model, f"{quantizer.__class__.__name__}_{model_case.model_id}_int8.svg") + + nncf_graph = GraphConverter.create_nncf_graph(quantized_model) + path_to_dot = FX_QUANTIZED_DIR_NAME / str(quantizer.__class__.__name__) / get_dot_filename(model_case.model_id) + nncf_graph = _normalize_nncf_graph(nncf_graph, quantized_model.graph) + nx_graph = nncf_graph.get_graph_for_structure_analysis(extended=True) + compare_nx_graph_with_reference(nx_graph, path_to_dot.as_posix()) + + # Uncomment to visualize reference graphs + # from torch.ao.quantization.quantize_pt2e import convert_pt2e + # from torch.ao.quantization.quantize_pt2e import prepare_pt2e + # from tests.torch2.fx.helpers import visualize_fx_model + # prepared_model = prepare_pt2e(fx_model, quantizer) + # prepared_model(example_input) + # ao_quantized_model = convert_pt2e(prepared_model) + # visualize_fx_model(ao_quantized_model, f"{quantizer.__class__.__name__}_{model_case.model_id}_ao_int8.svg") + # ao_nncf_graph = GraphConverter.create_nncf_graph(ao_quantized_model) + # ao_nncf_graph.visualize_graph(f"ao_{quantizer.__class__.__name__}_{get_dot_filename(model_case.model_id)}") + + +@pytest.mark.parametrize( + ("model_case", "quantizer_params"), + [case[:2] for case in TEST_MODELS_QUANIZED], + ids=[m[0].model_id for m in TEST_MODELS_QUANIZED], +) +@pytest.mark.parametrize( + "quantizer_builder", + [ + get_xnnpack_quantizer, + get_x86_quantizer, + get_openvino_quantizer, + ], + ids=["XNNPACKQuantizer", "X86InductorQuantizer", "OpenVINOQuantizer"], +) +def test_quantizer_setup( + quantizer_builder: Callable[[tuple[Any, ...]], Quantizer], + model_case: ModelCase, + quantizer_params, + regen_ref_data, +): + fx_model, _ = _build_torch_fx_model(model_case) + quantizer = quantizer_builder(**quantizer_params) + ref_qconfig_filename = ( + FX_QUANTIZED_DIR_NAME / quantizer.__class__.__name__ / get_qconf_filename(model_case.model_id) + ) + + _fuse_conv_bn_(fx_model) + if isinstance(quantizer, OpenVINOQuantizer) or hasattr(quantizer, "get_nncf_quantization_setup"): + quantizer = OpenVINOQuantizerAdapter(quantizer) + else: + quantizer = TorchAOQuantizerAdapter(quantizer) + + # Call transform_prior_quantization before the NNCFGraph creation + fx_model = quantizer.transform_prior_quantization(fx_model) + nncf_graph = GraphConverter.create_nncf_graph(fx_model) + quantizer_setup = quantizer.get_quantization_setup(fx_model, nncf_graph) + qsetup_config = quantizer_setup.get_state() + _normalize_qsetup_state(qsetup_config) + if regen_ref_data: + with safe_open(ref_qconfig_filename, "w") as file: + json.dump(qsetup_config, file, indent=4) + + with safe_open(ref_qconfig_filename, "r") as file: + ref_qsetup_config = json.load(file) + # helper to find diff in qconfigs + # pip install dictdiffer + # from dictdiffer import diff + # diff_res = list(diff(ref_qsetup_config, qsetup_config)) + assert qsetup_config == ref_qsetup_config + + +def _normalize_qsetup_state(setup: dict[str, Any]) -> None: + """ + Normalizes the quantization setup state dictionary in-place to ensure consistent ordering + of elements for deterministic behavior. + + :param setup: Quantization setup state to normalize. 
+ """ + for key in ["unified_scale_groups", "shared_input_operation_set_groups"]: + sorted_usg = {} + for k, v in setup[key].items(): + sorted_usg[str(k)] = sorted(v) + setup[key] = sorted_usg + dq_key = "directly_quantized_operator_node_names" + sorted_qps = {} + for qp in setup["quantization_points"].values(): + sorted_dq = sorted(qp[dq_key]) + qconfig = qp["qconfig"].copy() + if "dest_dtype" in qconfig: + qconfig["dest_dtype"] = "INT8" if qconfig["dest_dtype"] is TensorDataType.int8 else "UINT8" + sorted_qps[f"{tuple(sorted_dq)}_{qp['qip_class']}"] = qconfig + setup["quantization_points"] = sorted_qps + + +def _normalize_nncf_graph(nncf_graph: NNCFGraph, fx_graph: torch.fx.Graph): + """ + Normalizes the given NNCFGraph by renaming quantize/dequantize nodes to ensure consistent naming across runs. + XNNPACKQuantizer and X86InductorQuantizer quantizers insert quantize and dequantize nodes + with inconsistent names across runs. This function assigns standardized names to such nodes + to maintain consistency. + + :param nncf_graph: The given NNCFGraph instance. + :return: The normalized version of the given NNCFGraph. + """ + idx = 0 + dtypes_map = {} + + q_dq_types = ["quantize_per_tensor", "dequantize_per_tensor", "quantize_per_channel", "dequantize_per_channel"] + norm_nncf_graph = NNCFGraph() + node_names_map = {} + for node in nncf_graph.topological_sort(): + attrs = node._attributes.copy() + if node.node_type in q_dq_types: + new_node_name = f"{node.node_type}_{idx}" + node_names_map[node.node_name] = new_node_name + attrs[node.NODE_NAME_ATTR] = new_node_name + idx += 1 + if node.node_type in ["dequantize_per_tensor", "dequantize_per_channel"]: + source_node = get_graph_node_by_name(fx_graph, node.node_name) + dtypes_map[new_node_name] = ( + TensorDataType.int8 if source_node.args[-1] == torch.int8 else TensorDataType.uint8 + ) + norm_nncf_graph.add_nncf_node( + node_name=attrs[node.NODE_NAME_ATTR], + node_type=attrs[node.NODE_TYPE_ATTR], + node_metatype=attrs[node.METATYPE_ATTR], + layer_attributes=node.layer_attributes, + ) + + for edge in nncf_graph.get_all_edges(): + from_node_name = node_names_map.get(edge.from_node.node_name, edge.from_node.node_name) + to_node_name = node_names_map.get(edge.to_node.node_name, edge.to_node.node_name) + from_node, to_node = [norm_nncf_graph.get_node_by_name(name) for name in (from_node_name, to_node_name)] + dtype = dtypes_map.get(to_node.node_name, edge.dtype) + norm_nncf_graph.add_edge_between_nncf_nodes( + from_node.node_id, + to_node.node_id, + tensor_shape=edge.tensor_shape, + input_port_id=edge.input_port_id, + output_port_id=edge.output_port_id, + dtype=dtype, + parallel_input_port_ids=edge.parallel_input_port_ids, + ) + return norm_nncf_graph + + +@pytest.mark.parametrize( + "model_case,quantizer_params", + [(m[0], m[1]) for m in TEST_MODELS_QUANIZED], + ids=[m[0].model_id for m in TEST_MODELS_QUANIZED], +) +def test_openvino_quantizer_with_torch_ao_convert_pt2e(model_case: ModelCase, quantizer_params): + quantizer = get_openvino_quantizer(**quantizer_params) + fx_model, example_input = _build_torch_fx_model(model_case) + prepared_model = prepare_pt2e(fx_model, quantizer) + prepared_model(example_input) + ao_quantized_model = convert_pt2e(prepared_model) + nncf_graph = GraphConverter.create_nncf_graph(ao_quantized_model) + + path_to_dot = ( + FX_QUANTIZED_DIR_NAME / "ao_export_quantization_OpenVINOQuantizer" / get_dot_filename(model_case.model_id) + ) + nx_graph = nncf_graph.get_graph_for_structure_analysis(extended=True) + 
compare_nx_graph_with_reference(nx_graph, path_to_dot.as_posix()) + + +TorchAOSharedQuantizationSpecTestCases = ( + ( + ModelCase(SimpleConcatModel, "unified_scales_test_model", SimpleConcatModel.INPUT_SHAPE), + ("conv2d", "conv2d_1"), + (0.01176275312, 127, 0, 255, torch.uint8), + ), +) + + +@pytest.mark.parametrize( + "model_case,unified_scale_node_names,ref_fq_params", + TorchAOSharedQuantizationSpecTestCases, + ids=[m[0].model_id for m in TorchAOSharedQuantizationSpecTestCases], +) +def test_OVQuantizer_TorchAOSharedQuantizationSpec_handling( + model_case: ModelCase, + unified_scale_node_names: tuple[str, str], + ref_fq_params: tuple[float, int, int, int, torch.dtype], +): + model_case.model_builder()(torch.ones(model_case.input_shape)) + fx_model, example_input = _build_torch_fx_model(model_case) + + quantizer = OpenVINOQuantizer() + prepared_model = prepare_pt2e(fx_model, quantizer) + + actual_annotation = _get_edge_or_node_to_qspec(fx_model) + for edge_or_node, annotation in actual_annotation.items(): + if isinstance(edge_or_node, torch.fx.Node) and edge_or_node.name == unified_scale_node_names[1]: + assert isinstance(annotation, TorchAOSharedQuantizationSpec) + assert annotation.edge_or_node.name == unified_scale_node_names[0] + assert isinstance(actual_annotation[annotation.edge_or_node], TorchAOQuantizationSpec) + break + else: + msg = f"Node {unified_scale_node_names[1]} should be annotated as quantizable" + raise RuntimeError(msg) + + prepared_model(example_input) + ao_quantized_model = convert_pt2e(prepared_model) + + nodes_visited = 0 + for node in ao_quantized_model.graph.nodes: + if node.name in unified_scale_node_names: + dequantize_args = list(node.users)[0].args + assert abs(dequantize_args[1] - ref_fq_params[0]) < torch.finfo(torch.float32).eps + assert dequantize_args[2:] == ref_fq_params[1:] + nodes_visited += 1 + if nodes_visited == 2: + break + else: + msg = f"Quantizers was not found for the unified scales pair {unified_scale_node_names}" + raise RuntimeError(msg) + + +class OneNodeAnnotationQuantizer(TorchAOQuantizer): + def __init__(self, node_name: str, annotation: TorchAOQuantizationSpec): + self._node_name = node_name + self._annotation = annotation + + def annotate(self, model: torch.fx.GraphModule): + target_node = get_graph_node_by_name(model.graph, self._node_name) + target_node.meta["quantization_annotation"] = self._annotation + + return model + + def validate(self, model): + return + + +REF_NONE_Q_MIN_Q_MAX_SETUP = { + "quantization_points": { + 0: { + "qip": {"target_node_name": "linear", "input_port_id": None}, + "qip_class": "ActivationQuantizationInsertionPoint", + "qconfig": { + "num_bits": 8, + "mode": "symmetric", + "signedness_to_force": False, + "per_channel": False, + "narrow_range": False, + "dest_dtype": "int8", + }, + "directly_quantized_operator_node_names": ["output"], + } + }, + "unified_scale_groups": {}, + "shared_input_operation_set_groups": {}, +} + + +@pytest.mark.parametrize("dtype", [torch.int8, torch.uint8]) +def test_none_q_min_q_max_quantizer(dtype): + qspec = TorchAOQuantizationSpec(dtype=dtype, observer_or_fake_quant_ctr=None, qscheme=torch.per_tensor_symmetric) + annotation = QuantizationAnnotation(output_qspec=qspec) + quantizer = OneNodeAnnotationQuantizer("linear", annotation) + + adapted_quantizer = TorchAOQuantizerAdapter(quantizer) + + model = get_torch_fx_model(LinearModel(torch.ones(3, 3)), torch.ones(1, 3, 3, 3)) + setup = adapted_quantizer.get_quantization_setup(model, GraphConverter.create_nncf_graph(model)) + + 
ref = REF_NONE_Q_MIN_Q_MAX_SETUP.copy() + ref["quantization_points"][0]["qconfig"]["dest_dtype"] = "int8" if dtype == torch.int8 else "uint8" + assert setup.get_state() == ref + + +REF_INP_CONCAT_SETUP = { + "quantization_points": { + 0: { + "qip": {"target_node_name": "cat", "input_port_id": 0}, + "qip_class": "ActivationQuantizationInsertionPoint", + "qconfig": { + "num_bits": 8, + "mode": "symmetric", + "signedness_to_force": False, + "per_channel": False, + "narrow_range": False, + "dest_dtype": "int8", + }, + "directly_quantized_operator_node_names": ["cat"], + } + }, + "unified_scale_groups": {}, + "shared_input_operation_set_groups": {}, +} + + +def test_adapter_inp_concat_idx(): + model = get_torch_fx_model(ConcatModelWithTwoOutputs(), torch.ones(ConcatModelWithTwoOutputs.INPUT_SHAPE)) + conv2d = get_graph_node_by_name(model.graph, "conv2d") + + qspec = TorchAOQuantizationSpec( + dtype=torch.int8, observer_or_fake_quant_ctr=None, qscheme=torch.per_tensor_symmetric + ) + annotation = QuantizationAnnotation(input_qspec_map={conv2d: qspec}) + quantizer = OneNodeAnnotationQuantizer("cat", annotation) + + adapted_quantizer = TorchAOQuantizerAdapter(quantizer) + setup = adapted_quantizer.get_quantization_setup(model, GraphConverter.create_nncf_graph(model)) + assert setup.get_state() == REF_INP_CONCAT_SETUP From b62071b20b9950efb6f4e12042d0d99c079dcd24 Mon Sep 17 00:00:00 2001 From: anzr299 Date: Thu, 8 Jan 2026 20:47:02 +0400 Subject: [PATCH 02/12] remove extra imports --- tests/executorch/test_ptq.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/tests/executorch/test_ptq.py b/tests/executorch/test_ptq.py index e699d185ae6..35b69efc739 100644 --- a/tests/executorch/test_ptq.py +++ b/tests/executorch/test_ptq.py @@ -17,10 +17,6 @@ import pytest import torch import torch.fx -import torch.nn.parallel -import torch.optim -import torch.utils.data -import torch.utils.data.distributed import torchvision.models as models from executorch.backends.openvino.quantizer.quantizer import OpenVINOQuantizer from torch.ao.quantization.pt2e.utils import _fuse_conv_bn_ From 0467fda56ca2933f3ab980e20cf7d20056e16763 Mon Sep 17 00:00:00 2001 From: anzr299 Date: Fri, 9 Jan 2026 13:27:43 +0400 Subject: [PATCH 03/12] update workflow file; change location of data file for test --- .github/workflows/executorch.yml | 54 ++++++++++++++++++++++++++++++++ tests/executorch/test_ptq.py | 2 +- 2 files changed, 55 insertions(+), 1 deletion(-) create mode 100644 .github/workflows/executorch.yml diff --git a/.github/workflows/executorch.yml b/.github/workflows/executorch.yml new file mode 100644 index 00000000000..3abf00e94fc --- /dev/null +++ b/.github/workflows/executorch.yml @@ -0,0 +1,54 @@ +name: ExecuTorch +permissions: read-all + +on: + workflow_dispatch: + schedule: + - cron: '0 0 * * *' + pull_request: + paths: + - 'src/nncf/experimental/quantization/algorithms/range_estimator/*' + - 'src/nncf/experimental/quantization/algorithms/post_training/*' + - 'src/nncf/experimental/quantization/algorithms/weight_compression/*' + - 'tests/torch2/fx/*' + - 'src/nncf/experimental/torch/fx/*' + - 'src/nncf/quantization/algorithms/algorithm.py' + +jobs: + executorch: + timeout-minutes: 40 + runs-on: ubuntu-latest-8-cores + defaults: + run: + shell: bash + env: + DEBIAN_FRONTEND: noninteractive + steps: + - name: Install dependencies + run : | + sudo apt-get update + sudo apt-get --assume-yes install gcc g++ build-essential ninja-build libgl1-mesa-dev libglib2.0-0 + - uses: 
actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + with: + lfs: true + - uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6.0.0 + with: + python-version: "3.10.14" + - name: Runner info + continue-on-error: true + run: | + cat /etc/*release + cat /proc/cpuinfo + - name: Install NNCF and test requirements + run: | + # Torchao installation requires pytorch to be installed first. + pip install . -r tests/executorch/requirements.txt + pip install --pre torch torchvision torchao --index-url https://download.pytorch.org/whl/nightly/cpu + # Executorch + # Editable install due to https://github.com/pytorch/executorch/issues/6475 + pip install --no-build-isolation -e git+https://github.com/anzr299/executorch.git@an/openvino/nncf_compress_pt2e#egg=executorch + - name: Print installed modules + run: pip list + - name: Run PyTorch precommit test scope + run: | + pytest -ra tests/executorch \ No newline at end of file diff --git a/tests/executorch/test_ptq.py b/tests/executorch/test_ptq.py index 35b69efc739..70748bad2df 100644 --- a/tests/executorch/test_ptq.py +++ b/tests/executorch/test_ptq.py @@ -51,7 +51,7 @@ from tests.torch.test_models.synthetic import SimpleConcatModel from tests.torch.test_models.synthetic import YOLO11N_SDPABlock -FX_QUANTIZED_DIR_NAME = TEST_ROOT / "torch2" / "data" / "fx" +FX_QUANTIZED_DIR_NAME = TEST_ROOT / "torch" / "data" / "fx" @dataclass From 716d1bebfa9267118aae1c8c4f5f3fe1c3bbd2c5 Mon Sep 17 00:00:00 2001 From: anzr299 Date: Thu, 15 Jan 2026 20:29:24 +0400 Subject: [PATCH 04/12] remove old quantizer test --- tests/torch/fx/test_quantizer.py | 479 ------------------------------- 1 file changed, 479 deletions(-) delete mode 100644 tests/torch/fx/test_quantizer.py diff --git a/tests/torch/fx/test_quantizer.py b/tests/torch/fx/test_quantizer.py deleted file mode 100644 index 90734f60b0f..00000000000 --- a/tests/torch/fx/test_quantizer.py +++ /dev/null @@ -1,479 +0,0 @@ -# Copyright (c) 2026 Intel Corporation -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# http://www.apache.org/licenses/LICENSE-2.0 -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import json -from dataclasses import dataclass -from functools import partial -from typing import Any, Callable - -import pytest -import torch -import torch.fx -import torch.nn.parallel -import torch.optim -import torch.utils.data -import torch.utils.data.distributed -import torchvision.models as models -from torch.ao.quantization.pt2e.utils import _fuse_conv_bn_ -from torch.ao.quantization.quantize_pt2e import convert_pt2e -from torch.ao.quantization.quantize_pt2e import prepare_pt2e -from torch.ao.quantization.quantizer import xnnpack_quantizer -from torch.ao.quantization.quantizer.quantizer import QuantizationAnnotation -from torch.ao.quantization.quantizer.quantizer import QuantizationSpec as TorchAOQuantizationSpec -from torch.ao.quantization.quantizer.quantizer import Quantizer -from torch.ao.quantization.quantizer.quantizer import Quantizer as TorchAOQuantizer -from torch.ao.quantization.quantizer.quantizer import SharedQuantizationSpec as TorchAOSharedQuantizationSpec -from torch.ao.quantization.quantizer.x86_inductor_quantizer import X86InductorQuantizer -from torch.ao.quantization.quantizer.x86_inductor_quantizer import get_default_x86_inductor_quantization_config - -import nncf -from nncf.common.graph import NNCFGraph -from nncf.common.utils.os import safe_open -from nncf.experimental.torch.fx import quantize_pt2e -from nncf.experimental.torch.fx.nncf_graph_builder import GraphConverter -from nncf.experimental.torch.fx.node_utils import get_graph_node_by_name -from nncf.experimental.torch.fx.quantization.quantizer.openvino_adapter import OpenVINOQuantizerAdapter -from nncf.experimental.torch.fx.quantization.quantizer.openvino_quantizer import OpenVINOQuantizer -from nncf.experimental.torch.fx.quantization.quantizer.torch_ao_adapter import TorchAOQuantizerAdapter -from nncf.experimental.torch.fx.quantization.quantizer.torch_ao_adapter import _get_edge_or_node_to_qspec -from nncf.tensor.definitions import TensorDataType -from tests.cross_fw.shared.nx_graph import compare_nx_graph_with_reference -from tests.cross_fw.shared.paths import TEST_ROOT -from tests.torch import test_models -from tests.torch.fx.helpers import get_torch_fx_model -from tests.torch.test_models.synthetic import ConcatModelWithTwoOutputs -from tests.torch.test_models.synthetic import LinearModel -from tests.torch.test_models.synthetic import ShortTransformer -from tests.torch.test_models.synthetic import SimpleConcatModel -from tests.torch.test_models.synthetic import YOLO11N_SDPABlock - -FX_QUANTIZED_DIR_NAME = TEST_ROOT / "torch" / "data" / "fx" - - -@dataclass -class ModelCase: - model_builder: Callable[[], torch.nn.Module] - model_id: str - input_shape: tuple[int] - - -def torchvision_model_case(model_id: str, input_shape: tuple[int,]): - model = getattr(models, model_id) - return ModelCase(partial(model, weights=None), model_id, input_shape) - - -def get_dot_filename(model_name: str) -> str: - return model_name + ".dot" - - -def get_qconf_filename(model_name: str) -> str: - return model_name + "_ref_qconfig.json" - - -def get_x86_quantizer(*args, **kwarsg) -> X86InductorQuantizer: - quantizer = X86InductorQuantizer() - quantizer.set_global(get_default_x86_inductor_quantization_config()) - return quantizer - - -def get_xnnpack_quantizer(*args, **kwargs) -> xnnpack_quantizer.XNNPACKQuantizer: - quantizer = xnnpack_quantizer.XNNPACKQuantizer() - quantizer.set_global(xnnpack_quantizer.get_symmetric_quantization_config()) - return quantizer - - -def get_openvino_quantizer(*args, **kwargs) -> 
OpenVINOQuantizer: - return OpenVINOQuantizer(*args, **kwargs) - - -TEST_MODELS_QUANIZED = ( - (ModelCase(test_models.UNet, "unet", [1, 3, 224, 224]), {}, {}), - (torchvision_model_case("resnet18", (1, 3, 224, 224)), {}, {}), - (torchvision_model_case("mobilenet_v3_small", (1, 3, 224, 224)), {}, {}), - ( - torchvision_model_case("vit_b_16", (1, 3, 224, 224)), - {"model_type": nncf.ModelType.TRANSFORMER}, - {"smooth_quant": True}, - ), - ( - torchvision_model_case("swin_v2_t", (1, 3, 224, 224)), - {"model_type": nncf.ModelType.TRANSFORMER}, - {"smooth_quant": True}, - ), - ( - ModelCase(partial(ShortTransformer, 5, 10), "synthetic_transformer", [5]), - {"model_type": nncf.ModelType.TRANSFORMER}, - {"smooth_quant": True}, - ), - ( - ModelCase(YOLO11N_SDPABlock, "yolo11n_sdpa_block", YOLO11N_SDPABlock.INPUT_SIZE), - {"model_type": nncf.ModelType.TRANSFORMER}, - {"smooth_quant": True}, - ), -) - - -def _build_torch_fx_model(model_case: ModelCase) -> tuple[torch.fx.GraphModule, torch.Tensor]: - model = model_case.model_builder() - dtype = torch.int32 if model_case.model_id == "synthetic_transformer" else torch.float32 - example_input = torch.ones(model_case.input_shape, dtype=dtype) - fx_model = get_torch_fx_model(model, example_input) - return fx_model, example_input - - -def _get_calibration_dataset(example_input: torch.Tensor) -> nncf.Dataset: - def transform_fn(data_item): - return data_item.to("cpu") - - return nncf.Dataset([example_input], transform_fn) - - -@pytest.mark.parametrize( - ("model_case", "quantizer_params", "pt2e_params"), - TEST_MODELS_QUANIZED, - ids=[m[0].model_id for m in TEST_MODELS_QUANIZED], -) -@pytest.mark.parametrize( - "quantizer_builder", - [ - get_xnnpack_quantizer, - get_x86_quantizer, - get_openvino_quantizer, - ], - ids=["XNNPACKQuantizer", "X86InductorQuantizer", "OpenVINOQuantizer"], -) -def test_quantized_model( - quantizer_builder: Callable[[tuple[Any, ...]], Quantizer], - model_case: ModelCase, - quantizer_params, - pt2e_params, -): - fx_model, example_input = _build_torch_fx_model(model_case) - calibration_dataset = _get_calibration_dataset(example_input) - - quantizer = quantizer_builder(**quantizer_params) - quantized_model = quantize_pt2e( - fx_model, - quantizer, - calibration_dataset=calibration_dataset, - fast_bias_correction=None, # BC is disabled - fold_quantize=True, - do_copy=True, - **pt2e_params, - ) - - # Uncomment to visualize torch fx graph - # from tests.torch.fx.helpers import visualize_fx_model - # visualize_fx_model(quantized_model, f"{quantizer.__class__.__name__}_{model_case.model_id}_int8.svg") - - nncf_graph = GraphConverter.create_nncf_graph(quantized_model) - path_to_dot = FX_QUANTIZED_DIR_NAME / str(quantizer.__class__.__name__) / get_dot_filename(model_case.model_id) - nncf_graph = _normalize_nncf_graph(nncf_graph, quantized_model.graph) - nx_graph = nncf_graph.get_graph_for_structure_analysis(extended=True) - compare_nx_graph_with_reference(nx_graph, path_to_dot.as_posix()) - - # Uncomment to visualize reference graphs - # from torch.ao.quantization.quantize_pt2e import convert_pt2e - # from torch.ao.quantization.quantize_pt2e import prepare_pt2e - # from tests.torch.fx.helpers import visualize_fx_model - # prepared_model = prepare_pt2e(fx_model, quantizer) - # prepared_model(example_input) - # ao_quantized_model = convert_pt2e(prepared_model) - # visualize_fx_model(ao_quantized_model, f"{quantizer.__class__.__name__}_{model_case.model_id}_ao_int8.svg") - # ao_nncf_graph = GraphConverter.create_nncf_graph(ao_quantized_model) - 
# ao_nncf_graph.visualize_graph(f"ao_{quantizer.__class__.__name__}_{get_dot_filename(model_case.model_id)}") - - -@pytest.mark.parametrize( - ("model_case", "quantizer_params"), - [case[:2] for case in TEST_MODELS_QUANIZED], - ids=[m[0].model_id for m in TEST_MODELS_QUANIZED], -) -@pytest.mark.parametrize( - "quantizer_builder", - [ - get_xnnpack_quantizer, - get_x86_quantizer, - get_openvino_quantizer, - ], - ids=["XNNPACKQuantizer", "X86InductorQuantizer", "OpenVINOQuantizer"], -) -def test_quantizer_setup( - quantizer_builder: Callable[[tuple[Any, ...]], Quantizer], - model_case: ModelCase, - quantizer_params, - regen_ref_data, -): - fx_model, _ = _build_torch_fx_model(model_case) - quantizer = quantizer_builder(**quantizer_params) - ref_qconfig_filename = ( - FX_QUANTIZED_DIR_NAME / quantizer.__class__.__name__ / get_qconf_filename(model_case.model_id) - ) - - _fuse_conv_bn_(fx_model) - if isinstance(quantizer, OpenVINOQuantizer) or hasattr(quantizer, "get_nncf_quantization_setup"): - quantizer = OpenVINOQuantizerAdapter(quantizer) - else: - quantizer = TorchAOQuantizerAdapter(quantizer) - - # Call transform_prior_quantization before the NNCFGraph creation - fx_model = quantizer.transform_prior_quantization(fx_model) - nncf_graph = GraphConverter.create_nncf_graph(fx_model) - quantizer_setup = quantizer.get_quantization_setup(fx_model, nncf_graph) - qsetup_config = quantizer_setup.get_state() - _normalize_qsetup_state(qsetup_config) - if regen_ref_data: - with safe_open(ref_qconfig_filename, "w") as file: - json.dump(qsetup_config, file, indent=4) - - with safe_open(ref_qconfig_filename, "r") as file: - ref_qsetup_config = json.load(file) - # helper to find diff in qconfigs - # pip install dictdiffer - # from dictdiffer import diff - # diff_res = list(diff(ref_qsetup_config, qsetup_config)) - assert qsetup_config == ref_qsetup_config - - -def _normalize_qsetup_state(setup: dict[str, Any]) -> None: - """ - Normalizes the quantization setup state dictionary in-place to ensure consistent ordering - of elements for deterministic behavior. - - :param setup: Quantization setup state to normalize. - """ - for key in ["unified_scale_groups", "shared_input_operation_set_groups"]: - sorted_usg = {} - for k, v in setup[key].items(): - sorted_usg[str(k)] = sorted(v) - setup[key] = sorted_usg - dq_key = "directly_quantized_operator_node_names" - sorted_qps = {} - for qp in setup["quantization_points"].values(): - sorted_dq = sorted(qp[dq_key]) - qconfig = qp["qconfig"].copy() - if "dest_dtype" in qconfig: - qconfig["dest_dtype"] = "INT8" if qconfig["dest_dtype"] is TensorDataType.int8 else "UINT8" - sorted_qps[f"{tuple(sorted_dq)}_{qp['qip_class']}"] = qconfig - setup["quantization_points"] = sorted_qps - - -def _normalize_nncf_graph(nncf_graph: NNCFGraph, fx_graph: torch.fx.Graph): - """ - Normalizes the given NNCFGraph by renaming quantize/dequantize nodes to ensure consistent naming across runs. - XNNPACKQuantizer and X86InductorQuantizer quantizers insert quantize and dequantize nodes - with inconsistent names across runs. This function assigns standardized names to such nodes - to maintain consistency. - - :param nncf_graph: The given NNCFGraph instance. - :return: The normalized version of the given NNCFGraph. 
- """ - idx = 0 - dtypes_map = {} - - q_dq_types = ["quantize_per_tensor", "dequantize_per_tensor", "quantize_per_channel", "dequantize_per_channel"] - norm_nncf_graph = NNCFGraph() - node_names_map = {} - for node in nncf_graph.topological_sort(): - attrs = node._attributes.copy() - if node.node_type in q_dq_types: - new_node_name = f"{node.node_type}_{idx}" - node_names_map[node.node_name] = new_node_name - attrs[node.NODE_NAME_ATTR] = new_node_name - idx += 1 - if node.node_type in ["dequantize_per_tensor", "dequantize_per_channel"]: - source_node = get_graph_node_by_name(fx_graph, node.node_name) - dtypes_map[new_node_name] = ( - TensorDataType.int8 if source_node.args[-1] == torch.int8 else TensorDataType.uint8 - ) - norm_nncf_graph.add_nncf_node( - node_name=attrs[node.NODE_NAME_ATTR], - node_type=attrs[node.NODE_TYPE_ATTR], - node_metatype=attrs[node.METATYPE_ATTR], - layer_attributes=node.layer_attributes, - ) - - for edge in nncf_graph.get_all_edges(): - from_node_name = node_names_map.get(edge.from_node.node_name, edge.from_node.node_name) - to_node_name = node_names_map.get(edge.to_node.node_name, edge.to_node.node_name) - from_node, to_node = [norm_nncf_graph.get_node_by_name(name) for name in (from_node_name, to_node_name)] - dtype = dtypes_map.get(to_node.node_name, edge.dtype) - norm_nncf_graph.add_edge_between_nncf_nodes( - from_node.node_id, - to_node.node_id, - tensor_shape=edge.tensor_shape, - input_port_id=edge.input_port_id, - output_port_id=edge.output_port_id, - dtype=dtype, - parallel_input_port_ids=edge.parallel_input_port_ids, - ) - return norm_nncf_graph - - -@pytest.mark.parametrize( - "model_case,quantizer_params", - [(m[0], m[1]) for m in TEST_MODELS_QUANIZED], - ids=[m[0].model_id for m in TEST_MODELS_QUANIZED], -) -def test_openvino_quantizer_with_torch_ao_convert_pt2e(model_case: ModelCase, quantizer_params): - quantizer = get_openvino_quantizer(**quantizer_params) - fx_model, example_input = _build_torch_fx_model(model_case) - prepared_model = prepare_pt2e(fx_model, quantizer) - prepared_model(example_input) - ao_quantized_model = convert_pt2e(prepared_model) - nncf_graph = GraphConverter.create_nncf_graph(ao_quantized_model) - - path_to_dot = ( - FX_QUANTIZED_DIR_NAME / "ao_export_quantization_OpenVINOQuantizer" / get_dot_filename(model_case.model_id) - ) - nx_graph = nncf_graph.get_graph_for_structure_analysis(extended=True) - compare_nx_graph_with_reference(nx_graph, path_to_dot.as_posix()) - - -TorchAOSharedQuantizationSpecTestCases = ( - ( - ModelCase(SimpleConcatModel, "unified_scales_test_model", SimpleConcatModel.INPUT_SHAPE), - ("conv2d", "conv2d_1"), - (0.01176275312, 127, 0, 255, torch.uint8), - ), -) - - -@pytest.mark.parametrize( - "model_case,unified_scale_node_names,ref_fq_params", - TorchAOSharedQuantizationSpecTestCases, - ids=[m[0].model_id for m in TorchAOSharedQuantizationSpecTestCases], -) -def test_OVQuantizer_TorchAOSharedQuantizationSpec_handling( - model_case: ModelCase, - unified_scale_node_names: tuple[str, str], - ref_fq_params: tuple[float, int, int, int, torch.dtype], -): - model_case.model_builder()(torch.ones(model_case.input_shape)) - fx_model, example_input = _build_torch_fx_model(model_case) - - quantizer = OpenVINOQuantizer() - prepared_model = prepare_pt2e(fx_model, quantizer) - - actual_annotation = _get_edge_or_node_to_qspec(fx_model) - for edge_or_node, annotation in actual_annotation.items(): - if isinstance(edge_or_node, torch.fx.Node) and edge_or_node.name == unified_scale_node_names[1]: - assert 
isinstance(annotation, TorchAOSharedQuantizationSpec) - assert annotation.edge_or_node.name == unified_scale_node_names[0] - assert isinstance(actual_annotation[annotation.edge_or_node], TorchAOQuantizationSpec) - break - else: - msg = f"Node {unified_scale_node_names[1]} should be annotated as quantizable" - raise RuntimeError(msg) - - prepared_model(example_input) - ao_quantized_model = convert_pt2e(prepared_model) - - nodes_visited = 0 - for node in ao_quantized_model.graph.nodes: - if node.name in unified_scale_node_names: - dequantize_args = list(node.users)[0].args - assert abs(dequantize_args[1] - ref_fq_params[0]) < torch.finfo(torch.float32).eps - assert dequantize_args[2:] == ref_fq_params[1:] - nodes_visited += 1 - if nodes_visited == 2: - break - else: - msg = f"Quantizers was not found for the unified scales pair {unified_scale_node_names}" - raise RuntimeError(msg) - - -class OneNodeAnnotationQuantizer(TorchAOQuantizer): - def __init__(self, node_name: str, annotation: TorchAOQuantizationSpec): - self._node_name = node_name - self._annotation = annotation - - def annotate(self, model: torch.fx.GraphModule): - target_node = get_graph_node_by_name(model.graph, self._node_name) - target_node.meta["quantization_annotation"] = self._annotation - - return model - - def validate(self, model): - return - - -REF_NONE_Q_MIN_Q_MAX_SETUP = { - "quantization_points": { - 0: { - "qip": {"target_node_name": "linear", "input_port_id": None}, - "qip_class": "ActivationQuantizationInsertionPoint", - "qconfig": { - "num_bits": 8, - "mode": "symmetric", - "signedness_to_force": False, - "per_channel": False, - "narrow_range": False, - "dest_dtype": "int8", - }, - "directly_quantized_operator_node_names": ["output"], - } - }, - "unified_scale_groups": {}, - "shared_input_operation_set_groups": {}, -} - - -@pytest.mark.parametrize("dtype", [torch.int8, torch.uint8]) -def test_none_q_min_q_max_quantizer(dtype): - qspec = TorchAOQuantizationSpec(dtype=dtype, observer_or_fake_quant_ctr=None, qscheme=torch.per_tensor_symmetric) - annotation = QuantizationAnnotation(output_qspec=qspec) - quantizer = OneNodeAnnotationQuantizer("linear", annotation) - - adapted_quantizer = TorchAOQuantizerAdapter(quantizer) - - model = get_torch_fx_model(LinearModel(torch.ones(3, 3)), torch.ones(1, 3, 3, 3)) - setup = adapted_quantizer.get_quantization_setup(model, GraphConverter.create_nncf_graph(model)) - - ref = REF_NONE_Q_MIN_Q_MAX_SETUP.copy() - ref["quantization_points"][0]["qconfig"]["dest_dtype"] = "int8" if dtype == torch.int8 else "uint8" - assert setup.get_state() == ref - - -REF_INP_CONCAT_SETUP = { - "quantization_points": { - 0: { - "qip": {"target_node_name": "cat", "input_port_id": 0}, - "qip_class": "ActivationQuantizationInsertionPoint", - "qconfig": { - "num_bits": 8, - "mode": "symmetric", - "signedness_to_force": False, - "per_channel": False, - "narrow_range": False, - "dest_dtype": "int8", - }, - "directly_quantized_operator_node_names": ["cat"], - } - }, - "unified_scale_groups": {}, - "shared_input_operation_set_groups": {}, -} - - -def test_adapter_inp_concat_idx(): - model = get_torch_fx_model(ConcatModelWithTwoOutputs(), torch.ones(ConcatModelWithTwoOutputs.INPUT_SHAPE)) - conv2d = get_graph_node_by_name(model.graph, "conv2d") - - qspec = TorchAOQuantizationSpec( - dtype=torch.int8, observer_or_fake_quant_ctr=None, qscheme=torch.per_tensor_symmetric - ) - annotation = QuantizationAnnotation(input_qspec_map={conv2d: qspec}) - quantizer = OneNodeAnnotationQuantizer("cat", annotation) - - 
adapted_quantizer = TorchAOQuantizerAdapter(quantizer) - setup = adapted_quantizer.get_quantization_setup(model, GraphConverter.create_nncf_graph(model)) - assert setup.get_state() == REF_INP_CONCAT_SETUP From 602f773dd20ff5371ce174c1d36a1bebb632bff4 Mon Sep 17 00:00:00 2001 From: anzr299 Date: Thu, 15 Jan 2026 20:29:46 +0400 Subject: [PATCH 05/12] fix imports --- tests/executorch/test_ptq.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/tests/executorch/test_ptq.py b/tests/executorch/test_ptq.py index 70748bad2df..a3da8cf8506 100644 --- a/tests/executorch/test_ptq.py +++ b/tests/executorch/test_ptq.py @@ -19,17 +19,17 @@ import torch.fx import torchvision.models as models from executorch.backends.openvino.quantizer.quantizer import OpenVINOQuantizer -from torch.ao.quantization.pt2e.utils import _fuse_conv_bn_ -from torch.ao.quantization.quantizer import xnnpack_quantizer -from torch.ao.quantization.quantizer.quantizer import QuantizationAnnotation -from torch.ao.quantization.quantizer.quantizer import QuantizationSpec as TorchAOQuantizationSpec -from torch.ao.quantization.quantizer.quantizer import Quantizer -from torch.ao.quantization.quantizer.quantizer import Quantizer as TorchAOQuantizer -from torch.ao.quantization.quantizer.quantizer import SharedQuantizationSpec as TorchAOSharedQuantizationSpec -from torch.ao.quantization.quantizer.x86_inductor_quantizer import X86InductorQuantizer -from torch.ao.quantization.quantizer.x86_inductor_quantizer import get_default_x86_inductor_quantization_config +from executorch.backends.xnnpack.quantizer import xnnpack_quantizer from torchao.quantization.pt2e.quantize_pt2e import convert_pt2e from torchao.quantization.pt2e.quantize_pt2e import prepare_pt2e +from torchao.quantization.pt2e.quantizer import QuantizationAnnotation +from torchao.quantization.pt2e.quantizer import QuantizationSpec as TorchAOQuantizationSpec +from torchao.quantization.pt2e.quantizer import Quantizer +from torchao.quantization.pt2e.quantizer import Quantizer as TorchAOQuantizer +from torchao.quantization.pt2e.quantizer import SharedQuantizationSpec as TorchAOSharedQuantizationSpec +from torchao.quantization.pt2e.quantizer.x86_inductor_quantizer import X86InductorQuantizer +from torchao.quantization.pt2e.quantizer.x86_inductor_quantizer import get_default_x86_inductor_quantization_config +from torchao.quantization.pt2e.utils import _fuse_conv_bn_ import nncf from nncf.common.graph import NNCFGraph From 096347b58a82ab395aba70d49fdb8c40e040da46 Mon Sep 17 00:00:00 2001 From: anzr299 Date: Thu, 15 Jan 2026 20:37:20 +0400 Subject: [PATCH 06/12] modify data folder --- tests/executorch/test_ptq.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/executorch/test_ptq.py b/tests/executorch/test_ptq.py index a3da8cf8506..0d9895250e6 100644 --- a/tests/executorch/test_ptq.py +++ b/tests/executorch/test_ptq.py @@ -51,7 +51,7 @@ from tests.torch.test_models.synthetic import SimpleConcatModel from tests.torch.test_models.synthetic import YOLO11N_SDPABlock -FX_QUANTIZED_DIR_NAME = TEST_ROOT / "torch" / "data" / "fx" +FX_QUANTIZED_DIR_NAME = TEST_ROOT / "executorch" / "data" / "fx" @dataclass From 9be750232ac905ad7ca4bbec607df64004ed08f5 Mon Sep 17 00:00:00 2001 From: anzr299 Date: Thu, 15 Jan 2026 22:03:00 +0400 Subject: [PATCH 07/12] call executorch precommit on PR --- .github/workflows/executorch.yml | 3 --- 1 file changed, 3 deletions(-) diff --git a/.github/workflows/executorch.yml b/.github/workflows/executorch.yml index 
3abf00e94fc..67b4b7bb67c 100644 --- a/.github/workflows/executorch.yml +++ b/.github/workflows/executorch.yml @@ -2,9 +2,6 @@ name: ExecuTorch permissions: read-all on: - workflow_dispatch: - schedule: - - cron: '0 0 * * *' pull_request: paths: - 'src/nncf/experimental/quantization/algorithms/range_estimator/*' From 414b74b9d2a2460825d14bb0d7d0f7f6e91208b3 Mon Sep 17 00:00:00 2001 From: anzr299 Date: Thu, 15 Jan 2026 22:04:11 +0400 Subject: [PATCH 08/12] fix file location --- .github/workflows/executorch.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/executorch.yml b/.github/workflows/executorch.yml index 67b4b7bb67c..8149334d6c6 100644 --- a/.github/workflows/executorch.yml +++ b/.github/workflows/executorch.yml @@ -2,12 +2,15 @@ name: ExecuTorch permissions: read-all on: + workflow_dispatch: + schedule: + - cron: '0 0 * * *' pull_request: paths: - 'src/nncf/experimental/quantization/algorithms/range_estimator/*' - 'src/nncf/experimental/quantization/algorithms/post_training/*' - 'src/nncf/experimental/quantization/algorithms/weight_compression/*' - - 'tests/torch2/fx/*' + - 'tests/executorch*' - 'src/nncf/experimental/torch/fx/*' - 'src/nncf/quantization/algorithms/algorithm.py' From e55c59907308dc7bde735750aabcf9603b84584a Mon Sep 17 00:00:00 2001 From: anzr299 Date: Thu, 15 Jan 2026 22:07:25 +0400 Subject: [PATCH 09/12] remove OVQuantizer --- .../quantizer/openvino_quantizer.py | 363 ------------------ 1 file changed, 363 deletions(-) delete mode 100644 src/nncf/experimental/torch/fx/quantization/quantizer/openvino_quantizer.py diff --git a/src/nncf/experimental/torch/fx/quantization/quantizer/openvino_quantizer.py b/src/nncf/experimental/torch/fx/quantization/quantizer/openvino_quantizer.py deleted file mode 100644 index 170abca504b..00000000000 --- a/src/nncf/experimental/torch/fx/quantization/quantizer/openvino_quantizer.py +++ /dev/null @@ -1,363 +0,0 @@ -# Copyright (c) 2026 Intel Corporation -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# http://www.apache.org/licenses/LICENSE-2.0 -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from collections import defaultdict -from typing import Optional, Union - -import torch.fx -from torch.ao.quantization.observer import HistogramObserver -from torch.ao.quantization.observer import PerChannelMinMaxObserver -from torch.ao.quantization.quantizer.quantizer import EdgeOrNode -from torch.ao.quantization.quantizer.quantizer import QuantizationAnnotation as TorchAOQuantizationAnnotation -from torch.ao.quantization.quantizer.quantizer import QuantizationSpec as TorchAOQuantizationSpec -from torch.ao.quantization.quantizer.quantizer import QuantizationSpecBase as TorchAOQuantizationSpecBase -from torch.ao.quantization.quantizer.quantizer import Quantizer as TorchAOQuantizer -from torch.ao.quantization.quantizer.quantizer import SharedQuantizationSpec as TorchAOSharedQuantizationSpec - -import nncf -from nncf import IgnoredScope -from nncf import ModelType -from nncf import OverflowFix -from nncf import QuantizationMode -from nncf import QuantizationPreset -from nncf import TargetDevice -from nncf.common.graph.graph import NNCFGraph -from nncf.common.logging import nncf_logger -from nncf.common.quantization.quantizer_propagation.structs import QuantizerPropagationRule -from nncf.common.quantization.quantizer_setup import QuantizationPointBase -from nncf.common.quantization.quantizer_setup import SingleConfigQuantizerSetup -from nncf.common.quantization.structs import QuantizationScheme -from nncf.common.utils.api_marker import api -from nncf.experimental.torch.fx.nncf_graph_builder import GraphConverter -from nncf.experimental.torch.fx.node_utils import get_graph_node_by_name -from nncf.quantization.advanced_parameters import FP8QuantizationParameters -from nncf.quantization.advanced_parameters import QuantizationParameters -from nncf.quantization.algorithms.min_max.algorithm import MinMaxQuantization -from nncf.torch.model_graph_manager import get_weight_tensor_port_ids - -QUANT_ANNOTATION_KEY = "quantization_annotation" - - -@api(canonical_alias="nncf.experimental.torch.fx.OpenVINOQuantizer") -class OpenVINOQuantizer(TorchAOQuantizer): - """ - Implementation of the Torch AO quantizer which annotates models with quantization annotations - optimally for the inference via OpenVINO. - - :param mode: Defines optimization mode for the algorithm. None by default. - :param preset: A preset controls the quantization mode (symmetric and asymmetric). - It can take the following values: - - `performance`: Symmetric quantization of weights and activations. - - `mixed`: Symmetric quantization of weights and asymmetric quantization of activations. - Default value is None. In this case, `mixed` preset is used for `transformer` - model type otherwise `performance`. - :param target_device: A target device the specificity of which will be taken - into account while compressing in order to obtain the best performance - for this type of device, defaults to TargetDevice.ANY. - :param model_type: Model type is needed to specify additional patterns - in the model. Supported only `transformer` now. - :param ignored_scope: An ignored scope that defined the list of model control - flow graph nodes to be ignored during quantization. - :param overflow_fix: This option controls whether to apply the overflow issue - fix for the 8-bit quantization. - :param quantize_outputs: Whether to insert additional quantizers right before - each of the model outputs. - :param activations_quantization_params: Quantization parameters for model - activations. 
- :param weights_quantization_params: Quantization parameters for model weights. - :param quantizer_propagation_rule: The strategy to be used while propagating and merging quantizers. - MERGE_ALL_IN_ONE by default. - """ - - def __init__( - self, - *, - mode: Optional[QuantizationMode] = None, - preset: Optional[QuantizationPreset] = None, - target_device: TargetDevice = TargetDevice.ANY, - model_type: Optional[ModelType] = None, - ignored_scope: Optional[IgnoredScope] = None, - overflow_fix: Optional[OverflowFix] = None, - quantize_outputs: bool = False, - activations_quantization_params: Optional[Union[QuantizationParameters, FP8QuantizationParameters]] = None, - weights_quantization_params: Optional[Union[QuantizationParameters, FP8QuantizationParameters]] = None, - quantizer_propagation_rule: QuantizerPropagationRule = QuantizerPropagationRule.MERGE_ALL_IN_ONE, - ): - self._min_max_algo = MinMaxQuantization( - mode=mode, - preset=preset, - target_device=target_device, - model_type=model_type, - ignored_scope=ignored_scope, - overflow_fix=overflow_fix, - quantize_outputs=quantize_outputs, - activations_quantization_params=activations_quantization_params, - weights_quantization_params=weights_quantization_params, - quantizer_propagation_rule=quantizer_propagation_rule, - ) - - def set_ignored_scope( - self, - names: Optional[list[str]] = None, - patterns: Optional[list[str]] = None, - types: Optional[list[str]] = None, - subgraphs: Optional[list[tuple[list[str], list[str]]]] = None, - validate: bool = True, - ) -> None: - """ - Provides an option to specify portions of model to be excluded from compression. - The ignored scope defines model sub-graphs that should be excluded from the quantization process. - - :param names: List of ignored node names. - :param patterns: List of regular expressions that define patterns for names of ignored nodes. - :param types: List of ignored operation types. - :param subgraphs: List of ignored subgraphs. - :param validate: If set to True, then a RuntimeError will be raised if any ignored scope does not match - in the model graph. - """ - self._min_max_algo.set_ignored_scope( - nncf.IgnoredScope( - names=names or [], - patterns=patterns or [], - types=types or [], - subgraphs=subgraphs or [], - validate=validate, - ) - ) - - def get_nncf_quantization_setup( - self, model: torch.fx.GraphModule, nncf_graph: NNCFGraph - ) -> SingleConfigQuantizerSetup: - self._min_max_algo._set_backend_entity(model) - return self._min_max_algo.find_quantization_setup(model, nncf_graph) - - def annotate(self, model: torch.fx.GraphModule) -> torch.fx.GraphModule: - """ - Adds quantization annotations to the nodes in the model graph in-place. - - :param model: A torch.fx.GraphModule to annotate. - :return: The torch.fx.GraphModule with updated annotations. 
- """ - nncf_graph = GraphConverter.create_nncf_graph(model) - quantization_setup = self.get_nncf_quantization_setup(model, nncf_graph) - - graph = model.graph - node_vs_torch_annotation = defaultdict(TorchAOQuantizationAnnotation) - - for qp in quantization_setup.quantization_points.values(): - edge_or_node, annotation = self._get_edge_or_node_and_annotation( - graph, nncf_graph, qp, node_vs_torch_annotation - ) - qspec = self._get_torch_ao_qspec_from_qp(qp) - self._fill_torch_ao_annotation(edge_or_node, qspec, annotation) - - for quantizer_ids in quantization_setup.unified_scale_groups.values(): - root_quantizer_id = self._get_unified_scales_root_quantizer_id( - nncf_graph, quantizer_ids, quantization_setup - ) - root_qp = quantization_setup.quantization_points[root_quantizer_id] - - if any(root_qp.qconfig != quantization_setup.quantization_points[q_id].qconfig for q_id in quantizer_ids): - qps = [quantization_setup.quantization_points[q_id] for q_id in quantizer_ids] - msg = ( - "Different quantization configs are set to one unified scale group:" - f"{[(qp.insertion_point.__dict__, str(qp.qconfig)) for qp in qps]}" - ) - raise nncf.InternalError(msg) - - root_target_node = get_graph_node_by_name(graph, root_qp.insertion_point.target_node_name) - root_edge_or_node = self._get_edge_or_node(root_target_node, root_qp, nncf_graph) - - for quantizer_id in quantizer_ids: - if quantizer_id == root_quantizer_id: - continue - - qspec = TorchAOSharedQuantizationSpec(root_edge_or_node) - qp = quantization_setup.quantization_points[quantizer_id] - edge_or_node, annotation = self._get_edge_or_node_and_annotation( - graph, nncf_graph, qp, node_vs_torch_annotation - ) - self._fill_torch_ao_annotation(edge_or_node, qspec, annotation) - - for node, annotation in node_vs_torch_annotation.items(): - assert QUANT_ANNOTATION_KEY not in node.meta - node.meta[QUANT_ANNOTATION_KEY] = annotation - return model - - @staticmethod - def _get_unified_scales_root_quantizer_id( - nncf_graph: NNCFGraph, quantizer_ids: list[int], quantizer_setup: SingleConfigQuantizerSetup - ) -> int: - """ - Identifies the earliest quantizer node ID based on the corresponding `nncf_node.node_id` - in the given NNCFGraph. This is required by the `_get_obs_or_fq_map` function. - Refer to: https://github.com/pytorch/pytorch/blob/main/torch/ao/quantization/pt2e/prepare.py#L291 - - :param nncf_graph: The NNCFGraph instance. - :param quantizer_ids: The list of quantizer IDs to evaluate. - :param quantizer_setup: The instance of SingleConfigQuantizerSetup. - :return: The ID of the earliest quantizer node in terms of `nncf_node.node_id`. - """ - nncf_node_quantizer_id = None - root_quantizer_id = None - for quantizer_id in quantizer_ids: - target_node_name = quantizer_setup.quantization_points[quantizer_id].insertion_point.target_node_name - nncf_node = nncf_graph.get_node_by_name(target_node_name) - if nncf_node_quantizer_id is None or nncf_node.node_id < nncf_node_quantizer_id: - root_quantizer_id = quantizer_id - nncf_node_quantizer_id = nncf_node.node_id - return root_quantizer_id - - @staticmethod - def _get_edge_or_node_and_annotation( - graph: torch.fx.Graph, - nncf_graph: NNCFGraph, - qp: QuantizationPointBase, - node_vs_torch_annotation: dict[torch.fx.Node, TorchAOQuantizationAnnotation], - ) -> tuple[EdgeOrNode, TorchAOQuantizationAnnotation]: - """ - Retrieves the edge or node and its corresponding TorchAOQuantizationAnnotation based on the given graph, - quantization point, and node-to-annotation mapping. 
- - :param graph: torch.fx.Graph instance. - :param nncf_graph: NNCFGraph instance. - :param qp: QuantizationPointBase instance. - :param node_vs_torch_annotation: A dictionary mapping torch.fx.GraphNode objects to their respective - TorchAOQuantizationAnnotations. - :return: A tuple containing the EdgeOrNode and its associated TorchAOQuantizationAnnotation. - """ - target_node = get_graph_node_by_name(graph, qp.insertion_point.target_node_name) - annotation = node_vs_torch_annotation[target_node] - edge_or_node = OpenVINOQuantizer._get_edge_or_node(target_node, qp, nncf_graph) - return edge_or_node, annotation - - @staticmethod - def _get_edge_or_node(target_node: torch.fx.Node, qp: QuantizationPointBase, nncf_graph: NNCFGraph) -> EdgeOrNode: - """ - Returns the edge or node based on the given target node and quantization point. - - :param target_node: Target node instance. - :param qp: QuantizationPointBase instance. - :param graph: NNCFGraph instance. - :return: The corresponding EdgeOrNode derived from the target node and quantization point. - """ - ip = qp.insertion_point - if qp.is_weight_quantization_point(): - nncf_node = nncf_graph.get_node_by_name(target_node.name) - weights_ports_ids = get_weight_tensor_port_ids(nncf_node, nncf_graph) - if len(weights_ports_ids) > 1: - # TODO(dlyakhov): support quantization for nodes with several weights - nncf_logger.warning( - f"Quantization of the weighted node {target_node.name}" - " is not yet supported by the OpenVINOQuantizer." - f" Only the weight on port ID {weights_ports_ids[0]} will be quantized." - f" Quantizable weights are located on ports: {weights_ports_ids}." - ) - weight_node = target_node.all_input_nodes[weights_ports_ids[0]] - return (weight_node, target_node) - - if ip.input_port_id is None: - return target_node - - node = target_node.all_input_nodes[ip.input_port_id] - return (node, target_node) - - @staticmethod - def _fill_torch_ao_annotation( - edge_or_node: EdgeOrNode, - qspec: TorchAOQuantizationSpecBase, - annotation_to_update: TorchAOQuantizationAnnotation, - ) -> None: - """ - Helper method to update the annotation_to_update based on the specified edge_or_node and qspec. - - :param edge_or_node: The target EdgeOrNode to be used for the update. - :param qspec: An instance of TorchAOQuantizationSpecBase representing the quantization specification to apply. - :param annotation_to_update: The annotation to update based on the edge_or_node and qspec. - """ - if isinstance(edge_or_node, torch.fx.Node): - annotation_to_update.output_qspec = qspec - else: - annotation_to_update.input_qspec_map[edge_or_node[0]] = qspec - - @staticmethod - def _get_torch_ao_qspec_from_qp(qp: QuantizationPointBase) -> TorchAOQuantizationSpec: - """ - Retrieves the quantization configuration from the given quantization point and - converts it into a TorchAOQuantizationSpec. - - :param qp: An instance of QuantizationPointBase. - :return: A TorchAOQuantizationSpec retrieved and converted from the quantization point. 
- """ - # Eps value is copied from nncf/torch/quantization/layers.py - extra_args = {"eps": 1e-16} - qconfig = qp.qconfig - is_weight = qp.is_weight_quantization_point() - - if qconfig.per_channel: - torch_qscheme = ( - torch.per_channel_symmetric - if qconfig.mode is QuantizationScheme.SYMMETRIC - else torch.per_channel_affine - ) - else: - torch_qscheme = ( - torch.per_tensor_symmetric if qconfig.mode is QuantizationScheme.SYMMETRIC else torch.per_tensor_affine - ) - if is_weight: - observer = PerChannelMinMaxObserver - quant_min = -128 - quant_max = 127 - dtype = torch.int8 - channel_axis = 0 - else: - observer = ( - HistogramObserver - if torch_qscheme in [torch.per_tensor_symmetric, torch.per_tensor_affine] - else PerChannelMinMaxObserver - ) - quant_min = 0 - quant_max = 255 - dtype = torch.int8 if qconfig.signedness_to_force else torch.uint8 - channel_axis = 1 # channel dim for activations - return TorchAOQuantizationSpec( - dtype=dtype, - observer_or_fake_quant_ctr=observer.with_args(**extra_args), - quant_min=quant_min, - quant_max=quant_max, - qscheme=torch_qscheme, - ch_axis=channel_axis, - is_dynamic=False, - ) - - def validate(self, model: torch.fx.GraphModule) -> None: - """ - Validates the annotated model before the insertion of FakeQuantizers / observers. - - :param model: Annotated torch.fx.GraphModule to validate after the annotation. - """ - pass - - def transform_for_annotation(self, model: torch.fx.GraphModule) -> torch.fx.GraphModule: - """ - Allows for user defined transforms to run before annotating the graph. - This allows quantizer to allow quantizing part of the model that are otherwise not quantizable. - For example quantizer can - a) decompose a compound operator like scaled dot product attention, - into bmm and softmax if quantizer knows how to quantize bmm/softmax but not sdpa - or b) transform scalars to tensor to allow quantizing scalars. - - Note: this is an optional method - - :param model: Given torch.fx.GraphModule to transform before the annotation. - :return: The transformed torch.fx.GraphModule ready for the annotation. - """ - return model From 2f54bd53cb218b14604150d5078e8a347dbd2d82 Mon Sep 17 00:00:00 2001 From: anzr299 Date: Thu, 15 Jan 2026 22:21:05 +0400 Subject: [PATCH 10/12] conditional import of openvino quantizer --- .../torch/fx/quantization/quantizer/openvino_adapter.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/nncf/experimental/torch/fx/quantization/quantizer/openvino_adapter.py b/src/nncf/experimental/torch/fx/quantization/quantizer/openvino_adapter.py index 63c4c4c6ff1..8d4abad1d43 100644 --- a/src/nncf/experimental/torch/fx/quantization/quantizer/openvino_adapter.py +++ b/src/nncf/experimental/torch/fx/quantization/quantizer/openvino_adapter.py @@ -13,10 +13,16 @@ import torch.fx +import nncf from nncf.common.graph.graph import NNCFGraph from nncf.common.quantization.quantizer_setup import SingleConfigQuantizerSetup from nncf.experimental.quantization.quantizer import Quantizer -from nncf.experimental.torch.fx.quantization.quantizer.openvino_quantizer import OpenVINOQuantizer + +try: + from executorch.backends.openvino.quantizer.quantizer import OpenVINOQuantizer +except ModuleNotFoundError as err: + msg = "OpenVINO Quantizer could not be imported from Executorch. 
Please Install Executorch" + raise nncf.ModuleNotFoundError(msg) from err from nncf.quantization.algorithms.weight_compression.config import WeightCompressionParameters From 4c4cd3128180596427ebc9963fbd67564be00037 Mon Sep 17 00:00:00 2001 From: anzr299 Date: Thu, 15 Jan 2026 23:27:10 +0400 Subject: [PATCH 11/12] replace all torch.ao instances with torchao --- .../common/tensor_statistics/collectors.py | 2 +- src/nncf/experimental/torch/fx/__init__.py | 1 - .../torch/fx/quantization/quantize_model.py | 6 ++-- .../torch/fx/quantization/quantize_pt2e.py | 33 ++++++++++++------- .../quantizer/openvino_adapter.py | 12 +++---- .../quantizer/torch_ao_adapter.py | 19 +++++------ .../experimental/torch/fx/transformations.py | 6 ++-- .../algorithms/min_max/torch_fx_backend.py | 5 +-- src/nncf/torch/quantization/strip.py | 5 +-- tests/executorch/test_ptq.py | 4 +-- .../pipelines/image_classification_base.py | 6 ++-- tests/torch/fx/test_model_transformer.py | 7 ++-- 12 files changed, 57 insertions(+), 49 deletions(-) diff --git a/src/nncf/common/tensor_statistics/collectors.py b/src/nncf/common/tensor_statistics/collectors.py index 4a44d3bf592..df65825a112 100644 --- a/src/nncf/common/tensor_statistics/collectors.py +++ b/src/nncf/common/tensor_statistics/collectors.py @@ -938,7 +938,7 @@ def _aggregate_impl(self) -> Tensor: class HistogramAggregator(AggregatorBase): """ - NNCF implementation of the torch.ao.quantization.observer.HistogramObserver. + NNCF implementation of the torchao.quantization.pt2e.observer.HistogramObserver. Intended to be combined with a single RawReducer. The aggregator records the running histogram of the input tensor values along with min/max values. Only the reduction_axis==None is supported. diff --git a/src/nncf/experimental/torch/fx/__init__.py b/src/nncf/experimental/torch/fx/__init__.py index 79350c12855..0c6cfb97597 100644 --- a/src/nncf/experimental/torch/fx/__init__.py +++ b/src/nncf/experimental/torch/fx/__init__.py @@ -11,4 +11,3 @@ from nncf.experimental.torch.fx.quantization.quantize_pt2e import compress_pt2e as compress_pt2e from nncf.experimental.torch.fx.quantization.quantize_pt2e import quantize_pt2e as quantize_pt2e -from nncf.experimental.torch.fx.quantization.quantizer.openvino_quantizer import OpenVINOQuantizer as OpenVINOQuantizer diff --git a/src/nncf/experimental/torch/fx/quantization/quantize_model.py b/src/nncf/experimental/torch/fx/quantization/quantize_model.py index 17f895f54ff..ca543c9ede6 100644 --- a/src/nncf/experimental/torch/fx/quantization/quantize_model.py +++ b/src/nncf/experimental/torch/fx/quantization/quantize_model.py @@ -14,11 +14,11 @@ import torch import torch.fx -from torch.ao.quantization.pt2e.port_metadata_pass import PortNodeMetaForQDQ -from torch.ao.quantization.pt2e.qat_utils import _fold_conv_bn_qat -from torch.ao.quantization.pt2e.utils import _disallow_eval_train from torch.fx import GraphModule from torch.fx.passes.infra.pass_manager import PassManager +from torchao.quantization.pt2e.qat_utils import _fold_conv_bn_qat +from torchao.quantization.pt2e.quantizer import PortNodeMetaForQDQ +from torchao.quantization.pt2e.utils import _disallow_eval_train import nncf from nncf.common.factory import build_graph diff --git a/src/nncf/experimental/torch/fx/quantization/quantize_pt2e.py b/src/nncf/experimental/torch/fx/quantization/quantize_pt2e.py index 396dd2e87cb..5e1850fd5e0 100644 --- a/src/nncf/experimental/torch/fx/quantization/quantize_pt2e.py +++ b/src/nncf/experimental/torch/fx/quantization/quantize_pt2e.py @@ -8,18 
+8,17 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - from copy import deepcopy from typing import Optional import torch import torch.fx -from torch.ao.quantization.pt2e.port_metadata_pass import PortNodeMetaForQDQ -from torch.ao.quantization.pt2e.utils import _disallow_eval_train -from torch.ao.quantization.pt2e.utils import _fuse_conv_bn_ -from torch.ao.quantization.quantizer import Quantizer from torch.fx import GraphModule from torch.fx.passes.infra.pass_manager import PassManager +from torchao.quantization.pt2e.quantizer import PortNodeMetaForQDQ +from torchao.quantization.pt2e.quantizer.quantizer import Quantizer +from torchao.quantization.pt2e.utils import _disallow_eval_train +from torchao.quantization.pt2e.utils import _fuse_conv_bn_ import nncf from nncf import AdvancedCompressionParameters @@ -32,7 +31,6 @@ from nncf.experimental.quantization.algorithms.weight_compression.algorithm import WeightsCompression from nncf.experimental.torch.fx.constant_folding import constant_fold from nncf.experimental.torch.fx.quantization.quantizer.openvino_adapter import OpenVINOQuantizerAdapter -from nncf.experimental.torch.fx.quantization.quantizer.openvino_quantizer import OpenVINOQuantizer from nncf.experimental.torch.fx.quantization.quantizer.torch_ao_adapter import TorchAOQuantizerAdapter from nncf.experimental.torch.fx.transformations import QUANTIZE_NODE_TARGETS from nncf.experimental.torch.fx.transformations import DuplicateDQPassNoAnnotations @@ -42,6 +40,19 @@ from nncf.quantization.range_estimator import RangeEstimatorParameters +def _is_openvino_quantizer_instance(obj) -> bool: + """ + Safely check if an object is instance of OpenVINOQuantizer. + This is to avoid a circular import + """ + try: + from executorch.backends.openvino.quantizer.quantizer import OpenVINOQuantizer + except ModuleNotFoundError as err: + msg = "OpenVINO Quantizer could not be imported from Executorch. Please install Executorch." + raise nncf.ModuleNotFoundError(msg) from err + return isinstance(obj, OpenVINOQuantizer) + + @api(canonical_alias="nncf.experimental.torch.fx.quantize_pt2e") def quantize_pt2e( model: torch.fx.GraphModule, @@ -60,7 +71,7 @@ def quantize_pt2e( ) -> torch.fx.GraphModule: """ Applies post-training quantization to the torch.fx.GraphModule provided model - using provided torch.ao quantizer. + using provided torchao quantizer. :param model: A torch.fx.GraphModule instance to be quantized. 
:param quantizer: Torch ao quantizer to annotate nodes in the graph with quantization setups @@ -103,7 +114,7 @@ def quantize_pt2e( model = deepcopy(model) _fuse_conv_bn_(model) - if isinstance(quantizer, OpenVINOQuantizer) or hasattr(quantizer, "get_nncf_quantization_setup"): + if _is_openvino_quantizer_instance(quantizer) or hasattr(quantizer, "get_nncf_quantization_setup"): quantizer = OpenVINOQuantizerAdapter(quantizer) else: quantizer = TorchAOQuantizerAdapter(quantizer) @@ -130,7 +141,7 @@ def quantize_pt2e( quantized_model = GraphModule(quantized_model, quantized_model.graph) if fold_quantize: - if isinstance(quantizer, OpenVINOQuantizerAdapter): + if _is_openvino_quantizer_instance(quantizer): compress_post_quantize_transformation(quantized_model) else: constant_fold(quantized_model, _quant_node_constraint) @@ -178,7 +189,7 @@ def compress_pt2e( advanced_parameters: Optional[AdvancedCompressionParameters] = None, ) -> torch.fx.GraphModule: """ - Applies Weight Compression to the torch.fx.GraphModule model using provided torch.ao quantizer. + Applies Weight Compression to the torch.fx.GraphModule model using provided torchao quantizer. :param model: A torch.fx.GraphModule instance to be quantized. :param quantizer: Torch ao quantizer to annotate nodes in the graph with quantization setups @@ -196,7 +207,7 @@ def compress_pt2e( preserve the accuracy of the model, the more sensitive layers receive a higher precision. :param advanced_parameters: Advanced parameters for algorithms in the compression pipeline. """ - if isinstance(quantizer, OpenVINOQuantizer) or hasattr(quantizer, "get_nncf_weight_compression_parameters"): + if _is_openvino_quantizer_instance(quantizer) or hasattr(quantizer, "get_nncf_weight_compression_parameters"): quantizer = OpenVINOQuantizerAdapter(quantizer) compression_format = nncf.CompressionFormat.DQ else: diff --git a/src/nncf/experimental/torch/fx/quantization/quantizer/openvino_adapter.py b/src/nncf/experimental/torch/fx/quantization/quantizer/openvino_adapter.py index 8d4abad1d43..90f3ff47e93 100644 --- a/src/nncf/experimental/torch/fx/quantization/quantizer/openvino_adapter.py +++ b/src/nncf/experimental/torch/fx/quantization/quantizer/openvino_adapter.py @@ -9,21 +9,19 @@ # See the License for the specific language governing permissions and # limitations under the License. -from typing import Any +from __future__ import annotations + +from typing import TYPE_CHECKING, Any import torch.fx -import nncf from nncf.common.graph.graph import NNCFGraph from nncf.common.quantization.quantizer_setup import SingleConfigQuantizerSetup from nncf.experimental.quantization.quantizer import Quantizer +from nncf.quantization.algorithms.weight_compression.config import WeightCompressionParameters -try: +if TYPE_CHECKING: from executorch.backends.openvino.quantizer.quantizer import OpenVINOQuantizer -except ModuleNotFoundError as err: - msg = "OpenVINO Quantizer could not be imported from Executorch. 
Please Install Executorch" - raise nncf.ModuleNotFoundError(msg) from err -from nncf.quantization.algorithms.weight_compression.config import WeightCompressionParameters class OpenVINOQuantizerAdapter(Quantizer): diff --git a/src/nncf/experimental/torch/fx/quantization/quantizer/torch_ao_adapter.py b/src/nncf/experimental/torch/fx/quantization/quantizer/torch_ao_adapter.py index cf46603257c..96b8ad50902 100644 --- a/src/nncf/experimental/torch/fx/quantization/quantizer/torch_ao_adapter.py +++ b/src/nncf/experimental/torch/fx/quantization/quantizer/torch_ao_adapter.py @@ -15,11 +15,11 @@ import torch import torch.fx -from torch.ao.quantization.pt2e.prepare import _get_edge_or_node_to_group_id -from torch.ao.quantization.pt2e.prepare import _get_edge_or_node_to_qspec -from torch.ao.quantization.quantizer import Quantizer as TorchAOQuantizer -from torch.ao.quantization.quantizer.quantizer import QuantizationSpec -from torch.ao.quantization.quantizer.quantizer import SharedQuantizationSpec +from torchao.quantization.pt2e.prepare import _get_edge_or_node_to_group_id +from torchao.quantization.pt2e.prepare import _get_edge_or_node_to_qspec +from torchao.quantization.pt2e.quantizer import Quantizer as TorchAOQuantizer +from torchao.quantization.pt2e.quantizer.quantizer import QuantizationSpec +from torchao.quantization.pt2e.quantizer.quantizer import SharedQuantizationSpec import nncf from nncf.common.graph.graph import NNCFGraph @@ -41,7 +41,7 @@ class TorchAOQuantizerAdapter(Quantizer): """ - Implementation of the NNCF Quantizer interface for any given torch.ao quantizer. + Implementation of the NNCF Quantizer interface for any given torchao quantizer. """ def __init__(self, quantizer: TorchAOQuantizer): @@ -110,7 +110,7 @@ def _get_quantization_points( def get_quantizer_config_from_annotated_model(annotated: torch.fx.GraphModule) -> SingleConfigQuantizerSetup: """ Process a torch.fx.GraphModule annotated with quantization specifications - (e.g., via torch.ao observers) and generates a corresponding NNCF quantization setup object, + (e.g., via torchao observers) and generates a corresponding NNCF quantization setup object, which maps quantization configurations to graph edges. :param annotated: A torch.fx.GraphModule that has been annotated with Torch quantization observers. @@ -139,7 +139,7 @@ def get_quantizer_config_from_annotated_model(annotated: torch.fx.GraphModule) - if qspec is None: continue if not isinstance(qspec, QuantizationSpec): - msg = f"Unknown torch.ao quantization spec: {qspec}" + msg = f"Unknown torchao quantization spec: {qspec}" raise nncf.InternalError(msg) if qspec.qscheme in [torch.per_channel_affine, torch.per_channel_symmetric]: @@ -156,9 +156,8 @@ def get_quantizer_config_from_annotated_model(annotated: torch.fx.GraphModule) - if qspec.qscheme in [torch.per_channel_symmetric, torch.per_tensor_symmetric] else QuantizationMode.ASYMMETRIC ) - # QuantizationSpec may have quant_min and quant_max attributes set to None. - # torch.ao.prepare_pt2e treats such occurrences as a signal + # torchao.prepare_pt2e treats such occurrences as a signal # that the full range of values should be used for quant_min and quant_max. # Therefore, the narrow_range parameter is set to False in this case. 
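        # For example (hypothetical spec values, not taken from this patch): a
        # QuantizationSpec(dtype=torch.int8, quant_min=None, quant_max=None) is read as
        # the full [-128, 127] range and keeps narrow_range False, while an explicit
        # quant_min=-127, quant_max=127 pair spans one level fewer and would typically
        # be mapped to a narrowed symmetric range.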
if qspec.quant_min is None or qspec.quant_max is None: diff --git a/src/nncf/experimental/torch/fx/transformations.py b/src/nncf/experimental/torch/fx/transformations.py index 49579afb906..3f50c3c69ad 100644 --- a/src/nncf/experimental/torch/fx/transformations.py +++ b/src/nncf/experimental/torch/fx/transformations.py @@ -15,12 +15,12 @@ import torch import torch.fx -from torch.ao.quantization.fx.utils import create_getattr_from_value -from torch.ao.quantization.pt2e.utils import _fuse_conv_bn_ from torch.fx.node import map_arg from torch.fx.passes.infra.pass_base import PassBase from torch.fx.passes.infra.pass_base import PassResult from torch.quantization.fake_quantize import FakeQuantize +from torchao.quantization.pt2e.utils import _fuse_conv_bn_ +from torchao.quantization.pt2e.utils import create_getattr_from_value import nncf import nncf.torch @@ -382,7 +382,7 @@ def insert_one_qdq(model: torch.fx.GraphModule, target_point: PTTargetPoint, qua target node. :param quantizer: Quantizer module to inherit quantization parameters from. """ - # Copied from torch.ao.quantization.quantize_pt2e.convert_pt2e + # Copied from torchao.quantization.pt2e.quantize_pt2e.convert_pt2e # 1. extract information for inserting q/dq node from activation_post_process node_type = "call_function" quantize_op: Optional[Callable] = None diff --git a/src/nncf/quantization/algorithms/min_max/torch_fx_backend.py b/src/nncf/quantization/algorithms/min_max/torch_fx_backend.py index ad411e0e4fa..dd9ce2ef32b 100644 --- a/src/nncf/quantization/algorithms/min_max/torch_fx_backend.py +++ b/src/nncf/quantization/algorithms/min_max/torch_fx_backend.py @@ -12,6 +12,7 @@ from typing import Optional import torch +import torchao from torch.quantization.fake_quantize import FakeQuantize import nncf @@ -203,9 +204,9 @@ def _create_quantizer( ) if per_channel: - observer = torch.ao.quantization.observer.PerChannelMinMaxObserver + observer = torchao.quantization.pt2e.observer.PerChannelMinMaxObserver else: - observer = torch.ao.quantization.observer.MinMaxObserver + observer = torchao.quantization.observer.MinMaxObserver if dtype is TensorDataType.int8: level_high = 127 diff --git a/src/nncf/torch/quantization/strip.py b/src/nncf/torch/quantization/strip.py index 1e071ad2729..9dde31caa43 100644 --- a/src/nncf/torch/quantization/strip.py +++ b/src/nncf/torch/quantization/strip.py @@ -49,11 +49,12 @@ def convert_to_torch_fakequantizer(nncf_quantizer: BaseQuantizer) -> FakeQuantiz scale_shape = nncf_quantizer.scale_shape ch_axis = int(np.argmax(scale_shape)) dtype = torch.qint8 if nncf_quantizer.level_low < 0 else torch.quint8 + import torchao if per_channel: - observer = torch.ao.quantization.observer.PerChannelMinMaxObserver + observer = torchao.quantization.pt2e.observer.PerChannelMinMaxObserver else: - observer = torch.ao.quantization.observer.MinMaxObserver + observer = torchao.quantization.pt2e.observer.MinMaxObserver if isinstance(nncf_quantizer, SymmetricQuantizer): qscheme = torch.per_channel_symmetric if per_channel else torch.per_tensor_symmetric diff --git a/tests/executorch/test_ptq.py b/tests/executorch/test_ptq.py index 0d9895250e6..0a50f3081c4 100644 --- a/tests/executorch/test_ptq.py +++ b/tests/executorch/test_ptq.py @@ -177,8 +177,8 @@ def test_quantized_model( compare_nx_graph_with_reference(nx_graph, path_to_dot.as_posix()) # Uncomment to visualize reference graphs - # from torch.ao.quantization.quantize_pt2e import convert_pt2e - # from torch.ao.quantization.quantize_pt2e import prepare_pt2e + # from 
torchao.quantization.pt2e.quantize_pt2e import convert_pt2e + # from torchao.quantization.pt2e.quantize_pt2e import prepare_pt2e # from tests.torch2.fx.helpers import visualize_fx_model # prepared_model = prepare_pt2e(fx_model, quantizer) # prepared_model(example_input) diff --git a/tests/post_training/pipelines/image_classification_base.py b/tests/post_training/pipelines/image_classification_base.py index 129cb875f71..1171088e7e6 100644 --- a/tests/post_training/pipelines/image_classification_base.py +++ b/tests/post_training/pipelines/image_classification_base.py @@ -24,9 +24,9 @@ import openvino as ov import torch from sklearn.metrics import accuracy_score -from torch.ao.quantization.quantize_pt2e import convert_pt2e -from torch.ao.quantization.quantize_pt2e import prepare_pt2e -from torch.ao.quantization.quantizer.quantizer import Quantizer as TorchAOQuantizer +from torchao.quantization.pt2e.quantize_pt2e import convert_pt2e +from torchao.quantization.pt2e.quantize_pt2e import prepare_pt2e +from torchao.quantization.pt2e.quantizer import Quantizer as TorchAOQuantizer from torchvision import datasets import nncf diff --git a/tests/torch/fx/test_model_transformer.py b/tests/torch/fx/test_model_transformer.py index 05e195e4299..046c0ff3fff 100644 --- a/tests/torch/fx/test_model_transformer.py +++ b/tests/torch/fx/test_model_transformer.py @@ -15,12 +15,11 @@ import pytest import torch -import torch.ao.quantization import torch.fx -from torch.ao.quantization.fx.utils import create_getattr_from_value -from torch.ao.quantization.observer import MinMaxObserver -from torch.ao.quantization.observer import PerChannelMinMaxObserver from torch.quantization.fake_quantize import FakeQuantize +from torchao.quantization.pt2e.observer import MinMaxObserver +from torchao.quantization.pt2e.observer import PerChannelMinMaxObserver +from torchao.quantization.pt2e.utils import create_getattr_from_value import nncf import nncf.common From 77090db7e41d389ab4c0e54642f82eb871bc8097 Mon Sep 17 00:00:00 2001 From: anzr299 Date: Thu, 15 Jan 2026 23:28:19 +0400 Subject: [PATCH 12/12] micro import fix --- src/nncf/quantization/algorithms/min_max/torch_fx_backend.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/nncf/quantization/algorithms/min_max/torch_fx_backend.py b/src/nncf/quantization/algorithms/min_max/torch_fx_backend.py index dd9ce2ef32b..a0ba0dc19e2 100644 --- a/src/nncf/quantization/algorithms/min_max/torch_fx_backend.py +++ b/src/nncf/quantization/algorithms/min_max/torch_fx_backend.py @@ -206,7 +206,7 @@ def _create_quantizer( if per_channel: observer = torchao.quantization.pt2e.observer.PerChannelMinMaxObserver else: - observer = torchao.quantization.observer.MinMaxObserver + observer = torchao.quantization.pt2e.observer.MinMaxObserver if dtype is TensorDataType.int8: level_high = 127
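# A minimal usage sketch (hypothetical, assuming the torchao pt2e layout used elsewhere
# in this series): after this fix both observer branches resolve from the same module.
from torchao.quantization.pt2e.observer import MinMaxObserver
from torchao.quantization.pt2e.observer import PerChannelMinMaxObserver

per_channel = True  # assumed flag, mirroring the branch in _create_quantizer
observer = PerChannelMinMaxObserver if per_channel else MinMaxObserver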