
Commit b480ef0

WIP
1 parent 555dfc9 commit b480ef0

File tree

24 files changed (+8883 −8753 lines)


Diff for: nncf/common/quantization/quantizer_setup.py

+2 −1
@@ -247,12 +247,13 @@ def __init__(self) -> None:
         self._next_unified_scale_gid = 0
         self._next_shared_inputs_gid = 0
 
-    def add_independent_quantization_point(self, qp: QuantizationPointBase) -> None:
+    def add_independent_quantization_point(self, qp: QuantizationPointBase) -> int:
         if self.quantization_points.keys():
             new_id = max(self.quantization_points.keys()) + 1
         else:
             new_id = 0
         self.quantization_points[new_id] = qp
+        return new_id
 
     def register_unified_scale_group(self, qp_group: List[QuantizationPointId]) -> int:
         for qp_id in qp_group:
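
Returning the new id lets callers collect the ids of freshly added quantization points and group them afterwards, which the adapter change below relies on. A minimal sketch of the contract, using a toy stand-in for SingleConfigQuantizerSetup (the class and names below are illustrative, not NNCF API):

    from typing import Dict, List


    class ToySetup:
        """Toy stand-in mirroring the id-allocation rule of the patched method."""

        def __init__(self) -> None:
            self.quantization_points: Dict[int, str] = {}
            self.unified_scale_groups: List[List[int]] = []

        def add_independent_quantization_point(self, qp: str) -> int:
            # Same rule as the diff: max existing id + 1, or 0 for the first point.
            new_id = max(self.quantization_points) + 1 if self.quantization_points else 0
            self.quantization_points[new_id] = qp
            return new_id

        def register_unified_scale_group(self, qp_group: List[int]) -> int:
            self.unified_scale_groups.append(qp_group)
            return len(self.unified_scale_groups) - 1


    setup = ToySetup()
    ids = [setup.add_independent_quantization_point(qp) for qp in ("qp_a", "qp_b")]
    assert ids == [0, 1]
    setup.register_unified_scale_group(ids)  # returned ids feed the group directly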

Diff for: nncf/experimental/torch/fx/quantization/quantizer/torch_ao_adapter.py

+93 −60
@@ -11,18 +11,18 @@
 
 
 from collections import defaultdict
-from typing import Dict, List, Tuple, Union
+from typing import Any, Dict, List, Tuple, Union
 
 import torch
 import torch.fx
+from torch.ao.quantization.pt2e.prepare import _get_edge_or_node_to_group_id
+from torch.ao.quantization.pt2e.prepare import _get_edge_or_node_to_qspec
 from torch.ao.quantization.quantizer import Quantizer as TorchAOQuantizer
 from torch.ao.quantization.quantizer.quantizer import QuantizationSpec
-from torch.ao.quantization.quantizer.quantizer import QuantizationSpecBase
 from torch.ao.quantization.quantizer.quantizer import SharedQuantizationSpec
 
 import nncf
 from nncf.common.graph.graph import NNCFGraph
-from nncf.common.logging import nncf_logger
 from nncf.common.quantization.quantizer_setup import ActivationQuantizationInsertionPoint
 from nncf.common.quantization.quantizer_setup import QuantizationPointBase
 from nncf.common.quantization.quantizer_setup import SingleConfigQuantizationPoint
@@ -73,6 +73,15 @@ def _get_quantization_points(
         annotated_model: torch.fx.GraphModule,
         qconfig: QuantizerConfig,
     ) -> List[QuantizationPointBase]:
+        """
+        Creates quantization points based on the given nodes and edges.
+
+        :param from_node: The originating node in the computation graph.
+        :param to_nodes: The list of destination nodes of the from_node.
+        :param annotated_model: The torch.fx.GraphModule instance.
+        :param qconfig: The torch.ao quantization configuration.
+        :return: A list of NNCF quantization points.
+        """
         to_n = to_nodes[0]
         if from_node.op == "get_attr":
             _, metatype = GraphConverter.get_node_type_and_metatype(to_n, annotated_model)
@@ -95,78 +104,102 @@ def _get_quantization_points(
         return qps
 
     @staticmethod
-    def _get_node_args(node: torch.fx.Node):
+    def _get_node_args(node: torch.fx.Node) -> Tuple[Any, ...]:
+        """
+        Correctly retrieves arguments of the given node.
+
+        :param node: The given node.
+        :return: The arguments of the given node.
+        """
         if node.target == torch.ops.aten.cat.default:
             return node.args[0]
         return node.args
 
     @staticmethod
-    def get_quantizer_config_from_annotated_model(annotated_model: torch.fx.GraphModule) -> SingleConfigQuantizerSetup:
-        edge_or_node_to_qspec = _get_edge_or_node_to_qspec(annotated_model)
-
-        q_map = defaultdict(list)
-        for edge, qspec in edge_or_node_to_qspec.items():
-            if not isinstance(edge, tuple):
-                continue
-            from_n, to_n = edge
-            q_map[from_n].append(to_n)
+    def get_quantizer_config_from_annotated_model(annotated: torch.fx.GraphModule) -> SingleConfigQuantizerSetup:
+        edge_or_node_to_qspec = _get_edge_or_node_to_qspec(annotated)
+        # A node key means all output edges of that node should be quantized;
+        # an edge key means only that particular edge should be quantized.
+        edge_or_node_to_group_id = _get_edge_or_node_to_group_id(edge_or_node_to_qspec)
+
+        group_id_vs_edges = defaultdict(set)
+        group_id_vs_qspec = {}
+        for edge_or_node, group_id in edge_or_node_to_group_id.items():
+            target_edges = [edge_or_node]
+            if isinstance(edge_or_node, torch.fx.Node):
+                target_edges = []
+                for user in edge_or_node.users:
+                    target_edges.append((edge_or_node, user))
+            group_id_vs_edges[group_id].update(target_edges)
+            # All qspecs should be aligned after the _get_edge_or_node_to_group_id call.
+            group_id_vs_qspec[group_id] = _unwrap_shared_qspec_safe(
+                edge_or_node_to_qspec[edge_or_node], edge_or_node_to_qspec
+            )
 
         q_setup = SingleConfigQuantizerSetup()
-        for from_n, to_nodes in q_map.items():
-            to_n = to_nodes[0]
-            qspec = edge_or_node_to_qspec[(from_n, to_n)]
+        for group_id, edges in group_id_vs_edges.items():
+            qspec = group_id_vs_qspec[group_id]
             if qspec is None:
                 continue
-            if isinstance(qspec, QuantizationSpec):
-                if qspec.qscheme in [torch.per_channel_affine, torch.per_channel_symmetric]:
-                    per_channel = True
-                elif qspec.qscheme in [torch.per_tensor_affine, torch.per_tensor_symmetric]:
-                    per_channel = False
-                else:
-                    msg = f"Unknown qscheme: {qspec.qscheme}"
-                    raise nncf.InternalError(msg)
-                signed = qspec.dtype is torch.int8
-                mode = (
-                    QuantizationMode.SYMMETRIC
-                    if qspec.qscheme in [torch.per_channel_symmetric, torch.per_tensor_symmetric]
-                    else QuantizationMode.ASYMMETRIC
-                )
-                qconfig = QuantizerConfig(mode=mode, signedness_to_force=signed, per_channel=per_channel)
-
-                qps = TorchAOQuantizerAdapter._get_quantization_points(from_n, to_nodes, annotated_model, qconfig)
-                for qp in qps:
-                    q_setup.add_independent_quantization_point(qp)
-
-            elif isinstance(qspec, SharedQuantizationSpec):
-                # TODO(dlyakhov): Support SharedQuantizationSpec
-                nncf_logger.warning(
-                    f"SharedQuantizationSpec is not supported yet; edges {from_n} -> {to_nodes} won't be quantized."
-                )
-            else:
+            if not isinstance(qspec, QuantizationSpec):
                 msg = f"Unknown torch.ao quantization spec: {qspec}"
                 raise nncf.InternalError(msg)
 
+            if qspec.qscheme in [torch.per_channel_affine, torch.per_channel_symmetric]:
+                per_channel = True
+            elif qspec.qscheme in [torch.per_tensor_affine, torch.per_tensor_symmetric]:
+                per_channel = False
+            else:
+                msg = f"Unknown qscheme: {qspec.qscheme}"
+                raise nncf.InternalError(msg)
+
+            signed = qspec.dtype is torch.int8
+            mode = (
+                QuantizationMode.SYMMETRIC
+                if qspec.qscheme in [torch.per_channel_symmetric, torch.per_tensor_symmetric]
+                else QuantizationMode.ASYMMETRIC
+            )
+            narrow_range = qspec.quant_min % 2 != 0
+            qconfig = QuantizerConfig(
+                mode=mode, signedness_to_force=signed, per_channel=per_channel, narrow_range=narrow_range
+            )
+
+            joined_edges = defaultdict(list)
+            for edge in edges:
+                joined_edges[edge[0]].append(edge[1])
+
+            qps = []
+            for from_node, to_nodes in joined_edges.items():
+                qps.extend(TorchAOQuantizerAdapter._get_quantization_points(from_node, to_nodes, annotated, qconfig))
+            qp_ids = []
+            for qp in qps:
+                qp_ids.append(q_setup.add_independent_quantization_point(qp))
+            if len(qp_ids) > 1:
+                q_setup.register_unified_scale_group(qp_ids)
+
         return q_setup
 
 
-def _get_edge_or_node_to_qspec(
-    model: torch.fx.GraphModule,
-) -> Dict[EdgeOrNode, QuantizationSpecBase]:
+def _unwrap_shared_qspec_safe(qspec: QuantizationSpec, edge_or_node_to_qspec: Dict[EdgeOrNode, QuantizationSpec]):
     """
-    Get a map from EdgeOrNode to quantization spec based on annotations on the nodes.
+    Iteratively unwraps a given SharedQuantizationSpec to retrieve its actual QuantizationSpec.
+    It detects cyclic dependencies and enforces a maximum depth limit to prevent infinite recursion.
 
-    :param model: torch.fx.GraphModule instance.
-    :return: A map from EdgeOrNode to quantization spec based on annotations on the nodes.
+    :param qspec: The quantization specification to unwrap.
+    :param edge_or_node_to_qspec: A dictionary mapping EdgeOrNode instances to their respective QuantizationSpec.
+    :return: The resolved QuantizationSpec.
     """
-    edge_or_node_to_qspec: Dict[EdgeOrNode, QuantizationSpecBase] = {}
-    for n in model.graph.nodes:
-        if hasattr(n, "meta") and "quantization_annotation" in n.meta:
-            qa = n.meta["quantization_annotation"]
-            for input_to_n, qspec in qa.input_qspec_map.items():
-                input_edge = (input_to_n, n)
-                edge_or_node_to_qspec[input_edge] = qspec
-            if qa.output_qspec is not None:
-                output_node = n
-                qspec = qa.output_qspec
-                edge_or_node_to_qspec[output_node] = qspec
-    return edge_or_node_to_qspec
+    MAX_DEPTH = 1000
+    i = 0
+    visited = []
+    while i < MAX_DEPTH and isinstance(qspec, SharedQuantizationSpec):
+        if qspec.edge_or_node in visited:
+            msg = f"A cyclic dependency of quantization specs is detected: {visited + [qspec.edge_or_node]}"
+            raise RuntimeError(msg)
+        visited.append(qspec.edge_or_node)
+        qspec = edge_or_node_to_qspec[qspec.edge_or_node]
+        i += 1
+    if i == MAX_DEPTH:
+        msg = f"Shared qspecs reference each other deeper than the limit: {MAX_DEPTH}"
+        raise RuntimeError(msg)
+    return qspec
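
The core of the adapter is the mapping from a torch.ao QuantizationSpec to an NNCF QuantizerConfig. A standalone sketch of the same decision table, assuming only torch (the returned dict stands in for QuantizerConfig; the odd-quant_min narrow-range heuristic mirrors the diff):

    import torch


    def qscheme_to_config(qscheme: torch.qscheme, dtype: torch.dtype, quant_min: int) -> dict:
        """Mirror the diff's QuantizationSpec -> quantizer-config mapping."""
        per_channel = qscheme in (torch.per_channel_affine, torch.per_channel_symmetric)
        if not per_channel and qscheme not in (torch.per_tensor_affine, torch.per_tensor_symmetric):
            raise ValueError(f"Unknown qscheme: {qscheme}")
        symmetric = qscheme in (torch.per_channel_symmetric, torch.per_tensor_symmetric)
        return {
            "mode": "symmetric" if symmetric else "asymmetric",
            "signedness_to_force": dtype is torch.int8,
            "per_channel": per_channel,
            # An odd quant_min (e.g. -127 for narrow symmetric int8) signals narrow range.
            "narrow_range": quant_min % 2 != 0,
        }


    # Symmetric per-tensor int8 with quant_min=-127 -> signed, symmetric, narrow range.
    print(qscheme_to_config(torch.per_tensor_symmetric, torch.int8, -127))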
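
_unwrap_shared_qspec_safe follows chains of SharedQuantizationSpec references until a concrete spec is reached, refusing cycles. The same loop with toy types (SharedRef stands in for SharedQuantizationSpec; all names here are illustrative):

    from typing import Dict, List, Union


    class SharedRef:
        """Toy analogue of SharedQuantizationSpec: points at another table entry."""

        def __init__(self, edge_or_node: str) -> None:
            self.edge_or_node = edge_or_node


    def unwrap(qspec: Union[str, SharedRef], table: Dict[str, Union[str, SharedRef]]) -> str:
        visited: List[str] = []
        while isinstance(qspec, SharedRef):
            if qspec.edge_or_node in visited:
                raise RuntimeError(f"cyclic qspec dependency: {visited + [qspec.edge_or_node]}")
            visited.append(qspec.edge_or_node)
            qspec = table[qspec.edge_or_node]
        return qspec


    table = {"a": SharedRef("b"), "b": SharedRef("c"), "c": "concrete-int8-spec"}
    assert unwrap(table["a"], table) == "concrete-int8-spec"  # a -> b -> c resolves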

Diff for: tests/post_training/pipelines/image_classification_base.py

+46 −8
@@ -12,16 +12,37 @@
 import copy
 import os
 
+from torch.ao.quantization.observer import MovingAverageMinMaxObserver
+
+os.environ["TORCHINDUCTOR_FREEZING"] = "1"
+
+from itertools import islice
+from typing import Optional
+
 import numpy as np
 import openvino as ov
 import torch
+
+# from executorch.backends.arm.quantizer import arm_quantizer
+from executorch.backends.qualcomm.quantizer import quantizer as qualcom_q
 from sklearn.metrics import accuracy_score
+from torch.ao.quantization.quantize_pt2e import convert_pt2e
+from torch.ao.quantization.quantize_pt2e import prepare_pt2e
+from torch.ao.quantization.quantizer import xnnpack_quantizer
+from torch.ao.quantization.quantizer.x86_inductor_quantizer import X86InductorQuantizer
+from torch.ao.quantization.quantizer.x86_inductor_quantizer import get_default_x86_inductor_quantization_config
 from torchvision import datasets
 
 import nncf
+from nncf import AdvancedQuantizationParameters
 from nncf.common.logging.track_progress import track
+from nncf.experimental.torch.fx import OpenVINOQuantizer
+from nncf.experimental.torch.fx import quantize_pt2e
+from nncf.torch import disable_patching
 from tests.post_training.pipelines.base import DEFAULT_VAL_THREADS
 from tests.post_training.pipelines.base import FX_BACKENDS
+from tests.post_training.pipelines.base import FX_EAGER_BACKENDS
+from tests.post_training.pipelines.base import BackendType
 from tests.post_training.pipelines.base import PTQTestPipeline
 
 
@@ -75,14 +96,31 @@ def process_result(request, userdata):
     def _validate_torch_compile(
         self, val_loader: torch.utils.data.DataLoader, predictions: np.ndarray, references: np.ndarray
     ):
-        compiled_model = torch.compile(self.compressed_model.cpu(), backend="openvino")
-        for i, (images, target) in enumerate(val_loader):
-            # W/A for memory leaks when using torch DataLoader and OpenVINO
-            pred = compiled_model(images)
-            pred = torch.argmax(pred, dim=1)
-            predictions[i] = pred.numpy()
-            references[i] = target.numpy()
-        return predictions, references
+        # compiled_model = torch.compile(self.compressed_model, backend="openvino")
+        q_num = 0
+        for node in self.compressed_model.graph.nodes:
+            if ".quantize_per" in str(node.target):
+                q_num += 1
+
+        print(f"Quantize ops num: {q_num}")
+
+        with disable_patching():
+            with torch.no_grad():
+                if self.backend in FX_EAGER_BACKENDS:
+                    # Run such models in eager mode
+                    compiled_model = self.compressed_model
+                    if self.backend in [BackendType.X86_QUANTIZER_AO, BackendType.X86_QUANTIZER_NNCF]:
+                        compiled_model = torch.compile(self.compressed_model)
+                else:
+                    compiled_model = torch.compile(self.compressed_model, backend="openvino")
+
+                for i, (images, target) in enumerate(val_loader):
+                    # W/A for memory leaks when using torch DataLoader and OpenVINO
+                    pred = compiled_model(images)
+                    pred = torch.argmax(pred, dim=1)
+                    predictions[i] = pred.numpy()
+                    references[i] = target.numpy()
+        return predictions, references
 
     def _validate(self) -> None:
         val_dataset = datasets.ImageFolder(root=self.data_dir / "imagenet" / "val", transform=self.transform)
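
The quantize-op count printed above is a quick sanity check that quantizer nodes actually landed in the exported graph. A minimal standalone version of the same check (the Tiny model is a placeholder; after convert_pt2e the count would be non-zero):

    import torch
    import torch.fx


    def count_quantize_ops(gm: torch.fx.GraphModule) -> int:
        # Same predicate as the diff: match quantize_per_tensor / quantize_per_channel calls.
        return sum(1 for node in gm.graph.nodes if ".quantize_per" in str(node.target))


    class Tiny(torch.nn.Module):
        def forward(self, x: torch.Tensor) -> torch.Tensor:
            return torch.relu(x)


    gm = torch.fx.symbolic_trace(Tiny())
    print(count_quantize_ops(gm))  # 0 for an unquantized graph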
