-
Notifications
You must be signed in to change notification settings - Fork 291
[TorchAO] [OVQuantizer] Update Torch AO Dependency and Refactor Executorch Tests #3842
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: develop
Are you sure you want to change the base?
Changes from 51 commits
9dd119c
b62071b
0467fda
4090ae9
716d1be
a3664bc
602f773
096347b
9be7502
414b74b
e55c599
2f54bd5
4c4cd31
77090db
2fdca07
11cc209
cd320cc
248f87e
778ef3b
9d1dc3c
b55de80
558ab32
e1a6b31
25e088a
e6b67eb
4e5edc8
dac8ac1
7ee370b
daa7db8
5fc5401
5690a77
4774caa
a9696d0
e19c369
11854c5
781679c
66c6a95
663d787
4ea7ca4
82d2675
964435c
7ced6db
4fbe6ed
7ab039c
1e7e2a1
59d0623
db81c40
dcab6dd
0b1dbd6
987da71
9fce258
9b556dc
fc76f1f
2e825b0
fdc99eb
e8ad45a
fa98dd1
23a2b7e
f64a0f4
b6275b7
a193bca
9e671a5
f41a16a
ef50f87
711fd78
d84f54f
a119db0
d388f8f
e138d70
2e22915
5964281
dc1db31
c03cab4
8f64c90
25900e3
dde7249
0f67e43
91b8f58
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -4,6 +4,7 @@ openvino==2025.4.1 | |
| # Pytorch | ||
| torch==2.9.0 | ||
| torchvision==0.24.0 | ||
| torchao==0.14.0 | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Aamir, as we discussed with Alexander Suslov, the main idea was to get rid of torch.ao and not use any external dependencies (like torchao) |
||
|
|
||
| # ONNX | ||
| onnx==1.17.0; python_version < '3.13' | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,5 +1,6 @@ | ||
| tensorboard==2.13.0 | ||
| torch==2.9.0 | ||
| torchao==0.14.0 | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why does the torch example require torchao?
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. It was importing torchao in https://github.com/openvinotoolkit/nncf/actions/runs/21142246987/job/60798968378 Perhaps I can make it a lazy import inside the convert_to_torch_fakequantizer function |
||
| numpy>=1.23.5,<2 | ||
| openvino==2025.4.1 | ||
| optimum-intel==1.27.0 | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,5 +1,6 @@ | ||
| tensorboard==2.13.0 | ||
| torch==2.9.0 | ||
| torchao==0.14.0 | ||
| numpy>=1.23.5,<2 | ||
| openvino==2025.4.1 | ||
| optimum-intel==1.27.0 | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -3,4 +3,5 @@ datasets==4.4.1 | |
| openvino==2025.4.1 | ||
| optimum==2.1.0 | ||
| torch==2.9.0 | ||
| torchao==0.14.0 | ||
| torchvision==0.24.0 | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -4,3 +4,4 @@ fastcore==1.11.5 | |
| openvino==2025.4.1 | ||
| torch==2.9.0 | ||
| torchvision==0.24.0 | ||
| torchao==0.14.0 | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -8,18 +8,17 @@ | |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| # See the License for the specific language governing permissions and | ||
| # limitations under the License. | ||
|
|
||
| from copy import deepcopy | ||
| from typing import Optional | ||
|
|
||
| import torch | ||
| import torch.fx | ||
| from torch.ao.quantization.pt2e.port_metadata_pass import PortNodeMetaForQDQ | ||
| from torch.ao.quantization.pt2e.utils import _disallow_eval_train | ||
| from torch.ao.quantization.pt2e.utils import _fuse_conv_bn_ | ||
| from torch.ao.quantization.quantizer import Quantizer | ||
| from torch.fx import GraphModule | ||
| from torch.fx.passes.infra.pass_manager import PassManager | ||
| from torchao.quantization.pt2e.quantizer import PortNodeMetaForQDQ | ||
| from torchao.quantization.pt2e.quantizer.quantizer import Quantizer | ||
| from torchao.quantization.pt2e.utils import _disallow_eval_train | ||
| from torchao.quantization.pt2e.utils import _fuse_conv_bn_ | ||
|
|
||
| import nncf | ||
| from nncf import AdvancedCompressionParameters | ||
|
|
@@ -32,7 +31,6 @@ | |
| from nncf.experimental.quantization.algorithms.weight_compression.algorithm import WeightsCompression | ||
| from nncf.experimental.torch.fx.constant_folding import constant_fold | ||
| from nncf.experimental.torch.fx.quantization.quantizer.openvino_adapter import OpenVINOQuantizerAdapter | ||
| from nncf.experimental.torch.fx.quantization.quantizer.openvino_quantizer import OpenVINOQuantizer | ||
| from nncf.experimental.torch.fx.quantization.quantizer.torch_ao_adapter import TorchAOQuantizerAdapter | ||
| from nncf.experimental.torch.fx.transformations import QUANTIZE_NODE_TARGETS | ||
| from nncf.experimental.torch.fx.transformations import DuplicateDQPassNoAnnotations | ||
|
|
@@ -42,6 +40,19 @@ | |
| from nncf.quantization.range_estimator import RangeEstimatorParameters | ||
|
|
||
|
|
||
| def _is_openvino_quantizer_instance(obj) -> bool: | ||
|
||
| """ | ||
| Safely check if an object is instance of OpenVINOQuantizer. | ||
| This is to avoid a circular import | ||
| """ | ||
| try: | ||
| from executorch.backends.openvino.quantizer.quantizer import OpenVINOQuantizer | ||
|
|
||
| return isinstance(obj, OpenVINOQuantizer) | ||
| except ImportError: | ||
| return False | ||
|
|
||
|
|
||
| @api(canonical_alias="nncf.experimental.torch.fx.quantize_pt2e") | ||
| def quantize_pt2e( | ||
| model: torch.fx.GraphModule, | ||
|
|
@@ -60,7 +71,7 @@ def quantize_pt2e( | |
| ) -> torch.fx.GraphModule: | ||
| """ | ||
| Applies post-training quantization to the torch.fx.GraphModule provided model | ||
| using provided torch.ao quantizer. | ||
| using provided torchao quantizer. | ||
|
|
||
| :param model: A torch.fx.GraphModule instance to be quantized. | ||
| :param quantizer: Torch ao quantizer to annotate nodes in the graph with quantization setups | ||
|
|
@@ -103,7 +114,7 @@ def quantize_pt2e( | |
| model = deepcopy(model) | ||
|
|
||
| _fuse_conv_bn_(model) | ||
| if isinstance(quantizer, OpenVINOQuantizer) or hasattr(quantizer, "get_nncf_quantization_setup"): | ||
| if _is_openvino_quantizer_instance(quantizer) or hasattr(quantizer, "get_nncf_quantization_setup"): | ||
| quantizer = OpenVINOQuantizerAdapter(quantizer) | ||
| else: | ||
| quantizer = TorchAOQuantizerAdapter(quantizer) | ||
|
|
@@ -178,7 +189,7 @@ def compress_pt2e( | |
| advanced_parameters: Optional[AdvancedCompressionParameters] = None, | ||
| ) -> torch.fx.GraphModule: | ||
| """ | ||
| Applies Weight Compression to the torch.fx.GraphModule model using provided torch.ao quantizer. | ||
| Applies Weight Compression to the torch.fx.GraphModule model using provided torchao quantizer. | ||
|
|
||
| :param model: A torch.fx.GraphModule instance to be quantized. | ||
| :param quantizer: Torch ao quantizer to annotate nodes in the graph with quantization setups | ||
|
|
@@ -196,7 +207,7 @@ def compress_pt2e( | |
| preserve the accuracy of the model, the more sensitive layers receive a higher precision. | ||
| :param advanced_parameters: Advanced parameters for algorithms in the compression pipeline. | ||
| """ | ||
| if isinstance(quantizer, OpenVINOQuantizer) or hasattr(quantizer, "get_nncf_weight_compression_parameters"): | ||
| if _is_openvino_quantizer_instance(quantizer) or hasattr(quantizer, "get_nncf_weight_compression_parameters"): | ||
| quantizer = OpenVINOQuantizerAdapter(quantizer) | ||
| compression_format = nncf.CompressionFormat.DQ | ||
| else: | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.