openvinotoolkit · anzr299 · Jan 8, 2026 · Jan 22, 2026 · Jan 22, 2026 · Jan 22, 2026
@@ -149,7 +149,7 @@ jobs:
       - name: Print installed modules
         run: pip list
       - name: Run torch precommit test scope
-        run: pytest -ra -n2 --durations=30 tests/torch -m "not cuda"
+        run: pytest -ra --durations=30 tests/torch/ -m "not cuda"
 
   pytorch-cuda:
     timeout-minutes: 40

@@ -2,8 +2,9 @@
 openvino==2025.4.1
 
 # Pytorch
-torch==2.9.0
-torchvision==0.24.0
+torch==2.10.0
+torchvision==0.25.0
+torchao==0.15.0
 
 # ONNX
 onnx==1.17.0; python_version < '3.13'

@@ -1,5 +1,6 @@
 tensorboard==2.13.0
 torch==2.9.0
+torchao==0.14.0
 numpy>=1.23.5,<2
 openvino==2025.4.1
 optimum-intel==1.27.0

@@ -8,3 +8,4 @@ optimum==2.1.0
 transformers==4.53.0
 lm_eval==0.4.8
 pillow==12.0.0
+torchao==0.14.0
@@ -4,3 +4,4 @@ openvino==2025.4.1
 optimum==2.1.0
 torch==2.9.0
 torchvision==0.24.0
+torchao==0.14.0
@@ -4,3 +4,4 @@ fastcore==1.11.5
 openvino==2025.4.1
 torch==2.9.0
 torchvision==0.24.0
+torchao==0.14.0
@@ -938,7 +938,7 @@ def _aggregate_impl(self) -> Tensor:
 
 class HistogramAggregator(AggregatorBase):
     """
-    NNCF implementation of the torch.ao.quantization.observer.HistogramObserver.
+    NNCF implementation of the torchao.quantization.pt2e.observer.HistogramObserver.
     Intended to be combined with a single RawReducer.
     The aggregator records the running histogram of the input tensor values along with
     min/max values. Only the reduction_axis==None is supported.

@@ -14,11 +14,11 @@
 
 import torch
 import torch.fx
-from torch.ao.quantization.pt2e.port_metadata_pass import PortNodeMetaForQDQ
-from torch.ao.quantization.pt2e.qat_utils import _fold_conv_bn_qat
-from torch.ao.quantization.pt2e.utils import _disallow_eval_train
 from torch.fx import GraphModule
 from torch.fx.passes.infra.pass_manager import PassManager
+from torchao.quantization.pt2e.qat_utils import _fold_conv_bn_qat
+from torchao.quantization.pt2e.quantizer import PortNodeMetaForQDQ
+from torchao.quantization.pt2e.utils import _disallow_eval_train
 
 import nncf
 from nncf.common.factory import build_graph

@@ -8,18 +8,17 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 from copy import deepcopy
 from typing import Optional
 
 import torch
 import torch.fx
-from torch.ao.quantization.pt2e.port_metadata_pass import PortNodeMetaForQDQ
-from torch.ao.quantization.pt2e.utils import _disallow_eval_train
-from torch.ao.quantization.pt2e.utils import _fuse_conv_bn_
-from torch.ao.quantization.quantizer import Quantizer
 from torch.fx import GraphModule
 from torch.fx.passes.infra.pass_manager import PassManager
+from torchao.quantization.pt2e.quantizer import PortNodeMetaForQDQ
+from torchao.quantization.pt2e.quantizer.quantizer import Quantizer
+from torchao.quantization.pt2e.utils import _disallow_eval_train
+from torchao.quantization.pt2e.utils import _fuse_conv_bn_
 
 import nncf
 from nncf import AdvancedCompressionParameters
@@ -60,7 +59,7 @@ def quantize_pt2e(
 ) -> torch.fx.GraphModule:
     """
     Applies post-training quantization to the torch.fx.GraphModule provided model
-    using provided torch.ao quantizer.
+    using provided torchao quantizer.
 
     :param model: A torch.fx.GraphModule instance to be quantized.
     :param quantizer: Torch ao quantizer to annotate nodes in the graph with quantization setups
@@ -178,7 +177,7 @@ def compress_pt2e(
     advanced_parameters: Optional[AdvancedCompressionParameters] = None,
 ) -> torch.fx.GraphModule:
     """
-    Applies Weight Compression to the torch.fx.GraphModule model using provided torch.ao quantizer.
+    Applies Weight Compression to the torch.fx.GraphModule model using provided torchao quantizer.
 
     :param model: A torch.fx.GraphModule instance to be quantized.
     :param quantizer: Torch ao quantizer to annotate nodes in the graph with quantization setups

@@ -13,14 +13,14 @@
 from typing import Optional, Union
 
 import torch.fx
-from torch.ao.quantization.observer import HistogramObserver
-from torch.ao.quantization.observer import PerChannelMinMaxObserver
-from torch.ao.quantization.quantizer.quantizer import EdgeOrNode
-from torch.ao.quantization.quantizer.quantizer import QuantizationAnnotation as TorchAOQuantizationAnnotation
-from torch.ao.quantization.quantizer.quantizer import QuantizationSpec as TorchAOQuantizationSpec
-from torch.ao.quantization.quantizer.quantizer import QuantizationSpecBase as TorchAOQuantizationSpecBase
-from torch.ao.quantization.quantizer.quantizer import Quantizer as TorchAOQuantizer
-from torch.ao.quantization.quantizer.quantizer import SharedQuantizationSpec as TorchAOSharedQuantizationSpec
+from torchao.quantization.pt2e.observer import HistogramObserver
+from torchao.quantization.pt2e.observer import PerChannelMinMaxObserver
+from torchao.quantization.pt2e.quantizer import EdgeOrNode
+from torchao.quantization.pt2e.quantizer import QuantizationAnnotation as TorchAOQuantizationAnnotation
+from torchao.quantization.pt2e.quantizer import QuantizationSpec as TorchAOQuantizationSpec
+from torchao.quantization.pt2e.quantizer import QuantizationSpecBase as TorchAOQuantizationSpecBase
+from torchao.quantization.pt2e.quantizer import Quantizer as TorchAOQuantizer
+from torchao.quantization.pt2e.quantizer import SharedQuantizationSpec as TorchAOSharedQuantizationSpec
 
 import nncf
 from nncf import IgnoredScope

@@ -15,11 +15,11 @@
 
 import torch
 import torch.fx
-from torch.ao.quantization.pt2e.prepare import _get_edge_or_node_to_group_id
-from torch.ao.quantization.pt2e.prepare import _get_edge_or_node_to_qspec
-from torch.ao.quantization.quantizer import Quantizer as TorchAOQuantizer
-from torch.ao.quantization.quantizer.quantizer import QuantizationSpec
-from torch.ao.quantization.quantizer.quantizer import SharedQuantizationSpec
+from torchao.quantization.pt2e.prepare import _get_edge_or_node_to_group_id
+from torchao.quantization.pt2e.prepare import _get_edge_or_node_to_qspec
+from torchao.quantization.pt2e.quantizer import Quantizer as TorchAOQuantizer
+from torchao.quantization.pt2e.quantizer.quantizer import QuantizationSpec
+from torchao.quantization.pt2e.quantizer.quantizer import SharedQuantizationSpec
 
 import nncf
 from nncf.common.graph.graph import NNCFGraph
@@ -41,7 +41,7 @@
 
 class TorchAOQuantizerAdapter(Quantizer):
     """
-    Implementation of the NNCF Quantizer interface for any given torch.ao quantizer.
+    Implementation of the NNCF Quantizer interface for any given torchao quantizer.
     """
 
     def __init__(self, quantizer: TorchAOQuantizer):
@@ -110,7 +110,7 @@ def _get_quantization_points(
     def get_quantizer_config_from_annotated_model(annotated: torch.fx.GraphModule) -> SingleConfigQuantizerSetup:
         """
         Process a torch.fx.GraphModule annotated with quantization specifications
-        (e.g., via torch.ao observers) and generates a corresponding NNCF quantization setup object,
+        (e.g., via torchao observers) and generates a corresponding NNCF quantization setup object,
         which maps quantization configurations to graph edges.
 
         :param annotated: A torch.fx.GraphModule that has been annotated with Torch quantization observers.
@@ -139,7 +139,7 @@ def get_quantizer_config_from_annotated_model(annotated: torch.fx.GraphModule) -
             if qspec is None:
                 continue
             if not isinstance(qspec, QuantizationSpec):
-                msg = f"Unknown torch.ao quantization spec: {qspec}"
+                msg = f"Unknown torchao quantization spec: {qspec}"
                 raise nncf.InternalError(msg)
 
             if qspec.qscheme in [torch.per_channel_affine, torch.per_channel_symmetric]:
@@ -156,9 +156,8 @@ def get_quantizer_config_from_annotated_model(annotated: torch.fx.GraphModule) -
                 if qspec.qscheme in [torch.per_channel_symmetric, torch.per_tensor_symmetric]
                 else QuantizationMode.ASYMMETRIC
             )
-
             # QuantizationSpec may have quant_min and quant_max attributes set to None.
-            # torch.ao.prepare_pt2e treats such occurrences as a signal
+            # torchao.quantization.pt2e.quantize_pt2e.prepare_pt2e treats such occurrences as a signal
             # that the full range of values should be used for quant_min and quant_max.
             # Therefore, the narrow_range parameter is set to False in this case.
             if qspec.quant_min is None or qspec.quant_max is None:

@@ -15,12 +15,12 @@
 
 import torch
 import torch.fx
-from torch.ao.quantization.fx.utils import create_getattr_from_value
-from torch.ao.quantization.pt2e.utils import _fuse_conv_bn_
 from torch.fx.node import map_arg
 from torch.fx.passes.infra.pass_base import PassBase
 from torch.fx.passes.infra.pass_base import PassResult
-from torch.quantization.fake_quantize import FakeQuantize
+from torchao.quantization.pt2e.fake_quantize import FakeQuantize
+from torchao.quantization.pt2e.utils import _fuse_conv_bn_
+from torchao.quantization.pt2e.utils import create_getattr_from_value
 
 import nncf
 import nncf.torch
@@ -382,7 +382,7 @@ def insert_one_qdq(model: torch.fx.GraphModule, target_point: PTTargetPoint, qua
         target node.
     :param quantizer: Quantizer module to inherit quantization parameters from.
     """
-    # Copied from torch.ao.quantization.quantize_pt2e.convert_pt2e
+    # Copied from torchao.quantization.pt2e.quantize_pt2e.convert_pt2e
     # 1. extract information for inserting q/dq node from activation_post_process
     node_type = "call_function"
     quantize_op: Optional[Callable] = None

@@ -12,7 +12,9 @@
 from typing import Optional
 
 import torch
-from torch.quantization.fake_quantize import FakeQuantize
+from torchao.quantization.pt2e.fake_quantize import FakeQuantize
+from torchao.quantization.pt2e.observer import MinMaxObserver
+from torchao.quantization.pt2e.observer import PerChannelMinMaxObserver
 
 import nncf
 import nncf.torch.graph.operator_metatypes as om
@@ -203,9 +205,9 @@ def _create_quantizer(
             )
 
         if per_channel:
-            observer = torch.ao.quantization.observer.PerChannelMinMaxObserver
+            observer = PerChannelMinMaxObserver
         else:
-            observer = torch.ao.quantization.observer.MinMaxObserver
+            observer = MinMaxObserver
 
         if dtype is TensorDataType.int8:
             level_high = 127

@@ -14,7 +14,9 @@
 
 import numpy as np
 import torch
-from torch.quantization.fake_quantize import FakeQuantize
+from torchao.quantization.pt2e.fake_quantize import FakeQuantize
+from torchao.quantization.pt2e.observer import MinMaxObserver
+from torchao.quantization.pt2e.observer import PerChannelMinMaxObserver
 
 import nncf
 from nncf.torch.quantization.layers import AsymmetricQuantizer
@@ -51,9 +53,9 @@ def convert_to_torch_fakequantizer(nncf_quantizer: BaseQuantizer) -> FakeQuantiz
     dtype = torch.qint8 if nncf_quantizer.level_low < 0 else torch.quint8
 
     if per_channel:
-        observer = torch.ao.quantization.observer.PerChannelMinMaxObserver
+        observer = PerChannelMinMaxObserver
     else:
-        observer = torch.ao.quantization.observer.MinMaxObserver
+        observer = MinMaxObserver
 
     if isinstance(nncf_quantizer, SymmetricQuantizer):
         qscheme = torch.per_channel_symmetric if per_channel else torch.per_tensor_symmetric

@@ -36,8 +36,8 @@ def get_cli_dict_args(args):
 
 
 MAP_BACKEND_PACKAGES = {
-    "torch": ["torch", "torchvision"],
-    "torchfx": ["torch", "torchvision"],
+    "torch": ["torch", "torchvision", "torchao"],
+    "torchfx": ["torch", "torchvision", "torchao"],
     "openvino": ["openvino"],
     "onnx": ["onnx", "onnxruntime"],
 }

@@ -21,8 +21,9 @@ efficientnet_pytorch==0.7.1
 transformers==4.53.0
 
 sentence-transformers==4.1.0
-optimum-intel==1.24.0
-optimum==1.26.0
+optimum-intel==1.26.0
+optimum-onnx==0.0.3
+optimum==2.0.0
 accelerate==1.9.0
 fastdownload==0.0.7
 
@@ -36,3 +37,4 @@ pytest-ordering==0.6
 pytest-xdist==3.5.0
 pytest-forked==1.6.0
 pytest-split==0.9.0
+fastprogress==1.0.5
@@ -24,9 +24,9 @@
 import openvino as ov
 import torch
 from sklearn.metrics import accuracy_score
-from torch.ao.quantization.quantize_pt2e import convert_pt2e
-from torch.ao.quantization.quantize_pt2e import prepare_pt2e
-from torch.ao.quantization.quantizer.quantizer import Quantizer as TorchAOQuantizer
+from torchao.quantization.pt2e.quantize_pt2e import convert_pt2e
+from torchao.quantization.pt2e.quantize_pt2e import prepare_pt2e
+from torchao.quantization.pt2e.quantizer import Quantizer as TorchAOQuantizer
 from torchvision import datasets
 
 import nncf
@@ -209,6 +209,7 @@ def _build_quantizer(self) -> TorchAOQuantizer:
         ):
             if key in self.compression_params:
                 quantizer_kwargs[key] = self.compression_params[key]
+
         advanced_parameters: AdvancedQuantizationParameters = self.compression_params.get(
             "advanced_parameters", AdvancedQuantizationParameters()
         )

@@ -4,6 +4,7 @@ datasets
 matplotlib # required for the memory monitor tool
 
 torch
+torchao
 torchvision
 onnx
 onnxruntime
-Original file line number
+Diff line change
@@ Expand Up / @@ -4,6 +4,7 @@ datasets @@
     matplotlib # required for the memory monitor tool
     torch
+    torchao
     torchvision
     onnx
     onnxruntime
@@ Expand Down @@