Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/call_precommit.yml
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ jobs:
- name: Print installed modules
run: pip list
- name: Run torch precommit test scope
run: pytest -ra -n2 --durations=30 tests/torch -m "not cuda"
run: pytest -ra --durations=30 tests/torch/ -m "not cuda"

pytorch-cuda:
timeout-minutes: 40
Expand Down
5 changes: 3 additions & 2 deletions constraints.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,9 @@
openvino==2025.4.1

# Pytorch
torch==2.9.0
torchvision==0.24.0
torch==2.10.0
torchvision==0.25.0
torchao==0.15.0

# ONNX
onnx==1.17.0; python_version < '3.13'
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
tensorboard==2.13.0
torch==2.9.0
torchao==0.14.0
numpy>=1.23.5,<2
openvino==2025.4.1
optimum-intel==1.27.0
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,4 @@ optimum==2.1.0
transformers==4.53.0
lm_eval==0.4.8
pillow==12.0.0
torchao==0.14.0
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@ openvino==2025.4.1
optimum==2.1.0
torch==2.9.0
torchvision==0.24.0
torchao==0.14.0
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@ fastcore==1.11.5
openvino==2025.4.1
torch==2.9.0
torchvision==0.24.0
torchao==0.14.0
2 changes: 1 addition & 1 deletion src/nncf/common/tensor_statistics/collectors.py
Original file line number Diff line number Diff line change
Expand Up @@ -938,7 +938,7 @@ def _aggregate_impl(self) -> Tensor:

class HistogramAggregator(AggregatorBase):
"""
NNCF implementation of the torch.ao.quantization.observer.HistogramObserver.
NNCF implementation of the torchao.quantization.pt2e.observer.HistogramObserver.
Intended to be combined with a single RawReducer.
The aggregator records the running histogram of the input tensor values along with
min/max values. Only the reduction_axis==None is supported.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,11 @@

import torch
import torch.fx
from torch.ao.quantization.pt2e.port_metadata_pass import PortNodeMetaForQDQ
from torch.ao.quantization.pt2e.qat_utils import _fold_conv_bn_qat
from torch.ao.quantization.pt2e.utils import _disallow_eval_train
from torch.fx import GraphModule
from torch.fx.passes.infra.pass_manager import PassManager
from torchao.quantization.pt2e.qat_utils import _fold_conv_bn_qat
from torchao.quantization.pt2e.quantizer import PortNodeMetaForQDQ
from torchao.quantization.pt2e.utils import _disallow_eval_train

import nncf
from nncf.common.factory import build_graph
Expand Down
13 changes: 6 additions & 7 deletions src/nncf/experimental/torch/fx/quantization/quantize_pt2e.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,18 +8,17 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from copy import deepcopy
from typing import Optional

import torch
import torch.fx
from torch.ao.quantization.pt2e.port_metadata_pass import PortNodeMetaForQDQ
from torch.ao.quantization.pt2e.utils import _disallow_eval_train
from torch.ao.quantization.pt2e.utils import _fuse_conv_bn_
from torch.ao.quantization.quantizer import Quantizer
from torch.fx import GraphModule
from torch.fx.passes.infra.pass_manager import PassManager
from torchao.quantization.pt2e.quantizer import PortNodeMetaForQDQ
from torchao.quantization.pt2e.quantizer.quantizer import Quantizer
from torchao.quantization.pt2e.utils import _disallow_eval_train
from torchao.quantization.pt2e.utils import _fuse_conv_bn_

import nncf
from nncf import AdvancedCompressionParameters
Expand Down Expand Up @@ -60,7 +59,7 @@ def quantize_pt2e(
) -> torch.fx.GraphModule:
"""
Applies post-training quantization to the torch.fx.GraphModule provided model
using provided torch.ao quantizer.
using provided torchao quantizer.

:param model: A torch.fx.GraphModule instance to be quantized.
:param quantizer: torchao quantizer to annotate nodes in the graph with quantization setups
Expand Down Expand Up @@ -178,7 +177,7 @@ def compress_pt2e(
advanced_parameters: Optional[AdvancedCompressionParameters] = None,
) -> torch.fx.GraphModule:
"""
Applies Weight Compression to the torch.fx.GraphModule model using provided torch.ao quantizer.
Applies Weight Compression to the torch.fx.GraphModule model using provided torchao quantizer.

:param model: A torch.fx.GraphModule instance to be quantized.
:param quantizer: torchao quantizer to annotate nodes in the graph with quantization setups
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,14 @@
from typing import Optional, Union

import torch.fx
from torch.ao.quantization.observer import HistogramObserver
from torch.ao.quantization.observer import PerChannelMinMaxObserver
from torch.ao.quantization.quantizer.quantizer import EdgeOrNode
from torch.ao.quantization.quantizer.quantizer import QuantizationAnnotation as TorchAOQuantizationAnnotation
from torch.ao.quantization.quantizer.quantizer import QuantizationSpec as TorchAOQuantizationSpec
from torch.ao.quantization.quantizer.quantizer import QuantizationSpecBase as TorchAOQuantizationSpecBase
from torch.ao.quantization.quantizer.quantizer import Quantizer as TorchAOQuantizer
from torch.ao.quantization.quantizer.quantizer import SharedQuantizationSpec as TorchAOSharedQuantizationSpec
from torchao.quantization.pt2e.observer import HistogramObserver
from torchao.quantization.pt2e.observer import PerChannelMinMaxObserver
from torchao.quantization.pt2e.quantizer import EdgeOrNode
from torchao.quantization.pt2e.quantizer import QuantizationAnnotation as TorchAOQuantizationAnnotation
from torchao.quantization.pt2e.quantizer import QuantizationSpec as TorchAOQuantizationSpec
from torchao.quantization.pt2e.quantizer import QuantizationSpecBase as TorchAOQuantizationSpecBase
from torchao.quantization.pt2e.quantizer import Quantizer as TorchAOQuantizer
from torchao.quantization.pt2e.quantizer import SharedQuantizationSpec as TorchAOSharedQuantizationSpec

import nncf
from nncf import IgnoredScope
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,11 @@

import torch
import torch.fx
from torch.ao.quantization.pt2e.prepare import _get_edge_or_node_to_group_id
from torch.ao.quantization.pt2e.prepare import _get_edge_or_node_to_qspec
from torch.ao.quantization.quantizer import Quantizer as TorchAOQuantizer
from torch.ao.quantization.quantizer.quantizer import QuantizationSpec
from torch.ao.quantization.quantizer.quantizer import SharedQuantizationSpec
from torchao.quantization.pt2e.prepare import _get_edge_or_node_to_group_id
from torchao.quantization.pt2e.prepare import _get_edge_or_node_to_qspec
from torchao.quantization.pt2e.quantizer import Quantizer as TorchAOQuantizer
from torchao.quantization.pt2e.quantizer.quantizer import QuantizationSpec
from torchao.quantization.pt2e.quantizer.quantizer import SharedQuantizationSpec

import nncf
from nncf.common.graph.graph import NNCFGraph
Expand All @@ -41,7 +41,7 @@

class TorchAOQuantizerAdapter(Quantizer):
"""
Implementation of the NNCF Quantizer interface for any given torch.ao quantizer.
Implementation of the NNCF Quantizer interface for any given torchao quantizer.
"""

def __init__(self, quantizer: TorchAOQuantizer):
Expand Down Expand Up @@ -110,7 +110,7 @@ def _get_quantization_points(
def get_quantizer_config_from_annotated_model(annotated: torch.fx.GraphModule) -> SingleConfigQuantizerSetup:
"""
Processes a torch.fx.GraphModule annotated with quantization specifications
(e.g., via torch.ao observers) and generates a corresponding NNCF quantization setup object,
(e.g., via torchao observers) and generates a corresponding NNCF quantization setup object,
which maps quantization configurations to graph edges.

:param annotated: A torch.fx.GraphModule that has been annotated with Torch quantization observers.
Expand Down Expand Up @@ -139,7 +139,7 @@ def get_quantizer_config_from_annotated_model(annotated: torch.fx.GraphModule) -
if qspec is None:
continue
if not isinstance(qspec, QuantizationSpec):
msg = f"Unknown torch.ao quantization spec: {qspec}"
msg = f"Unknown torchao quantization spec: {qspec}"
raise nncf.InternalError(msg)

if qspec.qscheme in [torch.per_channel_affine, torch.per_channel_symmetric]:
Expand All @@ -156,9 +156,8 @@ def get_quantizer_config_from_annotated_model(annotated: torch.fx.GraphModule) -
if qspec.qscheme in [torch.per_channel_symmetric, torch.per_tensor_symmetric]
else QuantizationMode.ASYMMETRIC
)

# QuantizationSpec may have quant_min and quant_max attributes set to None.
# torch.ao.prepare_pt2e treats such occurrences as a signal
# torchao's prepare_pt2e (torchao.quantization.pt2e.quantize_pt2e) treats such occurrences as a signal
# that the full range of values should be used for quant_min and quant_max.
# Therefore, the narrow_range parameter is set to False in this case.
if qspec.quant_min is None or qspec.quant_max is None:
Expand Down
8 changes: 4 additions & 4 deletions src/nncf/experimental/torch/fx/transformations.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,12 @@

import torch
import torch.fx
from torch.ao.quantization.fx.utils import create_getattr_from_value
from torch.ao.quantization.pt2e.utils import _fuse_conv_bn_
from torch.fx.node import map_arg
from torch.fx.passes.infra.pass_base import PassBase
from torch.fx.passes.infra.pass_base import PassResult
from torch.quantization.fake_quantize import FakeQuantize
from torchao.quantization.pt2e.fake_quantize import FakeQuantize
from torchao.quantization.pt2e.utils import _fuse_conv_bn_
from torchao.quantization.pt2e.utils import create_getattr_from_value

import nncf
import nncf.torch
Expand Down Expand Up @@ -382,7 +382,7 @@ def insert_one_qdq(model: torch.fx.GraphModule, target_point: PTTargetPoint, qua
target node.
:param quantizer: Quantizer module to inherit quantization parameters from.
"""
# Copied from torch.ao.quantization.quantize_pt2e.convert_pt2e
# Copied from torchao.quantization.pt2e.quantize_pt2e.convert_pt2e
# 1. extract information for inserting q/dq node from activation_post_process
node_type = "call_function"
quantize_op: Optional[Callable] = None
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,9 @@
from typing import Optional

import torch
from torch.quantization.fake_quantize import FakeQuantize
from torchao.quantization.pt2e.fake_quantize import FakeQuantize
from torchao.quantization.pt2e.observer import MinMaxObserver
from torchao.quantization.pt2e.observer import PerChannelMinMaxObserver

import nncf
import nncf.torch.graph.operator_metatypes as om
Expand Down Expand Up @@ -203,9 +205,9 @@ def _create_quantizer(
)

if per_channel:
observer = torch.ao.quantization.observer.PerChannelMinMaxObserver
observer = PerChannelMinMaxObserver
else:
observer = torch.ao.quantization.observer.MinMaxObserver
observer = MinMaxObserver

if dtype is TensorDataType.int8:
level_high = 127
Expand Down
8 changes: 5 additions & 3 deletions src/nncf/torch/quantization/strip.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,9 @@

import numpy as np
import torch
from torch.quantization.fake_quantize import FakeQuantize
from torchao.quantization.pt2e.fake_quantize import FakeQuantize
from torchao.quantization.pt2e.observer import MinMaxObserver
from torchao.quantization.pt2e.observer import PerChannelMinMaxObserver

import nncf
from nncf.torch.quantization.layers import AsymmetricQuantizer
Expand Down Expand Up @@ -51,9 +53,9 @@ def convert_to_torch_fakequantizer(nncf_quantizer: BaseQuantizer) -> FakeQuantiz
dtype = torch.qint8 if nncf_quantizer.level_low < 0 else torch.quint8

if per_channel:
observer = torch.ao.quantization.observer.PerChannelMinMaxObserver
observer = PerChannelMinMaxObserver
else:
observer = torch.ao.quantization.observer.MinMaxObserver
observer = MinMaxObserver

if isinstance(nncf_quantizer, SymmetricQuantizer):
qscheme = torch.per_channel_symmetric if per_channel else torch.per_tensor_symmetric
Expand Down
4 changes: 2 additions & 2 deletions tests/cross_fw/shared/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,8 @@ def get_cli_dict_args(args):


MAP_BACKEND_PACKAGES = {
"torch": ["torch", "torchvision"],
"torchfx": ["torch", "torchvision"],
"torch": ["torch", "torchvision", "torchao"],
"torchfx": ["torch", "torchvision", "torchao"],
"openvino": ["openvino"],
"onnx": ["onnx", "onnxruntime"],
}
Expand Down
6 changes: 4 additions & 2 deletions tests/executorch/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,9 @@ efficientnet_pytorch==0.7.1
transformers==4.53.0

sentence-transformers==4.1.0
optimum-intel==1.24.0
optimum==1.26.0
optimum-intel==1.26.0
optimum-onnx==0.0.3
optimum==2.0.0
accelerate==1.9.0
fastdownload==0.0.7

Expand All @@ -36,3 +37,4 @@ pytest-ordering==0.6
pytest-xdist==3.5.0
pytest-forked==1.6.0
pytest-split==0.9.0
fastprogress==1.0.5
7 changes: 4 additions & 3 deletions tests/post_training/pipelines/image_classification_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,9 @@
import openvino as ov
import torch
from sklearn.metrics import accuracy_score
from torch.ao.quantization.quantize_pt2e import convert_pt2e
from torch.ao.quantization.quantize_pt2e import prepare_pt2e
from torch.ao.quantization.quantizer.quantizer import Quantizer as TorchAOQuantizer
from torchao.quantization.pt2e.quantize_pt2e import convert_pt2e
from torchao.quantization.pt2e.quantize_pt2e import prepare_pt2e
from torchao.quantization.pt2e.quantizer import Quantizer as TorchAOQuantizer
from torchvision import datasets

import nncf
Expand Down Expand Up @@ -209,6 +209,7 @@ def _build_quantizer(self) -> TorchAOQuantizer:
):
if key in self.compression_params:
quantizer_kwargs[key] = self.compression_params[key]

advanced_parameters: AdvancedQuantizationParameters = self.compression_params.get(
"advanced_parameters", AdvancedQuantizationParameters()
)
Expand Down
1 change: 1 addition & 0 deletions tests/post_training/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ datasets
matplotlib # required for the memory monitor tool

torch
torchao
torchvision
onnx
onnxruntime
Expand Down
Loading
Loading