Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 54 additions & 0 deletions .github/workflows/executorch.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
# CI workflow: installs NNCF together with pre-release PyTorch/torchvision/torchao wheels
# and an editable ExecuTorch checkout, then runs the test suite under tests/executorch.
name: ExecuTorch
permissions: read-all

# Triggers: manual dispatch, a cron schedule (minute 0, hour 0, every day),
# and pull requests that touch any of the paths listed below.
on:
workflow_dispatch:
schedule:
- cron: '0 0 * * *'
pull_request:
paths:
- 'src/nncf/experimental/quantization/algorithms/range_estimator/*'
- 'src/nncf/experimental/quantization/algorithms/post_training/*'
- 'src/nncf/experimental/quantization/algorithms/weight_compression/*'
- 'tests/executorch*'
- 'src/nncf/experimental/torch/fx/*'
- 'src/nncf/quantization/algorithms/algorithm.py'

jobs:
executorch:
# The job is cancelled if it runs longer than 40 minutes.
timeout-minutes: 40
runs-on: ubuntu-latest-8-cores
defaults:
run:
shell: bash
env:
# Keeps apt-get from prompting for input during package installation.
DEBIAN_FRONTEND: noninteractive
steps:
# System packages: compilers, ninja and GL/glib libraries.
- name: Install dependencies
run : |
sudo apt-get update
sudo apt-get --assume-yes install gcc g++ build-essential ninja-build libgl1-mesa-dev libglib2.0-0
# Actions are pinned to full commit SHAs; the trailing comment records the matching tag.
- uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
with:
lfs: true
- uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6.0.0
with:
python-version: "3.10.14"
# Diagnostic output only: continue-on-error keeps a failure here from failing the job.
- name: Runner info
continue-on-error: true
run: |
cat /etc/*release
cat /proc/cpuinfo
# NOTE(review): ExecuTorch is installed from a branch of a fork (anzr299/executorch),
# not from the upstream repository - presumably temporary until that branch is merged; confirm.
- name: Install NNCF and test requirements
run: |
# Torchao installation requires pytorch to be installed first.
pip install . -r tests/executorch/requirements.txt
pip install --pre torch torchvision torchao --index-url https://download.pytorch.org/whl/nightly/cpu
# Executorch
# Editable install due to https://github.com/pytorch/executorch/issues/6475
pip install --no-build-isolation -e git+https://github.com/anzr299/executorch.git@an/openvino/nncf_compress_pt2e#egg=executorch
# Records the resolved package versions in the job log.
- name: Print installed modules
run: pip list
# NOTE(review): the step name mentions the PyTorch precommit scope,
# but the command runs the tests in tests/executorch.
- name: Run PyTorch precommit test scope
run: |
pytest -ra tests/executorch
2 changes: 1 addition & 1 deletion src/nncf/common/tensor_statistics/collectors.py
Original file line number Diff line number Diff line change
Expand Up @@ -938,7 +938,7 @@ def _aggregate_impl(self) -> Tensor:

class HistogramAggregator(AggregatorBase):
"""
NNCF implementation of the torch.ao.quantization.observer.HistogramObserver.
NNCF implementation of the torchao.quantization.pt2e.observer.HistogramObserver.
Intended to be combined with a single RawReducer.
The aggregator records the running histogram of the input tensor values along with
min/max values. Only the reduction_axis==None is supported.
Expand Down
1 change: 0 additions & 1 deletion src/nncf/experimental/torch/fx/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,3 @@

from nncf.experimental.torch.fx.quantization.quantize_pt2e import compress_pt2e as compress_pt2e
from nncf.experimental.torch.fx.quantization.quantize_pt2e import quantize_pt2e as quantize_pt2e
from nncf.experimental.torch.fx.quantization.quantizer.openvino_quantizer import OpenVINOQuantizer as OpenVINOQuantizer
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,11 @@

import torch
import torch.fx
from torch.ao.quantization.pt2e.port_metadata_pass import PortNodeMetaForQDQ
from torch.ao.quantization.pt2e.qat_utils import _fold_conv_bn_qat
from torch.ao.quantization.pt2e.utils import _disallow_eval_train
from torch.fx import GraphModule
from torch.fx.passes.infra.pass_manager import PassManager
from torchao.quantization.pt2e.qat_utils import _fold_conv_bn_qat
from torchao.quantization.pt2e.quantizer import PortNodeMetaForQDQ
from torchao.quantization.pt2e.utils import _disallow_eval_train

import nncf
from nncf.common.factory import build_graph
Expand Down
33 changes: 22 additions & 11 deletions src/nncf/experimental/torch/fx/quantization/quantize_pt2e.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,18 +8,17 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from copy import deepcopy
from typing import Optional

import torch
import torch.fx
from torch.ao.quantization.pt2e.port_metadata_pass import PortNodeMetaForQDQ
from torch.ao.quantization.pt2e.utils import _disallow_eval_train
from torch.ao.quantization.pt2e.utils import _fuse_conv_bn_
from torch.ao.quantization.quantizer import Quantizer
from torch.fx import GraphModule
from torch.fx.passes.infra.pass_manager import PassManager
from torchao.quantization.pt2e.quantizer import PortNodeMetaForQDQ
from torchao.quantization.pt2e.quantizer.quantizer import Quantizer
from torchao.quantization.pt2e.utils import _disallow_eval_train
from torchao.quantization.pt2e.utils import _fuse_conv_bn_

import nncf
from nncf import AdvancedCompressionParameters
Expand All @@ -32,7 +31,6 @@
from nncf.experimental.quantization.algorithms.weight_compression.algorithm import WeightsCompression
from nncf.experimental.torch.fx.constant_folding import constant_fold
from nncf.experimental.torch.fx.quantization.quantizer.openvino_adapter import OpenVINOQuantizerAdapter
from nncf.experimental.torch.fx.quantization.quantizer.openvino_quantizer import OpenVINOQuantizer
from nncf.experimental.torch.fx.quantization.quantizer.torch_ao_adapter import TorchAOQuantizerAdapter
from nncf.experimental.torch.fx.transformations import QUANTIZE_NODE_TARGETS
from nncf.experimental.torch.fx.transformations import DuplicateDQPassNoAnnotations
Expand All @@ -42,6 +40,19 @@
from nncf.quantization.range_estimator import RangeEstimatorParameters


def _is_openvino_quantizer_instance(obj) -> bool:
"""
Safely check if an object is instance of OpenVINOQuantizer.
This is to avoid a circular import
"""
try:
from executorch.backends.openvino.quantizer.quantizer import OpenVINOQuantizer
except ModuleNotFoundError as err:
msg = "OpenVINO Quantizer could not be imported from Executorch. Please install Executorch."
raise nncf.ModuleNotFoundError(msg) from err
return isinstance(obj, OpenVINOQuantizer)


@api(canonical_alias="nncf.experimental.torch.fx.quantize_pt2e")
def quantize_pt2e(
model: torch.fx.GraphModule,
Expand All @@ -60,7 +71,7 @@ def quantize_pt2e(
) -> torch.fx.GraphModule:
"""
Applies post-training quantization to the torch.fx.GraphModule provided model
using provided torch.ao quantizer.
using provided torchao quantizer.

:param model: A torch.fx.GraphModule instance to be quantized.
:param quantizer: Torch ao quantizer to annotate nodes in the graph with quantization setups
Expand Down Expand Up @@ -103,7 +114,7 @@ def quantize_pt2e(
model = deepcopy(model)

_fuse_conv_bn_(model)
if isinstance(quantizer, OpenVINOQuantizer) or hasattr(quantizer, "get_nncf_quantization_setup"):
if _is_openvino_quantizer_instance(quantizer) or hasattr(quantizer, "get_nncf_quantization_setup"):
quantizer = OpenVINOQuantizerAdapter(quantizer)
else:
quantizer = TorchAOQuantizerAdapter(quantizer)
Expand All @@ -130,7 +141,7 @@ def quantize_pt2e(
quantized_model = GraphModule(quantized_model, quantized_model.graph)

if fold_quantize:
if isinstance(quantizer, OpenVINOQuantizerAdapter):
if _is_openvino_quantizer_instance(quantizer):
compress_post_quantize_transformation(quantized_model)
else:
constant_fold(quantized_model, _quant_node_constraint)
Expand Down Expand Up @@ -178,7 +189,7 @@ def compress_pt2e(
advanced_parameters: Optional[AdvancedCompressionParameters] = None,
) -> torch.fx.GraphModule:
"""
Applies Weight Compression to the torch.fx.GraphModule model using provided torch.ao quantizer.
Applies Weight Compression to the torch.fx.GraphModule model using provided torchao quantizer.

:param model: A torch.fx.GraphModule instance to be quantized.
:param quantizer: Torch ao quantizer to annotate nodes in the graph with quantization setups
Expand All @@ -196,7 +207,7 @@ def compress_pt2e(
preserve the accuracy of the model, the more sensitive layers receive a higher precision.
:param advanced_parameters: Advanced parameters for algorithms in the compression pipeline.
"""
if isinstance(quantizer, OpenVINOQuantizer) or hasattr(quantizer, "get_nncf_weight_compression_parameters"):
if _is_openvino_quantizer_instance(quantizer) or hasattr(quantizer, "get_nncf_weight_compression_parameters"):
quantizer = OpenVINOQuantizerAdapter(quantizer)
compression_format = nncf.CompressionFormat.DQ
else:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,16 +9,20 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import Any
from __future__ import annotations

from typing import TYPE_CHECKING, Any

import torch.fx

from nncf.common.graph.graph import NNCFGraph
from nncf.common.quantization.quantizer_setup import SingleConfigQuantizerSetup
from nncf.experimental.quantization.quantizer import Quantizer
from nncf.experimental.torch.fx.quantization.quantizer.openvino_quantizer import OpenVINOQuantizer
from nncf.quantization.algorithms.weight_compression.config import WeightCompressionParameters

if TYPE_CHECKING:
from executorch.backends.openvino.quantizer.quantizer import OpenVINOQuantizer


class OpenVINOQuantizerAdapter(Quantizer):
"""
Expand Down
Loading
Loading