|
12 | 12 | import copy
|
13 | 13 | import os
|
14 | 14 |
|
| 15 | +os.environ["TORCHINDUCTOR_FREEZING"] = "1" |
| 16 | + |
| 17 | +from itertools import islice |
| 18 | + |
15 | 19 | import numpy as np
|
16 | 20 | import openvino as ov
|
17 | 21 | import torch
|
18 | 22 | from sklearn.metrics import accuracy_score
|
| 23 | +from torch.ao.quantization.quantize_pt2e import convert_pt2e |
| 24 | +from torch.ao.quantization.quantize_pt2e import prepare_pt2e |
| 25 | +from torch.ao.quantization.quantizer.quantizer import Quantizer as TorchAOQuantizer |
| 26 | +from torch.ao.quantization.quantizer.x86_inductor_quantizer import X86InductorQuantizer |
| 27 | +from torch.ao.quantization.quantizer.x86_inductor_quantizer import get_default_x86_inductor_quantization_config |
19 | 28 | from torchvision import datasets
|
20 | 29 |
|
21 | 30 | import nncf
|
| 31 | +from nncf import AdvancedQuantizationParameters |
22 | 32 | from nncf.common.logging.track_progress import track
|
| 33 | +from nncf.experimental.torch.fx import OpenVINOQuantizer |
| 34 | +from nncf.experimental.torch.fx import quantize_pt2e |
| 35 | +from nncf.torch import disable_patching |
23 | 36 | from tests.post_training.pipelines.base import DEFAULT_VAL_THREADS
|
24 | 37 | from tests.post_training.pipelines.base import FX_BACKENDS
|
| 38 | +from tests.post_training.pipelines.base import BackendType |
25 | 39 | from tests.post_training.pipelines.base import PTQTestPipeline
|
26 | 40 |
|
27 | 41 |
|
@@ -75,7 +89,17 @@ def process_result(request, userdata):
|
75 | 89 | def _validate_torch_compile(
|
76 | 90 | self, val_loader: torch.utils.data.DataLoader, predictions: np.ndarray, references: np.ndarray
|
77 | 91 | ):
|
78 |
| - compiled_model = torch.compile(self.compressed_model.cpu(), backend="openvino", options={"aot_autograd": True}) |
| 92 | + if self.backend in [ |
| 93 | + BackendType.FX_TORCH, |
| 94 | + BackendType.CUDA_FX_TORCH, |
| 95 | + BackendType.OV_QUANTIZER_AO, |
| 96 | + BackendType.OV_QUANTIZER_NNCF, |
| 97 | + ]: |
| 98 | + compiled_model = torch.compile( |
| 99 | + self.compressed_model.cpu(), backend="openvino", options={"aot_autograd": True} |
| 100 | + ) |
| 101 | + else: |
| 102 | + compiled_model = torch.compile(self.compressed_model) |
79 | 103 | for i, (images, target) in enumerate(val_loader):
|
80 | 104 | # W/A for memory leaks when using torch DataLoader and OpenVINO
|
81 | 105 | pred = compiled_model(images)
|
@@ -103,3 +127,98 @@ def _validate(self) -> None:
|
103 | 127 |
|
104 | 128 | self.run_info.metric_name = "Acc@1"
|
105 | 129 | self.run_info.metric_value = acc_top1
|
| 130 | + return [] |
| 131 | + |
def _compress_torch_ao(self, quantizer):
    """
    Quantize self.model with the TorchAO PT2E prepare / calibrate / convert sequence.

    The converted model is stored in `self.compressed_model`.

    :param quantizer: Quantizer instance handed to `prepare_pt2e`.
    """
    # Upper bound on the number of calibration samples; 300 when the test case sets none.
    num_samples = self.compression_params.get("subset_size", 300)

    with torch.no_grad(), disable_patching():
        model_to_calibrate = prepare_pt2e(self.model, quantizer)

        # Run the prepared model over the first `num_samples` calibration inputs.
        calibration_inputs = islice(self.calibration_dataset.get_inference_data(), num_samples)
        for sample in calibration_inputs:
            model_to_calibrate(sample)

        self.compressed_model = convert_pt2e(model_to_calibrate)
| 139 | + |
def _compress_nncf_pt2e(self, quantizer):
    """
    Quantize self.model with NNCF's `quantize_pt2e` and the given quantizer.

    Keyword arguments for `quantize_pt2e` are assembled from `self.compression_params`
    and from its "advanced_parameters" entry (a default-constructed
    `AdvancedQuantizationParameters` is used when that entry is absent).
    The quantized model is stored in `self.compressed_model`.

    :param quantizer: Quantizer instance passed through to `quantize_pt2e`.
    """
    params = self.compression_params

    # Options forwarded verbatim, but only when the test case actually sets them.
    forwarded_keys = ("subset_size", "fast_bias_correction")
    call_kwargs = {name: params[name] for name in forwarded_keys if name in params}

    advanced: AdvancedQuantizationParameters = params.get("advanced_parameters", AdvancedQuantizationParameters())

    # A single `smooth_quant_alpha`, when set, overrides the per-operation alphas:
    # a negative value writes -1 to both convolution and matmul, any other value
    # replaces only the matmul alpha. The alphas object is updated in place.
    alphas = advanced.smooth_quant_alphas
    single_alpha = advanced.smooth_quant_alpha
    if single_alpha is not None and single_alpha < 0:
        alphas.convolution = -1
        alphas.matmul = -1
    elif single_alpha is not None:
        alphas.matmul = single_alpha

    call_kwargs.update(
        smooth_quant_params=alphas,
        bias_correction_params=advanced.bias_correction_params,
        activations_range_estimator_params=advanced.activations_range_estimator_params,
        weights_range_estimator_params=advanced.weights_range_estimator_params,
    )

    # The smooth_quant flag is True only when a truthy model type equal to TRANSFORMER is configured.
    model_type = params.get("model_type", False)
    use_smooth_quant = bool(model_type) and model_type == nncf.ModelType.TRANSFORMER

    with disable_patching(), torch.no_grad():
        self.compressed_model = quantize_pt2e(
            self.model,
            quantizer,
            self.calibration_dataset,
            smooth_quant=use_smooth_quant,
            fold_quantize=False,
            **call_kwargs,
        )
| 179 | + |
def _compress(self):
    """
    Quantize self.model

    Dispatch on self.backend:
      * backends outside FX_BACKENDS use the parent implementation unchanged;
      * FX_TORCH / CUDA_FX_TORCH use the parent implementation with patching
        disabled and gradients off;
      * OV_QUANTIZER_NNCF / X86_QUANTIZER_NNCF build a quantizer and use `_compress_nncf_pt2e`;
      * every remaining FX backend builds a quantizer and uses `_compress_torch_ao`.
    """
    backend = self.backend

    if backend not in FX_BACKENDS:
        super()._compress()
    elif backend in (BackendType.FX_TORCH, BackendType.CUDA_FX_TORCH):
        with disable_patching(), torch.no_grad():
            super()._compress()
    elif backend in (BackendType.OV_QUANTIZER_NNCF, BackendType.X86_QUANTIZER_NNCF):
        self._compress_nncf_pt2e(self._build_quantizer())
    else:
        self._compress_torch_ao(self._build_quantizer())
| 199 | + |
def _build_quantizer(self) -> TorchAOQuantizer:
    """
    Create the quantizer object that corresponds to self.backend.

    For the X86 quantizer backends an `X86InductorQuantizer` with the default x86
    inductor quantization config set globally is returned; `self.compression_params`
    is not consulted on that path. For every other backend an `OpenVINOQuantizer`
    is built from `self.compression_params` and its "advanced_parameters" entry
    (a default-constructed `AdvancedQuantizationParameters` when absent).

    :return: The constructed quantizer.
    """
    if self.backend in (BackendType.X86_QUANTIZER_AO, BackendType.X86_QUANTIZER_NNCF):
        x86_quantizer = X86InductorQuantizer()
        x86_quantizer.set_global(get_default_x86_inductor_quantization_config())
        return x86_quantizer

    params = self.compression_params

    # Top-level options are forwarded only when the test case provides them.
    optional_keys = ("mode", "preset", "target_device", "model_type", "ignored_scope")
    ov_kwargs = {name: params[name] for name in optional_keys if name in params}

    # Advanced options are always forwarded, read from the advanced parameters object.
    advanced: AdvancedQuantizationParameters = params.get("advanced_parameters", AdvancedQuantizationParameters())
    advanced_fields = (
        "overflow_fix",
        "quantize_outputs",
        "activations_quantization_params",
        "weights_quantization_params",
        "quantizer_propagation_rule",
    )
    for field_name in advanced_fields:
        ov_kwargs[field_name] = getattr(advanced, field_name)

    return OpenVINOQuantizer(**ov_kwargs)
0 commit comments