11 changes: 3 additions & 8 deletions optimum/intel/openvino/configuration.py
@@ -715,7 +715,7 @@ def __init__(
         )
         self.bits = bits
         self.sym = sym
-        self.group_size = group_size or (-1 if bits == 8 else 128)
Review comment (Collaborator Author): Delegate group size value selection to NNCF. This is backward compatible in terms of the default value, since NNCF also selects -1 for 8-bit types and 128 for 4-bit non-mxfp4 types by default.
+        self.group_size = group_size
         self.ratio = ratio
         self.all_layers = all_layers
         self.sensitivity_metric = sensitivity_metric
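To make the change concrete, a minimal usage sketch (the -1/128 defaults are NNCF's, as restated in the comment above; after this PR they are applied inside `nncf.compress_weights` rather than in this constructor):

```python
from optimum.intel import OVWeightQuantizationConfig

# group_size is now forwarded to NNCF as-is; leaving it unset lets
# NNCF choose its default: -1 (per-channel) for 8-bit types and 128
# for 4-bit non-mxfp4 types, i.e. the same values this constructor
# previously hard-coded.
int8_config = OVWeightQuantizationConfig(bits=8)  # effective group_size: -1
int4_config = OVWeightQuantizationConfig(bits=4)  # effective group_size: 128
```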
@@ -794,7 +794,7 @@ def post_init(self):
                 raise ValueError(
                     f"For 8-bit quantization, `ratio` is expected to be set to 1.0, but was set to {self.ratio}"
                 )
-            if self.group_size != -1:
+            if self.group_size is not None and self.group_size != -1:
                 raise ValueError(
                     f"For 8-bit quantization, `group_size` is expected to be set to -1, but was set to {self.group_size}"
                 )
@@ -843,11 +843,6 @@ def post_init(self):
                     f"['int4', 'int8', 'mxfp4', 'nf4', 'cb4'], but found: {self.dtype}."
                 )
             if self.dtype in ["mxfp4", "nf4", "cb4"]:
-                if self.dtype == "cb4" and is_nncf_version("<=", "2.17"):
-                    raise ImportError(
-                        "Codebook quantization is currently supported only with NNCF develop. "
-                        "Please run `pip install git+https://github.com/openvinotoolkit/nncf.git`."
-                    )
                 if self.bits != 4:
                     raise ValueError(
                         f"When applying weight compression with '{self.dtype}' data type, the `bits` parameter must be set to 4, but found {self.bits}"
@@ -874,7 +869,7 @@ def to_nncf_dict(self) -> Dict[str, Any]:
         if mode in signed_bitness.values():
             mode += "_sym" if self.sym else "_asym"
         if mode == "mxfp4":
-            mode = "e2m1"
+            mode = "e2m1" if is_nncf_version("<=", "2.18") else "mxfp4"
         if mode == "cb4":
             mode = "cb4_f8e4m3"
         mode = nncf.CompressWeightsMode(mode)
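A self-contained sketch of the version-dependent mode resolution above (paraphrasing the surrounding method; enum member names assumed from NNCF's public `CompressWeightsMode` API):

```python
import nncf
from optimum.intel.utils.import_utils import is_nncf_version

def resolve_mode(mode: str, sym: bool) -> nncf.CompressWeightsMode:
    # Integer modes carry an explicit sym/asym suffix.
    if mode in ("int4", "int8"):
        mode += "_sym" if sym else "_asym"
    # NNCF releases up to 2.18 expose the MXFP4 scheme under the
    # name "e2m1"; later releases accept "mxfp4" directly.
    if mode == "mxfp4":
        mode = "e2m1" if is_nncf_version("<=", "2.18") else "mxfp4"
    if mode == "cb4":
        mode = "cb4_f8e4m3"
    return nncf.CompressWeightsMode(mode)

print(resolve_mode("int4", sym=True))  # CompressWeightsMode.INT4_SYM
```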
8 changes: 1 addition & 7 deletions tests/openvino/test_exporters_cli.py
@@ -18,7 +18,6 @@
 from typing import Dict
 from unittest.mock import Mock

-import pytest
 from parameterized import parameterized
 from transformers import AutoModelForCausalLM, AutoModelForZeroShotImageClassification, AutoProcessor, AutoTokenizer
 from utils_tests import (
@@ -63,7 +62,6 @@
 from optimum.intel.openvino.utils import _HEAD_TO_AUTOMODELS, TemporaryDirectory
 from optimum.intel.utils.import_utils import (
     compare_versions,
-    is_nncf_version,
     is_openvino_tokenizers_available,
     is_openvino_version,
     is_tokenizers_version,
@@ -443,7 +441,7 @@ class OVCLIExportTestCase(unittest.TestCase):
         if is_transformers_version("<=", "4.45")
         else {
             "encoder": 30,
-            "decoder": 62 if is_nncf_version("<=", "2.17") and is_openvino_version("<", "2025.3") else 52,
+            "decoder": 52,
         },
         (
             {"encoder": {"int8": 32}, "decoder": {"int8": 52}, "decoder_with_past": {"int8": 42}}
@@ -1026,8 +1024,6 @@ def test_exporters_cli_hybrid_quantization(
     def test_exporters_cli_4bit(
         self, task: str, model_type: str, option: str, expected_num_weight_nodes_per_model: Dict[str, Dict[str, int]]
     ):
-        if option.startswith("cb4") and is_nncf_version("<=", "2.17"):
-            pytest.skip("Codebook quantization is supported starting from NNCF 2.18")
         with TemporaryDirectory() as tmpdir:
             result = subprocess.run(
                 f"optimum-cli export openvino --model {MODEL_NAMES[model_type]} --task {task} --weight-format {option} {tmpdir}",
@@ -1084,8 +1080,6 @@ def test_exporters_cli_full_quantization(
         expected_fake_nodes_per_model: Dict[str, int],
         expected_num_weight_nodes_per_model: Dict[str, Dict[str, int]],
     ):
-        if quant_mode == "cb4_f8e4m3" and is_nncf_version("<=", "2.17"):
-            pytest.skip("Codebook quantization is supported starting from NNCF 2.18")
         with TemporaryDirectory() as tmpdir:
             subprocess.run(
                 f"optimum-cli export openvino --task {task} --model {MODEL_NAMES[model_type]} "
21 changes: 5 additions & 16 deletions tests/openvino/test_quantization.py
@@ -80,7 +80,7 @@
 from copy import deepcopy

 from optimum.intel.openvino.quantization import InferRequestWrapper, OVCalibrationDatasetBuilder
-from optimum.intel.utils.import_utils import is_openvino_version, is_transformers_version, is_nncf_version
+from optimum.intel.utils.import_utils import is_openvino_version, is_transformers_version
 from utils_tests import (
     MODEL_NAMES,
     get_num_quantized_nodes,
@@ -394,7 +394,7 @@ class OVQuantizerTest(unittest.TestCase):
         if is_transformers_version("<=", "4.45")
         else {
             "encoder": 30,
-            "decoder": 62 if is_nncf_version("<=", "2.17") and is_openvino_version("<", "2025.3") else 52,
+            "decoder": 52,
         },
         (
             {"encoder": {"int8": 32}, "decoder": {"int8": 52}, "decoder_with_past": {"int8": 42}}
@@ -568,12 +568,6 @@ def test_ov_model_static_quantization_with_auto_dataset(
         expected_fake_nodes_per_model,
         expected_num_weight_nodes_per_model,
     ):
-        if (
-            isinstance(quantization_config, dict)
-            and quantization_config.get("weight_quantization_config", {}).get("dtype") == "cb4"
-            and is_nncf_version("<=", "2.17")
-        ):
-            pytest.skip("Codebook quantization is supported starting from NNCF 2.18")
         model_id = MODEL_NAMES[model_name]

         with TemporaryDirectory() as tmp_dir:
@@ -1303,13 +1297,6 @@ def test_ovmodel_4bit_auto_compression(self, model_cls, model_type, expected_ov_
     def test_ovmodel_4bit_auto_compression_with_config(
         self, model_cls, model_name, trust_remote_code, quantization_config, expected_num_weight_nodes_per_model
     ):
-        if (
-            isinstance(quantization_config, dict)
-            and quantization_config.get("dtype") == "cb4"
-            and is_nncf_version("<=", "2.17")
-        ):
-            pytest.skip("Codebook quantization is supported starting from NNCF 2.18")
-
         model_id = MODEL_NAMES[model_name]
         with TemporaryDirectory() as tmp_dir:
             quantization_config = OVWeightQuantizationConfig.from_dict(quantization_config)
@@ -1390,7 +1377,7 @@ def main_export_in_stacktrace(*args, **kwargs):
         compression_params = {
             "mode": nncf.CompressWeightsMode.INT8_ASYM,
             "ratio": 1.0,
-            "group_size": -1,
+            "group_size": None,
             "all_layers": None,
             "sensitivity_metric": None,
             "dataset": None,
@@ -1797,6 +1784,8 @@ def eval_expression_if_possible(expression):
                     config_value = (
                         "max_activation_variance" if sub_config.bits == 4 else "weight_quantization_error"
                     )
+                if param_name == "group_size" and config_value is None:
+                    config_value = -1 if sub_config.bits == 8 else 128

                 if config_value is None and rt_info_value is False:
                     continue
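For reference, the fallback those two added lines implement, as a standalone sketch (hypothetical helper name; the -1/128 values mirror the NNCF defaults cited in the configuration change above):

```python
def effective_group_size(group_size, bits):
    # When group_size is left unset, reproduce NNCF's default:
    # per-channel (-1) for 8-bit modes, 128 for 4-bit modes.
    if group_size is not None:
        return group_size
    return -1 if bits == 8 else 128

assert effective_group_size(None, 8) == -1
assert effective_group_size(None, 4) == 128
assert effective_group_size(64, 4) == 64
```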