From e09d3274eade3b7d529ba1b7b4b6f2a2a895314e Mon Sep 17 00:00:00 2001 From: Alexander Dokuchaev Date: Mon, 5 Jan 2026 14:25:13 +0200 Subject: [PATCH 1/2] rename --- docs/Algorithms.md | 2 +- .../weights_compression/Usage.md | 4 ++-- .../openvino/smollm2_360m_codebook/main.py | 2 +- src/nncf/parameters.py | 4 ++-- .../algorithms/weight_compression/algorithm.py | 10 +++++----- .../algorithms/weight_compression/config.py | 4 ++-- src/nncf/quantization/quantize_model.py | 6 +++--- .../native/quantization/test_weights_compression.py | 4 ++-- 8 files changed, 18 insertions(+), 18 deletions(-) diff --git a/docs/Algorithms.md b/docs/Algorithms.md index 34d20532620..7f42c937eef 100644 --- a/docs/Algorithms.md +++ b/docs/Algorithms.md @@ -11,7 +11,7 @@ - Symmetric 8 bit compression mode - Symmetric and asymmetric 4 bit compression mode - NF4 compression mode - - Arbitrary look-up table (CODEBOOK) or predefined lookup table based on NF4 (CB4_F8E4M3) + - Arbitrary look-up table (CODEBOOK) or predefined lookup table based on NF4 (CB4) - MX-compliant types - MXFP4 and MXFP8_E4M3 - FP types - FP8_E4M3 and FP4 - Mixed precision weights compression diff --git a/docs/usage/post_training_compression/weights_compression/Usage.md b/docs/usage/post_training_compression/weights_compression/Usage.md index 1400cbeff10..fb13967f5f7 100644 --- a/docs/usage/post_training_compression/weights_compression/Usage.md +++ b/docs/usage/post_training_compression/weights_compression/Usage.md @@ -44,11 +44,11 @@ NNCF can automatically distribute precision assignments based on quantization se | INT4_ASYM | INT4 | FP16 | Per-channel / Group-wise | [Asymmetric quantization](/docs/usage/training_time_compression/Quantization.md#asymmetric-quantization) | | NF4 | FP32 | FP16 | Per-channel / Group-wise | [NormalFloat-4](https://arxiv.org/pdf/2305.14314v1.pdf) lookup table with 16 FP32 values | | CODEBOOK | Any | FP16 | Per-channel / Group-wise | Arbitrary lookup table (codebook) | -| CB4_F8E4M3 | E4M3 | FP16 | Per-channel / Group-wise | A fixed lookup table with 16 E4M3 values based on NF4 values | +| CB4 | E4M3 | FP16 | Per-channel / Group-wise | A fixed lookup table with 16 E4M3 values based on NF4 values | | MXFP4 | E2M1 | E8M0 | Group-wise (32) | [MX-compliant FP4](https://www.opencompute.org/documents/ocp-microscaling-formats-mx-v1-0-spec-final-pdf) | | MXFP8_E4M3 | E4M3 | E8M0 | Group-wise (32) | [MX-compliant FP8](https://www.opencompute.org/documents/ocp-microscaling-formats-mx-v1-0-spec-final-pdf) | | FP8_E4M3 | E4M3 | FP16 | Per-channel / Group-wise | [FP8](https://www.opencompute.org/documents/ocp-microscaling-formats-mx-v1-0-spec-final-pdf) | -| FP4 | E2M1 | FP16 | Per-channel / Group-wise | [FP4](https://www.opencompute.org/documents/ocp-microscaling-formats-mx-v1-0-spec-final-pdf) | +| FP4 | E2M1 | FP16 | Per-channel / Group-wise | [FP4](https://www.opencompute.org/documents/ocp-microscaling-formats-mx-v1-0-spec-final-pdf) | **Note**: Granularity refers to the scope of elements sharing quantization parameters. "Per-channel" applies different parameters for each output channel, while "Group-wise" divides weights into groups (e.g., group_size=128) that share the same parameters. 
diff --git a/examples/llm_compression/openvino/smollm2_360m_codebook/main.py b/examples/llm_compression/openvino/smollm2_360m_codebook/main.py index 32a5cd6de43..9711304e9dc 100644 --- a/examples/llm_compression/openvino/smollm2_360m_codebook/main.py +++ b/examples/llm_compression/openvino/smollm2_360m_codebook/main.py @@ -110,7 +110,7 @@ def default_codebook_example(model_id: str, compressed_model_id: str) -> list[st answers_by_questions = generate_answers(QUESTIONS, model, tokenizer) print_answers("Non-optimized model outputs:\n", answers_by_questions) - model.model = nncf.compress_weights(model.model, mode=nncf.CompressWeightsMode.CB4_F8E4M3, ratio=1.0, group_size=64) + model.model = nncf.compress_weights(model.model, mode=nncf.CompressWeightsMode.CB4, ratio=1.0, group_size=64) model.save_pretrained(compressed_model_id) tokenizer.save_pretrained(compressed_model_id) diff --git a/src/nncf/parameters.py b/src/nncf/parameters.py index 21c20743490..69740a14dd8 100644 --- a/src/nncf/parameters.py +++ b/src/nncf/parameters.py @@ -95,7 +95,7 @@ class CompressWeightsMode(StrEnum): :param FP8_E4M3: A FP8 format with E4M3 values sharing group-level fp16 scale. :param FP4: A FP4 format with E2M1 values sharing group-level fp16 scale. :param CODEBOOK: Codebook (LUT) quantization format. - :param CB4_F8E4M3: Codebook (LUT) format with 16 fixed fp8 values in E4M3 format. + :param CB4: Codebook (LUT) format with 16 fixed fp8 values in E4M3 format. """ INT8_SYM = "int8_sym" @@ -103,7 +103,7 @@ class CompressWeightsMode(StrEnum): INT4_SYM = "int4_sym" INT4_ASYM = "int4_asym" NF4 = "nf4" - CB4_F8E4M3 = "cb4_f8e4m3" + CB4 = "cb4" INT8 = "int8" # Deprecated mode MXFP4 = "mxfp4" MXFP8_E4M3 = "mxfp8_e4m3" diff --git a/src/nncf/quantization/algorithms/weight_compression/algorithm.py b/src/nncf/quantization/algorithms/weight_compression/algorithm.py index 7a58055a4d8..7341a595c73 100644 --- a/src/nncf/quantization/algorithms/weight_compression/algorithm.py +++ b/src/nncf/quantization/algorithms/weight_compression/algorithm.py @@ -93,7 +93,7 @@ def get_weight_compression_configuration( elif group_size is None and mode in NON_INT8_MODES: if mode in [CompressWeightsMode.MXFP4, CompressWeightsMode.MXFP8_E4M3]: group_size = 32 - elif mode in [CompressWeightsMode.CODEBOOK, CompressWeightsMode.CB4_F8E4M3]: + elif mode in [CompressWeightsMode.CODEBOOK, CompressWeightsMode.CB4]: group_size = -1 else: group_size = 128 @@ -523,7 +523,7 @@ def _get_backup_config(self, weight_dtype: TensorDataType) -> Optional[WeightCom def _get_primary_config(self, group_size: int) -> WeightCompressionConfig: codebook_values = None - if self._mode == CompressWeightsMode.CB4_F8E4M3: + if self._mode == CompressWeightsMode.CB4: codebook_values = Tensor(CB4_QUANTILES) elif self._mode == CompressWeightsMode.CODEBOOK: codebook_values = Tensor(self._advanced_parameters.codebook) @@ -959,9 +959,9 @@ def get_weight_compression_parameters( # MoE operations are usually matmuls, so the check for matmul metatype is done # This is to avoid raising the error for non-MoE cases with 3D weights. parsed_ov_version = f"{ov_version[0]}.{ov_version[1]}.{ov_version[2]}-{ov_version[3]}" - msg = f"""NNCF compression algorithms do not support 3D weights with current version of - OpenVINO {parsed_ov_version} due to a known issue in statistics collection - Ticket - 176465. Please update to the latest OpenVINO nightly version. 
+ msg = f"""NNCF compression algorithms do not support 3D weights with current version of + OpenVINO {parsed_ov_version} due to a known issue in statistics collection + Ticket - 176465. Please update to the latest OpenVINO nightly version. Node with weight: {node.node_name}.""" raise nncf.UnsupportedModelError(msg) diff --git a/src/nncf/quantization/algorithms/weight_compression/config.py b/src/nncf/quantization/algorithms/weight_compression/config.py index 25d475212ef..9f682803675 100644 --- a/src/nncf/quantization/algorithms/weight_compression/config.py +++ b/src/nncf/quantization/algorithms/weight_compression/config.py @@ -71,7 +71,7 @@ def is_integer(self): CompressWeightsMode.FP8_E4M3, CompressWeightsMode.FP4, CompressWeightsMode.CODEBOOK, - CompressWeightsMode.CB4_F8E4M3, + CompressWeightsMode.CB4, ] @property @@ -79,7 +79,7 @@ def is_codebook(self): """ :return: True if compression type is codebook, else False. """ - return self.mode in [CompressWeightsMode.CODEBOOK, CompressWeightsMode.CB4_F8E4M3] + return self.mode in [CompressWeightsMode.CODEBOOK, CompressWeightsMode.CB4] @property def compression_dtype(self) -> TensorDataType: diff --git a/src/nncf/quantization/quantize_model.py b/src/nncf/quantization/quantize_model.py index 2986ab91e4d..69c7750a6b6 100644 --- a/src/nncf/quantization/quantize_model.py +++ b/src/nncf/quantization/quantize_model.py @@ -511,7 +511,7 @@ def compress_weights( CompressWeightsMode.FP8_E4M3, CompressWeightsMode.FP4, CompressWeightsMode.CODEBOOK, - CompressWeightsMode.CB4_F8E4M3, + CompressWeightsMode.CB4, ] if mode in not_supported_modes: msg = ( @@ -559,7 +559,7 @@ def compress_weights( CompressWeightsMode.FP8_E4M3, CompressWeightsMode.FP4, CompressWeightsMode.CODEBOOK, - CompressWeightsMode.CB4_F8E4M3, + CompressWeightsMode.CB4, ] if mode in not_supported_modes: msg = ( @@ -634,7 +634,7 @@ def compress_weights( CompressWeightsMode.FP8_E4M3, CompressWeightsMode.FP4, CompressWeightsMode.CODEBOOK, - CompressWeightsMode.CB4_F8E4M3, + CompressWeightsMode.CB4, ] if mode in not_supported_modes: msg = ( diff --git a/tests/openvino/native/quantization/test_weights_compression.py b/tests/openvino/native/quantization/test_weights_compression.py index a19638cf517..cd27c7b2cc4 100644 --- a/tests/openvino/native/quantization/test_weights_compression.py +++ b/tests/openvino/native/quantization/test_weights_compression.py @@ -371,7 +371,7 @@ def get_mixed_mapping(primary_fn: Callable, list_layers: list[str]): (CompressWeightsMode.INT4_SYM, 3, get_mixed_mapping(check_int4_sym_grouped, TEST_MODELS[IntegerModel])), (CompressWeightsMode.INT4_ASYM, 3, get_mixed_mapping(check_int4_asym_grouped, TEST_MODELS[IntegerModel])), (CompressWeightsMode.NF4, 3, get_mixed_mapping(check_nf4_grouped, TEST_MODELS[IntegerModel])), - (CompressWeightsMode.CB4_F8E4M3, 3, get_mixed_mapping(check_codebook_grouped, TEST_MODELS[IntegerModel])), + (CompressWeightsMode.CB4, 3, get_mixed_mapping(check_codebook_grouped, TEST_MODELS[IntegerModel])), (CompressWeightsMode.MXFP4, 32, get_mixed_mapping(check_mxfp4, TEST_MODELS[IntegerModel])), (CompressWeightsMode.MXFP8_E4M3, 32, get_mixed_mapping(check_mxfp8, TEST_MODELS[IntegerModel])), (CompressWeightsMode.FP8_E4M3, 3, get_mixed_mapping(check_fp8, TEST_MODELS[IntegerModel])), @@ -1330,7 +1330,7 @@ def test_mixed_precision_codebook(mode, all_layers, ratio, ref_ids): model = SequentialMatmulModel().ov_model compressed_model = compress_weights( model, - mode=CompressWeightsMode.CB4_F8E4M3, + mode=CompressWeightsMode.CB4, ratio=ratio, group_size=1, 
all_layers=all_layers, From ab0eb6fb0d2441fd963bf7d9009c88e0b1ec5398 Mon Sep 17 00:00:00 2001 From: Alexander Dokuchaev Date: Mon, 5 Jan 2026 14:38:54 +0200 Subject: [PATCH 2/2] ref --- ...s_cb4_f8e4m3.json => IntegerModel_compressed_weights_cb4.json} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename tests/openvino/native/data/2025.2/reference_scales/{IntegerModel_compressed_weights_cb4_f8e4m3.json => IntegerModel_compressed_weights_cb4.json} (100%) diff --git a/tests/openvino/native/data/2025.2/reference_scales/IntegerModel_compressed_weights_cb4_f8e4m3.json b/tests/openvino/native/data/2025.2/reference_scales/IntegerModel_compressed_weights_cb4.json similarity index 100% rename from tests/openvino/native/data/2025.2/reference_scales/IntegerModel_compressed_weights_cb4_f8e4m3.json rename to tests/openvino/native/data/2025.2/reference_scales/IntegerModel_compressed_weights_cb4.json
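Note (not part of the patch series): for reference, a minimal usage sketch of the renamed mode follows. It mirrors the compress_weights call this patch updates in examples/llm_compression/openvino/smollm2_360m_codebook/main.py; the model id, output directory, and the optimum-intel loading step are illustrative assumptions, not taken from the patch.

# Sketch: weight compression with the renamed CB4 mode (formerly CB4_F8E4M3).
# Assumes optimum-intel is installed; the model id and output directory are placeholders.
import nncf
from optimum.intel import OVModelForCausalLM

model_id = "HuggingFaceTB/SmolLM2-360M-Instruct"  # assumption: the SmolLM2-360M model used by the example
model = OVModelForCausalLM.from_pretrained(model_id, export=True)

# Same parameters as the updated example: fixed 16-value E4M3 codebook, full ratio, group size 64.
model.model = nncf.compress_weights(
    model.model,
    mode=nncf.CompressWeightsMode.CB4,
    ratio=1.0,
    group_size=64,
)
model.save_pretrained("smollm2_360m_cb4")  # placeholder output directory

Only the enum member and its string value ("cb4_f8e4m3" -> "cb4") change in this series, along with the renamed reference-scales JSON; the fixed E4M3 codebook behaviour itself (CB4_QUANTILES) is untouched.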