2 changes: 1 addition & 1 deletion docs/Algorithms.md
@@ -11,7 +11,7 @@
- Symmetric 8 bit compression mode
- Symmetric and asymmetric 4 bit compression mode
- NF4 compression mode
- Arbitrary look-up table (CODEBOOK) or predefined lookup table based on NF4 (CB4_F8E4M3)
- Arbitrary look-up table (CODEBOOK) or predefined lookup table based on NF4 (CB4)
- MX-compliant types - MXFP4 and MXFP8_E4M3
- FP types - FP8_E4M3 and FP4
- Mixed precision weights compression
@@ -44,11 +44,11 @@ NNCF can automatically distribute precision assignments based on quantization se
| INT4_ASYM | INT4 | FP16 | Per-channel / Group-wise | [Asymmetric quantization](/docs/usage/training_time_compression/Quantization.md#asymmetric-quantization) |
| NF4 | FP32 | FP16 | Per-channel / Group-wise | [NormalFloat-4](https://arxiv.org/pdf/2305.14314v1.pdf) lookup table with 16 FP32 values |
| CODEBOOK | Any | FP16 | Per-channel / Group-wise | Arbitrary lookup table (codebook) |
| CB4_F8E4M3 | E4M3 | FP16 | Per-channel / Group-wise | A fixed lookup table with 16 E4M3 values based on NF4 values |
| CB4 | E4M3 | FP16 | Per-channel / Group-wise | A fixed lookup table with 16 E4M3 values based on NF4 values |
| MXFP4 | E2M1 | E8M0 | Group-wise (32) | [MX-compliant FP4](https://www.opencompute.org/documents/ocp-microscaling-formats-mx-v1-0-spec-final-pdf) |
| MXFP8_E4M3 | E4M3 | E8M0 | Group-wise (32) | [MX-compliant FP8](https://www.opencompute.org/documents/ocp-microscaling-formats-mx-v1-0-spec-final-pdf) |
| FP8_E4M3 | E4M3 | FP16 | Per-channel / Group-wise | [FP8](https://www.opencompute.org/documents/ocp-microscaling-formats-mx-v1-0-spec-final-pdf) |
| FP4 | E2M1 | FP16 | Per-channel / Group-wise | [FP4](https://www.opencompute.org/documents/ocp-microscaling-formats-mx-v1-0-spec-final-pdf) |
| FP4 | E2M1 | FP16 | Per-channel / Group-wise | [FP4](https://www.opencompute.org/documents/ocp-microscaling-formats-mx-v1-0-spec-final-pdf) |

**Note**: Granularity refers to the scope of elements sharing quantization parameters. "Per-channel" applies different parameters for each output channel, while "Group-wise" divides weights into groups (e.g., group_size=128) that share the same parameters.
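For illustration, a minimal NumPy sketch of the two granularities; the weight shape, group size, and symmetric 4-bit range are assumptions, not values taken from this table:

```python
import numpy as np

# Toy weight matrix: 8 output channels x 256 input channels (assumed shape).
weight = np.random.randn(8, 256).astype(np.float32)

# Per-channel: one scale per output channel, shared by all 256 inputs.
per_channel_scale = np.abs(weight).max(axis=1, keepdims=True) / 7  # assuming a [-7, 7] integer range

# Group-wise: split each channel into groups of 128 inputs; one scale per group.
group_size = 128
grouped = weight.reshape(8, 256 // group_size, group_size)
group_scale = np.abs(grouped).max(axis=-1, keepdims=True) / 7

print(per_channel_scale.shape)  # (8, 1)    -> 8 scales in total
print(group_scale.shape)        # (8, 2, 1) -> 16 scales in total
```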

@@ -110,7 +110,7 @@ def default_codebook_example(model_id: str, compressed_model_id: str) -> list[st
answers_by_questions = generate_answers(QUESTIONS, model, tokenizer)
print_answers("Non-optimized model outputs:\n", answers_by_questions)

model.model = nncf.compress_weights(model.model, mode=nncf.CompressWeightsMode.CB4_F8E4M3, ratio=1.0, group_size=64)
model.model = nncf.compress_weights(model.model, mode=nncf.CompressWeightsMode.CB4, ratio=1.0, group_size=64)
model.save_pretrained(compressed_model_id)
tokenizer.save_pretrained(compressed_model_id)
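For contrast with the fixed CB4 table, a hedged sketch of how an arbitrary CODEBOOK might be supplied; the `advanced_parameters=nncf.AdvancedCompressionParameters(codebook=...)` route is inferred from the algorithm code later in this diff (`self._advanced_parameters.codebook`) and should be checked against the current API:

```python
import numpy as np
import nncf

# 16 arbitrary centroids (assumed values) acting as the lookup table.
custom_codebook = np.linspace(-1.0, 1.0, 16, dtype=np.float32)

model.model = nncf.compress_weights(
    model.model,
    mode=nncf.CompressWeightsMode.CODEBOOK,
    ratio=1.0,
    group_size=64,
    advanced_parameters=nncf.AdvancedCompressionParameters(codebook=custom_codebook),
)
```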

4 changes: 2 additions & 2 deletions src/nncf/parameters.py
@@ -95,15 +95,15 @@ class CompressWeightsMode(StrEnum):
:param FP8_E4M3: A FP8 format with E4M3 values sharing group-level fp16 scale.
:param FP4: A FP4 format with E2M1 values sharing group-level fp16 scale.
:param CODEBOOK: Codebook (LUT) quantization format.
:param CB4_F8E4M3: Codebook (LUT) format with 16 fixed fp8 values in E4M3 format.
:param CB4: Codebook (LUT) format with 16 fixed fp8 values in E4M3 format.
"""

INT8_SYM = "int8_sym"
INT8_ASYM = "int8_asym"
INT4_SYM = "int4_sym"
INT4_ASYM = "int4_asym"
NF4 = "nf4"
CB4_F8E4M3 = "cb4_f8e4m3"
CB4 = "cb4"
INT8 = "int8" # Deprecated mode
MXFP4 = "mxfp4"
MXFP8_E4M3 = "mxfp8_e4m3"
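Because `CompressWeightsMode` is a `StrEnum`, the rename also changes the accepted string spelling. A small sketch of the caller-visible effect, assuming no backward-compatible alias is kept elsewhere:

```python
import nncf

# Member lookup by value and string comparison both use the new spelling.
assert nncf.CompressWeightsMode("cb4") is nncf.CompressWeightsMode.CB4
assert nncf.CompressWeightsMode.CB4 == "cb4"

# The old value no longer resolves to a member.
try:
    nncf.CompressWeightsMode("cb4_f8e4m3")
except ValueError:
    print("'cb4_f8e4m3' is not a valid mode anymore")
```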
@@ -93,7 +93,7 @@ def get_weight_compression_configuration(
elif group_size is None and mode in NON_INT8_MODES:
if mode in [CompressWeightsMode.MXFP4, CompressWeightsMode.MXFP8_E4M3]:
group_size = 32
elif mode in [CompressWeightsMode.CODEBOOK, CompressWeightsMode.CB4_F8E4M3]:
elif mode in [CompressWeightsMode.CODEBOOK, CompressWeightsMode.CB4]:
group_size = -1
else:
group_size = 128
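Mirroring the branch above, a caller that omits `group_size` would effectively get a mode-dependent default (an illustrative summary of this function, not an additional API; `model` is a placeholder for an OpenVINO model):

```python
import nncf

# group_size left unset -> the configuration helper fills in a default:
#   MXFP4 / MXFP8_E4M3 -> 32   (fixed MX block size)
#   CODEBOOK / CB4     -> -1   (per-channel, no grouping)
#   other non-INT8     -> 128
compressed = nncf.compress_weights(model, mode=nncf.CompressWeightsMode.CB4)
```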
@@ -523,7 +523,7 @@ def _get_backup_config(self, weight_dtype: TensorDataType) -> Optional[WeightCom
def _get_primary_config(self, group_size: int) -> WeightCompressionConfig:
codebook_values = None

if self._mode == CompressWeightsMode.CB4_F8E4M3:
if self._mode == CompressWeightsMode.CB4:
codebook_values = Tensor(CB4_QUANTILES)
elif self._mode == CompressWeightsMode.CODEBOOK:
codebook_values = Tensor(self._advanced_parameters.codebook)
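To make the codebook path concrete, a rough NumPy sketch of how a 16-entry lookup table plus per-group scales reconstruct weights; the values and shapes are illustrative and do not reproduce `CB4_QUANTILES` or the actual NNCF kernels:

```python
import numpy as np

# Assumed toy codebook: 16 values standing in for CB4_QUANTILES or a user-provided table.
codebook = np.linspace(-1.0, 1.0, 16, dtype=np.float32)

# Compressed representation: 4-bit indices into the codebook plus one scale per group.
indices = np.random.randint(0, 16, size=(8, 2, 128))    # (out_channels, n_groups, group_size)
scales = np.random.rand(8, 2, 1).astype(np.float32)     # per-group scales

# Dequantization: look up the codebook entry, then rescale.
dequantized = codebook[indices] * scales
print(dequantized.shape)  # (8, 2, 128)
```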
@@ -959,9 +959,9 @@ def get_weight_compression_parameters(
# MoE operations are usually matmuls, so the check for matmul metatype is done
# This is to avoid raising the error for non-MoE cases with 3D weights.
parsed_ov_version = f"{ov_version[0]}.{ov_version[1]}.{ov_version[2]}-{ov_version[3]}"
msg = f"""NNCF compression algorithms do not support 3D weights with current version of
OpenVINO {parsed_ov_version} due to a known issue in statistics collection
Ticket - 176465. Please update to the latest OpenVINO nightly version.
msg = f"""NNCF compression algorithms do not support 3D weights with current version of
OpenVINO {parsed_ov_version} due to a known issue in statistics collection
Ticket - 176465. Please update to the latest OpenVINO nightly version.
Node with weight: {node.node_name}."""
raise nncf.UnsupportedModelError(msg)

@@ -71,15 +71,15 @@ def is_integer(self):
CompressWeightsMode.FP8_E4M3,
CompressWeightsMode.FP4,
CompressWeightsMode.CODEBOOK,
CompressWeightsMode.CB4_F8E4M3,
CompressWeightsMode.CB4,
]

@property
def is_codebook(self):
"""
:return: True if compression type is codebook, else False.
"""
return self.mode in [CompressWeightsMode.CODEBOOK, CompressWeightsMode.CB4_F8E4M3]
return self.mode in [CompressWeightsMode.CODEBOOK, CompressWeightsMode.CB4]

@property
def compression_dtype(self) -> TensorDataType:
6 changes: 3 additions & 3 deletions src/nncf/quantization/quantize_model.py
@@ -511,7 +511,7 @@ def compress_weights(
CompressWeightsMode.FP8_E4M3,
CompressWeightsMode.FP4,
CompressWeightsMode.CODEBOOK,
CompressWeightsMode.CB4_F8E4M3,
CompressWeightsMode.CB4,
]
if mode in not_supported_modes:
msg = (
@@ -559,7 +559,7 @@ def compress_weights(
CompressWeightsMode.FP8_E4M3,
CompressWeightsMode.FP4,
CompressWeightsMode.CODEBOOK,
CompressWeightsMode.CB4_F8E4M3,
CompressWeightsMode.CB4,
]
if mode in not_supported_modes:
msg = (
@@ -634,7 +634,7 @@ def compress_weights(
CompressWeightsMode.FP8_E4M3,
CompressWeightsMode.FP4,
CompressWeightsMode.CODEBOOK,
CompressWeightsMode.CB4_F8E4M3,
CompressWeightsMode.CB4,
]
if mode in not_supported_modes:
msg = (
@@ -371,7 +371,7 @@ def get_mixed_mapping(primary_fn: Callable, list_layers: list[str]):
(CompressWeightsMode.INT4_SYM, 3, get_mixed_mapping(check_int4_sym_grouped, TEST_MODELS[IntegerModel])),
(CompressWeightsMode.INT4_ASYM, 3, get_mixed_mapping(check_int4_asym_grouped, TEST_MODELS[IntegerModel])),
(CompressWeightsMode.NF4, 3, get_mixed_mapping(check_nf4_grouped, TEST_MODELS[IntegerModel])),
(CompressWeightsMode.CB4_F8E4M3, 3, get_mixed_mapping(check_codebook_grouped, TEST_MODELS[IntegerModel])),
(CompressWeightsMode.CB4, 3, get_mixed_mapping(check_codebook_grouped, TEST_MODELS[IntegerModel])),
(CompressWeightsMode.MXFP4, 32, get_mixed_mapping(check_mxfp4, TEST_MODELS[IntegerModel])),
(CompressWeightsMode.MXFP8_E4M3, 32, get_mixed_mapping(check_mxfp8, TEST_MODELS[IntegerModel])),
(CompressWeightsMode.FP8_E4M3, 3, get_mixed_mapping(check_fp8, TEST_MODELS[IntegerModel])),
@@ -1330,7 +1330,7 @@ def test_mixed_precision_codebook(mode, all_layers, ratio, ref_ids):
model = SequentialMatmulModel().ov_model
compressed_model = compress_weights(
model,
mode=CompressWeightsMode.CB4_F8E4M3,
mode=CompressWeightsMode.CB4,
ratio=ratio,
group_size=1,
all_layers=all_layers,