Skip to content

Commit d6a2aa9

Browse files
[NNCF] FP8/FP4 support
1 parent 906008d commit d6a2aa9

File tree

4 files changed

+18
-6
lines changed

4 files changed

+18
-6
lines changed

docs/source/openvino/export.mdx

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ Check out the help for more options:
3131

3232
```text
3333
usage: optimum-cli export openvino [-h] -m MODEL [--task TASK] [--framework {pt}] [--trust-remote-code]
34-
[--weight-format {fp32,fp16,int8,int4,mxfp4,nf4,cb4}]
34+
[--weight-format {fp32,fp16,int8,int4,mxfp4,fp4,fp8_e4m3,nf4,cb4}]
3535
[--quant-mode {int8,f8e4m3,f8e5m2,cb4_f8e4m3,int4_f8e4m3,int4_f8e5m2}]
3636
[--library {transformers,diffusers,timm,sentence_transformers,open_clip}]
3737
[--cache_dir CACHE_DIR] [--pad-token-id PAD_TOKEN_ID] [--ratio RATIO] [--sym]
@@ -66,7 +66,7 @@ Optional arguments:
6666
--trust-remote-code Allows to use custom code for the modeling hosted in the model repository. This option should
6767
only be set for repositories you trust and in which you have read the code, as it will execute
6868
on your local machine arbitrary code present in the model repository.
69-
--weight-format {fp32,fp16,int8,int4,mxfp4,nf4,cb4}
69+
--weight-format {fp32,fp16,int8,int4,mxfp4,fp4,fp8_e4m3,nf4,cb4}
7070
The weight format of the exported model. Option 'cb4' represents a codebook with 16
7171
fixed fp8 values in E4M3 format.
7272
--quant-mode {int8,f8e4m3,f8e5m2,cb4_f8e4m3,int4_f8e4m3,int4_f8e5m2}

optimum/commands/export/openvino.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ def parse_args_openvino(parser: "ArgumentParser"):
6767
optional_group.add_argument(
6868
"--weight-format",
6969
type=str,
70-
choices=["fp32", "fp16", "int8", "int4", "mxfp4", "nf4", "cb4"],
70+
choices=["fp32", "fp16", "int8", "int4", "mxfp4", "fp4", "fp8_e4m3", "nf4", "cb4"],
7171
default=None,
7272
help=(
7373
"The weight format of the exported model. Option 'cb4' represents a codebook with 16 fixed fp8 values in E4M3 format."

optimum/intel/openvino/configuration.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -686,7 +686,7 @@ class OVWeightQuantizationConfig(OVQuantizationConfigBase):
686686
Indicates whether to apply a scale estimation algorithm that minimizes the L2 error between the original and
687687
compressed layers. Providing a dataset is required to run scale estimation.
688688
dtype (`str`, *optional*):
689-
Data type weights are compressed to. Possible values: ['int4', 'int8', 'mxfp4', 'nf4', 'cb4'].
689+
Data type weights are compressed to. Possible values: ['int4', 'int8', 'mxfp4', 'nf4', 'cb4', 'fp4', 'fp8_e4m3'].
690690
Option 'cb4' represents a codebook with 16 fixed fp8 values in E4M3 format.
691691
gptq (`bool`, *optional*):
692692
Whether to apply GPTQ algorithm. GPTQ optimizes compressed weights in a layer-wise fashion to minimize the
@@ -879,10 +879,10 @@ def post_init(self):
879879

880880
if self.dtype is None:
881881
self.dtype = "int4" if self.bits == 4 else "int8"
882-
if self.dtype not in ["int4", "int8", "mxfp4", "nf4", "cb4"]:
882+
if self.dtype not in ["int4", "int8", "mxfp4", "nf4", "cb4", "fp4", "fp8_e4m3"]:
883883
raise ValueError(
884884
"Weights quantization data type must be one of the following: "
885-
f"['int4', 'int8', 'mxfp4', 'nf4', 'cb4'], but found: {self.dtype}."
885+
f"['int4', 'int8', 'mxfp4', 'nf4', 'cb4', 'fp4', 'fp8_e4m3'], but found: {self.dtype}."
886886
)
887887
if self.dtype in ["mxfp4", "nf4", "cb4"]:
888888
if self.bits != 4:

tests/openvino/test_exporters_cli.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -494,6 +494,18 @@ class OVCLIExportTestCase(unittest.TestCase):
494494
"mxfp4",
495495
{"model": {"int8": 4, "f4e2m1": 72, "f8e8m0": 72}},
496496
),
497+
(
498+
"text-generation-with-past",
499+
"opt125m",
500+
"fp4",
501+
{"model": {"int8": 4, "f4e2m1": 72}},
502+
),
503+
(
504+
"text-generation-with-past",
505+
"opt125m",
506+
"fp8_e4m3",
507+
{"model": {"int8": 4, "f8e4m3": 72}},
508+
),
497509
(
498510
"text-generation-with-past",
499511
"opt125m",

0 commit comments

Comments
 (0)