Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions docs/source/openvino/export.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ Check out the help for more options:

```text
usage: optimum-cli export openvino [-h] -m MODEL [--task TASK] [--framework {pt}] [--trust-remote-code]
[--weight-format {fp32,fp16,int8,int4,mxfp4,nf4,cb4}]
                                   [--weight-format {fp32,fp16,int8,int4,mxfp4,fp4,fp8_e4m3,nf4,cb4}]
[--quant-mode {int8,f8e4m3,f8e5m2,cb4_f8e4m3,int4_f8e4m3,int4_f8e5m2}]
[--library {transformers,diffusers,timm,sentence_transformers,open_clip}]
[--cache_dir CACHE_DIR] [--pad-token-id PAD_TOKEN_ID] [--ratio RATIO] [--sym]
Expand Down Expand Up @@ -66,7 +66,7 @@ Optional arguments:
--trust-remote-code Allows to use custom code for the modeling hosted in the model repository. This option should
only be set for repositories you trust and in which you have read the code, as it will execute
on your local machine arbitrary code present in the model repository.
--weight-format {fp32,fp16,int8,int4,mxfp4,nf4,cb4}
--weight-format {fp32,fp16,int8,int4,mxfp4,fp4,fp8_e4m3,nf4,cb4}
The weight format of the exported model. Option 'cb4' represents a codebook with 16
fixed fp8 values in E4M3 format.
--quant-mode {int8,f8e4m3,f8e5m2,cb4_f8e4m3,int4_f8e4m3,int4_f8e5m2}
Expand Down
2 changes: 1 addition & 1 deletion optimum/commands/export/openvino.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ def parse_args_openvino(parser: "ArgumentParser"):
optional_group.add_argument(
"--weight-format",
type=str,
choices=["fp32", "fp16", "int8", "int4", "mxfp4", "nf4", "cb4"],
choices=["fp32", "fp16", "int8", "int4", "mxfp4", "fp4", "fp8_e4m3", "nf4", "cb4"],
default=None,
help=(
"The weight format of the exported model. Option 'cb4' represents a codebook with 16 fixed fp8 values in E4M3 format."
Expand Down
6 changes: 3 additions & 3 deletions optimum/intel/openvino/configuration.py
Original file line number Diff line number Diff line change
Expand Up @@ -834,7 +834,7 @@ class OVWeightQuantizationConfig(OVQuantizationConfigBase):
Indicates whether to apply a scale estimation algorithm that minimizes the L2 error between the original and
compressed layers. Providing a dataset is required to run scale estimation.
dtype (`str`, *optional*):
Data type weights are compressed to. Possible values: ['int4', 'int8', 'mxfp4', 'nf4', 'cb4'].
Data type weights are compressed to. Possible values: ['int4', 'int8', 'mxfp4', 'nf4', 'cb4', 'fp4', 'fp8_e4m3'].
Option 'cb4' represents a codebook with 16 fixed fp8 values in E4M3 format.
        gptq (`bool`, *optional*):
Whether to apply GPTQ algorithm. GPTQ optimizes compressed weights in a layer-wise fashion to minimize the
Expand Down Expand Up @@ -1040,10 +1040,10 @@ def post_init(self):

if self.dtype is None:
self.dtype = "int4" if self.bits == 4 else "int8"
if self.dtype not in ["int4", "int8", "mxfp4", "nf4", "cb4"]:
if self.dtype not in ["int4", "int8", "mxfp4", "nf4", "cb4", "fp4", "fp8_e4m3"]:
raise ValueError(
"Weights quantization data type must be one of the following: "
f"['int4', 'int8', 'mxfp4', 'nf4', 'cb4'], but found: {self.dtype}."
f"['int4', 'int8', 'mxfp4', 'nf4', 'cb4', 'fp4', 'fp8_e4m3'], but found: {self.dtype}."
)
if self.dtype in ["mxfp4", "nf4", "cb4"]:
if self.bits != 4:
Expand Down
12 changes: 12 additions & 0 deletions tests/openvino/test_exporters_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -508,6 +508,18 @@ class OVCLIExportTestCase(unittest.TestCase):
]

TRANSFORMERS_4BIT_CONFIGURATIONS = [
(
"text-generation-with-past",
"llama",
"fp4 --group-size 16",
{"model": {"int8": 4, "f4e2m1": 14}},
),
(
"text-generation-with-past",
"llama",
"fp8_e4m3 --group-size 16",
{"model": {"int8": 4, "f8e4m3": 14}},
),
(
"text-generation-with-past",
"opt125m",
Expand Down