Skip to content

Commit 9851767

Browse files
authored
Added int4 compression config for arcee-ai/Trinity-Mini (huggingface#1631)
* Update configuration.py * Update configuration.py
1 parent 42d3f63 commit 9851767

1 file changed

Lines changed: 15 additions & 0 deletions

File tree

optimum/intel/openvino/configuration.py

Lines changed: 15 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -411,6 +411,21 @@ class OVQuantizationMethod(str, Enum):
411411
"weight_only": True,
412412
},
413413
},
414+
"arcee-ai/Trinity-Mini": {
415+
"quantization_config1": {
416+
"bits": 4,
417+
"sym": False,
418+
"group_size": 64,
419+
# With the ignored scope below, we keep some weights in their original precision during the first quantization
420+
# run and then quantize them to int8 in the second run.
421+
"ignored_scope": {"patterns": [".*self_attn.*", ".*router.*"]},
422+
},
423+
"quantization_config2": {
424+
"bits": 8,
425+
"sym": False,
426+
"weight_only": True,
427+
},
428+
},
414429
}
415430

416431
_DEFAULT_8BIT_WQ_CONFIGS = {

0 commit comments

Comments
 (0)