Added int4 compression config for arcee-ai/Trinity-Mini (huggingface#1631)

MaximProshin · web-flow · commit 98517678c995 · 2026-03-04T15:06:46.000+04:00
* Update configuration.py

* Update configuration.py
diff --git a/optimum/intel/openvino/configuration.py b/optimum/intel/openvino/configuration.py
@@ -411,6 +411,21 @@ class OVQuantizationMethod(str, Enum):
             "weight_only": True,
         },
     },
+    "arcee-ai/Trinity-Mini": {
+        "quantization_config1": {
+            "bits": 4,
+            "sym": False,
+            "group_size": 64,
+            # The ignored scope below keeps the matching weights in their original precision during the first (int4)
+            # quantization run; the second run then quantizes them to int8.
+            "ignored_scope": {"patterns": [".*self_attn.*", ".*router.*"]},
+        },
+        "quantization_config2": {
+            "bits": 8,
+            "sym": False,
+            "weight_only": True,
+        },
+    },
 }
 
 _DEFAULT_8BIT_WQ_CONFIGS = {