5 changes: 2 additions & 3 deletions docs/source/openvino/export.mdx
@@ -30,7 +30,7 @@ optimum-cli export openvino --model local_llama --task text-generation-with-past
Check out the help for more options:

```text
usage: optimum-cli export openvino [-h] -m MODEL [--task TASK] [--framework {pt,tf}] [--trust-remote-code]
usage: optimum-cli export openvino [-h] -m MODEL [--task TASK] [--framework {pt}] [--trust-remote-code]
[--weight-format {fp32,fp16,int8,int4,mxfp4,nf4,cb4}]
[--quant-mode {int8,f8e4m3,f8e5m2,nf4_f8e4m3,nf4_f8e5m2,cb4_f8e4m3,int4_f8e4m3,int4_f8e5m2}]
[--library {transformers,diffusers,timm,sentence_transformers,open_clip}]
@@ -62,8 +62,7 @@ Optional arguments:
'question-answering', 'zero-shot-image-classification', 'mask-generation', 'text-generation',
'text-classification']. For decoder models, use 'xxx-with-past' to export the model using past
key values in the decoder.
--framework {pt,tf} The framework to use for the export. If not provided, will attempt to use the local
checkpoint's original framework or what is available in the environment.
--framework {pt} The framework to use for the export. Defaults to 'pt' for PyTorch.
--trust-remote-code Allows to use custom code for the modeling hosted in the model repository. This option should
only be set for repositories you trust and in which you have read the code, as it will execute
on your local machine arbitrary code present in the model repository.
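For reference, a minimal sketch of the programmatic equivalent of the CLI invocation documented above, assuming `optimum-intel` is installed. The model name and output directory are placeholders taken from the docs example, and the keyword names follow the `main_export` signature shown further down in this diff.

```python
# Sketch only: main_export is the function the `optimum-cli export openvino`
# command wraps (see optimum/exporters/openvino/__main__.py below); the model
# and output paths here are placeholders.
from optimum.exporters.openvino import main_export

main_export(
    model_name_or_path="local_llama",   # placeholder local checkpoint
    output="local_llama_ov",            # placeholder output directory
    task="text-generation-with-past",   # `-with-past` exports with KV-cache inputs
    framework="pt",                     # only PyTorch is accepted after this change
)
```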
4 changes: 2 additions & 2 deletions examples/neural_compressor/question-answering/utils_qa.py
@@ -178,7 +178,7 @@ def postprocess_qa_predictions(
if len(predictions) == 0 or (len(predictions) == 1 and predictions[0]["text"] == ""):
predictions.insert(0, {"text": "empty", "start_logit": 0.0, "end_logit": 0.0, "score": 0.0})

# Compute the softmax of all scores (we do it with numpy to stay independent from torch/tf in this file, using
# Compute the softmax of all scores (we do it with numpy to stay independent from torch in this file, using
# the LogSumExp trick).
scores = np.array([pred.pop("score") for pred in predictions])
exp_scores = np.exp(scores - np.max(scores))
@@ -380,7 +380,7 @@ def postprocess_qa_predictions_with_beam_search(
if len(predictions) == 0:
predictions.insert(0, {"text": "", "start_logit": -1e-6, "end_logit": -1e-6, "score": -2e-6})

# Compute the softmax of all scores (we do it with numpy to stay independent from torch/tf in this file, using
# Compute the softmax of all scores (we do it with numpy to stay independent from torch in this file, using
# the LogSumExp trick).
scores = np.array([pred.pop("score") for pred in predictions])
exp_scores = np.exp(scores - np.max(scores))
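The LogSumExp trick referenced in the comment above subtracts the maximum score before exponentiating, so the largest exponent is 0 and overflow is avoided without changing the resulting probabilities. A self-contained sketch with made-up scores:

```python
import numpy as np

# Made-up logits; subtracting the max keeps np.exp() in a safe numeric range.
scores = np.array([12.3, 10.1, -4.2])
exp_scores = np.exp(scores - np.max(scores))
probs = exp_scores / exp_scores.sum()

assert np.isclose(probs.sum(), 1.0)
```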
33 changes: 9 additions & 24 deletions optimum/commands/export/openvino.py
@@ -15,21 +15,20 @@

import json
import logging
import sys
from pathlib import Path
from typing import TYPE_CHECKING, Optional
from typing import TYPE_CHECKING

from huggingface_hub.constants import HUGGINGFACE_HUB_CACHE

from optimum.commands.base import BaseOptimumCLICommand, CommandInfo
from optimum.exporters.tasks import TasksManager
from optimum.utils.constant import ALL_TASKS


logger = logging.getLogger(__name__)


if TYPE_CHECKING:
from argparse import ArgumentParser, Namespace, _SubParsersAction
from argparse import ArgumentParser


def parse_args_openvino(parser: "ArgumentParser"):
@@ -45,18 +44,17 @@ def parse_args_openvino(parser: "ArgumentParser"):
"--task",
default="auto",
help=(
"The task to export the model for. If not specified, the task will be auto-inferred based on the model. Available tasks depend on the model, but are among:"
f" {str(TasksManager.get_all_tasks())}. For decoder models, use `xxx-with-past` to export the model using past key values in the decoder."
"The task to export the model for. If not specified, the task will be auto-inferred from the model's metadata or files. "
"For tasks that generate text, add the `xxx-with-past` suffix to export the model using past key values caching. "
f"Available tasks depend on the model, but are among the following list: {ALL_TASKS}."
),
)
optional_group.add_argument(
"--framework",
type=str,
choices=["pt", "tf"],
default=None,
help=(
"The framework to use for the export. If not provided, will attempt to use the local checkpoint's original framework or what is available in the environment."
),
choices=["pt"],
default="pt",
help="The framework to use for the export. Defaults to 'pt' for PyTorch. ",
)
optional_group.add_argument(
"--trust-remote-code",
@@ -323,19 +321,6 @@ def no_quantization_parameter_provided(args):
class OVExportCommand(BaseOptimumCLICommand):
COMMAND = CommandInfo(name="openvino", help="Export PyTorch models to OpenVINO IR.")

def __init__(
self,
subparsers: "_SubParsersAction",
args: Optional["Namespace"] = None,
command: Optional["CommandInfo"] = None,
from_defaults_factory: bool = False,
parser: Optional["ArgumentParser"] = None,
):
super().__init__(
subparsers, args=args, command=command, from_defaults_factory=from_defaults_factory, parser=parser
)
self.args_string = " ".join(sys.argv[3:])

@staticmethod
def parse_args(parser: "ArgumentParser"):
return parse_args_openvino(parser)
3 changes: 1 addition & 2 deletions optimum/commands/neural_compressor/base.py
@@ -13,8 +13,7 @@
# limitations under the License.

from optimum.commands.base import BaseOptimumCLICommand, CommandInfo

from .quantize import INCQuantizeCommand
from optimum.commands.neural_compressor.quantize import INCQuantizeCommand


class INCCommand(BaseOptimumCLICommand):
38 changes: 13 additions & 25 deletions optimum/commands/neural_compressor/quantize.py
@@ -12,21 +12,21 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import sys
from pathlib import Path
from typing import TYPE_CHECKING, Optional
from typing import TYPE_CHECKING

from optimum.commands.base import BaseOptimumCLICommand, CommandInfo
from optimum.exporters.tasks import TasksManager
from optimum.commands.base import BaseOptimumCLICommand
from optimum.utils.constant import ALL_TASKS


if TYPE_CHECKING:
from argparse import ArgumentParser, Namespace, _SubParsersAction
from argparse import ArgumentParser


def parse_args_inc_quantize(parser: "ArgumentParser"):
required_group = parser.add_argument_group("Required arguments")
required_group.add_argument(
"-m",
"--model",
type=str,
required=True,
@@ -45,34 +45,23 @@ def parse_args_inc_quantize(parser: "ArgumentParser"):
"--task",
default="auto",
help=(
"The task to export the model for. If not specified, the task will be auto-inferred based on the model. Available tasks depend on the model, but are among:"
f" {str(TasksManager.get_all_tasks())}."
"The task to export the model for. If not specified, the task will be auto-inferred from the model's metadata or files. "
"For tasks that generate text, add the `xxx-with-past` suffix to export the model using past key values caching. "
f"Available tasks depend on the model, but are among the following list: {ALL_TASKS}."
),
)


class INCQuantizeCommand(BaseOptimumCLICommand):
def __init__(
self,
subparsers: "_SubParsersAction",
args: Optional["Namespace"] = None,
command: Optional["CommandInfo"] = None,
from_defaults_factory: bool = False,
parser: Optional["ArgumentParser"] = None,
):
super().__init__(
subparsers, args=args, command=command, from_defaults_factory=from_defaults_factory, parser=parser
)
self.args_string = " ".join(sys.argv[3:])

@staticmethod
def parse_args(parser: "ArgumentParser"):
return parse_args_inc_quantize(parser)

def run(self):
from neural_compressor.config import PostTrainingQuantConfig

from ...intel.neural_compressor import INCQuantizer
from optimum.exporters.tasks import TasksManager
from optimum.intel.neural_compressor import INCQuantizer

save_dir = self.args.output
model_id = self.args.model
@@ -85,10 +74,9 @@ def run(self):
try:
task = TasksManager.infer_task_from_model(model_id)
except Exception as e:
return (
f"### Error: {e}. Please pass explicitely the task as it could not be inferred.",
None,
)
raise ValueError(
"The task could not be inferred automatically. Please provide the task using the --task argument."
) from e

model = TasksManager.get_model_from_task(task, model_id)
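To situate the snippet above, here is a rough sketch of the dynamic post-training quantization flow that `run()` wraps, assuming the `INCQuantizer` API from `optimum.intel.neural_compressor`. The model id, quantization approach, and output directory are illustrative placeholders, not values taken from this PR.

```python
from neural_compressor.config import PostTrainingQuantConfig
from optimum.exporters.tasks import TasksManager
from optimum.intel.neural_compressor import INCQuantizer

model_id = "distilbert-base-uncased-finetuned-sst-2-english"  # placeholder model

# Infer the task (or pass --task explicitly if inference fails, as the new
# ValueError above suggests) and load the corresponding model class.
task = TasksManager.infer_task_from_model(model_id)
model = TasksManager.get_model_from_task(task, model_id)

# Dynamic post-training quantization; the approach is an assumption for this sketch.
quantizer = INCQuantizer.from_pretrained(model)
quantizer.quantize(
    quantization_config=PostTrainingQuantConfig(approach="dynamic"),
    save_directory="quantized_model",  # placeholder output directory
)
```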

7 changes: 3 additions & 4 deletions optimum/exporters/openvino/__main__.py
@@ -111,7 +111,7 @@ def main_export(
output: Union[str, Path],
task: str = "auto",
device: str = "cpu",
framework: Optional[str] = None,
framework: str = "pt",
cache_dir: str = HUGGINGFACE_HUB_CACHE,
trust_remote_code: bool = False,
pad_token_id: Optional[int] = None,
@@ -150,9 +150,8 @@ def main_export(
use `xxx-with-past` to export the model using past key values in the decoder.
device (`str`, defaults to `"cpu"`):
The device to use to do the export. Defaults to "cpu".
framework (`Optional[str]`, defaults to `None`):
The framework to use for the ONNX export (`"pt"` or `"tf"`). If not provided, will attempt to automatically detect
the framework for the checkpoint.
framework (`str`, defaults to `"pt"`):
The framework to use for the export. Defaults to `"pt"` for PyTorch.
cache_dir (`Optional[str]`, defaults to `None`):
Path indicating where to store cache. The default Hugging Face cache path will be used by default.
trust_remote_code (`bool`, defaults to `False`):