5 changes: 2 additions & 3 deletions docs/source/openvino/export.mdx
@@ -30,7 +30,7 @@ optimum-cli export openvino --model local_llama --task text-generation-with-past
Check out the help for more options:

```text
usage: optimum-cli export openvino [-h] -m MODEL [--task TASK] [--framework {pt,tf}] [--trust-remote-code]
usage: optimum-cli export openvino [-h] -m MODEL [--task TASK] [--framework {pt}] [--trust-remote-code]
[--weight-format {fp32,fp16,int8,int4,mxfp4,nf4,cb4}]
[--quant-mode {int8,f8e4m3,f8e5m2,nf4_f8e4m3,nf4_f8e5m2,cb4_f8e4m3,int4_f8e4m3,int4_f8e5m2}]
[--library {transformers,diffusers,timm,sentence_transformers,open_clip}]
@@ -62,8 +62,7 @@ Optional arguments:
'question-answering', 'zero-shot-image-classification', 'mask-generation', 'text-generation',
'text-classification']. For decoder models, use 'xxx-with-past' to export the model using past
key values in the decoder.
--framework {pt,tf} The framework to use for the export. If not provided, will attempt to use the local
checkpoint's original framework or what is available in the environment.
--framework {pt} The framework to use for the export. Defaults to 'pt' for PyTorch.
--trust-remote-code Allows to use custom code for the modeling hosted in the model repository. This option should
only be set for repositories you trust and in which you have read the code, as it will execute
on your local machine arbitrary code present in the model repository.
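For reference, a minimal sketch of the programmatic equivalent of the CLI invocation documented above, assuming `optimum-intel` is installed. The model name and output directory are placeholders taken from the docs example, and the keyword names follow the `main_export` signature shown further down in this diff.

```python
# Sketch only: main_export is the function the `optimum-cli export openvino`
# command wraps (see optimum/exporters/openvino/__main__.py below); the model
# and output paths here are placeholders.
from optimum.exporters.openvino import main_export

main_export(
    model_name_or_path="local_llama",   # placeholder local checkpoint
    output="local_llama_ov",            # placeholder output directory
    task="text-generation-with-past",   # `-with-past` exports with KV-cache inputs
    framework="pt",                     # only PyTorch is accepted after this change
)
```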
4 changes: 2 additions & 2 deletions examples/neural_compressor/question-answering/utils_qa.py
@@ -178,7 +178,7 @@ def postprocess_qa_predictions(
if len(predictions) == 0 or (len(predictions) == 1 and predictions[0]["text"] == ""):
predictions.insert(0, {"text": "empty", "start_logit": 0.0, "end_logit": 0.0, "score": 0.0})

# Compute the softmax of all scores (we do it with numpy to stay independent from torch/tf in this file, using
# Compute the softmax of all scores (we do it with numpy to stay independent from torch in this file, using
# the LogSumExp trick).
scores = np.array([pred.pop("score") for pred in predictions])
exp_scores = np.exp(scores - np.max(scores))
@@ -380,7 +380,7 @@ def postprocess_qa_predictions_with_beam_search(
if len(predictions) == 0:
predictions.insert(0, {"text": "", "start_logit": -1e-6, "end_logit": -1e-6, "score": -2e-6})

# Compute the softmax of all scores (we do it with numpy to stay independent from torch/tf in this file, using
# Compute the softmax of all scores (we do it with numpy to stay independent from torch in this file, using
# the LogSumExp trick).
scores = np.array([pred.pop("score") for pred in predictions])
exp_scores = np.exp(scores - np.max(scores))
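The LogSumExp trick referenced in the comment above subtracts the maximum score before exponentiating, so the largest exponent is 0 and overflow is avoided without changing the resulting probabilities. A self-contained sketch with made-up scores:

```python
import numpy as np

# Made-up logits; subtracting the max keeps np.exp() in a safe numeric range.
scores = np.array([12.3, 10.1, -4.2])
exp_scores = np.exp(scores - np.max(scores))
probs = exp_scores / exp_scores.sum()

assert np.isclose(probs.sum(), 1.0)
```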
33 changes: 9 additions & 24 deletions optimum/commands/export/openvino.py
@@ -15,21 +15,20 @@

import json
import logging
import sys
from pathlib import Path
from typing import TYPE_CHECKING, Optional
from typing import TYPE_CHECKING

from huggingface_hub.constants import HUGGINGFACE_HUB_CACHE

from optimum.commands.base import BaseOptimumCLICommand, CommandInfo
from optimum.exporters.tasks import TasksManager
from optimum.utils.constant import ALL_TASKS


logger = logging.getLogger(__name__)


if TYPE_CHECKING:
from argparse import ArgumentParser, Namespace, _SubParsersAction
from argparse import ArgumentParser


def parse_args_openvino(parser: "ArgumentParser"):
@@ -45,18 +44,17 @@ def parse_args_openvino(parser: "ArgumentParser"):
"--task",
default="auto",
help=(
"The task to export the model for. If not specified, the task will be auto-inferred based on the model. Available tasks depend on the model, but are among:"
f" {str(TasksManager.get_all_tasks())}. For decoder models, use `xxx-with-past` to export the model using past key values in the decoder."
"The task to export the model for. If not specified, the task will be auto-inferred from the model's metadata or files. "
"For tasks that generate text, add the `xxx-with-past` suffix to export the model using past key values caching. "
f"Available tasks depend on the model, but are among the following list: {ALL_TASKS}."
),
)
optional_group.add_argument(
"--framework",
type=str,
choices=["pt", "tf"],
default=None,
help=(
"The framework to use for the export. If not provided, will attempt to use the local checkpoint's original framework or what is available in the environment."
),
choices=["pt"],
default="pt",
help="The framework to use for the export. Defaults to 'pt' for PyTorch. ",
)
optional_group.add_argument(
"--trust-remote-code",
@@ -323,19 +321,6 @@ def no_quantization_parameter_provided(args):
class OVExportCommand(BaseOptimumCLICommand):
COMMAND = CommandInfo(name="openvino", help="Export PyTorch models to OpenVINO IR.")

def __init__(
self,
subparsers: "_SubParsersAction",
args: Optional["Namespace"] = None,
command: Optional["CommandInfo"] = None,
from_defaults_factory: bool = False,
parser: Optional["ArgumentParser"] = None,
):
super().__init__(
subparsers, args=args, command=command, from_defaults_factory=from_defaults_factory, parser=parser
)
self.args_string = " ".join(sys.argv[3:])

@staticmethod
def parse_args(parser: "ArgumentParser"):
return parse_args_openvino(parser)
3 changes: 1 addition & 2 deletions optimum/commands/neural_compressor/base.py
@@ -13,8 +13,7 @@
# limitations under the License.

from optimum.commands.base import BaseOptimumCLICommand, CommandInfo

from .quantize import INCQuantizeCommand
from optimum.commands.neural_compressor.quantize import INCQuantizeCommand


class INCCommand(BaseOptimumCLICommand):
38 changes: 13 additions & 25 deletions optimum/commands/neural_compressor/quantize.py
@@ -12,21 +12,21 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import sys
from pathlib import Path
from typing import TYPE_CHECKING, Optional
from typing import TYPE_CHECKING

from optimum.commands.base import BaseOptimumCLICommand, CommandInfo
from optimum.exporters.tasks import TasksManager
from optimum.commands.base import BaseOptimumCLICommand
from optimum.utils.constant import ALL_TASKS


if TYPE_CHECKING:
from argparse import ArgumentParser, Namespace, _SubParsersAction
from argparse import ArgumentParser


def parse_args_inc_quantize(parser: "ArgumentParser"):
required_group = parser.add_argument_group("Required arguments")
required_group.add_argument(
"-m",
"--model",
type=str,
required=True,
@@ -45,34 +45,23 @@ def parse_args_inc_quantize(parser: "ArgumentParser"):
"--task",
default="auto",
help=(
"The task to export the model for. If not specified, the task will be auto-inferred based on the model. Available tasks depend on the model, but are among:"
f" {str(TasksManager.get_all_tasks())}."
"The task to export the model for. If not specified, the task will be auto-inferred from the model's metadata or files. "
"For tasks that generate text, add the `xxx-with-past` suffix to export the model using past key values caching. "
f"Available tasks depend on the model, but are among the following list: {ALL_TASKS}."
),
)


class INCQuantizeCommand(BaseOptimumCLICommand):
def __init__(
self,
subparsers: "_SubParsersAction",
args: Optional["Namespace"] = None,
command: Optional["CommandInfo"] = None,
from_defaults_factory: bool = False,
parser: Optional["ArgumentParser"] = None,
):
super().__init__(
subparsers, args=args, command=command, from_defaults_factory=from_defaults_factory, parser=parser
)
self.args_string = " ".join(sys.argv[3:])

@staticmethod
def parse_args(parser: "ArgumentParser"):
return parse_args_inc_quantize(parser)

def run(self):
from neural_compressor.config import PostTrainingQuantConfig

from ...intel.neural_compressor import INCQuantizer
from optimum.exporters.tasks import TasksManager
from optimum.intel.neural_compressor import INCQuantizer

save_dir = self.args.output
model_id = self.args.model
@@ -85,10 +74,9 @@ def run(self):
try:
task = TasksManager.infer_task_from_model(model_id)
except Exception as e:
return (
f"### Error: {e}. Please pass explicitely the task as it could not be inferred.",
None,
)
raise ValueError(
"The task could not be inferred automatically. Please provide the task using the --task argument."
) from e

model = TasksManager.get_model_from_task(task, model_id)
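To situate the snippet above, here is a rough sketch of the dynamic post-training quantization flow that `run()` wraps, assuming the `INCQuantizer` API from `optimum.intel.neural_compressor`. The model id, quantization approach, and output directory are illustrative placeholders, not values taken from this PR.

```python
from neural_compressor.config import PostTrainingQuantConfig
from optimum.exporters.tasks import TasksManager
from optimum.intel.neural_compressor import INCQuantizer

model_id = "distilbert-base-uncased-finetuned-sst-2-english"  # placeholder model

# Infer the task (or pass --task explicitly if inference fails, as the new
# ValueError above suggests) and load the corresponding model class.
task = TasksManager.infer_task_from_model(model_id)
model = TasksManager.get_model_from_task(task, model_id)

# Dynamic post-training quantization; the approach is an assumption for this sketch.
quantizer = INCQuantizer.from_pretrained(model)
quantizer.quantize(
    quantization_config=PostTrainingQuantConfig(approach="dynamic"),
    save_directory="quantized_model",  # placeholder output directory
)
```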

7 changes: 3 additions & 4 deletions optimum/exporters/openvino/__main__.py
@@ -111,7 +111,7 @@ def main_export(
output: Union[str, Path],
task: str = "auto",
device: str = "cpu",
framework: Optional[str] = None,
framework: str = "pt",
cache_dir: str = HUGGINGFACE_HUB_CACHE,
trust_remote_code: bool = False,
pad_token_id: Optional[int] = None,
@@ -150,9 +150,8 @@ def main_export(
use `xxx-with-past` to export the model using past key values in the decoder.
device (`str`, defaults to `"cpu"`):
The device to use to do the export. Defaults to "cpu".
framework (`Optional[str]`, defaults to `None`):
The framework to use for the ONNX export (`"pt"` or `"tf"`). If not provided, will attempt to automatically detect
the framework for the checkpoint.
framework (`str`, defaults to `"pt"`):
The framework to use for the export. Defaults to `"pt"` for PyTorch.
cache_dir (`Optional[str]`, defaults to `None`):
Path indicating where to store cache. The default Hugging Face cache path will be used by default.
trust_remote_code (`bool`, defaults to `False`):