22 changes: 11 additions & 11 deletions optimum/intel/openvino/configuration.py
@@ -1296,15 +1296,15 @@ def __init__(
):
"""
Configuration class for quantization of multimodel pipelines.
- For each submodel in the pipeline, a separate quantization config can be provided. If the config is not provided for a
- submodel, it won't be quantized.
+ For each OpenVINO model in the pipeline, a separate quantization config can be provided. If the config is not
+ provided for a model, it won't be quantized.

Args:
quantization_configs (Dict[str, Union[Dict, OVQuantizationConfigBase]]):
- A dictionary where keys are submodel names and values are either dictionaries or instances of
- `OVQuantizationConfigBase` containing quantization configurations for each submodel in the pipeline.
+ A dictionary where keys are OpenVINO model names and values are either dictionaries or instances of
+ `OVQuantizationConfigBase` containing quantization configurations for each OV model in the pipeline.
default_config (Optional[Union[Dict, OVQuantizationConfigBase]]):
- A default quantization configuration that will be applied to all submodels that do not have a
+ A default quantization configuration that will be applied to all OV models that do not have a
specific configuration provided in `quantization_configs`.
num_samples (Optional[int]):
The maximum number of samples composing the calibration dataset. Defaults to None.
@@ -1329,13 +1329,13 @@ def or_op(a, b):
if kwargs.pop("ignored_scope", None) is not None:
logger.warning(
"`ignored_scope` parameter is not supported for pipeline quantization. It will be ignored. "
"Please use `ignored_scope` parameter in the submodel configs instead."
"Please use `ignored_scope` parameter in the model configs instead."
)

quantization_configs = copy.deepcopy(quantization_configs)
- for submodel_name, submodel_config in quantization_configs.items():
-     if isinstance(submodel_config, dict):
-         quantization_configs[submodel_name] = _quantization_config_from_dict(submodel_config)
+ for ov_model_name, ov_model_config in quantization_configs.items():
+     if isinstance(ov_model_config, dict):
+         quantization_configs[ov_model_name] = _quantization_config_from_dict(ov_model_config)
if default_config is not None and isinstance(default_config, dict):
default_config = _quantization_config_from_dict(default_config)

@@ -1367,8 +1367,8 @@ def to_dict(self) -> Dict[str, Any]:

def post_init(self):
super().post_init()
- for submodel_config in self.quantization_configs.values():
-     submodel_config.post_init()
+ for ov_model_config in self.quantization_configs.values():
+     ov_model_config.post_init()


def _quantization_config_from_dict(config_dict: Dict[str, Any]) -> OVQuantizationConfigBase:
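For context on how the per-model configuration described in the docstring above might be used, here is a minimal sketch. It assumes the class in this hunk is exported as `OVPipelineQuantizationConfig` and uses illustrative OV model names as dictionary keys; neither is confirmed by the diff itself.

```python
# Minimal sketch, not taken from this PR: the class name OVPipelineQuantizationConfig
# and the OV model names used as dictionary keys are assumptions.
from optimum.intel import (
    OVPipelineQuantizationConfig,
    OVQuantizationConfig,
    OVWeightQuantizationConfig,
)

pipeline_config = OVPipelineQuantizationConfig(
    quantization_configs={
        # Full 8-bit quantization for the language model
        "lm_model": OVQuantizationConfig(bits=8),
        # Weight-only 4-bit compression for the vision encoder
        "vision_embeddings_model": OVWeightQuantizationConfig(bits=4),
        # "text_embeddings_model" is omitted on purpose: OV models without a
        # config (and with no default_config) are left unquantized.
    },
    num_samples=64,
)
```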
102 changes: 88 additions & 14 deletions optimum/intel/openvino/modeling_base.py
@@ -57,12 +57,97 @@
logger = logging.getLogger(__name__)


class OVModelHostMixin:
"""
Mixin class for models that contain OpenVINO models as submodels.
"""

@property
def ov_models(self) -> Dict[str, Union[openvino.Model, openvino.runtime.CompiledModel]]:
"""
Returns a dictionary of all OpenVINO models associated with this model. Keys are model names, and values are
either instances of `openvino.Model` or `openvino.runtime.CompiledModel`. Compiled model instances are returned
if the model is initialized with `compile_only=True`.
"""
return {ov_model_name: getattr(self, ov_model_name) for ov_model_name in self._ov_model_names}

@property
def ov_submodels(self) -> Dict[str, Union[openvino.Model, openvino.runtime.CompiledModel]]:
logger.warn(
"`ov_submodels` property is deprecated and will be removed in v1.27. Please use `ov_models` property instead."
)
return self.ov_models

@property
def _ov_model_names(self) -> List[str]:
Review comment (Collaborator): From my understanding, `_ov_model_names` will differ from `_component_names` for encoder-only or decoder-only models? I'm wondering if we should modify both so that they have the same behavior as other models (having only one component), so that we can always iterate on the model's "components" to compile / clear requests / set device. This would also remove the distinction between `_ov_model_names` and `_component_names`. wdyt @nikita-savelyevv?

"""
List of openvino model names. Used as keys for a dictionary returned by `.ov_models` property.
"""
return ["model"]

@property
def _component_names(self) -> List[str]:
"""
List of model component names. Used as keys for a dictionary returned by `.components` property.
"""
return []

@property
def components(self) -> Dict[str, "OVModelHostMixin"]:
"""
Dictionary of model components which are instances of OVModelHostMixin.
"""
return {component_name: getattr(self, component_name) for component_name in self._component_names}

def replace_ov_model(self, current_model: openvino.Model, new_model: openvino.Model):
"""
Replace OpenVINO model within the model with new one. Replacement is performed by object id.

Args:
current_model (`openvino.Model`):
Current OpenVINO model to be replaced.
new_model (`openvino.Model`):
New OpenVINO model to replace the current one.
"""
# Validate replacement parameters
if isinstance(current_model, openvino.CompiledModel):
raise ValueError(
"OpenVINO model replacement is not supported for models initialized with `compile_only=True`."
)
# Replace OpenVINO model stored inside the model
for ov_model_name in self.ov_models:
if ov_model_name in ["lm_model", "vision_embeddings_model", "text_embeddings_model"] and isinstance(
getattr(type(self), ov_model_name, None), property
):
# TODO (nikita.savelyevv): Remove this check when these properties are removed
continue
if id(getattr(self, ov_model_name, None)) == id(current_model):
setattr(self, ov_model_name, new_model)
# Replace OpenVINO model stored inside components
for component in self.components.values():
component.replace_ov_model(current_model, new_model)
# Clear requests to force recompilation with the new model
self.clear_requests()

def clear_requests(self):
"""
Clear model inference requests.
"""
raise NotImplementedError

def compile(self):
"""
Compile all OpenVINO models within the model.
"""
raise NotImplementedError


@add_start_docstrings(
"""
Base OVModel class.
""",
)
- class OVBaseModel(OptimizedModel):
+ class OVBaseModel(OptimizedModel, OVModelHostMixin):
auto_model_class = None
export_feature = None
_supports_cache_class = False # No loger defined/used in transformers
@@ -210,17 +295,6 @@ def dtype(self) -> Optional[torch.dtype]:

return None

- @property
- def ov_submodels(self) -> Dict[str, openvino.Model]:
-     return {submodel_name: getattr(self, submodel_name) for submodel_name in self._ov_submodel_names}
Review comment on lines -213 to -215 (Member): breaking change?
- @property
- def _ov_submodel_names(self) -> List[str]:
-     """
-     List of openvino submodel names. Used as keys for a dictionary returned by `.ov_submodels` property.
-     """
-     return ["model"]

@staticmethod
def load_model(
file_name: Union[str, Path],
@@ -803,7 +877,7 @@ def _incompatible_inputs_warning(self, inputs: Dict):
return None


- class OVModelPart:
+ class OVModelPart(OVModelHostMixin):
def __init__(
self,
model: Model,
@@ -825,7 +899,7 @@ def __init__(
self.config = self.parent_model.config
self._model_dir = Path(model_dir or parent_model._model_save_dir)

- def _compile(self):
+ def compile(self):
if self.parent_model._compile_only and isinstance(self.model, CompiledModel):
self.request = self.model
if self.request is None:
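To make the new mixin surface concrete, here is a hedged usage sketch. Only `ov_models`, `ov_submodels`, `replace_ov_model`, `clear_requests`, and `compile` come from the diff above; the checkpoint path is a placeholder, and `clone()` stands in for a real graph transformation.

```python
# Illustrative sketch of the OVModelHostMixin API introduced in this PR.
# The checkpoint path is a placeholder, and clone() is a stand-in for an
# actual OpenVINO graph transformation.
from optimum.intel import OVModelForCausalLM

model = OVModelForCausalLM.from_pretrained("path/to/ov_model", compile=False)

# `ov_models` maps OV model names to openvino.Model instances
# (or CompiledModel instances when loaded with compile_only=True).
for name, ov_model in model.ov_models.items():
    print(name, type(ov_model).__name__)

# The old property still works but warns about its removal in v1.27.
legacy = model.ov_submodels

# Replace an OpenVINO model by object identity; inference requests are
# cleared so the next compile() picks up the new graph.
current = model.ov_models["model"]
new_graph = current.clone()  # stand-in for an actual transformation
model.replace_ov_model(current, new_graph)
model.compile()
```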