minor: move ONNX preprocessing (shape inference + no-op Cast removal) into nncf/onnx/graph/passes.py

andrey-churkin · andrey-churkin · commit c1564f05334e · 2025-04-30T16:18:27.000+01:00
diff --git a/nncf/onnx/graph/model_utils.py b/nncf/onnx/graph/model_utils.py
@@ -19,8 +19,6 @@
 from nncf.common.graph.transformations.layout import TransformationLayout
 from nncf.onnx.graph.metatypes.onnx_metatypes import ONNXDequantizeLinearMetatype
 from nncf.onnx.graph.metatypes.onnx_metatypes import ONNXQuantizeLinearMetatype
-from nncf.onnx.graph.onnx_helper import get_children
-from nncf.onnx.graph.onnx_helper import get_children_node_mapping
 from nncf.onnx.graph.transformations.commands import ONNXQDQNodeRemovingCommand
 from nncf.onnx.graph.transformations.commands import ONNXTargetPoint
 
@@ -54,49 +52,3 @@ def remove_fq_from_inputs(model: onnx.ModelProto, nncf_graph: NNCFGraph) -> onnx
         nodes_queue.extend(nncf_graph.get_next_nodes(current_node))
 
     return model_transformer.transform(transformation_layout)
-
-
-def eliminate_nop_cast(model: onnx.ModelProto) -> onnx.ModelProto:
-    """
-    Inspects the provided ONNX model to identify and remove any 'No-op' (no-operation)
-    cast nodes, which are operations that do not change the data type of their input.
-
-    :param model: The ONNX model to be processed.
-    :return: The ONNX model with the redundant cast nodes removed.
-    """
-    tensor_name_to_info = {
-        tensor.name: tensor
-        for tensor in (*model.graph.value_info, *model.graph.input, *model.graph.output, *model.graph.initializer)
-    }
-    redundant_cast_nodes = []
-    for node in model.graph.node:
-        if node.op_type == "Cast":
-            to_attr = None
-            for attr in node.attribute:
-                if attr.name == "to":
-                    to_attr = onnx.helper.get_attribute_value(attr)
-
-            if to_attr is None:
-                continue
-
-            inp = node.input[0]
-            info = tensor_name_to_info[inp]
-            if info.type.tensor_type.elem_type == to_attr:
-                redundant_cast_nodes.append(node)
-
-    value_infos = {i.name: i for i in model.graph.value_info}
-    input_name_to_nodes_map = get_children_node_mapping(model)
-
-    for cast_node in redundant_cast_nodes:
-        # Unlink Cast node from the graph
-        children = get_children(cast_node, input_name_to_nodes_map)
-        for child in children:
-            for i, input_name in enumerate(child.input):
-                if input_name == cast_node.output[0]:
-                    child.input[i] = cast_node.input[0]
-
-        # Remove Cast node from the graph
-        model.graph.value_info.remove(value_infos[cast_node.output[0]])
-        model.graph.node.remove(cast_node)
-
-    return model
diff --git a/nncf/onnx/graph/nncf_graph_builder.py b/nncf/onnx/graph/nncf_graph_builder.py
@@ -337,7 +337,7 @@ def convert_onnx_dtype_to_nncf_dtype(onnx_dtype: int) -> Dtype:
         return Dtype.FLOAT if onnx_dtype == int(onnx.TensorProto.FLOAT) else Dtype.INTEGER
 
     @staticmethod
-    def create_nncf_graph(onnx_model: onnx.ModelProto, infer_shapes: bool = True) -> NNCFGraph:
+    def create_nncf_graph(onnx_model: onnx.ModelProto) -> NNCFGraph:
         """
         Creates NNCFGraph from 'onnx_model'.
         Initially, ONNXGraph is built. All nodes from onnx_model which have valid metatype are added to NNCFGraph.
@@ -347,8 +347,7 @@ def create_nncf_graph(onnx_model: onnx.ModelProto, infer_shapes: bool = True) ->
         :return: NNCFGraph.
         """
         onnx_model = GraphConverter._replace_empty_node_name(onnx_model)
-        if infer_shapes:
-            onnx_model = onnx.shape_inference.infer_shapes(onnx_model)
+        onnx_model = onnx.shape_inference.infer_shapes(onnx_model)
         edge_info_mapping = get_edge_info_mapping(onnx_model)
         children_node_mapping = get_children_node_mapping(onnx_model)
         parents_node_mapping = get_parents_node_mapping(onnx_model)
diff --git a/nncf/onnx/graph/passes.py b/nncf/onnx/graph/passes.py
@@ -0,0 +1,77 @@
+# Copyright (c) 2025 Intel Corporation
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#      http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import onnx
+
+from nncf.onnx.graph.onnx_helper import get_children
+from nncf.onnx.graph.onnx_helper import get_children_node_mapping
+
+
+def eliminate_nop_cast(model: onnx.ModelProto) -> onnx.ModelProto:
+    """
+    Removes 'No-op' (no-operation) cast nodes, i.e. Cast nodes whose `to` type equals the element type
+    of their input. The model is modified in place: consumers of such a node are rewired to its input.
+    Cast nodes with an unknown input type, or whose output is a graph output, are left untouched.
+
+    :param model: The ONNX model to be processed.
+    :return: The ONNX model with the redundant cast nodes removed.
+    """
+    graph = model.graph
+    # Initializers are `TensorProto` (type in `data_type`), the rest are `ValueInfoProto` (`type.tensor_type`).
+    elem_types = {t.name: t.type.tensor_type.elem_type for t in (*graph.value_info, *graph.input, *graph.output)}
+    elem_types.update({t.name: t.data_type for t in graph.initializer})
+    graph_output_names = {output.name for output in graph.output}
+    redundant_cast_nodes = []
+    for node in graph.node:
+        # Removing a Cast that produces a graph output would make that output tensor disappear.
+        if node.op_type != "Cast" or node.output[0] in graph_output_names:
+            continue
+        to_attr = None
+        for attr in node.attribute:
+            if attr.name == "to":
+                to_attr = onnx.helper.get_attribute_value(attr)
+        # `.get()` yields None when no type is known for the input, so such a Cast is never matched.
+        if to_attr is not None and elem_types.get(node.input[0]) == to_attr:
+            redundant_cast_nodes.append(node)
+
+    value_infos = {i.name: i for i in graph.value_info}
+    input_name_to_nodes_map = get_children_node_mapping(model)
+
+    for cast_node in redundant_cast_nodes:
+        # Unlink Cast node from the graph
+        for child in get_children(cast_node, input_name_to_nodes_map):
+            for i, input_name in enumerate(child.input):
+                if input_name == cast_node.output[0]:
+                    child.input[i] = cast_node.input[0]
+
+        # Remove Cast node from the graph (its output may have no `value_info` entry)
+        cast_output_info = value_infos.get(cast_node.output[0])
+        if cast_output_info is not None:
+            graph.value_info.remove(cast_output_info)
+        graph.node.remove(cast_node)
+
+    return model
+
+
+def apply_preprocess_passes(model: onnx.ModelProto) -> onnx.ModelProto:
+    """
+    Preprocesses the provided ONNX model for quantization.
+
+    This function performs the following steps:
+        1. Infers shapes in the model.
+        2. Removes redundant 'No-op' cast nodes from the model.
+
+    :param model: The ONNX model to be preprocessed.
+    :return: A preprocessed ONNX model, ready for quantization.
+    """
+    # Shape inference runs first: `eliminate_nop_cast` relies on the inferred tensor types.
+    inferred_model = onnx.shape_inference.infer_shapes(model)
+    return eliminate_nop_cast(inferred_model)
diff --git a/nncf/onnx/quantization/quantize_model.py b/nncf/onnx/quantization/quantize_model.py
@@ -116,45 +116,6 @@ def check_external_data_location(model: onnx.ModelProto, external_data_dir: Opti
 
     # If len(data_path) == 0, it means there are no tensors that use external data.
     return str(external_data_dir) if data_paths else None
-def quantize_pre_process(model: onnx.ModelProto, save_as_external_data: bool = True):
-    """
-    Preprocesses the provided ONNX model for quantization.
-
-    This method performs the following steps:
-        1. Infers shapes in the model.
-        2. Removes redundant 'No-op' cast nodes from the model.
-
-    :param model: The ONNX model to be preprocessed.
-    :param save_as_external_data: A boolean flag indicating whether to
-        save the model with external data. If `True`, external data is
-        saved separately; otherwise, the model is saved as a single file.
-    :return: A preprocessed ONNX model, ready for quantization.
-    """
-    with tempfile.TemporaryDirectory(dir=tempfile.gettempdir()) as temp_dir:
-        temp_path = Path(temp_dir)
-        input_model_path = str(temp_path / "input_model.onnx")
-
-        if save_as_external_data:
-            onnx.save_model(
-                model,
-                input_model_path,
-                save_as_external_data=True,
-                all_tensors_to_one_file=True,
-                location="model.data",
-                size_threshold=1024,
-                convert_attribute=False,
-            )
-        else:
-            onnx.save(model, input_model_path)
-        model = None
-
-        shape_inferred_model_path = str(temp_path / "shape_inferred_model.onnx")
-        onnx.shape_inference.infer_shapes_path(input_model_path, shape_inferred_model_path)
-
-        preprocessed_model = onnx.load(shape_inferred_model_path)
-        preprocessed_model = eliminate_nop_cast(preprocessed_model)
-
-    return preprocessed_model
 
 
 def quantize_impl(
@@ -207,8 +168,7 @@ def quantize_impl(
         advanced_parameters=advanced_parameters,
     )
 
-    model = quantize_pre_process(model)
-    graph = GraphConverter.create_nncf_graph(model, infer_shapes=False)
+    graph = GraphConverter.create_nncf_graph(model)
     warning_model_no_batchwise_support(graph, advanced_parameters, model_type, OPERATIONS_OUTPUT_HAS_NO_BATCH_AXIS)
     quantized_model = quantization_algorithm.apply(model, graph, dataset=calibration_dataset)