diff --git a/runtime/onert/api/python/CMakeLists.txt b/runtime/onert/api/python/CMakeLists.txt
index 25af51f6e3c..c6594e956d4 100644
--- a/runtime/onert/api/python/CMakeLists.txt
+++ b/runtime/onert/api/python/CMakeLists.txt
@@ -28,6 +28,7 @@ endif()
 # Add the Python module
 file(GLOB_RECURSE NNFW_API_PYBIND_SOURCES "src/*.cc")
 pybind11_add_module(nnfw_api_pybind ${NNFW_API_PYBIND_SOURCES})
+target_include_directories(nnfw_api_pybind PRIVATE include)
 target_link_libraries(nnfw_api_pybind PRIVATE nnfw-dev)
 target_link_libraries(nnfw_api_pybind PRIVATE nnfw_common nnfw_coverage)
diff --git a/runtime/onert/api/python/include/nnfw_exception_bindings.h b/runtime/onert/api/python/include/nnfw_exception_bindings.h
new file mode 100644
index 00000000000..6ee29db7605
--- /dev/null
+++ b/runtime/onert/api/python/include/nnfw_exception_bindings.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_API_PYTHON_NNFW_EXCEPTION_BINDINGS_H__
+#define __ONERT_API_PYTHON_NNFW_EXCEPTION_BINDINGS_H__
+
+#include <pybind11/pybind11.h>
+
+namespace onert::api::python
+{
+
+namespace py = pybind11;
+
+// Declare the exception bindings
+void bind_nnfw_exceptions(py::module_ &m);
+
+} // namespace onert::api::python
+
+#endif // __ONERT_API_PYTHON_NNFW_EXCEPTION_BINDINGS_H__
diff --git a/runtime/onert/api/python/include/nnfw_exceptions.h b/runtime/onert/api/python/include/nnfw_exceptions.h
new file mode 100644
index 00000000000..7535cdbf34f
--- /dev/null
+++ b/runtime/onert/api/python/include/nnfw_exceptions.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_API_PYTHON_NNFW_EXCEPTIONS_H__
+#define __ONERT_API_PYTHON_NNFW_EXCEPTIONS_H__
+
+#include <stdexcept>
+#include <string>
+
+namespace onert::api::python
+{
+
+// Base for all NNFW errors
+struct NnfwError : public std::runtime_error
+{
+  explicit NnfwError(const std::string &msg) : std::runtime_error(msg) {}
+};
+
+struct NnfwUnexpectedNullError : public NnfwError
+{
+  using NnfwError::NnfwError;
+};
+struct NnfwInvalidStateError : public NnfwError
+{
+  using NnfwError::NnfwError;
+};
+struct NnfwOutOfMemoryError : public NnfwError
+{
+  using NnfwError::NnfwError;
+};
+struct NnfwInsufficientOutputError : public NnfwError
+{
+  using NnfwError::NnfwError;
+};
+struct NnfwDeprecatedApiError : public NnfwError
+{
+  using NnfwError::NnfwError;
+};
+
+} // namespace onert::api::python
+
+#endif // __ONERT_API_PYTHON_NNFW_EXCEPTIONS_H__
diff --git a/runtime/onert/api/python/package/common/basesession.py b/runtime/onert/api/python/package/common/basesession.py
index 1521efb452b..935d7d5c479 100644
--- a/runtime/onert/api/python/package/common/basesession.py
+++ b/runtime/onert/api/python/package/common/basesession.py
@@ -1,6 +1,8 @@
+from typing import List
 import numpy as np

-from ..native import libnnfw_api_pybind
+from ..native.libnnfw_api_pybind import infer, tensorinfo
+from ..native.libnnfw_api_pybind.exception import OnertError


 def num_elems(tensor_info):
@@ -52,19 +54,73 @@ def _recreate_session(self, backend_session):
         del self.session  # Clean up the existing session
         self.session = backend_session

+    def get_inputs_tensorinfo(self) -> List[tensorinfo]:
+        """
+        Retrieve tensorinfo for all input tensors.
+
+        Raises:
+            OnertError: If the underlying C-API call fails.
+
+        Returns:
+            list[tensorinfo]: A list of tensorinfo objects for each input.
+        """
+        num_inputs: int = self.session.input_size()
+        infos: List[tensorinfo] = []
+        for i in range(num_inputs):
+            try:
+                infos.append(self.session.input_tensorinfo(i))
+            except ValueError:
+                raise
+            except Exception as e:
+                raise OnertError(f"Failed to get input tensorinfo #{i}: {e}") from e
+        return infos
+
+    def get_outputs_tensorinfo(self) -> List[tensorinfo]:
+        """
+        Retrieve tensorinfo for all output tensors.
+
+        Raises:
+            OnertError: If the underlying C-API call fails.
+
+        Returns:
+            list[tensorinfo]: A list of tensorinfo objects for each output.
+        """
+        num_outputs: int = self.session.output_size()
+        infos: List[tensorinfo] = []
+        for i in range(num_outputs):
+            try:
+                infos.append(self.session.output_tensorinfo(i))
+            except ValueError:
+                raise
+            except Exception as e:
+                raise OnertError(f"Failed to get output tensorinfo #{i}: {e}") from e
+        return infos
+
     def set_inputs(self, size, inputs_array=[]):
         """
         Set the input tensors for the session.
+
         Args:
             size (int): Number of input tensors.
             inputs_array (list): List of numpy arrays for the input data.
+
+        Raises:
+            ValueError: If the session is not initialized.
+            OnertError: If any C-API call fails.
         """
         if self.session is None:
             raise ValueError(
                 "Session is not initialized with a model. Please compile with a model before setting inputs."
             )
+
+        self.inputs = []
         for i in range(size):
-            input_tensorinfo = self.session.input_tensorinfo(i)
+            try:
+                input_tensorinfo = self.session.input_tensorinfo(i)
+            except ValueError:
+                raise
+            except Exception as e:
+                raise OnertError(f"Failed to get input tensorinfo #{i}: {e}") from e

             if len(inputs_array) > i:
                 input_array = np.array(inputs_array[i], dtype=input_tensorinfo.dtype)
@@ -75,26 +131,64 @@ def set_inputs(self, size, inputs_array=[]):
                 input_array = np.zeros((num_elems(input_tensorinfo)),
                                        dtype=input_tensorinfo.dtype)

-            self.session.set_input(i, input_array)
+            try:
+                self.session.set_input(i, input_array)
+            except ValueError:
+                raise
+            except Exception as e:
+                raise OnertError(f"Failed to set input #{i}: {e}") from e
+
             self.inputs.append(input_array)

     def set_outputs(self, size):
         """
         Set the output tensors for the session.
+
         Args:
             size (int): Number of output tensors.
+
+        Raises:
+            ValueError: If the session is not initialized.
+            OnertError: If any C-API call fails.
         """
         if self.session is None:
             raise ValueError(
                 "Session is not initialized with a model. Please compile a model before setting outputs."
             )
+
+        self.outputs = []
         for i in range(size):
-            output_tensorinfo = self.session.output_tensorinfo(i)
+            try:
+                output_tensorinfo = self.session.output_tensorinfo(i)
+            except ValueError:
+                raise
+            except Exception as e:
+                raise OnertError(f"Failed to get output tensorinfo #{i}: {e}") from e
+
             output_array = np.zeros((num_elems(output_tensorinfo)),
                                     dtype=output_tensorinfo.dtype)
-            self.session.set_output(i, output_array)
+
+            try:
+                self.session.set_output(i, output_array)
+            except ValueError:
+                raise
+            except Exception as e:
+                raise OnertError(f"Failed to set output #{i}: {e}") from e
+
             self.outputs.append(output_array)


 def tensorinfo():
-    return libnnfw_api_pybind.infer.nnfw_tensorinfo()
+    """
+    Shortcut to create a fresh tensorinfo instance.
+
+    Raises:
+        OnertError: If the C-API call fails.
+    """
+
+    try:
+        return infer.nnfw_tensorinfo()
+    except OnertError:
+        raise
+    except Exception as e:
+        raise OnertError(f"Failed to create tensorinfo: {e}") from e
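The two helpers above are the intended way to discover a model's I/O signature from Python. A minimal usage sketch (illustrative only, not part of the patch; the nnpackage path is a placeholder):

    from onert import infer

    sess = infer.session("path/to/model.nnpackage", "cpu")
    for i, info in enumerate(sess.get_inputs_tensorinfo()):
        print(f"input  #{i}: dtype={info.dtype}, shape={list(info.dims)[:info.rank]}")
    for i, info in enumerate(sess.get_outputs_tensorinfo()):
        print(f"output #{i}: dtype={info.dtype}, shape={list(info.dims)[:info.rank]}")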
""" - if path is not None: - super().__init__(libnnfw_api_pybind.infer.nnfw_session(path, backends)) - self.session.prepare() - self.set_outputs(self.session.output_size()) - else: - super().__init__() + super().__init__(infer.nnfw_session(path, backends)) + self._prepared: bool = False - def compile(self, path: str, backends: str = "cpu"): + # Replace any dynamic dimension (-1) with 1 before static shape inference, + # because nnfw_session cannot perform static shape inference on input dimensions set to -1. + original_infos: List[tensorinfo] = self.get_inputs_tensorinfo() + fixed_infos: List[tensorinfo] = [] + for info in original_infos: + dims = list(info.dims) + # replace -1 with 1 + dims = [1 if d == -1 else d for d in dims] + info.dims = dims # assume setter accepts a list + fixed_infos.append(info) + # update tensorinfo in session + self.update_inputs_tensorinfo(fixed_infos) + + def update_inputs_tensorinfo(self, new_infos: List[tensorinfo]) -> None: """ - Prepare the session by recreating it with new parameters. + Update all input tensors' tensorinfo at once. + Args: - path (str): Path to the model file or nnpackage directory. Defaults to the existing path. - backends (str): Backends to use. Defaults to the existing backends. + new_infos (list[tensorinfo]): A list of updated tensorinfo objects for the inputs. + + Raises: + ValueError: If the number of new_infos does not match the session's input size. + OnertError: If the underlying C-API call fails. """ - # Update parameters if provided - if path is None: - raise ValueError("path must not be None.") - # Recreate the session with updated parameters - self._recreate_session(libnnfw_api_pybind.infer.nnfw_session(path, backends)) - # Prepare the new session - self.session.prepare() - self.set_outputs(self.session.output_size()) - - def inference(self): + num_inputs: int = self.session.input_size() # may raise OnertError + if len(new_infos) != num_inputs: + raise ValueError( + f"Expected {num_inputs} input tensorinfo(s), but got {len(new_infos)}.") + + for i, info in enumerate(new_infos): + if any(d < 0 for d in info.dims[:info.rank]): + raise ValueError( + f"Input tensorinfo #{i} has negative dims: {info.dims[:info.rank]}") + try: + self.session.set_input_tensorinfo(i, info) + except ValueError: + # re‑raise ValueError directly + raise + except Exception as e: + raise OnertError(f"Failed to set tensorinfo for input #{i}: {e}") from e + + def run_inference( + self, + inputs_array: List[np.ndarray], + *, + measure: bool = False + ) -> Union[List[np.ndarray], Tuple[List[np.ndarray], Dict[str, float]]]: """ - Perform model and get outputs + Run a complete inference cycle: + - If the session has not been prepared or outputs have not been set, call prepare() and set_outputs(). + - Automatically configure input buffers based on the provided numpy arrays. + - Execute the inference session. + - Return the output tensors with proper multi-dimensional shapes. + + This method supports both static and dynamic shape modification: + - If update_inputs_tensorinfo() has been called before running inference, the model is compiled + with the fixed static input shape. + - Otherwise, the input shapes can be adjusted dynamically. + + Args: + inputs_array (list[np.ndarray]): List of numpy arrays representing the input data. + measure (bool): If True, measure prepare/io/run latencies (ms). + Returns: - list: Outputs from the model. + list[np.ndarray]: A list containing the output numpy arrays. 
diff --git a/runtime/onert/api/python/src/bindings/nnfw_api_wrapper_pybind.cc b/runtime/onert/api/python/src/bindings/nnfw_api_wrapper_pybind.cc
index df31f45f3e3..5f63958fa8b 100644
--- a/runtime/onert/api/python/src/bindings/nnfw_api_wrapper_pybind.cc
+++ b/runtime/onert/api/python/src/bindings/nnfw_api_wrapper_pybind.cc
@@ -16,11 +16,13 @@

 #include <pybind11/pybind11.h>

+#include "nnfw_exception_bindings.h"
 #include "nnfw_session_bindings.h"
 #include "nnfw_tensorinfo_bindings.h"
 #include "nnfw_traininfo_bindings.h"

 using namespace onert::api::python;
+namespace py = pybind11;

 PYBIND11_MODULE(libnnfw_api_pybind, m)
 {
@@ -34,6 +36,10 @@ PYBIND11_MODULE(libnnfw_api_pybind, m)
   auto infer = m.def_submodule("infer", "Inference submodule");
   infer.attr("nnfw_session") = m.attr("nnfw_session");

+  // Bind our NNFW-status exceptions
+  auto ex = m.def_submodule("exception", "NNFW-status Exception");
+  bind_nnfw_exceptions(ex);
+
   // Bind experimental `NNFW_SESSION` class
   auto experimental = m.def_submodule("experimental", "Experimental submodule");
   experimental.attr("nnfw_session") = m.attr("nnfw_session");
diff --git a/runtime/onert/api/python/src/bindings/nnfw_exception_bindings.cc b/runtime/onert/api/python/src/bindings/nnfw_exception_bindings.cc
new file mode 100644
index 00000000000..85503c3832a
--- /dev/null
+++ b/runtime/onert/api/python/src/bindings/nnfw_exception_bindings.cc
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "nnfw_exception_bindings.h"
+
+#include "nnfw_exceptions.h"
+
+#include <pybind11/pybind11.h>
+
+namespace onert::api::python
+{
+
+namespace py = pybind11;
+
+void bind_nnfw_exceptions(py::module_ &m)
+{
+  // register base first
+  py::register_exception<NnfwError>(m, "OnertError", PyExc_RuntimeError);
+
+  // derived exceptions, each inheriting from OnertError on the Python side as well
+  py::register_exception<NnfwUnexpectedNullError>(m, "OnertUnexpectedNullError",
+                                                  m.attr("OnertError").cast<py::object>());
+  py::register_exception<NnfwInvalidStateError>(m, "OnertInvalidStateError",
+                                                m.attr("OnertError").cast<py::object>());
+  py::register_exception<NnfwOutOfMemoryError>(m, "OnertOutOfMemoryError",
+                                               m.attr("OnertError").cast<py::object>());
+  py::register_exception<NnfwInsufficientOutputError>(m, "OnertInsufficientOutputError",
+                                                      m.attr("OnertError").cast<py::object>());
+  py::register_exception<NnfwDeprecatedApiError>(m, "OnertDeprecatedApiError",
+                                                 m.attr("OnertError").cast<py::object>());
+}
+
+} // namespace onert::api::python
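With ensure_status() rewritten to throw (see the wrapper change below), these registrations surface NNFW status codes as catchable Python exceptions. A hedged usage sketch, assuming the same import path that basesession.py uses (illustrative only; 'inputs' would be prepared as in the earlier sketches):

    from onert import infer
    from onert.native.libnnfw_api_pybind.exception import OnertError, OnertInvalidStateError

    try:
        sess = infer.session("path/to/model.nnpackage", "cpu")
        outputs = sess.run_inference(inputs)  # 'inputs' prepared elsewhere
    except OnertInvalidStateError as e:
        print(f"API called in the wrong state: {e}")
    except OnertError as e:  # catches every NNFW status error
        print(f"onert runtime error: {e}")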
diff --git a/runtime/onert/api/python/src/wrapper/nnfw_api_wrapper.cc b/runtime/onert/api/python/src/wrapper/nnfw_api_wrapper.cc
index 9f494f28f34..4fdd6c083e9 100644
--- a/runtime/onert/api/python/src/wrapper/nnfw_api_wrapper.cc
+++ b/runtime/onert/api/python/src/wrapper/nnfw_api_wrapper.cc
@@ -16,6 +16,8 @@
 #include "nnfw_api_wrapper.h"

+#include "nnfw_exceptions.h"
+
 #include <iostream>

 namespace onert::api::python
 {
@@ -28,85 +30,54 @@ void ensure_status(NNFW_STATUS status)
   switch (status)
   {
     case NNFW_STATUS::NNFW_STATUS_NO_ERROR:
-      break;
+      return;
     case NNFW_STATUS::NNFW_STATUS_ERROR:
-      std::cout << "[ERROR]\tNNFW_STATUS_ERROR\n";
-      exit(1);
+      throw NnfwError("NNFW_STATUS_ERROR");
     case NNFW_STATUS::NNFW_STATUS_UNEXPECTED_NULL:
-      std::cout << "[ERROR]\tNNFW_STATUS_UNEXPECTED_NULL\n";
-      exit(1);
+      throw NnfwUnexpectedNullError("NNFW_STATUS_UNEXPECTED_NULL");
     case NNFW_STATUS::NNFW_STATUS_INVALID_STATE:
-      std::cout << "[ERROR]\tNNFW_STATUS_INVALID_STATE\n";
-      exit(1);
+      throw NnfwInvalidStateError("NNFW_STATUS_INVALID_STATE");
     case NNFW_STATUS::NNFW_STATUS_OUT_OF_MEMORY:
-      std::cout << "[ERROR]\tNNFW_STATUS_OUT_OF_MEMORY\n";
-      exit(1);
+      throw NnfwOutOfMemoryError("NNFW_STATUS_OUT_OF_MEMORY");
     case NNFW_STATUS::NNFW_STATUS_INSUFFICIENT_OUTPUT_SIZE:
-      std::cout << "[ERROR]\tNNFW_STATUS_INSUFFICIENT_OUTPUT_SIZE\n";
-      exit(1);
+      throw NnfwInsufficientOutputError("NNFW_STATUS_INSUFFICIENT_OUTPUT_SIZE");
     case NNFW_STATUS::NNFW_STATUS_DEPRECATED_API:
-      std::cout << "[ERROR]\tNNFW_STATUS_DEPRECATED_API\n";
-      exit(1);
+      throw NnfwDeprecatedApiError("NNFW_STATUS_DEPRECATED_API");
+    default:
+      throw NnfwError("NNFW_UNKNOWN_ERROR");
   }
 }

 NNFW_LAYOUT getLayout(const char *layout)
 {
-  if (!strcmp(layout, "NCHW"))
-  {
+  if (std::strcmp(layout, "NCHW") == 0)
     return NNFW_LAYOUT::NNFW_LAYOUT_CHANNELS_FIRST;
-  }
-  else if (!strcmp(layout, "NHWC"))
-  {
+  else if (std::strcmp(layout, "NHWC") == 0)
     return NNFW_LAYOUT::NNFW_LAYOUT_CHANNELS_LAST;
-  }
-  else if (!strcmp(layout, "NONE"))
-  {
+  else if (std::strcmp(layout, "NONE") == 0)
     return NNFW_LAYOUT::NNFW_LAYOUT_NONE;
-  }
   else
-  {
-    std::cout << "[ERROR]\tLAYOUT_TYPE\n";
-    exit(1);
-  }
+    throw NnfwError(std::string("Unknown layout type: '") + layout + "'");
 }

 NNFW_TYPE getType(const char *type)
 {
-  if (!strcmp(type, "float32"))
-  {
+  if (std::strcmp(type, "float32") == 0)
     return NNFW_TYPE::NNFW_TYPE_TENSOR_FLOAT32;
-  }
-  else if (!strcmp(type, "int32"))
-  {
+  else if (std::strcmp(type, "int32") == 0)
     return NNFW_TYPE::NNFW_TYPE_TENSOR_INT32;
-  }
-  else if (!strcmp(type, "uint8"))
-  {
+  else if (std::strcmp(type, "uint8") == 0)
     return NNFW_TYPE::NNFW_TYPE_TENSOR_UINT8;
-    // return NNFW_TYPE::NNFW_TYPE_TENSOR_QUANT8_ASYMM;
-  }
-  else if (!strcmp(type, "bool"))
-  {
+  else if (std::strcmp(type, "bool") == 0)
    return NNFW_TYPE::NNFW_TYPE_TENSOR_BOOL;
-  }
-  else if (!strcmp(type, "int64"))
-  {
+  else if (std::strcmp(type, "int64") == 0)
     return NNFW_TYPE::NNFW_TYPE_TENSOR_INT64;
-  }
-  else if (!strcmp(type, "int8"))
-  {
+  else if (std::strcmp(type, "int8") == 0)
     return NNFW_TYPE::NNFW_TYPE_TENSOR_QUANT8_ASYMM_SIGNED;
-  }
-  else if (!strcmp(type, "int16"))
-  {
+  else if (std::strcmp(type, "int16") == 0)
     return NNFW_TYPE::NNFW_TYPE_TENSOR_QUANT16_SYMM_SIGNED;
-  }
   else
-  {
-    std::cout << "[ERROR] String to NNFW_TYPE Failure\n";
-    exit(1);
-  }
+    throw NnfwError(std::string("Cannot convert string to NNFW_TYPE: '") + type + "'");
 }

 const char *getStringType(NNFW_TYPE type)
@@ -129,8 +100,8 @@ const char *getStringType(NNFW_TYPE type)
     case NNFW_TYPE::NNFW_TYPE_TENSOR_QUANT16_SYMM_SIGNED:
       return "int16";
     default:
-      std::cout << "[ERROR] NNFW_TYPE to String Failure\n";
-      exit(1);
+      throw NnfwError(std::string("Cannot convert NNFW_TYPE enum to string (value=") +
+                      std::to_string(static_cast<int>(type)) + ")");
   }
 }
diff --git a/runtime/onert/sample/minimal-python/src/inference_benchmark.py b/runtime/onert/sample/minimal-python/src/inference_benchmark.py
new file mode 100644
index 00000000000..39296c9177e
--- /dev/null
+++ b/runtime/onert/sample/minimal-python/src/inference_benchmark.py
@@ -0,0 +1,128 @@
+import argparse
+import numpy as np
+import psutil
+import os
+from typing import List
+from onert import infer
+# TODO: Import tensorinfo from onert
+from onert.native.libnnfw_api_pybind import tensorinfo
+
+
+def get_memory_usage_mb() -> float:
+    """Get current process memory usage in MB."""
+    process = psutil.Process(os.getpid())
+    return process.memory_info().rss / (1024 * 1024)
+
+
+def parse_shapes(shape_strs: List[str]) -> List[List[int]]:
+    shapes = []
+    for s in shape_strs:
+        try:
+            shapes.append([int(dim) for dim in s.strip().split(",")])
+        except Exception:
+            raise ValueError(f"Invalid shape string: '{s}' (expected: 1,224,224,3 ...)")
+    return shapes
+
+
+def apply_static_shapes(sess: infer.session,
+                        static_shapes: List[List[int]]) -> List[tensorinfo]:
+    original_infos = sess.get_inputs_tensorinfo()
+    if len(static_shapes) != len(original_infos):
+        raise ValueError(
+            f"Input count mismatch: model expects {len(original_infos)} inputs, but got {len(static_shapes)} shapes"
+        )
+
+    updated_infos: List[tensorinfo] = []
+
+    for i, info in enumerate(original_infos):
+        shape = static_shapes[i]
+        if info.rank != len(shape):
+            raise ValueError(
+                f"Rank mismatch for input {i}: expected rank {info.rank}, got {len(shape)}"
+            )
+        info.dims = shape
+        info.rank = len(shape)
+        updated_infos.append(info)
+
+    sess.update_inputs_tensorinfo(updated_infos)
+    return updated_infos
+
+
+def benchmark_inference(nnpackage_path: str, backends: str, input_shapes: List[List[int]],
+                        repeat: int):
+    mem_before_kb = get_memory_usage_mb() * 1024
+
+    sess = infer.session(path=nnpackage_path, backends=backends)
+    model_load_kb = get_memory_usage_mb() * 1024 - mem_before_kb
+
+    input_infos = apply_static_shapes(
+        sess, input_shapes) if input_shapes else sess.get_inputs_tensorinfo()
+
+    # Create dummy input arrays
+    dummy_inputs = []
+    for info in input_infos:
+        shape = tuple(info.dims[:info.rank])
+        dummy_inputs.append(np.random.rand(*shape).astype(info.dtype))
+
+    prepare = total_io = total_run = 0.0
+
+    # Warmup runs
+    prepare_kb = 0
+    for _ in range(3):
+        outputs, metrics = sess.run_inference(dummy_inputs, measure=True)
+        del outputs
+        if "prepare_time_ms" in metrics:
+            prepare = metrics["prepare_time_ms"]
+            prepare_kb = get_memory_usage_mb() * 1024 - mem_before_kb
+
+    # Benchmark runs
+    for _ in range(repeat):
+        outputs, metrics = sess.run_inference(dummy_inputs, measure=True)
+        del outputs
+        total_io += metrics["io_time_ms"]
+        total_run += metrics["run_time_ms"]
+
+    execute_kb = get_memory_usage_mb() * 1024
+
+    print("======= Inference Benchmark =======")
+    print(f"- Warmup runs : 3")
+    print(f"- Measured runs : {repeat}")
+    print(f"- Prepare : {prepare:.3f} ms")
+    print(f"- Avg I/O : {total_io / repeat:.3f} ms")
+    print(f"- Avg Run : {total_run / repeat:.3f} ms")
+    print("===================================")
+    print("RSS")
+    print(f"- MODEL_LOAD : {model_load_kb:.0f} KB")
+    print(f"- PREPARE : {prepare_kb:.0f} KB")
+    print(f"- EXECUTE : {execute_kb:.0f} KB")
+    print(f"- PEAK : {max(model_load_kb, prepare_kb, execute_kb):.0f} KB")
+    print("===================================")
+
+
+# TODO: Support dynamic(on-the-fly) shape
+def main():
+    parser = argparse.ArgumentParser(description="ONERT Inference Benchmark")
+    parser.add_argument("nnpackage", type=str, help="Path to .nnpackage directory")
+    parser.add_argument("--backends",
+                        type=str,
+                        default="cpu",
+                        help="Backends to use (default: cpu)")
+    parser.add_argument("--input-shape",
+                        nargs="+",
+                        help="Input shapes for each input (e.g. 1,224,224,3 1,10)")
+    parser.add_argument("--repeat",
+                        type=int,
+                        default=5,
+                        help="Number of measured inference repetitions")
+
+    args = parser.parse_args()
+    shapes = parse_shapes(args.input_shape) if args.input_shape else None
+
+    benchmark_inference(nnpackage_path=args.nnpackage,
+                        backends=args.backends,
+                        input_shapes=shapes,
+                        repeat=args.repeat)
+
+
+if __name__ == "__main__":
+    main()
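For reference, a sketch of how the benchmark above might be driven programmatically (illustrative only; the nnpackage path and shape are placeholders, and the import assumes the script's directory is on sys.path):

    # Roughly equivalent to:
    #   python3 inference_benchmark.py path/to/model.nnpackage --backends cpu --input-shape 1,224,224,3 --repeat 10
    from inference_benchmark import benchmark_inference

    benchmark_inference(nnpackage_path="path/to/model.nnpackage",
                        backends="cpu",
                        input_shapes=[[1, 224, 224, 3]],
                        repeat=10)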
diff --git a/runtime/onert/sample/minimal-python/src/minimal.py b/runtime/onert/sample/minimal-python/src/minimal.py
index 2ae3f249fcd..6f5032187a9 100644
--- a/runtime/onert/sample/minimal-python/src/minimal.py
+++ b/runtime/onert/sample/minimal-python/src/minimal.py
@@ -1,4 +1,5 @@
 from onert import infer
+import numpy as np

 import sys

@@ -8,10 +9,17 @@ def main(nnpackage_path, backends="cpu"):
     session = infer.session(nnpackage_path, backends)

     # Prepare input. Here we just allocate dummy input arrays.
-    input_size = session.input_size()
-    session.set_inputs(input_size)
+    input_infos = session.get_inputs_tensorinfo()
+    dummy_inputs = []
+    for info in input_infos:
+        # Retrieve the dimensions list from tensorinfo property.
+        dims = list(info.dims)
+        # Build the shape tuple from tensorinfo dimensions.
+        shape = tuple(dims[:info.rank])
+        # Create a dummy numpy array filled with zeros.
+        dummy_inputs.append(np.zeros(shape, dtype=info.dtype))

-    outputs = session.inference()
+    outputs = session.run_inference(dummy_inputs)

     print(f"nnpackage {nnpackage_path.split('/')[-1]} runs successfully.")
     return
diff --git a/runtime/onert/sample/minimal-python/src/static_shape_inference.py b/runtime/onert/sample/minimal-python/src/static_shape_inference.py
new file mode 100644
index 00000000000..530f82bb2d9
--- /dev/null
+++ b/runtime/onert/sample/minimal-python/src/static_shape_inference.py
@@ -0,0 +1,52 @@
+from onert import infer
+import numpy as np
+import sys
+
+
+def main(nnpackage_path, backends="cpu"):
+    # Create session and load the nnpackage
+    sess = infer.session(nnpackage_path, backends)
+
+    # Retrieve the current tensorinfo for all inputs.
+    current_input_infos = sess.get_inputs_tensorinfo()
+
+    # Create new tensorinfo objects with a static shape modification.
+    # For this example, assume we change the first dimension (e.g., batch size) to 10.
+    new_input_infos = []
+    for info in current_input_infos:
+        # For example, if the current shape is (?, 4), update it to (10, 4).
+        # We copy the current info and modify the rank and dims.
+        # (Note: Depending on your model, you may want to modify additional dimensions.)
+        new_shape = [10] + list(info.dims[1:info.rank])
+        info.rank = len(new_shape)
+        for i, dim in enumerate(new_shape):
+            info.dims[i] = dim
+        # For any remaining dimensions up to NNFW_MAX_RANK, set them to a default (1).
+        for i in range(len(new_shape), len(info.dims)):
+            info.dims[i] = 1
+        new_input_infos.append(info)
+
+    # Update all input tensorinfos in the session at once.
+    # (prepare() and output buffer setup are deferred until run_inference() is called.)
+    sess.update_inputs_tensorinfo(new_input_infos)
+
+    # Create dummy input arrays based on the new static shapes.
+    dummy_inputs = []
+    for info in new_input_infos:
+        # Build the shape tuple from tensorinfo dimensions.
+        shape = tuple(info.dims[:info.rank])
+        # Create a dummy numpy array filled with zeros.
+        dummy_inputs.append(np.zeros(shape, dtype=info.dtype))
+
+    # Run inference with the new static input shapes.
+    outputs = sess.run_inference(dummy_inputs)
+
+    print(
+        f"Static shape modification sample: nnpackage {nnpackage_path.split('/')[-1]} runs successfully."
+    )
+    return
+
+
+if __name__ == "__main__":
+    argv = sys.argv[1:]
+    main(*argv)
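And a matching invocation sketch for the static-shape sample above (illustrative only; the path is a placeholder and the import assumes the script's directory is on sys.path):

    # Roughly equivalent to: python3 static_shape_inference.py path/to/model.nnpackage cpu
    from static_shape_inference import main

    main("path/to/model.nnpackage", "cpu")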