Description
I am trying to convert mars-small128.pb (model_link) to an ONNX model (saved_model.onnx). I use:

```
python3 -m tf2onnx.convert --graphdef mars-small128.pb --output saved_model.onnx --inputs "images:0" --outputs "features:0"
```

Before converting, I inspected mars-small128.pb using tf.compat.v1, and the output layer has shape [None, 128]. The ONNX file is created successfully, but when I try to build the TensorRT engine, the output shape automatically changes to [1024, 128]. I tried modifying the model output before exporting, but then the engine cannot be built.
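For reference, this is how the exported model's declared I/O shapes can be checked (a minimal sketch, assuming the `onnx` Python package is installed; the file name matches the command above):

```python
import onnx

# Load the exported model and print the declared shape of every input/output.
model = onnx.load("saved_model.onnx")
for tensor in list(model.graph.input) + list(model.graph.output):
    dims = [
        d.dim_param or d.dim_value  # dim_param is set for symbolic dims (e.g. the batch axis)
        for d in tensor.type.tensor_type.shape.dim
    ]
    print(tensor.name, dims)
```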
Here is my code to build an engine:
```python
# --------------------------------------------------------------------------- #
# Import
# --------------------------------------------------------------------------- #
import logging
import numpy as np
import os
import sys

import tensorrt as trt
from cuda import cudart
from pathlib import Path

root_dir = Path(__file__).resolve().parent
sys.path.insert(1, os.path.join(root_dir, os.pardir))
from utils import common
from utils.engine_calibrator import EngineCalibrator
from utils.image_batcher import ImageBatcher

logging.basicConfig(level=logging.INFO)
logging.getLogger("EngineBuilder").setLevel(logging.INFO)
log = logging.getLogger("EngineBuilder")


# --------------------------------------------------------------------------- #
# Define functions/classes
# --------------------------------------------------------------------------- #
class EngineBuilder:
    """
    Parses an ONNX graph and builds a TensorRT engine from it.
    """

    def __init__(self, verbose=False, workspace=8) -> None:
        """
        :param verbose: If enabled, a higher verbosity level will be set
            on the TensorRT logger.
        :param workspace: Max memory workspace to allow, in GiB.
        """
        self.trt_logger = trt.Logger(trt.Logger.INFO)
        if verbose:
            self.trt_logger.min_severity = trt.Logger.Severity.VERBOSE
        trt.init_libnvinfer_plugins(self.trt_logger, namespace="")

        self.builder = trt.Builder(self.trt_logger)
        self.config = self.builder.create_builder_config()
        self.config.max_workspace_size = workspace * (2 ** 30)

        self.batch_size = None
        self.network = None
        self.parser = None
    def create_network(self, onnx_path: os.PathLike, batch_size=None) -> None:
        """
        Parse the ONNX graph and create the corresponding
        TensorRT network definition.
        :param onnx_path: The path to the ONNX graph to load.
        :param batch_size: Static batch size to build the engine with.
        """
        network_flags = (
            1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
        )
        self.network = self.builder.create_network(network_flags)
        self.parser = trt.OnnxParser(self.network, self.trt_logger)

        onnx_path = os.path.realpath(onnx_path)
        with open(onnx_path, "rb") as f:
            if not self.parser.parse(f.read()):
                log.error("Failed to load ONNX file: {}".format(onnx_path))
                for error in range(self.parser.num_errors):
                    log.error(self.parser.get_error(error))
                sys.exit(1)

        inputs = [
            self.network.get_input(i) for i in range(self.network.num_inputs)
        ]
        outputs = [
            self.network.get_output(i) for i in range(self.network.num_outputs)
        ]

        log.info("Network Description")
        # Pin every input to the static batch size via an optimization profile.
        self.batch_size = batch_size
        profile = self.builder.create_optimization_profile()
        for network_input in inputs:
            log.info(
                "Input '{}' with shape {} and dtype {}".format(
                    network_input.name, network_input.shape, network_input.dtype
                )
            )
            # min == opt == max, so the engine only accepts this batch size.
            static_shape = (
                self.batch_size,
                network_input.shape[1],
                network_input.shape[2],
                network_input.shape[3],
            )
            profile.set_shape(
                network_input.name, static_shape, static_shape, static_shape
            )
        for network_output in outputs:
            log.info(
                "Output '{}' with shape {} and dtype {}".format(
                    network_output.name, network_output.shape, network_output.dtype
                )
            )
        self.config.add_optimization_profile(profile)

        assert self.batch_size > 0
        self.builder.max_batch_size = self.batch_size
    def create_engine(self, engine_path: os.PathLike, precision: str) -> None:
        """
        Build the TensorRT engine and serialize it to disk.
        :param engine_path: The path where to serialize the engine to.
        :param precision: The datatype to use for the engine,
            either 'fp32' or 'fp16'.
        """
        engine_path = os.path.realpath(engine_path)
        engine_dir = os.path.dirname(engine_path)
        os.makedirs(engine_dir, exist_ok=True)
        log.info("Building {} Engine in {}".format(precision, engine_path))

        self.config.set_flag(trt.BuilderFlag.STRICT_TYPES)
        if precision == "fp16":
            if not self.builder.platform_has_fast_fp16:
                log.warning(
                    "FP16 is not supported natively on this platform/device"
                )
            else:
                self.config.set_flag(trt.BuilderFlag.FP16)

        log.info("Starting engine build process, this might take a while...")
        with self.builder.build_engine(
            self.network, self.config
        ) as engine, open(engine_path, "wb") as f:
            log.info("Serializing engine to file: {:}".format(engine_path))
            f.write(engine.serialize())
def main(
        onnx: os.PathLike,
        engine: os.PathLike,
        batch_size=1,
        precision="fp16",
        verbose=False,
        workspace=8) -> None:
    """Convert an ONNX model to a TensorRT engine.

    Args:
        onnx (str): The input ONNX model file to load.
        engine (str): The output path for the TRT engine.
        batch_size (int, optional): The static batch size to build
            the engine with. Defaults to 1.
        precision (str, optional): The precision mode to build in,
            either fp32/fp16. Defaults to "fp16".
        verbose (bool, optional): Enable more verbose log output.
            Defaults to False.
        workspace (int, optional): The max memory workspace size to allow,
            in GiB. Defaults to 8.
    """
    log.info(f"Build at the precision: {precision}")
    log.info(f"Allowed workspace size (in GiB): {workspace}")

    builder = EngineBuilder(verbose, workspace)
    builder.create_network(onnx, batch_size)
    builder.create_engine(engine, precision)


if __name__ == "__main__":
    pass
```
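The `__main__` block is left empty above; for completeness, a hypothetical invocation (file names are placeholders, not from my actual setup) would look like:

```python
main(
    onnx="saved_model.onnx",          # ONNX file produced by the tf2onnx command above
    engine="mars-small128.engine",    # hypothetical output path for the engine
    batch_size=1,
    precision="fp16",
)
```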