Prototype reorg of the FX converters to clean dependency chain #1683

Closed
narendasan wants to merge 1 commit

Conversation

narendasan (Collaborator)

Description

This PR proposes a reorg of the FX converter library so that the dependency chain is much cleaner, given that there are three versions of ops supported in FX (nn, acc, aten).

Instead of having implementations call each other, the idea is to have an IR-agnostic set of converters, with the [aten/acc/nn]_ops_converters files responsible only for packing arguments properly (see the sketch below).
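
As a rough sketch of the intended layering (simplified from the diff in this PR: add_relu lives in activation.py, and the nn-path wrapper in nn_ops_converters.py only normalizes and forwards its inputs; type hints and the INT8 dynamic-range handling are omitted here for brevity):

# activation.py -- IR-agnostic implementation, shared by all tracing paths
def add_relu(network, target, kwargs, name):
    input_val = kwargs["input"]
    return add_activation_layer(
        network, input_val, trt.ActivationType.RELU, target, name
    )

# nn_ops_converters.py -- per-IR wrapper: packs arguments and calls down
@tensorrt_converter(torch.nn.functional.relu)
@tensorrt_converter(torch.nn.modules.activation.ReLU)
def relu(network, submod, args, kwargs, layer_name):
    # args/kwargs should have already been normalized to kwargs
    assert len(args) == 0
    return activation.add_relu(network, "tensorrt", kwargs, layer_name)

The dependency arrow therefore always points from the [aten/acc/nn]_ops_converters files into IR-agnostic modules like activation.py, never across the per-IR files.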

Fixes # (issue)

Type of change

Please delete options that are not relevant and/or add your own.

  • New feature (non-breaking change which adds functionality)

Checklist:

  • My code follows the style guidelines of this project (You can use the linters)
  • I have performed a self-review of my own code
  • I have commented my code, particularly in hard-to-understand areas and hacks
  • I have made corresponding changes to the documentation
  • I have added tests to verify my fix or my feature
  • New and existing unit tests pass locally with my changes
  • I have added the relevant labels to my PR so that relevant reviewers are notified

…so that the three tracing paths call down into a common converter base instead of across each other
narendasan added the WIP (Work is in progress, pull request should not be merged yet) label Feb 20, 2023
narendasan requested a review from apbose February 20, 2023 22:37
github-actions bot requested a review from wushirong February 20, 2023 22:37
github-actions bot left a comment:
Code conforms to C++ style guidelines

github-actions bot left a comment:
There are some changes that do not conform to Python style guidelines:

--- py/torch_tensorrt/fx/converters/activation.py	2023-02-20 22:37:19.697378 +0000
+++ py/torch_tensorrt/fx/converters/activation.py	2023-02-20 22:37:34.727752 +0000
@@ -19,19 +19,20 @@
    TRTPluginFieldCollection,
    TRTTensor,
)
from ..utils import torch_dtype_from_trt

+
def add_activation_layer(
    network: TRTNetwork,
    input_val: TRTTensor,
    operation_type: trt.ActivationType,
    target: Target,
    name: str,
    alpha: Optional[Any] = None,
    beta: Optional[Any] = None,
-    dyn_range_fn: Optional[Callable[Tuple[float, float]]] = None
+    dyn_range_fn: Optional[Callable[Tuple[float, float]]] = None,
) -> TRTTensor:
    """
    Add a TensorRT Activation layer to `network`.

    Args:
@@ -68,20 +69,22 @@
        dyn_range = dyn_range_fn(input_val.dynamic_range)
        mark_as_int8_layer(layer, dyn_range)

    return layer.get_output(0)

+
def add_elu(
    network: TRTNetwork,
    target: Target,
    kwargs: Dict[str, Argument],
    name: str,
) -> TRTTensor:
    input_val = kwargs["input"]
    alpha = kwargs["alpha"]
    operation_type = trt.ActivationType.ELU
    return add_activation_layer(network, input_val, operation_type, target, name, alpha)
+

def add_gelu(
    network: TRTNetwork,
    target: Target,
    kwargs: Dict[str, Argument],
@@ -113,10 +116,11 @@

    layer = network.add_plugin_v2([input_val], plugin)
    set_layer_name(layer, target, name)
    return layer.get_output(0)

+
def add_hard_sigmoid(
    network: TRTNetwork,
    target: Target,
    kwargs: Dict[str, Argument],
    name: str,
@@ -130,10 +134,11 @@
        name,
        alpha=1 / 6,
        beta=0.5,
    )

+
def add_hardtanh(
    network: TRTNetwork,
    target: Target,
    kwargs: Dict[str, Argument],
    name: str,
@@ -168,10 +173,11 @@
    operation_type = trt.ActivationType.LEAKY_RELU
    return add_activation_layer(
        network, input_val, operation_type, target, name, negative_slope
    )

+
def add_relu(
    network: TRTNetwork,
    target: Target,
    kwargs: Dict[str, Argument],
    name: str,
@@ -180,21 +186,30 @@
    operation_type = trt.ActivationType.RELU

    def activation_dyn_range_fn(dyn_range):
        return max(0, dyn_range[0]), max(0, dyn_range[1])

-    return add_activation_layer(network, input_val, operation_type, target, name, dyn_range_fn=activation_dyn_range_fn)
+    return add_activation_layer(
+        network,
+        input_val,
+        operation_type,
+        target,
+        name,
+        dyn_range_fn=activation_dyn_range_fn,
+    )
+

def add_selu(
    network: TRTNetwork,
    target: Target,
    kwargs: Dict[str, Argument],
    name: str,
) -> TRTTensor:
    input_val = kwargs["input"]
    operation_type = trt.ActivationType.SELU
    return add_activation_layer(network, input_val, operation_type, target, name)
+

def add_sigmoid(
    network: TRTNetwork,
    target: Target,
    kwargs: Dict[str, Argument],
@@ -207,11 +222,16 @@
            return 1 / (1 + np.exp(-x))

        return sigmoid_fn(dyn_range[0]), sigmoid_fn(dyn_range[1])

    return add_activation_layer(
-        network, input_val, trt.ActivationType.SIGMOID, target, name, dyn_range_fn=activation_dyn_range_fn
+        network,
+        input_val,
+        trt.ActivationType.SIGMOID,
+        target,
+        name,
+        dyn_range_fn=activation_dyn_range_fn,
    )


def add_softsign(
    network: TRTNetwork,
@@ -221,10 +241,11 @@
) -> TRTTensor:
    input_val = kwargs["input"]
    operation_type = trt.ActivationType.SOFTSIGN
    return add_activation_layer(network, input_val, operation_type, target, name)

+
def add_tanh(
    network: TRTNetwork,
    target: Target,
    kwargs: Dict[str, Argument],
    name: str,
--- py/torch_tensorrt/fx/converters/nn_ops_converters.py	2023-02-20 22:37:19.697378 +0000
+++ py/torch_tensorrt/fx/converters/nn_ops_converters.py	2023-02-20 22:37:34.990162 +0000
@@ -7,17 +7,19 @@
from ..converter_registry import tensorrt_converter

from .converter_utils import mark_as_int8_layer
import activation

+
@tensorrt_converter(torch.nn.functional.relu)
@tensorrt_converter(torch.nn.modules.activation.ReLU)
def relu(network, submod, args, kwargs, layer_name):
    # args/kwargs should have already been normalized to kwargs
    assert len(args) == 0
-    return activation.add_relu(network,"tensorrt", kwargs, layer_name)
+    return activation.add_relu(network, "tensorrt", kwargs, layer_name)
+

@tensorrt_converter(torch.nn.modules.activation.Sigmoid)
def sigmoid(network, submod, args, kwargs, layer_name):
    # args/kwargs should have already been normalized to kwargs
    assert len(args) == 0
-    return activation.add_sigmoid(network,"tensorrt", kwargs, layer_name)
\ No newline at end of file
+    return activation.add_sigmoid(network, "tensorrt", kwargs, layer_name)
--- py/torch_tensorrt/fx/converters/acc_ops_converters.py	2023-02-20 22:37:19.697378 +0000
+++ py/torch_tensorrt/fx/converters/acc_ops_converters.py	2023-02-20 22:37:36.744356 +0000
@@ -1027,10 +1027,11 @@
    kwargs: Dict[str, Argument],
    name: str,
) -> Union[TRTTensor, Sequence[TRTTensor]]:
    return activation.add_elu(network, target, kwargs, name)

+
@tensorrt_converter(acc_ops.selu)
def acc_ops_selu(
    network: TRTNetwork,
    target: Target,
    args: Tuple[Argument, ...],
@@ -1048,10 +1049,11 @@
    kwargs: Dict[str, Argument],
    name: str,
) -> Union[TRTTensor, Sequence[TRTTensor]]:
    return activation.add_softsign(network, target, kwargs, name)

+
@tensorrt_converter(acc_ops.sin)
def acc_ops_sin(
    network: TRTNetwork,
    target: Target,
    args: Tuple[Argument, ...],
@@ -1122,10 +1124,11 @@
    args: Tuple[Argument, ...],
    kwargs: Dict[str, Argument],
    name: str,
) -> Union[TRTTensor, Sequence[TRTTensor]]:
    return activation.add_tanh(network, target, kwargs, name)
+

@tensorrt_converter(acc_ops.asin)
def acc_ops_asin(
    network: TRTNetwork,
    target: Target,
@@ -3323,10 +3326,11 @@
    kwargs: Dict[str, Argument],
    name: str,
) -> Union[TRTTensor, Sequence[TRTTensor]]:
    return activation.add_gelu(network, target, kwargs, name)

+
@tensorrt_converter(acc_ops.chunk)
def acc_ops_chunk(
    network: TRTNetwork,
    target: Target,
    args: Tuple[Argument, ...],
@@ -3477,10 +3481,11 @@
    args: Tuple[Argument, ...],
    kwargs: Dict[str, Argument],
    name: str,
) -> Union[TRTTensor, Sequence[TRTTensor]]:
    return activation.add_hardtanh(network, target, kwargs, name)
+

@tensorrt_converter(acc_ops.interpolate)
def acc_ops_interpolate(
    network: TRTNetwork,
    target: Target,

narendasan (Collaborator, Author) commented:
@apbose This is sort of what I have been talking about. We abstract out the various potential IRs in files like activation.py, whereas the [aten/acc/nn]_ops_converters.py files are just for input packing. The idea is that torch2trt-style converters will fill out files like activation.py.
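
For instance, adding a new activation would mean writing one IR-agnostic function and registering thin per-path wrappers around it. A hypothetical sketch (add_softplus and its wrapper do not exist in this PR; they only illustrate the contract):

# activation.py -- hypothetical IR-agnostic entry, written once (torch2trt style)
def add_softplus(network, target, kwargs, name):
    input_val = kwargs["input"]
    return add_activation_layer(
        network, input_val, trt.ActivationType.SOFTPLUS, target, name
    )

# nn_ops_converters.py -- registration for the nn path is just input packing
@tensorrt_converter(torch.nn.functional.softplus)
def softplus(network, submod, args, kwargs, layer_name):
    assert len(args) == 0  # args/kwargs already normalized to kwargs
    return activation.add_softplus(network, "tensorrt", kwargs, layer_name)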

apbose (Collaborator) left a comment:

Why is the dyn_range_fn required? What would be the typical use case?

apbose closed this Mar 17, 2023
apbose (Collaborator) commented Mar 17, 2023:

Raised PR #1745

Labels: cla signed, component: api [Python], component: fx, WIP
3 participants