Skip to content

Commit 8e20f76

Browse files
committed
[TorchToTosa] add conv reshape in core lowering
- Insert rank-4/5 reshapes for conv inputs/weights during TorchToTosa lowering

Signed-off-by: Cathal Corbett <cathal.corbett@arm.com>
Change-Id: Ica1b5cc265822ecd054f832908ec31bc2325c661
1 parent 4b48bb7 commit 8e20f76

File tree

2 files changed: +182 −14 lines

lib/Conversion/TorchToTosa/TorchToTosa.cpp

Lines changed: 104 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#include "mlir/Dialect/Tosa/IR/TosaOps.h"
1616
#include "mlir/Dialect/Tosa/Utils/ConversionUtils.h"
1717
#include "mlir/IR/DialectResourceBlobManager.h"
18+
#include "mlir/IR/Dominance.h"
1819
#include "mlir/IR/Matchers.h"
1920
#include "mlir/Pass/Pass.h"
2021
#include "mlir/Transforms/DialectConversion.h"
@@ -28,6 +29,7 @@
2829
#include "torch-mlir/Dialect/Torch/Utils/Utils.h"
2930
#include "torch-mlir/Dialect/TorchConversion/Transforms/BackendTypeConversion.h"
3031
#include "llvm/ADT/APInt.h"
32+
#include "llvm/ADT/DenseMap.h"
3133
#include "llvm/ADT/STLExtras.h"
3234
#include "llvm/ADT/TypeSwitch.h"
3335
#include <cmath>
@@ -48,6 +50,11 @@ namespace mlir::torch {
4850
#include "torch-mlir/Conversion/Passes.h.inc"
4951

5052
namespace {
53+
struct RankTemplate {
54+
int64_t rank;
55+
RankedTensorType type;
56+
Value shape;
57+
};
5158

5259
// Runs an in-place inclusive prefix sum along the middle dimension (K) of
5360
// `running` using a binary lifting scheme. The input must have shape [N, K, C].
@@ -2634,14 +2641,109 @@ LogicalResult ConvertAtenOp<AtenConvolutionOp>::matchAndRewriteImpl(
26342641
auto input = adaptor.getInput();
26352642
auto weight = adaptor.getWeight();
26362643

2637-
auto inputTy = cast<RankedTensorType>(input.getType());
2638-
auto weightTy = cast<RankedTensorType>(weight.getType());
26392644
auto outputTy =
26402645
cast<RankedTensorType>(getTypeConverter()->convertType(op.getType()));
2646+
auto inputTy = dyn_cast<RankedTensorType>(input.getType());
2647+
auto weightTy = dyn_cast<RankedTensorType>(weight.getType());
26412648
if (!inputTy || !weightTy || !outputTy)
26422649
return rewriter.notifyMatchFailure(
26432650
op, "Input, weight and output to Convolution must be ranked tensors");
26442651

2652+
int64_t outputRank = outputTy.getRank();
2653+
if (outputRank != 4 && outputRank != 5)
2654+
return rewriter.notifyMatchFailure(
2655+
op, "Unimplemented: only 2D or 3D convolutions supported");
2656+
2657+
auto funcOp = op->getParentOfType<func::FuncOp>();
2658+
llvm::DenseMap<unsigned, SmallVector<RankTemplate>> argToTemplates;
2659+
bool templatesBuilt = false;
2660+
DominanceInfo domInfo(funcOp);
2661+
2662+
auto buildTemplates = [&]() {
2663+
if (templatesBuilt)
2664+
return;
2665+
templatesBuilt = true;
2666+
funcOp.walk([&](tosa::ReshapeOp reshapeOp) {
2667+
Value source = reshapeOp.getInput1();
2668+
auto blockArg = dyn_cast<BlockArgument>(source);
2669+
if (!blockArg)
2670+
return;
2671+
2672+
auto dstType =
2673+
dyn_cast<RankedTensorType>(reshapeOp.getResult().getType());
2674+
if (!dstType || (dstType.getRank() != 4 && dstType.getRank() != 5))
2675+
return;
2676+
2677+
unsigned argNumber = blockArg.getArgNumber();
2678+
auto &templates = argToTemplates[argNumber];
2679+
for (const auto &tmpl : templates) {
2680+
if (tmpl.rank == dstType.getRank() && tmpl.type == dstType)
2681+
return;
2682+
}
2683+
templates.push_back(
2684+
RankTemplate{dstType.getRank(), dstType, reshapeOp.getShape()});
2685+
});
2686+
};
2687+
2688+
auto normalizeOperandRank = [&](Value operand,
2689+
int64_t requiredRank) -> FailureOr<Value> {
2690+
auto rankedType = dyn_cast<RankedTensorType>(operand.getType());
2691+
if (!rankedType)
2692+
return failure();
2693+
if (rankedType.getRank() == requiredRank)
2694+
return operand;
2695+
2696+
auto blockArg = dyn_cast<BlockArgument>(operand);
2697+
if (!blockArg)
2698+
return failure();
2699+
2700+
buildTemplates();
2701+
auto tmplIt = argToTemplates.find(blockArg.getArgNumber());
2702+
if (tmplIt == argToTemplates.end())
2703+
return failure();
2704+
2705+
const RankTemplate *match = nullptr;
2706+
for (const auto &tmpl : tmplIt->second) {
2707+
if (tmpl.rank == requiredRank) {
2708+
match = &tmpl;
2709+
break;
2710+
}
2711+
}
2712+
if (!match)
2713+
return failure();
2714+
2715+
Value shapeVal = match->shape;
2716+
if (auto shapeOp = shapeVal.getDefiningOp<tosa::ConstShapeOp>()) {
2717+
OpBuilder builder(op);
2718+
shapeVal = tosa::ConstShapeOp::create(
2719+
builder, op->getLoc(), shapeOp.getType(), shapeOp.getValues());
2720+
} else if (!domInfo.properlyDominates(shapeVal, op)) {
2721+
return failure();
2722+
}
2723+
2724+
auto reshape = tosa::ReshapeOp::create(rewriter, op->getLoc(), match->type,
2725+
operand, shapeVal);
2726+
return reshape.getResult();
2727+
};
2728+
2729+
if (inputTy.getRank() != outputRank) {
2730+
auto normalized = normalizeOperandRank(input, outputRank);
2731+
if (failed(normalized))
2732+
return rewriter.notifyMatchFailure(
2733+
op, "Input rank mismatch without normalization template");
2734+
input = *normalized;
2735+
inputTy = cast<RankedTensorType>(input.getType());
2736+
}
2737+
2738+
if (weightTy.getRank() != outputRank) {
2739+
auto normalized = normalizeOperandRank(weight, outputRank);
2740+
if (failed(normalized))
2741+
return rewriter.notifyMatchFailure(
2742+
op, "Weight rank mismatch without normalization template");
2743+
weight = *normalized;
2744+
weightTy = cast<RankedTensorType>(weight.getType());
2745+
}
2746+
26452747
auto inputElemTy = inputTy.getElementType();
26462748
auto weightElemTy = weightTy.getElementType();
26472749
auto inputShape = makeShapeTorchCompatible(inputTy.getShape());
@@ -2650,16 +2752,11 @@ LogicalResult ConvertAtenOp<AtenConvolutionOp>::matchAndRewriteImpl(
26502752

26512753
int64_t inputRank = inputTy.getRank();
26522754
int64_t weightRank = weightTy.getRank();
2653-
int64_t outputRank = outputTy.getRank();
26542755

26552756
if (inputRank != weightRank || outputRank != inputRank)
26562757
return rewriter.notifyMatchFailure(
26572758
op, "Input, weight and output ranks must match for convolution");
26582759

2659-
if (inputRank != 4 && inputRank != 5)
2660-
return rewriter.notifyMatchFailure(
2661-
op, "Unimplemented: only 2D or 3D convolutions supported");
2662-
26632760
bool is3D = inputRank == 5;
26642761
int64_t spatialRank = inputRank - 2;
26652762

test/Conversion/TorchToTosa/basic.mlir

Lines changed: 78 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// RUN: torch-mlir-opt <%s -convert-torch-to-tosa -split-input-file -verify-diagnostics | FileCheck %s
1+
// RUN: torch-mlir-opt %s -convert-torch-to-tosa -split-input-file -verify-diagnostics | FileCheck %s --check-prefix=CHECK
22

33
// CHECK-LABEL: func.func @torch.aten.tanh$basic(
44
// CHECK-SAME: %[[ARG:.*]]: !torch.vtensor<[?,?],f32>) -> !torch.vtensor<[?,?],f32> {
@@ -13,6 +13,80 @@ func.func @torch.aten.tanh$basic(%arg0: !torch.vtensor<[?,?],f32>) -> !torch.vte
1313

1414
// -----
1515

16+
// CHECK-LABEL: func.func @conv2d_io_insert_reshape(
17+
// CHECK: %[[SHAPE:.*]] = tosa.const_shape
18+
// CHECK: %[[INPUT_ZP:.*]] = "tosa.const"
19+
// CHECK: %[[WEIGHT_ZP:.*]] = "tosa.const"
20+
// CHECK: %[[R0:.*]] = tosa.reshape %arg0, %[[SHAPE]]
21+
// CHECK: %[[R1:.*]] = tosa.reshape %arg1, %[[SHAPE]]
22+
// CHECK: %[[CONV:.*]] = tosa.conv2d %[[R0]], %[[R1]], %arg2, %[[INPUT_ZP]], %[[WEIGHT_ZP]]
23+
func.func @conv2d_io_insert_reshape(%arg0: tensor<256xf32>, %arg1: tensor<256xf32>, %arg2: tensor<16xf32>) -> tensor<1x1x1x16xf32> {
24+
%shape = "tosa.const_shape"() {values = dense<[1, 1, 16, 16]> : tensor<4xindex>} : () -> !tosa.shape<4>
25+
%input_zp = "tosa.const"() {values = dense<0.0> : tensor<1xf32>} : () -> tensor<1xf32>
26+
%weight_zp = "tosa.const"() {values = dense<0.0> : tensor<1xf32>} : () -> tensor<1xf32>
27+
%r0 = "tosa.reshape"(%arg0, %shape) : (tensor<256xf32>, !tosa.shape<4>) -> tensor<1x1x16x16xf32>
28+
%r1 = "tosa.reshape"(%arg1, %shape) : (tensor<256xf32>, !tosa.shape<4>) -> tensor<1x1x16x16xf32>
29+
%conv = "tosa.conv2d"(%r0, %r1, %arg2, %input_zp, %weight_zp) {pad = array<i64: 0, 0, 0, 0>, stride = array<i64: 1, 1>, dilation = array<i64: 1, 1>, acc_type = f32} : (tensor<1x1x16x16xf32>, tensor<1x1x16x16xf32>, tensor<16xf32>, tensor<1xf32>, tensor<1xf32>) -> tensor<1x1x1x16xf32>
30+
return %conv : tensor<1x1x1x16xf32>
31+
}
32+
33+
// CHECK-LABEL: func.func @depthwise_conv2d_io_insert_reshape(
34+
// CHECK: %[[SHAPE:.*]] = tosa.const_shape
35+
// CHECK: %[[WSHAPE:.*]] = tosa.const_shape
36+
// CHECK: %[[INPUT_ZP:.*]] = "tosa.const"
37+
// CHECK: %[[WEIGHT_ZP:.*]] = "tosa.const"
38+
// CHECK: %[[R0:.*]] = tosa.reshape %arg0, %[[SHAPE]]
39+
// CHECK: %[[R1:.*]] = tosa.reshape %arg1, %[[WSHAPE]]
40+
// CHECK: %[[CONV:.*]] = tosa.depthwise_conv2d %[[R0]], %[[R1]], %arg2, %[[INPUT_ZP]], %[[WEIGHT_ZP]]
41+
func.func @depthwise_conv2d_io_insert_reshape(%arg0: tensor<9xf32>, %arg1: tensor<9xf32>, %arg2: tensor<1xf32>) -> tensor<1x1x1x1xf32> {
42+
%shape = "tosa.const_shape"() {values = dense<[1, 3, 3, 1]> : tensor<4xindex>} : () -> !tosa.shape<4>
43+
%wshape = "tosa.const_shape"() {values = dense<[3, 3, 1, 1]> : tensor<4xindex>} : () -> !tosa.shape<4>
44+
%input_zp = "tosa.const"() {values = dense<0.0> : tensor<1xf32>} : () -> tensor<1xf32>
45+
%weight_zp = "tosa.const"() {values = dense<0.0> : tensor<1xf32>} : () -> tensor<1xf32>
46+
%r0 = "tosa.reshape"(%arg0, %shape) : (tensor<9xf32>, !tosa.shape<4>) -> tensor<1x3x3x1xf32>
47+
%r1 = "tosa.reshape"(%arg1, %wshape) : (tensor<9xf32>, !tosa.shape<4>) -> tensor<3x3x1x1xf32>
48+
%conv = "tosa.depthwise_conv2d"(%r0, %r1, %arg2, %input_zp, %weight_zp) {pad = array<i64: 0, 0, 0, 0>, stride = array<i64: 1, 1>, dilation = array<i64: 1, 1>, acc_type = f32} : (tensor<1x3x3x1xf32>, tensor<3x3x1x1xf32>, tensor<1xf32>, tensor<1xf32>, tensor<1xf32>) -> tensor<1x1x1x1xf32>
49+
return %conv : tensor<1x1x1x1xf32>
50+
}
51+
52+
// CHECK-LABEL: func.func @transpose_conv2d_io_insert_reshape(
53+
// CHECK: %[[SHAPE:.*]] = tosa.const_shape
54+
// CHECK: %[[WSHAPE:.*]] = tosa.const_shape
55+
// CHECK: %[[INPUT_ZP:.*]] = "tosa.const"
56+
// CHECK: %[[WEIGHT_ZP:.*]] = "tosa.const"
57+
// CHECK: %[[R0:.*]] = tosa.reshape %arg0, %[[SHAPE]]
58+
// CHECK: %[[R1:.*]] = tosa.reshape %arg1, %[[WSHAPE]]
59+
// CHECK: %[[CONV:.*]] = tosa.transpose_conv2d %[[R0]], %[[R1]], %arg2, %[[INPUT_ZP]], %[[WEIGHT_ZP]]
60+
func.func @transpose_conv2d_io_insert_reshape(%arg0: tensor<9xf32>, %arg1: tensor<9xf32>, %arg2: tensor<1xf32>) -> tensor<1x5x5x1xf32> {
61+
%shape = "tosa.const_shape"() {values = dense<[1, 3, 3, 1]> : tensor<4xindex>} : () -> !tosa.shape<4>
62+
%wshape = "tosa.const_shape"() {values = dense<[1, 3, 3, 1]> : tensor<4xindex>} : () -> !tosa.shape<4>
63+
%input_zp = "tosa.const"() {values = dense<0.0> : tensor<1xf32>} : () -> tensor<1xf32>
64+
%weight_zp = "tosa.const"() {values = dense<0.0> : tensor<1xf32>} : () -> tensor<1xf32>
65+
%r0 = "tosa.reshape"(%arg0, %shape) : (tensor<9xf32>, !tosa.shape<4>) -> tensor<1x3x3x1xf32>
66+
%r1 = "tosa.reshape"(%arg1, %wshape) : (tensor<9xf32>, !tosa.shape<4>) -> tensor<1x3x3x1xf32>
67+
%conv = "tosa.transpose_conv2d"(%r0, %r1, %arg2, %input_zp, %weight_zp) {out_pad = array<i64: 0, 0, 0, 0>, stride = array<i64: 1, 1>, acc_type = f32, dilation = array<i64: 1, 1>, pad = array<i64: 0, 0, 0, 0>} : (tensor<1x3x3x1xf32>, tensor<1x3x3x1xf32>, tensor<1xf32>, tensor<1xf32>, tensor<1xf32>) -> tensor<1x5x5x1xf32>
68+
return %conv : tensor<1x5x5x1xf32>
69+
}
70+
71+
// CHECK-LABEL: func.func @conv3d_io_insert_reshape(
72+
// CHECK: %[[SHAPE:.*]] = tosa.const_shape
73+
// CHECK: %[[WSHAPE:.*]] = tosa.const_shape
74+
// CHECK: %[[INPUT_ZP:.*]] = "tosa.const"
75+
// CHECK: %[[WEIGHT_ZP:.*]] = "tosa.const"
76+
// CHECK: %[[R0:.*]] = tosa.reshape %arg0, %[[SHAPE]]
77+
// CHECK: %[[R1:.*]] = tosa.reshape %arg1, %[[WSHAPE]]
78+
// CHECK: %[[CONV:.*]] = tosa.conv3d %[[R0]], %[[R1]], %arg2, %[[INPUT_ZP]], %[[WEIGHT_ZP]]
79+
func.func @conv3d_io_insert_reshape(%arg0: tensor<64xf32>, %arg1: tensor<1xf32>, %arg2: tensor<1xf32>) -> tensor<1x1x4x4x4xf32> {
80+
%shape = "tosa.const_shape"() {values = dense<[1, 1, 4, 4, 4]> : tensor<5xindex>} : () -> !tosa.shape<5>
81+
%wshape = "tosa.const_shape"() {values = dense<[1, 1, 1, 1, 1]> : tensor<5xindex>} : () -> !tosa.shape<5>
82+
%input_zp = "tosa.const"() {values = dense<0.0> : tensor<1xf32>} : () -> tensor<1xf32>
83+
%weight_zp = "tosa.const"() {values = dense<0.0> : tensor<1xf32>} : () -> tensor<1xf32>
84+
%r0 = "tosa.reshape"(%arg0, %shape) : (tensor<64xf32>, !tosa.shape<5>) -> tensor<1x1x4x4x4xf32>
85+
%r1 = "tosa.reshape"(%arg1, %wshape) : (tensor<1xf32>, !tosa.shape<5>) -> tensor<1x1x1x1x1xf32>
86+
%conv = "tosa.conv3d"(%r0, %r1, %arg2, %input_zp, %weight_zp) {pad = array<i64: 0, 0, 0, 0, 0, 0>, stride = array<i64: 1, 1, 1>, dilation = array<i64: 1, 1, 1>, acc_type = f32} : (tensor<1x1x4x4x4xf32>, tensor<1x1x1x1x1xf32>, tensor<1xf32>, tensor<1xf32>, tensor<1xf32>) -> tensor<1x1x4x4x4xf32>
87+
return %conv : tensor<1x1x4x4x4xf32>
88+
}
89+
1690
// CHECK-LABEL: func.func @torch.aten.sigmoid$basic(
1791
// CHECK-SAME: %[[ARG:.*]]: !torch.vtensor<[?,?],f32>) -> !torch.vtensor<[?,?],f32> {
1892
// CHECK: %[[ARG_BUILTIN:.*]] = torch_c.to_builtin_tensor %[[ARG]] : !torch.vtensor<[?,?],f32> -> tensor<?x?xf32>
@@ -2417,8 +2491,7 @@ func.func @torch.aten.avg_pool2d.divisor_override_unsupported_value(%arg0: !torc
24172491
%0 = torch.prim.ListConstruct %int3, %int3 : (!torch.int, !torch.int) -> !torch.list<int>
24182492
%1 = torch.prim.ListConstruct %int1, %int1 : (!torch.int, !torch.int) -> !torch.list<int>
24192493
%2 = torch.prim.ListConstruct %int1, %int1 : (!torch.int, !torch.int) -> !torch.list<int>
2420-
// expected-error @+1 {{failed to legalize operation 'torch.aten.avg_pool2d' that was explicitly marked illegal}}
2421-
%3 = torch.aten.avg_pool2d %arg0, %0, %1, %2, %false, %count_include_pad, %divisor_override : !torch.vtensor<[1,192,35,35],f32>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.bool, !torch.int -> !torch.vtensor<[1,192,35,35],f32>
2494+
%3 = torch.aten.avg_pool2d %arg0, %0, %1, %2, %false, %count_include_pad, %divisor_override : !torch.vtensor<[1,192,35,35],f32>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.bool, !torch.int -> !torch.vtensor<[1,192,35,35],f32> // expected-error {{failed to legalize operation 'torch.aten.avg_pool2d' that was explicitly marked illegal}}
24222495
return %3 : !torch.vtensor<[1,192,35,35],f32>
24232496
}
24242497

@@ -2664,8 +2737,7 @@ func.func @torch.aten.index.Tensor_hacked_twin(%arg0: !torch.vtensor<[2,4,2],si6
26642737

26652738
func.func @torch.aten.index.Tensor_hacked_twin.dynamic_size(%arg0: !torch.vtensor<[?,4],f32>, %arg1: !torch.vtensor<[?,1],si64>, %arg2: !torch.vtensor<[1,4],si64>) -> !torch.vtensor<[?,4],f32> attributes {torch.assume_strict_symbolic_shapes} {
26662739
%0 = torch.prim.ListConstruct %arg1, %arg2 : (!torch.vtensor<[?,1],si64>, !torch.vtensor<[1,4],si64>) -> !torch.list<vtensor>
2667-
// expected-error @+1 {{failed to legalize operation 'torch.aten.index.Tensor_hacked_twin' that was explicitly marked illegal}}
2668-
%1 = torch.aten.index.Tensor_hacked_twin %arg0, %0 : !torch.vtensor<[?,4],f32>, !torch.list<vtensor> -> !torch.vtensor<[?,4],f32>
2740+
%1 = torch.aten.index.Tensor_hacked_twin %arg0, %0 : !torch.vtensor<[?,4],f32>, !torch.list<vtensor> -> !torch.vtensor<[?,4],f32> // expected-error {{failed to legalize operation 'torch.aten.index.Tensor_hacked_twin' that was explicitly marked illegal}}
26692741
return %1 : !torch.vtensor<[?,4],f32>
26702742
}
26712743

@@ -4552,8 +4624,7 @@ func.func @torch.aten.empty.memory_format() -> !torch.vtensor<[1,0,256],f32>{
45524624
%none = torch.constant.none
45534625
%cpu = torch.constant.device "cpu"
45544626
%false = torch.constant.bool false
4555-
// expected-error @below {{failed to legalize operation 'torch.aten.empty.memory_format' that was explicitly marked illegal}}
4556-
%out = torch.aten.empty.memory_format %2452, %none, %none, %cpu, %false, %none : !torch.list<int>, !torch.none, !torch.none, !torch.Device, !torch.bool, !torch.none -> !torch.vtensor<[1,0,256],f32>
4627+
%out = torch.aten.empty.memory_format %2452, %none, %none, %cpu, %false, %none : !torch.list<int>, !torch.none, !torch.none, !torch.Device, !torch.bool, !torch.none -> !torch.vtensor<[1,0,256],f32> // expected-error {{failed to legalize operation 'torch.aten.empty.memory_format' that was explicitly marked illegal}}
45574628
return %out : !torch.vtensor<[1,0,256],f32>
45584629
}
45594630

0 commit comments

Comments (0)