
Commit 7eced04

lrdxgm authored and tensorflower-gardener committed
Add an optimization pattern to convert a fully_connected op whose weights have a last dimension of 1 into a broadcasting mul op.
PiperOrigin-RevId: 755893068
1 parent ccb2a3d commit 7eced04
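
The rewrite is sound because a fully_connected whose filter has shape [n, 1] contracts over a size-1 dimension, so no real reduction happens: out[..., j] = in[..., 0] * w[j, 0], which is exactly a broadcast multiply of the input against the flattened filter. Below is a minimal NumPy sketch of that equivalence (illustrative only, not part of the commit; shapes and filter values mirror the test added in this change):

    import numpy as np

    # Shapes from the test below: input [5, 3, 1], filter [2, 1], no bias.
    x = np.random.rand(5, 3, 1).astype(np.float32)
    w = np.array([[1.0], [2.0]], dtype=np.float32)   # filter, shape [2, 1]

    # fully_connected with keep_num_dims = true: out[..., j] = sum_k x[..., k] * w[j, k]
    fc = np.einsum('abk,jk->abj', x, w)              # shape [5, 3, 2]

    # The broadcasting mul the pattern emits: [5, 3, 1] * [2] -> [5, 3, 2]
    mul = x * w.reshape(-1)

    assert np.allclose(fc, mul)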

2 files changed: +52 -0 lines changed

tensorflow/compiler/mlir/lite/tests/optimize.mlir

Lines changed: 28 additions & 0 deletions
@@ -4800,3 +4800,31 @@ func.func @AddComputedZeroNegative(%arg0: tensor<1x512xf32>, %arg1: tensor<512x5
   // CHECK: %0 = tfl.sub %arg1, %arg1 {fused_activation_function = "NONE"} : tensor<512x512xf32>
   // CHECK: %1 = tfl.add(%arg0, %0) <{fused_activation_function = "NONE"}> : (tensor<1x512xf32>, tensor<512x512xf32>) -> tensor<512x512xf32>
 }
+
+// CHECK-LABEL: @DegerateFC
+func.func @DegerateFC(%input: tensor<5x3x1xf32>) -> tensor<5x3x2xf32> {
+  %weights = arith.constant dense<[[1.0], [2.0]]> : tensor<2x1xf32>
+  %bias = "tfl.no_value"() {value} : () -> none
+  %0 = "tfl.fully_connected"(%input, %weights, %bias) {asymmetric_quantize_inputs = true, fused_activation_function = "NONE", keep_num_dims = true, weights_format = "DEFAULT"} : (tensor<5x3x1xf32>, tensor<2x1xf32>, none) -> tensor<5x3x2xf32>
+  func.return %0: tensor<5x3x2xf32>
+
+  // CHECK: %0 = tfl.mul(%arg0, %cst) <{fused_activation_function = "NONE"}> : (tensor<5x3x1xf32>, tensor<2xf32>) -> tensor<5x3x2xf32>
+}
+
+// CHECK-LABEL: @DegerateFCNegative
+func.func @DegerateFCNegative(%input_ok: tensor<5x3x1xf32>, %input_too_many_dims: tensor<11x7x5x3x1xf32>, %input_last_dim_not_1: tensor<5x3x2xf32>) -> (tensor<11x7x5x3x2xf32>, tensor<5x3x2xf32>, tensor<5x3x2xf32>, tensor<5x3x2xf32>) {
+  %weights_ok = arith.constant dense<[[1.0], [2.0]]> : tensor<2x1xf32>
+  %weights_last_dim_not_1 = arith.constant dense<[[1.0, 2.0], [3.0, 4.0]]> : tensor<2x2xf32>
+  %weights_quantized = "tfl.pseudo_qconst"() <{qtype = tensor<2x1x!quant.uniform<i8:f32:0, {1.0}>>, value = dense<42> : tensor<2x1xi8>}> : () -> tensor<2x1x!quant.uniform<i8:f32:0, {1.0}>>
+
+  %bias_ok = "tfl.no_value"() {value} : () -> none
+  %bias_notnull = arith.constant dense<[1.0, 2.0]>: tensor<2xf32>
+
+  %1 = "tfl.fully_connected"(%input_too_many_dims, %weights_ok, %bias_ok) {asymmetric_quantize_inputs = true, fused_activation_function = "NONE", keep_num_dims = true, weights_format = "DEFAULT"} : (tensor<11x7x5x3x1xf32>, tensor<2x1xf32>, none) -> tensor<11x7x5x3x2xf32>
+  %2 = "tfl.fully_connected"(%input_last_dim_not_1, %weights_last_dim_not_1, %bias_ok) {asymmetric_quantize_inputs = true, fused_activation_function = "NONE", keep_num_dims = true, weights_format = "DEFAULT"} : (tensor<5x3x2xf32>, tensor<2x2xf32>, none) -> tensor<5x3x2xf32>
+  %3 = "tfl.fully_connected"(%input_ok, %weights_quantized, %bias_ok) {asymmetric_quantize_inputs = true, fused_activation_function = "NONE", keep_num_dims = true, weights_format = "DEFAULT"} : (tensor<5x3x1xf32>, tensor<2x1x!quant.uniform<i8:f32:0, {1.0}>>, none) -> tensor<5x3x2xf32>
+  %4 = "tfl.fully_connected"(%input_ok, %weights_ok, %bias_notnull) {asymmetric_quantize_inputs = true, fused_activation_function = "NONE", keep_num_dims = true, weights_format = "DEFAULT"} : (tensor<5x3x1xf32>, tensor<2x1xf32>, tensor<2xf32>) -> tensor<5x3x2xf32>
+  func.return %1, %2, %3, %4 : tensor<11x7x5x3x2xf32>, tensor<5x3x2xf32>, tensor<5x3x2xf32>, tensor<5x3x2xf32>
+
+  // CHECK-NOT: tfl.mul
+}

tensorflow/compiler/mlir/lite/transforms/optimize_patterns.td

Lines changed: 24 additions & 0 deletions
@@ -2191,3 +2191,27 @@ def AddComputedZeroLHS : Pat<
     TFL_AF_None),
   (replaceWithValue $input),
   [(HasSameType $input, $output)]>;
+
+// Replace matmul where inputs & weights have a last dimension of 1 with an
+// elementwise multiplication that broadcasts, i.e. replace:
+//   [a, b, 1] x [n, 1] => [a, b, n]
+// with:
+//   [a, b, 1] * [n] => [a, b, n]
+def DegenerateFCtoMul : Pat<
+  (TFL_FullyConnectedOp
+    $input,
+    (Arith_ConstantOp:$filter $filterVal),
+    $bias,
+    $fused_activation_function,
+    TFL_FCWO_Default,
+    ConstBoolAttrTrue,
+    $asymmetric_quantize_inputs),
+  (TFL_MulOp
+    $input,
+    (Arith_ConstantOp (FlattenTo1D $filterVal)),
+    $fused_activation_function),
+  [(HasRankAtMost<4> $input),
+   (HasRank<2> $filter),
+   (IsLastDimensionEqualOne $input),
+   (SameElementType $input, $filter),
+   (IsNoneType $bias)]>;
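
The pattern only fires under the constraints listed above, which the negative tests exercise: the input may have rank at most 4, the constant filter must be 2-D, the input's last dimension must be 1, input and filter must share an element type (ruling out the quantized filter), and the bias must be none; the match itself additionally requires weights_format DEFAULT and keep_num_dims = true. A rough Python sketch of that gating logic (hypothetical helper for illustration, not the actual C++ predicate implementations):

    def matches_degenerate_fc(input_shape, filter_shape, same_element_type, bias_is_none):
        """Mirrors the DRR constraints on DegenerateFCtoMul."""
        return (len(input_shape) <= 4          # HasRankAtMost<4> $input
                and len(filter_shape) == 2     # HasRank<2> $filter
                and input_shape[-1] == 1       # IsLastDimensionEqualOne $input
                and same_element_type          # SameElementType $input, $filter
                and bias_is_none)              # IsNoneType $bias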
