Skip to content

Commit 3d6bab0

Browse files
committed
[TorchToTosa] Avoid i1 gather by casting through i8
TOSA gather does not accept i1 tensors. When the gather element type is i1, cast the inputs to i8, perform the gather (including the gather-nd paths), then cast the result back to i1.

Signed-off-by: Cathal Corbett <cathal.corbett@arm.com>
Change-Id: I8e3034612c2fabec7c9e75d8295a863860a674c2
1 parent 64ca81a commit 3d6bab0

File tree

3 files changed

+129
-13
lines changed

3 files changed

+129
-13
lines changed

lib/Conversion/TorchToTosa/TorchToTosa.cpp

Lines changed: 43 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -4545,14 +4545,33 @@ LogicalResult ConvertAtenOp<AtenEmbeddingOp>::matchAndRewriteImpl(
45454545
.value();
45464546

45474547
SmallVector<int64_t> intermediateOutShape = {1, numIndices, weightShape[1]};
4548-
auto gatherOp = tosa::GatherOp::create(
4549-
rewriter, op->getLoc(),
4550-
RankedTensorType::get(makeShapeLLVMCompatible(intermediateOutShape),
4551-
weightType.getElementType()),
4552-
reshapedWeight, castIndices);
4548+
auto gatherElemTy = weightType.getElementType();
4549+
auto gatherTy = RankedTensorType::get(
4550+
makeShapeLLVMCompatible(intermediateOutShape), gatherElemTy);
4551+
Value gatherResult;
4552+
if (auto intTy = dyn_cast<IntegerType>(gatherElemTy);
4553+
intTy && intTy.getWidth() == 1) {
4554+
auto i8Ty = rewriter.getI8Type();
4555+
auto reshapedWeightI8 =
4556+
tosa::tosaCastTensorToType(
4557+
rewriter, reshapedWeight,
4558+
RankedTensorType::get(makeShapeLLVMCompatible(newWeightShape),
4559+
i8Ty))
4560+
.value();
4561+
auto gatherTyI8 = RankedTensorType::get(
4562+
makeShapeLLVMCompatible(intermediateOutShape), i8Ty);
4563+
auto gatheredI8 = tosa::GatherOp::create(rewriter, op->getLoc(), gatherTyI8,
4564+
reshapedWeightI8, castIndices);
4565+
gatherResult =
4566+
tosa::tosaCastTensorToType(rewriter, gatheredI8, gatherTy).value();
4567+
} else {
4568+
gatherResult = tosa::GatherOp::create(rewriter, op->getLoc(), gatherTy,
4569+
reshapedWeight, castIndices)
4570+
.getResult();
4571+
}
45534572

45544573
rewriter.replaceOpWithNewOp<tosa::ReshapeOp>(
4555-
op, outType, gatherOp,
4574+
op, outType, gatherResult,
45564575
tosa::getTosaConstShape(rewriter, op->getLoc(),
45574576
makeShapeTorchCompatible(outType.getShape())));
45584577

@@ -4868,9 +4887,24 @@ LogicalResult ConvertAtenOp<AtenSliceTensorOp>::matchAndRewriteImpl(
48684887
// Duplicate the 1-D index vector across the batch dimension so that we can
48694888
// use a single tosa.gather to materialize the strided slice.
48704889
auto gatherTy = RankedTensorType::get({N, W, C}, elemTy);
4871-
Value gathered =
4872-
tosa::GatherOp::create(rewriter, loc, gatherTy, reshaped, idxNW)
4873-
.getResult();
4890+
Value gathered;
4891+
if (auto intTy = dyn_cast<IntegerType>(elemTy);
4892+
intTy && intTy.getWidth() == 1) {
4893+
auto i8Ty = rewriter.getI8Type();
4894+
auto reshapedI8 =
4895+
tosa::tosaCastTensorToType(
4896+
rewriter, reshaped,
4897+
RankedTensorType::get(makeShapeLLVMCompatible(nkcShape), i8Ty))
4898+
.value();
4899+
auto gatherTyI8 = RankedTensorType::get({N, W, C}, i8Ty);
4900+
auto gatheredI8 =
4901+
tosa::GatherOp::create(rewriter, loc, gatherTyI8, reshapedI8, idxNW);
4902+
gathered =
4903+
tosa::tosaCastTensorToType(rewriter, gatheredI8, gatherTy).value();
4904+
} else {
4905+
gathered = tosa::GatherOp::create(rewriter, loc, gatherTy, reshaped, idxNW)
4906+
.getResult();
4907+
}
48744908

48754909
SmallVector<int64_t> outShape = inputShape;
48764910
outShape[dim] = W;

lib/Conversion/TorchToTosa/TosaLegalizeCommon.cpp

Lines changed: 26 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -420,18 +420,40 @@ std::optional<Value> convertGatherNdOp(PatternRewriter &rewriter, Operation *op,
420420
// Now the gather op itself
421421
// %9 = "tosa.gather"(%2, %7) : (tensor<1x12x1xf32>, tensor<1x8xi32>) ->
422422
// tensor<1x8x1xf32>
423+
auto resultElemTy = resultType.getElementType();
424+
Value valuesForGather = tosaValuesReshapeOp.getResult();
425+
Type gatherElemTy = resultElemTy;
426+
if (auto intTy = dyn_cast<IntegerType>(resultElemTy);
427+
intTy && intTy.getWidth() == 1) {
428+
auto i8Ty = rewriter.getI8Type();
429+
valuesForGather = tosa::tosaCastTensorToType(
430+
rewriter, valuesForGather,
431+
GetTypeFromTensorShape(tosaValuesShape, i8Ty))
432+
.value();
433+
gatherElemTy = i8Ty;
434+
}
435+
423436
auto tosaGatherOp = tosa::CreateOpAndInfer<tosa::GatherOp>(
424437
rewriter, op->getLoc(),
425-
GetTypeFromTensorShape(tosaGatherResultShape,
426-
resultType.getElementType()),
427-
tosaValuesReshapeOp.getResult(), tosaIndicesReshapeOp.getResult());
438+
GetTypeFromTensorShape(tosaGatherResultShape, gatherElemTy),
439+
valuesForGather, tosaIndicesReshapeOp.getResult());
428440

429441
// Finally, reshape back to the original output shape of [Indices,
430442
// ParamChannels]. %10 = "tosa.reshape"(%9) {new_shape = [1, 4, 2]} :
431443
// (tensor<1x8x1xf32>) -> tensor<1x4x2xf32> %11 = torch_c.from_builtin_tensor
432444
// %10 : tensor<1x4x2xf32> -> !torch.vtensor<[1,4,2],f32>
445+
Value gatherResult = tosaGatherOp.getResult();
446+
if (auto intTy = dyn_cast<IntegerType>(resultElemTy);
447+
intTy && intTy.getWidth() == 1) {
448+
gatherResult =
449+
tosa::tosaCastTensorToType(
450+
rewriter, gatherResult,
451+
GetTypeFromTensorShape(tosaGatherResultShape, resultElemTy))
452+
.value();
453+
}
454+
433455
return tosa::CreateOpAndInfer<tosa::ReshapeOp>(
434-
rewriter, op->getLoc(), resultType, tosaGatherOp.getResult(),
456+
rewriter, op->getLoc(), resultType, gatherResult,
435457
tosa::getTosaConstShape(rewriter, op->getLoc(),
436458
resultType.getShape()))
437459
.getResult();

test/Conversion/TorchToTosa/basic.mlir

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1349,6 +1349,47 @@ func.func @torch.aten.gather(%arg0: !torch.vtensor<[1,4,3],f32>, %arg1: !torch.v
13491349
return %0 : !torch.vtensor<[1,4,2],f32>
13501350
}
13511351

1352+
// -----
1353+
// CHECK-LABEL: func.func @torch.aten.gather$bool(
1354+
// CHECK-SAME: %[[VAL_0:.*]]: !torch.vtensor<[1,4,3],i1>,
1355+
// CHECK-SAME: %[[VAL_1:.*]]: !torch.vtensor<[1,4,2],si64>) -> !torch.vtensor<[1,4,2],i1> {
1356+
// CHECK: %[[VAL_2:.*]] = torch_c.to_builtin_tensor %[[VAL_1]] : !torch.vtensor<[1,4,2],si64> -> tensor<1x4x2xi64>
1357+
// CHECK: %[[VAL_3:.*]] = torch_c.to_builtin_tensor %[[VAL_0]] : !torch.vtensor<[1,4,3],i1> -> tensor<1x4x3xi1>
1358+
// CHECK: %[[VAL_4:.*]] = torch.constant.int -1
1359+
// CHECK: %[[VAL_5:.*]] = torch.constant.bool false
1360+
// CHECK: %[[VAL_6:.*]] = tosa.cast %[[VAL_2]] : (tensor<1x4x2xi64>) -> tensor<1x4x2xi32>
1361+
// CHECK: %[[VAL_7:.*]] = tosa.const_shape {values = dense<[1, 4, 2, 1]> : tensor<4xindex>} : () -> !tosa.shape<4>
1362+
// CHECK: %[[VAL_8:.*]] = tosa.reshape %[[VAL_6]], %[[VAL_7]] : (tensor<1x4x2xi32>, !tosa.shape<4>) -> tensor<1x4x2x1xi32>
1363+
// CHECK: %[[VAL_9:.*]] = "tosa.const"() <{values = dense<0> : tensor<1x4x2x1xi32>}> : () -> tensor<1x4x2x1xi32>
1364+
// CHECK: %[[VAL_10:.*]] = "tosa.const"() <{values = dense<{{\[\[}}{{\[\[}}0], [0]], {{\[\[}}1], [1]], {{\[\[}}2], [2]], {{\[\[}}3], [3]]]]> : tensor<1x4x2x1xi32>}> : () -> tensor<1x4x2x1xi32>
1365+
// CHECK: %[[VAL_11:.*]] = tosa.concat %[[VAL_9]], %[[VAL_10]], %[[VAL_8]] {axis = 3 : i32} : (tensor<1x4x2x1xi32>, tensor<1x4x2x1xi32>, tensor<1x4x2x1xi32>) -> tensor<1x4x2x3xi32>
1366+
// CHECK: %[[VAL_12:.*]] = tosa.const_shape {values = dense<[1, 12, 1]> : tensor<3xindex>} : () -> !tosa.shape<3>
1367+
// CHECK: %[[VAL_13:.*]] = tosa.reshape %[[VAL_3]], %[[VAL_12]] : (tensor<1x4x3xi1>, !tosa.shape<3>) -> tensor<1x12x1xi1>
1368+
// CHECK: %[[VAL_14:.*]] = tosa.cast %[[VAL_13]] : (tensor<1x12x1xi1>) -> tensor<1x12x1xi8>
1369+
// CHECK: %[[VAL_15:.*]] = tosa.const_shape {values = dense<[8, 3]> : tensor<2xindex>} : () -> !tosa.shape<2>
1370+
// CHECK: %[[VAL_16:.*]] = tosa.reshape %[[VAL_11]], %[[VAL_15]] : (tensor<1x4x2x3xi32>, !tosa.shape<2>) -> tensor<8x3xi32>
1371+
// CHECK: %[[VAL_17:.*]] = "tosa.const"() <{values = dense<[12, 3, 1]> : tensor<3xi32>}> : () -> tensor<3xi32>
1372+
// CHECK: %[[VAL_18:.*]] = tosa.const_shape {values = dense<[1, 3]> : tensor<2xindex>} : () -> !tosa.shape<2>
1373+
// CHECK: %[[VAL_19:.*]] = tosa.reshape %[[VAL_17]], %[[VAL_18]] : (tensor<3xi32>, !tosa.shape<2>) -> tensor<1x3xi32>
1374+
// CHECK: %[[VAL_20:.*]] = "tosa.const"() <{values = dense<0> : tensor<1xi8>}> : () -> tensor<1xi8>
1375+
// CHECK: %[[VAL_21:.*]] = tosa.mul %[[VAL_16]], %[[VAL_19]], %[[VAL_20]] : (tensor<8x3xi32>, tensor<1x3xi32>, tensor<1xi8>) -> tensor<8x3xi32>
1376+
// CHECK: %[[VAL_22:.*]] = tosa.reduce_sum %[[VAL_21]] {axis = 1 : i32} : (tensor<8x3xi32>) -> tensor<8x1xi32>
1377+
// CHECK: %[[VAL_23:.*]] = tosa.const_shape {values = dense<[1, 8]> : tensor<2xindex>} : () -> !tosa.shape<2>
1378+
// CHECK: %[[VAL_24:.*]] = tosa.reshape %[[VAL_22]], %[[VAL_23]] : (tensor<8x1xi32>, !tosa.shape<2>) -> tensor<1x8xi32>
1379+
// CHECK: %[[VAL_25:.*]] = tosa.gather %[[VAL_14]], %[[VAL_24]] : (tensor<1x12x1xi8>, tensor<1x8xi32>) -> tensor<1x8x1xi8>
1380+
// CHECK: %[[VAL_26:.*]] = tosa.cast %[[VAL_25]] : (tensor<1x8x1xi8>) -> tensor<1x8x1xi1>
1381+
// CHECK: %[[VAL_27:.*]] = tosa.const_shape {values = dense<[1, 4, 2]> : tensor<3xindex>} : () -> !tosa.shape<3>
1382+
// CHECK: %[[VAL_28:.*]] = tosa.reshape %[[VAL_26]], %[[VAL_27]] : (tensor<1x8x1xi1>, !tosa.shape<3>) -> tensor<1x4x2xi1>
1383+
// CHECK: %[[VAL_29:.*]] = torch_c.from_builtin_tensor %[[VAL_28]] : tensor<1x4x2xi1> -> !torch.vtensor<[1,4,2],i1>
1384+
// CHECK: return %[[VAL_29]] : !torch.vtensor<[1,4,2],i1>
1385+
// CHECK: }
1386+
func.func @torch.aten.gather$bool(%arg0: !torch.vtensor<[1,4,3],i1>, %arg1: !torch.vtensor<[1,4,2],si64>) -> !torch.vtensor<[1,4,2],i1> {
1387+
%int-1 = torch.constant.int -1
1388+
%false = torch.constant.bool false
1389+
%0 = torch.aten.gather %arg0, %int-1, %arg1, %false : !torch.vtensor<[1,4,3],i1>, !torch.int, !torch.vtensor<[1,4,2],si64>, !torch.bool -> !torch.vtensor<[1,4,2],i1>
1390+
return %0 : !torch.vtensor<[1,4,2],i1>
1391+
}
1392+
13521393
// -----
13531394
// CHECK-LABEL: func.func @torch.aten.add$int(
13541395
// CHECK-SAME: %[[VAL_0:.*]]: !torch.vtensor<[2,2],si32>,
@@ -1422,6 +1463,25 @@ func.func @torch.aten.slice.negative_start(%arg0: !torch.vtensor<[4,65,256],f32>
14221463
return %0 : !torch.vtensor<[4,16,256],f32>
14231464
}
14241465

1466+
// -----
1467+
// CHECK-LABEL: func.func @torch.aten.slice.bool_strided(
1468+
// CHECK-SAME: %[[VAL_0:.*]]: !torch.vtensor<[1,64,1],i1>) -> !torch.vtensor<[1,32,1],i1> {
1469+
// CHECK: %[[VAL_1:.*]] = torch_c.to_builtin_tensor %[[VAL_0]] : !torch.vtensor<[1,64,1],i1> -> tensor<1x64x1xi1>
1470+
// CHECK: %[[VAL_2:.*]] = tosa.cast %[[VAL_1]] : (tensor<1x64x1xi1>) -> tensor<1x64x1xi8>
1471+
// CHECK: %[[VAL_3:.*]] = tosa.gather %[[VAL_2]], %{{.*}} : (tensor<1x64x1xi8>, tensor<1x32xi32>) -> tensor<1x32x1xi8>
1472+
// CHECK: %[[VAL_4:.*]] = tosa.cast %[[VAL_3]] : (tensor<1x32x1xi8>) -> tensor<1x32x1xi1>
1473+
// CHECK: %[[VAL_5:.*]] = torch_c.from_builtin_tensor %[[VAL_4]] : tensor<1x32x1xi1> -> !torch.vtensor<[1,32,1],i1>
1474+
// CHECK: return %[[VAL_5]] : !torch.vtensor<[1,32,1],i1>
1475+
// CHECK: }
1476+
func.func @torch.aten.slice.bool_strided(%arg0: !torch.vtensor<[1,64,1],i1>) -> !torch.vtensor<[1,32,1],i1> {
1477+
%int1 = torch.constant.int 1
1478+
%int0 = torch.constant.int 0
1479+
%int64 = torch.constant.int 64
1480+
%int2 = torch.constant.int 2
1481+
%0 = torch.aten.slice.Tensor %arg0, %int1, %int0, %int64, %int2 : !torch.vtensor<[1,64,1],i1>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,32,1],i1>
1482+
return %0 : !torch.vtensor<[1,32,1],i1>
1483+
}
1484+
14251485
// -----
14261486
// CHECK-LABEL: func.func @torch.aten.clamp.min_none(
14271487
// CHECK-SAME: %[[VAL_0:.*]]: !torch.vtensor<[1,1,128,128],si64>) -> !torch.vtensor<[1,1,128,128],si64> {

0 commit comments

Comments
 (0)