Skip to content

Commit cd8bb69

Browse files
committed
[TorchToTosa] Cast argmax inputs to f32 and add bf16 test
Ensure tosa.argmax receives f32 inputs by inserting tosa.cast in the argmax lowering (and the min/max-dim argmax paths), and fix the axis attribute type. Add a bf16 argmax conversion test in basic.mlir to validate the cast+extsi sequence.

Signed-off-by: Cathal Corbett <cathal.corbett@arm.com>
Change-Id: I5f8847cc7400152c20001905dfacc92af0c1583c
1 parent 7f1d4b2 commit cd8bb69

File tree

2 files changed: +46 −3 lines changed

lib/Conversion/TorchToTosa/TorchToTosa.cpp

Lines changed: 24 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1501,9 +1501,19 @@ LogicalResult ConvertAtenOp<AtenArgmaxOp>::matchAndRewriteImpl(
15011501
getTypeConverter()->convertType(op.getResult().getType()));
15021502
auto outputETy = resultTy.getElementType();
15031503

1504+
auto ensureF32Input = [&](Value input) -> Value {
1505+
auto inputTy = cast<RankedTensorType>(input.getType());
1506+
if (inputTy.getElementType().isF32())
1507+
return input;
1508+
auto castTy =
1509+
RankedTensorType::get(inputTy.getShape(), rewriter.getF32Type());
1510+
return tosa::CastOp::create(rewriter, op->getLoc(), castTy, input);
1511+
};
1512+
15041513
// Create a single instance of tosa.argmax.
15051514
// Multiple dims require chained construct.
15061515
auto buildArgmax = [&](int64_t reduceDim, Value input) -> Value {
1516+
input = ensureF32Input(input);
15071517
auto inputTy = cast<RankedTensorType>(input.getType());
15081518
auto inputShape = makeShapeTorchCompatible(inputTy.getShape());
15091519
SmallVector<int64_t> outputShapeArr = {};
@@ -1523,7 +1533,7 @@ LogicalResult ConvertAtenOp<AtenArgmaxOp>::matchAndRewriteImpl(
15231533
makeShapeLLVMCompatible(ArrayRef<int64_t>(outputShapeArr)),
15241534
rewriter.getI32Type());
15251535
auto reduceDimAttr =
1526-
rewriter.getIntegerAttr(rewriter.getI64Type(), reduceDim);
1536+
rewriter.getIntegerAttr(rewriter.getI32Type(), reduceDim);
15271537

15281538
// Use default NaN Propagation mode "PROPAGATE" for tosa.argmax
15291539
return tosa::ArgMaxOp::create(
@@ -4692,26 +4702,37 @@ class ConvertAtenMinMaxDimOp : public TorchToTosaOpConversionPattern<AtenOpT> {
46924702

46934703
// To handle ReduceMinDim indices, we apply ArgMaxOp on the negate
46944704
// of the input tensor, which will return indices of input's min values
4705+
auto ensureF32Input = [&](Value input) -> Value {
4706+
auto inputTy = cast<RankedTensorType>(input.getType());
4707+
if (inputTy.getElementType().isF32())
4708+
return input;
4709+
auto castTy =
4710+
RankedTensorType::get(inputTy.getShape(), rewriter.getF32Type());
4711+
return tosa::CastOp::create(rewriter, op->getLoc(), castTy, input);
4712+
};
4713+
46954714
Value argMaxOp;
46964715
if constexpr (std::is_same<AtenOpT, AtenMinDimOp>()) {
46974716
Value negateOp =
46984717
tosa::NegateOp::create(rewriter, op->getLoc(), selfType, self);
4718+
Value argInput = ensureF32Input(negateOp);
46994719

47004720
// Use default NaN Propagation mode "PROPAGATE" for tosa.argmax
47014721
argMaxOp = tosa::ArgMaxOp::create(
47024722
rewriter, op->getLoc(),
47034723
RankedTensorType::get(makeShapeLLVMCompatible(prunedShape),
47044724
indicesElemType),
4705-
negateOp, dimAttr, /*nan_mode=*/
4725+
argInput, dimAttr, /*nan_mode=*/
47064726
tosa::NanPropagationModeAttr::get(
47074727
rewriter.getContext(), tosa::NanPropagationMode::PROPAGATE));
47084728
} else {
4729+
Value argInput = ensureF32Input(self);
47094730
// Use default NaN Propagation mode "PROPAGATE" for tosa.argmax
47104731
argMaxOp = tosa::ArgMaxOp::create(
47114732
rewriter, op->getLoc(),
47124733
RankedTensorType::get(makeShapeLLVMCompatible(prunedShape),
47134734
indicesElemType),
4714-
self, dimAttr, /*nan_mode=*/
4735+
argInput, dimAttr, /*nan_mode=*/
47154736
tosa::NanPropagationModeAttr::get(
47164737
rewriter.getContext(), tosa::NanPropagationMode::PROPAGATE));
47174738
}

test/Conversion/TorchToTosa/basic.mlir

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1076,6 +1076,28 @@ func.func @torch.aten.max.dim$basic(%arg0: tensor<3x2x3xf32>) -> tensor<3x2x1xf3
10761076

10771077
// -----
10781078

// Verifies the bf16 argmax lowering: the input is first cast to f32
// (tosa.argmax requires f32 here), reduced with an i32 axis attribute,
// and the i32 index result is sign-extended to the expected i64 output.
// CHECK-LABEL: func.func @torch.aten.argmax$bf16(
// CHECK-SAME: %[[VAL_0:.*]]: tensor<3x2x3xbf16>) -> tensor<3x2xi64> {
// CHECK: %[[VAL_1:.*]] = torch_c.from_builtin_tensor %[[VAL_0]] : tensor<3x2x3xbf16> -> !torch.vtensor<[3,2,3],bf16>
// CHECK: %[[VAL_2:.*]] = torch_c.to_builtin_tensor %[[VAL_1]] : !torch.vtensor<[3,2,3],bf16> -> tensor<3x2x3xbf16>
// CHECK: %[[VAL_3:.*]] = torch.constant.bool false
// CHECK: %[[VAL_4:.*]] = torch.constant.int 2
// CHECK: %[[VAL_5:.*]] = tosa.cast %[[VAL_2]] : (tensor<3x2x3xbf16>) -> tensor<3x2x3xf32>
// CHECK: %[[VAL_6:.*]] = tosa.argmax %[[VAL_5]] {axis = 2 : i32} : (tensor<3x2x3xf32>) -> tensor<3x2xi32>
// CHECK: %[[VAL_7:.*]] = arith.extsi %[[VAL_6]] : tensor<3x2xi32> to tensor<3x2xi64>
// CHECK: return %{{.*}} : tensor<3x2xi64>
// CHECK: }
func.func @torch.aten.argmax$bf16(%arg0: tensor<3x2x3xbf16>) -> tensor<3x2xi64> {
  %0 = torch_c.from_builtin_tensor %arg0 : tensor<3x2x3xbf16> -> !torch.vtensor<[3,2,3],bf16>
  %false = torch.constant.bool false
  %int2 = torch.constant.int 2
  %1 = torch.aten.argmax %0, %int2, %false : !torch.vtensor<[3,2,3],bf16>, !torch.int, !torch.bool -> !torch.vtensor<[3,2],si64>
  %2 = torch_c.to_builtin_tensor %1 : !torch.vtensor<[3,2],si64> -> tensor<3x2xi64>
  return %2 : tensor<3x2xi64>
}
1098+
1099+
// -----
1100+
10791101
// CHECK-LABEL: @torch.vtensor.literal_si64$basic(
10801102
// CHECK: %[[VAL_0:.*]] = "tosa.const"() <{values = dense<-1> : tensor<1x512xi64>}> : () -> tensor<1x512xi64>
10811103
// CHECK: %[[VAL_1:.*]] = torch_c.from_builtin_tensor %[[VAL_0]] : tensor<1x512xi64> -> !torch.vtensor<[1,512],si64>

0 commit comments

Comments (0)