Skip to content

Commit f5742c4

Browse files
committed
Add bf16 e2e test and update utils func
Signed-off-by: Cathal Corbett <cathal.corbett@arm.com>
Change-Id: I5e240a4dbaaee76d11e5895c82d93b718aabeff2
1 parent cd8bb69 commit f5742c4

File tree

4 files changed

+39
-21
lines changed

4 files changed

+39
-21
lines changed

include/torch-mlir/Conversion/TorchToTosa/TosaLegalizeUtils.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,9 @@ std::optional<Value> getConstTensor(PatternRewriter &rewriter, Operation *op,
6464
std::optional<Value> tosaCastTensorToType(PatternRewriter &rewriter, Value src,
6565
TensorType destType);
6666

67+
// Ensure TOSA argmax input is f32 by inserting a tosa.cast when needed.
68+
Value ensureF32Input(PatternRewriter &rewriter, Operation *op, Value input);
69+
6770
// Creates a TOSA operation and performs shape inference on the individual
6871
// op. This allows shape inference during the framework to TOSA lowering.
6972
template <typename TosaOp, typename... Args>

lib/Conversion/TorchToTosa/TorchToTosa.cpp

Lines changed: 4 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1501,19 +1501,10 @@ LogicalResult ConvertAtenOp<AtenArgmaxOp>::matchAndRewriteImpl(
15011501
getTypeConverter()->convertType(op.getResult().getType()));
15021502
auto outputETy = resultTy.getElementType();
15031503

1504-
auto ensureF32Input = [&](Value input) -> Value {
1505-
auto inputTy = cast<RankedTensorType>(input.getType());
1506-
if (inputTy.getElementType().isF32())
1507-
return input;
1508-
auto castTy =
1509-
RankedTensorType::get(inputTy.getShape(), rewriter.getF32Type());
1510-
return tosa::CastOp::create(rewriter, op->getLoc(), castTy, input);
1511-
};
1512-
15131504
// Create a single instance of tosa.argmax.
15141505
// Multiple dims require chained construct.
15151506
auto buildArgmax = [&](int64_t reduceDim, Value input) -> Value {
1516-
input = ensureF32Input(input);
1507+
input = tosa::ensureF32Input(rewriter, op.getOperation(), input);
15171508
auto inputTy = cast<RankedTensorType>(input.getType());
15181509
auto inputShape = makeShapeTorchCompatible(inputTy.getShape());
15191510
SmallVector<int64_t> outputShapeArr = {};
@@ -4702,20 +4693,12 @@ class ConvertAtenMinMaxDimOp : public TorchToTosaOpConversionPattern<AtenOpT> {
47024693

47034694
// To handle ReduceMinDim indices, we apply ArgMaxOp on the negate
47044695
// of the input tensor, which will return indices of input's min values
4705-
auto ensureF32Input = [&](Value input) -> Value {
4706-
auto inputTy = cast<RankedTensorType>(input.getType());
4707-
if (inputTy.getElementType().isF32())
4708-
return input;
4709-
auto castTy =
4710-
RankedTensorType::get(inputTy.getShape(), rewriter.getF32Type());
4711-
return tosa::CastOp::create(rewriter, op->getLoc(), castTy, input);
4712-
};
4713-
47144696
Value argMaxOp;
47154697
if constexpr (std::is_same<AtenOpT, AtenMinDimOp>()) {
47164698
Value negateOp =
47174699
tosa::NegateOp::create(rewriter, op->getLoc(), selfType, self);
4718-
Value argInput = ensureF32Input(negateOp);
4700+
Value argInput =
4701+
tosa::ensureF32Input(rewriter, op.getOperation(), negateOp);
47194702

47204703
// Use default NaN Propagation mode "PROPAGATE" for tosa.argmax
47214704
argMaxOp = tosa::ArgMaxOp::create(
@@ -4726,7 +4709,7 @@ class ConvertAtenMinMaxDimOp : public TorchToTosaOpConversionPattern<AtenOpT> {
47264709
tosa::NanPropagationModeAttr::get(
47274710
rewriter.getContext(), tosa::NanPropagationMode::PROPAGATE));
47284711
} else {
4729-
Value argInput = ensureF32Input(self);
4712+
Value argInput = tosa::ensureF32Input(rewriter, op.getOperation(), self);
47304713
// Use default NaN Propagation mode "PROPAGATE" for tosa.argmax
47314714
argMaxOp = tosa::ArgMaxOp::create(
47324715
rewriter, op->getLoc(),

lib/Conversion/TorchToTosa/TosaLegalizeUtils.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -381,6 +381,15 @@ std::optional<Value> tosaCastTensorToType(PatternRewriter &rewriter, Value src,
381381
return tosa::CastOp::create(rewriter, op->getLoc(), castedSrcType, src);
382382
}
383383

384+
// Ensure a tensor value has an f32 element type, materializing a tosa.cast
// in front of it when it does not (e.g. bf16/f16 inputs to tosa.argmax).
//
// \param rewriter  Pattern rewriter used to create the cast op.
// \param op        Operation whose location is attached to the new cast.
// \param input     Tensor value to normalize; must have RankedTensorType.
// \returns `input` unchanged when it is already f32, otherwise the result
//          of a tosa.cast to an f32 tensor of the same shape.
Value ensureF32Input(PatternRewriter &rewriter, Operation *op, Value input) {
  auto tensorTy = cast<RankedTensorType>(input.getType());
  if (!tensorTy.getElementType().isF32()) {
    // Same shape, element type swapped to f32.
    auto f32TensorTy =
        RankedTensorType::get(tensorTy.getShape(), rewriter.getF32Type());
    return tosa::CastOp::create(rewriter, op->getLoc(), f32TensorTy, input);
  }
  return input;
}
392+
384393
// Template instantiation
385394
template std::optional<Value>
386395
getConstTensor<bool>(PatternRewriter &, Operation *, ArrayRef<bool> vec,

projects/pt1/python/torch_mlir_e2e_test/test_suite/reduction.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1642,6 +1642,29 @@ def ArgmaxModule_basic(module, tu: TestUtils):
16421642
# ==============================================================================
16431643

16441644

1645+
class ArgmaxBFloat16Module(torch.nn.Module):
    """E2E test module: flattened argmax over a dynamically-shaped rank-2
    bfloat16 tensor (no `dim` argument, so aten.argmax reduces over all
    elements)."""

    def __init__(self):
        super().__init__()

    @export
    @annotate_args([None, ([-1, -1], torch.bfloat16, True)])
    def forward(self, a):
        # Call the raw aten op so the lowering under test is exercised
        # directly rather than going through a composite decomposition.
        return torch.ops.aten.argmax(a)
1658+
1659+
1660+
@register_test_case(module_factory=lambda: ArgmaxBFloat16Module())
def ArgmaxBFloat16Module_basic(module, tu: TestUtils):
    # Drive the module with a random 3x4 tensor downcast to bfloat16.
    sample = tu.rand(3, 4).to(torch.bfloat16)
    module.forward(sample)
1663+
1664+
1665+
# ==============================================================================
1666+
1667+
16451668
class ArgmaxKeepdimModule(torch.nn.Module):
16461669
def __init__(self):
16471670
super().__init__()

0 commit comments

Comments
 (0)