Skip to content

Commit f97f66a

Browse files
authored
Revert "[language] Skip f16 to f32 promotion in max/min reductions" (#9921)
Reverting as it breaks backward compatibility and needs some time to update. Reverts #9903.
1 parent 8956d90 commit f97f66a

2 files changed

Lines changed: 3 additions & 30 deletions

File tree

python/test/unit/language/test_compile_only.py

Lines changed: 0 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -220,26 +220,3 @@ def fp8_convert(src, dst):
220220
src = ASTSource(fn=fp8_convert, signature={"src": "*fp32", "dst": "*fp8e5"}, constexprs={})
221221
triton.compile(src, target=GPUTarget("cuda", 90, 32))
222222
triton.compile(src, target=GPUTarget("cuda", 80, 32))
223-
224-
225-
def test_f16_min_max_no_promotion():
226-
"""f16 should not get promoted to f32 in min/max reductions."""
227-
228-
@triton.jit
229-
def reduce_min(src, dst):
230-
idx = tl.arange(0, 64)
231-
x = tl.load(src + idx)
232-
tl.store(dst, tl.min(x, axis=0))
233-
234-
@triton.jit
235-
def reduce_max(src, dst):
236-
idx = tl.arange(0, 64)
237-
x = tl.load(src + idx)
238-
tl.store(dst, tl.max(x, axis=0))
239-
240-
targets = [GPUTarget("cuda", 90, 32), GPUTarget("hip", "gfx942", 64)]
241-
for target in targets:
242-
for kernel in [reduce_min, reduce_max]:
243-
f16 = triton.compile(ASTSource(fn=kernel, signature={"src": "*fp16", "dst": "*fp16"}, constexprs={}),
244-
target=target)
245-
assert "arith.extf" not in f16.asm["ttir"], "f16 should not get promoted to f32 in min/max reductions"

python/triton/language/standard.py

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -184,9 +184,7 @@ def max(input, axis=None, return_indices=False, return_indices_tie_break_left=Tr
184184
else:
185185
if core.constexpr(input.dtype.primitive_bitwidth) < core.constexpr(32):
186186
if core.constexpr(input.dtype.is_floating()):
187-
# Do not promote f16 to f32 as it has native hardware support
188-
if not core.constexpr(input.dtype == core.float16):
189-
input = input.to(core.float32)
187+
input = input.to(core.float32)
190188
else:
191189
assert input.dtype.is_int(), "Expecting input to be integer type"
192190
input = input.to(core.int32)
@@ -243,11 +241,9 @@ def min(input, axis=None, return_indices=False, return_indices_tie_break_left=Tr
243241
else:
244242
return core._reduce_with_indices(input, axis, _argmin_combine_tie_break_fast, keep_dims=keep_dims)
245243
else:
246-
if core.constexpr(input.dtype.primitive_bitwidth) < core.constexpr(32):
244+
if core.constexpr(input.dtype.primitive_bitwidth) < 32:
247245
if core.constexpr(input.dtype.is_floating()):
248-
# Do not promote f16 to f32 as it has native hardware support
249-
if not core.constexpr(input.dtype == core.float16):
250-
input = input.to(core.float32)
246+
input = input.to(core.float32)
251247
else:
252248
assert input.dtype.is_int(), "Expecting input to be integer type"
253249
input = input.to(core.int32)

0 commit comments

Comments
 (0)