[CIR][Dialect] Add FMaximumOp and FMinimumOp (#1237)

ghehg · web-flow · commit 8d649fbf37ff · 2024-12-17T14:25:19.000-08:00
There are two sets of intrinsics for floating-point min and max operations: [Maximum](https://mlir.llvm.org/docs/Dialects/LLVM/#llvmintrmaximum-llvmmaximumop) vs. [Maxnum](https://mlir.llvm.org/docs/Dialects/LLVM/#llvmintrmaxnum-llvmmaxnumop), and [Minimum](https://mlir.llvm.org/docs/Dialects/LLVM/#llvmintrminimum-llvmminimumop) vs. [Minnum](https://mlir.llvm.org/docs/Dialects/LLVM/#llvmintrminnum-llvmminnumop). [The difference is whether NaN should be propagated when one of the inputs is NaN](https://llvm.org/docs/LangRef.html#llvm-maximumnum-intrinsic): Maxnum and Minnum return the number when one input is NaN and the other is a number, whereas Maximum and Minimum return NaN (NaN propagation). They also lower to different instructions, such as [FMAX](https://developer.arm.com/documentation/ddi0596/2021-03/SIMD-FP-Instructions/FMAX--vector---Floating-point-Maximum--vector--?lang=en) vs. [FMAXNM](https://developer.arm.com/documentation/ddi0596/2021-03/SIMD-FP-Instructions/FMAXNM--vector---Floating-point-Maximum-Number--vector--?lang=en). Both have use cases. Maxnum and Minnum were already implemented; this change renames those existing ops from `FMaxOp`/`FMinOp` (`cir.fmax`/`cir.fmin`) to `FMaxNumOp`/`FMinNumOp` (`cir.fmaxnum`/`cir.fminnum`) to make the distinction explicit. Maximum and Minimum also have use cases, in [NEON intrinsics](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmax_f32) and [__builtin_elementwise_maximum](https://github.com/llvm/clangir/blob/a989ecb2c55da1fe28e4072c31af025cba6c4f0f/clang/test/CodeGen/strictfp-elementwise-bulitins.cpp#L53), so this change adds `FMaximumOp` and `FMinimumOp` for them.
diff --git a/clang/include/clang/CIR/Dialect/IR/CIROps.td b/clang/include/clang/CIR/Dialect/IR/CIROps.td
@@ -4520,8 +4520,10 @@ class BinaryFPToFPBuiltinOp<string mnemonic, string llvmOpName>
 }
 
 def CopysignOp : BinaryFPToFPBuiltinOp<"copysign", "CopySignOp">;
-def FMaxOp : BinaryFPToFPBuiltinOp<"fmax", "MaxNumOp">;
-def FMinOp : BinaryFPToFPBuiltinOp<"fmin", "MinNumOp">;
+def FMaxNumOp : BinaryFPToFPBuiltinOp<"fmaxnum", "MaxNumOp">;
+def FMinNumOp : BinaryFPToFPBuiltinOp<"fminnum", "MinNumOp">;
+def FMaximumOp : BinaryFPToFPBuiltinOp<"fmaximum", "MaximumOp">;
+def FMinimumOp : BinaryFPToFPBuiltinOp<"fminimum", "MinimumOp">;
 def FModOp : BinaryFPToFPBuiltinOp<"fmod", "FRemOp">;
 def PowOp : BinaryFPToFPBuiltinOp<"pow", "PowOp">;
 
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
@@ -670,7 +670,7 @@ RValue CIRGenFunction::emitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
     case Builtin::BI__builtin_fmaxf:
     case Builtin::BI__builtin_fmaxl:
       return RValue::get(
-          emitBinaryMaybeConstrainedFPBuiltin<cir::FMaxOp>(*this, *E));
+          emitBinaryMaybeConstrainedFPBuiltin<cir::FMaxNumOp>(*this, *E));
 
     case Builtin::BI__builtin_fmaxf16:
     case Builtin::BI__builtin_fmaxf128:
@@ -683,7 +683,7 @@ RValue CIRGenFunction::emitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
     case Builtin::BI__builtin_fminf:
     case Builtin::BI__builtin_fminl:
       return RValue::get(
-          emitBinaryMaybeConstrainedFPBuiltin<cir::FMinOp>(*this, *E));
+          emitBinaryMaybeConstrainedFPBuiltin<cir::FMinNumOp>(*this, *E));
 
     case Builtin::BI__builtin_fminf16:
     case Builtin::BI__builtin_fminf128:
diff --git a/clang/test/CIR/CodeGen/builtin-floating-point.c b/clang/test/CIR/CodeGen/builtin-floating-point.c
@@ -1300,7 +1300,7 @@ long double call_copysignl(long double x, long double y) {
 float my_fmaxf(float x, float y) {
   return __builtin_fmaxf(x, y);
   // CHECK: cir.func @my_fmaxf
-  // CHECK:   %{{.+}} = cir.fmax %{{.+}}, %{{.+}} : !cir.float
+  // CHECK:   %{{.+}} = cir.fmaxnum %{{.+}}, %{{.+}} : !cir.float
 
   // LLVM: define dso_local float @my_fmaxf
   // LLVM:   %{{.+}} = call float @llvm.maxnum.f32(float %{{.+}}, float %{{.+}})
@@ -1310,7 +1310,7 @@ float my_fmaxf(float x, float y) {
 double my_fmax(double x, double y) {
   return __builtin_fmax(x, y);
   // CHECK: cir.func @my_fmax
-  // CHECK:   %{{.+}} = cir.fmax %{{.+}}, %{{.+}} : !cir.double
+  // CHECK:   %{{.+}} = cir.fmaxnum %{{.+}}, %{{.+}} : !cir.double
 
   // LLVM: define dso_local double @my_fmax
   // LLVM:   %{{.+}} = call double @llvm.maxnum.f64(double %{{.+}}, double %{{.+}})
@@ -1320,8 +1320,8 @@ double my_fmax(double x, double y) {
 long double my_fmaxl(long double x, long double y) {
   return __builtin_fmaxl(x, y);
   // CHECK: cir.func @my_fmaxl
-  // CHECK:   %{{.+}} = cir.fmax %{{.+}}, %{{.+}} : !cir.long_double<!cir.f80>
-  // AARCH64: %{{.+}} = cir.fmax %{{.+}}, %{{.+}} : !cir.long_double<!cir.double>
+  // CHECK:   %{{.+}} = cir.fmaxnum %{{.+}}, %{{.+}} : !cir.long_double<!cir.f80>
+  // AARCH64: %{{.+}} = cir.fmaxnum %{{.+}}, %{{.+}} : !cir.long_double<!cir.double>
 
   // LLVM: define dso_local x86_fp80 @my_fmaxl
   // LLVM:   %{{.+}} = call x86_fp80 @llvm.maxnum.f80(x86_fp80 %{{.+}}, x86_fp80 %{{.+}})
@@ -1335,7 +1335,7 @@ long double fmaxl(long double, long double);
 float call_fmaxf(float x, float y) {
   return fmaxf(x, y);
   // CHECK: cir.func @call_fmaxf
-  // CHECK:   %{{.+}} = cir.fmax %{{.+}}, %{{.+}} : !cir.float
+  // CHECK:   %{{.+}} = cir.fmaxnum %{{.+}}, %{{.+}} : !cir.float
 
   // LLVM: define dso_local float @call_fmaxf
   // LLVM:   %{{.+}} = call float @llvm.maxnum.f32(float %{{.+}}, float %{{.+}})
@@ -1345,7 +1345,7 @@ float call_fmaxf(float x, float y) {
 double call_fmax(double x, double y) {
   return fmax(x, y);
   // CHECK: cir.func @call_fmax
-  // CHECK:   %{{.+}} = cir.fmax %{{.+}}, %{{.+}} : !cir.double
+  // CHECK:   %{{.+}} = cir.fmaxnum %{{.+}}, %{{.+}} : !cir.double
 
   // LLVM: define dso_local double @call_fmax
   // LLVM:   %{{.+}} = call double @llvm.maxnum.f64(double %{{.+}}, double %{{.+}})
@@ -1355,8 +1355,8 @@ double call_fmax(double x, double y) {
 long double call_fmaxl(long double x, long double y) {
   return fmaxl(x, y);
   // CHECK: cir.func @call_fmaxl
-  // CHECK:   %{{.+}} = cir.fmax %{{.+}}, %{{.+}} : !cir.long_double<!cir.f80>
-  // AARCH64: %{{.+}} = cir.fmax %{{.+}}, %{{.+}} : !cir.long_double<!cir.double>
+  // CHECK:   %{{.+}} = cir.fmaxnum %{{.+}}, %{{.+}} : !cir.long_double<!cir.f80>
+  // AARCH64: %{{.+}} = cir.fmaxnum %{{.+}}, %{{.+}} : !cir.long_double<!cir.double>
 
   // LLVM: define dso_local x86_fp80 @call_fmaxl
   // LLVM:   %{{.+}} = call x86_fp80 @llvm.maxnum.f80(x86_fp80 %{{.+}}, x86_fp80 %{{.+}})
@@ -1368,7 +1368,7 @@ long double call_fmaxl(long double x, long double y) {
 float my_fminf(float x, float y) {
   return __builtin_fminf(x, y);
   // CHECK: cir.func @my_fminf
-  // CHECK:   %{{.+}} = cir.fmin %{{.+}}, %{{.+}} : !cir.float
+  // CHECK:   %{{.+}} = cir.fminnum %{{.+}}, %{{.+}} : !cir.float
 
   // LLVM: define dso_local float @my_fminf
   // LLVM:   %{{.+}} = call float @llvm.minnum.f32(float %{{.+}}, float %{{.+}})
@@ -1378,7 +1378,7 @@ float my_fminf(float x, float y) {
 double my_fmin(double x, double y) {
   return __builtin_fmin(x, y);
   // CHECK: cir.func @my_fmin
-  // CHECK:   %{{.+}} = cir.fmin %{{.+}}, %{{.+}} : !cir.double
+  // CHECK:   %{{.+}} = cir.fminnum %{{.+}}, %{{.+}} : !cir.double
 
   // LLVM: define dso_local double @my_fmin
   // LLVM:   %{{.+}} = call double @llvm.minnum.f64(double %{{.+}}, double %{{.+}})
@@ -1388,8 +1388,8 @@ double my_fmin(double x, double y) {
 long double my_fminl(long double x, long double y) {
   return __builtin_fminl(x, y);
   // CHECK: cir.func @my_fminl
-  // CHECK:   %{{.+}} = cir.fmin %{{.+}}, %{{.+}} : !cir.long_double<!cir.f80>
-  // AARCH64: %{{.+}} = cir.fmin %{{.+}}, %{{.+}} : !cir.long_double<!cir.double>
+  // CHECK:   %{{.+}} = cir.fminnum %{{.+}}, %{{.+}} : !cir.long_double<!cir.f80>
+  // AARCH64: %{{.+}} = cir.fminnum %{{.+}}, %{{.+}} : !cir.long_double<!cir.double>
 
   // LLVM: define dso_local x86_fp80 @my_fminl
   // LLVM:   %{{.+}} = call x86_fp80 @llvm.minnum.f80(x86_fp80 %{{.+}}, x86_fp80 %{{.+}})
@@ -1403,7 +1403,7 @@ long double fminl(long double, long double);
 float call_fminf(float x, float y) {
   return fminf(x, y);
   // CHECK: cir.func @call_fminf
-  // CHECK:   %{{.+}} = cir.fmin %{{.+}}, %{{.+}} : !cir.float
+  // CHECK:   %{{.+}} = cir.fminnum %{{.+}}, %{{.+}} : !cir.float
 
   // LLVM: define dso_local float @call_fminf
   // LLVM:   %{{.+}} = call float @llvm.minnum.f32(float %{{.+}}, float %{{.+}})
@@ -1413,7 +1413,7 @@ float call_fminf(float x, float y) {
 double call_fmin(double x, double y) {
   return fmin(x, y);
   // CHECK: cir.func @call_fmin
-  // CHECK:   %{{.+}} = cir.fmin %{{.+}}, %{{.+}} : !cir.double
+  // CHECK:   %{{.+}} = cir.fminnum %{{.+}}, %{{.+}} : !cir.double
 
   // LLVM: define dso_local double @call_fmin
   // LLVM:   %{{.+}} = call double @llvm.minnum.f64(double %{{.+}}, double %{{.+}})
@@ -1423,8 +1423,8 @@ double call_fmin(double x, double y) {
 long double call_fminl(long double x, long double y) {
   return fminl(x, y);
   // CHECK: cir.func @call_fminl
-  // CHECK:   %{{.+}} = cir.fmin %{{.+}}, %{{.+}} : !cir.long_double<!cir.f80>
-  // AARCH64: %{{.+}} = cir.fmin %{{.+}}, %{{.+}} : !cir.long_double<!cir.double>
+  // CHECK:   %{{.+}} = cir.fminnum %{{.+}}, %{{.+}} : !cir.long_double<!cir.f80>
+  // AARCH64: %{{.+}} = cir.fminnum %{{.+}}, %{{.+}} : !cir.long_double<!cir.double>
 
   // LLVM: define dso_local x86_fp80 @call_fminl
   // LLVM:   %{{.+}} = call x86_fp80 @llvm.minnum.f80(x86_fp80 %{{.+}}, x86_fp80 %{{.+}})
diff --git a/clang/test/CIR/Lowering/builtin-floating-point.cir b/clang/test/CIR/Lowering/builtin-floating-point.cir
@@ -138,22 +138,22 @@ module {
     %215 = cir.copysign %arg2, %arg2 : !cir.vector<!cir.float x 4>
     // CHECK: llvm.intr.copysign(%arg2, %arg2) : (vector<4xf32>, vector<4xf32>) -> vector<4xf32>
 
-    %16 = cir.fmax %arg0, %arg0 : !cir.float
+    %16 = cir.fmaxnum %arg0, %arg0 : !cir.float
     // CHECK: llvm.intr.maxnum(%arg0, %arg0) : (f32, f32) -> f32
 
-    %116 = cir.fmax %arg1, %arg1 : !cir.vector<!cir.double x 2>
+    %116 = cir.fmaxnum %arg1, %arg1 : !cir.vector<!cir.double x 2>
     // CHECK: llvm.intr.maxnum(%arg1, %arg1) : (vector<2xf64>, vector<2xf64>) -> vector<2xf64>
 
-    %216 = cir.fmax %arg2, %arg2 : !cir.vector<!cir.float x 4>
+    %216 = cir.fmaxnum %arg2, %arg2 : !cir.vector<!cir.float x 4>
     // CHECK: llvm.intr.maxnum(%arg2, %arg2) : (vector<4xf32>, vector<4xf32>) -> vector<4xf32>
 
-    %17 = cir.fmin %arg0, %arg0 : !cir.float
+    %17 = cir.fminnum %arg0, %arg0 : !cir.float
     // CHECK: llvm.intr.minnum(%arg0, %arg0) : (f32, f32) -> f32
 
-    %117 = cir.fmin %arg1, %arg1 : !cir.vector<!cir.double x 2>
+    %117 = cir.fminnum %arg1, %arg1 : !cir.vector<!cir.double x 2>
     // CHECK: llvm.intr.minnum(%arg1, %arg1) : (vector<2xf64>, vector<2xf64>) -> vector<2xf64>
 
-    %217 = cir.fmin %arg2, %arg2 : !cir.vector<!cir.float x 4>
+    %217 = cir.fminnum %arg2, %arg2 : !cir.vector<!cir.float x 4>
     // CHECK: llvm.intr.minnum(%arg2, %arg2) : (vector<4xf32>, vector<4xf32>) -> vector<4xf32>
 
     %18 = cir.fmod %arg0, %arg0 : !cir.float
@@ -174,6 +174,24 @@ module {
     %219 = cir.pow %arg2, %arg2 : !cir.vector<!cir.float x 4>
     // CHECK: llvm.intr.pow(%arg2, %arg2) : (vector<4xf32>, vector<4xf32>) -> vector<4xf32>
 
+    %20 = cir.fmaximum %arg0, %arg0 : !cir.float
+    // CHECK: llvm.intr.maximum(%arg0, %arg0) : (f32, f32) -> f32
+
+    %120 = cir.fmaximum %arg1, %arg1 : !cir.vector<!cir.double x 2>
+    // CHECK: llvm.intr.maximum(%arg1, %arg1) : (vector<2xf64>, vector<2xf64>) -> vector<2xf64>
+
+    %220 = cir.fmaximum %arg2, %arg2 : !cir.vector<!cir.float x 4>
+    // CHECK: llvm.intr.maximum(%arg2, %arg2) : (vector<4xf32>, vector<4xf32>) -> vector<4xf32>
+
+    %21 = cir.fminimum %arg0, %arg0 : !cir.float
+    // CHECK: llvm.intr.minimum(%arg0, %arg0) : (f32, f32) -> f32
+
+    %121 = cir.fminimum %arg1, %arg1 : !cir.vector<!cir.double x 2>
+    // CHECK: llvm.intr.minimum(%arg1, %arg1) : (vector<2xf64>, vector<2xf64>) -> vector<2xf64>
+
+    %221 = cir.fminimum %arg2, %arg2 : !cir.vector<!cir.float x 4>
+    // CHECK: llvm.intr.minimum(%arg2, %arg2) : (vector<4xf32>, vector<4xf32>) -> vector<4xf32>
+
     cir.return
   }
 }