[msan] Implement support for avx512fp16.mask.{add/sub/mul/div/max/min}.sh.round #137441


Merged (7 commits, Apr 29, 2025)

Conversation

thurstond (Contributor)

This adds a handler, visitGenericScalarHalfwordInst, which works for mask.{add/sub/mul/div/max/min}.sh.round.

Updates the tests in #136260
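
For readers unfamiliar with these intrinsics, the following is a minimal model (illustrative only, not code from this patch) of the value semantics that the shadow propagation mirrors, with add standing in for any of the six operations and float standing in for half; the rounding-mode operand is omitted because it affects only the arithmetic, not which lanes are written:

  #include <stdint.h>

  // Hedged model of llvm.x86.avx512fp16.mask.add.sh.round: only lane 0 is
  // computed (or written through); lanes 1..7 pass through from A.
  static void mask_add_sh_model(float dst[8], const float a[8],
                                const float b[8], const float writethru[8],
                                uint8_t mask) {
    dst[0] = (mask & 1) ? a[0] + b[0]    // lane 0: scalar op if mask bit set
                        : writethru[0];  // lane 0: write-through otherwise
    for (int i = 1; i < 8; i++)
      dst[i] = a[i];                     // upper lanes copied from A
  }

This is why the handler below ORs only the lane-0 shadows of A and B, selects against WriteThru's lane-0 shadow on the mask bit, and propagates A's shadow unchanged for lanes 1..7.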

@llvmbot (Member)

llvmbot commented Apr 26, 2025

@llvm/pr-subscribers-compiler-rt-sanitizer

Author: Thurston Dang (thurstond)

Changes

This adds a handler, visitGenericScalarHalfwordInst, which works for mask.{add/sub/mul/div/max/min}.sh.round.

Updates the tests in #136260


Patch is 47.87 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/137441.diff

2 Files Affected:

  • (modified) llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp (+69)
  • (modified) llvm/test/Instrumentation/MemorySanitizer/X86/avx512fp16-intrinsics.ll (+237-220)
diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index 8e31e8d2a4fbd..9f4708e14aa6a 100644
--- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -4312,6 +4312,65 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
     setOriginForNaryOp(I);
   }
 
+  // For sh compiler intrinsics:
+  // llvm.x86.avx512fp16.mask.{add/sub/mul/div/max/min}.sh.round
+  //   (<8 x half>, <8 x half>, <8 x half>, i8,  i32)
+  //    A           B           WriteThru   Mask RoundingMode
+  //
+  // if (Mask[0])
+  //   DstShadow[0] = AShadow[0] | BShadow[0]
+  // else
+  //   DstShadow[0] = WriteThruShadow[0]
+  //
+  // DstShadow[1..7] = AShadow[1..7]
+  void visitGenericScalarHalfwordInst(IntrinsicInst &I) {
+    IRBuilder<> IRB(&I);
+
+    assert(I.arg_size() == 5);
+    Value *A = I.getOperand(0);
+    Value *B = I.getOperand(1);
+    Value *WriteThrough = I.getOperand(2);
+    Value *Mask = I.getOperand(3);
+    Value *RoundingMode = I.getOperand(4);
+
+    // Technically, we could probably just check whether the LSB is initialized
+    insertShadowCheck(Mask, &I);
+    insertShadowCheck(RoundingMode, &I);
+
+    assert(isa<FixedVectorType>(A->getType()));
+    unsigned NumElements =
+        cast<FixedVectorType>(A->getType())->getNumElements();
+    assert(NumElements == 8);
+    assert(A->getType() == B->getType());
+    assert(B->getType() == WriteThrough->getType());
+    assert(Mask->getType()->getPrimitiveSizeInBits() == NumElements);
+    assert(RoundingMode->getType()->isIntegerTy());
+
+    Mask = IRB.CreateBitCast(
+        Mask, FixedVectorType::get(IRB.getInt1Ty(), NumElements));
+
+    Value *AShadow = getShadow(A);
+    Value *BShadow = getShadow(B);
+    Value *ABLowerShadow =
+        IRB.CreateOr(IRB.CreateExtractElement(
+                         AShadow, ConstantInt::get(IRB.getInt32Ty(), 0)),
+                     IRB.CreateExtractElement(
+                         BShadow, ConstantInt::get(IRB.getInt32Ty(), 0)));
+    Value *WriteThroughShadow = getShadow(WriteThrough);
+    Value *WriteThroughLowerShadow = IRB.CreateExtractElement(
+        WriteThroughShadow, ConstantInt::get(IRB.getInt32Ty(), 0));
+
+    Value *DstLowerShadow = IRB.CreateSelect(
+        IRB.CreateExtractElement(Mask, ConstantInt::get(IRB.getInt32Ty(), 0)),
+        ABLowerShadow, WriteThroughLowerShadow);
+    Value *DstShadow = IRB.CreateInsertElement(
+        AShadow, DstLowerShadow, ConstantInt::get(IRB.getInt32Ty(), 0),
+        "_msprop");
+
+    setShadow(&I, DstShadow);
+    setOriginForNaryOp(I);
+  }
+
   // Handle Arm NEON vector load intrinsics (vld*).
   //
   // The WithLane instructions (ld[234]lane) are similar to:
@@ -5041,6 +5100,16 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
       break;
     }
 
+    case Intrinsic::x86_avx512fp16_mask_add_sh_round:
+    case Intrinsic::x86_avx512fp16_mask_sub_sh_round:
+    case Intrinsic::x86_avx512fp16_mask_mul_sh_round:
+    case Intrinsic::x86_avx512fp16_mask_div_sh_round:
+    case Intrinsic::x86_avx512fp16_mask_max_sh_round:
+    case Intrinsic::x86_avx512fp16_mask_min_sh_round: {
+      visitGenericScalarHalfwordInst(I);
+      break;
+    }
+
     case Intrinsic::fshl:
     case Intrinsic::fshr:
       handleFunnelShift(I);
diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512fp16-intrinsics.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512fp16-intrinsics.ll
index 61a32e5e2042e..b11b21da492d2 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512fp16-intrinsics.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512fp16-intrinsics.ll
@@ -13,9 +13,6 @@
 ; - llvm.x86.avx512fp16.mask.getexp.sh
 ; - llvm.x86.avx512fp16.mask.getmant.ph.512
 ; - llvm.x86.avx512fp16.mask.getmant.sh
-; - llvm.x86.avx512fp16.mask.max.sh.round
-; - llvm.x86.avx512fp16.mask.min.sh.round
-; - llvm.x86.avx512fp16.mask.mul.sh.round
 ; - llvm.x86.avx512fp16.mask.rcp.ph.512
 ; - llvm.x86.avx512fp16.mask.rcp.sh
 ; - llvm.x86.avx512fp16.mask.reduce.ph.512
@@ -27,7 +24,6 @@
 ; - llvm.x86.avx512fp16.mask.scalef.ph.512
 ; - llvm.x86.avx512fp16.mask.scalef.sh
 ; - llvm.x86.avx512fp16.mask.sqrt.sh
-; - llvm.x86.avx512fp16.mask.sub.sh.round
 ; - llvm.x86.avx512fp16.mask.vcvtph2dq.512
 ; - llvm.x86.avx512fp16.mask.vcvtph2qq.512
 ; - llvm.x86.avx512fp16.mask.vcvtph2udq.512
@@ -1393,8 +1389,8 @@ define <8 x half> @test_int_x86_avx512fp16_mask_add_sh(<8 x half> %x1, <8 x half
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 56) to ptr), align 8
 ; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
-; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
 ; CHECK-NEXT:    [[TMP5:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
+; CHECK-NEXT:    [[TMP6:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
 ; CHECK-NEXT:    br i1 [[_MSCMP]], label %[[BB6:.*]], label %[[BB7:.*]], !prof [[PROF1]]
@@ -1409,54 +1405,57 @@ define <8 x half> @test_int_x86_avx512fp16_mask_add_sh(<8 x half> %x1, <8 x half
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load i16, ptr [[TMP10]], align 2
 ; CHECK-NEXT:    [[_MSPROP:%.*]] = insertelement <8 x i16> splat (i16 -1), i16 [[_MSLD]], i32 0
 ; CHECK-NEXT:    [[VAL:%.*]] = insertelement <8 x half> poison, half [[VAL_HALF]], i32 0
-; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
-; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i128 [[TMP11]], 0
-; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
-; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i128 [[TMP12]], 0
-; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]]
-; CHECK-NEXT:    br i1 [[_MSOR]], label %[[BB13:.*]], label %[[BB14:.*]], !prof [[PROF1]]
-; CHECK:       [[BB13]]:
-; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR8]]
-; CHECK-NEXT:    unreachable
-; CHECK:       [[BB14]]:
+; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <8 x i16> [[TMP2]], i32 0
+; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <8 x i16> [[TMP3]], i32 0
+; CHECK-NEXT:    [[TMP13:%.*]] = or i16 [[TMP11]], [[TMP12]]
+; CHECK-NEXT:    [[TMP14:%.*]] = select i1 true, i16 [[TMP13]], i16 0
+; CHECK-NEXT:    [[_MSPROP1:%.*]] = insertelement <8 x i16> [[TMP2]], i16 [[TMP14]], i32 0
 ; CHECK-NEXT:    [[RES0:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.add.sh.round(<8 x half> [[X1]], <8 x half> [[X2]], <8 x half> zeroinitializer, i8 -1, i32 4)
-; CHECK-NEXT:    [[TMP15:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
-; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i128 [[TMP15]], 0
-; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <8 x i16> [[TMP4]] to i128
-; CHECK-NEXT:    [[_MSCMP4:%.*]] = icmp ne i128 [[TMP16]], 0
-; CHECK-NEXT:    [[_MSOR5:%.*]] = or i1 [[_MSCMP3]], [[_MSCMP4]]
+; CHECK-NEXT:    [[TMP15:%.*]] = bitcast i8 [[MASK]] to <8 x i1>
+; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <8 x i16> [[_MSPROP1]], i32 0
+; CHECK-NEXT:    [[TMP17:%.*]] = extractelement <8 x i16> [[TMP3]], i32 0
+; CHECK-NEXT:    [[TMP18:%.*]] = or i16 [[TMP16]], [[TMP17]]
+; CHECK-NEXT:    [[TMP19:%.*]] = extractelement <8 x i16> [[TMP6]], i32 0
+; CHECK-NEXT:    [[TMP20:%.*]] = extractelement <8 x i1> [[TMP15]], i32 0
+; CHECK-NEXT:    [[TMP21:%.*]] = select i1 [[TMP20]], i16 [[TMP18]], i16 [[TMP19]]
+; CHECK-NEXT:    [[_MSPROP2:%.*]] = insertelement <8 x i16> [[_MSPROP1]], i16 [[TMP21]], i32 0
 ; CHECK-NEXT:    [[_MSCMP6:%.*]] = icmp ne i8 [[TMP5]], 0
-; CHECK-NEXT:    [[_MSOR7:%.*]] = or i1 [[_MSOR5]], [[_MSCMP6]]
-; CHECK-NEXT:    br i1 [[_MSOR7]], label %[[BB17:.*]], label %[[BB18:.*]], !prof [[PROF1]]
-; CHECK:       [[BB17]]:
+; CHECK-NEXT:    br i1 [[_MSCMP6]], label %[[BB22:.*]], label %[[BB23:.*]], !prof [[PROF1]]
+; CHECK:       [[BB22]]:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR8]]
 ; CHECK-NEXT:    unreachable
-; CHECK:       [[BB18]]:
+; CHECK:       [[BB23]]:
 ; CHECK-NEXT:    [[RES1:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.add.sh.round(<8 x half> [[RES0]], <8 x half> [[X2]], <8 x half> [[SRC]], i8 [[MASK]], i32 4)
-; CHECK-NEXT:    [[TMP19:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
-; CHECK-NEXT:    [[_MSCMP8:%.*]] = icmp ne i128 [[TMP19]], 0
+; CHECK-NEXT:    [[TMP24:%.*]] = bitcast i8 [[MASK]] to <8 x i1>
+; CHECK-NEXT:    [[TMP25:%.*]] = extractelement <8 x i16> [[_MSPROP2]], i32 0
+; CHECK-NEXT:    [[TMP26:%.*]] = extractelement <8 x i16> [[TMP3]], i32 0
+; CHECK-NEXT:    [[TMP27:%.*]] = or i16 [[TMP25]], [[TMP26]]
+; CHECK-NEXT:    [[TMP28:%.*]] = extractelement <8 x i1> [[TMP24]], i32 0
+; CHECK-NEXT:    [[TMP29:%.*]] = select i1 [[TMP28]], i16 [[TMP27]], i16 0
+; CHECK-NEXT:    [[_MSPROP3:%.*]] = insertelement <8 x i16> [[_MSPROP2]], i16 [[TMP29]], i32 0
 ; CHECK-NEXT:    [[_MSCMP9:%.*]] = icmp ne i8 [[TMP5]], 0
-; CHECK-NEXT:    [[_MSOR10:%.*]] = or i1 [[_MSCMP8]], [[_MSCMP9]]
-; CHECK-NEXT:    br i1 [[_MSOR10]], label %[[BB20:.*]], label %[[BB21:.*]], !prof [[PROF1]]
-; CHECK:       [[BB20]]:
+; CHECK-NEXT:    br i1 [[_MSCMP9]], label %[[BB30:.*]], label %[[BB31:.*]], !prof [[PROF1]]
+; CHECK:       [[BB30]]:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR8]]
 ; CHECK-NEXT:    unreachable
-; CHECK:       [[BB21]]:
+; CHECK:       [[BB31]]:
 ; CHECK-NEXT:    [[RES2:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.add.sh.round(<8 x half> [[RES1]], <8 x half> [[X2]], <8 x half> zeroinitializer, i8 [[MASK]], i32 4)
-; CHECK-NEXT:    [[TMP22:%.*]] = bitcast <8 x i16> [[_MSPROP]] to i128
-; CHECK-NEXT:    [[_MSCMP11:%.*]] = icmp ne i128 [[TMP22]], 0
-; CHECK-NEXT:    [[TMP23:%.*]] = bitcast <8 x i16> [[TMP4]] to i128
-; CHECK-NEXT:    [[_MSCMP12:%.*]] = icmp ne i128 [[TMP23]], 0
-; CHECK-NEXT:    [[_MSOR13:%.*]] = or i1 [[_MSCMP11]], [[_MSCMP12]]
+; CHECK-NEXT:    [[TMP32:%.*]] = bitcast i8 [[MASK]] to <8 x i1>
+; CHECK-NEXT:    [[TMP33:%.*]] = extractelement <8 x i16> [[_MSPROP3]], i32 0
+; CHECK-NEXT:    [[TMP34:%.*]] = extractelement <8 x i16> [[_MSPROP]], i32 0
+; CHECK-NEXT:    [[TMP35:%.*]] = or i16 [[TMP33]], [[TMP34]]
+; CHECK-NEXT:    [[TMP36:%.*]] = extractelement <8 x i16> [[TMP6]], i32 0
+; CHECK-NEXT:    [[TMP37:%.*]] = extractelement <8 x i1> [[TMP32]], i32 0
+; CHECK-NEXT:    [[TMP38:%.*]] = select i1 [[TMP37]], i16 [[TMP35]], i16 [[TMP36]]
+; CHECK-NEXT:    [[_MSPROP4:%.*]] = insertelement <8 x i16> [[_MSPROP3]], i16 [[TMP38]], i32 0
 ; CHECK-NEXT:    [[_MSCMP14:%.*]] = icmp ne i8 [[TMP5]], 0
-; CHECK-NEXT:    [[_MSOR15:%.*]] = or i1 [[_MSOR13]], [[_MSCMP14]]
-; CHECK-NEXT:    br i1 [[_MSOR15]], label %[[BB24:.*]], label %[[BB25:.*]], !prof [[PROF1]]
-; CHECK:       [[BB24]]:
+; CHECK-NEXT:    br i1 [[_MSCMP14]], label %[[BB39:.*]], label %[[BB40:.*]], !prof [[PROF1]]
+; CHECK:       [[BB39]]:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR8]]
 ; CHECK-NEXT:    unreachable
-; CHECK:       [[BB25]]:
+; CHECK:       [[BB40]]:
 ; CHECK-NEXT:    [[RES3:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.add.sh.round(<8 x half> [[RES2]], <8 x half> [[VAL]], <8 x half> [[SRC]], i8 [[MASK]], i32 4)
-; CHECK-NEXT:    store <8 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    store <8 x i16> [[_MSPROP4]], ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT:    ret <8 x half> [[RES3]]
 ;
   %val.half = load half,ptr %ptr
@@ -1476,8 +1475,8 @@ define <8 x half> @test_int_x86_avx512fp16_mask_sub_sh(<8 x half> %x1, <8 x half
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 56) to ptr), align 8
 ; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
-; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
 ; CHECK-NEXT:    [[TMP5:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
+; CHECK-NEXT:    [[TMP6:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
 ; CHECK-NEXT:    br i1 [[_MSCMP]], label %[[BB6:.*]], label %[[BB7:.*]], !prof [[PROF1]]
@@ -1492,54 +1491,57 @@ define <8 x half> @test_int_x86_avx512fp16_mask_sub_sh(<8 x half> %x1, <8 x half
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load i16, ptr [[TMP10]], align 2
 ; CHECK-NEXT:    [[_MSPROP:%.*]] = insertelement <8 x i16> splat (i16 -1), i16 [[_MSLD]], i32 0
 ; CHECK-NEXT:    [[VAL:%.*]] = insertelement <8 x half> poison, half [[VAL_HALF]], i32 0
-; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
-; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i128 [[TMP11]], 0
-; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
-; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i128 [[TMP12]], 0
-; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]]
-; CHECK-NEXT:    br i1 [[_MSOR]], label %[[BB13:.*]], label %[[BB14:.*]], !prof [[PROF1]]
-; CHECK:       [[BB13]]:
-; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR8]]
-; CHECK-NEXT:    unreachable
-; CHECK:       [[BB14]]:
+; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <8 x i16> [[TMP2]], i32 0
+; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <8 x i16> [[TMP3]], i32 0
+; CHECK-NEXT:    [[TMP13:%.*]] = or i16 [[TMP11]], [[TMP12]]
+; CHECK-NEXT:    [[TMP14:%.*]] = select i1 true, i16 [[TMP13]], i16 0
+; CHECK-NEXT:    [[_MSPROP1:%.*]] = insertelement <8 x i16> [[TMP2]], i16 [[TMP14]], i32 0
 ; CHECK-NEXT:    [[RES0:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.sub.sh.round(<8 x half> [[X1]], <8 x half> [[X2]], <8 x half> zeroinitializer, i8 -1, i32 4)
-; CHECK-NEXT:    [[TMP15:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
-; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i128 [[TMP15]], 0
-; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <8 x i16> [[TMP4]] to i128
-; CHECK-NEXT:    [[_MSCMP4:%.*]] = icmp ne i128 [[TMP16]], 0
-; CHECK-NEXT:    [[_MSOR5:%.*]] = or i1 [[_MSCMP3]], [[_MSCMP4]]
+; CHECK-NEXT:    [[TMP15:%.*]] = bitcast i8 [[MASK]] to <8 x i1>
+; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <8 x i16> [[_MSPROP1]], i32 0
+; CHECK-NEXT:    [[TMP17:%.*]] = extractelement <8 x i16> [[TMP3]], i32 0
+; CHECK-NEXT:    [[TMP18:%.*]] = or i16 [[TMP16]], [[TMP17]]
+; CHECK-NEXT:    [[TMP19:%.*]] = extractelement <8 x i16> [[TMP6]], i32 0
+; CHECK-NEXT:    [[TMP20:%.*]] = extractelement <8 x i1> [[TMP15]], i32 0
+; CHECK-NEXT:    [[TMP21:%.*]] = select i1 [[TMP20]], i16 [[TMP18]], i16 [[TMP19]]
+; CHECK-NEXT:    [[_MSPROP2:%.*]] = insertelement <8 x i16> [[_MSPROP1]], i16 [[TMP21]], i32 0
 ; CHECK-NEXT:    [[_MSCMP6:%.*]] = icmp ne i8 [[TMP5]], 0
-; CHECK-NEXT:    [[_MSOR7:%.*]] = or i1 [[_MSOR5]], [[_MSCMP6]]
-; CHECK-NEXT:    br i1 [[_MSOR7]], label %[[BB17:.*]], label %[[BB18:.*]], !prof [[PROF1]]
-; CHECK:       [[BB17]]:
+; CHECK-NEXT:    br i1 [[_MSCMP6]], label %[[BB22:.*]], label %[[BB23:.*]], !prof [[PROF1]]
+; CHECK:       [[BB22]]:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR8]]
 ; CHECK-NEXT:    unreachable
-; CHECK:       [[BB18]]:
+; CHECK:       [[BB23]]:
 ; CHECK-NEXT:    [[RES1:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.sub.sh.round(<8 x half> [[RES0]], <8 x half> [[X2]], <8 x half> [[SRC]], i8 [[MASK]], i32 4)
-; CHECK-NEXT:    [[TMP19:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
-; CHECK-NEXT:    [[_MSCMP8:%.*]] = icmp ne i128 [[TMP19]], 0
+; CHECK-NEXT:    [[TMP24:%.*]] = bitcast i8 [[MASK]] to <8 x i1>
+; CHECK-NEXT:    [[TMP25:%.*]] = extractelement <8 x i16> [[_MSPROP2]], i32 0
+; CHECK-NEXT:    [[TMP26:%.*]] = extractelement <8 x i16> [[TMP3]], i32 0
+; CHECK-NEXT:    [[TMP27:%.*]] = or i16 [[TMP25]], [[TMP26]]
+; CHECK-NEXT:    [[TMP28:%.*]] = extractelement <8 x i1> [[TMP24]], i32 0
+; CHECK-NEXT:    [[TMP29:%.*]] = select i1 [[TMP28]], i16 [[TMP27]], i16 0
+; CHECK-NEXT:    [[_MSPROP3:%.*]] = insertelement <8 x i16> [[_MSPROP2]], i16 [[TMP29]], i32 0
 ; CHECK-NEXT:    [[_MSCMP9:%.*]] = icmp ne i8 [[TMP5]], 0
-; CHECK-NEXT:    [[_MSOR10:%.*]] = or i1 [[_MSCMP8]], [[_MSCMP9]]
-; CHECK-NEXT:    br i1 [[_MSOR10]], label %[[BB20:.*]], label %[[BB21:.*]], !prof [[PROF1]]
-; CHECK:       [[BB20]]:
+; CHECK-NEXT:    br i1 [[_MSCMP9]], label %[[BB30:.*]], label %[[BB31:.*]], !prof [[PROF1]]
+; CHECK:       [[BB30]]:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR8]]
 ; CHECK-NEXT:    unreachable
-; CHECK:       [[BB21]]:
+; CHECK:       [[BB31]]:
 ; CHECK-NEXT:    [[RES2:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.sub.sh.round(<8 x half> [[RES1]], <8 x half> [[X2]], <8 x half> zeroinitializer, i8 [[MASK]], i32 4)
-; CHECK-NEXT:    [[TMP22:%.*]] = bitcast <8 x i16> [[_MSPROP]] to i128
-; CHECK-NEXT:    [[_MSCMP11:%.*]] = icmp ne i128 [[TMP22]], 0
-; CHECK-NEXT:    [[TMP23:%.*]] = bitcast <8 x i16> [[TMP4]] to i128
-; CHECK-NEXT:    [[_MSCMP12:%.*]] = icmp ne i128 [[TMP23]], 0
-; CHECK-NEXT:    [[_MSOR13:%.*]] = or i1 [[_MSCMP11]], [[_MSCMP12]]
+; CHECK-NEXT:    [[TMP32:%.*]] = bitcast i8 [[MASK]] to <8 x i1>
+; CHECK-NEXT:    [[TMP33:%.*]] = extractelement <8 x i16> [[_MSPROP3]], i32 0
+; CHECK-NEXT:    [[TMP34:%.*]] = extractelement <8 x i16> [[_MSPROP]], i32 0
+; CHECK-NEXT:    [[TMP35:%.*]] = or i16 [[TMP33]], [[TMP34]]
+; CHECK-NEXT:    [[TMP36:%.*]] = extractelement <8 x i16> [[TMP6]], i32 0
+; CHECK-NEXT:    [[TMP37:%.*]] = extractelement <8 x i1> [[TMP32]], i32 0
+; CHECK-NEXT:    [[TMP38:%.*]] = select i1 [[TMP37]], i16 [[TMP35]], i16 [[TMP36]]
+; CHECK-NEXT:    [[_MSPROP4:%.*]] = insertelement <8 x i16> [[_MSPROP3]], i16 [[TMP38]], i32 0
 ; CHECK-NEXT:    [[_MSCMP14:%.*]] = icmp ne i8 [[TMP5]], 0
-; CHECK-NEXT:    [[_MSOR15:%.*]] = or i1 [[_MSOR13]], [[_MSCMP14]]
-; CHECK-NEXT:    br i1 [[_MSOR15]], label %[[BB24:.*]], label %[[BB25:.*]], !prof [[PROF1]]
-; CHECK:       [[BB24]]:
+; CHECK-NEXT:    br i1 [[_MSCMP14]], label %[[BB39:.*]], label %[[BB40:.*]], !prof [[PROF1]]
+; CHECK:       [[BB39]]:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR8]]
 ; CHECK-NEXT:    unreachable
-; CHECK:       [[BB25]]:
+; CHECK:       [[BB40]]:
 ; CHECK-NEXT:    [[RES3:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.sub.sh.round(<8 x half> [[RES2]], <8 x half> [[VAL]], <8 x half> [[SRC]], i8 [[MASK]], i32 4)
-; CHECK-NEXT:    store <8 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    store <8 x i16> [[_MSPROP4]], ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT:    ret <8 x half> [[RES3]]
 ;
   %val.half = load half,ptr %ptr
@@ -1559,8 +1561,8 @@ define <8 x half> @test_int_x86_avx512fp16_mask_mul_sh(<8 x half> %x1, <8 x half
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 56) to ptr), align 8
 ; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
-; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
 ; CHECK-NEXT:    [[TMP5:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
+; CHECK-NEXT:    [[TMP6:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
 ; CHECK-NEXT:    br i1 [[_MSCMP]], label %[[BB6:.*]], label %[[BB7:.*]], !prof [[PROF1]]
@@ -1575,54 +1577,57 @@ define <8 x half> @test_int_x86_avx512fp16_mask_mul_sh(<8 x half> %x1, <8 x half
 ; CHECK-NEXT:    [[_MSLD:%.*]] =...
[truncated]

@llvmbot (Member)

llvmbot commented Apr 26, 2025

@llvm/pr-subscribers-llvm-transforms


@fmayer (Contributor)

fmayer commented Apr 28, 2025

The relevant test is failing on the Windows bot.

@thurstond (Contributor, Author)

The relevant test is failing on the Windows bot.

Argh, evaluation order got me again. Fixed in 33ee854.
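
(A sketch of the general pitfall, with hypothetical names, since the fix commit is not reproduced here: C++ leaves the evaluation order of function arguments unspecified, so nesting IRBuilder calls inside another call's argument list can emit instructions in a host-compiler-dependent order, which in turn breaks FileCheck tests that match the numbered temporaries. Idx0 stands for the ConstantInt zero index used in the patch.)

  // Order-dependent: which extractelement is emitted first is unspecified,
  // so MSVC and Clang/GCC builds can produce differently numbered IR.
  Value *ABLowerShadow =
      IRB.CreateOr(IRB.CreateExtractElement(AShadow, Idx0),
                   IRB.CreateExtractElement(BShadow, Idx0));

  // Deterministic: sequence the builder calls through named locals.
  Value *ALowerShadow = IRB.CreateExtractElement(AShadow, Idx0);
  Value *BLowerShadow = IRB.CreateExtractElement(BShadow, Idx0);
  Value *ABLowerShadowFixed = IRB.CreateOr(ALowerShadow, BLowerShadow);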

thurstond requested a review from fmayer on Apr 28, 2025, 22:34.

github-actions bot commented Apr 28, 2025

✅ With the latest revision this PR passed the C/C++ code formatter.

@fmayer (Contributor)

fmayer commented Apr 28, 2025

Please press F to format code

@thurstond (Contributor, Author)

Please press F to format code

Done

thurstond merged commit d913ea3 into llvm:main on Apr 29, 2025.
11 checks passed