diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp index 8e31e8d2a4fbd..b1c832ac9aeb0 100644 --- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -4312,6 +4312,69 @@ struct MemorySanitizerVisitor : public InstVisitor { setOriginForNaryOp(I); } + Value *extractLowerShadow(IRBuilder<> &IRB, Value *V) { + assert(isa(V->getType())); + assert(cast(V->getType())->getNumElements() > 0); + Value *Shadow = getShadow(V); + return IRB.CreateExtractElement(Shadow, + ConstantInt::get(IRB.getInt32Ty(), 0)); + } + + // For sh.* compiler intrinsics: + // llvm.x86.avx512fp16.mask.{add/sub/mul/div/max/min}.sh.round + // (<8 x half>, <8 x half>, <8 x half>, i8, i32) + // A B WriteThru Mask RoundingMode + // + // DstShadow[0] = Mask[0] ? (AShadow[0] | BShadow[0]) : WriteThruShadow[0] + // DstShadow[1..7] = AShadow[1..7] + void visitGenericScalarHalfwordInst(IntrinsicInst &I) { + IRBuilder<> IRB(&I); + + assert(I.arg_size() == 5); + Value *A = I.getOperand(0); + Value *B = I.getOperand(1); + Value *WriteThrough = I.getOperand(2); + Value *Mask = I.getOperand(3); + Value *RoundingMode = I.getOperand(4); + + // Technically, we could probably just check whether the LSB is + // initialized, but intuitively it feels like a partly uninitialized mask + // is unintended, and we should warn the user immediately. + insertShadowCheck(Mask, &I); + insertShadowCheck(RoundingMode, &I); + + assert(isa(A->getType())); + unsigned NumElements = + cast(A->getType())->getNumElements(); + assert(NumElements == 8); + assert(A->getType() == B->getType()); + assert(B->getType() == WriteThrough->getType()); + assert(Mask->getType()->getPrimitiveSizeInBits() == NumElements); + assert(RoundingMode->getType()->isIntegerTy()); + + Value *ALowerShadow = extractLowerShadow(IRB, A); + Value *BLowerShadow = extractLowerShadow(IRB, B); + + Value *ABLowerShadow = IRB.CreateOr(ALowerShadow, BLowerShadow); + + Value *WriteThroughLowerShadow = extractLowerShadow(IRB, WriteThrough); + + Mask = IRB.CreateBitCast( + Mask, FixedVectorType::get(IRB.getInt1Ty(), NumElements)); + Value *MaskLower = + IRB.CreateExtractElement(Mask, ConstantInt::get(IRB.getInt32Ty(), 0)); + + Value *AShadow = getShadow(A); + Value *DstLowerShadow = + IRB.CreateSelect(MaskLower, ABLowerShadow, WriteThroughLowerShadow); + Value *DstShadow = IRB.CreateInsertElement( + AShadow, DstLowerShadow, ConstantInt::get(IRB.getInt32Ty(), 0), + "_msprop"); + + setShadow(&I, DstShadow); + setOriginForNaryOp(I); + } + // Handle Arm NEON vector load intrinsics (vld*). // // The WithLane instructions (ld[234]lane) are similar to: @@ -5041,6 +5104,16 @@ struct MemorySanitizerVisitor : public InstVisitor { break; } + case Intrinsic::x86_avx512fp16_mask_add_sh_round: + case Intrinsic::x86_avx512fp16_mask_sub_sh_round: + case Intrinsic::x86_avx512fp16_mask_mul_sh_round: + case Intrinsic::x86_avx512fp16_mask_div_sh_round: + case Intrinsic::x86_avx512fp16_mask_max_sh_round: + case Intrinsic::x86_avx512fp16_mask_min_sh_round: { + visitGenericScalarHalfwordInst(I); + break; + } + case Intrinsic::fshl: case Intrinsic::fshr: handleFunnelShift(I); diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512fp16-intrinsics.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512fp16-intrinsics.ll index 61a32e5e2042e..c5d91adf64cb3 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512fp16-intrinsics.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512fp16-intrinsics.ll @@ -13,9 +13,6 @@ ; - llvm.x86.avx512fp16.mask.getexp.sh ; - llvm.x86.avx512fp16.mask.getmant.ph.512 ; - llvm.x86.avx512fp16.mask.getmant.sh -; - llvm.x86.avx512fp16.mask.max.sh.round -; - llvm.x86.avx512fp16.mask.min.sh.round -; - llvm.x86.avx512fp16.mask.mul.sh.round ; - llvm.x86.avx512fp16.mask.rcp.ph.512 ; - llvm.x86.avx512fp16.mask.rcp.sh ; - llvm.x86.avx512fp16.mask.reduce.ph.512 @@ -27,7 +24,6 @@ ; - llvm.x86.avx512fp16.mask.scalef.ph.512 ; - llvm.x86.avx512fp16.mask.scalef.sh ; - llvm.x86.avx512fp16.mask.sqrt.sh -; - llvm.x86.avx512fp16.mask.sub.sh.round ; - llvm.x86.avx512fp16.mask.vcvtph2dq.512 ; - llvm.x86.avx512fp16.mask.vcvtph2qq.512 ; - llvm.x86.avx512fp16.mask.vcvtph2udq.512 @@ -1393,8 +1389,8 @@ define <8 x half> @test_int_x86_avx512fp16_mask_add_sh(<8 x half> %x1, <8 x half ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 56) to ptr), align 8 ; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 -; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 ; CHECK-NEXT: [[TMP5:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 +; CHECK-NEXT: [[TMP6:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 ; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB6:.*]], label %[[BB7:.*]], !prof [[PROF1]] @@ -1409,54 +1405,57 @@ define <8 x half> @test_int_x86_avx512fp16_mask_add_sh(<8 x half> %x1, <8 x half ; CHECK-NEXT: [[_MSLD:%.*]] = load i16, ptr [[TMP10]], align 2 ; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <8 x i16> splat (i16 -1), i16 [[_MSLD]], i32 0 ; CHECK-NEXT: [[VAL:%.*]] = insertelement <8 x half> poison, half [[VAL_HALF]], i32 0 -; CHECK-NEXT: [[TMP11:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP11]], 0 -; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x i16> [[TMP3]] to i128 -; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP12]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]] -; CHECK-NEXT: br i1 [[_MSOR]], label %[[BB13:.*]], label %[[BB14:.*]], !prof [[PROF1]] -; CHECK: [[BB13]]: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] -; CHECK-NEXT: unreachable -; CHECK: [[BB14]]: +; CHECK-NEXT: [[TMP11:%.*]] = extractelement <8 x i16> [[TMP2]], i32 0 +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <8 x i16> [[TMP3]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = or i16 [[TMP11]], [[TMP12]] +; CHECK-NEXT: [[TMP14:%.*]] = select i1 true, i16 [[TMP13]], i16 0 +; CHECK-NEXT: [[_MSPROP1:%.*]] = insertelement <8 x i16> [[TMP2]], i16 [[TMP14]], i32 0 ; CHECK-NEXT: [[RES0:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.add.sh.round(<8 x half> [[X1]], <8 x half> [[X2]], <8 x half> zeroinitializer, i8 -1, i32 4) -; CHECK-NEXT: [[TMP15:%.*]] = bitcast <8 x i16> [[TMP3]] to i128 -; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP15]], 0 -; CHECK-NEXT: [[TMP16:%.*]] = bitcast <8 x i16> [[TMP4]] to i128 -; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i128 [[TMP16]], 0 -; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSCMP3]], [[_MSCMP4]] +; CHECK-NEXT: [[TMP16:%.*]] = extractelement <8 x i16> [[_MSPROP1]], i32 0 +; CHECK-NEXT: [[TMP17:%.*]] = extractelement <8 x i16> [[TMP3]], i32 0 +; CHECK-NEXT: [[TMP18:%.*]] = or i16 [[TMP16]], [[TMP17]] +; CHECK-NEXT: [[TMP19:%.*]] = extractelement <8 x i16> [[TMP6]], i32 0 +; CHECK-NEXT: [[TMP22:%.*]] = bitcast i8 [[MASK]] to <8 x i1> +; CHECK-NEXT: [[TMP20:%.*]] = extractelement <8 x i1> [[TMP22]], i32 0 +; CHECK-NEXT: [[TMP21:%.*]] = select i1 [[TMP20]], i16 [[TMP18]], i16 [[TMP19]] +; CHECK-NEXT: [[_MSPROP2:%.*]] = insertelement <8 x i16> [[_MSPROP1]], i16 [[TMP21]], i32 0 ; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i8 [[TMP5]], 0 -; CHECK-NEXT: [[_MSOR7:%.*]] = or i1 [[_MSOR5]], [[_MSCMP6]] -; CHECK-NEXT: br i1 [[_MSOR7]], label %[[BB17:.*]], label %[[BB18:.*]], !prof [[PROF1]] -; CHECK: [[BB17]]: +; CHECK-NEXT: br i1 [[_MSCMP6]], label %[[BB22:.*]], label %[[BB23:.*]], !prof [[PROF1]] +; CHECK: [[BB22]]: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] ; CHECK-NEXT: unreachable -; CHECK: [[BB18]]: +; CHECK: [[BB23]]: ; CHECK-NEXT: [[RES1:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.add.sh.round(<8 x half> [[RES0]], <8 x half> [[X2]], <8 x half> [[SRC]], i8 [[MASK]], i32 4) -; CHECK-NEXT: [[TMP19:%.*]] = bitcast <8 x i16> [[TMP3]] to i128 -; CHECK-NEXT: [[_MSCMP8:%.*]] = icmp ne i128 [[TMP19]], 0 +; CHECK-NEXT: [[TMP25:%.*]] = extractelement <8 x i16> [[_MSPROP2]], i32 0 +; CHECK-NEXT: [[TMP26:%.*]] = extractelement <8 x i16> [[TMP3]], i32 0 +; CHECK-NEXT: [[TMP27:%.*]] = or i16 [[TMP25]], [[TMP26]] +; CHECK-NEXT: [[TMP30:%.*]] = bitcast i8 [[MASK]] to <8 x i1> +; CHECK-NEXT: [[TMP28:%.*]] = extractelement <8 x i1> [[TMP30]], i32 0 +; CHECK-NEXT: [[TMP29:%.*]] = select i1 [[TMP28]], i16 [[TMP27]], i16 0 +; CHECK-NEXT: [[_MSPROP3:%.*]] = insertelement <8 x i16> [[_MSPROP2]], i16 [[TMP29]], i32 0 ; CHECK-NEXT: [[_MSCMP9:%.*]] = icmp ne i8 [[TMP5]], 0 -; CHECK-NEXT: [[_MSOR10:%.*]] = or i1 [[_MSCMP8]], [[_MSCMP9]] -; CHECK-NEXT: br i1 [[_MSOR10]], label %[[BB20:.*]], label %[[BB21:.*]], !prof [[PROF1]] -; CHECK: [[BB20]]: +; CHECK-NEXT: br i1 [[_MSCMP9]], label %[[BB30:.*]], label %[[BB31:.*]], !prof [[PROF1]] +; CHECK: [[BB30]]: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] ; CHECK-NEXT: unreachable -; CHECK: [[BB21]]: +; CHECK: [[BB31]]: ; CHECK-NEXT: [[RES2:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.add.sh.round(<8 x half> [[RES1]], <8 x half> [[X2]], <8 x half> zeroinitializer, i8 [[MASK]], i32 4) -; CHECK-NEXT: [[TMP22:%.*]] = bitcast <8 x i16> [[_MSPROP]] to i128 -; CHECK-NEXT: [[_MSCMP11:%.*]] = icmp ne i128 [[TMP22]], 0 -; CHECK-NEXT: [[TMP23:%.*]] = bitcast <8 x i16> [[TMP4]] to i128 -; CHECK-NEXT: [[_MSCMP12:%.*]] = icmp ne i128 [[TMP23]], 0 -; CHECK-NEXT: [[_MSOR13:%.*]] = or i1 [[_MSCMP11]], [[_MSCMP12]] +; CHECK-NEXT: [[TMP33:%.*]] = extractelement <8 x i16> [[_MSPROP3]], i32 0 +; CHECK-NEXT: [[TMP34:%.*]] = extractelement <8 x i16> [[_MSPROP]], i32 0 +; CHECK-NEXT: [[TMP35:%.*]] = or i16 [[TMP33]], [[TMP34]] +; CHECK-NEXT: [[TMP36:%.*]] = extractelement <8 x i16> [[TMP6]], i32 0 +; CHECK-NEXT: [[TMP39:%.*]] = bitcast i8 [[MASK]] to <8 x i1> +; CHECK-NEXT: [[TMP37:%.*]] = extractelement <8 x i1> [[TMP39]], i32 0 +; CHECK-NEXT: [[TMP38:%.*]] = select i1 [[TMP37]], i16 [[TMP35]], i16 [[TMP36]] +; CHECK-NEXT: [[_MSPROP4:%.*]] = insertelement <8 x i16> [[_MSPROP3]], i16 [[TMP38]], i32 0 ; CHECK-NEXT: [[_MSCMP14:%.*]] = icmp ne i8 [[TMP5]], 0 -; CHECK-NEXT: [[_MSOR15:%.*]] = or i1 [[_MSOR13]], [[_MSCMP14]] -; CHECK-NEXT: br i1 [[_MSOR15]], label %[[BB24:.*]], label %[[BB25:.*]], !prof [[PROF1]] -; CHECK: [[BB24]]: +; CHECK-NEXT: br i1 [[_MSCMP14]], label %[[BB39:.*]], label %[[BB40:.*]], !prof [[PROF1]] +; CHECK: [[BB39]]: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] ; CHECK-NEXT: unreachable -; CHECK: [[BB25]]: +; CHECK: [[BB40]]: ; CHECK-NEXT: [[RES3:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.add.sh.round(<8 x half> [[RES2]], <8 x half> [[VAL]], <8 x half> [[SRC]], i8 [[MASK]], i32 4) -; CHECK-NEXT: store <8 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store <8 x i16> [[_MSPROP4]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <8 x half> [[RES3]] ; %val.half = load half,ptr %ptr @@ -1476,8 +1475,8 @@ define <8 x half> @test_int_x86_avx512fp16_mask_sub_sh(<8 x half> %x1, <8 x half ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 56) to ptr), align 8 ; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 -; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 ; CHECK-NEXT: [[TMP5:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 +; CHECK-NEXT: [[TMP6:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 ; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB6:.*]], label %[[BB7:.*]], !prof [[PROF1]] @@ -1492,54 +1491,57 @@ define <8 x half> @test_int_x86_avx512fp16_mask_sub_sh(<8 x half> %x1, <8 x half ; CHECK-NEXT: [[_MSLD:%.*]] = load i16, ptr [[TMP10]], align 2 ; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <8 x i16> splat (i16 -1), i16 [[_MSLD]], i32 0 ; CHECK-NEXT: [[VAL:%.*]] = insertelement <8 x half> poison, half [[VAL_HALF]], i32 0 -; CHECK-NEXT: [[TMP11:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP11]], 0 -; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x i16> [[TMP3]] to i128 -; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP12]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]] -; CHECK-NEXT: br i1 [[_MSOR]], label %[[BB13:.*]], label %[[BB14:.*]], !prof [[PROF1]] -; CHECK: [[BB13]]: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] -; CHECK-NEXT: unreachable -; CHECK: [[BB14]]: +; CHECK-NEXT: [[TMP11:%.*]] = extractelement <8 x i16> [[TMP2]], i32 0 +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <8 x i16> [[TMP3]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = or i16 [[TMP11]], [[TMP12]] +; CHECK-NEXT: [[TMP14:%.*]] = select i1 true, i16 [[TMP13]], i16 0 +; CHECK-NEXT: [[_MSPROP1:%.*]] = insertelement <8 x i16> [[TMP2]], i16 [[TMP14]], i32 0 ; CHECK-NEXT: [[RES0:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.sub.sh.round(<8 x half> [[X1]], <8 x half> [[X2]], <8 x half> zeroinitializer, i8 -1, i32 4) -; CHECK-NEXT: [[TMP15:%.*]] = bitcast <8 x i16> [[TMP3]] to i128 -; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP15]], 0 -; CHECK-NEXT: [[TMP16:%.*]] = bitcast <8 x i16> [[TMP4]] to i128 -; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i128 [[TMP16]], 0 -; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSCMP3]], [[_MSCMP4]] +; CHECK-NEXT: [[TMP16:%.*]] = extractelement <8 x i16> [[_MSPROP1]], i32 0 +; CHECK-NEXT: [[TMP17:%.*]] = extractelement <8 x i16> [[TMP3]], i32 0 +; CHECK-NEXT: [[TMP18:%.*]] = or i16 [[TMP16]], [[TMP17]] +; CHECK-NEXT: [[TMP19:%.*]] = extractelement <8 x i16> [[TMP6]], i32 0 +; CHECK-NEXT: [[TMP22:%.*]] = bitcast i8 [[MASK]] to <8 x i1> +; CHECK-NEXT: [[TMP20:%.*]] = extractelement <8 x i1> [[TMP22]], i32 0 +; CHECK-NEXT: [[TMP21:%.*]] = select i1 [[TMP20]], i16 [[TMP18]], i16 [[TMP19]] +; CHECK-NEXT: [[_MSPROP2:%.*]] = insertelement <8 x i16> [[_MSPROP1]], i16 [[TMP21]], i32 0 ; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i8 [[TMP5]], 0 -; CHECK-NEXT: [[_MSOR7:%.*]] = or i1 [[_MSOR5]], [[_MSCMP6]] -; CHECK-NEXT: br i1 [[_MSOR7]], label %[[BB17:.*]], label %[[BB18:.*]], !prof [[PROF1]] -; CHECK: [[BB17]]: +; CHECK-NEXT: br i1 [[_MSCMP6]], label %[[BB22:.*]], label %[[BB23:.*]], !prof [[PROF1]] +; CHECK: [[BB22]]: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] ; CHECK-NEXT: unreachable -; CHECK: [[BB18]]: +; CHECK: [[BB23]]: ; CHECK-NEXT: [[RES1:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.sub.sh.round(<8 x half> [[RES0]], <8 x half> [[X2]], <8 x half> [[SRC]], i8 [[MASK]], i32 4) -; CHECK-NEXT: [[TMP19:%.*]] = bitcast <8 x i16> [[TMP3]] to i128 -; CHECK-NEXT: [[_MSCMP8:%.*]] = icmp ne i128 [[TMP19]], 0 +; CHECK-NEXT: [[TMP25:%.*]] = extractelement <8 x i16> [[_MSPROP2]], i32 0 +; CHECK-NEXT: [[TMP26:%.*]] = extractelement <8 x i16> [[TMP3]], i32 0 +; CHECK-NEXT: [[TMP27:%.*]] = or i16 [[TMP25]], [[TMP26]] +; CHECK-NEXT: [[TMP30:%.*]] = bitcast i8 [[MASK]] to <8 x i1> +; CHECK-NEXT: [[TMP28:%.*]] = extractelement <8 x i1> [[TMP30]], i32 0 +; CHECK-NEXT: [[TMP29:%.*]] = select i1 [[TMP28]], i16 [[TMP27]], i16 0 +; CHECK-NEXT: [[_MSPROP3:%.*]] = insertelement <8 x i16> [[_MSPROP2]], i16 [[TMP29]], i32 0 ; CHECK-NEXT: [[_MSCMP9:%.*]] = icmp ne i8 [[TMP5]], 0 -; CHECK-NEXT: [[_MSOR10:%.*]] = or i1 [[_MSCMP8]], [[_MSCMP9]] -; CHECK-NEXT: br i1 [[_MSOR10]], label %[[BB20:.*]], label %[[BB21:.*]], !prof [[PROF1]] -; CHECK: [[BB20]]: +; CHECK-NEXT: br i1 [[_MSCMP9]], label %[[BB30:.*]], label %[[BB31:.*]], !prof [[PROF1]] +; CHECK: [[BB30]]: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] ; CHECK-NEXT: unreachable -; CHECK: [[BB21]]: +; CHECK: [[BB31]]: ; CHECK-NEXT: [[RES2:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.sub.sh.round(<8 x half> [[RES1]], <8 x half> [[X2]], <8 x half> zeroinitializer, i8 [[MASK]], i32 4) -; CHECK-NEXT: [[TMP22:%.*]] = bitcast <8 x i16> [[_MSPROP]] to i128 -; CHECK-NEXT: [[_MSCMP11:%.*]] = icmp ne i128 [[TMP22]], 0 -; CHECK-NEXT: [[TMP23:%.*]] = bitcast <8 x i16> [[TMP4]] to i128 -; CHECK-NEXT: [[_MSCMP12:%.*]] = icmp ne i128 [[TMP23]], 0 -; CHECK-NEXT: [[_MSOR13:%.*]] = or i1 [[_MSCMP11]], [[_MSCMP12]] +; CHECK-NEXT: [[TMP33:%.*]] = extractelement <8 x i16> [[_MSPROP3]], i32 0 +; CHECK-NEXT: [[TMP34:%.*]] = extractelement <8 x i16> [[_MSPROP]], i32 0 +; CHECK-NEXT: [[TMP35:%.*]] = or i16 [[TMP33]], [[TMP34]] +; CHECK-NEXT: [[TMP36:%.*]] = extractelement <8 x i16> [[TMP6]], i32 0 +; CHECK-NEXT: [[TMP39:%.*]] = bitcast i8 [[MASK]] to <8 x i1> +; CHECK-NEXT: [[TMP37:%.*]] = extractelement <8 x i1> [[TMP39]], i32 0 +; CHECK-NEXT: [[TMP38:%.*]] = select i1 [[TMP37]], i16 [[TMP35]], i16 [[TMP36]] +; CHECK-NEXT: [[_MSPROP4:%.*]] = insertelement <8 x i16> [[_MSPROP3]], i16 [[TMP38]], i32 0 ; CHECK-NEXT: [[_MSCMP14:%.*]] = icmp ne i8 [[TMP5]], 0 -; CHECK-NEXT: [[_MSOR15:%.*]] = or i1 [[_MSOR13]], [[_MSCMP14]] -; CHECK-NEXT: br i1 [[_MSOR15]], label %[[BB24:.*]], label %[[BB25:.*]], !prof [[PROF1]] -; CHECK: [[BB24]]: +; CHECK-NEXT: br i1 [[_MSCMP14]], label %[[BB39:.*]], label %[[BB40:.*]], !prof [[PROF1]] +; CHECK: [[BB39]]: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] ; CHECK-NEXT: unreachable -; CHECK: [[BB25]]: +; CHECK: [[BB40]]: ; CHECK-NEXT: [[RES3:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.sub.sh.round(<8 x half> [[RES2]], <8 x half> [[VAL]], <8 x half> [[SRC]], i8 [[MASK]], i32 4) -; CHECK-NEXT: store <8 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store <8 x i16> [[_MSPROP4]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <8 x half> [[RES3]] ; %val.half = load half,ptr %ptr @@ -1559,8 +1561,8 @@ define <8 x half> @test_int_x86_avx512fp16_mask_mul_sh(<8 x half> %x1, <8 x half ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 56) to ptr), align 8 ; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 -; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 ; CHECK-NEXT: [[TMP5:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 +; CHECK-NEXT: [[TMP6:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 ; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB6:.*]], label %[[BB7:.*]], !prof [[PROF1]] @@ -1575,54 +1577,57 @@ define <8 x half> @test_int_x86_avx512fp16_mask_mul_sh(<8 x half> %x1, <8 x half ; CHECK-NEXT: [[_MSLD:%.*]] = load i16, ptr [[TMP10]], align 2 ; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <8 x i16> splat (i16 -1), i16 [[_MSLD]], i32 0 ; CHECK-NEXT: [[VAL:%.*]] = insertelement <8 x half> poison, half [[VAL_HALF]], i32 0 -; CHECK-NEXT: [[TMP11:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP11]], 0 -; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x i16> [[TMP3]] to i128 -; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP12]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]] -; CHECK-NEXT: br i1 [[_MSOR]], label %[[BB13:.*]], label %[[BB14:.*]], !prof [[PROF1]] -; CHECK: [[BB13]]: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] -; CHECK-NEXT: unreachable -; CHECK: [[BB14]]: +; CHECK-NEXT: [[TMP11:%.*]] = extractelement <8 x i16> [[TMP2]], i32 0 +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <8 x i16> [[TMP3]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = or i16 [[TMP11]], [[TMP12]] +; CHECK-NEXT: [[TMP14:%.*]] = select i1 true, i16 [[TMP13]], i16 0 +; CHECK-NEXT: [[_MSPROP1:%.*]] = insertelement <8 x i16> [[TMP2]], i16 [[TMP14]], i32 0 ; CHECK-NEXT: [[RES0:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.mul.sh.round(<8 x half> [[X1]], <8 x half> [[X2]], <8 x half> zeroinitializer, i8 -1, i32 4) -; CHECK-NEXT: [[TMP15:%.*]] = bitcast <8 x i16> [[TMP3]] to i128 -; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP15]], 0 -; CHECK-NEXT: [[TMP16:%.*]] = bitcast <8 x i16> [[TMP4]] to i128 -; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i128 [[TMP16]], 0 -; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSCMP3]], [[_MSCMP4]] +; CHECK-NEXT: [[TMP16:%.*]] = extractelement <8 x i16> [[_MSPROP1]], i32 0 +; CHECK-NEXT: [[TMP17:%.*]] = extractelement <8 x i16> [[TMP3]], i32 0 +; CHECK-NEXT: [[TMP18:%.*]] = or i16 [[TMP16]], [[TMP17]] +; CHECK-NEXT: [[TMP19:%.*]] = extractelement <8 x i16> [[TMP6]], i32 0 +; CHECK-NEXT: [[TMP22:%.*]] = bitcast i8 [[MASK]] to <8 x i1> +; CHECK-NEXT: [[TMP20:%.*]] = extractelement <8 x i1> [[TMP22]], i32 0 +; CHECK-NEXT: [[TMP21:%.*]] = select i1 [[TMP20]], i16 [[TMP18]], i16 [[TMP19]] +; CHECK-NEXT: [[_MSPROP2:%.*]] = insertelement <8 x i16> [[_MSPROP1]], i16 [[TMP21]], i32 0 ; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i8 [[TMP5]], 0 -; CHECK-NEXT: [[_MSOR7:%.*]] = or i1 [[_MSOR5]], [[_MSCMP6]] -; CHECK-NEXT: br i1 [[_MSOR7]], label %[[BB17:.*]], label %[[BB18:.*]], !prof [[PROF1]] -; CHECK: [[BB17]]: +; CHECK-NEXT: br i1 [[_MSCMP6]], label %[[BB22:.*]], label %[[BB23:.*]], !prof [[PROF1]] +; CHECK: [[BB22]]: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] ; CHECK-NEXT: unreachable -; CHECK: [[BB18]]: +; CHECK: [[BB23]]: ; CHECK-NEXT: [[RES1:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.mul.sh.round(<8 x half> [[RES0]], <8 x half> [[X2]], <8 x half> [[SRC]], i8 [[MASK]], i32 4) -; CHECK-NEXT: [[TMP19:%.*]] = bitcast <8 x i16> [[TMP3]] to i128 -; CHECK-NEXT: [[_MSCMP8:%.*]] = icmp ne i128 [[TMP19]], 0 +; CHECK-NEXT: [[TMP25:%.*]] = extractelement <8 x i16> [[_MSPROP2]], i32 0 +; CHECK-NEXT: [[TMP26:%.*]] = extractelement <8 x i16> [[TMP3]], i32 0 +; CHECK-NEXT: [[TMP27:%.*]] = or i16 [[TMP25]], [[TMP26]] +; CHECK-NEXT: [[TMP30:%.*]] = bitcast i8 [[MASK]] to <8 x i1> +; CHECK-NEXT: [[TMP28:%.*]] = extractelement <8 x i1> [[TMP30]], i32 0 +; CHECK-NEXT: [[TMP29:%.*]] = select i1 [[TMP28]], i16 [[TMP27]], i16 0 +; CHECK-NEXT: [[_MSPROP3:%.*]] = insertelement <8 x i16> [[_MSPROP2]], i16 [[TMP29]], i32 0 ; CHECK-NEXT: [[_MSCMP9:%.*]] = icmp ne i8 [[TMP5]], 0 -; CHECK-NEXT: [[_MSOR10:%.*]] = or i1 [[_MSCMP8]], [[_MSCMP9]] -; CHECK-NEXT: br i1 [[_MSOR10]], label %[[BB20:.*]], label %[[BB21:.*]], !prof [[PROF1]] -; CHECK: [[BB20]]: +; CHECK-NEXT: br i1 [[_MSCMP9]], label %[[BB30:.*]], label %[[BB31:.*]], !prof [[PROF1]] +; CHECK: [[BB30]]: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] ; CHECK-NEXT: unreachable -; CHECK: [[BB21]]: +; CHECK: [[BB31]]: ; CHECK-NEXT: [[RES2:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.mul.sh.round(<8 x half> [[RES1]], <8 x half> [[X2]], <8 x half> zeroinitializer, i8 [[MASK]], i32 4) -; CHECK-NEXT: [[TMP22:%.*]] = bitcast <8 x i16> [[_MSPROP]] to i128 -; CHECK-NEXT: [[_MSCMP11:%.*]] = icmp ne i128 [[TMP22]], 0 -; CHECK-NEXT: [[TMP23:%.*]] = bitcast <8 x i16> [[TMP4]] to i128 -; CHECK-NEXT: [[_MSCMP12:%.*]] = icmp ne i128 [[TMP23]], 0 -; CHECK-NEXT: [[_MSOR13:%.*]] = or i1 [[_MSCMP11]], [[_MSCMP12]] +; CHECK-NEXT: [[TMP33:%.*]] = extractelement <8 x i16> [[_MSPROP3]], i32 0 +; CHECK-NEXT: [[TMP34:%.*]] = extractelement <8 x i16> [[_MSPROP]], i32 0 +; CHECK-NEXT: [[TMP35:%.*]] = or i16 [[TMP33]], [[TMP34]] +; CHECK-NEXT: [[TMP36:%.*]] = extractelement <8 x i16> [[TMP6]], i32 0 +; CHECK-NEXT: [[TMP39:%.*]] = bitcast i8 [[MASK]] to <8 x i1> +; CHECK-NEXT: [[TMP37:%.*]] = extractelement <8 x i1> [[TMP39]], i32 0 +; CHECK-NEXT: [[TMP38:%.*]] = select i1 [[TMP37]], i16 [[TMP35]], i16 [[TMP36]] +; CHECK-NEXT: [[_MSPROP4:%.*]] = insertelement <8 x i16> [[_MSPROP3]], i16 [[TMP38]], i32 0 ; CHECK-NEXT: [[_MSCMP14:%.*]] = icmp ne i8 [[TMP5]], 0 -; CHECK-NEXT: [[_MSOR15:%.*]] = or i1 [[_MSOR13]], [[_MSCMP14]] -; CHECK-NEXT: br i1 [[_MSOR15]], label %[[BB24:.*]], label %[[BB25:.*]], !prof [[PROF1]] -; CHECK: [[BB24]]: +; CHECK-NEXT: br i1 [[_MSCMP14]], label %[[BB39:.*]], label %[[BB40:.*]], !prof [[PROF1]] +; CHECK: [[BB39]]: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] ; CHECK-NEXT: unreachable -; CHECK: [[BB25]]: +; CHECK: [[BB40]]: ; CHECK-NEXT: [[RES3:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.mul.sh.round(<8 x half> [[RES2]], <8 x half> [[VAL]], <8 x half> [[SRC]], i8 [[MASK]], i32 4) -; CHECK-NEXT: store <8 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store <8 x i16> [[_MSPROP4]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <8 x half> [[RES3]] ; %val.half = load half,ptr %ptr @@ -1642,8 +1647,8 @@ define <8 x half> @test_int_x86_avx512fp16_mask_div_sh(<8 x half> %x1, <8 x half ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 56) to ptr), align 8 ; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 -; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 ; CHECK-NEXT: [[TMP5:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 +; CHECK-NEXT: [[TMP6:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 ; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB6:.*]], label %[[BB7:.*]], !prof [[PROF1]] @@ -1658,54 +1663,57 @@ define <8 x half> @test_int_x86_avx512fp16_mask_div_sh(<8 x half> %x1, <8 x half ; CHECK-NEXT: [[_MSLD:%.*]] = load i16, ptr [[TMP10]], align 2 ; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <8 x i16> splat (i16 -1), i16 [[_MSLD]], i32 0 ; CHECK-NEXT: [[VAL:%.*]] = insertelement <8 x half> poison, half [[VAL_HALF]], i32 0 -; CHECK-NEXT: [[TMP11:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP11]], 0 -; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x i16> [[TMP3]] to i128 -; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP12]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]] -; CHECK-NEXT: br i1 [[_MSOR]], label %[[BB13:.*]], label %[[BB14:.*]], !prof [[PROF1]] -; CHECK: [[BB13]]: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] -; CHECK-NEXT: unreachable -; CHECK: [[BB14]]: +; CHECK-NEXT: [[TMP11:%.*]] = extractelement <8 x i16> [[TMP2]], i32 0 +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <8 x i16> [[TMP3]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = or i16 [[TMP11]], [[TMP12]] +; CHECK-NEXT: [[TMP14:%.*]] = select i1 true, i16 [[TMP13]], i16 0 +; CHECK-NEXT: [[_MSPROP1:%.*]] = insertelement <8 x i16> [[TMP2]], i16 [[TMP14]], i32 0 ; CHECK-NEXT: [[RES0:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.div.sh.round(<8 x half> [[X1]], <8 x half> [[X2]], <8 x half> zeroinitializer, i8 -1, i32 4) -; CHECK-NEXT: [[TMP15:%.*]] = bitcast <8 x i16> [[TMP3]] to i128 -; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP15]], 0 -; CHECK-NEXT: [[TMP16:%.*]] = bitcast <8 x i16> [[TMP4]] to i128 -; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i128 [[TMP16]], 0 -; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSCMP3]], [[_MSCMP4]] +; CHECK-NEXT: [[TMP16:%.*]] = extractelement <8 x i16> [[_MSPROP1]], i32 0 +; CHECK-NEXT: [[TMP17:%.*]] = extractelement <8 x i16> [[TMP3]], i32 0 +; CHECK-NEXT: [[TMP18:%.*]] = or i16 [[TMP16]], [[TMP17]] +; CHECK-NEXT: [[TMP19:%.*]] = extractelement <8 x i16> [[TMP6]], i32 0 +; CHECK-NEXT: [[TMP22:%.*]] = bitcast i8 [[MASK]] to <8 x i1> +; CHECK-NEXT: [[TMP20:%.*]] = extractelement <8 x i1> [[TMP22]], i32 0 +; CHECK-NEXT: [[TMP21:%.*]] = select i1 [[TMP20]], i16 [[TMP18]], i16 [[TMP19]] +; CHECK-NEXT: [[_MSPROP2:%.*]] = insertelement <8 x i16> [[_MSPROP1]], i16 [[TMP21]], i32 0 ; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i8 [[TMP5]], 0 -; CHECK-NEXT: [[_MSOR7:%.*]] = or i1 [[_MSOR5]], [[_MSCMP6]] -; CHECK-NEXT: br i1 [[_MSOR7]], label %[[BB17:.*]], label %[[BB18:.*]], !prof [[PROF1]] -; CHECK: [[BB17]]: +; CHECK-NEXT: br i1 [[_MSCMP6]], label %[[BB22:.*]], label %[[BB23:.*]], !prof [[PROF1]] +; CHECK: [[BB22]]: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] ; CHECK-NEXT: unreachable -; CHECK: [[BB18]]: +; CHECK: [[BB23]]: ; CHECK-NEXT: [[RES1:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.div.sh.round(<8 x half> [[RES0]], <8 x half> [[X2]], <8 x half> [[SRC]], i8 [[MASK]], i32 4) -; CHECK-NEXT: [[TMP19:%.*]] = bitcast <8 x i16> [[TMP3]] to i128 -; CHECK-NEXT: [[_MSCMP8:%.*]] = icmp ne i128 [[TMP19]], 0 +; CHECK-NEXT: [[TMP25:%.*]] = extractelement <8 x i16> [[_MSPROP2]], i32 0 +; CHECK-NEXT: [[TMP26:%.*]] = extractelement <8 x i16> [[TMP3]], i32 0 +; CHECK-NEXT: [[TMP27:%.*]] = or i16 [[TMP25]], [[TMP26]] +; CHECK-NEXT: [[TMP30:%.*]] = bitcast i8 [[MASK]] to <8 x i1> +; CHECK-NEXT: [[TMP28:%.*]] = extractelement <8 x i1> [[TMP30]], i32 0 +; CHECK-NEXT: [[TMP29:%.*]] = select i1 [[TMP28]], i16 [[TMP27]], i16 0 +; CHECK-NEXT: [[_MSPROP3:%.*]] = insertelement <8 x i16> [[_MSPROP2]], i16 [[TMP29]], i32 0 ; CHECK-NEXT: [[_MSCMP9:%.*]] = icmp ne i8 [[TMP5]], 0 -; CHECK-NEXT: [[_MSOR10:%.*]] = or i1 [[_MSCMP8]], [[_MSCMP9]] -; CHECK-NEXT: br i1 [[_MSOR10]], label %[[BB20:.*]], label %[[BB21:.*]], !prof [[PROF1]] -; CHECK: [[BB20]]: +; CHECK-NEXT: br i1 [[_MSCMP9]], label %[[BB30:.*]], label %[[BB31:.*]], !prof [[PROF1]] +; CHECK: [[BB30]]: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] ; CHECK-NEXT: unreachable -; CHECK: [[BB21]]: +; CHECK: [[BB31]]: ; CHECK-NEXT: [[RES2:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.div.sh.round(<8 x half> [[RES1]], <8 x half> [[X2]], <8 x half> zeroinitializer, i8 [[MASK]], i32 4) -; CHECK-NEXT: [[TMP22:%.*]] = bitcast <8 x i16> [[_MSPROP]] to i128 -; CHECK-NEXT: [[_MSCMP11:%.*]] = icmp ne i128 [[TMP22]], 0 -; CHECK-NEXT: [[TMP23:%.*]] = bitcast <8 x i16> [[TMP4]] to i128 -; CHECK-NEXT: [[_MSCMP12:%.*]] = icmp ne i128 [[TMP23]], 0 -; CHECK-NEXT: [[_MSOR13:%.*]] = or i1 [[_MSCMP11]], [[_MSCMP12]] +; CHECK-NEXT: [[TMP33:%.*]] = extractelement <8 x i16> [[_MSPROP3]], i32 0 +; CHECK-NEXT: [[TMP34:%.*]] = extractelement <8 x i16> [[_MSPROP]], i32 0 +; CHECK-NEXT: [[TMP35:%.*]] = or i16 [[TMP33]], [[TMP34]] +; CHECK-NEXT: [[TMP36:%.*]] = extractelement <8 x i16> [[TMP6]], i32 0 +; CHECK-NEXT: [[TMP39:%.*]] = bitcast i8 [[MASK]] to <8 x i1> +; CHECK-NEXT: [[TMP37:%.*]] = extractelement <8 x i1> [[TMP39]], i32 0 +; CHECK-NEXT: [[TMP38:%.*]] = select i1 [[TMP37]], i16 [[TMP35]], i16 [[TMP36]] +; CHECK-NEXT: [[_MSPROP4:%.*]] = insertelement <8 x i16> [[_MSPROP3]], i16 [[TMP38]], i32 0 ; CHECK-NEXT: [[_MSCMP14:%.*]] = icmp ne i8 [[TMP5]], 0 -; CHECK-NEXT: [[_MSOR15:%.*]] = or i1 [[_MSOR13]], [[_MSCMP14]] -; CHECK-NEXT: br i1 [[_MSOR15]], label %[[BB24:.*]], label %[[BB25:.*]], !prof [[PROF1]] -; CHECK: [[BB24]]: +; CHECK-NEXT: br i1 [[_MSCMP14]], label %[[BB39:.*]], label %[[BB40:.*]], !prof [[PROF1]] +; CHECK: [[BB39]]: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] ; CHECK-NEXT: unreachable -; CHECK: [[BB25]]: +; CHECK: [[BB40]]: ; CHECK-NEXT: [[RES3:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.div.sh.round(<8 x half> [[RES2]], <8 x half> [[VAL]], <8 x half> [[SRC]], i8 [[MASK]], i32 4) -; CHECK-NEXT: store <8 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store <8 x i16> [[_MSPROP4]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <8 x half> [[RES3]] ; %val.half = load half,ptr %ptr @@ -1725,8 +1733,8 @@ define <8 x half> @test_int_x86_avx512fp16_mask_min_sh(<8 x half> %x1, <8 x half ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 56) to ptr), align 8 ; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 -; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 ; CHECK-NEXT: [[TMP5:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 +; CHECK-NEXT: [[TMP6:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 ; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB6:.*]], label %[[BB7:.*]], !prof [[PROF1]] @@ -1741,54 +1749,57 @@ define <8 x half> @test_int_x86_avx512fp16_mask_min_sh(<8 x half> %x1, <8 x half ; CHECK-NEXT: [[_MSLD:%.*]] = load i16, ptr [[TMP10]], align 2 ; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <8 x i16> splat (i16 -1), i16 [[_MSLD]], i32 0 ; CHECK-NEXT: [[VAL:%.*]] = insertelement <8 x half> poison, half [[VAL_HALF]], i32 0 -; CHECK-NEXT: [[TMP11:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP11]], 0 -; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x i16> [[TMP3]] to i128 -; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP12]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]] -; CHECK-NEXT: br i1 [[_MSOR]], label %[[BB13:.*]], label %[[BB14:.*]], !prof [[PROF1]] -; CHECK: [[BB13]]: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] -; CHECK-NEXT: unreachable -; CHECK: [[BB14]]: +; CHECK-NEXT: [[TMP11:%.*]] = extractelement <8 x i16> [[TMP2]], i32 0 +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <8 x i16> [[TMP3]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = or i16 [[TMP11]], [[TMP12]] +; CHECK-NEXT: [[TMP14:%.*]] = select i1 true, i16 [[TMP13]], i16 0 +; CHECK-NEXT: [[_MSPROP1:%.*]] = insertelement <8 x i16> [[TMP2]], i16 [[TMP14]], i32 0 ; CHECK-NEXT: [[RES0:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.min.sh.round(<8 x half> [[X1]], <8 x half> [[X2]], <8 x half> zeroinitializer, i8 -1, i32 4) -; CHECK-NEXT: [[TMP15:%.*]] = bitcast <8 x i16> [[TMP3]] to i128 -; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP15]], 0 -; CHECK-NEXT: [[TMP16:%.*]] = bitcast <8 x i16> [[TMP4]] to i128 -; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i128 [[TMP16]], 0 -; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSCMP3]], [[_MSCMP4]] +; CHECK-NEXT: [[TMP16:%.*]] = extractelement <8 x i16> [[_MSPROP1]], i32 0 +; CHECK-NEXT: [[TMP17:%.*]] = extractelement <8 x i16> [[TMP3]], i32 0 +; CHECK-NEXT: [[TMP18:%.*]] = or i16 [[TMP16]], [[TMP17]] +; CHECK-NEXT: [[TMP19:%.*]] = extractelement <8 x i16> [[TMP6]], i32 0 +; CHECK-NEXT: [[TMP22:%.*]] = bitcast i8 [[MASK]] to <8 x i1> +; CHECK-NEXT: [[TMP20:%.*]] = extractelement <8 x i1> [[TMP22]], i32 0 +; CHECK-NEXT: [[TMP21:%.*]] = select i1 [[TMP20]], i16 [[TMP18]], i16 [[TMP19]] +; CHECK-NEXT: [[_MSPROP2:%.*]] = insertelement <8 x i16> [[_MSPROP1]], i16 [[TMP21]], i32 0 ; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i8 [[TMP5]], 0 -; CHECK-NEXT: [[_MSOR7:%.*]] = or i1 [[_MSOR5]], [[_MSCMP6]] -; CHECK-NEXT: br i1 [[_MSOR7]], label %[[BB17:.*]], label %[[BB18:.*]], !prof [[PROF1]] -; CHECK: [[BB17]]: +; CHECK-NEXT: br i1 [[_MSCMP6]], label %[[BB22:.*]], label %[[BB23:.*]], !prof [[PROF1]] +; CHECK: [[BB22]]: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] ; CHECK-NEXT: unreachable -; CHECK: [[BB18]]: +; CHECK: [[BB23]]: ; CHECK-NEXT: [[RES1:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.min.sh.round(<8 x half> [[RES0]], <8 x half> [[X2]], <8 x half> [[SRC]], i8 [[MASK]], i32 4) -; CHECK-NEXT: [[TMP19:%.*]] = bitcast <8 x i16> [[TMP3]] to i128 -; CHECK-NEXT: [[_MSCMP8:%.*]] = icmp ne i128 [[TMP19]], 0 +; CHECK-NEXT: [[TMP25:%.*]] = extractelement <8 x i16> [[_MSPROP2]], i32 0 +; CHECK-NEXT: [[TMP26:%.*]] = extractelement <8 x i16> [[TMP3]], i32 0 +; CHECK-NEXT: [[TMP27:%.*]] = or i16 [[TMP25]], [[TMP26]] +; CHECK-NEXT: [[TMP30:%.*]] = bitcast i8 [[MASK]] to <8 x i1> +; CHECK-NEXT: [[TMP28:%.*]] = extractelement <8 x i1> [[TMP30]], i32 0 +; CHECK-NEXT: [[TMP29:%.*]] = select i1 [[TMP28]], i16 [[TMP27]], i16 0 +; CHECK-NEXT: [[_MSPROP3:%.*]] = insertelement <8 x i16> [[_MSPROP2]], i16 [[TMP29]], i32 0 ; CHECK-NEXT: [[_MSCMP9:%.*]] = icmp ne i8 [[TMP5]], 0 -; CHECK-NEXT: [[_MSOR10:%.*]] = or i1 [[_MSCMP8]], [[_MSCMP9]] -; CHECK-NEXT: br i1 [[_MSOR10]], label %[[BB20:.*]], label %[[BB21:.*]], !prof [[PROF1]] -; CHECK: [[BB20]]: +; CHECK-NEXT: br i1 [[_MSCMP9]], label %[[BB30:.*]], label %[[BB31:.*]], !prof [[PROF1]] +; CHECK: [[BB30]]: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] ; CHECK-NEXT: unreachable -; CHECK: [[BB21]]: +; CHECK: [[BB31]]: ; CHECK-NEXT: [[RES2:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.min.sh.round(<8 x half> [[RES1]], <8 x half> [[X2]], <8 x half> zeroinitializer, i8 [[MASK]], i32 4) -; CHECK-NEXT: [[TMP22:%.*]] = bitcast <8 x i16> [[_MSPROP]] to i128 -; CHECK-NEXT: [[_MSCMP11:%.*]] = icmp ne i128 [[TMP22]], 0 -; CHECK-NEXT: [[TMP23:%.*]] = bitcast <8 x i16> [[TMP4]] to i128 -; CHECK-NEXT: [[_MSCMP12:%.*]] = icmp ne i128 [[TMP23]], 0 -; CHECK-NEXT: [[_MSOR13:%.*]] = or i1 [[_MSCMP11]], [[_MSCMP12]] +; CHECK-NEXT: [[TMP33:%.*]] = extractelement <8 x i16> [[_MSPROP3]], i32 0 +; CHECK-NEXT: [[TMP34:%.*]] = extractelement <8 x i16> [[_MSPROP]], i32 0 +; CHECK-NEXT: [[TMP35:%.*]] = or i16 [[TMP33]], [[TMP34]] +; CHECK-NEXT: [[TMP36:%.*]] = extractelement <8 x i16> [[TMP6]], i32 0 +; CHECK-NEXT: [[TMP39:%.*]] = bitcast i8 [[MASK]] to <8 x i1> +; CHECK-NEXT: [[TMP37:%.*]] = extractelement <8 x i1> [[TMP39]], i32 0 +; CHECK-NEXT: [[TMP38:%.*]] = select i1 [[TMP37]], i16 [[TMP35]], i16 [[TMP36]] +; CHECK-NEXT: [[_MSPROP4:%.*]] = insertelement <8 x i16> [[_MSPROP3]], i16 [[TMP38]], i32 0 ; CHECK-NEXT: [[_MSCMP14:%.*]] = icmp ne i8 [[TMP5]], 0 -; CHECK-NEXT: [[_MSOR15:%.*]] = or i1 [[_MSOR13]], [[_MSCMP14]] -; CHECK-NEXT: br i1 [[_MSOR15]], label %[[BB24:.*]], label %[[BB25:.*]], !prof [[PROF1]] -; CHECK: [[BB24]]: +; CHECK-NEXT: br i1 [[_MSCMP14]], label %[[BB39:.*]], label %[[BB40:.*]], !prof [[PROF1]] +; CHECK: [[BB39]]: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] ; CHECK-NEXT: unreachable -; CHECK: [[BB25]]: +; CHECK: [[BB40]]: ; CHECK-NEXT: [[RES3:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.min.sh.round(<8 x half> [[RES2]], <8 x half> [[VAL]], <8 x half> [[SRC]], i8 [[MASK]], i32 4) -; CHECK-NEXT: store <8 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store <8 x i16> [[_MSPROP4]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <8 x half> [[RES3]] ; %val.half = load half,ptr %ptr @@ -1808,8 +1819,8 @@ define <8 x half> @test_int_x86_avx512fp16_mask_max_sh(<8 x half> %x1, <8 x half ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 56) to ptr), align 8 ; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 -; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 ; CHECK-NEXT: [[TMP5:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 +; CHECK-NEXT: [[TMP6:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 ; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB6:.*]], label %[[BB7:.*]], !prof [[PROF1]] @@ -1824,54 +1835,57 @@ define <8 x half> @test_int_x86_avx512fp16_mask_max_sh(<8 x half> %x1, <8 x half ; CHECK-NEXT: [[_MSLD:%.*]] = load i16, ptr [[TMP10]], align 2 ; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <8 x i16> splat (i16 -1), i16 [[_MSLD]], i32 0 ; CHECK-NEXT: [[VAL:%.*]] = insertelement <8 x half> poison, half [[VAL_HALF]], i32 0 -; CHECK-NEXT: [[TMP11:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP11]], 0 -; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x i16> [[TMP3]] to i128 -; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP12]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]] -; CHECK-NEXT: br i1 [[_MSOR]], label %[[BB13:.*]], label %[[BB14:.*]], !prof [[PROF1]] -; CHECK: [[BB13]]: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] -; CHECK-NEXT: unreachable -; CHECK: [[BB14]]: +; CHECK-NEXT: [[TMP11:%.*]] = extractelement <8 x i16> [[TMP2]], i32 0 +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <8 x i16> [[TMP3]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = or i16 [[TMP11]], [[TMP12]] +; CHECK-NEXT: [[TMP14:%.*]] = select i1 true, i16 [[TMP13]], i16 0 +; CHECK-NEXT: [[_MSPROP1:%.*]] = insertelement <8 x i16> [[TMP2]], i16 [[TMP14]], i32 0 ; CHECK-NEXT: [[RES0:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.max.sh.round(<8 x half> [[X1]], <8 x half> [[X2]], <8 x half> zeroinitializer, i8 -1, i32 4) -; CHECK-NEXT: [[TMP15:%.*]] = bitcast <8 x i16> [[TMP3]] to i128 -; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP15]], 0 -; CHECK-NEXT: [[TMP16:%.*]] = bitcast <8 x i16> [[TMP4]] to i128 -; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i128 [[TMP16]], 0 -; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSCMP3]], [[_MSCMP4]] +; CHECK-NEXT: [[TMP16:%.*]] = extractelement <8 x i16> [[_MSPROP1]], i32 0 +; CHECK-NEXT: [[TMP17:%.*]] = extractelement <8 x i16> [[TMP3]], i32 0 +; CHECK-NEXT: [[TMP18:%.*]] = or i16 [[TMP16]], [[TMP17]] +; CHECK-NEXT: [[TMP19:%.*]] = extractelement <8 x i16> [[TMP6]], i32 0 +; CHECK-NEXT: [[TMP22:%.*]] = bitcast i8 [[MASK]] to <8 x i1> +; CHECK-NEXT: [[TMP20:%.*]] = extractelement <8 x i1> [[TMP22]], i32 0 +; CHECK-NEXT: [[TMP21:%.*]] = select i1 [[TMP20]], i16 [[TMP18]], i16 [[TMP19]] +; CHECK-NEXT: [[_MSPROP2:%.*]] = insertelement <8 x i16> [[_MSPROP1]], i16 [[TMP21]], i32 0 ; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i8 [[TMP5]], 0 -; CHECK-NEXT: [[_MSOR7:%.*]] = or i1 [[_MSOR5]], [[_MSCMP6]] -; CHECK-NEXT: br i1 [[_MSOR7]], label %[[BB17:.*]], label %[[BB18:.*]], !prof [[PROF1]] -; CHECK: [[BB17]]: +; CHECK-NEXT: br i1 [[_MSCMP6]], label %[[BB22:.*]], label %[[BB23:.*]], !prof [[PROF1]] +; CHECK: [[BB22]]: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] ; CHECK-NEXT: unreachable -; CHECK: [[BB18]]: +; CHECK: [[BB23]]: ; CHECK-NEXT: [[RES1:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.max.sh.round(<8 x half> [[RES0]], <8 x half> [[X2]], <8 x half> [[SRC]], i8 [[MASK]], i32 4) -; CHECK-NEXT: [[TMP19:%.*]] = bitcast <8 x i16> [[TMP3]] to i128 -; CHECK-NEXT: [[_MSCMP8:%.*]] = icmp ne i128 [[TMP19]], 0 +; CHECK-NEXT: [[TMP25:%.*]] = extractelement <8 x i16> [[_MSPROP2]], i32 0 +; CHECK-NEXT: [[TMP26:%.*]] = extractelement <8 x i16> [[TMP3]], i32 0 +; CHECK-NEXT: [[TMP27:%.*]] = or i16 [[TMP25]], [[TMP26]] +; CHECK-NEXT: [[TMP30:%.*]] = bitcast i8 [[MASK]] to <8 x i1> +; CHECK-NEXT: [[TMP28:%.*]] = extractelement <8 x i1> [[TMP30]], i32 0 +; CHECK-NEXT: [[TMP29:%.*]] = select i1 [[TMP28]], i16 [[TMP27]], i16 0 +; CHECK-NEXT: [[_MSPROP3:%.*]] = insertelement <8 x i16> [[_MSPROP2]], i16 [[TMP29]], i32 0 ; CHECK-NEXT: [[_MSCMP9:%.*]] = icmp ne i8 [[TMP5]], 0 -; CHECK-NEXT: [[_MSOR10:%.*]] = or i1 [[_MSCMP8]], [[_MSCMP9]] -; CHECK-NEXT: br i1 [[_MSOR10]], label %[[BB20:.*]], label %[[BB21:.*]], !prof [[PROF1]] -; CHECK: [[BB20]]: +; CHECK-NEXT: br i1 [[_MSCMP9]], label %[[BB30:.*]], label %[[BB31:.*]], !prof [[PROF1]] +; CHECK: [[BB30]]: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] ; CHECK-NEXT: unreachable -; CHECK: [[BB21]]: +; CHECK: [[BB31]]: ; CHECK-NEXT: [[RES2:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.max.sh.round(<8 x half> [[RES1]], <8 x half> [[X2]], <8 x half> zeroinitializer, i8 [[MASK]], i32 4) -; CHECK-NEXT: [[TMP22:%.*]] = bitcast <8 x i16> [[_MSPROP]] to i128 -; CHECK-NEXT: [[_MSCMP11:%.*]] = icmp ne i128 [[TMP22]], 0 -; CHECK-NEXT: [[TMP23:%.*]] = bitcast <8 x i16> [[TMP4]] to i128 -; CHECK-NEXT: [[_MSCMP12:%.*]] = icmp ne i128 [[TMP23]], 0 -; CHECK-NEXT: [[_MSOR13:%.*]] = or i1 [[_MSCMP11]], [[_MSCMP12]] +; CHECK-NEXT: [[TMP33:%.*]] = extractelement <8 x i16> [[_MSPROP3]], i32 0 +; CHECK-NEXT: [[TMP34:%.*]] = extractelement <8 x i16> [[_MSPROP]], i32 0 +; CHECK-NEXT: [[TMP35:%.*]] = or i16 [[TMP33]], [[TMP34]] +; CHECK-NEXT: [[TMP36:%.*]] = extractelement <8 x i16> [[TMP6]], i32 0 +; CHECK-NEXT: [[TMP39:%.*]] = bitcast i8 [[MASK]] to <8 x i1> +; CHECK-NEXT: [[TMP37:%.*]] = extractelement <8 x i1> [[TMP39]], i32 0 +; CHECK-NEXT: [[TMP38:%.*]] = select i1 [[TMP37]], i16 [[TMP35]], i16 [[TMP36]] +; CHECK-NEXT: [[_MSPROP4:%.*]] = insertelement <8 x i16> [[_MSPROP3]], i16 [[TMP38]], i32 0 ; CHECK-NEXT: [[_MSCMP14:%.*]] = icmp ne i8 [[TMP5]], 0 -; CHECK-NEXT: [[_MSOR15:%.*]] = or i1 [[_MSOR13]], [[_MSCMP14]] -; CHECK-NEXT: br i1 [[_MSOR15]], label %[[BB24:.*]], label %[[BB25:.*]], !prof [[PROF1]] -; CHECK: [[BB24]]: +; CHECK-NEXT: br i1 [[_MSCMP14]], label %[[BB39:.*]], label %[[BB40:.*]], !prof [[PROF1]] +; CHECK: [[BB39]]: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] ; CHECK-NEXT: unreachable -; CHECK: [[BB25]]: +; CHECK: [[BB40]]: ; CHECK-NEXT: [[RES3:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.max.sh.round(<8 x half> [[RES2]], <8 x half> [[VAL]], <8 x half> [[SRC]], i8 [[MASK]], i32 4) -; CHECK-NEXT: store <8 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store <8 x i16> [[_MSPROP4]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <8 x half> [[RES3]] ; %val.half = load half,ptr %ptr