Skip to content

Commit 08ea199

Browse files
JIT: Improve x86 HWIntrinsic containment (#110736)
* improve x86 HWIntrinsic containment
* fix emitter asserts
* fix Avx.Compare simd size, tidying
* remove incorrect instruction mapping
* comment typo

---------

Co-authored-by: Tanner Gooding <[email protected]>
1 parent 574b967 commit 08ea199

File tree

9 files changed

+305
-629
lines changed

9 files changed

+305
-629
lines changed

src/coreclr/jit/codegeninterface.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -159,7 +159,8 @@ class CodeGenInterface
159159
public:
160160
static bool instIsFP(instruction ins);
161161
#if defined(TARGET_XARCH)
162-
static bool instIsEmbeddedBroadcastCompatible(instruction ins);
162+
static bool instIsEmbeddedBroadcastCompatible(instruction ins);
163+
static unsigned instInputSize(instruction ins);
163164
#endif // TARGET_XARCH
164165
//-------------------------------------------------------------------------
165166
// Liveness-related fields & methods

src/coreclr/jit/emitxarch.cpp

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -9250,8 +9250,11 @@ void emitter::emitIns_SIMD_R_R_A_R(instruction ins,
92509250
// SSE4.1 blendv* hardcode the mask vector (op3) in XMM0
92519251
emitIns_Mov(INS_movaps, attr, REG_XMM0, op3Reg, /* canSkip */ true);
92529252

9253-
// Ensure we aren't overwriting op3 (which should be REG_XMM0)
9254-
assert(targetReg != REG_XMM0);
9253+
// If targetReg == REG_XMM0, it means that op3 was last use and we decided to
9254+
// reuse REG_XMM0 for destination i.e. targetReg. In such case, make sure
9255+
// that XMM0 value after the (op3Reg -> XMM0) move done above is not
9256+
// overwritten by op1Reg.
9257+
assert((targetReg != REG_XMM0) || (op1Reg == op3Reg));
92559258

92569259
emitIns_Mov(INS_movaps, attr, targetReg, op1Reg, /* canSkip */ true);
92579260
emitIns_R_A(ins, attr, targetReg, indir);
@@ -9325,8 +9328,11 @@ void emitter::emitIns_SIMD_R_R_C_R(instruction ins,
93259328
// SSE4.1 blendv* hardcode the mask vector (op3) in XMM0
93269329
emitIns_Mov(INS_movaps, attr, REG_XMM0, op3Reg, /* canSkip */ true);
93279330

9328-
// Ensure we aren't overwriting op3 (which should be REG_XMM0)
9329-
assert(targetReg != REG_XMM0);
9331+
// If targetReg == REG_XMM0, it means that op3 was last use and we decided to
9332+
// reuse REG_XMM0 for destination i.e. targetReg. In such case, make sure
9333+
// that XMM0 value after the (op3Reg -> XMM0) move done above is not
9334+
// overwritten by op1Reg.
9335+
assert((targetReg != REG_XMM0) || (op1Reg == op3Reg));
93309336

93319337
emitIns_Mov(INS_movaps, attr, targetReg, op1Reg, /* canSkip */ true);
93329338
emitIns_R_C(ins, attr, targetReg, fldHnd, offs);
@@ -9400,8 +9406,11 @@ void emitter::emitIns_SIMD_R_R_S_R(instruction ins,
94009406
// SSE4.1 blendv* hardcode the mask vector (op3) in XMM0
94019407
emitIns_Mov(INS_movaps, attr, REG_XMM0, op3Reg, /* canSkip */ true);
94029408

9403-
// Ensure we aren't overwriting op3 (which should be REG_XMM0)
9404-
assert(targetReg != REG_XMM0);
9409+
// If targetReg == REG_XMM0, it means that op3 was last use and we decided to
9410+
// reuse REG_XMM0 for destination i.e. targetReg. In such case, make sure
9411+
// that XMM0 value after the (op3Reg -> XMM0) move done above is not
9412+
// overwritten by op1Reg.
9413+
assert((targetReg != REG_XMM0) || (op1Reg == op3Reg));
94059414

94069415
emitIns_Mov(INS_movaps, attr, targetReg, op1Reg, /* canSkip */ true);
94079416
emitIns_R_S(ins, attr, targetReg, varx, offs);

src/coreclr/jit/gentree.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20430,6 +20430,7 @@ bool GenTree::isContainableHWIntrinsic() const
2043020430
return true;
2043120431
}
2043220432

20433+
case NI_SSE3_LoadAndDuplicateToVector128:
2043320434
case NI_SSE3_MoveAndDuplicate:
2043420435
case NI_AVX_BroadcastScalarToVector128:
2043520436
case NI_AVX2_BroadcastScalarToVector128:
@@ -27011,8 +27012,6 @@ bool GenTreeHWIntrinsic::OperIsMemoryLoad(GenTree** pAddr) const
2701127012
case NI_SSE41_ConvertToVector128Int64:
2701227013
case NI_AVX2_BroadcastScalarToVector128:
2701327014
case NI_AVX2_BroadcastScalarToVector256:
27014-
case NI_AVX512F_BroadcastScalarToVector512:
27015-
case NI_AVX512BW_BroadcastScalarToVector512:
2701627015
case NI_AVX2_ConvertToVector256Int16:
2701727016
case NI_AVX2_ConvertToVector256Int32:
2701827017
case NI_AVX2_ConvertToVector256Int64:
@@ -27281,6 +27280,7 @@ bool GenTreeHWIntrinsic::OperIsBroadcastScalar() const
2728127280
case NI_AVX2_BroadcastScalarToVector256:
2728227281
case NI_AVX_BroadcastScalarToVector128:
2728327282
case NI_AVX_BroadcastScalarToVector256:
27283+
case NI_SSE3_LoadAndDuplicateToVector128:
2728427284
case NI_SSE3_MoveAndDuplicate:
2728527285
case NI_AVX512F_BroadcastScalarToVector512:
2728627286
return true;

src/coreclr/jit/hwintrinsic.cpp

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2075,8 +2075,6 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic,
20752075
case NI_SSE41_ConvertToVector128Int64:
20762076
case NI_AVX2_BroadcastScalarToVector128:
20772077
case NI_AVX2_BroadcastScalarToVector256:
2078-
case NI_AVX512F_BroadcastScalarToVector512:
2079-
case NI_AVX512BW_BroadcastScalarToVector512:
20802078
case NI_AVX2_ConvertToVector256Int16:
20812079
case NI_AVX2_ConvertToVector256Int32:
20822080
case NI_AVX2_ConvertToVector256Int64:

src/coreclr/jit/hwintrinsiccodegenxarch.cpp

Lines changed: 1 addition & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -912,18 +912,7 @@ void CodeGen::genHWIntrinsic_R_RM(
912912
case NI_AVX2_BroadcastScalarToVector128:
913913
case NI_AVX2_BroadcastScalarToVector256:
914914
{
915-
if (varTypeIsSmall(node->GetSimdBaseType()))
916-
{
917-
if (compiler->canUseEvexEncoding())
918-
{
919-
needsInstructionFixup = true;
920-
}
921-
else
922-
{
923-
needsBroadcastFixup = true;
924-
}
925-
}
926-
else if (compiler->canUseEvexEncoding())
915+
if (compiler->canUseEvexEncoding())
927916
{
928917
needsInstructionFixup = true;
929918
}

0 commit comments

Comments
 (0)