Skip to content

Commit

Permalink
JIT: Improve x86 HWIntrinsic containment (#110736)
Browse files Browse the repository at this point in the history
* improve x86 HWIntrinsic containment

* fix emitter asserts

* fix Avx.Compare simd size, tidying

* remove incorrect instruction mapping

* comment typo

---------

Co-authored-by: Tanner Gooding <[email protected]>
  • Loading branch information
saucecontrol and tannergooding authored Jan 10, 2025
1 parent 574b967 commit 08ea199
Show file tree
Hide file tree
Showing 9 changed files with 305 additions and 629 deletions.
3 changes: 2 additions & 1 deletion src/coreclr/jit/codegeninterface.h
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,8 @@ class CodeGenInterface
public:
static bool instIsFP(instruction ins);
#if defined(TARGET_XARCH)
static bool instIsEmbeddedBroadcastCompatible(instruction ins);
static bool instIsEmbeddedBroadcastCompatible(instruction ins);
static unsigned instInputSize(instruction ins);
#endif // TARGET_XARCH
//-------------------------------------------------------------------------
// Liveness-related fields & methods
Expand Down
21 changes: 15 additions & 6 deletions src/coreclr/jit/emitxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9250,8 +9250,11 @@ void emitter::emitIns_SIMD_R_R_A_R(instruction ins,
// SSE4.1 blendv* hardcode the mask vector (op3) in XMM0
emitIns_Mov(INS_movaps, attr, REG_XMM0, op3Reg, /* canSkip */ true);

// Ensure we aren't overwriting op3 (which should be REG_XMM0)
assert(targetReg != REG_XMM0);
// If targetReg == REG_XMM0, it means that op3 was last use and we decided to
// reuse REG_XMM0 for destination i.e. targetReg. In such case, make sure
// that XMM0 value after the (op3Reg -> XMM0) move done above is not
// overwritten by op1Reg.
assert((targetReg != REG_XMM0) || (op1Reg == op3Reg));

emitIns_Mov(INS_movaps, attr, targetReg, op1Reg, /* canSkip */ true);
emitIns_R_A(ins, attr, targetReg, indir);
Expand Down Expand Up @@ -9325,8 +9328,11 @@ void emitter::emitIns_SIMD_R_R_C_R(instruction ins,
// SSE4.1 blendv* hardcode the mask vector (op3) in XMM0
emitIns_Mov(INS_movaps, attr, REG_XMM0, op3Reg, /* canSkip */ true);

// Ensure we aren't overwriting op3 (which should be REG_XMM0)
assert(targetReg != REG_XMM0);
// If targetReg == REG_XMM0, it means that op3 was last use and we decided to
// reuse REG_XMM0 for destination i.e. targetReg. In such case, make sure
// that XMM0 value after the (op3Reg -> XMM0) move done above is not
// overwritten by op1Reg.
assert((targetReg != REG_XMM0) || (op1Reg == op3Reg));

emitIns_Mov(INS_movaps, attr, targetReg, op1Reg, /* canSkip */ true);
emitIns_R_C(ins, attr, targetReg, fldHnd, offs);
Expand Down Expand Up @@ -9400,8 +9406,11 @@ void emitter::emitIns_SIMD_R_R_S_R(instruction ins,
// SSE4.1 blendv* hardcode the mask vector (op3) in XMM0
emitIns_Mov(INS_movaps, attr, REG_XMM0, op3Reg, /* canSkip */ true);

// Ensure we aren't overwriting op3 (which should be REG_XMM0)
assert(targetReg != REG_XMM0);
// If targetReg == REG_XMM0, it means that op3 was last use and we decided to
// reuse REG_XMM0 for destination i.e. targetReg. In such case, make sure
// that XMM0 value after the (op3Reg -> XMM0) move done above is not
// overwritten by op1Reg.
assert((targetReg != REG_XMM0) || (op1Reg == op3Reg));

emitIns_Mov(INS_movaps, attr, targetReg, op1Reg, /* canSkip */ true);
emitIns_R_S(ins, attr, targetReg, varx, offs);
Expand Down
4 changes: 2 additions & 2 deletions src/coreclr/jit/gentree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20430,6 +20430,7 @@ bool GenTree::isContainableHWIntrinsic() const
return true;
}

case NI_SSE3_LoadAndDuplicateToVector128:
case NI_SSE3_MoveAndDuplicate:
case NI_AVX_BroadcastScalarToVector128:
case NI_AVX2_BroadcastScalarToVector128:
Expand Down Expand Up @@ -27011,8 +27012,6 @@ bool GenTreeHWIntrinsic::OperIsMemoryLoad(GenTree** pAddr) const
case NI_SSE41_ConvertToVector128Int64:
case NI_AVX2_BroadcastScalarToVector128:
case NI_AVX2_BroadcastScalarToVector256:
case NI_AVX512F_BroadcastScalarToVector512:
case NI_AVX512BW_BroadcastScalarToVector512:
case NI_AVX2_ConvertToVector256Int16:
case NI_AVX2_ConvertToVector256Int32:
case NI_AVX2_ConvertToVector256Int64:
Expand Down Expand Up @@ -27281,6 +27280,7 @@ bool GenTreeHWIntrinsic::OperIsBroadcastScalar() const
case NI_AVX2_BroadcastScalarToVector256:
case NI_AVX_BroadcastScalarToVector128:
case NI_AVX_BroadcastScalarToVector256:
case NI_SSE3_LoadAndDuplicateToVector128:
case NI_SSE3_MoveAndDuplicate:
case NI_AVX512F_BroadcastScalarToVector512:
return true;
Expand Down
2 changes: 0 additions & 2 deletions src/coreclr/jit/hwintrinsic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2075,8 +2075,6 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic,
case NI_SSE41_ConvertToVector128Int64:
case NI_AVX2_BroadcastScalarToVector128:
case NI_AVX2_BroadcastScalarToVector256:
case NI_AVX512F_BroadcastScalarToVector512:
case NI_AVX512BW_BroadcastScalarToVector512:
case NI_AVX2_ConvertToVector256Int16:
case NI_AVX2_ConvertToVector256Int32:
case NI_AVX2_ConvertToVector256Int64:
Expand Down
13 changes: 1 addition & 12 deletions src/coreclr/jit/hwintrinsiccodegenxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -912,18 +912,7 @@ void CodeGen::genHWIntrinsic_R_RM(
case NI_AVX2_BroadcastScalarToVector128:
case NI_AVX2_BroadcastScalarToVector256:
{
if (varTypeIsSmall(node->GetSimdBaseType()))
{
if (compiler->canUseEvexEncoding())
{
needsInstructionFixup = true;
}
else
{
needsBroadcastFixup = true;
}
}
else if (compiler->canUseEvexEncoding())
if (compiler->canUseEvexEncoding())
{
needsInstructionFixup = true;
}
Expand Down
Loading

0 comments on commit 08ea199

Please sign in to comment.