Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Experiment] Inline "is on heap?" checks for checked write barriers #111561

Closed
wants to merge 8 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions src/coreclr/inc/corinfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -2549,6 +2549,8 @@ class ICorStaticInfo
uint16_t* value
) = 0;

virtual bool getGcHeapBoundaries(void** pLowerAddr, void** pHighestAddr) = 0;

//------------------------------------------------------------------------------
// getObjectType: obtains type handle for given object
//
Expand Down
4 changes: 4 additions & 0 deletions src/coreclr/inc/icorjitinfoimpl_generated.h
Original file line number Diff line number Diff line change
Expand Up @@ -301,6 +301,10 @@ bool getStringChar(
int index,
uint16_t* value) override;

bool getGcHeapBoundaries(
void** pLowerAddr,
void** pHighestAddr) override;

CORINFO_CLASS_HANDLE getObjectType(
CORINFO_OBJECT_HANDLE objPtr) override;

Expand Down
10 changes: 5 additions & 5 deletions src/coreclr/inc/jiteeversionguid.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,11 +43,11 @@ typedef const GUID *LPCGUID;
#define GUID_DEFINED
#endif // !GUID_DEFINED

constexpr GUID JITEEVersionIdentifier = { /* cc0e7adf-e397-40b6-9d14-a7149815c991 */
0xcc0e7adf,
0xe397,
0x40b6,
{0x9d, 0x14, 0xa7, 0x14, 0x98, 0x15, 0xc9, 0x91}
constexpr GUID JITEEVersionIdentifier = { /* c30061d9-a2f4-47eb-894c-effb0f1d37d3 */
0xc30061d9,
0xa2f4,
0x47eb,
{0x89, 0x4c, 0xef, 0xfb, 0x0f, 0x1d, 0x37, 0xd3}
};

//////////////////////////////////////////////////////////////////////////////////////////////////////////
Expand Down
1 change: 1 addition & 0 deletions src/coreclr/jit/ICorJitInfo_names_generated.h
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ DEF_CLR_API(getUnBoxHelper)
DEF_CLR_API(getRuntimeTypePointer)
DEF_CLR_API(isObjectImmutable)
DEF_CLR_API(getStringChar)
DEF_CLR_API(getGcHeapBoundaries)
DEF_CLR_API(getObjectType)
DEF_CLR_API(getReadyToRunHelper)
DEF_CLR_API(getReadyToRunDelegateCtorHelper)
Expand Down
10 changes: 10 additions & 0 deletions src/coreclr/jit/ICorJitInfo_wrapper_generated.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -703,6 +703,16 @@ bool WrapICorJitInfo::getStringChar(
return temp;
}

bool WrapICorJitInfo::getGcHeapBoundaries(
void** pLowerAddr,
void** pHighestAddr)
{
API_ENTER(getGcHeapBoundaries);
bool temp = wrapHnd->getGcHeapBoundaries(pLowerAddr, pHighestAddr);
API_LEAVE(getGcHeapBoundaries);
return temp;
}

CORINFO_CLASS_HANDLE WrapICorJitInfo::getObjectType(
CORINFO_OBJECT_HANDLE objPtr)
{
Expand Down
3 changes: 1 addition & 2 deletions src/coreclr/jit/codegenarm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -811,8 +811,7 @@ void CodeGen::genCodeForCpObj(GenTreeBlk* cpObjNode)
sourceIsLocal = true;
}

bool dstOnStack =
dstAddr->gtSkipReloadOrCopy()->OperIs(GT_LCL_ADDR) || cpObjNode->GetLayout()->IsStackOnly(compiler);
bool dstOnStack = cpObjNode->IsAddressNotOnHeap(compiler);

#ifdef DEBUG
assert(!dstAddr->isContained());
Expand Down
3 changes: 1 addition & 2 deletions src/coreclr/jit/codegenarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3665,8 +3665,7 @@ void CodeGen::genCodeForCpObj(GenTreeBlk* cpObjNode)
sourceIsLocal = true;
}

bool dstOnStack =
dstAddr->gtSkipReloadOrCopy()->OperIs(GT_LCL_ADDR) || cpObjNode->GetLayout()->IsStackOnly(compiler);
bool dstOnStack = cpObjNode->IsAddressNotOnHeap(compiler);

#ifdef DEBUG
assert(!dstAddr->isContained());
Expand Down
3 changes: 1 addition & 2 deletions src/coreclr/jit/codegenxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4218,8 +4218,7 @@ void CodeGen::genCodeForCpObj(GenTreeBlk* cpObjNode)
GenTree* dstAddr = cpObjNode->Addr();
GenTree* source = cpObjNode->Data();
var_types srcAddrType = TYP_BYREF;
bool dstOnStack =
dstAddr->gtSkipReloadOrCopy()->OperIs(GT_LCL_ADDR) || cpObjNode->GetLayout()->IsStackOnly(compiler);
bool dstOnStack = cpObjNode->IsAddressNotOnHeap(compiler);

// If the GenTree node has data about GC pointers, this means we're dealing
// with CpObj, so this requires special logic.
Expand Down
3 changes: 3 additions & 0 deletions src/coreclr/jit/compiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5137,6 +5137,9 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl

if (opts.OptimizationEnabled())
{
// Expand WB
DoPhase(this, PHASE_EXPAND_CASTS, &Compiler::fgWriteBarrierExpansion);

// Optimize boolean conditions
//
DoPhase(this, PHASE_OPTIMIZE_BOOLS, &Compiler::optOptimizeBools);
Expand Down
3 changes: 3 additions & 0 deletions src/coreclr/jit/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -6113,6 +6113,9 @@ class Compiler
PhaseStatus fgLateCastExpansion();
bool fgLateCastExpansionForCall(BasicBlock** pBlock, Statement* stmt, GenTreeCall* call);

PhaseStatus fgWriteBarrierExpansion();
bool fgWriteBarrierExpansionForStore(BasicBlock** pBlock, Statement* stmt, GenTree* store);

PhaseStatus fgInsertGCPolls();
BasicBlock* fgCreateGCPoll(GCPollType pollType, BasicBlock* block);

Expand Down
15 changes: 15 additions & 0 deletions src/coreclr/jit/gentree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18244,6 +18244,21 @@ bool GenTree::isIndir() const
return OperGet() == GT_IND || OperGet() == GT_STOREIND;
}

bool GenTreeIndir::IsAddressNotOnHeap(Compiler* comp)
{
if (((gtFlags & GTF_IND_TGT_NOT_HEAP) != 0) || Addr()->gtSkipReloadOrCopy()->OperIs(GT_LCL_ADDR))
{
return true;
}

if (OperIs(GT_STORE_BLK) && AsBlk()->GetLayout()->IsStackOnly(comp))
{
return true;
}

return false;
}

bool GenTreeIndir::HasBase()
{
return Base() != nullptr;
Expand Down
1 change: 1 addition & 0 deletions src/coreclr/jit/gentree.h
Original file line number Diff line number Diff line change
Expand Up @@ -7941,6 +7941,7 @@ struct GenTreeIndir : public GenTreeOp
return gtOp2;
}

bool IsAddressNotOnHeap(Compiler* comp);
bool HasBase();
bool HasIndex();
GenTree* Base();
Expand Down
177 changes: 170 additions & 7 deletions src/coreclr/jit/helperexpansion.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,13 @@ static GenTree* SpillExpression(Compiler* comp, GenTree* expr, BasicBlock* exprB
// Return Value:
// Number of the local that replaces the tree
//
static unsigned SplitAtTreeAndReplaceItWithLocal(
Compiler* comp, BasicBlock* block, Statement* stmt, GenTree* tree, BasicBlock** topBlock, BasicBlock** bottomBlock)
static unsigned SplitAtTreeAndReplaceItWithLocal(Compiler* comp,
BasicBlock* block,
Statement* stmt,
GenTree* tree,
BasicBlock** topBlock,
BasicBlock** bottomBlock,
bool replaceWithNothing = false)
{
BasicBlock* prevBb = block;
GenTree** callUse = nullptr;
Expand All @@ -52,12 +57,21 @@ static unsigned SplitAtTreeAndReplaceItWithLocal(
newFirstStmt = newFirstStmt->GetNextStmt();
}

// Grab a temp to store the result.
const unsigned tmpNum = comp->lvaGrabTemp(true DEBUGARG("replacement local"));
comp->lvaTable[tmpNum].lvType = tree->TypeGet();
unsigned tmpNum;
if (replaceWithNothing)
{
tmpNum = BAD_VAR_NUM;
*callUse = comp->gtNewNothingNode();
}
else
{
// Grab a temp to store the result.
tmpNum = comp->lvaGrabTemp(true DEBUGARG("replacement local"));
comp->lvaTable[tmpNum].lvType = tree->TypeGet();

// Replace the original call with that temp
*callUse = comp->gtNewLclvNode(tmpNum, tree->TypeGet());
// Replace the original call with that temp
*callUse = comp->gtNewLclvNode(tmpNum, tree->TypeGet());
}

comp->fgMorphStmtBlockOps(block, stmt);
comp->gtUpdateStmtSideEffects(stmt);
Expand Down Expand Up @@ -1890,6 +1904,155 @@ bool Compiler::fgVNBasedIntrinsicExpansionForCall_ReadUtf8(BasicBlock** pBlock,
return true;
}

PhaseStatus Compiler::fgWriteBarrierExpansion()
{
if (opts.OptimizationDisabled())
{
return PhaseStatus::MODIFIED_NOTHING;
}

PhaseStatus result = PhaseStatus::MODIFIED_NOTHING;
for (BasicBlock* block = fgFirstBB; block != nullptr; block = block->Next())
{
if (block->isRunRarely())
{
// It's just an optimization - don't waste time on rarely executed blocks
continue;
}

while (true)
{
AGAIN:
for (Statement* const stmt : block->NonPhiStatements())
{
for (GenTree* const tree : stmt->TreeList())
{
if (fgWriteBarrierExpansionForStore(&block, stmt, tree))
{
result = PhaseStatus::MODIFIED_EVERYTHING;
goto AGAIN;
}
}
}
break;
}
}

if (result == PhaseStatus::MODIFIED_EVERYTHING)
{
fgInvalidateDfsTree();
}

return result;
}

bool Compiler::fgWriteBarrierExpansionForStore(BasicBlock** pBlock, Statement* stmt, GenTree* store)
{
if (!store->OperIs(GT_STORE_BLK, GT_STOREIND))
{
return false;
}

if ((store->gtFlags & (GTF_IND_TGT_NOT_HEAP | GTF_IND_TGT_HEAP)) != 0)
{
// We already know the target is on the heap or not on the heap
return false;
}

auto storeAddr = store->AsIndir()->Addr();
if (!storeAddr->OperIs(GT_LCL_VAR) || (storeAddr->AsLclVar()->GetLclNum() != info.compRetBuffArg))
{
return false;
}

void* lowerBoundAddr;
void* upperBoundAddr;
if (!info.compCompHnd->getGcHeapBoundaries(&lowerBoundAddr, &upperBoundAddr))
{
// We can't determine the heap boundaries
return false;
}

if (stmt->GetRootNode() != store)
{
return false;
}

BasicBlock* block = *pBlock;
DebugInfo debugInfo = stmt->GetDebugInfo();
BasicBlockFlags originalFlags = block->GetFlagsRaw();
BasicBlock* prevBb = block;

// We use fgSplitBlockAfterStatement() API here to split the block, however, we want to split
// it *Before* rather than *After* so if the current statement is the first in the
// current block - invoke fgSplitBlockAtBeginning
if (stmt == block->firstStmt())
{
block = fgSplitBlockAtBeginning(prevBb);
}
else
{
assert(stmt->GetPrevStmt() != block->lastStmt());
JITDUMP("Splitting " FMT_BB " after statement " FMT_STMT "\n", prevBb->bbNum, stmt->GetPrevStmt()->GetID());
block = fgSplitBlockAfterStatement(prevBb, stmt->GetPrevStmt());
}

*stmt->GetRootNodePointer() = gtNewNothingNode();
fgMorphStmtBlockOps(block, stmt);
gtUpdateStmtSideEffects(stmt);

// We split a block, possibly, in the middle - we need to propagate some flags
prevBb->SetFlagsRaw(originalFlags & (~(BBF_SPLIT_LOST | BBF_RETLESS_CALL) | BBF_GC_SAFE_POINT));
block->SetFlags(originalFlags & (BBF_SPLIT_GAINED | BBF_IMPORTED | BBF_GC_SAFE_POINT | BBF_RETLESS_CALL));

// prevBb should flow into block
assert(prevBb->KindIs(BBJ_ALWAYS) && prevBb->JumpsToNext() && prevBb->NextIs(block));

GenTree* lowerAddrCon = gtNewIconHandleNode((size_t)lowerBoundAddr, GTF_ICON_GLOBAL_PTR);
GenTree* lowerAddrTmp = fgInsertCommaFormTemp(&lowerAddrCon);

ssize_t detla = (ssize_t)upperBoundAddr - (ssize_t)lowerBoundAddr;
GenTree* upperAddrCon = gtNewOperNode(GT_ADD, TYP_I_IMPL, lowerAddrTmp, gtNewIconNode(detla, TYP_I_IMPL));
GenTree* lowerBoundAddrIndir = gtNewIndir(TYP_I_IMPL, lowerAddrCon, GTF_IND_NONFAULTING | GTF_IND_INVARIANT);
GenTree* upperBoundAddrIndir = gtNewIndir(TYP_I_IMPL, upperAddrCon, GTF_IND_NONFAULTING | GTF_IND_INVARIANT);

// const bool notInHeap = ((BYTE*)dest < g_lowest_address | (BYTE*)dest >= g_highest_address);
GenTree* lowAddrCheckOp = gtNewOperNode(GT_LT, TYP_INT, gtCloneExpr(storeAddr), lowerBoundAddrIndir);
GenTree* highAddrCheckOp = gtNewOperNode(GT_GE, TYP_INT, gtCloneExpr(storeAddr), upperBoundAddrIndir);
GenTree* combinedCheck =
gtNewOperNode(GT_NE, TYP_INT, gtNewOperNode(GT_OR, TYP_INT, lowAddrCheckOp, highAddrCheckOp), gtNewIconNode(0));
BasicBlock* lowAddrCheckBb =
fgNewBBFromTreeAfter(BBJ_COND, prevBb, gtNewOperNode(GT_JTRUE, TYP_VOID, combinedCheck), debugInfo, true);

GenTree* heapStore = gtCloneExpr(store);
heapStore->gtFlags |= GTF_IND_TGT_HEAP;

GenTree* stackStore = gtCloneExpr(store);
stackStore->gtFlags |= GTF_IND_TGT_NOT_HEAP;

BasicBlock* heapPathBb = fgNewBBFromTreeAfter(BBJ_ALWAYS, lowAddrCheckBb, heapStore, debugInfo, true);
BasicBlock* stackPathBb = fgNewBBFromTreeAfter(BBJ_ALWAYS, heapPathBb, stackStore, debugInfo, true);
fgRedirectTargetEdge(prevBb, lowAddrCheckBb);

FlowEdge* const addrCheckTrueEdge = fgAddRefPred(stackPathBb, lowAddrCheckBb);
FlowEdge* const addrCheckFalseEdge = fgAddRefPred(heapPathBb, lowAddrCheckBb);
FlowEdge* const heapPathEdge = fgAddRefPred(block, heapPathBb);
FlowEdge* const stackPathEdge = fgAddRefPred(block, stackPathBb);

lowAddrCheckBb->SetTrueEdge(addrCheckTrueEdge);
lowAddrCheckBb->SetFalseEdge(addrCheckFalseEdge);
heapPathBb->SetTargetEdge(heapPathEdge);
stackPathBb->SetTargetEdge(stackPathEdge);
lowAddrCheckBb->inheritWeightPercentage(prevBb, 100);
heapPathBb->inheritWeightPercentage(lowAddrCheckBb, 40);
stackPathBb->inheritWeightPercentage(lowAddrCheckBb, 60);
addrCheckTrueEdge->setLikelihood(0.6);
addrCheckFalseEdge->setLikelihood(0.4);
heapPathEdge->setLikelihood(1.0);
stackPathEdge->setLikelihood(1.0);
return true;
}

//------------------------------------------------------------------------------
// fgLateCastExpansion: Partially inline various cast helpers, e.g.:
//
Expand Down
2 changes: 1 addition & 1 deletion src/coreclr/jit/lowerarmarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -762,7 +762,7 @@ void Lowering::LowerBlockStore(GenTreeBlk* blkNode)
{
// No write barriers are needed on the stack.
// If the layout contains a byref, then we know it must live on the stack.
if (dstAddr->OperIs(GT_LCL_ADDR) || layout->IsStackOnly(comp))
if (blkNode->IsAddressNotOnHeap(comp))
{
// If the size is small enough to unroll then we need to mark the block as non-interruptible
// to actually allow unrolling. The generated code does not report GC references loaded in the
Expand Down
4 changes: 2 additions & 2 deletions src/coreclr/jit/lowerxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -450,7 +450,7 @@ void Lowering::LowerBlockStore(GenTreeBlk* blkNode)
{
// No write barriers are needed on the stack.
// If the layout is byref-like, then we know it must live on the stack.
if (dstAddr->OperIs(GT_LCL_ADDR) || layout->IsStackOnly(comp))
if (blkNode->IsAddressNotOnHeap(comp))
{
// If the size is small enough to unroll then we need to mark the block as non-interruptible
// to actually allow unrolling. The generated code does not report GC references loaded in the
Expand All @@ -477,7 +477,7 @@ void Lowering::LowerBlockStore(GenTreeBlk* blkNode)
// the entire operation takes 20 cycles and encodes in 5 bytes (loading RCX and REP MOVSD/Q).
unsigned nonGCSlots = 0;

if (dstAddr->OperIs(GT_LCL_ADDR) || layout->IsStackOnly(comp))
if (blkNode->IsAddressNotOnHeap(comp))
{
// If the destination is on the stack then no write barriers are needed.
nonGCSlots = layout->GetSlotCount();
Expand Down
Loading
Loading