Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enhance logging of stubs through the PerfMap on Linux #113943

Open
wants to merge 7 commits into
base: main
Choose a base branch
from
1 change: 1 addition & 0 deletions src/coreclr/inc/clrconfigvalues.h
Original file line number Diff line number Diff line change
Expand Up @@ -457,6 +457,7 @@ RETAIL_CONFIG_DWORD_INFO(EXTERNAL_PerfMapEnabled, W("PerfMapEnabled"), 0, "This
RETAIL_CONFIG_STRING_INFO_EX(EXTERNAL_PerfMapJitDumpPath, W("PerfMapJitDumpPath"), "Specifies a path to write the perf jitdump file. Defaults to /tmp", CLRConfig::LookupOptions::TrimWhiteSpaceFromStringValue)
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_PerfMapIgnoreSignal, W("PerfMapIgnoreSignal"), 0, "When perf map is enabled, this option will configure the specified signal to be accepted and ignored as a marker in the perf logs. It is disabled by default")
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_PerfMapShowOptimizationTiers, W("PerfMapShowOptimizationTiers"), 1, "Shows optimization tiers in the perf map for methods, as part of the symbol name. Useful for seeing separate stack frames for different optimization tiers of each method.")
// PerfMapStubGranularity is a bit mask (default 0):
//   bit 0 clear -> attempt to group all stubs of a type together;
//   bit 1 set   -> record stubs at individual allocation sites (more expensive, more accurate).
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_PerfMapStubGranularity, W("PerfMapStubGranularity"), 0, "Report stubs with varying amounts of granularity (low bit being zero indicates attempt to group all stubs of a type together) (second lowest bit being non-zero records stubs at individual allocation sites, which is more expensive, but also more accurate).")
#endif

RETAIL_CONFIG_STRING_INFO(EXTERNAL_StartupDelayMS, W("StartupDelayMS"), "")
Expand Down
12 changes: 10 additions & 2 deletions src/coreclr/utilcode/loaderheap.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1065,6 +1065,9 @@ BOOL UnlockedLoaderHeap::CommitPages(void* pData, size_t dwSizeToCommitPart)
return TRUE;
}

#ifdef FEATURE_PERFMAP
// Forward declaration of the PerfMap query consulted by
// UnlockedLoaderHeap::UnlockedReservePages: when the heap is interleaved and
// this returns true, the reservation size is not rounded up to
// VIRTUAL_ALLOC_RESERVE_GRANULARITY.
// NOTE(review): presumably derived from the PerfMapStubGranularity setting - confirm at the definition.
bool PerfMapLowGranularityStubs();
#endif // FEATURE_PERFMAP
BOOL UnlockedLoaderHeap::UnlockedReservePages(size_t dwSizeToCommit)
{
CONTRACTL
Expand Down Expand Up @@ -1106,8 +1109,13 @@ BOOL UnlockedLoaderHeap::UnlockedReservePages(size_t dwSizeToCommit)
// Figure out how much to reserve
dwSizeToReserve = max<size_t>(dwSizeToCommit, m_dwReserveBlockSize);

// Round to VIRTUAL_ALLOC_RESERVE_GRANULARITY
dwSizeToReserve = ALIGN_UP(dwSizeToReserve, VIRTUAL_ALLOC_RESERVE_GRANULARITY);
#ifdef FEATURE_PERFMAP // Perfmap requires that the memory assigned to stub generated regions be allocated only via fully commited memory
if (!IsInterleaved() || !PerfMapLowGranularityStubs())
#endif // FEATURE_PERFMAP
{
// Round to VIRTUAL_ALLOC_RESERVE_GRANULARITY
dwSizeToReserve = ALIGN_UP(dwSizeToReserve, VIRTUAL_ALLOC_RESERVE_GRANULARITY);
}

_ASSERTE(dwSizeToCommit <= dwSizeToReserve);

Expand Down
10 changes: 9 additions & 1 deletion src/coreclr/vm/amd64/cgenamd64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -694,7 +694,7 @@ DWORD GetOffsetAtEndOfFunction(ULONGLONG uImageBase,

#define DYNAMIC_HELPER_ALIGNMENT sizeof(TADDR)

#define BEGIN_DYNAMIC_HELPER_EMIT(size) \
#define BEGIN_DYNAMIC_HELPER_EMIT_WORKER(size) \
SIZE_T cb = size; \
SIZE_T cbAligned = ALIGN_UP(cb, DYNAMIC_HELPER_ALIGNMENT); \
BYTE * pStartRX = (BYTE *)(void*)pAllocator->GetDynamicHelpersHeap()->AllocAlignedMem(cbAligned, DYNAMIC_HELPER_ALIGNMENT); \
Expand All @@ -703,6 +703,14 @@ DWORD GetOffsetAtEndOfFunction(ULONGLONG uImageBase,
size_t rxOffset = pStartRX - pStart; \
BYTE * p = pStart;

// BEGIN_DYNAMIC_HELPER_EMIT(size) starts the emission of a dynamic helper by
// expanding BEGIN_DYNAMIC_HELPER_EMIT_WORKER(size). When FEATURE_PERFMAP is
// defined it additionally reports the new helper to the perf map.
//
// The helper is reported at pStartRX (the address handed out by the dynamic
// helpers heap) rather than at the write cursor p: the worker computes
// rxOffset = pStartRX - pStart, i.e. the pointer written through may differ
// from the address the helper lives at, and the perf map entry has to name
// the latter. cb (== size) is passed so the macro argument is not expanded
// a second time.
#ifdef FEATURE_PERFMAP
#define BEGIN_DYNAMIC_HELPER_EMIT(size) \
    BEGIN_DYNAMIC_HELPER_EMIT_WORKER(size) \
    PerfMap::LogStubs(__FUNCTION__, "DynamicHelper", (PCODE)pStartRX, cb, PerfMapStubType::Individual);
#else
#define BEGIN_DYNAMIC_HELPER_EMIT(size) BEGIN_DYNAMIC_HELPER_EMIT_WORKER(size)
#endif

#define END_DYNAMIC_HELPER_EMIT() \
_ASSERTE(pStart + cb == p); \
while (p < pStart + cbAligned) *p++ = X86_INSTR_INT3; \
Expand Down
11 changes: 10 additions & 1 deletion src/coreclr/vm/arm/stubs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1758,7 +1758,7 @@ void MovRegImm(BYTE* p, int reg, TADDR imm)

#define DYNAMIC_HELPER_ALIGNMENT sizeof(TADDR)

#define BEGIN_DYNAMIC_HELPER_EMIT(size) \
#define BEGIN_DYNAMIC_HELPER_EMIT_WORKER(size) \
SIZE_T cb = size; \
SIZE_T cbAligned = ALIGN_UP(cb, DYNAMIC_HELPER_ALIGNMENT); \
BYTE * pStartRX = (BYTE *)(void*)pAllocator->GetDynamicHelpersHeap()->AllocAlignedMem(cbAligned, DYNAMIC_HELPER_ALIGNMENT); \
Expand All @@ -1767,6 +1767,15 @@ void MovRegImm(BYTE* p, int reg, TADDR imm)
size_t rxOffset = pStartRX - pStart; \
BYTE * p = pStart;

// BEGIN_DYNAMIC_HELPER_EMIT(size) starts the emission of a dynamic helper by
// expanding BEGIN_DYNAMIC_HELPER_EMIT_WORKER(size). When FEATURE_PERFMAP is
// defined it additionally reports the new helper to the perf map.
//
// The helper is reported at pStartRX (the address handed out by the dynamic
// helpers heap) rather than at the write cursor p: the worker computes
// rxOffset = pStartRX - pStart, i.e. the pointer written through may differ
// from the address the helper lives at, and the perf map entry has to name
// the latter. cb (== size) is passed so the macro argument is not expanded
// a second time.
#ifdef FEATURE_PERFMAP
#define BEGIN_DYNAMIC_HELPER_EMIT(size) \
    BEGIN_DYNAMIC_HELPER_EMIT_WORKER(size) \
    PerfMap::LogStubs(__FUNCTION__, "DynamicHelper", (PCODE)pStartRX, cb, PerfMapStubType::Individual);
#else
#define BEGIN_DYNAMIC_HELPER_EMIT(size) BEGIN_DYNAMIC_HELPER_EMIT_WORKER(size)
#endif


#define END_DYNAMIC_HELPER_EMIT() \
_ASSERTE(pStart + cb == p); \
while (p < pStart + cbAligned) { *(WORD *)p = 0xdefe; p += 2; } \
Expand Down
11 changes: 10 additions & 1 deletion src/coreclr/vm/arm64/stubs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1447,7 +1447,7 @@ void StubLinkerCPU::EmitCallManagedMethod(MethodDesc *pMD, BOOL fTailCall)

#define DYNAMIC_HELPER_ALIGNMENT sizeof(TADDR)

#define BEGIN_DYNAMIC_HELPER_EMIT(size) \
#define BEGIN_DYNAMIC_HELPER_EMIT_WORKER(size) \
SIZE_T cb = size; \
SIZE_T cbAligned = ALIGN_UP(cb, DYNAMIC_HELPER_ALIGNMENT); \
BYTE * pStartRX = (BYTE *)(void*)pAllocator->GetDynamicHelpersHeap()->AllocAlignedMem(cbAligned, DYNAMIC_HELPER_ALIGNMENT); \
Expand All @@ -1456,6 +1456,15 @@ void StubLinkerCPU::EmitCallManagedMethod(MethodDesc *pMD, BOOL fTailCall)
size_t rxOffset = pStartRX - pStart; \
BYTE * p = pStart;

// BEGIN_DYNAMIC_HELPER_EMIT(size) starts the emission of a dynamic helper by
// expanding BEGIN_DYNAMIC_HELPER_EMIT_WORKER(size). When FEATURE_PERFMAP is
// defined it additionally reports the new helper to the perf map.
//
// The helper is reported at pStartRX (the address handed out by the dynamic
// helpers heap) rather than at the write cursor p: the worker computes
// rxOffset = pStartRX - pStart, i.e. the pointer written through may differ
// from the address the helper lives at, and the perf map entry has to name
// the latter. cb (== size) is passed so the macro argument is not expanded
// a second time.
#ifdef FEATURE_PERFMAP
#define BEGIN_DYNAMIC_HELPER_EMIT(size) \
    BEGIN_DYNAMIC_HELPER_EMIT_WORKER(size) \
    PerfMap::LogStubs(__FUNCTION__, "DynamicHelper", (PCODE)pStartRX, cb, PerfMapStubType::Individual);
#else
#define BEGIN_DYNAMIC_HELPER_EMIT(size) BEGIN_DYNAMIC_HELPER_EMIT_WORKER(size)
#endif


#define END_DYNAMIC_HELPER_EMIT() \
_ASSERTE(pStart + cb == p); \
while (p < pStart + cbAligned) { *(DWORD*)p = 0xBADC0DF0; p += 4; }\
Expand Down
1 change: 1 addition & 0 deletions src/coreclr/vm/ceemain.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -735,6 +735,7 @@ void EEStartupHelper()

#ifdef FEATURE_PERFMAP
PerfMap::Initialize();
InitThreadManagerPerfMapData();
#endif

#ifdef FEATURE_PGO
Expand Down
5 changes: 4 additions & 1 deletion src/coreclr/vm/codeman.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2127,6 +2127,7 @@ TaggedMemAllocPtr CodeFragmentHeap::RealAllocAlignedMem(size_t dwRequestedSize
if (dwSize < SMALL_BLOCK_THRESHOLD)
dwSize = 4 * SMALL_BLOCK_THRESHOLD;
pMem = ExecutionManager::GetEEJitManager()->allocCodeFragmentBlock(dwSize, dwAlignment, m_pAllocator, m_kind);
ReportStubBlock(pMem, dwSize, m_kind);
}

SIZE_T dwExtra = (BYTE *)ALIGN_UP(pMem, dwAlignment) - (BYTE *)pMem;
Expand Down Expand Up @@ -3226,6 +3227,8 @@ JumpStubBlockHeader * EEJitManager::allocJumpStubBlock(MethodDesc* pMD, DWORD n
_ASSERTE(IS_ALIGNED(blockWriterHolder.GetRW(), CODE_SIZE_ALIGN));
}

ReportStubBlock((void*)mem, blockSize, STUB_CODE_BLOCK_JUMPSTUB);

blockWriterHolder.GetRW()->m_next = NULL;
blockWriterHolder.GetRW()->m_used = 0;
blockWriterHolder.GetRW()->m_allocated = numJumps;
Expand Down Expand Up @@ -5253,7 +5256,7 @@ PCODE ExecutionManager::getNextJumpStub(MethodDesc* pMD, PCODE target,
emitBackToBackJump(jumpStub, jumpStubRW, (void*) target);

#ifdef FEATURE_PERFMAP
PerfMap::LogStubs(__FUNCTION__, "emitBackToBackJump", (PCODE)jumpStub, BACK_TO_BACK_JUMP_ALLOCATE_SIZE);
PerfMap::LogStubs(__FUNCTION__, "emitBackToBackJump", (PCODE)jumpStub, BACK_TO_BACK_JUMP_ALLOCATE_SIZE, PerfMapStubType::IndividualWithinBlock);
#endif

// We always add the new jumpstub to the jumpStubCache
Expand Down
39 changes: 39 additions & 0 deletions src/coreclr/vm/codeman.h
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@ class EECodeInfo;
#define ROUND_DOWN_TO_PAGE(x) ( (size_t) (x) & ~((size_t)GetOsPageSize()-1))
#define ROUND_UP_TO_PAGE(x) (((size_t) (x) + (GetOsPageSize()-1)) & ~((size_t)GetOsPageSize()-1))


enum StubCodeBlockKind : int
{
STUB_CODE_BLOCK_UNKNOWN = 0,
Expand All @@ -112,6 +113,44 @@ enum StubCodeBlockKind : int
STUB_CODE_BLOCK_METHOD_CALL_THUNK = 0x13,
};

// Maps a StubCodeBlockKind to a static, human-readable name.
//
// Every kind handled here gets its own distinct label so that different kinds
// of stub blocks can be told apart by name. The VSD kinds are only handled
// when FEATURE_VIRTUAL_STUB_DISPATCH is defined. Any kind without an explicit
// case (including STUB_CODE_BLOCK_UNKNOWN) yields "Unknown".
//
// Returns a pointer to a string literal; the caller must not free or modify it.
inline const char *GetStubCodeBlockKindString(StubCodeBlockKind kind)
{
    switch (kind)
    {
    case STUB_CODE_BLOCK_JUMPSTUB:
        return "JumpStub";
    case STUB_CODE_BLOCK_STUBLINK:
        return "StubLinkStub";
    case STUB_CODE_BLOCK_MANAGED:
        return "Managed";
    case STUB_CODE_BLOCK_METHOD_CALL_THUNK:
        return "MethodCallThunk";
    case STUB_CODE_BLOCK_DYNAMICHELPER:
        // Previously mislabelled as "MethodCallThunk" (copy/paste).
        return "DynamicHelper";
    case STUB_CODE_BLOCK_FIXUPPRECODE:
        // Previously mislabelled as "MethodCallThunk" (copy/paste).
        return "FixupPrecode";
#ifdef FEATURE_VIRTUAL_STUB_DISPATCH
    case STUB_CODE_BLOCK_VSD_DISPATCH_STUB:
        return "VSD_DispatchStub";
    case STUB_CODE_BLOCK_VSD_RESOLVE_STUB:
        return "VSD_ResolveStub";
    case STUB_CODE_BLOCK_VSD_LOOKUP_STUB:
        return "VSD_LookupStub";
    case STUB_CODE_BLOCK_VSD_VTABLE_STUB:
        return "VSD_VTableStub";
#endif // FEATURE_VIRTUAL_STUB_DISPATCH
    default:
        return "Unknown";
    }
}

// Reports a block of stub code of the given kind, covering `size` bytes
// starting at `start`.
// NOTE(review): the FEATURE_PERFMAP implementation is not in this header;
// presumably it forwards the block to the perf map - confirm at the definition.
void ReportStubBlock(void* start, size_t size, StubCodeBlockKind kind);
#ifndef FEATURE_PERFMAP
// Without FEATURE_PERFMAP there is nothing to report to, so this is an empty
// inline function; call sites can invoke ReportStubBlock unconditionally
// without their own #ifdef.
inline void ReportStubBlock(void* start, size_t size, StubCodeBlockKind kind)
{
}
#endif

//-----------------------------------------------------------------------------
// Method header which exists just before the code.
// Every IJitManager could have its own format for the header.
Expand Down
2 changes: 1 addition & 1 deletion src/coreclr/vm/comdelegate.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -913,7 +913,7 @@ static PCODE SetupShuffleThunk(MethodTable * pDelMT, MethodDesc *pTargetMeth)
pShuffleThunkCache = ((AssemblyLoaderAllocator*)pLoaderAllocator)->GetShuffleThunkCache();
}

pShuffleThunk = pShuffleThunkCache->Canonicalize((const BYTE *)&rShuffleEntryArray[0]);
pShuffleThunk = pShuffleThunkCache->Canonicalize((const BYTE *)&rShuffleEntryArray[0], "DelegateShuffleThunk");
}
else
{
Expand Down
6 changes: 6 additions & 0 deletions src/coreclr/vm/dllimportcallback.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@
#include "stubgen.h"
#include "appdomain.inl"

#ifdef FEATURE_PERFMAP
#include "perfmap.h"
#endif

struct UM2MThunk_Args
{
Expand Down Expand Up @@ -249,6 +252,9 @@ UMEntryThunk* UMEntryThunk::CreateUMEntryThunk()

UMEntryThunkData *pData = (UMEntryThunkData *)pamTracker->Track(pLoaderAllocator->GetLowFrequencyHeap()->AllocMem(S_SIZE_T(sizeof(UMEntryThunkData))));
p = (UMEntryThunk*)pamTracker->Track(pLoaderAllocator->GetNewStubPrecodeHeap()->AllocAlignedMem(size, 1));
#ifdef FEATURE_PERFMAP
PerfMap::LogStubs(__FUNCTION__, "UMEntryThunk", (PCODE)p, size, PerfMapStubType::IndividualWithinBlock);
#endif
pData->m_pUMEntryThunk = p;
p->Init(p, dac_cast<TADDR>(pData), NULL, dac_cast<TADDR>(PRECODE_UMENTRY_THUNK));
pamTracker->SuppressRelease();
Expand Down
10 changes: 9 additions & 1 deletion src/coreclr/vm/i386/cgenx86.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -980,7 +980,7 @@ void ResumeAtJit(PCONTEXT pContext, LPVOID oldESP)

#define DYNAMIC_HELPER_ALIGNMENT sizeof(TADDR)

#define BEGIN_DYNAMIC_HELPER_EMIT(size) \
#define BEGIN_DYNAMIC_HELPER_EMIT_WORKER(size) \
SIZE_T cb = size; \
SIZE_T cbAligned = ALIGN_UP(cb, DYNAMIC_HELPER_ALIGNMENT); \
BYTE * pStartRX = (BYTE *)(void*)pAllocator->GetDynamicHelpersHeap()->AllocAlignedMem(cbAligned, DYNAMIC_HELPER_ALIGNMENT); \
Expand All @@ -989,6 +989,14 @@ void ResumeAtJit(PCONTEXT pContext, LPVOID oldESP)
size_t rxOffset = pStartRX - pStart; \
BYTE * p = pStart;

// BEGIN_DYNAMIC_HELPER_EMIT(size) starts the emission of a dynamic helper by
// expanding BEGIN_DYNAMIC_HELPER_EMIT_WORKER(size). When FEATURE_PERFMAP is
// defined it additionally reports the new helper to the perf map.
//
// The helper is reported at pStartRX (the address handed out by the dynamic
// helpers heap) rather than at the write cursor p: the worker computes
// rxOffset = pStartRX - pStart, i.e. the pointer written through may differ
// from the address the helper lives at, and the perf map entry has to name
// the latter. cb (== size) is passed so the macro argument is not expanded
// a second time.
#ifdef FEATURE_PERFMAP
#define BEGIN_DYNAMIC_HELPER_EMIT(size) \
    BEGIN_DYNAMIC_HELPER_EMIT_WORKER(size) \
    PerfMap::LogStubs(__FUNCTION__, "DynamicHelper", (PCODE)pStartRX, cb, PerfMapStubType::Individual);
#else
#define BEGIN_DYNAMIC_HELPER_EMIT(size) BEGIN_DYNAMIC_HELPER_EMIT_WORKER(size)
#endif

#define END_DYNAMIC_HELPER_EMIT() \
_ASSERTE(pStart + cb == p); \
while (p < pStart + cbAligned) *p++ = X86_INSTR_INT3; \
Expand Down
8 changes: 4 additions & 4 deletions src/coreclr/vm/i386/jitinterfacex86.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -366,7 +366,7 @@ void *JIT_TrialAlloc::GenAllocSFast(Flags flags)
// Jump to the framed helper
sl.X86EmitNearJump(sl.NewExternalCodeLabel((LPVOID)JIT_New));

Stub *pStub = sl.Link(SystemDomain::GetGlobalLoaderAllocator()->GetExecutableHeap());
Stub *pStub = sl.Link(SystemDomain::GetGlobalLoaderAllocator()->GetExecutableHeap(), NEWSTUB_FL_NONE, "AllocSFast");

return (void *)pStub->GetEntryPoint();
}
Expand Down Expand Up @@ -498,7 +498,7 @@ void *JIT_TrialAlloc::GenBox(Flags flags)
// Jump to the slow version of JIT_Box
sl.X86EmitNearJump(sl.NewExternalCodeLabel((LPVOID) JIT_Box));

Stub *pStub = sl.Link(SystemDomain::GetGlobalLoaderAllocator()->GetExecutableHeap());
Stub *pStub = sl.Link(SystemDomain::GetGlobalLoaderAllocator()->GetExecutableHeap(), NEWSTUB_FL_NONE, "Box");

return (void *)pStub->GetEntryPoint();
}
Expand Down Expand Up @@ -656,7 +656,7 @@ void *JIT_TrialAlloc::GenAllocArray(Flags flags)
_ASSERTE(target->e.m_pExternalAddress);
sl.X86EmitNearJump(target);

Stub *pStub = sl.Link(SystemDomain::GetGlobalLoaderAllocator()->GetExecutableHeap());
Stub *pStub = sl.Link(SystemDomain::GetGlobalLoaderAllocator()->GetExecutableHeap(), NEWSTUB_FL_NONE, "AllocArray");

return (void *)pStub->GetEntryPoint();
}
Expand Down Expand Up @@ -744,7 +744,7 @@ void *JIT_TrialAlloc::GenAllocString(Flags flags)
CodeLabel * target = sl.NewExternalCodeLabel((LPVOID)FramedAllocateString);
sl.X86EmitNearJump(target);

Stub *pStub = sl.Link(SystemDomain::GetGlobalLoaderAllocator()->GetExecutableHeap());
Stub *pStub = sl.Link(SystemDomain::GetGlobalLoaderAllocator()->GetExecutableHeap(), NEWSTUB_FL_NONE, "AllocString");

return (void *)pStub->GetEntryPoint();
}
Expand Down
1 change: 1 addition & 0 deletions src/coreclr/vm/loaderallocator.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ class CodeRangeMapRangeList : public RangeList
#ifndef DACCESS_COMPILE
void AddRangeWorkerHelper(TADDR start, TADDR end, void* id)
{
ReportStubBlock((void*)start, (size_t)(end - start), _rangeListType);
SimpleWriteLockHolder lh(&_RangeListRWLock);

_ASSERTE(id == _id || _id == NULL);
Expand Down
10 changes: 9 additions & 1 deletion src/coreclr/vm/loongarch64/stubs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1314,7 +1314,7 @@ void StubLinkerCPU::EmitCallManagedMethod(MethodDesc *pMD, BOOL fTailCall)

#define DYNAMIC_HELPER_ALIGNMENT sizeof(TADDR)

#define BEGIN_DYNAMIC_HELPER_EMIT(size) \
#define BEGIN_DYNAMIC_HELPER_EMIT_WORKER(size) \
SIZE_T cb = size; \
SIZE_T cbAligned = ALIGN_UP(cb, DYNAMIC_HELPER_ALIGNMENT); \
BYTE * pStartRX = (BYTE *)(void*)pAllocator->GetDynamicHelpersHeap()->AllocAlignedMem(cbAligned, DYNAMIC_HELPER_ALIGNMENT); \
Expand All @@ -1323,6 +1323,14 @@ void StubLinkerCPU::EmitCallManagedMethod(MethodDesc *pMD, BOOL fTailCall)
size_t rxOffset = pStartRX - pStart; \
BYTE * p = pStart;

// BEGIN_DYNAMIC_HELPER_EMIT(size) starts the emission of a dynamic helper by
// expanding BEGIN_DYNAMIC_HELPER_EMIT_WORKER(size). When FEATURE_PERFMAP is
// defined it additionally reports the new helper to the perf map.
//
// The helper is reported at pStartRX (the address handed out by the dynamic
// helpers heap) rather than at the write cursor p: the worker computes
// rxOffset = pStartRX - pStart, i.e. the pointer written through may differ
// from the address the helper lives at, and the perf map entry has to name
// the latter. cb (== size) is passed so the macro argument is not expanded
// a second time.
#ifdef FEATURE_PERFMAP
#define BEGIN_DYNAMIC_HELPER_EMIT(size) \
    BEGIN_DYNAMIC_HELPER_EMIT_WORKER(size) \
    PerfMap::LogStubs(__FUNCTION__, "DynamicHelper", (PCODE)pStartRX, cb, PerfMapStubType::Individual);
#else
#define BEGIN_DYNAMIC_HELPER_EMIT(size) BEGIN_DYNAMIC_HELPER_EMIT_WORKER(size)
#endif

#define END_DYNAMIC_HELPER_EMIT() \
_ASSERTE(pStart + cb == p); \
while (p < pStart + cbAligned) { *(DWORD*)p = 0xffffff0f/*badcode*/; p += 4; }\
Expand Down
Loading
Loading