diff --git a/src/OpenTelemetry.AutoInstrumentation.Native/CMakeLists.txt b/src/OpenTelemetry.AutoInstrumentation.Native/CMakeLists.txt
index 5ebc8d7def..803b1d732f 100644
--- a/src/OpenTelemetry.AutoInstrumentation.Native/CMakeLists.txt
+++ b/src/OpenTelemetry.AutoInstrumentation.Native/CMakeLists.txt
@@ -160,6 +160,7 @@ add_library("OpenTelemetry.AutoInstrumentation.Native.static" STATIC
member_resolver.cpp
metadata_builder.cpp
miniutf.cpp
+ stack_capture_strategy_factory.cpp
regex_utils.cpp
string_utils.cpp
util.cpp
diff --git a/src/OpenTelemetry.AutoInstrumentation.Native/OpenTelemetry.AutoInstrumentation.Native.vcxproj b/src/OpenTelemetry.AutoInstrumentation.Native/OpenTelemetry.AutoInstrumentation.Native.vcxproj
index d0afa520be..9fa26aa03c 100644
--- a/src/OpenTelemetry.AutoInstrumentation.Native/OpenTelemetry.AutoInstrumentation.Native.vcxproj
+++ b/src/OpenTelemetry.AutoInstrumentation.Native/OpenTelemetry.AutoInstrumentation.Native.vcxproj
@@ -178,6 +178,7 @@
+
@@ -196,13 +197,17 @@
+
+
+
+
@@ -228,9 +233,11 @@
+
+
diff --git a/src/OpenTelemetry.AutoInstrumentation.Native/continuous_profiler.cpp b/src/OpenTelemetry.AutoInstrumentation.Native/continuous_profiler.cpp
index 9fb7e9c038..40fb8044b9 100644
--- a/src/OpenTelemetry.AutoInstrumentation.Native/continuous_profiler.cpp
+++ b/src/OpenTelemetry.AutoInstrumentation.Native/continuous_profiler.cpp
@@ -98,8 +98,8 @@ static std::mutex name_cache_lock = std::mutex();
static std::shared_mutex profiling_lock = std::shared_mutex();
-static ICorProfilerInfo12* profiler_info; // After feature sets settle down, perhaps this should be refactored and have
- // a single static instance of ThreadSampler
+static ICorProfilerInfo7* profiler_info; // After feature sets settle down, perhaps this should be refactored and have
+ // a single static instance of ThreadSampler
// Dirt-simple back pressure system to save overhead if managed code is not reading fast enough
bool ThreadSamplingShouldProduceThreadSample()
@@ -330,9 +330,7 @@ void ThreadSamplesBuffer::WriteSpanContext(const thread_span_context& span_conte
WriteUInt64(span_context.span_id_);
}
-void ThreadSamplesBuffer::StartSample(ThreadID id,
- const ThreadState* state,
- const thread_span_context& span_context) const
+void ThreadSamplesBuffer::StartSample(const ThreadState* state, const thread_span_context& span_context) const
{
CHECK_SAMPLES_BUFFER_LENGTH()
WriteByte(kThreadSamplesStartSample);
@@ -553,7 +551,7 @@ void NamingHelper::ClearFunctionIdentifierCache()
mdToken function_token = 0;
// theoretically there is a possibility to use GetFunctionInfo method, but it does not support generic methods
const HRESULT hr =
- info12_->GetFunctionInfo2(func_id, frame_info, nullptr, &module_id, &function_token, 0, nullptr, nullptr);
+ info7_->GetFunctionInfo2(func_id, frame_info, nullptr, &module_id, &function_token, 0, nullptr, nullptr);
if (FAILED(hr))
{
trace::Logger::Debug("GetFunctionInfo2 failed. HRESULT=0x", std::setfill('0'), std::setw(8), std::hex, hr);
@@ -583,8 +581,8 @@ void NamingHelper::GetFunctionName(FunctionIdentifier function_identifier, trace
}
ComPtr metadata_import;
- HRESULT hr = info12_->GetModuleMetaData(function_identifier.module_id, ofRead, IID_IMetaDataImport2,
- reinterpret_cast<IUnknown**>(&metadata_import));
+ HRESULT hr = info7_->GetModuleMetaData(function_identifier.module_id, ofRead, IID_IMetaDataImport2,
+ reinterpret_cast<IUnknown**>(&metadata_import));
if (FAILED(hr))
{
trace::Logger::Debug("GetModuleMetaData failed. HRESULT=0x", std::setfill('0'), std::setw(8), std::hex, hr);
@@ -783,30 +781,35 @@ static HRESULT __stdcall FrameCallback(_In_ FunctionID func_id,
static void CaptureFunctionIdentifiersForThreads(
ContinuousProfiler* prof,
- ICorProfilerInfo12* info12,
+ ICorProfilerInfo7* info7,
const std::unordered_set<ThreadID>& selectedThreads,
std::unordered_map<ThreadID, std::vector<FunctionIdentifier>>& threadStacksBuffer)
{
prof->helper.ClearFunctionIdentifierCache();
- for (auto threadId : selectedThreads)
+
+ if (auto stackCaptureStrategy = prof->GetStackCaptureStrategy(); stackCaptureStrategy != nullptr)
{
- DoStackSnapshotParams doStackSnapshotParams(prof, &threadStacksBuffer[threadId]);
- HRESULT snapshotHr = info12->DoStackSnapshot(threadId, &FrameCallback, COR_PRF_SNAPSHOT_DEFAULT,
- &doStackSnapshotParams, nullptr, 0);
- if (FAILED(snapshotHr))
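+ // Invoked by the strategy once per stack frame; forwards the frame to the existing FrameCallback,
+ // which records it into the buffer for the thread currently being sampled.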
+ auto frameProcessor = [&threadStacksBuffer, prof](StackSnapshotCallbackContext* snapshot_context) -> HRESULT
{
- trace::Logger::Debug("DoStackSnapshot failed. HRESULT=0x", std::setfill('0'), std::setw(8), std::hex,
- snapshotHr);
- }
+ auto thread = snapshot_context->threadId;
+ DoStackSnapshotParams doStackSnapshotParams{prof, &threadStacksBuffer[thread]};
+ FrameCallback(snapshot_context->functionId, snapshot_context->instructionPointer,
+ snapshot_context->frameInfo, snapshot_context->contextSize, snapshot_context->context,
+ &doStackSnapshotParams);
+ return S_OK;
+ };
+
+ StackSnapshotCallbackContext context{frameProcessor};
+ stackCaptureStrategy->CaptureStacks(selectedThreads, &context);
}
}
-static std::unordered_set<ThreadID> EnumerateThreads(ICorProfilerInfo12* info12)
+static std::unordered_set<ThreadID> EnumerateThreads(ICorProfilerInfo7* info7)
{
std::unordered_set<ThreadID> threads;
ICorProfilerThreadEnum* thread_enum = nullptr;
- HRESULT hr = info12->EnumThreads(&thread_enum);
+ HRESULT hr = info7->EnumThreads(&thread_enum);
if (FAILED(hr))
{
trace::Logger::Debug("Could not EnumThreads. HRESULT=0x", std::setfill('0'), std::setw(8), std::hex, hr);
@@ -826,7 +829,7 @@ static void ResolveFrames(ContinuousProfiler* prof,
const std::vector<FunctionIdentifier>& threadStack,
ThreadSamplesBuffer& buffer)
{
- for (auto functionIdentifier : threadStack)
+ for (const auto& functionIdentifier : threadStack)
{
const trace::WSTRING* name = prof->helper.Lookup(functionIdentifier, prof->stats_);
// This is where line numbers could be calculated
@@ -867,7 +870,7 @@ static void ResolveSymbolsAndPublishBufferForAllThreads(
thread_span_context spanContext = GetContext(threadId);
const auto threadState = GetThreadState(prof->managed_tid_to_state_, threadId);
- prof->cur_cpu_writer_->StartSample(threadId, threadState, spanContext);
+ prof->cur_cpu_writer_->StartSample(threadState, spanContext);
if (prof->selectedThreadsSamplingInterval.has_value())
{
@@ -949,7 +952,7 @@ static void RemoveOutdatedEntries(std::unordered_map&
}
static void PauseClrAndCaptureSamples(ContinuousProfiler* prof,
- ICorProfilerInfo12* info12,
+ ICorProfilerInfo7* info7,
const SamplingType samplingType,
std::unordered_map<ThreadID, std::vector<FunctionIdentifier>>& threadStacksBuffer)
{
@@ -1010,52 +1013,33 @@ static void PauseClrAndCaptureSamples(ContinuousProfiler*
const auto start = std::chrono::steady_clock::now();
- HRESULT hr = info12->SuspendRuntime();
-
- if (FAILED(hr))
+ try
{
- trace::Logger::Warn("Could not suspend runtime to sample threads. HRESULT=0x", std::setfill('0'), std::setw(8),
- std::hex, hr);
- }
- else
- {
- try
- {
- if (samplingType == SamplingType::Continuous)
- {
- auto allThreads = EnumerateThreads(info12);
- CaptureFunctionIdentifiersForThreads(prof, info12, allThreads, threadStacksBuffer);
- }
- else if (samplingType == SamplingType::SelectedThreads)
- {
- CaptureFunctionIdentifiersForThreads(prof, info12, selective_sampling_thread_buffer,
- threadStacksBuffer);
- }
- }
- catch (const std::exception& e)
+ if (samplingType == SamplingType::Continuous)
{
- trace::Logger::Warn("Could not capture thread samples: ", e.what());
+ auto allThreads = EnumerateThreads(info7);
+ CaptureFunctionIdentifiersForThreads(prof, info7, allThreads, threadStacksBuffer);
}
- catch (...)
+ else if (samplingType == SamplingType::SelectedThreads)
{
- trace::Logger::Warn("Could not capture thread sample for unknown reasons");
+ CaptureFunctionIdentifiersForThreads(prof, info7, selective_sampling_thread_buffer, threadStacksBuffer);
}
}
- // I don't have any proof but I sure hope that if suspending fails then it's still ok to ask to resume, with no
- // ill effects
- hr = info12->ResumeRuntime();
+ catch (const std::exception& e)
+ {
+ trace::Logger::Warn("Could not capture thread samples: ", e.what());
+ }
+ catch (...)
+ {
+ trace::Logger::Warn("Could not capture thread sample for unknown reasons");
+ }
const auto end = std::chrono::steady_clock::now();
const auto elapsed_micros = std::chrono::duration_cast<std::chrono::microseconds>(end - start).count();
prof->stats_.micros_suspended = static_cast(elapsed_micros);
- if (FAILED(hr))
- {
- trace::Logger::Error("Could not resume runtime? HRESULT=0x", std::setfill('0'), std::setw(8), std::hex, hr);
- }
-
const size_t nonEmptyCount = std::count_if(threadStacksBuffer.begin(), threadStacksBuffer.end(),
[](const std::pair>& v)
{ return !v.second.empty(); });
@@ -1118,9 +1102,9 @@ static bool ShouldTrackIterations(const ContinuousProfiler* const prof)
static void SamplingThreadMain(ContinuousProfiler* prof)
{
- ICorProfilerInfo12* info12 = prof->info12;
+ ICorProfilerInfo7* info7 = prof->info7;
- info12->InitializeCurrentThread();
+ info7->InitializeCurrentThread();
std::unordered_map<ThreadID, std::vector<FunctionIdentifier>> threadStacksBuffer;
unsigned int iteration = 0;
@@ -1159,7 +1143,7 @@ static void SamplingThreadMain(ContinuousProfiler* prof)
iteration = 0;
}
- PauseClrAndCaptureSamples(prof, info12, samplingType, threadStacksBuffer);
+ PauseClrAndCaptureSamples(prof, info7, samplingType, threadStacksBuffer);
if (prof->IsShutdownRequested())
{
@@ -1185,11 +1169,28 @@ static void SamplingThreadMain(ContinuousProfiler* prof)
}
}
+void ContinuousProfiler::SetGlobalInfo7(ICorProfilerInfo7* cor_profiler_info7)
+{
+ info7 = cor_profiler_info7;
+ this->helper.info7_ = cor_profiler_info7;
+ profiler_info = cor_profiler_info7;
+}
+
void ContinuousProfiler::SetGlobalInfo12(ICorProfilerInfo12* cor_profiler_info12)
{
- profiler_info = cor_profiler_info12;
- this->info12 = cor_profiler_info12;
- this->helper.info12_ = cor_profiler_info12;
+ // ICorProfilerInfo12 derives from ICorProfilerInfo7, so we can use it as ICorProfilerInfo7
+ SetGlobalInfo7(cor_profiler_info12);
+ info12 = cor_profiler_info12;
+}
+
+void ContinuousProfiler::SetStackCaptureStrategy(IStackCaptureStrategy* stack_capture_strategy)
+{
+ stack_capture_strategy_ = stack_capture_strategy;
+}
+
+IStackCaptureStrategy* ContinuousProfiler::GetStackCaptureStrategy() const
+{
+ return stack_capture_strategy_;
}
void ContinuousProfiler::InitSelectiveSamplingBuffer()
@@ -1263,8 +1264,8 @@ constexpr auto AllocationTickV4SizeWithoutTypeName = 4 + 4 + 2 + 8 + EtwPoint
static void CaptureAllocationStack(ContinuousProfiler* prof, std::vector<FunctionIdentifier>& threadStack)
{
DoStackSnapshotParams doStackSnapshotParams(prof, &threadStack);
- HRESULT hr = prof->info12->DoStackSnapshot((ThreadID)NULL, &FrameCallback, COR_PRF_SNAPSHOT_DEFAULT,
- &doStackSnapshotParams, nullptr, 0);
+ HRESULT hr = prof->info7->DoStackSnapshot((ThreadID)NULL, &FrameCallback, COR_PRF_SNAPSHOT_DEFAULT,
+ &doStackSnapshotParams, nullptr, 0);
if (FAILED(hr))
{
trace::Logger::Debug("DoStackSnapshot failed. HRESULT=0x", std::setfill('0'), std::setw(8), std::hex, hr);
@@ -1362,7 +1363,7 @@ void ContinuousProfiler::AllocationTick(ULONG dataLen, LPCBYTE data)
size_t typeNameCharLen = (dataLen - AllocationTickV4SizeWithoutTypeName) / 2 - 1;
ThreadID threadId;
- const HRESULT hr = info12->GetCurrentThreadID(&threadId);
+ const HRESULT hr = info7->GetCurrentThreadID(&threadId);
if (FAILED(hr))
{
trace::Logger::Debug("GetCurrentThreadId failed, ", hr);
@@ -1405,6 +1406,11 @@ void ContinuousProfiler::AllocationTick(ULONG dataLen, LPCBYTE data)
void ContinuousProfiler::StartAllocationSampling(const unsigned int maxMemorySamplesPerMinute)
{
+ if (!info12) // no ICorProfilerInfo12 - running on .NET Framework - ignore the allocation sampling request
+ {
+ trace::Logger::Warn("Ignore Allocation Sampling request, it is not supported for .Net Framework applications");
+ return;
+ }
this->allocationSubSampler = std::make_unique(maxMemorySamplesPerMinute, 60);
COR_PRF_EVENTPIPE_PROVIDER_CONFIG sessionConfig[] = {{WStr("Microsoft-Windows-DotNETRuntime"),
@@ -1422,6 +1428,10 @@ void ContinuousProfiler::StartAllocationSampling(const unsigned int maxMemorySam
void ContinuousProfiler::StopAllocationSampling()
{
+ if (!info12) // no ICorProfilerInfo12 - running on .NET Framework - ignore the allocation sampling stop request
+ {
+ return;
+ }
if (session_ == 0)
{
return;
diff --git a/src/OpenTelemetry.AutoInstrumentation.Native/continuous_profiler.h b/src/OpenTelemetry.AutoInstrumentation.Native/continuous_profiler.h
index dd93eb26ba..02a3ab764a 100644
--- a/src/OpenTelemetry.AutoInstrumentation.Native/continuous_profiler.h
+++ b/src/OpenTelemetry.AutoInstrumentation.Native/continuous_profiler.h
@@ -7,6 +7,7 @@
#define OTEL_CONTINUOUS_PROFILER_H_
#include "continuous_profiler_clr_helpers.h"
+#include "stack_capture_strategy.h"
#include
#include
@@ -210,8 +211,9 @@ class ThreadSamplesBuffer
void StartSelectedThreadsBatch() const;
void EndSelectedThreadsBatch() const;
void WriteSpanContext(const thread_span_context& span_context) const;
- void StartSample(ThreadID id, const ThreadState* state, const thread_span_context& span_context) const;
- void StartSampleForSelectedThread(const ThreadState* state, const thread_span_context& span_context) const;
+ void StartSample(const ThreadState* state, const thread_span_context& span_context) const;
+ void StartSampleForSelectedThread(const ThreadState* state,
+ const thread_span_context& span_context) const;
void MarkSelectedForFrequentSampling(bool value) const;
void RecordFrame(const FunctionIdentifier& fid, const trace::WSTRING& frame);
void EndSample() const;
@@ -270,7 +272,7 @@ class NamingHelper
{
public:
// These are permanent parts of the helper object
- ICorProfilerInfo12* info12_ = nullptr;
+ ICorProfilerInfo7* info7_ = nullptr;
NamingHelper();
void ClearFunctionIdentifierCache();
@@ -314,23 +316,27 @@ enum class SamplingType : int32_t { Continuous = 1, SelectedThreads = 2 };
class ContinuousProfiler
{
public:
- std::optional threadSamplingInterval;
- std::optional selectedThreadsSamplingInterval;
+ std::optional threadSamplingInterval;
+ std::optional selectedThreadsSamplingInterval;
std::chrono::time_point nextOutdatedEntriesScan;
- void StartThreadSampling();
- void Shutdown();
- bool IsShutdownRequested() const;
- static void InitSelectiveSamplingBuffer();
- unsigned int maxMemorySamplesPerMinute;
- void StartAllocationSampling(unsigned int maxMemorySamplesPerMinute);
- void StopAllocationSampling();
- void AllocationTick(ULONG dataLen, LPCBYTE data);
- ICorProfilerInfo12* info12;
- static void ThreadCreated(ThreadID thread_id);
- void ThreadDestroyed(ThreadID thread_id);
- void ThreadNameChanged(ThreadID thread_id, ULONG cch_name, WCHAR name[]);
+ void StartThreadSampling();
+ void Shutdown();
+ bool IsShutdownRequested() const;
+ static void InitSelectiveSamplingBuffer();
+ unsigned int maxMemorySamplesPerMinute;
+ void StartAllocationSampling(unsigned int maxMemorySamplesPerMinute);
+ void StopAllocationSampling();
+ void AllocationTick(ULONG dataLen, LPCBYTE data);
+ ICorProfilerInfo12* info12 = nullptr;
+ ICorProfilerInfo7* info7 = nullptr;
+ static void ThreadCreated(ThreadID thread_id);
+ void ThreadDestroyed(ThreadID thread_id);
+ void ThreadNameChanged(ThreadID thread_id, ULONG cch_name, WCHAR name[]);
void SetGlobalInfo12(ICorProfilerInfo12* info12);
+ void SetGlobalInfo7(ICorProfilerInfo7* cor_profiler_info7);
+ void SetStackCaptureStrategy(IStackCaptureStrategy* strategy);
+ IStackCaptureStrategy* GetStackCaptureStrategy() const;
ThreadState* GetCurrentThreadState(ThreadID tid);
std::unordered_map managed_tid_to_state_;
@@ -350,6 +356,7 @@ class ContinuousProfiler
std::atomic_bool shutdown_requested_{ false };
std::unique_ptr thread_sampling_thread_;
EVENTPIPE_SESSION session_ = 0;
+ IStackCaptureStrategy* stack_capture_strategy_ = nullptr; // Non-owning pointer; lifetime managed by CorProfiler
};
} // namespace continuous_profiler
diff --git a/src/OpenTelemetry.AutoInstrumentation.Native/cor_profiler.cpp b/src/OpenTelemetry.AutoInstrumentation.Native/cor_profiler.cpp
index c356618299..4cf8187048 100644
--- a/src/OpenTelemetry.AutoInstrumentation.Native/cor_profiler.cpp
+++ b/src/OpenTelemetry.AutoInstrumentation.Native/cor_profiler.cpp
@@ -30,6 +30,7 @@
#include "version.h"
#include "continuous_profiler.h"
#include "member_resolver.h"
+#include "stack_capture_strategy_factory.h"
#ifdef MACOS
#include
@@ -276,6 +277,10 @@ HRESULT STDMETHODCALLTYPE CorProfiler::Initialize(IUnknown* cor_profiler_info_un
this->info_->AddRef();
is_attached_.store(true);
profiler = this;
+
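+ // Choose the stack capture strategy for the current runtime (see stack_capture_strategy_factory.cpp).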
+ stack_capture_strategy_ =
+ continuous_profiler::StackCaptureStrategyFactory::Create(this->info_, runtime_information_);
+
return S_OK;
}
@@ -1180,9 +1185,24 @@ void CorProfiler::InternalAddInstrumentation(WCHAR* id, CallTargetDefinition* it
bool CorProfiler::InitThreadSampler()
{
+#if defined(_WIN32) && defined(_M_AMD64)
+ // On .NET Framework, stack capture needs the native thread ID of the main thread, but the
+ // ThreadAssignedToOSThread profiler callback is not invoked for the main thread.
+ // Registering the mapping here works because:
+ // 1. The thread must have executed managed code first - InitThreadSampler is called from managed code.
+ // 2. This must run in the context of the main thread - and it is the main thread that calls InitThreadSampler.
+ ThreadID mainThreadId = 0;
+ if (auto hr = info_->GetCurrentThreadID(&mainThreadId); SUCCEEDED(hr))
+ {
+ ThreadAssignedToOSThread(mainThreadId, ::GetCurrentThreadId());
+ }
+#endif
+
DWORD pdvEventsLow;
DWORD pdvEventsHigh;
- auto hr = this->info12_->GetEventMask2(&pdvEventsLow, &pdvEventsHigh);
+ auto hr = this->info_->GetEventMask2(&pdvEventsLow, &pdvEventsHigh);
if (FAILED(hr))
{
Logger::Warn("ConfigureContinuousProfiler: Failed to take event masks for continuous profiler.");
@@ -1191,7 +1211,7 @@ bool CorProfiler::InitThreadSampler()
pdvEventsLow |= COR_PRF_MONITOR_THREADS | COR_PRF_ENABLE_STACK_SNAPSHOT;
- hr = this->info12_->SetEventMask2(pdvEventsLow, pdvEventsHigh);
+ hr = this->info_->SetEventMask2(pdvEventsLow, pdvEventsHigh);
if (FAILED(hr))
{
Logger::Warn("ConfigureContinuousProfiler: Failed to set event masks for continuous profiler.");
@@ -1200,6 +1220,8 @@ bool CorProfiler::InitThreadSampler()
this->continuousProfiler = new continuous_profiler::ContinuousProfiler();
this->continuousProfiler->SetGlobalInfo12(this->info12_);
+ this->continuousProfiler->SetGlobalInfo7(this->info_);
+ this->continuousProfiler->SetStackCaptureStrategy(stack_capture_strategy_.get());
Logger::Info("ConfigureContinuousProfiler: Events masks configured for continuous profiler");
return true;
}
@@ -1210,15 +1232,25 @@ void CorProfiler::ConfigureContinuousProfiler(bool threadSamplingEnabled
unsigned int maxMemorySamplesPerMinute,
unsigned int selectedThreadsSamplingInterval)
{
- Logger::Info("ConfigureContinuousProfiler: thread sampling enabled: ", threadSamplingEnabled,
- ", thread sampling interval: ", threadSamplingInterval,
- ", allocationSamplingEnabled: ", allocationSamplingEnabled,
- ", max memory samples per minute: ", maxMemorySamplesPerMinute,
- ", selected threads sampling interval: ", selectedThreadsSamplingInterval);
+ ContinuousProfilerParams params{threadSamplingEnabled, threadSamplingInterval, allocationSamplingEnabled,
+ maxMemorySamplesPerMinute, selectedThreadsSamplingInterval};
+ // Guard against multiple initialization: In .NET Framework, this method may be called
+ // once per AppDomain, but the continuous profiler is a process-level singleton.
+ // std::call_once ensures thread-safe one-time initialization across all AppDomains.
+ std::call_once(sampling_init_flag_, [this, ¶ms]() { ConfigureContinuousProfilerInternal(params); });
+}
+
+void CorProfiler::ConfigureContinuousProfilerInternal(const ContinuousProfilerParams& params)
+{
+ Logger::Info("ConfigureContinuousProfiler: thread sampling enabled: ", params.threadSamplingEnabled,
+ ", thread sampling interval: ", params.threadSamplingInterval,
+ ", allocationSamplingEnabled: ", params.allocationSamplingEnabled,
+ ", max memory samples per minute: ", params.maxMemorySamplesPerMinute,
+ ", selected threads sampling interval: ", params.selectedThreadsSamplingInterval);
- const bool selectiveSamplingConfigured = selectedThreadsSamplingInterval != 0;
+ const bool selectiveSamplingConfigured = params.selectedThreadsSamplingInterval != 0;
- if (!threadSamplingEnabled && !allocationSamplingEnabled && !selectiveSamplingConfigured)
+ if (!params.threadSamplingEnabled && !params.allocationSamplingEnabled && !selectiveSamplingConfigured)
{
Logger::Debug("ConfigureContinuousProfiler: no sampling type configured.");
return;
@@ -1230,26 +1262,26 @@ void CorProfiler::ConfigureContinuousProfiler(bool threadSamplingEnabled
return;
}
- if (threadSamplingEnabled)
+ if (params.threadSamplingEnabled)
{
- this->continuousProfiler->threadSamplingInterval = threadSamplingInterval;
+ this->continuousProfiler->threadSamplingInterval = params.threadSamplingInterval;
}
if (selectiveSamplingConfigured)
{
- this->continuousProfiler->selectedThreadsSamplingInterval = selectedThreadsSamplingInterval;
+ this->continuousProfiler->selectedThreadsSamplingInterval = params.selectedThreadsSamplingInterval;
this->continuousProfiler->nextOutdatedEntriesScan = std::chrono::steady_clock::now();
continuous_profiler::ContinuousProfiler::InitSelectiveSamplingBuffer();
}
- if (threadSamplingEnabled || selectiveSamplingConfigured)
+ if (params.threadSamplingEnabled || selectiveSamplingConfigured)
{
Logger::Info("ContinuousProfiler::StartThreadSampling");
this->continuousProfiler->StartThreadSampling();
}
- if (allocationSamplingEnabled)
+ if (params.allocationSamplingEnabled)
{
- this->continuousProfiler->StartAllocationSampling(maxMemorySamplesPerMinute);
+ this->continuousProfiler->StartAllocationSampling(params.maxMemorySamplesPerMinute);
}
}
@@ -3765,6 +3797,11 @@ HRESULT STDMETHODCALLTYPE CorProfiler::ThreadCreated(ThreadID threadId)
{
continuousProfiler->ThreadCreated(threadId);
}
+
+ if (stack_capture_strategy_)
+ {
+ stack_capture_strategy_->OnThreadCreated(threadId);
+ }
return S_OK;
}
HRESULT STDMETHODCALLTYPE CorProfiler::ThreadDestroyed(ThreadID threadId)
@@ -3773,6 +3810,12 @@ HRESULT STDMETHODCALLTYPE CorProfiler::ThreadDestroyed(ThreadID threadId)
{
continuousProfiler->ThreadDestroyed(threadId);
}
+
+ if (stack_capture_strategy_)
+ {
+ stack_capture_strategy_->OnThreadDestroyed(threadId);
+ }
+
return S_OK;
}
HRESULT STDMETHODCALLTYPE CorProfiler::ThreadNameChanged(ThreadID threadId, ULONG cchName, WCHAR name[])
@@ -3781,6 +3824,20 @@ HRESULT STDMETHODCALLTYPE CorProfiler::ThreadNameChanged(ThreadID threadId, ULON
{
continuousProfiler->ThreadNameChanged(threadId, cchName, name);
}
+
+ if (stack_capture_strategy_)
+ {
+ stack_capture_strategy_->OnThreadNameChanged(threadId, cchName, name);
+ }
+
+ return S_OK;
+}
+HRESULT STDMETHODCALLTYPE CorProfiler::ThreadAssignedToOSThread(ThreadID managedThreadId, DWORD osThreadId)
+{
+ if (stack_capture_strategy_)
+ {
+ stack_capture_strategy_->OnThreadAssignedToOSThread(managedThreadId, osThreadId);
+ }
return S_OK;
}
diff --git a/src/OpenTelemetry.AutoInstrumentation.Native/cor_profiler.h b/src/OpenTelemetry.AutoInstrumentation.Native/cor_profiler.h
index 5ab2a7eb63..5bfd864032 100644
--- a/src/OpenTelemetry.AutoInstrumentation.Native/cor_profiler.h
+++ b/src/OpenTelemetry.AutoInstrumentation.Native/cor_profiler.h
@@ -23,6 +23,7 @@
#include "rejit_handler.h"
#include
#include "clr_helpers.h"
+#include "stack_capture_strategy.h"
// Forward declaration
namespace continuous_profiler
@@ -32,6 +33,14 @@ class ContinuousProfiler;
namespace trace
{
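+// Bundles the ConfigureContinuousProfiler arguments so they can be captured and forwarded through
+// the std::call_once initialization path.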
+struct ContinuousProfilerParams
+{
+ bool threadSamplingEnabled;
+ unsigned int threadSamplingInterval;
+ bool allocationSamplingEnabled;
+ unsigned int maxMemorySamplesPerMinute;
+ unsigned int selectedThreadsSamplingInterval;
+};
class CorProfiler : public CorProfilerBase
{
@@ -57,6 +66,9 @@ class CorProfiler : public CorProfilerBase
bool is_desktop_iis = false;
continuous_profiler::ContinuousProfiler* continuousProfiler;
+ std::unique_ptr stack_capture_strategy_;
+ std::once_flag sampling_init_flag_;
+ HRESULT STDMETHODCALLTYPE ThreadAssignedToOSThread(ThreadID managedThreadId, DWORD osThreadId) override;
//
@@ -133,6 +145,7 @@ class CorProfiler : public CorProfilerBase
//
void InternalAddInstrumentation(WCHAR* id, CallTargetDefinition* items, int size, bool isDerived);
bool InitThreadSampler();
+ void ConfigureContinuousProfilerInternal(const ContinuousProfilerParams& params);
public:
CorProfiler() = default;
diff --git a/src/OpenTelemetry.AutoInstrumentation.Native/dot_net_stack_capture_strategy.h b/src/OpenTelemetry.AutoInstrumentation.Native/dot_net_stack_capture_strategy.h
new file mode 100644
index 0000000000..62820943eb
--- /dev/null
+++ b/src/OpenTelemetry.AutoInstrumentation.Native/dot_net_stack_capture_strategy.h
@@ -0,0 +1,108 @@
+// Copyright The OpenTelemetry Authors
+// SPDX-License-Identifier: Apache-2.0
+
+#ifndef OTEL_PROFILER_DOTNET_STACK_CAPTURE_STRATEGY_H_
+#define OTEL_PROFILER_DOTNET_STACK_CAPTURE_STRATEGY_H_
+
+#include "stack_capture_strategy.h"
+
+#include "logger.h"
+#include
+
+namespace continuous_profiler {
+
+/// @brief Stack capture strategy for .NET Core/5+
+/// @details Uses SuspendRuntime/ResumeRuntime to pause the entire CLR while stacks are captured
+class DotNetStackCaptureStrategy : public IStackCaptureStrategy {
+public:
+ explicit DotNetStackCaptureStrategy(ICorProfilerInfo12* profilerInfo)
+ : profilerInfo_(profilerInfo) {
+ trace::Logger::Info("Initialized DotNetStackCaptureStrategy (CLR suspension)");
+ }
+
+ HRESULT CaptureStacks(
+ const std::unordered_set<ThreadID>& threads,
+ StackSnapshotCallbackContext* clientData) override {
+
+ if (threads.empty()) {
+ return S_OK;
+ }
+ try
+ {
+ // RAII guard - suspends CLR in constructor, resumes in destructor
+ RuntimeSuspensionGuard suspensionGuard(profilerInfo_);
+ // With CLR suspended, capture stacks for requested threads
+ HRESULT captureResult = S_OK;
+ for (ThreadID tid : threads) {
+ clientData->threadId = tid;
+ HRESULT frameHr = profilerInfo_->DoStackSnapshot(
+ tid,
+ continuous_profiler::IStackCaptureStrategy::StackSnapshotCallbackDefault,
+ COR_PRF_SNAPSHOT_DEFAULT,
+ clientData,
+ nullptr,
+ 0);
+
+ if (FAILED(frameHr)) {
+ trace::Logger::Debug("DoStackSnapshot failed for thread ", tid,
+ " HRESULT=", trace::HResultStr(frameHr));
+ if (SUCCEEDED(captureResult)) {
+ captureResult = frameHr; // Remember first error
+ }
+ }
+ }
+
+ // RuntimeSuspensionGuard destructor will automatically resume CLR
+ return SUCCEEDED(captureResult) ? S_OK : captureResult;
+ }
+ catch (const std::runtime_error& ex)
+ {
+ trace::Logger::Error("DotNetStackCaptureStrategy: Runtime Error: ", ex.what());
+ return E_FAIL;
+ }
+ catch (const std::exception& ex) {
+ trace::Logger::Error("DotNetStackCaptureStrategy: Exception during CaptureStacks: ", ex.what());
+ return E_FAIL;
+ }
+ }
+
+ // No thread tracking needed - CLR suspension is global
+
+private:
+ ICorProfilerInfo12* profilerInfo_;
+
+ /// @brief RAII guard for CLR runtime suspension/resumption
+ class RuntimeSuspensionGuard {
+ public:
+ explicit RuntimeSuspensionGuard(ICorProfilerInfo12* profilerInfo)
+ : profilerInfo_(profilerInfo) {
+
+ if (auto suspendResult = profilerInfo_->SuspendRuntime(); FAILED(suspendResult))
+ {
+ auto errorString = "SuspendRuntime failed with HRESULT=" + std::to_string(suspendResult);
+ throw std::runtime_error(errorString);
+ }
+ }
+
+ ~RuntimeSuspensionGuard() {
+
+ if (HRESULT resumeHr = profilerInfo_->ResumeRuntime(); FAILED(resumeHr)) {
+ trace::Logger::Error("DotNetStackCaptureStrategy: ResumeRuntime FAILED! HRESULT=",
+ trace::HResultStr(resumeHr));
+ }
+ }
+
+ // Non-copyable, non-movable
+ RuntimeSuspensionGuard(const RuntimeSuspensionGuard&) = delete;
+ RuntimeSuspensionGuard& operator=(const RuntimeSuspensionGuard&) = delete;
+ RuntimeSuspensionGuard(RuntimeSuspensionGuard&&) = delete;
+ RuntimeSuspensionGuard& operator=(RuntimeSuspensionGuard&&) = delete;
+
+ private:
+ ICorProfilerInfo12* profilerInfo_;
+ };
+};
+
+} // namespace continuous_profiler
+
+#endif // OTEL_PROFILER_DOTNET_STACK_CAPTURE_STRATEGY_H_
\ No newline at end of file
diff --git a/src/OpenTelemetry.AutoInstrumentation.Native/netfx_stack_capture_strategy_x64.h b/src/OpenTelemetry.AutoInstrumentation.Native/netfx_stack_capture_strategy_x64.h
new file mode 100644
index 0000000000..fe2e2c9fc9
--- /dev/null
+++ b/src/OpenTelemetry.AutoInstrumentation.Native/netfx_stack_capture_strategy_x64.h
@@ -0,0 +1,65 @@
+// Copyright The OpenTelemetry Authors
+// SPDX-License-Identifier: Apache-2.0
+
+#ifndef OTEL_PROFILER_NETFX_STACK_CAPTURE_STRATEGY_H_
+#define OTEL_PROFILER_NETFX_STACK_CAPTURE_STRATEGY_H_
+
+#if defined(_WIN32) && defined(_M_AMD64)
+
+#include "stack_capture_strategy.h"
+#include "profiler_stack_capture.h"
+
+namespace continuous_profiler {
+
+/// @brief Stack capture strategy for .NET Framework
+/// @details Uses thread suspension + seeded DoStackSnapshot via StackCaptureEngine
+class NetFxStackCaptureStrategyX64 : public IStackCaptureStrategy {
+public:
+ explicit NetFxStackCaptureStrategyX64(ICorProfilerInfo2* profilerInfo)
+ : engine_(std::make_unique(
+ std::make_unique(profilerInfo))) {
+ trace::Logger::Info("Initialized NetFxStackCaptureStrategyX64 (per-thread suspension)");
+ }
+
+ HRESULT CaptureStacks(
+ const std::unordered_set<ThreadID>& threads,
+ StackSnapshotCallbackContext* clientData) override {
+ // StackCaptureEngine handles:
+ // - Per-thread suspension via ScopedThreadSuspend
+ // - Safety probes with canary thread
+ // - Seeded DoStackSnapshot with PrepareContextForSnapshot
+ return engine_->CaptureStacks(threads, clientData);
+ }
+
+ // Forward lifecycle events to StackCaptureEngine
+ void OnThreadCreated(ThreadID threadId) override {
+ if (engine_) {
+ engine_->ThreadCreated(threadId);
+ }
+ }
+
+ void OnThreadDestroyed(ThreadID threadId) override {
+ if (engine_) {
+ engine_->ThreadDestroyed(threadId);
+ }
+ }
+
+ void OnThreadNameChanged(ThreadID threadId, ULONG cchName, WCHAR name[]) override {
+ if (engine_ && name && cchName > 0) {
+ engine_->ThreadNameChanged(threadId, cchName, name);
+ }
+ }
+ void OnThreadAssignedToOSThread(ThreadID managedThreadId, DWORD osThreadId) override {
+ if (engine_) {
+ engine_->ThreadAssignedToOSThread(managedThreadId, osThreadId);
+ }
+ }
+
+private:
+ std::unique_ptr engine_;
+};
+
+} // namespace continuous_profiler
+
+#endif // defined(_WIN32) && defined(_M_AMD64)
+#endif // OTEL_PROFILER_NETFX_STACK_CAPTURE_STRATEGY_H_
\ No newline at end of file
diff --git a/src/OpenTelemetry.AutoInstrumentation.Native/profiler_stack_capture.cpp b/src/OpenTelemetry.AutoInstrumentation.Native/profiler_stack_capture.cpp
new file mode 100644
index 0000000000..89a37adb1e
--- /dev/null
+++ b/src/OpenTelemetry.AutoInstrumentation.Native/profiler_stack_capture.cpp
@@ -0,0 +1,684 @@
+// Copyright The OpenTelemetry Authors
+// SPDX-License-Identifier: Apache-2.0
+
+#if defined(_WIN32) && defined(_M_AMD64)
+#include "profiler_stack_capture.h"
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include "logger.h"
+
+#ifndef DECLSPEC_IMPORT
+#define DECLSPEC_IMPORT __declspec(dllimport)
+#endif
+
+extern "C"
+{
+ DECLSPEC_IMPORT PRUNTIME_FUNCTION NTAPI RtlLookupFunctionEntry(DWORD64 ControlPc,
+ PDWORD64 ImageBase,
+ PUNWIND_HISTORY_TABLE HistoryTable);
+ DECLSPEC_IMPORT PEXCEPTION_ROUTINE NTAPI RtlVirtualUnwind(DWORD HandlerType,
+ DWORD64 ImageBase,
+ DWORD64 ControlPc,
+ PRUNTIME_FUNCTION FunctionEntry,
+ PCONTEXT ContextRecord,
+ PVOID* HandlerData,
+ PDWORD64 EstablisherFrame,
+ PKNONVOLATILE_CONTEXT_POINTERS ContextPointers);
+}
+
+namespace ProfilerStackCapture
+{
+
+// ========================================================================================
+// SEH-Protected Helper Functions
+// ========================================================================================
+
+/// @brief Helper function for reading return address (SEH-protected, no C++ objects)
+static bool ReadReturnAddressFromStack(DWORD64 rsp, DWORD64* pReturnAddress)
+{
+ __try
+ {
+ *pReturnAddress = *reinterpret_cast<DWORD64*>(rsp);
+ return true;
+ }
+ __except (EXCEPTION_EXECUTE_HANDLER)
+ {
+ trace::Logger::Debug("[StackCapture] ReadReturnAddressFromStack - Access violation reading RSP=0x", std::hex,
+ rsp, std::dec, ", ExceptionCode=0x", std::hex, GetExceptionCode(), std::dec);
+ return false;
+ }
+}
+
+/// @brief Helper function for RtlVirtualUnwind (SEH-protected, no C++ objects)
+static bool SafeRtlVirtualUnwind(DWORD64 imageBase,
+ DWORD64 controlPc,
+ PRUNTIME_FUNCTION runtimeFunction,
+ PCONTEXT context,
+ PULONG64 pEstablisherFrame)
+{
+ __try
+ {
+ PVOID handlerData = nullptr;
+ ULONG64 eFrame = 0;
+ RtlVirtualUnwind(0, imageBase, controlPc, runtimeFunction, context, &handlerData, &eFrame, nullptr);
+
+ if (pEstablisherFrame)
+ {
+ *pEstablisherFrame = eFrame;
+ }
+
+ return true;
+ }
+ __except (EXCEPTION_EXECUTE_HANDLER)
+ {
+ trace::Logger::Debug("[StackCapture] SafeRtlVirtualUnwind - RtlVirtualUnwind failed. ImageBase=0x", std::hex,
+ imageBase, ", ControlPC=0x", controlPc, std::dec, ", ExceptionCode=0x", std::hex,
+ GetExceptionCode(), std::dec);
+ return false;
+ }
+}
+
+/// @brief Helper for safety probe worker (SEH-protected, no std::unique_ptr)
+static HRESULT ExecuteProbeOperations(IProfilerApi* profilerApi, ThreadID canaryManagedId, const CONTEXT& canaryCtx)
+{
+ HRESULT result = S_OK;
+
+ int* testAlloc = nullptr;
+ __try
+ {
+
+ // Test 1: Heap allocation (using new/delete instead of unique_ptr as we are inside SEH block)
+ if ((testAlloc = new int(42)) != nullptr)
+ {
+ delete testAlloc;
+ testAlloc = nullptr;
+ }
+ // Test 2: RTL function lookup
+ UNWIND_HISTORY_TABLE historyTable = {};
+ DWORD64 imageBase = 0;
+ RtlLookupFunctionEntry(canaryCtx.Rip, &imageBase, &historyTable);
+
+ // Test 3: DoStackSnapshot
+ auto probeCallback = [](FunctionID, UINT_PTR, COR_PRF_FRAME_INFO, ULONG32, BYTE[], void*) -> HRESULT
+ { return S_FALSE; };
+
+ result =
+ profilerApi->DoStackSnapshot(canaryManagedId, probeCallback, COR_PRF_SNAPSHOT_DEFAULT, nullptr, nullptr, 0);
+ }
+ __except (EXCEPTION_EXECUTE_HANDLER)
+ {
+ DWORD exceptionCode = GetExceptionCode();
+ trace::Logger::Debug("[StackCapture] ExecuteProbeOperations - Exception during safety tests. ExceptionCode=0x",
+ std::hex, exceptionCode, std::dec, ", RIP=0x", canaryCtx.Rip);
+ if (testAlloc)
+ {
+ delete testAlloc;
+ }
+ return E_FAIL;
+ }
+
+ // If stack snapshot was aborted, treat as success for probe purposes, as we explicitly
+ // short-circuited it from the callback, prompting CORPROF_E_STACKSNAPSHOT_ABORTED
+ return result == CORPROF_E_STACKSNAPSHOT_ABORTED ? S_OK : result;
+}
+
+// PrepareContextForSnapshot - walks native stack to find managed frame and prepares context for DoStackSnapshot
+static HRESULT PrepareContextForSnapshot(ThreadID managedThreadId,
+ HANDLE threadHandle,
+ CONTEXT* pContext,
+ IProfilerApi* profilerApi,
+ std::atomic* pStopRequested)
+{
+ const int MAX_WALK_EVER = 10000;
+ DWORD64 origRSP = 0;
+
+ // Quick check: are we already at managed code?
+ FunctionID fid = 0;
+ HRESULT hr = profilerApi->GetFunctionFromIP(reinterpret_cast<LPCBYTE>(pContext->Rip), &fid);
+ if (SUCCEEDED(hr) && fid != 0)
+ {
+ return S_OK;
+ }
+
+ // Walk native frames to find managed code
+ for (int walkCount = 0; walkCount < MAX_WALK_EVER; ++walkCount)
+ {
+ if (pStopRequested && pStopRequested->load())
+ {
+ return E_ABORT;
+ }
+
+ // Check for stack progress
+ if (origRSP != 0 && pContext->Rsp <= origRSP)
+ {
+ break;
+ }
+ origRSP = pContext->Rsp;
+
+ // Check for end of stack
+ if (pContext->Rip == 0)
+ {
+ break;
+ }
+
+ // Try to find runtime function for current RIP
+ UNWIND_HISTORY_TABLE historyTable = {};
+ DWORD64 imageBase = 0;
+ PRUNTIME_FUNCTION runtimeFunction = RtlLookupFunctionEntry(pContext->Rip, &imageBase, &historyTable);
+
+ DWORD64 instructionPointer;
+
+ if (!runtimeFunction)
+ {
+ // Leaf function - read return address from stack
+ DWORD64 returnAddress = 0;
+ if (!ReadReturnAddressFromStack(pContext->Rsp, &returnAddress))
+ {
+ return E_FAIL;
+ }
+ // no runtime function, manually unwind to previous frame, adjust the RIP and RSP fields
+ pContext->Rip = returnAddress;
+ pContext->Rsp += sizeof(DWORD64);
+ instructionPointer = returnAddress;
+ }
+ else
+ {
+ // Has unwind info - use function begin address (critical for CLR detection)
+ instructionPointer = imageBase + runtimeFunction->BeginAddress;
+
+ // Unwind to previous frame, updates Rip and Rsp fields, we have runtimeFunction to guide us
+ if (!SafeRtlVirtualUnwind(imageBase, pContext->Rip, runtimeFunction, pContext, nullptr))
+ {
+ return E_FAIL;
+ }
+ }
+ // Virtual unwind traverses frames, so after unwind, RIP points to caller's instruction
+ // For leaf functions, we manually set RIP to return address
+ // Illustration: we use virtual unwind or manual stack read to move from:
+ // SleepEx() -> CLR transition stub -> YourApp.DoWork()
+ // Before:
+ // kernel32.dll!SleepEx ->RIP might point to this (native code)
+ // |_CLR transition stub
+ // |_YourApp.DoWork() -> We want to start HERE in managed code, not in SleepEx or the transition stub
+ // After: RIP is adjusted to point to the managed frame - YourApp.DoWork()
+
+ // Check if this instruction pointer is managed
+ hr = profilerApi->GetFunctionFromIP(reinterpret_cast<LPCBYTE>(instructionPointer), &fid);
+ if (SUCCEEDED(hr) && fid != 0)
+ {
+ // Update context to point to this managed frame's beginning
+ pContext->Rip = instructionPointer; // this is the seed for DoStackSnapshot
+ return S_OK;
+ }
+ }
+
+ // Exhausted all frames without finding managed code. Log at debug level to avoid noise;
+ // failing to find a managed frame is expected in some scenarios (e.g., native-only threads).
+ trace::Logger::Debug(
+ "[StackCapture] PrepareContextForSnapshot - Unable to locate managed frame in stack walk for ThreadID=",
+ managedThreadId);
+ return E_FAIL;
+}
+
+// InvocationQueue implementation
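+// Runs submitted callables on a dedicated worker thread; Invoke() reports whether the callable
+// completed within the caller-supplied timeout, so a probe that hangs does not block the sampler.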
+InvocationQueue::InvocationQueue()
+{
+ worker_ = std::unique_ptr<std::thread>(new std::thread(&InvocationQueue::WorkerLoop, this));
+}
+InvocationQueue::~InvocationQueue()
+{
+ Stop();
+}
+void InvocationQueue::Stop()
+{
+ bool expected = false;
+ if (stop_.compare_exchange_strong(expected, true))
+ {
+ condVar_.notify_all();
+ }
+ else
+ {
+ stop_ = true;
+ condVar_.notify_all();
+ }
+}
+InvocationStatus InvocationQueue::Invoke(const std::function<void()>& fn, std::chrono::milliseconds timeout)
+{
+ if (stop_.load())
+ return InvocationStatus::TimedOut;
+ auto item = std::make_shared();
+ item->fn = fn;
+ auto fut = item->completedPromise.get_future();
+ {
+ std::lock_guard lock(mutex_);
+ queue_.push_back(item);
+ }
+ condVar_.notify_one();
+ return fut.wait_for(timeout) == std::future_status::ready ? InvocationStatus::Invoked : InvocationStatus::TimedOut;
+}
+void InvocationQueue::WorkerLoop()
+{
+ for (;;)
+ {
+ std::shared_ptr item;
+ {
+ std::unique_lock lock(mutex_);
+ condVar_.wait(lock, [this]() { return stop_.load() || !queue_.empty(); });
+ if (stop_.load())
+ break;
+ if (!queue_.empty())
+ {
+ item = queue_.front();
+ queue_.pop_front();
+ }
+ else
+ continue;
+ }
+ try
+ {
+ item->fn();
+ }
+ catch (...)
+ {
+ }
+ item->completedPromise.set_value();
+ }
+}
+
+// ProfilerApiAdapter
+HRESULT ProfilerApiAdapter::DoStackSnapshot(ThreadID threadId,
+ StackSnapshotCallback callback,
+ DWORD infoFlags,
+ void* clientData,
+ BYTE* context,
+ ULONG contextSize)
+{
+ return profilerInfo_->DoStackSnapshot(threadId, callback, infoFlags, clientData, context, contextSize);
+}
+HRESULT ProfilerApiAdapter::GetFunctionFromIP(LPCBYTE ip, FunctionID* functionId)
+{
+ return profilerInfo_->GetFunctionFromIP(ip, functionId);
+}
+
+// ScopedThreadSuspend
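+// RAII wrapper: opens the target thread with suspend/get-context access and suspends it in the
+// constructor; the destructor resumes the thread (if still suspended) and closes the handle.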
+ScopedThreadSuspend::ScopedThreadSuspend(DWORD nativeThreadId) : threadHandle_(INVALID_HANDLE_VALUE), suspended_(false)
+{
+ threadHandle_ = OpenThread(THREAD_GET_CONTEXT | THREAD_SUSPEND_RESUME, FALSE, nativeThreadId);
+ if (threadHandle_ == NULL)
+ {
+ throw std::runtime_error("Failed to open thread handle");
+ }
+
+ DWORD suspendCount = SuspendThread(threadHandle_);
+ if (suspendCount == static_cast<DWORD>(-1))
+ {
+ CloseHandle(threadHandle_);
+ threadHandle_ = INVALID_HANDLE_VALUE;
+ throw std::runtime_error("Failed to suspend thread");
+ }
+
+ suspended_ = true;
+}
+
+ScopedThreadSuspend::~ScopedThreadSuspend()
+{
+ if (threadHandle_ != INVALID_HANDLE_VALUE)
+ {
+ if (suspended_)
+ {
+ ResumeThread(threadHandle_);
+ }
+ CloseHandle(threadHandle_);
+ }
+}
+
+ScopedThreadSuspend::ScopedThreadSuspend(ScopedThreadSuspend&& other) noexcept
+ : threadHandle_(other.threadHandle_), suspended_(other.suspended_)
+{
+ other.threadHandle_ = INVALID_HANDLE_VALUE;
+ other.suspended_ = false;
+}
+
+ScopedThreadSuspend& ScopedThreadSuspend::operator=(ScopedThreadSuspend&& other) noexcept
+{
+ if (this != &other)
+ {
+ if (threadHandle_ != INVALID_HANDLE_VALUE)
+ {
+ if (suspended_)
+ ResumeThread(threadHandle_);
+ CloseHandle(threadHandle_);
+ }
+ threadHandle_ = other.threadHandle_;
+ suspended_ = other.suspended_;
+ other.threadHandle_ = INVALID_HANDLE_VALUE;
+ other.suspended_ = false;
+ }
+ return *this;
+}
+
+// StackCaptureEngine
+StackCaptureEngine::StackCaptureEngine(std::unique_ptr<IProfilerApi> profilerApi, const CaptureOptions& options)
+ : profilerApi_(std::move(profilerApi)), options_(options)
+{
+ invocationQueue_ = std::make_unique();
+ trace::Logger::Info(L"[StackCapture] Engine initialized with canary prefix: ", options_.canaryThreadName);
+}
+StackCaptureEngine::~StackCaptureEngine()
+{
+ Stop();
+}
+
+void StackCaptureEngine::Stop()
+{
+ stopRequested_ = true;
+ captureCondVar_.notify_all();
+ if (invocationQueue_)
+ invocationQueue_->Stop();
+}
+
+HRESULT StackCaptureEngine::ThreadDestroyed(ThreadID threadId)
+{
+ std::lock_guard lock(threadListMutex_);
+ activeThreads_.erase(threadId);
+ threadNames_.erase(threadId);
+
+ // Clear canary if it was this thread
+ if (canaryThread_.managedId == threadId)
+ {
+ trace::Logger::Info("[StackCapture] Canary thread destroyed - ManagedID=", threadId,
+ ", NativeID=", canaryThread_.nativeId);
+ canaryThread_.reset();
+ // threadNames_ maps managed thread IDs to names - try to designate another canary if possible
+ for (const auto& [managedId, name] : threadNames_)
+ {
+ if (options_.IsCanaryThread(name))
+ {
+ auto osThreadIt = activeThreads_.find(managedId);
+ if (osThreadIt != activeThreads_.end())
+ {
+ canaryThread_ = CanaryThreadInfo{managedId, osThreadIt->second};
+ trace::Logger::Info("[StackCapture] New canary thread designated after destruction - ManagedID=",
+ managedId, ", NativeID=", osThreadIt->second, ", Name=", name);
+ captureCondVar_.notify_all();
+ break;
+ }
+ }
+ }
+ }
+
+ return S_OK;
+}
+
+HRESULT StackCaptureEngine::ThreadAssignedToOSThread(ThreadID managedThreadId, DWORD osThreadId)
+{
+ std::lock_guard lock(threadListMutex_);
+ activeThreads_[managedThreadId] = osThreadId;
+
+ if (canaryThread_.isValid())
+ {
+ return S_OK;
+ }
+
+ auto nameIt = threadNames_.find(managedThreadId);
+ if (nameIt != threadNames_.end())
+ {
+ if (options_.IsCanaryThread(nameIt->second))
+ {
+ canaryThread_ = CanaryThreadInfo{managedThreadId, osThreadId};
+ trace::Logger::Info("[StackCapture] Canary thread designated via ThreadAssignedToOSThread - ManagedID=",
+ managedThreadId, ", NativeID=", osThreadId, ", Name=", nameIt->second);
+ captureCondVar_.notify_all();
+ }
+ }
+
+ return S_OK;
+}
+
+HRESULT StackCaptureEngine::ThreadNameChanged(ThreadID threadId, ULONG cchName, WCHAR name[])
+{
+ if (!name || cchName == 0)
+ return S_OK;
+
+ std::lock_guard lock(threadListMutex_);
+
+ std::wstring threadName(name, cchName);
+ threadNames_[threadId] = threadName;
+ trace::Logger::Debug("[StackCapture] ThreadNameChanged - ManagedID=", threadId, ", Name=", threadName);
+ if (options_.IsCanaryThread(threadName))
+ {
+ auto osThreadIt = activeThreads_.find(threadId);
+ if (osThreadIt != activeThreads_.end() && !canaryThread_.isValid())
+ {
+ canaryThread_ = CanaryThreadInfo{threadId, osThreadIt->second};
+ captureCondVar_.notify_all();
+ trace::Logger::Info("[StackCapture] Canary thread designated via ThreadNameChanged - ManagedID=", threadId,
+ ", NativeID=", osThreadIt->second, ", Name=", threadName);
+ }
+ else
+ {
+ trace::Logger::Debug(
+ "[StackCapture] Canary thread name matched but OS thread not yet assigned - ManagedID=", threadId,
+ ", Name=", threadName);
+ }
+ }
+
+ return S_OK;
+}
+
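+// Suspends the canary thread, captures its context, then runs heap allocation, RtlLookupFunctionEntry
+// and a short-circuited DoStackSnapshot on the worker queue within a bounded timeout. Returns false if
+// the probe hangs or fails, signalling that stack capture is currently unsafe.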
+bool StackCaptureEngine::SafetyProbe(const CanaryThreadInfo& canaryInfo)
+{
+ if (!invocationQueue_)
+ return true;
+
+ std::atomic snapshotHr{S_OK};
+ InvocationStatus status = InvocationStatus::TimedOut;
+
+ try
+ {
+ ScopedThreadSuspend canaryThread(canaryInfo.nativeId);
+
+ CONTEXT canaryCtx = {};
+ canaryCtx.ContextFlags = CONTEXT_FULL;
+
+ // Get thread context
+ if (!GetThreadContext(canaryThread.GetHandle(), &canaryCtx))
+ {
+ DWORD error = GetLastError();
+ trace::Logger::Error("[StackCapture] SafetyProbe failed - GetThreadContext failed. Error=", error,
+ ", NativeID=", canaryThread_.nativeId);
+ return false;
+ }
+
+ auto canaryManagedId = canaryInfo.managedId;
+
+ status = invocationQueue_->Invoke(
+ [this, canaryManagedId, canaryCtx, &snapshotHr]()
+ {
+ HRESULT hr = ExecuteProbeOperations(profilerApi_.get(), canaryManagedId, canaryCtx);
+ snapshotHr.store(hr);
+ },
+ options_.probeTimeout);
+
+ // Canary thread auto-resumes here via RAII
+ }
+ catch (const std::exception& ex)
+ {
+ trace::Logger::Error("[StackCapture] SafetyProbe failed - Exception during thread suspension/context capture: ",
+ ex.what());
+ return false;
+ }
+
+ // Check invocation status
+ if (status != InvocationStatus::Invoked)
+ {
+ trace::Logger::Warn("[StackCapture] SafetyProbe failed - Probe operations timed out after ",
+ options_.probeTimeout.count(), "ms");
+ return false;
+ }
+
+ // Analyze HRESULT from probe operations
+ HRESULT hr = snapshotHr.load();
+
+ if (hr == CORPROF_E_STACKSNAPSHOT_UNSAFE)
+ {
+ trace::Logger::Warn("[StackCapture] SafetyProbe detected UNSAFE condition - DoStackSnapshot returned "
+ "CORPROF_E_STACKSNAPSHOT_UNSAFE");
+ return false;
+ }
+
+ if (FAILED(hr))
+ {
+ // Log specific HRESULT codes for diagnostics
+ if (hr == E_FAIL)
+ {
+ trace::Logger::Error("[StackCapture] SafetyProbe failed - Probe operations returned E_FAIL (0x", std::hex,
+ hr, std::dec, ")");
+ }
+ else if (hr == E_ABORT)
+ {
+ trace::Logger::Error("[StackCapture] SafetyProbe failed - Probe operations aborted (0x", std::hex, hr,
+ std::dec, ")");
+ }
+ else
+ {
+ trace::Logger::Error("[StackCapture] SafetyProbe failed - Probe operations returned HRESULT=0x", std::hex,
+ hr, std::dec);
+ }
+ return false;
+ }
+
+ trace::Logger::Debug("[StackCapture] SafetyProbe succeeded - Stack capture is safe");
+ return true;
+}
+
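+// Tries an unseeded DoStackSnapshot first (fast path when the thread is stopped in managed code);
+// on failure, walks native frames via PrepareContextForSnapshot to locate the nearest managed frame
+// and retries DoStackSnapshot seeded with that context.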
+HRESULT StackCaptureEngine::CaptureStackSeeded(ThreadID managedThreadId,
+ HANDLE threadHandle,
+ StackCaptureContext* stackCaptureContext)
+{
+ // Try unseeded first - fast path for threads already in managed code
+ stackCaptureContext->clientParams->threadId = managedThreadId;
+ HRESULT hr = profilerApi_->DoStackSnapshot(managedThreadId,
+ continuous_profiler::IStackCaptureStrategy::StackSnapshotCallbackDefault,
+ COR_PRF_SNAPSHOT_DEFAULT, stackCaptureContext->clientParams,
+ nullptr, // No seed
+ 0);
+
+ if (SUCCEEDED(hr))
+ {
+ trace::Logger::Debug("[StackCapture] Unseeded capture succeeded. ThreadID=", managedThreadId);
+ return hr;
+ }
+
+ trace::Logger::Debug("[StackCapture] Unseeded failed (0x", std::hex, hr, "), attempting seeded capture...");
+
+ // Fallback: PrepareContext will check if we're at managed code before walking
+ CONTEXT context = {};
+ context.ContextFlags = CONTEXT_FULL;
+
+ if (!GetThreadContext(threadHandle, &context))
+ {
+ return E_FAIL;
+ }
+ hr = PrepareContextForSnapshot(managedThreadId, threadHandle, &context, profilerApi_.get(), &stopRequested_);
+ if (FAILED(hr))
+ {
+ return hr;
+ }
+
+ hr = profilerApi_->DoStackSnapshot(managedThreadId,
+ continuous_profiler::IStackCaptureStrategy::StackSnapshotCallbackDefault,
+ COR_PRF_SNAPSHOT_DEFAULT, stackCaptureContext->clientParams,
+ reinterpret_cast<BYTE*>(&context), sizeof(CONTEXT));
+
+ if (FAILED(hr))
+ {
+ trace::Logger::Debug("[StackCapture] Seeded capture failed. HRESULT=0x", std::hex, hr,
+ ", ThreadID=", managedThreadId);
+ }
+ else
+ {
+ trace::Logger::Debug("[StackCapture] Seeded capture succeeded. ThreadID=", managedThreadId);
+ }
+
+ return hr;
+}
+
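+// Blocks until a canary thread has been designated via the thread lifecycle callbacks, or the
+// timeout elapses; returns a default (invalid) CanaryThreadInfo on timeout.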
+CanaryThreadInfo StackCaptureEngine::WaitForCanaryThread(std::chrono::milliseconds timeout)
+{
+ trace::Logger::Debug("[StackCapture] Waiting for canary thread (timeout=", timeout.count(), "ms)");
+ CanaryThreadInfo canary;
+ {
+ std::unique_lock lock(threadListMutex_);
+ bool result = captureCondVar_.wait_for(lock, timeout,
+ [this]() { return stopRequested_.load() || canaryThread_.isValid(); });
+
+ if (!result)
+ {
+ trace::Logger::Warn("[StackCapture] Canary thread wait timed out after ", timeout.count(), "ms");
+ }
+ else
+ {
+ canary = canaryThread_;
+ trace::Logger::Debug("[StackCapture] Canary thread ready - ManagedID=", canary.managedId,
+ ", NativeID=", canary.nativeId);
+ }
+ }
+
+ return canary;
+}
+
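+// For each requested managed thread: look up its native ID, suspend it, run the safety probe against
+// the canary, and only then attempt the stack snapshot. Threads without a known native ID or with a
+// failed probe are skipped.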
+HRESULT StackCaptureEngine::CaptureStacks(std::unordered_set<ThreadID> const& threads,
+ continuous_profiler::StackSnapshotCallbackContext* clientData)
+{
+ auto canary = WaitForCanaryThread();
+
+ if (!canary.isValid())
+ return E_FAIL;
+
+ for (const auto& managedId : threads)
+ {
+ if (stopRequested_)
+ break;
+ if (managedId == canary.managedId)
+ continue;
+ DWORD nativeId = 0;
+ {
+ std::lock_guard lock(threadListMutex_);
+ auto it = activeThreads_.find(managedId);
+ if (it == activeThreads_.end())
+ {
+ continue;
+ }
+ nativeId = it->second;
+ }
+ try
+ {
+ ScopedThreadSuspend targetThread(nativeId);
+ if (!SafetyProbe(canary))
+ {
+ trace::Logger::Debug(
+ "[StackCapture] CaptureStacks - Skipping thread due to safety probe failure. ManagedID=", managedId,
+ ", NativeID=", nativeId);
+ continue;
+ }
+ clientData->threadId = managedId;
+ StackCaptureContext stackCaptureContext{0, &stopRequested_, clientData};
+ CaptureStackSeeded(managedId, targetThread.GetHandle(), &stackCaptureContext);
+ }
+ catch (const std::exception& ex)
+ {
+ trace::Logger::Error("[StackCapture] CaptureStacks - Exception during stack capture for ManagedID=",
+ managedId, ", NativeID=", nativeId, ": ", ex.what());
+ }
+ }
+ return S_OK;
+}
+
+} // namespace ProfilerStackCapture
+#endif // defined(_WIN32) && defined(_M_AMD64)
\ No newline at end of file
diff --git a/src/OpenTelemetry.AutoInstrumentation.Native/profiler_stack_capture.h b/src/OpenTelemetry.AutoInstrumentation.Native/profiler_stack_capture.h
new file mode 100644
index 0000000000..8d3968c132
--- /dev/null
+++ b/src/OpenTelemetry.AutoInstrumentation.Native/profiler_stack_capture.h
@@ -0,0 +1,170 @@
+// Copyright The OpenTelemetry Authors
+// SPDX-License-Identifier: Apache-2.0
+#ifndef OTEL_PROFILER_STACK_CAPTURE_H_
+#define OTEL_PROFILER_STACK_CAPTURE_H_
+
+#if defined(_WIN32) && defined(_M_AMD64)
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include