Skip to content

Commit b4c9850

Browse files
mwootton authored and meta-codesync[bot] committed
Add rocprofiler-sdk support 2 (#1128)
Summary: Supersedes #1050. Convert to using rocprofiler-sdk instead of roctracer for collecting hip api calls and AMD gpu activity. Reuses most existing roctracer infrastructure with a name-for-name replacement. Simultaneous support for both roctracer and rocprofiler-sdk was deemed impractical. This would require a whole new set of #ifdefs, a major refactor of the roctracer code, and additional build support. Even then, only one could be active at a time (and you wouldn't want both active). In homage to the abandoned refactor, RocLogger.cpp/h were created to contain the rocprofbase classes and the api filter. Roctracer has no established end date. Rocprofiler-sdk is in rocm_3.1 forward. This will create a dependency where (newest kineto on old rocm) and (old kineto on newest rocm) could fail to build with AMD gpu support. That window is already over 1 year wide. Pull Request resolved: #1128 Reviewed By: aaronenyeshi Differential Revision: D82773951 Pulled By: sraikund16 fbshipit-source-id: 56db042d067ca1ea45c90f99d32d46574e36746e
1 parent 07cf7a5 commit b4c9850

20 files changed

+2060
-308
lines changed

libkineto/CMakeLists.txt

Lines changed: 59 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,50 @@ else()
7979
set(LIBKINETO_NOXPUPTI ON)
8080
endif()
8181

82+
# Detect the installed ROCm version and choose the AMD tracing backend.
#
# Inputs:
#   ROCM_SOURCE_DIR    - used to derive "<dir>/include" when ROCM_INCLUDE_DIRS
#                        is not already set.
#   ROCM_INCLUDE_DIRS  - directories searched for rocm-core/rocm_version.h.
#   ROCM_HEADER_FILE   - used as-is when the header search finds nothing but
#                        this variable already points at an existing file.
# Outputs:
#   ROCM_VERSION_DEV_MAJOR / _MINOR / _PATCH - numbers parsed from the header.
#   USE_ROCPROFILER_SDK - set to ON for ROCm 6.4 and newer; left untouched
#                         otherwise.
if(NOT LIBKINETO_NOROCTRACER)
  if(NOT ROCM_INCLUDE_DIRS)
    set(ROCM_INCLUDE_DIRS "${ROCM_SOURCE_DIR}/include")
  endif()

  find_file(ROCM_VERSION_HEADER_PATH
    NAMES rocm-core/rocm_version.h
    NO_DEFAULT_PATH
    PATHS ${ROCM_INCLUDE_DIRS}
  )

  if(EXISTS "${ROCM_VERSION_HEADER_PATH}")
    set(ROCM_HEADER_FILE "${ROCM_VERSION_HEADER_PATH}")
  endif()

  # Stop here with an actionable message instead of letting file(READ) fail on
  # an empty or stale path.
  if(NOT EXISTS "${ROCM_HEADER_FILE}")
    message(FATAL_ERROR
      "Could not locate rocm-core/rocm_version.h (searched: "
      "${ROCM_INCLUDE_DIRS}). Point ROCM_SOURCE_DIR or ROCM_INCLUDE_DIRS at a "
      "ROCm installation.")
  endif()

  # Read the ROCM headerfile into a variable
  message(STATUS "Reading ROCM version from: ${ROCM_HEADER_FILE}")
  file(READ "${ROCM_HEADER_FILE}" ROCM_HEADER_CONTENT)

  # Pull the number that follows each ROCM_VERSION_<PART> macro name. The
  # capture group yields the digits directly, so no REPLACE/STRIP is needed.
  foreach(version_part MAJOR MINOR PATCH)
    if(ROCM_HEADER_CONTENT MATCHES
       "ROCM_VERSION_${version_part}[ \t]+([0-9]+)")
      set(ROCM_VERSION_DEV_${version_part} "${CMAKE_MATCH_1}")
    else()
      message(FATAL_ERROR
        "ROCM_VERSION_${version_part} not found in ${ROCM_HEADER_FILE}")
    endif()
  endforeach()

  message(STATUS "ROCM major: ${ROCM_VERSION_DEV_MAJOR}")
  message(STATUS "ROCM minor: ${ROCM_VERSION_DEV_MINOR}")
  message(STATUS "ROCM patch: ${ROCM_VERSION_DEV_PATCH}")

  # Use rocprofiler-sdk for rocm version 6.4 forward
  if("${ROCM_VERSION_DEV_MAJOR}.${ROCM_VERSION_DEV_MINOR}"
     VERSION_GREATER_EQUAL "6.4")
    set(USE_ROCPROFILER_SDK ON)
  endif()

  # Test the variable by name: an unset value is simply false, rather than
  # expanding to an empty if() condition.
  if(USE_ROCPROFILER_SDK)
    message(STATUS "Building with: rocprofiler-sdk")
  else()
    message(STATUS "Building with: libroctracer")
  endif()
endif()
82126
if(NOT DEFINED LIBKINETO_NOAIUPTI)
83127
message(INFO " LIBKINETO_NOAIUPTI NOT DEFINED adding subdirectory(src/plugin/aiupti)")
84128
add_subdirectory(src/plugin/aiupti)
@@ -90,8 +134,14 @@ if(LIBKINETO_NOCUPTI AND LIBKINETO_NOROCTRACER AND LIBKINETO_NOXPUPTI AND LIBKIN
90134
message(STATUS " CUPTI unavailable or disabled - not building GPU profilers")
91135
else()
92136
if(NOT LIBKINETO_NOROCTRACER)
93-
get_filelist("get_libkineto_roctracer_srcs(with_api=False)" LIBKINETO_roc_SRCS)
94-
message(STATUS " Building with roctracer")
137+
if (${USE_ROCPROFILER_SDK})
138+
get_filelist("get_libkineto_rocprofiler_srcs(with_api=False)" LIBKINETO_roc_SRCS)
139+
message(STATUS " Building with rocprofiler-sdk")
140+
else()
141+
get_filelist("get_libkineto_roctracer_srcs(with_api=False)" LIBKINETO_roc_SRCS)
142+
add_compile_options(-DROCTRACER_FALLBACK)
143+
message(STATUS " Building with roctracer")
144+
endif()
95145
elseif(NOT LIBKINETO_NOCUPTI)
96146
get_filelist("get_libkineto_cupti_srcs(with_api=False)" LIBKINETO_cuda_SRCS)
97147
endif()
@@ -228,9 +278,14 @@ target_include_directories(kineto PUBLIC
228278
$<BUILD_INTERFACE:${LIBKINETO_SOURCE_DIR}>)
229279

230280
if(NOT LIBKINETO_NOROCTRACER)
231-
find_library(ROCTRACER_LIBRARY NAMES libroctracer64.so HINTS
281+
if (${USE_ROCPROFILER_SDK})
282+
find_library(ROCPROF_LIBRARY NAMES librocprofiler-sdk.so HINTS
232283
${ROCM_SOURCE_DIR}/lib)
233-
target_link_libraries(kineto "${ROCTRACER_LIBRARY}")
284+
else()
285+
find_library(ROCPROF_LIBRARY NAMES libroctracer64.so HINTS
286+
${ROCM_SOURCE_DIR}/lib)
287+
endif()
288+
target_link_libraries(kineto "${ROCPROF_LIBRARY}")
234289
find_library(KINETO_HIP_LIBRARY NAMES libamdhip64.so HINTS
235290
${ROCM_SOURCE_DIR}/lib)
236291
target_link_libraries(kineto "${KINETO_HIP_LIBRARY}")

libkineto/libkineto_defs.bzl

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,10 +27,18 @@ def get_libkineto_cupti_srcs(with_api = True):
2727
"src/cupti_strings.cpp",
2828
] + (get_libkineto_cpu_only_srcs(with_api))
2929

30+
def get_libkineto_rocprofiler_srcs(with_api = True):
    """Source list for the rocprofiler-sdk build of libkineto.

    Args:
        with_api: forwarded unchanged to get_libkineto_cpu_only_srcs().

    Returns:
        The three Rocprof/Roc sources below, followed by whatever
        get_libkineto_cpu_only_srcs(with_api) returns.
    """
    rocprofiler_srcs = [
        "src/RocprofActivityApi.cpp",
        "src/RocprofLogger.cpp",
        "src/RocLogger.cpp",
    ]
    return rocprofiler_srcs + get_libkineto_cpu_only_srcs(with_api)
36+
3037
def get_libkineto_roctracer_srcs(with_api = True):
    """Source list for the roctracer build of libkineto.

    Args:
        with_api: forwarded unchanged to get_libkineto_cpu_only_srcs().

    Returns:
        The Roctracer sources and the shared RocLogger source below, followed
        by whatever get_libkineto_cpu_only_srcs(with_api) returns.
    """
    roctracer_srcs = [
        "src/RoctracerActivityApi.cpp",
        "src/RoctracerLogger.cpp",
        "src/RocLogger.cpp",
    ]
    return roctracer_srcs + get_libkineto_cpu_only_srcs(with_api)
3543

3644
def get_libkineto_xpupti_srcs(with_api = True):

libkineto/src/ActivityProfilerController.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,12 @@
1818

1919
#include "CuptiActivityApi.h"
2020
#ifdef HAS_ROCTRACER
21+
#ifndef ROCTRACER_FALLBACK
22+
#include "RocprofActivityApi.h"
23+
#else
2124
#include "RoctracerActivityApi.h"
2225
#endif
26+
#endif
2327

2428
#include "ThreadUtil.h"
2529
#include "output_json.h"
@@ -67,8 +71,13 @@ ActivityProfilerController::ActivityProfilerController(
6771
#endif // !USE_GOOGLE_LOG
6872

6973
#ifdef HAS_ROCTRACER
74+
#ifndef ROCTRACER_FALLBACK
75+
profiler_ = std::make_unique<CuptiActivityProfiler>(
76+
RocprofActivityApi::singleton(), cpuOnly);
77+
#else
7078
profiler_ = std::make_unique<CuptiActivityProfiler>(
7179
RoctracerActivityApi::singleton(), cpuOnly);
80+
#endif
7281
#else
7382
profiler_ = std::make_unique<CuptiActivityProfiler>(
7483
CuptiActivityApi::singleton(), cpuOnly);

libkineto/src/CuptiActivityProfiler.cpp

Lines changed: 64 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@
2525
#ifdef HAS_CUPTI
2626
#include <cupti.h>
2727
#elif defined(HAS_ROCTRACER)
28-
#include <roctracer.h>
28+
#include <rocprofiler-sdk/version.h>
2929
#endif
3030

3131
#include "Config.h"
@@ -39,9 +39,14 @@
3939
#include "KernelRegistry.h"
4040
#endif // HAS_CUPTI
4141
#ifdef HAS_ROCTRACER
42+
#include "RocLogger.h"
43+
#ifndef ROCTRACER_FALLBACK
44+
#include "RocprofActivity.h"
45+
#include "RocprofActivityApi.h"
46+
#else
4247
#include "RoctracerActivity.h"
4348
#include "RoctracerActivityApi.h"
44-
#include "RoctracerLogger.h"
49+
#endif
4550
#endif
4651
#ifdef HAS_XPUPTI
4752
#include "plugin/xpupti/XpuptiActivityProfiler.h"
@@ -126,14 +131,14 @@ bool ConfigDerivedState::canStart(
126131
return true;
127132
}
128133
if (profileStartTime_ < now) {
129-
LOG(ERROR)
130-
<< "Not starting tracing - start timestamp is in the past. Time difference (ms): "
131-
<< duration_cast<milliseconds>(now - profileStartTime_).count();
134+
LOG(ERROR) << "Not starting tracing - start timestamp is in the past. Time "
135+
"difference (ms): "
136+
<< duration_cast<milliseconds>(now - profileStartTime_).count();
132137
return false;
133138
} else if ((profileStartTime_ - now) < profileWarmupDuration_) {
134-
LOG(ERROR)
135-
<< "Not starting tracing - insufficient time for warmup. Time to warmup (ms): "
136-
<< duration_cast<milliseconds>(profileStartTime_ - now).count();
139+
LOG(ERROR) << "Not starting tracing - insufficient time for warmup. Time "
140+
"to warmup (ms): "
141+
<< duration_cast<milliseconds>(profileStartTime_ - now).count();
137142
return false;
138143
}
139144
return true;
@@ -214,9 +219,15 @@ void CuptiActivityProfiler::transferCpuTrace(
214219
}
215220

216221
#ifdef HAS_ROCTRACER
222+
#ifndef ROCTRACER_FALLBACK
223+
CuptiActivityProfiler::CuptiActivityProfiler(
224+
RocprofActivityApi& cupti,
225+
bool cpuOnly)
226+
#else
217227
CuptiActivityProfiler::CuptiActivityProfiler(
218228
RoctracerActivityApi& cupti,
219229
bool cpuOnly)
230+
#endif
220231
#else
221232
CuptiActivityProfiler::CuptiActivityProfiler(
222233
CuptiActivityApi& cupti,
@@ -256,23 +267,23 @@ void CuptiActivityProfiler::logGpuVersions() {
256267
addVersionMetadata("cuda_driver_version", std::to_string(cudaDriverVersion));
257268

258269
#elif defined(HAS_ROCTRACER)
259-
uint32_t majorVersion = roctracer_version_major();
260-
uint32_t minorVersion = roctracer_version_minor();
270+
uint32_t majorVersion = ROCPROFILER_VERSION_MAJOR;
271+
uint32_t minorVersion = ROCPROFILER_VERSION_MINOR;
261272
std::string roctracerVersion =
262273
std::to_string(majorVersion) + "." + std::to_string(minorVersion);
263274
int hipRuntimeVersion = 0, hipDriverVersion = 0;
264275
CUDA_CALL(hipRuntimeGetVersion(&hipRuntimeVersion));
265276
CUDA_CALL(hipDriverGetVersion(&hipDriverVersion));
266-
LOG(INFO) << "HIP versions. Roctracer: " << roctracerVersion
277+
LOG(INFO) << "HIP versions. Rocprofiler-sdk: " << roctracerVersion
267278
<< "; Runtime: " << hipRuntimeVersion
268279
<< "; Driver: " << hipDriverVersion;
269280

270-
LOGGER_OBSERVER_ADD_METADATA("roctracer_version", roctracerVersion);
281+
LOGGER_OBSERVER_ADD_METADATA("rocprofiler-sdk_version", roctracerVersion);
271282
LOGGER_OBSERVER_ADD_METADATA(
272283
"hip_runtime_version", std::to_string(hipRuntimeVersion));
273284
LOGGER_OBSERVER_ADD_METADATA(
274285
"hip_driver_version", std::to_string(hipDriverVersion));
275-
addVersionMetadata("roctracer_version", roctracerVersion);
286+
addVersionMetadata("rocprofiler-sdk_version", roctracerVersion);
276287
addVersionMetadata("hip_runtime_version", std::to_string(hipRuntimeVersion));
277288
addVersionMetadata("hip_driver_version", std::to_string(hipDriverVersion));
278289

@@ -372,7 +383,7 @@ void CuptiActivityProfiler::processTraceInternal(ActivityLogger& logger) {
372383
VLOG(0) << "Retrieving GPU activity buffers";
373384
const int count = cupti_.processActivities(
374385
std::bind(
375-
&CuptiActivityProfiler::handleRoctracerActivity,
386+
&CuptiActivityProfiler::handleRocprofActivity,
376387
this,
377388
std::placeholders::_1,
378389
&logger),
@@ -449,9 +460,10 @@ void CuptiActivityProfiler::processCpuTrace(
449460
activityMap_[act->correlationId()] = act.get();
450461
if (act->deviceId() == 0) {
451462
if (!warn_once) {
452-
LOG(WARNING)
453-
<< "CPU activity with pid 0 detected. This is likely due to the python stack"
454-
" tracer not being able to determine the pid for an event. Overriding pid to main thread pid";
463+
LOG(WARNING) << "CPU activity with pid 0 detected. This is likely due "
464+
"to the python stack"
465+
" tracer not being able to determine the pid for an "
466+
"event. Overriding pid to main thread pid";
455467
}
456468
act->setDevice(processId());
457469
warn_once = true;
@@ -470,8 +482,8 @@ inline void CuptiActivityProfiler::handleCorrelationActivity(
470482
correlation->externalKind == CUPTI_EXTERNAL_CORRELATION_KIND_CUSTOM1) {
471483
userCorrelationMap_[correlation->correlationId] = correlation->externalId;
472484
} else {
473-
LOG(WARNING)
474-
<< "Invalid CUpti_ActivityExternalCorrelation sent to handleCuptiActivity";
485+
LOG(WARNING) << "Invalid CUpti_ActivityExternalCorrelation sent to "
486+
"handleCuptiActivity";
475487
ecs_.invalid_external_correlation_events++;
476488
}
477489
}
@@ -480,14 +492,14 @@ inline void CuptiActivityProfiler::handleCorrelationActivity(
480492
inline void CuptiActivityProfiler::handleCorrelationActivity(
481493
uint64_t correlationId,
482494
uint64_t externalId,
483-
RoctracerLogger::CorrelationDomain externalKind) {
484-
if (externalKind == RoctracerLogger::CorrelationDomain::Domain0) {
495+
RocLogger::CorrelationDomain externalKind) {
496+
if (externalKind == RocLogger::CorrelationDomain::Domain0) {
485497
cpuCorrelationMap_[correlationId] = externalId;
486-
} else if (externalKind == RoctracerLogger::CorrelationDomain::Domain1) {
498+
} else if (externalKind == RocLogger::CorrelationDomain::Domain1) {
487499
userCorrelationMap_[correlationId] = externalId;
488500
} else {
489-
LOG(WARNING)
490-
<< "Invalid CUpti_ActivityExternalCorrelation sent to handleCuptiActivity";
501+
LOG(WARNING) << "Invalid CUpti_ActivityExternalCorrelation sent to "
502+
"handleCuptiActivity";
491503
ecs_.invalid_external_correlation_events++;
492504
}
493505
}
@@ -960,37 +972,37 @@ void CuptiActivityProfiler::handleRuntimeActivity(
960972
}
961973

962974
inline void CuptiActivityProfiler::handleGpuActivity(
963-
const roctracerAsyncRow* act,
975+
const rocprofAsyncRow* act,
964976
ActivityLogger* logger) {
965977
const ITraceActivity* linked = linkedActivity(act->id, cpuCorrelationMap_);
966978
const auto& gpu_activity =
967979
traceBuffers_->addActivityWrapper(GpuActivity(act, linked));
968980
handleGpuActivity(gpu_activity, logger);
969981
}
970982

971-
void CuptiActivityProfiler::handleRoctracerActivity(
972-
const roctracerBase* record,
983+
void CuptiActivityProfiler::handleRocprofActivity(
984+
const rocprofBase* record,
973985
ActivityLogger* logger) {
974986
switch (record->type) {
975987
case ROCTRACER_ACTIVITY_DEFAULT:
976988
handleRuntimeActivity(
977-
reinterpret_cast<const roctracerRow*>(record), logger);
989+
reinterpret_cast<const rocprofRow*>(record), logger);
978990
break;
979991
case ROCTRACER_ACTIVITY_KERNEL:
980992
handleRuntimeActivity(
981-
reinterpret_cast<const roctracerKernelRow*>(record), logger);
993+
reinterpret_cast<const rocprofKernelRow*>(record), logger);
982994
break;
983995
case ROCTRACER_ACTIVITY_COPY:
984996
handleRuntimeActivity(
985-
reinterpret_cast<const roctracerCopyRow*>(record), logger);
997+
reinterpret_cast<const rocprofCopyRow*>(record), logger);
986998
break;
987999
case ROCTRACER_ACTIVITY_MALLOC:
9881000
handleRuntimeActivity(
989-
reinterpret_cast<const roctracerMallocRow*>(record), logger);
1001+
reinterpret_cast<const rocprofMallocRow*>(record), logger);
9901002
break;
9911003
case ROCTRACER_ACTIVITY_ASYNC:
9921004
handleGpuActivity(
993-
reinterpret_cast<const roctracerAsyncRow*>(record), logger);
1005+
reinterpret_cast<const rocprofAsyncRow*>(record), logger);
9941006
break;
9951007
case ROCTRACER_ACTIVITY_NONE:
9961008
default:
@@ -1571,8 +1583,13 @@ void CuptiActivityProfiler::pushCorrelationId(uint64_t id) {
15711583
id, CuptiActivityApi::CorrelationFlowType::Default);
15721584
#endif // HAS_CUPTI
15731585
#ifdef HAS_ROCTRACER
1586+
#ifndef ROCTRACER_FALLBACK
1587+
RocprofActivityApi::pushCorrelationID(
1588+
id, RocprofActivityApi::CorrelationFlowType::Default);
1589+
#else
15741590
RoctracerActivityApi::pushCorrelationID(
15751591
id, RoctracerActivityApi::CorrelationFlowType::Default);
1592+
#endif
15761593
#endif
15771594
for (auto& session : sessions_) {
15781595
session->pushCorrelationId(id);
@@ -1585,8 +1602,13 @@ void CuptiActivityProfiler::popCorrelationId() {
15851602
CuptiActivityApi::CorrelationFlowType::Default);
15861603
#endif // HAS_CUPTI
15871604
#ifdef HAS_ROCTRACER
1605+
#ifndef ROCTRACER_FALLBACK
1606+
RocprofActivityApi::popCorrelationID(
1607+
RocprofActivityApi::CorrelationFlowType::Default);
1608+
#else
15881609
RoctracerActivityApi::popCorrelationID(
15891610
RoctracerActivityApi::CorrelationFlowType::Default);
1611+
#endif
15901612
#endif
15911613
for (auto& session : sessions_) {
15921614
session->popCorrelationId();
@@ -1599,8 +1621,13 @@ void CuptiActivityProfiler::pushUserCorrelationId(uint64_t id) {
15991621
id, CuptiActivityApi::CorrelationFlowType::User);
16001622
#endif // HAS_CUPTI
16011623
#ifdef HAS_ROCTRACER
1624+
#ifndef ROCTRACER_FALLBACK
1625+
RocprofActivityApi::pushCorrelationID(
1626+
id, RocprofActivityApi::CorrelationFlowType::User);
1627+
#else
16021628
RoctracerActivityApi::pushCorrelationID(
16031629
id, RoctracerActivityApi::CorrelationFlowType::User);
1630+
#endif
16041631
#endif
16051632
for (auto& session : sessions_) {
16061633
session->pushUserCorrelationId(id);
@@ -1613,8 +1640,13 @@ void CuptiActivityProfiler::popUserCorrelationId() {
16131640
CuptiActivityApi::CorrelationFlowType::User);
16141641
#endif // HAS_CUPTI
16151642
#ifdef HAS_ROCTRACER
1643+
#ifndef ROCTRACER_FALLBACK
1644+
RocprofActivityApi::popCorrelationID(
1645+
RocprofActivityApi::CorrelationFlowType::User);
1646+
#else
16161647
RoctracerActivityApi::popCorrelationID(
16171648
RoctracerActivityApi::CorrelationFlowType::User);
1649+
#endif
16181650
#endif
16191651
for (auto& session : sessions_) {
16201652
session->popUserCorrelationId();

0 commit comments

Comments
 (0)