Skip to content

Commit 62f3f02

Browse files
authored
Use eventset in roctxconnector (as in nvtxconnector) (#219)
* Use eventset for roctxconnector * Improvements thanks to review.
1 parent 2ddedef commit 62f3f02

File tree

5 files changed

+106
-43
lines changed

5 files changed

+106
-43
lines changed

CMakeLists.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,8 @@ endif()
9595
include(cmake/configure_variorum.cmake)
9696

9797
set(KOKKOSTOOLS_HAS_CALIPER ${KokkosTools_ENABLE_CALIPER})
98-
set(KOKKOSTOOLS_HAS_NVTX ${Kokkos_ENABLE_CUDA}) # we assume that enabling CUDA for Kokkos program means nvtx should be available
98+
set(KOKKOSTOOLS_HAS_NVTX ${Kokkos_ENABLE_CUDA}) # we assume that enabling CUDA for Kokkos program means nvtx should be available
99+
set(KOKKOSTOOLS_HAS_ROCTX ${Kokkos_ENABLE_HIP}) # we assume that enabling HIP for Kokkos program means roctx should be available
99100

100101
if(DEFINED ENV{VTUNE_HOME})
101102
set(VTune_ROOT $ENV{VTUNE_HOME})

common/kp_config.hpp.in

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
#define USE_MPI @KOKKOSTOOLS_HAS_MPI@
44

55
#cmakedefine KOKKOSTOOLS_HAS_NVTX
6+
#cmakedefine KOKKOSTOOLS_HAS_ROCTX
67
#cmakedefine KOKKOSTOOLS_HAS_CALIPER
78
#cmakedefine KOKKOSTOOLS_HAS_SYSTEMTAP
89
#cmakedefine KOKKOSTOOLS_HAS_VARIORUM

example/CMakeLists.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,3 +48,6 @@ if(KOKKOSTOOLS_HAS_NVTX)
4848
add_kp_test(nvtx_connector "nvtx-connector")
4949
add_kp_test(nvtx_focused_connector "nvtx-focused-connector")
5050
endif()
51+
if(KOKKOSTOOLS_HAS_ROCTX)
52+
add_kp_test(roctx_connector "roctx-connector")
53+
endif()

profiling/all/kp_all.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,9 @@ KOKKOSTOOLS_EXTERN_EVENT_SET(VariorumConnector)
5252
KOKKOSTOOLS_EXTERN_EVENT_SET(NVTXConnector)
5353
KOKKOSTOOLS_EXTERN_EVENT_SET(NVTXFocusedConnector)
5454
#endif
55+
#ifdef KOKKOSTOOLS_HAS_ROCTX
56+
KOKKOSTOOLS_EXTERN_EVENT_SET(ROCTXConnector)
57+
#endif
5558
#ifdef KOKKOSTOOLS_HAS_CALIPER
5659
namespace cali {
5760
extern Kokkos::Tools::Experimental::EventSet get_kokkos_event_set(
@@ -93,6 +96,9 @@ EventSet get_event_set(const char* profiler, const char* config_str) {
9396
#ifdef KOKKOSTOOLS_HAS_NVTX
9497
handlers["nvtx-connector"] = NVTXConnector::get_event_set();
9598
handlers["nvtx-focused-connector"] = NVTXFocusedConnector::get_event_set();
99+
#endif
100+
#ifdef KOKKOSTOOLS_HAS_ROCTX
101+
handlers["roctx-connector"] = ROCTXConnector::get_event_set();
96102
#endif
97103
auto e = handlers.find(profiler);
98104
if (e != handlers.end()) return e->second;

profiling/roctx-connector/kp_roctx_connector.cpp

Lines changed: 94 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@
2121
#include <string>
2222
#include <vector>
2323

24+
#include "kp_core.hpp"
25+
2426
namespace {
2527
struct Section {
2628
std::string label;
@@ -29,20 +31,28 @@ struct Section {
2931
std::vector<Section> kokkosp_sections;
3032
} // namespace
3133

32-
struct Kokkos_Tools_ToolSettings {
33-
bool requires_global_fencing;
34-
bool padding[255];
35-
};
34+
namespace KokkosTools {
35+
namespace ROCTXConnector {
36+
37+
static bool tool_globfences;
3638

37-
extern "C" void kokkosp_request_tool_settings(
38-
const uint32_t, Kokkos_Tools_ToolSettings* settings) {
39-
settings->requires_global_fencing = false;
39+
void kokkosp_request_tool_settings(const uint32_t,
40+
Kokkos_Tools_ToolSettings* settings) {
41+
if (tool_globfences) {
42+
settings->requires_global_fencing = true;
43+
} else {
44+
settings->requires_global_fencing = false;
45+
}
4046
}
4147

42-
extern "C" void kokkosp_init_library(const int loadSeq,
43-
const uint64_t interfaceVer,
44-
const uint32_t /*devInfoCount*/,
45-
void* /*deviceInfo*/) {
48+
void kokkosp_init_library(const int loadSeq, const uint64_t interfaceVer,
49+
const uint32_t /*devInfoCount*/,
50+
Kokkos_Profiling_KokkosPDeviceInfo* /*deviceInfo*/) {
51+
const char* tool_global_fences = std::getenv("KOKKOS_TOOLS_GLOBALFENCES");
52+
if (tool_global_fences) {
53+
tool_globfences = (atoi(tool_global_fences) != 0);
54+
}
55+
4656
std::cout << "-----------------------------------------------------------\n"
4757
<< "KokkosP: ROC Tracer Connector (sequence is " << loadSeq
4858
<< ", version: " << interfaceVer << ")\n"
@@ -51,7 +61,7 @@ extern "C" void kokkosp_init_library(const int loadSeq,
5161
roctxMark("Kokkos::Initialization Complete");
5262
}
5363

54-
extern "C" void kokkosp_finalize_library() {
64+
void kokkosp_finalize_library() {
5565
std::cout << R"(
5666
-----------------------------------------------------------
5767
KokkosP: Finalization of ROC Tracer Connector. Complete.
@@ -61,66 +71,108 @@ KokkosP: Finalization of ROC Tracer Connector. Complete.
6171
roctxMark("Kokkos::Finalization Complete");
6272
}
6373

64-
extern "C" void kokkosp_begin_parallel_for(const char* name,
65-
const uint32_t /*devID*/,
66-
uint64_t* /*kID*/) {
74+
void kokkosp_begin_parallel_for(const char* name, const uint32_t /*devID*/,
75+
uint64_t* /*kID*/) {
6776
roctxRangePush(name);
6877
}
6978

70-
extern "C" void kokkosp_end_parallel_for(const uint64_t /*kID*/) {
71-
roctxRangePop();
72-
}
79+
void kokkosp_end_parallel_for(const uint64_t /*kID*/) { roctxRangePop(); }
7380

74-
extern "C" void kokkosp_begin_parallel_scan(const char* name,
75-
const uint32_t /*devID*/,
76-
uint64_t* /*kID*/) {
81+
void kokkosp_begin_parallel_scan(const char* name, const uint32_t /*devID*/,
82+
uint64_t* /*kID*/) {
7783
roctxRangePush(name);
7884
}
7985

80-
extern "C" void kokkosp_end_parallel_scan(const uint64_t /*kID*/) {
81-
roctxRangePop();
82-
}
86+
void kokkosp_end_parallel_scan(const uint64_t /*kID*/) { roctxRangePop(); }
8387

84-
extern "C" void kokkosp_begin_parallel_reduce(const char* name,
85-
const uint32_t /*devID*/,
86-
uint64_t* /*kID*/) {
88+
void kokkosp_begin_parallel_reduce(const char* name, const uint32_t /*devID*/,
89+
uint64_t* /*kID*/) {
8790
roctxRangePush(name);
8891
}
8992

90-
extern "C" void kokkosp_end_parallel_reduce(const uint64_t /*kID*/) {
91-
roctxRangePop();
92-
}
93+
void kokkosp_end_parallel_reduce(const uint64_t /*kID*/) { roctxRangePop(); }
9394

94-
extern "C" void kokkosp_push_profile_region(char* name) {
95-
roctxRangePush(name);
96-
}
95+
void kokkosp_push_profile_region(const char* name) { roctxRangePush(name); }
9796

98-
extern "C" void kokkosp_pop_profile_region() { roctxRangePop(); }
97+
void kokkosp_pop_profile_region() { roctxRangePop(); }
9998

100-
extern "C" void kokkosp_create_profile_section(const char* name,
101-
uint32_t* sID) {
99+
void kokkosp_create_profile_section(const char* name, uint32_t* sID) {
102100
*sID = kokkosp_sections.size();
103101
kokkosp_sections.push_back(
104102
{std::string(name), static_cast<roctx_range_id_t>(-1)});
105103
}
106104

107-
extern "C" void kokkosp_start_profile_section(const uint32_t sID) {
105+
void kokkosp_start_profile_section(const uint32_t sID) {
108106
auto& section = kokkosp_sections[sID];
109107
section.id = roctxRangeStart(section.label.c_str());
110108
}
111109

112-
extern "C" void kokkosp_stop_profile_section(const uint32_t sID) {
110+
void kokkosp_stop_profile_section(const uint32_t sID) {
113111
auto const& section = kokkosp_sections[sID];
114112
roctxRangeStop(section.id);
115113
}
116114

117-
extern "C" void kokkosp_destroy_profile_section(const uint32_t sID) {
115+
void kokkosp_destroy_profile_section(const uint32_t sID) {
118116
// do nothing
119117
}
120118

121-
extern "C" void kokkosp_begin_fence(const char* name, const uint32_t /*devID*/,
122-
uint64_t* fID) {
119+
void kokkosp_profile_event(const char* name) { roctxMark(name); }
120+
121+
void kokkosp_begin_fence(const char* name, const uint32_t /*devID*/,
122+
uint64_t* fID) {
123123
*fID = roctxRangeStart(name);
124124
}
125125

126-
extern "C" void kokkosp_end_fence(const uint64_t fID) { roctxRangeStop(fID); }
126+
void kokkosp_end_fence(const uint64_t fID) { roctxRangeStop(fID); }
127+
128+
Kokkos::Tools::Experimental::EventSet get_event_set() {
129+
Kokkos::Tools::Experimental::EventSet my_event_set;
130+
memset(&my_event_set, 0,
131+
sizeof(my_event_set)); // zero any pointers not set here
132+
my_event_set.request_tool_settings = kokkosp_request_tool_settings;
133+
my_event_set.init = kokkosp_init_library;
134+
my_event_set.finalize = kokkosp_finalize_library;
135+
my_event_set.push_region = kokkosp_push_profile_region;
136+
my_event_set.pop_region = kokkosp_pop_profile_region;
137+
my_event_set.begin_parallel_for = kokkosp_begin_parallel_for;
138+
my_event_set.begin_parallel_reduce = kokkosp_begin_parallel_reduce;
139+
my_event_set.begin_parallel_scan = kokkosp_begin_parallel_scan;
140+
my_event_set.end_parallel_for = kokkosp_end_parallel_for;
141+
my_event_set.end_parallel_reduce = kokkosp_end_parallel_reduce;
142+
my_event_set.end_parallel_scan = kokkosp_end_parallel_scan;
143+
my_event_set.create_profile_section = kokkosp_create_profile_section;
144+
my_event_set.start_profile_section = kokkosp_start_profile_section;
145+
my_event_set.stop_profile_section = kokkosp_stop_profile_section;
146+
my_event_set.destroy_profile_section = kokkosp_destroy_profile_section;
147+
my_event_set.profile_event = kokkosp_profile_event;
148+
my_event_set.begin_fence = kokkosp_begin_fence;
149+
my_event_set.end_fence = kokkosp_end_fence;
150+
return my_event_set;
151+
}
152+
153+
} // namespace ROCTXConnector
154+
} // namespace KokkosTools
155+
156+
extern "C" {
157+
158+
namespace impl = KokkosTools::ROCTXConnector;
159+
160+
EXPOSE_TOOL_SETTINGS(impl::kokkosp_request_tool_settings)
161+
EXPOSE_INIT(impl::kokkosp_init_library)
162+
EXPOSE_FINALIZE(impl::kokkosp_finalize_library)
163+
EXPOSE_PUSH_REGION(impl::kokkosp_push_profile_region)
164+
EXPOSE_POP_REGION(impl::kokkosp_pop_profile_region)
165+
EXPOSE_BEGIN_PARALLEL_FOR(impl::kokkosp_begin_parallel_for)
166+
EXPOSE_END_PARALLEL_FOR(impl::kokkosp_end_parallel_for)
167+
EXPOSE_BEGIN_PARALLEL_SCAN(impl::kokkosp_begin_parallel_scan)
168+
EXPOSE_END_PARALLEL_SCAN(impl::kokkosp_end_parallel_scan)
169+
EXPOSE_BEGIN_PARALLEL_REDUCE(impl::kokkosp_begin_parallel_reduce)
170+
EXPOSE_END_PARALLEL_REDUCE(impl::kokkosp_end_parallel_reduce)
171+
EXPOSE_CREATE_PROFILE_SECTION(impl::kokkosp_create_profile_section)
172+
EXPOSE_START_PROFILE_SECTION(impl::kokkosp_start_profile_section)
173+
EXPOSE_STOP_PROFILE_SECTION(impl::kokkosp_stop_profile_section)
174+
EXPOSE_DESTROY_PROFILE_SECTION(impl::kokkosp_destroy_profile_section)
175+
EXPOSE_PROFILE_EVENT(impl::kokkosp_profile_event);
176+
EXPOSE_BEGIN_FENCE(impl::kokkosp_begin_fence);
177+
EXPOSE_END_FENCE(impl::kokkosp_end_fence);
178+
} // extern "C"

0 commit comments

Comments
 (0)