2121#include < string>
2222#include < vector>
2323
24+ #include " kp_core.hpp"
25+
2426namespace {
2527struct Section {
2628 std::string label;
@@ -29,20 +31,28 @@ struct Section {
2931std::vector<Section> kokkosp_sections;
3032} // namespace
3133
32- struct Kokkos_Tools_ToolSettings {
33- bool requires_global_fencing;
34- bool padding[ 255 ];
35- } ;
34+ namespace KokkosTools {
35+ namespace ROCTXConnector {
36+
// Value reported to Kokkos as `requires_global_fencing` by
// kokkosp_request_tool_settings(). It stays false unless
// kokkosp_init_library() finds a non-zero KOKKOS_TOOLS_GLOBALFENCES value.
static bool tool_globfences = false;
3638
37- extern " C" void kokkosp_request_tool_settings (
38- const uint32_t , Kokkos_Tools_ToolSettings* settings) {
39- settings->requires_global_fencing = false ;
39+ void kokkosp_request_tool_settings (const uint32_t ,
40+ Kokkos_Tools_ToolSettings* settings) {
41+ if (tool_globfences) {
42+ settings->requires_global_fencing = true ;
43+ } else {
44+ settings->requires_global_fencing = false ;
45+ }
4046}
4147
42- extern " C" void kokkosp_init_library (const int loadSeq,
43- const uint64_t interfaceVer,
44- const uint32_t /* devInfoCount*/ ,
45- void * /* deviceInfo*/ ) {
48+ void kokkosp_init_library (const int loadSeq, const uint64_t interfaceVer,
49+ const uint32_t /* devInfoCount*/ ,
50+ Kokkos_Profiling_KokkosPDeviceInfo* /* deviceInfo*/ ) {
51+ const char * tool_global_fences = std::getenv (" KOKKOS_TOOLS_GLOBALFENCES" );
52+ if (tool_global_fences) {
53+ tool_globfences = (atoi (tool_global_fences) != 0 );
54+ }
55+
4656 std::cout << " -----------------------------------------------------------\n "
4757 << " KokkosP: ROC Tracer Connector (sequence is " << loadSeq
4858 << " , version: " << interfaceVer << " )\n "
@@ -51,7 +61,7 @@ extern "C" void kokkosp_init_library(const int loadSeq,
5161 roctxMark (" Kokkos::Initialization Complete" );
5262}
5363
54- extern " C " void kokkosp_finalize_library () {
64+ void kokkosp_finalize_library () {
5565 std::cout << R"(
5666-----------------------------------------------------------
5767KokkosP: Finalization of ROC Tracer Connector. Complete.
@@ -61,66 +71,108 @@ KokkosP: Finalization of ROC Tracer Connector. Complete.
6171 roctxMark (" Kokkos::Finalization Complete" );
6272}
6373
64- extern " C" void kokkosp_begin_parallel_for (const char * name,
65- const uint32_t /* devID*/ ,
66- uint64_t * /* kID*/ ) {
74+ void kokkosp_begin_parallel_for (const char * name, const uint32_t /* devID*/ ,
75+ uint64_t * /* kID*/ ) {
6776 roctxRangePush (name);
6877}
6978
70- extern " C" void kokkosp_end_parallel_for (const uint64_t /* kID*/ ) {
71- roctxRangePop ();
72- }
79+ void kokkosp_end_parallel_for (const uint64_t /* kID*/ ) { roctxRangePop (); }
7380
74- extern " C" void kokkosp_begin_parallel_scan (const char * name,
75- const uint32_t /* devID*/ ,
76- uint64_t * /* kID*/ ) {
81+ void kokkosp_begin_parallel_scan (const char * name, const uint32_t /* devID*/ ,
82+ uint64_t * /* kID*/ ) {
7783 roctxRangePush (name);
7884}
7985
80- extern " C" void kokkosp_end_parallel_scan (const uint64_t /* kID*/ ) {
81- roctxRangePop ();
82- }
86+ void kokkosp_end_parallel_scan (const uint64_t /* kID*/ ) { roctxRangePop (); }
8387
84- extern " C" void kokkosp_begin_parallel_reduce (const char * name,
85- const uint32_t /* devID*/ ,
86- uint64_t * /* kID*/ ) {
88+ void kokkosp_begin_parallel_reduce (const char * name, const uint32_t /* devID*/ ,
89+ uint64_t * /* kID*/ ) {
8790 roctxRangePush (name);
8891}
8992
90- extern " C" void kokkosp_end_parallel_reduce (const uint64_t /* kID*/ ) {
91- roctxRangePop ();
92- }
93+ void kokkosp_end_parallel_reduce (const uint64_t /* kID*/ ) { roctxRangePop (); }
9394
94- extern " C" void kokkosp_push_profile_region (char * name) {
95- roctxRangePush (name);
96- }
95+ void kokkosp_push_profile_region (const char * name) { roctxRangePush (name); }
9796
98- extern " C " void kokkosp_pop_profile_region () { roctxRangePop (); }
97+ void kokkosp_pop_profile_region () { roctxRangePop (); }
9998
100- extern " C" void kokkosp_create_profile_section (const char * name,
101- uint32_t * sID ) {
99+ void kokkosp_create_profile_section (const char * name, uint32_t * sID ) {
102100 *sID = kokkosp_sections.size ();
103101 kokkosp_sections.push_back (
104102 {std::string (name), static_cast <roctx_range_id_t >(-1 )});
105103}
106104
107- extern " C " void kokkosp_start_profile_section (const uint32_t sID ) {
105+ void kokkosp_start_profile_section (const uint32_t sID ) {
108106 auto & section = kokkosp_sections[sID ];
109107 section.id = roctxRangeStart (section.label .c_str ());
110108}
111109
112- extern " C " void kokkosp_stop_profile_section (const uint32_t sID ) {
110+ void kokkosp_stop_profile_section (const uint32_t sID ) {
113111 auto const & section = kokkosp_sections[sID ];
114112 roctxRangeStop (section.id );
115113}
116114
// Callback for destroying a profile section. Intentionally a no-op: the
// entry stays in `kokkosp_sections`. The parameter name is commented out,
// like the other unused parameters in this file.
void kokkosp_destroy_profile_section(const uint32_t /*sID*/) {
  // do nothing
}
120118
121- extern " C" void kokkosp_begin_fence (const char * name, const uint32_t /* devID*/ ,
122- uint64_t * fID ) {
119+ void kokkosp_profile_event (const char * name) { roctxMark (name); }
120+
121+ void kokkosp_begin_fence (const char * name, const uint32_t /* devID*/ ,
122+ uint64_t * fID ) {
123123 *fID = roctxRangeStart (name);
124124}
125125
126- extern " C" void kokkosp_end_fence (const uint64_t fID ) { roctxRangeStop (fID ); }
126+ void kokkosp_end_fence (const uint64_t fID ) { roctxRangeStop (fID ); }
127+
128+ Kokkos::Tools::Experimental::EventSet get_event_set () {
129+ Kokkos::Tools::Experimental::EventSet my_event_set;
130+ memset (&my_event_set, 0 ,
131+ sizeof (my_event_set)); // zero any pointers not set here
132+ my_event_set.request_tool_settings = kokkosp_request_tool_settings;
133+ my_event_set.init = kokkosp_init_library;
134+ my_event_set.finalize = kokkosp_finalize_library;
135+ my_event_set.push_region = kokkosp_push_profile_region;
136+ my_event_set.pop_region = kokkosp_pop_profile_region;
137+ my_event_set.begin_parallel_for = kokkosp_begin_parallel_for;
138+ my_event_set.begin_parallel_reduce = kokkosp_begin_parallel_reduce;
139+ my_event_set.begin_parallel_scan = kokkosp_begin_parallel_scan;
140+ my_event_set.end_parallel_for = kokkosp_end_parallel_for;
141+ my_event_set.end_parallel_reduce = kokkosp_end_parallel_reduce;
142+ my_event_set.end_parallel_scan = kokkosp_end_parallel_scan;
143+ my_event_set.create_profile_section = kokkosp_create_profile_section;
144+ my_event_set.start_profile_section = kokkosp_start_profile_section;
145+ my_event_set.stop_profile_section = kokkosp_stop_profile_section;
146+ my_event_set.destroy_profile_section = kokkosp_destroy_profile_section;
147+ my_event_set.profile_event = kokkosp_profile_event;
148+ my_event_set.begin_fence = kokkosp_begin_fence;
149+ my_event_set.end_fence = kokkosp_end_fence;
150+ return my_event_set;
151+ }
152+
153+ } // namespace ROCTXConnector
154+ } // namespace KokkosTools
155+
156+ extern " C" {
157+
158+ namespace impl = KokkosTools::ROCTXConnector;
159+
160+ EXPOSE_TOOL_SETTINGS (impl::kokkosp_request_tool_settings)
161+ EXPOSE_INIT(impl::kokkosp_init_library)
162+ EXPOSE_FINALIZE(impl::kokkosp_finalize_library)
163+ EXPOSE_PUSH_REGION(impl::kokkosp_push_profile_region)
164+ EXPOSE_POP_REGION(impl::kokkosp_pop_profile_region)
165+ EXPOSE_BEGIN_PARALLEL_FOR(impl::kokkosp_begin_parallel_for)
166+ EXPOSE_END_PARALLEL_FOR(impl::kokkosp_end_parallel_for)
167+ EXPOSE_BEGIN_PARALLEL_SCAN(impl::kokkosp_begin_parallel_scan)
168+ EXPOSE_END_PARALLEL_SCAN(impl::kokkosp_end_parallel_scan)
169+ EXPOSE_BEGIN_PARALLEL_REDUCE(impl::kokkosp_begin_parallel_reduce)
170+ EXPOSE_END_PARALLEL_REDUCE(impl::kokkosp_end_parallel_reduce)
171+ EXPOSE_CREATE_PROFILE_SECTION(impl::kokkosp_create_profile_section)
172+ EXPOSE_START_PROFILE_SECTION(impl::kokkosp_start_profile_section)
173+ EXPOSE_STOP_PROFILE_SECTION(impl::kokkosp_stop_profile_section)
174+ EXPOSE_DESTROY_PROFILE_SECTION(impl::kokkosp_destroy_profile_section)
175+ EXPOSE_PROFILE_EVENT(impl::kokkosp_profile_event);
176+ EXPOSE_BEGIN_FENCE (impl::kokkosp_begin_fence);
177+ EXPOSE_END_FENCE (impl::kokkosp_end_fence);
178+ } // extern "C"
0 commit comments