@@ -33,13 +33,47 @@ static endFunction endReduceCallee = NULL;
3333
3434void kokkosp_request_tool_settings (const uint32_t ,
3535 Kokkos_Tools_ToolSettings* settings) {
36- if (0 == tool_globFence) {
37- settings->requires_global_fencing = false ;
36+ settings->requires_global_fencing = false ;
37+ }
38+
39+ // set of functions from Kokkos ToolProgrammingInterface (includes fence)
40+ Kokkos::Tools::Experimental::ToolProgrammingInterface tpi_funcs;
41+
/// Extracts the device-index field from a packed device identifier.
///
/// The low 17 bits of `devid_in` are discarded and the next 7 bits are
/// returned, so the result is always in the range [0, 127]. Any bits above
/// those 24 are masked off.
/// NOTE(review): the 17/7 split presumably mirrors how Kokkos packs
/// instance and device numbers into a device ID -- confirm against the
/// Kokkos profiling interface headers.
///
/// @param devid_in  Packed device identifier.
/// @return          The 7-bit field located at bits 17..23 of `devid_in`.
uint32_t getDeviceID(uint32_t devid_in) {
  constexpr uint32_t num_device_bits   = 7;
  constexpr uint32_t num_instance_bits = 17;
  // Mask with the low `num_device_bits` bits set (0x7F).
  constexpr uint32_t device_mask = ~(~uint32_t{0} << num_device_bits);
  return device_mask & (devid_in >> num_instance_bits);
}
48+
49+ void invoke_ktools_fence (uint32_t devID) {
50+ if (tpi_funcs.fence != nullptr ) {
51+ tpi_funcs.fence (devID);
52+ if (tool_verbosity > 1 ) {
53+ printf (
54+ " KokkosP: Sampler utility sucessfully invoked "
55+ " tool-induced fence on device %d\n " ,
56+ getDeviceID (devID));
57+ }
3858 } else {
39- settings->requires_global_fencing = true ;
59+ printf (
60+ " KokkosP: FATAL: Kokkos Tools Programming Interface's tool-invoked "
61+ " Fence is NULL!\n " );
62+ exit (-1 );
4063 }
4164}
4265
66+ void kokkosp_provide_tool_programming_interface (
67+ uint32_t num_funcs, Kokkos_Tools_ToolProgrammingInterface* funcsFromTPI) {
68+ if (!num_funcs) {
69+ if (tool_verbosity > 0 )
70+ printf (
71+ " KokkosP: Note: Number of functions in Tools Programming Interface "
72+ " is 0!\n " );
73+ }
74+ tpi_funcs = *funcsFromTPI;
75+ }
76+
4377void kokkosp_init_library (const int loadSeq, const uint64_t interfaceVer,
4478 const uint32_t devInfoCount, void * deviceInfo) {
4579 const char * tool_verbose_str = getenv (" KOKKOS_TOOLS_SAMPLER_VERBOSE" );
@@ -164,6 +198,9 @@ void kokkosp_begin_parallel_for(const char* name, const uint32_t devID,
164198 printf (" KokkosP: sample %llu calling child-begin function...\n " ,
165199 (unsigned long long )(*kID ));
166200 }
201+ if (tool_globFence) {
202+ invoke_ktools_fence (0 );
203+ }
167204 if (NULL != beginForCallee) {
168205 uint64_t nestedkID = 0 ;
169206 (*beginForCallee)(name, devID, &nestedkID);
@@ -180,6 +217,9 @@ void kokkosp_end_parallel_for(const uint64_t kID) {
180217 printf (" KokkosP: sample %llu calling child-end function...\n " ,
181218 (unsigned long long )(kID ));
182219 }
220+ if (tool_globFence) {
221+ invoke_ktools_fence (0 );
222+ }
183223 (*endForCallee)(retrievedNestedkID);
184224 infokIDSample.erase (kID );
185225 }
@@ -198,6 +238,9 @@ void kokkosp_begin_parallel_scan(const char* name, const uint32_t devID,
198238 }
199239 if (NULL != beginScanCallee) {
200240 uint64_t nestedkID = 0 ;
241+ if (tool_globFence) {
242+ invoke_ktools_fence (0 );
243+ }
201244 (*beginScanCallee)(name, devID, &nestedkID);
202245 infokIDSample.insert ({*kID , nestedkID});
203246 }
@@ -212,6 +255,9 @@ void kokkosp_end_parallel_scan(const uint64_t kID) {
212255 printf (" KokkosP: sample %llu calling child-end function...\n " ,
213256 (unsigned long long )(kID ));
214257 }
258+ if (tool_globFence) {
259+ invoke_ktools_fence (0 );
260+ }
215261 (*endScanCallee)(retrievedNestedkID);
216262 infokIDSample.erase (kID );
217263 }
@@ -228,9 +274,11 @@ void kokkosp_begin_parallel_reduce(const char* name, const uint32_t devID,
228274 printf (" KokkosP: sample %llu calling child-begin function...\n " ,
229275 (unsigned long long )(*kID ));
230276 }
231-
232277 if (NULL != beginReduceCallee) {
233278 uint64_t nestedkID = 0 ;
279+ if (tool_globFence) {
280+ invoke_ktools_fence (0 );
281+ }
234282 (*beginReduceCallee)(name, devID, &nestedkID);
235283 infokIDSample.insert ({*kID , nestedkID});
236284 }
@@ -245,6 +293,9 @@ void kokkosp_end_parallel_reduce(const uint64_t kID) {
245293 printf (" KokkosP: sample %llu calling child-end function...\n " ,
246294 (unsigned long long )(kID ));
247295 }
296+ if (tool_globFence) {
297+ invoke_ktools_fence (0 );
298+ }
248299 (*endScanCallee)(retrievedNestedkID);
249300 infokIDSample.erase (kID );
250301 }
@@ -257,8 +308,9 @@ void kokkosp_end_parallel_reduce(const uint64_t kID) {
257308extern " C" {
258309
259310namespace impl = KokkosTools::Sampler;
260-
261311EXPOSE_TOOL_SETTINGS (impl::kokkosp_request_tool_settings)
312+ EXPOSE_PROVIDE_TOOL_PROGRAMMING_INTERFACE(
313+ impl::kokkosp_provide_tool_programming_interface)
262314EXPOSE_INIT(impl::kokkosp_init_library)
263315EXPOSE_FINALIZE(impl::kokkosp_finalize_library)
264316EXPOSE_BEGIN_PARALLEL_FOR(impl::kokkosp_begin_parallel_for)
0 commit comments