@@ -78,6 +78,14 @@ namespace alpaka
7878# if BOOST_COMP_CLANG
7979# pragma clang diagnostic pop
8080# endif
81+
82+ template <typename TKernelFnObj, typename TAcc, typename ... TArgs>
83+ inline void (*kernelName)(
84+ Vec<Dim<TAcc>, Idx<TAcc>> const ,
85+ TKernelFnObj const ,
86+ remove_restrict_t <std::decay_t <TArgs>>...)
87+ = gpuKernel<TKernelFnObj, TAcc, TArgs...>;
88+
8189 } // namespace detail
8290
8391 namespace uniform_cuda_hip
@@ -248,7 +256,7 @@ namespace alpaka
248256# endif
249257
250258 auto kernelName
251- = alpaka::detail::gpuKernel <TKernelFnObj, TAcc, remove_restrict_t <std::decay_t <TArgs>>...>;
259+ = alpaka::detail::kernelName <TKernelFnObj, TAcc, remove_restrict_t <std::decay_t <TArgs>>...>;
252260
253261# if ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL
254262 // Log the function attributes.
@@ -317,7 +325,7 @@ namespace alpaka
317325 [[maybe_unused]] TKernelFn const & kernelFn,
318326 [[maybe_unused]] TArgs&&... args) -> alpaka::KernelFunctionAttributes
319327 {
320- auto kernelName = alpaka::detail::gpuKernel <
328+ auto kernelName = alpaka::detail::kernelName <
321329 TKernelFn,
322330 AccGpuUniformCudaHipRt<TApi, TDim, TIdx>,
323331 remove_restrict_t <std::decay_t <TArgs>>...>;
@@ -363,9 +371,96 @@ namespace alpaka
363371 return kernelFunctionAttributes;
364372 }
365373 };
374+
366375 } // namespace trait
376+
367377} // namespace alpaka
368378
379+ // These macros can be used to give a more readable name to a GPU kernel.
380+ // KERNEL must be the class or struct whose operator()(acc, ...) implements the kernel.
381+ // ::NAME (or ::NAMESPACE::NAME) must be a unique name across the whole program.
382+
383+ struct The_ALPAKA_KERNEL_NAME_macro_must_be_called_in_the_global_namespace ;
384+
385+ # define ALPAKA_KERNEL_NAME (KERNEL, NAME ) \
386+ \
387+ struct The_ALPAKA_KERNEL_NAME_macro_must_be_called_in_the_global_namespace ; \
388+ \
389+ static_assert ( \
390+ std::is_same_v< \
391+ The_ALPAKA_KERNEL_NAME_macro_must_be_called_in_the_global_namespace, \
392+ ::The_ALPAKA_KERNEL_NAME_macro_must_be_called_in_the_global_namespace>, \
393+ " The ALPAKA_KERNEL_NAME macro must be called in the global namespace" ); \
394+ \
395+ template <typename TAcc, typename ... TArgs> \
396+ __global__ void NAME ( \
397+ alpaka::Vec<alpaka::Dim<TAcc>, alpaka::Idx<TAcc>> const extent, \
398+ KERNEL const kernelFnObj, \
399+ TArgs... args) \
400+ { \
401+ TAcc const acc (extent); \
402+ kernelFnObj (const_cast <TAcc const &>(acc), args...); \
403+ } \
404+ \
405+ namespace alpaka ::detail \
406+ { \
407+ template <typename TAcc, typename ... TArgs> \
408+ inline void (*kernelName<KERNEL, TAcc, TArgs...>)( \
409+ alpaka::Vec<alpaka::Dim<TAcc>, alpaka::Idx<TAcc>> const , \
410+ KERNEL const , \
411+ TArgs...) \
412+ = ::NAME<TAcc, TArgs...>; \
413+ }
414+
415+ struct The_ALPAKA_KERNEL_SCOPED_NAME_macro_must_be_called_in_the_global_namespace ;
416+
417+ # define ALPAKA_KERNEL_SCOPED_NAME (KERNEL, NAMESPACE, NAME ) \
418+ struct The_ALPAKA_KERNEL_SCOPED_NAME_macro_must_be_called_in_the_global_namespace ; \
419+ \
420+ static_assert ( \
421+ std::is_same_v< \
422+ The_ALPAKA_KERNEL_SCOPED_NAME_macro_must_be_called_in_the_global_namespace, \
423+ ::The_ALPAKA_KERNEL_SCOPED_NAME_macro_must_be_called_in_the_global_namespace>, \
424+ " The ALPAKA_KERNEL_SCOPED_NAME macro must be called in the global namespace" ); \
425+ \
426+ namespace NAMESPACE \
427+ { \
428+ template <typename TAcc, typename ... TArgs> \
429+ __global__ void NAME ( \
430+ alpaka::Vec<alpaka::Dim<TAcc>, alpaka::Idx<TAcc>> const extent, \
431+ KERNEL const kernelFnObj, \
432+ TArgs... args) \
433+ { \
434+ TAcc const acc (extent); \
435+ kernelFnObj (const_cast <TAcc const &>(acc), args...); \
436+ } \
437+ } \
438+ \
439+ namespace alpaka ::detail \
440+ { \
441+ template <typename TAcc, typename ... TArgs> \
442+ inline void (*kernelName<KERNEL, TAcc, TArgs...>)( \
443+ alpaka::Vec<alpaka::Dim<TAcc>, alpaka::Idx<TAcc>> const , \
444+ KERNEL const , \
445+ TArgs...) \
446+ = ::NAMESPACE::NAME<TAcc, TArgs...>; \
447+ }
448+
449+
450+ # else
451+
452+ // In host-only mode, expand to empty macros
453+
454+ # define ALPAKA_KERNEL_NAME (KERNEL, NAME )
455+ # define ALPAKA_KERNEL_SCOPED_NAME (KERNEL, NAMESPACE, NAME )
456+
369457# endif
370458
459+ #else
460+
461+ // If CUDA or HIP are not available, expand to empty macros
462+
463+ # define ALPAKA_KERNEL_NAME (KERNEL, NAME )
464+ # define ALPAKA_KERNEL_SCOPED_NAME (KERNEL, NAMESPACE, NAME )
465+
371466#endif
0 commit comments