Skip to content

Commit e906b1c

Browse files
authored
Implement support for explicit GPU kernel names (#2498)
Introduce two macros ALPAKA_KERNEL_SCOPED_NAME(KERNEL, NAMESPACE, NAME) and ALPAKA_KERNEL_NAME(KERNEL, NAME) to define explicit names for GPU kernels. A GPU kernel normally looks like "alpaka::detail::gpuKernel<Kernel, Acc, ...>". Using the macro ALPAKA_KERNEL_NAME(Kernel, kernel) makes it appear as "kernel<Acc, ...>". The macros can only be used in the global namespace. Using them in another namespaces causes a static assertion. Add a test for the new functionality.
1 parent 525d4a1 commit e906b1c

File tree

2 files changed

+160
-2
lines changed

2 files changed

+160
-2
lines changed

include/alpaka/kernel/TaskKernelGpuUniformCudaHipRt.hpp

Lines changed: 97 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,14 @@ namespace alpaka
7878
# if BOOST_COMP_CLANG
7979
# pragma clang diagnostic pop
8080
# endif
81+
82+
template<typename TKernelFnObj, typename TAcc, typename... TArgs>
83+
inline void (*kernelName)(
84+
Vec<Dim<TAcc>, Idx<TAcc>> const,
85+
TKernelFnObj const,
86+
remove_restrict_t<std::decay_t<TArgs>>...)
87+
= gpuKernel<TKernelFnObj, TAcc, TArgs...>;
88+
8189
} // namespace detail
8290

8391
namespace uniform_cuda_hip
@@ -248,7 +256,7 @@ namespace alpaka
248256
# endif
249257

250258
auto kernelName
251-
= alpaka::detail::gpuKernel<TKernelFnObj, TAcc, remove_restrict_t<std::decay_t<TArgs>>...>;
259+
= alpaka::detail::kernelName<TKernelFnObj, TAcc, remove_restrict_t<std::decay_t<TArgs>>...>;
252260

253261
# if ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL
254262
// Log the function attributes.
@@ -317,7 +325,7 @@ namespace alpaka
317325
[[maybe_unused]] TKernelFn const& kernelFn,
318326
[[maybe_unused]] TArgs&&... args) -> alpaka::KernelFunctionAttributes
319327
{
320-
auto kernelName = alpaka::detail::gpuKernel<
328+
auto kernelName = alpaka::detail::kernelName<
321329
TKernelFn,
322330
AccGpuUniformCudaHipRt<TApi, TDim, TIdx>,
323331
remove_restrict_t<std::decay_t<TArgs>>...>;
@@ -363,9 +371,96 @@ namespace alpaka
363371
return kernelFunctionAttributes;
364372
}
365373
};
374+
366375
} // namespace trait
376+
367377
} // namespace alpaka
368378

379+
// These macros can be used to give a more readable name to a GPU kernel.
380+
// KERNEL must be the class or struct whose operator()(acc, ...) implements the kernel.
381+
// ::NAME (or ::NAMESPACE::NAME) must be a unique name across the whole program.
382+
383+
struct The_ALPAKA_KERNEL_NAME_macro_must_be_called_in_the_global_namespace;
384+
385+
# define ALPAKA_KERNEL_NAME(KERNEL, NAME) \
386+
\
387+
struct The_ALPAKA_KERNEL_NAME_macro_must_be_called_in_the_global_namespace; \
388+
\
389+
static_assert( \
390+
std::is_same_v< \
391+
The_ALPAKA_KERNEL_NAME_macro_must_be_called_in_the_global_namespace, \
392+
::The_ALPAKA_KERNEL_NAME_macro_must_be_called_in_the_global_namespace>, \
393+
"The ALPAKA_KERNEL_NAME macro must be called in the global namespace"); \
394+
\
395+
template<typename TAcc, typename... TArgs> \
396+
__global__ void NAME( \
397+
alpaka::Vec<alpaka::Dim<TAcc>, alpaka::Idx<TAcc>> const extent, \
398+
KERNEL const kernelFnObj, \
399+
TArgs... args) \
400+
{ \
401+
TAcc const acc(extent); \
402+
kernelFnObj(const_cast<TAcc const&>(acc), args...); \
403+
} \
404+
\
405+
namespace alpaka::detail \
406+
{ \
407+
template<typename TAcc, typename... TArgs> \
408+
inline void (*kernelName<KERNEL, TAcc, TArgs...>)( \
409+
alpaka::Vec<alpaka::Dim<TAcc>, alpaka::Idx<TAcc>> const, \
410+
KERNEL const, \
411+
TArgs...) \
412+
= ::NAME<TAcc, TArgs...>; \
413+
}
414+
415+
struct The_ALPAKA_KERNEL_SCOPED_NAME_macro_must_be_called_in_the_global_namespace;
416+
417+
# define ALPAKA_KERNEL_SCOPED_NAME(KERNEL, NAMESPACE, NAME) \
418+
struct The_ALPAKA_KERNEL_SCOPED_NAME_macro_must_be_called_in_the_global_namespace; \
419+
\
420+
static_assert( \
421+
std::is_same_v< \
422+
The_ALPAKA_KERNEL_SCOPED_NAME_macro_must_be_called_in_the_global_namespace, \
423+
::The_ALPAKA_KERNEL_SCOPED_NAME_macro_must_be_called_in_the_global_namespace>, \
424+
"The ALPAKA_KERNEL_SCOPED_NAME macro must be called in the global namespace"); \
425+
\
426+
namespace NAMESPACE \
427+
{ \
428+
template<typename TAcc, typename... TArgs> \
429+
__global__ void NAME( \
430+
alpaka::Vec<alpaka::Dim<TAcc>, alpaka::Idx<TAcc>> const extent, \
431+
KERNEL const kernelFnObj, \
432+
TArgs... args) \
433+
{ \
434+
TAcc const acc(extent); \
435+
kernelFnObj(const_cast<TAcc const&>(acc), args...); \
436+
} \
437+
} \
438+
\
439+
namespace alpaka::detail \
440+
{ \
441+
template<typename TAcc, typename... TArgs> \
442+
inline void (*kernelName<KERNEL, TAcc, TArgs...>)( \
443+
alpaka::Vec<alpaka::Dim<TAcc>, alpaka::Idx<TAcc>> const, \
444+
KERNEL const, \
445+
TArgs...) \
446+
= ::NAMESPACE::NAME<TAcc, TArgs...>; \
447+
}
448+
449+
450+
# else
451+
452+
// In host-only mode, expand to empty macros
453+
454+
# define ALPAKA_KERNEL_NAME(KERNEL, NAME)
455+
# define ALPAKA_KERNEL_SCOPED_NAME(KERNEL, NAMESPACE, NAME)
456+
369457
# endif
370458

459+
#else
460+
461+
// If CUDA or HIP are not available, expand to empty macros
462+
463+
# define ALPAKA_KERNEL_NAME(KERNEL, NAME)
464+
# define ALPAKA_KERNEL_SCOPED_NAME(KERNEL, NAMESPACE, NAME)
465+
371466
#endif
Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
/* Copyright 2025 Andrea Bocci
2+
* SPDX-License-Identifier: MPL-2.0
3+
*/
4+
5+
#include <alpaka/kernel/Traits.hpp>
6+
#include <alpaka/meta/ForEachType.hpp>
7+
#include <alpaka/test/KernelExecutionFixture.hpp>
8+
#include <alpaka/test/acc/TestAccs.hpp>
9+
10+
#include <catch2/catch_template_test_macros.hpp>
11+
#include <catch2/catch_test_macros.hpp>
12+
13+
class KernelWithName
14+
{
15+
public:
16+
ALPAKA_NO_HOST_ACC_WARNING
17+
template<typename TAcc>
18+
ALPAKA_FN_ACC auto operator()(TAcc const& /* acc */, bool* success, std::int32_t val) const -> void
19+
{
20+
ALPAKA_CHECK(*success, 42 == val);
21+
}
22+
};
23+
24+
ALPAKA_KERNEL_NAME(KernelWithName, kernelWithName)
25+
26+
TEMPLATE_LIST_TEST_CASE("KernelWithName", "[kernel]", alpaka::test::TestAccs)
27+
{
28+
using Acc = TestType;
29+
using Dim = alpaka::Dim<Acc>;
30+
using Idx = alpaka::Idx<Acc>;
31+
32+
alpaka::test::KernelExecutionFixture<Acc> fixture(alpaka::Vec<Dim, Idx>::ones());
33+
34+
KernelWithName kernel;
35+
36+
REQUIRE(fixture(kernel, 42));
37+
}
38+
39+
class KernelWithScopedName
40+
{
41+
public:
42+
ALPAKA_NO_HOST_ACC_WARNING
43+
template<typename TAcc>
44+
ALPAKA_FN_ACC auto operator()(TAcc const& /* acc */, bool* success, std::int32_t val) const -> void
45+
{
46+
ALPAKA_CHECK(*success, 42 == val);
47+
}
48+
};
49+
50+
ALPAKA_KERNEL_SCOPED_NAME(KernelWithScopedName, scope, kernelWithName)
51+
52+
TEMPLATE_LIST_TEST_CASE("KernelWithScopedName", "[kernel]", alpaka::test::TestAccs)
53+
{
54+
using Acc = TestType;
55+
using Dim = alpaka::Dim<Acc>;
56+
using Idx = alpaka::Idx<Acc>;
57+
58+
alpaka::test::KernelExecutionFixture<Acc> fixture(alpaka::Vec<Dim, Idx>::ones());
59+
60+
KernelWithScopedName kernel;
61+
62+
REQUIRE(fixture(kernel, 42));
63+
}

0 commit comments

Comments
 (0)