Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 34 additions & 24 deletions cudax/include/cuda/experimental/__hierarchy/fwd.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -21,49 +21,59 @@
# pragma system_header
#endif // no system header

// Q: Do we want to enable this by default, or do we want the user to define some macro to get the interoperability with
// cooperative groups?
#if __has_include(<cooperative_groups.h>)
# define _CCCL_HAS_COOPERATIVE_GROUPS() 1
#else // ^^^ has cooperative groups ^^^ / vvv no cooperative groups vvv
# define _CCCL_HAS_COOPERATIVE_GROUPS() 0
#endif // ^^^ no cooperative groups ^^^

#include <cuda/__fwd/hierarchy.h>
#include <cuda/std/__fwd/extents.h>

#include <cuda/std/__cccl/prologue.h>

namespace cuda::experimental
{
// hierarchy group kinds

class __this_hierarchy_group_kind
{};
using __implicit_hierarchy_t =
hierarchy<thread_level,
hierarchy_level_desc<grid_level, ::cuda::std::dims<3, unsigned>>,
hierarchy_level_desc<cluster_level, ::cuda::std::dims<3, unsigned>>,
hierarchy_level_desc<block_level, ::cuda::std::dims<3, unsigned>>>;

// hierarchy group base
// group bases

template <class _Level, class _Hierarchy, class _Kind>
class __hierarchy_group_base;
template <class _Level, class _Hierarchy>
using __this_hierarchy_group_base = __hierarchy_group_base<_Level, _Hierarchy, __this_hierarchy_group_kind>;
class __this_group_base;

// hierarchy groups
// groups

template <class _Hierarchy, class _Kind>
class thread_group;
template <class _Hierarchy, class _Kind>
class warp_group;
template <class _Hierarchy, class _Kind>
class block_group;
template <class _Hierarchy, class _Kind>
class cluster_group;
template <class _Hierarchy, class _Kind>
class grid_group;
template <class _Hierarchy = __implicit_hierarchy_t>
class this_thread;
template <class _Hierarchy = __implicit_hierarchy_t>
class this_warp;
template <class _Hierarchy = __implicit_hierarchy_t>
class this_block;
template <class _Hierarchy = __implicit_hierarchy_t>
class this_cluster;
template <class _Hierarchy = __implicit_hierarchy_t>
class this_grid;

// traits

template <class _Tp>
inline constexpr bool __is_this_hierarchy_group_v = false;
template <class _Hierarchy>
inline constexpr bool __is_this_hierarchy_group_v<thread_group<_Hierarchy, __this_hierarchy_group_kind>> = true;
inline constexpr bool __is_this_hierarchy_group_v<this_thread<_Hierarchy>> = true;
template <class _Hierarchy>
inline constexpr bool __is_this_hierarchy_group_v<warp_group<_Hierarchy, __this_hierarchy_group_kind>> = true;
inline constexpr bool __is_this_hierarchy_group_v<this_warp<_Hierarchy>> = true;
template <class _Hierarchy>
inline constexpr bool __is_this_hierarchy_group_v<block_group<_Hierarchy, __this_hierarchy_group_kind>> = true;
inline constexpr bool __is_this_hierarchy_group_v<this_block<_Hierarchy>> = true;
template <class _Hierarchy>
inline constexpr bool __is_this_hierarchy_group_v<cluster_group<_Hierarchy, __this_hierarchy_group_kind>> = true;
inline constexpr bool __is_this_hierarchy_group_v<this_cluster<_Hierarchy>> = true;
template <class _Hierarchy>
inline constexpr bool __is_this_hierarchy_group_v<grid_group<_Hierarchy, __this_hierarchy_group_kind>> = true;
inline constexpr bool __is_this_hierarchy_group_v<this_grid<_Hierarchy>> = true;
} // namespace cuda::experimental

#include <cuda/std/__cccl/epilogue.h>
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
//===----------------------------------------------------------------------===//
//
// Part of CUDA Experimental in CUDA C++ Core Libraries,
// under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES.
//
//===----------------------------------------------------------------------===//

#ifndef _CUDA_EXPERIMENTAL___HIERARCHY_IMPLICIT_HIERARCHY_CUH
#define _CUDA_EXPERIMENTAL___HIERARCHY_IMPLICIT_HIERARCHY_CUH

#include <cuda/std/detail/__config>

#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC)
# pragma GCC system_header
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG)
# pragma clang system_header
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
# pragma system_header
#endif // no system header

#include <cuda/hierarchy>

#include <cuda/experimental/__hierarchy/fwd.cuh>

#include <cuda/std/__cccl/prologue.h>

namespace cuda::experimental
{
[[nodiscard]] _CCCL_DEVICE_API inline __implicit_hierarchy_t __implicit_hierarchy() noexcept
{
return __implicit_hierarchy_t{
cuda::gpu_thread,
hierarchy_level_desc<grid_level, ::cuda::std::dims<3, unsigned>>{cluster.extents(grid)},
hierarchy_level_desc<cluster_level, ::cuda::std::dims<3, unsigned>>{block.extents(cluster)},
hierarchy_level_desc<block_level, ::cuda::std::dims<3, unsigned>>{gpu_thread.extents(block)}};
}
} // namespace cuda::experimental

#include <cuda/std/__cccl/epilogue.h>

#endif // _CUDA_EXPERIMENTAL___HIERARCHY_IMPLICIT_HIERARCHY_CUH
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@
//
//===----------------------------------------------------------------------===//

#ifndef _CUDA_EXPERIMENTAL___HIERARCHY_GROUP_CUH
#define _CUDA_EXPERIMENTAL___HIERARCHY_GROUP_CUH
#ifndef _CUDA_EXPERIMENTAL___THIS_GROUP_CUH
#define _CUDA_EXPERIMENTAL___THIS_GROUP_CUH

#include <cuda/std/detail/__config>

Expand All @@ -28,6 +28,11 @@

#include <cuda/experimental/__hierarchy/fwd.cuh>
#include <cuda/experimental/__hierarchy/grid_sync.cuh>
#include <cuda/experimental/__hierarchy/implicit_hierarchy.cuh>

#if _CCCL_HAS_COOPERATIVE_GROUPS()
# include <cooperative_groups.h>
#endif // _CCCL_HAS_COOPERATIVE_GROUPS()

#include <cuda/std/__cccl/prologue.h>

Expand All @@ -39,19 +44,23 @@ using __hierarchy_type_of =

// todo: use __hier_ in queries
template <class _Level, class _Hierarchy>
class __hierarchy_group_base<_Level, _Hierarchy, __this_hierarchy_group_kind>
class __this_group_base
{
static_assert(__is_hierarchy_level_v<_Level>);
static_assert(__is_hierarchy_v<_Hierarchy>);

const _Hierarchy& __hier_;
_Hierarchy __hier_;

public:
using hierarchy_type = _Hierarchy;

_CCCL_DEVICE_API explicit __this_group_base() noexcept
: __hier_{::cuda::experimental::__implicit_hierarchy()}
{}

_CCCL_TEMPLATE(class _HierarchyLike)
_CCCL_REQUIRES(::cuda::std::is_same_v<_Hierarchy, __hierarchy_type_of<_HierarchyLike>>)
_CCCL_DEVICE_API __hierarchy_group_base(const _HierarchyLike& __hier_like) noexcept
_CCCL_DEVICE_API __this_group_base(const _HierarchyLike& __hier_like) noexcept
: __hier_{::cuda::__unpack_hierarchy_if_needed(__hier_like)}
{}

Expand Down Expand Up @@ -94,9 +103,9 @@ public:
};

template <class _Hierarchy>
class thread_group<_Hierarchy, __this_hierarchy_group_kind> : __this_hierarchy_group_base<thread_level, _Hierarchy>
class this_thread : __this_group_base<thread_level, _Hierarchy>
{
using __base_type = __this_hierarchy_group_base<thread_level, _Hierarchy>;
using __base_type = __this_group_base<thread_level, _Hierarchy>;

public:
using level_type = thread_level;
Expand All @@ -109,19 +118,28 @@ public:
using __base_type::rank;
using __base_type::rank_as;

# if _CCCL_HAS_COOPERATIVE_GROUPS()
_CCCL_DEVICE_API this_thread(const ::cooperative_groups::thread_block_tile<1, void>&) noexcept
: __base_type{::cuda::experimental::__implicit_hierarchy()}
{}
# endif // _CCCL_HAS_COOPERATIVE_GROUPS()

_CCCL_DEVICE_API void sync() noexcept {}
#endif // _CCCL_CUDA_COMPILATION()
};

_CCCL_TEMPLATE(class _Hierarchy)
_CCCL_REQUIRES(__is_or_has_hierarchy_member_v<_Hierarchy>)
_CCCL_HOST_DEVICE thread_group(const _Hierarchy&)
-> thread_group<__hierarchy_type_of<_Hierarchy>, __this_hierarchy_group_kind>;
_CCCL_HOST_DEVICE this_thread(const _Hierarchy&) -> this_thread<__hierarchy_type_of<_Hierarchy>>;

#if _CCCL_HAS_COOPERATIVE_GROUPS()
_CCCL_HOST_DEVICE this_thread(const ::cooperative_groups::thread_block_tile<1, void>&) -> this_thread<>;
#endif // _CCCL_HAS_COOPERATIVE_GROUPS()

template <class _Hierarchy>
class warp_group<_Hierarchy, __this_hierarchy_group_kind> : __this_hierarchy_group_base<warp_level, _Hierarchy>
class this_warp : __this_group_base<warp_level, _Hierarchy>
{
using __base_type = __this_hierarchy_group_base<warp_level, _Hierarchy>;
using __base_type = __this_group_base<warp_level, _Hierarchy>;

public:
using level_type = warp_level;
Expand All @@ -143,13 +161,12 @@ public:

_CCCL_TEMPLATE(class _Hierarchy)
_CCCL_REQUIRES(__is_or_has_hierarchy_member_v<_Hierarchy>)
_CCCL_HOST_DEVICE warp_group(const _Hierarchy&)
-> warp_group<__hierarchy_type_of<_Hierarchy>, __this_hierarchy_group_kind>;
_CCCL_HOST_DEVICE this_warp(const _Hierarchy&) -> this_warp<__hierarchy_type_of<_Hierarchy>>;

template <class _Hierarchy>
class block_group<_Hierarchy, __this_hierarchy_group_kind> : __this_hierarchy_group_base<block_level, _Hierarchy>
class this_block : __this_group_base<block_level, _Hierarchy>
{
using __base_type = __this_hierarchy_group_base<block_level, _Hierarchy>;
using __base_type = __this_group_base<block_level, _Hierarchy>;

public:
using level_type = block_level;
Expand All @@ -163,22 +180,31 @@ public:
using __base_type::rank;
using __base_type::rank_as;

# if _CCCL_HAS_COOPERATIVE_GROUPS()
_CCCL_DEVICE_API this_block(const ::cooperative_groups::thread_block&) noexcept
: __base_type{::cuda::experimental::__implicit_hierarchy()}
{}
# endif // _CCCL_HAS_COOPERATIVE_GROUPS()

_CCCL_DEVICE_API void sync() noexcept
{
::__syncthreads();
::__barrier_sync(0);
}
#endif // _CCCL_CUDA_COMPILATION()
};

_CCCL_TEMPLATE(class _Hierarchy)
_CCCL_REQUIRES(__is_or_has_hierarchy_member_v<_Hierarchy>)
_CCCL_HOST_DEVICE block_group(const _Hierarchy&)
-> block_group<__hierarchy_type_of<_Hierarchy>, __this_hierarchy_group_kind>;
_CCCL_HOST_DEVICE this_block(const _Hierarchy&) -> this_block<__hierarchy_type_of<_Hierarchy>>;

#if _CCCL_HAS_COOPERATIVE_GROUPS()
_CCCL_HOST_DEVICE this_block(const ::cooperative_groups::thread_block&) -> this_block<>;
#endif // _CCCL_HAS_COOPERATIVE_GROUPS()

template <class _Hierarchy>
class cluster_group<_Hierarchy, __this_hierarchy_group_kind> : __this_hierarchy_group_base<cluster_level, _Hierarchy>
class this_cluster : __this_group_base<cluster_level, _Hierarchy>
{
using __base_type = __this_hierarchy_group_base<cluster_level, _Hierarchy>;
using __base_type = __this_group_base<cluster_level, _Hierarchy>;

public:
using level_type = cluster_level;
Expand All @@ -192,27 +218,36 @@ public:
using __base_type::rank;
using __base_type::rank_as;

# if _CCCL_HAS_COOPERATIVE_GROUPS() && defined(_CG_HAS_CLUSTER_GROUP)
_CCCL_DEVICE_API this_cluster(const ::cooperative_groups::cluster_group&) noexcept
: __base_type{::cuda::experimental::__implicit_hierarchy()}
{}
# endif // _CCCL_HAS_COOPERATIVE_GROUPS() && defined(_CG_HAS_CLUSTER_GROUP)

_CCCL_DEVICE_API void sync() noexcept
{
NV_IF_ELSE_TARGET(NV_PROVIDES_SM_90,
({
::__cluster_barrier_arrive();
::__cluster_barrier_wait();
}),
({ ::__syncthreads(); }))
({ ::__barrier_sync(0); }))
}
#endif // _CCCL_CUDA_COMPILATION()
};

_CCCL_TEMPLATE(class _Hierarchy)
_CCCL_REQUIRES(__is_or_has_hierarchy_member_v<_Hierarchy>)
_CCCL_HOST_DEVICE cluster_group(const _Hierarchy&)
-> cluster_group<__hierarchy_type_of<_Hierarchy>, __this_hierarchy_group_kind>;
_CCCL_HOST_DEVICE this_cluster(const _Hierarchy&) -> this_cluster<__hierarchy_type_of<_Hierarchy>>;

#if _CCCL_HAS_COOPERATIVE_GROUPS() && defined(_CG_HAS_CLUSTER_GROUP)
_CCCL_HOST_DEVICE this_cluster(const ::cooperative_groups::cluster_group&) -> this_cluster<>;
#endif // _CCCL_HAS_COOPERATIVE_GROUPS() && defined(_CG_HAS_CLUSTER_GROUP)

template <class _Hierarchy>
class grid_group<_Hierarchy, __this_hierarchy_group_kind> : __this_hierarchy_group_base<grid_level, _Hierarchy>
class this_grid : __this_group_base<grid_level, _Hierarchy>
{
using __base_type = __this_hierarchy_group_base<grid_level, _Hierarchy>;
using __base_type = __this_group_base<grid_level, _Hierarchy>;

public:
using level_type = grid_level;
Expand All @@ -221,6 +256,12 @@ public:
using __base_type::hierarchy;

#if _CCCL_CUDA_COMPILATION()
# if _CCCL_HAS_COOPERATIVE_GROUPS()
_CCCL_DEVICE_API this_grid(const ::cooperative_groups::grid_group&) noexcept
: __base_type{::cuda::experimental::__implicit_hierarchy()}
{}
# endif // _CCCL_HAS_COOPERATIVE_GROUPS()

_CCCL_DEVICE_API void sync() noexcept
{
::cuda::experimental::__cg_imported::__grid_sync();
Expand All @@ -230,45 +271,13 @@ public:

_CCCL_TEMPLATE(class _Hierarchy)
_CCCL_REQUIRES(__is_or_has_hierarchy_member_v<_Hierarchy>)
_CCCL_HOST_DEVICE grid_group(const _Hierarchy&)
-> grid_group<__hierarchy_type_of<_Hierarchy>, __this_hierarchy_group_kind>;

_CCCL_TEMPLATE(class _HierarchyLike)
_CCCL_REQUIRES(__is_or_has_hierarchy_member_v<_HierarchyLike>)
[[nodiscard]] _CCCL_DEVICE_API auto this_thread(const _HierarchyLike& __hier_like) noexcept
{
return thread_group{__hier_like};
}

_CCCL_TEMPLATE(class _HierarchyLike)
_CCCL_REQUIRES(__is_or_has_hierarchy_member_v<_HierarchyLike>)
[[nodiscard]] _CCCL_DEVICE_API auto this_warp(const _HierarchyLike& __hier_like) noexcept
{
return warp_group{__hier_like};
}
_CCCL_HOST_DEVICE this_grid(const _Hierarchy&) -> this_grid<__hierarchy_type_of<_Hierarchy>>;

_CCCL_TEMPLATE(class _HierarchyLike)
_CCCL_REQUIRES(__is_or_has_hierarchy_member_v<_HierarchyLike>)
[[nodiscard]] _CCCL_DEVICE_API auto this_block(const _HierarchyLike& __hier_like) noexcept
{
return block_group{__hier_like};
}

_CCCL_TEMPLATE(class _HierarchyLike)
_CCCL_REQUIRES(__is_or_has_hierarchy_member_v<_HierarchyLike>)
[[nodiscard]] _CCCL_DEVICE_API auto this_cluster(const _HierarchyLike& __hier_like) noexcept
{
return cluster_group{__hier_like};
}

_CCCL_TEMPLATE(class _HierarchyLike)
_CCCL_REQUIRES(__is_or_has_hierarchy_member_v<_HierarchyLike>)
[[nodiscard]] _CCCL_DEVICE_API auto this_grid(const _HierarchyLike& __hier_like) noexcept
{
return grid_group{__hier_like};
}
#if _CCCL_HAS_COOPERATIVE_GROUPS()
_CCCL_HOST_DEVICE this_grid(const ::cooperative_groups::grid_group&) -> this_grid<>;
#endif // _CCCL_HAS_COOPERATIVE_GROUPS()
} // namespace cuda::experimental

#include <cuda/std/__cccl/epilogue.h>

#endif // _CUDA_EXPERIMENTAL___HIERARCHY_GROUP_CUH
#endif // _CUDA_EXPERIMENTAL___THIS_GROUP_CUH
3 changes: 2 additions & 1 deletion cudax/include/cuda/experimental/hierarchy.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@

#include <cuda/experimental/__hierarchy/fwd.cuh>
#include <cuda/experimental/__hierarchy/grid_sync.cuh>
#include <cuda/experimental/__hierarchy/group.cuh>
#include <cuda/experimental/__hierarchy/implicit_hierarchy.cuh>
#include <cuda/experimental/__hierarchy/this_group.cuh>

#endif // _CUDA_EXPERIMENTAL_HIERARCHY
Loading
Loading