cccl/libcudacxx/include/cuda/std/__pstl/dispatch.h at c3219c78dbf36fcce4f8b5a0002aced19d71bf44 · NVIDIA/cccl · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
//===----------------------------------------------------------------------===//
//
// Part of libcu++, the C++ Standard Library for your entire system,
// under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
//
//===----------------------------------------------------------------------===//

#ifndef _CUDA_STD___PSTL_DISPATCH_H
#define _CUDA_STD___PSTL_DISPATCH_H

#include <cuda/std/detail/__config>

#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC)
#  pragma GCC system_header
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG)
#  pragma clang system_header
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
#  pragma system_header
#endif // no system header

#include <cuda/std/__execution/policy.h>
#include <cuda/std/__type_traits/always_false.h>
#include <cuda/std/__type_traits/is_base_of.h>
#include <cuda/std/cstdint>

#include <cuda/std/__cccl/prologue.h>

_CCCL_BEGIN_NAMESPACE_CUDA_STD_EXECUTION

//! @brief Identifies the algorithm a @c __pstl_dispatch is selected for
//! Used as the first template argument of @c __pstl_dispatch and @c __pstl_select_dispatch.
//! The enumerators carry no explicit values, so their underlying values follow the declaration order.
enum class __pstl_algorithm
{
  __adjacent_difference,
  __copy_if,
  __copy_n,
  __exclusive_scan,
  __find_if,
  __for_each_n,
  __generate_n,
  __inclusive_scan,
  __merge,
  __reduce,
  __remove_if,
  __transform,
  __transform_reduce,
  __unique_copy,
};

//! @brief Tag type to indicate that we cannot dispatch to a parallel algorithm and should run the algorithm serially
//! The primary @c __pstl_dispatch template derives from this tag, and @c __pstl_can_dispatch tests for it as a
//! base class.
struct __pstl_no_dispatch
{};

_CCCL_BEGIN_NAMESPACE_ARCH_DEPENDENT

//! @brief Dispatcher for a given algorithm and execution backend
//! @tparam _Algorithm The algorithm that should be dispatched
//! @tparam _Backend The execution backend the algorithm should be dispatched to
//! The primary template derives from @c __pstl_no_dispatch. If @class __pstl_dispatch is not specialized by the
//! chosen backend we will fall back to serial execution
template <__pstl_algorithm _Algorithm, __execution_backend _Backend>
struct __pstl_dispatch : public __pstl_no_dispatch
{};

_CCCL_END_NAMESPACE_ARCH_DEPENDENT

//! @brief Helper variable that detects whether @class __pstl_dispatch has been specialized so that we can
//! dispatch
//! The primary template is `false` for every type that is not a specialization of @c __pstl_dispatch.
template <class>
inline constexpr bool __pstl_can_dispatch = false;

//! @brief Specialization for @c __pstl_dispatch: `true` only if the dispatcher does not derive from
//! @c __pstl_no_dispatch, which is the base class of the unspecialized @c __pstl_dispatch primary template
template <__pstl_algorithm _Algorithm, __execution_backend _Backend>
inline constexpr bool __pstl_can_dispatch<__pstl_dispatch<_Algorithm, _Backend>> =
  !::cuda::std::is_base_of_v<__pstl_no_dispatch, __pstl_dispatch<_Algorithm, _Backend>>;

_CCCL_BEGIN_NAMESPACE_ARCH_DEPENDENT

//! @brief Top layer dispatcher that maps an algorithm and an execution policy to a concrete dispatch if possible
//! @tparam _Algorithm The algorithm that should be dispatched
//! @tparam _Policy The execution policy; its static `__get_backend()` is evaluated at compile time
//! @return A value-initialized specialization of @c __pstl_dispatch, either for the backend reported by the policy
//! or for @c __execution_backend::__none
template <__pstl_algorithm _Algorithm, class _Policy>
[[nodiscard]] _CCCL_API _CCCL_CONSTEVAL auto __pstl_select_dispatch() noexcept
{
  // Query the policy for the backend the user asked for
  constexpr __execution_backend __requested = _Policy::__get_backend();

  if constexpr (!::cuda::std::execution::__has_unique_backend(__requested))
  {
    // No unique backend was requested, return the `__none` dispatch to signal serial execution
    return __pstl_dispatch<_Algorithm, __execution_backend::__none>{};
  }
  else
  {
    // The user requested a unique backend, so we must take that
    return __pstl_dispatch<_Algorithm, __requested>{};
  }
}

_CCCL_END_NAMESPACE_ARCH_DEPENDENT

_CCCL_END_NAMESPACE_CUDA_STD_EXECUTION

#include <cuda/std/__cccl/epilogue.h>

#endif // _CUDA_STD___PSTL_DISPATCH_H