diff --git a/cudax/include/cuda/experimental/__execution/task_scheduler.cuh b/cudax/include/cuda/experimental/__execution/task_scheduler.cuh index d2abf246f14..b9cead79aa0 100644 --- a/cudax/include/cuda/experimental/__execution/task_scheduler.cuh +++ b/cudax/include/cuda/experimental/__execution/task_scheduler.cuh @@ -425,7 +425,9 @@ public: { _CCCL_TRY { - constexpr bool __parallelize = _Policy() == par || _Policy() == par_unseq; + constexpr bool __parallelize = + ::cuda::std::is_same_v<_Policy, ::cuda::std::execution::parallel_policy> + || ::cuda::std::is_same_v<_Policy, ::cuda::std::execution::parallel_unsequenced_policy>; __visit(__detail::__get_execute_bulk_fn<__parallelize>(_BulkTag(), __fn_, __shape_, __begin, __end), __values_); } _CCCL_CATCH_ALL diff --git a/libcudacxx/include/cuda/__execution/policy.h b/libcudacxx/include/cuda/__execution/policy.h index 27013ec9f82..fcb7ff9b8ea 100644 --- a/libcudacxx/include/cuda/__execution/policy.h +++ b/libcudacxx/include/cuda/__execution/policy.h @@ -22,207 +22,16 @@ #if _CCCL_HAS_BACKEND_CUDA() -# include -# include -# include -# include -# include -# include -# include # include -# include -# include -# include # include -_CCCL_BEGIN_NAMESPACE_CUDA_STD_EXECUTION - -template -struct __policy_stream_holder -{ - ::cuda::stream_ref __stream_; - - _CCCL_HOST_API constexpr __policy_stream_holder(::cuda::stream_ref __stream) noexcept - : __stream_(__stream) - {} -}; - -template <> -struct __policy_stream_holder -{ - _CCCL_HIDE_FROM_ABI __policy_stream_holder() = default; - - //! @brief Dummy constructor to simplify implementation of the cuda policy - _CCCL_HOST_API constexpr __policy_stream_holder(::cuda::stream_ref) noexcept {} -}; - -template -struct __policy_memory_resource_holder -{ - ::cuda::mr::resource_ref<::cuda::mr::device_accessible> __resource_; - - _CCCL_HIDE_FROM_ABI __policy_memory_resource_holder() = default; - - _CCCL_HOST_API constexpr __policy_memory_resource_holder( - ::cuda::mr::resource_ref<::cuda::mr::device_accessible> __resource) noexcept - : __resource_(__resource) - {} -}; - -template <> -struct __policy_memory_resource_holder -{ - _CCCL_HIDE_FROM_ABI __policy_memory_resource_holder() = default; - - //! @brief Dummy constructor to simplify implementation of the cuda policy - _CCCL_HOST_API constexpr __policy_memory_resource_holder( - ::cuda::mr::resource_ref<::cuda::mr::device_accessible>) noexcept - {} -}; - -template -struct _CCCL_DECLSPEC_EMPTY_BASES __execution_policy_base<_Policy, __execution_backend::__cuda> - : __execution_policy_base<_Policy, __execution_backend::__none> - , protected __policy_stream_holder<__cuda_policy_with_stream<_Policy>> - , protected __policy_memory_resource_holder<__cuda_policy_with_memory_resource<_Policy>> -{ -private: - template - friend struct __execution_policy_base; - - using __stream_holder = __policy_stream_holder<__cuda_policy_with_stream<_Policy>>; - using __resource_holder = __policy_memory_resource_holder<__cuda_policy_with_memory_resource<_Policy>>; - - _CCCL_TEMPLATE(bool _WithStream = __cuda_policy_with_stream<_Policy>, - bool _WithResource = __cuda_policy_with_memory_resource<_Policy>) - _CCCL_REQUIRES((_WithStream) _CCCL_AND(!_WithResource)) - _CCCL_HOST_API constexpr __execution_policy_base(::cuda::stream_ref __stream) noexcept - : __stream_holder(__stream) - , __resource_holder() - {} - - _CCCL_TEMPLATE(bool _WithStream = __cuda_policy_with_stream<_Policy>, - bool _WithResource = __cuda_policy_with_memory_resource<_Policy>) - _CCCL_REQUIRES((!_WithStream) _CCCL_AND _WithResource) - _CCCL_HOST_API constexpr __execution_policy_base( - ::cuda::mr::resource_ref<::cuda::mr::device_accessible> __resource) noexcept - : __stream_holder() - , __resource_holder(::cuda::std::move(__resource)) // NVCC 12.0 needs the move - {} - - _CCCL_TEMPLATE(bool _WithStream = __cuda_policy_with_stream<_Policy>, - bool _WithResource = __cuda_policy_with_memory_resource<_Policy>) - _CCCL_REQUIRES(_WithStream _CCCL_AND _WithResource) - _CCCL_HOST_API constexpr __execution_policy_base( - ::cuda::stream_ref __stream, ::cuda::mr::resource_ref<::cuda::mr::device_accessible> __resource) noexcept - : __stream_holder(__stream) - , __resource_holder(::cuda::std::move(__resource)) - {} - -public: - _CCCL_HIDE_FROM_ABI constexpr __execution_policy_base() noexcept = default; - - //! @brief Convert to a policy that holds a stream - //! @note This cannot be merged with the other case where we already have a stream as this needs to be const qualified - //! This is because we start with a constexpr global and modify that through with - [[nodiscard]] _CCCL_HOST_API auto with(const ::cuda::get_stream_t&, ::cuda::stream_ref __stream) const noexcept - { - constexpr uint32_t __new_policy = __set_cuda_backend_option<_Policy, __cuda_backend_options::__with_stream>; - if constexpr (__cuda_policy_with_memory_resource<_Policy>) - { - return __execution_policy_base<__new_policy>{__stream, this->__resource_}; - } - else - { - return __execution_policy_base<__new_policy>{__stream}; - } - } - - //! @brief Return the stream stored in the holder or a default stream - _CCCL_TEMPLATE(bool _WithStream = __cuda_policy_with_stream<_Policy>) - _CCCL_REQUIRES(_WithStream) - [[nodiscard]] _CCCL_API ::cuda::stream_ref query(const ::cuda::get_stream_t&) const noexcept - { - return this->__stream_; - } - - //! @brief Convert to a policy that holds a memory resource - //! @warning We hold the memory resource by reference, so passing rvalue is a bug - template - [[nodiscard]] _CCCL_HOST_API auto with(const cuda::mr::get_memory_resource_t&, _Resource&&) const = delete; - - //! @brief Convert to a policy that holds a memory resource - //! @note This cannot be merged with the other case as this needs to be const qualified - //! This is because we start with a constexpr global and modify that through with - template - [[nodiscard]] _CCCL_HOST_API auto with(const cuda::mr::get_memory_resource_t&, _Resource& __resource) const noexcept - { - constexpr uint32_t __new_policy = - __set_cuda_backend_option<_Policy, __cuda_backend_options::__with_memory_resource>; - if constexpr (__cuda_policy_with_stream<_Policy>) - { - return __execution_policy_base<__new_policy>{this->__stream_, __resource}; - } - else - { - return __execution_policy_base<__new_policy>{__resource}; - } - } - - //! @brief Return either a stored or a default memory resource - _CCCL_TEMPLATE(bool _WithResource = __cuda_policy_with_memory_resource<_Policy>) - _CCCL_REQUIRES(_WithResource) - [[nodiscard]] _CCCL_API auto query(const ::cuda::mr::get_memory_resource_t&) const noexcept - { - return this->__resource_; - } - - template - [[nodiscard]] _CCCL_API friend constexpr bool operator==( - const __execution_policy_base& __lhs, const __execution_policy_base<_OtherPolicy, _OtherBackend>& __rhs) noexcept - { - if constexpr (_Policy != _OtherPolicy) - { - return false; - } - - if constexpr (__cuda_policy_with_stream<_Policy>) - { - if (__lhs.query(::cuda::get_stream) != __rhs.query(::cuda::get_stream)) - { - return false; - } - } - - if constexpr (__cuda_policy_with_memory_resource<_Policy>) - { - if (__lhs.query(::cuda::mr::get_memory_resource) != __rhs.query(::cuda::mr::get_memory_resource)) - { - return false; - } - } - - return true; - } - -# if _CCCL_STD_VER <= 2017 - template - [[nodiscard]] _CCCL_API friend constexpr bool operator!=( - const __execution_policy_base& __lhs, const __execution_policy_base<_OtherPolicy, _OtherBackend>& __rhs) noexcept - { - return !(__lhs == __rhs); - } -# endif // _CCCL_STD_VER <= 2017 -}; - -_CCCL_END_NAMESPACE_CUDA_STD_EXECUTION - _CCCL_BEGIN_NAMESPACE_CUDA_EXECUTION using __cub_parallel_unsequenced_policy = - ::cuda::std::execution::__execution_policy_base<::cuda::std::execution::__with_cuda_backend( - ::cuda::std::execution::__execution_policy::__parallel_unsequenced)>()>; + ::cuda::std::execution::__execution_policy_base<::cuda::std::execution::__with_backend< + static_cast(::cuda::std::execution::__execution_policy::__parallel_unsequenced), + ::cuda::std::execution::__execution_backend::__cuda>()>; _CCCL_GLOBAL_CONSTANT __cub_parallel_unsequenced_policy __cub_par_unseq{}; _CCCL_END_NAMESPACE_CUDA_EXECUTION diff --git a/libcudacxx/include/cuda/__fwd/execution_policy.h b/libcudacxx/include/cuda/__fwd/execution_policy.h deleted file mode 100644 index 5df2d69e334..00000000000 --- a/libcudacxx/include/cuda/__fwd/execution_policy.h +++ /dev/null @@ -1,73 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES -// -//===----------------------------------------------------------------------===// - -#ifndef _CUDA___FWD_EXECUTION_POLICY_H -#define _CUDA___FWD_EXECUTION_POLICY_H - -#include - -#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) -# pragma GCC system_header -#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) -# pragma clang system_header -#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) -# pragma system_header -#endif // no system header - -#if _CCCL_HAS_BACKEND_CUDA() - -# include - -# include - -_CCCL_BEGIN_NAMESPACE_CUDA_STD_EXECUTION - -enum __cuda_backend_options : uint16_t -{ - __with_stream = 1 << 0, ///> Determines whether the policy holds a stream - __with_memory_resource = 1 << 1, ///> Determines whether the policy holds a memory resource -}; - -//! @brief Sets the execution backend to cuda -template -[[nodiscard]] _CCCL_API constexpr uint32_t __with_cuda_backend() noexcept -{ - constexpr uint32_t __backend_mask{0xFFFF00FF}; - constexpr uint32_t __new_policy = - (_Policy & __backend_mask) | (static_cast(__execution_backend::__cuda) << 8); - return __new_policy; -} - -//! @brief Backend specific options of the CUDA backend -template -inline constexpr __cuda_backend_options __policy_to_cuda_backend_options = - static_cast<__cuda_backend_options>((_Policy & uint32_t{0xFFFF0000}) >> 16); - -//! @brief Sets a backend specific option -template -inline constexpr uint32_t __set_cuda_backend_option = - _Policy | static_cast(static_cast(__option) << 16); - -//! @brief Detects whether a given policy holds a user provided stream -template -inline constexpr bool __cuda_policy_with_stream = - static_cast(__policy_to_cuda_backend_options<_Policy> & __cuda_backend_options::__with_stream); - -//! @brief Detects whether a given policy holds a user provided memory resource -template -inline constexpr bool __cuda_policy_with_memory_resource = - static_cast(__policy_to_cuda_backend_options<_Policy> & __cuda_backend_options::__with_memory_resource); - -_CCCL_END_NAMESPACE_CUDA_STD_EXECUTION - -# include - -#endif // _CCCL_HAS_BACKEND_CUDA() - -#endif // _CUDA___FWD_EXECUTION_POLICY_H diff --git a/libcudacxx/include/cuda/__fwd/get_memory_resource.h b/libcudacxx/include/cuda/__fwd/get_memory_resource.h new file mode 100644 index 00000000000..a41ff642976 --- /dev/null +++ b/libcudacxx/include/cuda/__fwd/get_memory_resource.h @@ -0,0 +1,38 @@ +//===----------------------------------------------------------------------===// +// +// Part of libcu++, the C++ Standard Library for your entire system, +// under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. +// +//===----------------------------------------------------------------------===// + +#ifndef _CUDA___FWD_GET_MEMORY_RESOURCE_H +#define _CUDA___FWD_GET_MEMORY_RESOURCE_H + +#include + +#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) +# pragma GCC system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) +# pragma clang system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) +# pragma system_header +#endif // no system header + +#if _CCCL_HAS_CTK() + +# include + +_CCCL_BEGIN_NAMESPACE_CUDA_MR + +struct __get_memory_resource_t; + +_CCCL_END_NAMESPACE_CUDA_MR + +# include + +#endif // _CCCL_HAS_CTK() + +#endif // _CUDA___FWD_GET_MEMORY_RESOURCE_H diff --git a/libcudacxx/include/cuda/__fwd/get_stream.h b/libcudacxx/include/cuda/__fwd/get_stream.h index 10f65318cf6..c32e34ef15e 100644 --- a/libcudacxx/include/cuda/__fwd/get_stream.h +++ b/libcudacxx/include/cuda/__fwd/get_stream.h @@ -35,4 +35,4 @@ _CCCL_END_NAMESPACE_CUDA #endif // _CCCL_HAS_CTK() -#endif // _CUDA___FWD_PIPELINE_H +#endif // _CUDA___FWD_GET_STREAM_H diff --git a/libcudacxx/include/cuda/__memory_resource/any_resource.h b/libcudacxx/include/cuda/__memory_resource/any_resource.h index 02b39a2a62a..69f38725a1f 100644 --- a/libcudacxx/include/cuda/__memory_resource/any_resource.h +++ b/libcudacxx/include/cuda/__memory_resource/any_resource.h @@ -839,6 +839,12 @@ using resource_ref = basic_resource_ref<_ResourceKind::_Asynchronous, _Propertie # endif // _CCCL_DOXYGEN_INVOKED +template +inline constexpr bool __is_resource_ref = false; + +template +inline constexpr bool __is_resource_ref> = true; + //! @rst //! .. _libcudacxx-memory-resource-make-any-resource: //! diff --git a/libcudacxx/include/cuda/__memory_resource/get_memory_resource.h b/libcudacxx/include/cuda/__memory_resource/get_memory_resource.h index 7c74bba588a..f90ebf68ace 100644 --- a/libcudacxx/include/cuda/__memory_resource/get_memory_resource.h +++ b/libcudacxx/include/cuda/__memory_resource/get_memory_resource.h @@ -23,6 +23,7 @@ #if _CCCL_HAS_CTK() +# include # include # include # include @@ -35,8 +36,6 @@ _CCCL_BEGIN_NAMESPACE_CUDA_MR -struct __get_memory_resource_t; - template _CCCL_CONCEPT __has_member_get_resource = _CCCL_REQUIRES_EXPR((_Tp), const _Tp& __t)( requires(resource<::cuda::std::remove_cvref_t>)); diff --git a/libcudacxx/include/cuda/std/__execution/policy.h b/libcudacxx/include/cuda/std/__execution/policy.h index d4bf5e05c4a..32ec7013777 100644 --- a/libcudacxx/include/cuda/std/__execution/policy.h +++ b/libcudacxx/include/cuda/std/__execution/policy.h @@ -20,8 +20,25 @@ # pragma system_header #endif // no system header +#if _CCCL_HAS_CTK() && !_CCCL_COMPILER(NVRTC) +# include +# include +# include +# include +# include +# include +# include +#endif // _CCCL_HAS_CTK() && !_CCCL_COMPILER(NVRTC) #include +#include +#include #include +#include +#include +#include +#include +#include +#include #include #include @@ -35,28 +52,12 @@ _CCCL_BEGIN_NAMESPACE_CUDA_STD_EXECUTION //! @brief Base class for our execution policies. //! It takes an untagged uint32_t because we want to be able to store 3 different enumerations in it. -template -struct __execution_policy_base +template +struct __execution_policy_base : env<__unwrap_reference_t<_Envs>...> { //! @brief Tag that identifies this and all derived classes as a CCCL execution policy static constexpr uint32_t __cccl_policy_ = _Policy; - template - [[nodiscard]] _CCCL_API friend constexpr bool - operator==(const __execution_policy_base&, const __execution_policy_base<_OtherPolicy, _OtherBackend>&) noexcept - { - return _Policy == _OtherPolicy; - } - -#if _CCCL_STD_VER <= 2017 - template - [[nodiscard]] _CCCL_API friend constexpr bool - operator!=(const __execution_policy_base&, const __execution_policy_base<_OtherPolicy, _OtherBackend>&) noexcept - { - return _Policy != _OtherPolicy; - } -#endif // _CCCL_STD_VER <= 2017 - //! @brief Extracts the execution policy from the stored _Policy [[nodiscard]] _CCCL_API static constexpr __execution_policy __get_policy() noexcept { @@ -68,6 +69,111 @@ struct __execution_policy_base { return __policy_to_execution_backend<_Policy>; } + + //! Forwards queries to the env + using env<__unwrap_reference_t<_Envs>...>::query; + + //! @brief create a new policy with additional environments attached + template + [[nodiscard]] _CCCL_API constexpr __execution_policy_base<_Policy, _Env, _Envs...> + __with(_Env&& __env, index_sequence<_Indices...>) const + { + if constexpr (sizeof...(_Envs) == 2) + { + return __execution_policy_base<_Policy, _Env, _Envs...>{ + ::cuda::std::forward<_Env>(__env), this->__env0_, this->__env1_}; + } + else + { + return __execution_policy_base<_Policy, _Env, _Envs...>{ + ::cuda::std::forward<_Env>(__env), ::cuda::std::__get<_Indices>(this->__envs_)...}; + } + } + + //! @brief Prepend an environment to the current ones + template + [[nodiscard]] _CCCL_API constexpr auto with(_Env&& __env) const + { +#if _CCCL_HAS_CTK() && !_CCCL_COMPILER(NVRTC) + if constexpr (__convertible_to_stream_ref<_Env> && (is_lvalue_reference_v<_Env> || is_pointer_v<_Env>) ) + { // streams are special in that they are their own environment, but we always want to store a stream_ref + // We must reject prvalue cuda::stream because they are not copyable + static_assert(!is_same_v, ::cuda::stream> || is_lvalue_reference_v<_Env>, + "cuda::stream is not copyable. It must be passed as a cuda::stream_ref"); + ::cuda::stream_ref __stream{__env}; + return __with(prop{::cuda::get_stream, __stream}, ::cuda::std::make_index_sequence()); + } + else if constexpr (::cuda::mr::resource<_Env>) + { // TODO(miscco): If we support more than one backend, we need to change this to satisfy the backends needs + static_assert(::cuda::mr::resource_with<_Env, ::cuda::mr::device_accessible>, + "Memory resources need to provide device accessible memory"); + static_assert(!is_const_v<_Env>, "A memory resource must be passed by non-const reference"); + if constexpr (!is_lvalue_reference_v<_Env> && !::cuda::mr::__is_resource_ref>) + { // The user passed a prvalue, which indicates we should own the resource + return __with(prop{::cuda::mr::get_memory_resource, + ::cuda::mr::any_resource<::cuda::mr::device_accessible> { + ::cuda::std::move(__env) + }}, + ::cuda::std::make_index_sequence()); + } + else + { + return __with(prop{::cuda::mr::get_memory_resource, + ::cuda::mr::resource_ref<::cuda::mr::device_accessible> { + __env + }}, + ::cuda::std::make_index_sequence()); + } + } + else +#endif // _CCCL_HAS_CTK() && !_CCCL_COMPILER(NVRTC) + { + return __with(::cuda::std::forward<_Env>(__env), ::cuda::std::make_index_sequence()); + } + } + + //! @brief Create a new environment from a tag and a value and prepend + template + [[nodiscard]] _CCCL_API constexpr auto with(const _Tag& __tag, _Value&& __value) const + { +#if _CCCL_HAS_CTK() && !_CCCL_COMPILER(NVRTC) + if constexpr (is_same_v, ::cuda::get_stream_t>) + { // We want to force the use of ::cuda::stream_ref + // We must reject prvalue cuda::stream because they are not copyable + static_assert(!is_same_v, ::cuda::stream> || is_lvalue_reference_v<_Value>, + "cuda::stream is not copyable. It must be passed as a cuda::stream_ref"); + ::cuda::stream_ref __stream{__value}; + return __with(prop{__tag, __stream}, ::cuda::std::make_index_sequence()); + } + else if constexpr (is_same_v, ::cuda::mr::get_memory_resource_t>) + { // TODO(miscco): If we support more than one backend, we need to change this to satisfy the backends needs + static_assert(::cuda::mr::resource_with<_Value, ::cuda::mr::device_accessible>, + "Memory resources need to provide device accessible memory"); + static_assert(!is_const_v<_Value>, "A memory resource must be passed by non-const reference"); + if constexpr (!is_lvalue_reference_v<_Value> && !::cuda::mr::__is_resource_ref>) + { // The user passed a prvalue, which indicates we should own the resource + return __with(prop{__tag, + ::cuda::mr::any_resource<::cuda::mr::device_accessible> { + ::cuda::std::move(__value) + }}, + ::cuda::std::make_index_sequence()); + } + else + { + return __with(prop{__tag, + ::cuda::mr::resource_ref<::cuda::mr::device_accessible> { + __value + }}, + ::cuda::std::make_index_sequence()); + } + } + else +#endif // _CCCL_HAS_CTK() && !_CCCL_COMPILER(NVRTC) + { + return __with(prop{__tag, ::cuda::std::forward<_Value>(__value)}, + ::cuda::std::make_index_sequence()); + } + } }; using sequenced_policy = __execution_policy_base(__execution_policy::__sequenced)>; diff --git a/libcudacxx/include/cuda/std/__fwd/execution_policy.h b/libcudacxx/include/cuda/std/__fwd/execution_policy.h index 61f183e13bf..ba70ee45c8b 100644 --- a/libcudacxx/include/cuda/std/__fwd/execution_policy.h +++ b/libcudacxx/include/cuda/std/__fwd/execution_policy.h @@ -63,9 +63,26 @@ template inline constexpr __execution_backend __policy_to_execution_backend = __execution_backend{(_Policy & uint32_t{0x0000FF00}) >> 8}; -template > +template struct __execution_policy_base; +//! @brief Sets the execution backend to cuda +template +[[nodiscard]] _CCCL_API constexpr uint32_t __with_backend() noexcept +{ + constexpr uint32_t __backend_mask{0xFFFF00FF}; + constexpr uint32_t __new_policy = (_Policy & __backend_mask) | (static_cast(_Backend) << 8); + return __new_policy; +} + +//! @brief Checks whether a policy supports the cuda backend +template +[[nodiscard]] _CCCL_API constexpr bool __has_backend() noexcept +{ + constexpr uint32_t __backend_mask{0xFFFF00FF}; + return (_Policy & __backend_mask) & (static_cast(_Backend) << 8); +} + _CCCL_END_NAMESPACE_CUDA_STD_EXECUTION #include diff --git a/libcudacxx/include/cuda/std/__pstl/dispatch.h b/libcudacxx/include/cuda/std/__pstl/dispatch.h index 9bf7cdd6fd0..9b8d68ac0a9 100644 --- a/libcudacxx/include/cuda/std/__pstl/dispatch.h +++ b/libcudacxx/include/cuda/std/__pstl/dispatch.h @@ -75,7 +75,7 @@ _CCCL_BEGIN_NAMESPACE_ARCH_DEPENDENT //! @brief Top layer dispatcher that returns a concrete dispatch if possible template <__pstl_algorithm _Algorithm, class _Policy> -[[nodiscard]] _CCCL_API _CCCL_CONSTEVAL auto __pstl_select_dispatch() noexcept +[[nodiscard]] _CCCL_HOST_API _CCCL_CONSTEVAL auto __pstl_select_dispatch() noexcept { // First extract the desired backend from the policy constexpr __execution_backend __backend = _Policy::__get_backend(); diff --git a/libcudacxx/include/cuda/std/execution b/libcudacxx/include/cuda/std/execution index 4442bcbbf91..ead4f47f7ab 100644 --- a/libcudacxx/include/cuda/std/execution +++ b/libcudacxx/include/cuda/std/execution @@ -21,6 +21,7 @@ # pragma system_header #endif // no system header +#include // IWYU pragma: export #include // IWYU pragma: export #include // IWYU pragma: export #include // IWYU pragma: export diff --git a/libcudacxx/test/libcudacxx/cuda/execution/execution_policy/get_memory_resource.pass.cpp b/libcudacxx/test/libcudacxx/cuda/execution/execution_policy/get_memory_resource.pass.cpp index e7c98c3e9f8..5da98cda6f1 100644 --- a/libcudacxx/test/libcudacxx/cuda/execution/execution_policy/get_memory_resource.pass.cpp +++ b/libcudacxx/test/libcudacxx/cuda/execution/execution_policy/get_memory_resource.pass.cpp @@ -64,36 +64,32 @@ void test(Policy pol) { auto old_stream = cuda::__call_or(::cuda::get_stream, cuda::stream_ref{cudaStreamPerThread}, pol); auto fallback_resource = ::cuda::device_default_memory_pool(cuda::device_ref{0}); - { // Ensure that the plain policy returns a well defined memory resource + { // Ensure that the plain policy is not callable with get_memory_resource assert(cuda::__call_or(::cuda::mr::get_memory_resource, fallback_resource, pol) == fallback_resource); } { // Ensure that we can attach a memory resource to an execution policy test_resource resource{42}; auto pol_with_resource = pol.with(cuda::mr::get_memory_resource, resource); - assert(cuda::__call_or(::cuda::mr::get_memory_resource, fallback_resource, pol_with_resource) == resource); + assert(cuda::mr::get_memory_resource(pol_with_resource) == resource); assert(cuda::__call_or(::cuda::get_stream, cuda::stream_ref{cudaStreamPerThread}, pol_with_resource) == old_stream); using policy_t = decltype(pol_with_resource); - static_assert(noexcept(pol.with(cuda::mr::get_memory_resource, resource))); static_assert(cuda::std::is_execution_policy_v); } { // Ensure that attaching a memory resource multiple times just overwrites the old one test_resource resource{42}; auto pol_with_resource = pol.with(cuda::mr::get_memory_resource, resource); - assert(cuda::__call_or(::cuda::mr::get_memory_resource, fallback_resource, pol_with_resource) == resource); + assert(cuda::mr::get_memory_resource(pol_with_resource) == resource); assert(cuda::__call_or(::cuda::get_stream, cuda::stream_ref{cudaStreamPerThread}, pol_with_resource) == old_stream); - using policy_t = decltype(pol_with_resource); test_resource other_resource{1337}; decltype(auto) pol_with_other_resource = pol_with_resource.with(cuda::mr::get_memory_resource, other_resource); - static_assert(cuda::std::is_same_v); // The original resource is unchanged - assert(cuda::__call_or(::cuda::mr::get_memory_resource, fallback_resource, pol_with_resource) == resource); - assert(cuda::__call_or(::cuda::mr::get_memory_resource, fallback_resource, pol_with_other_resource) - == other_resource); + assert(cuda::mr::get_memory_resource(pol_with_resource) == resource); + assert(cuda::mr::get_memory_resource(pol_with_other_resource) == other_resource); assert(cuda::__call_or(::cuda::get_stream, cuda::stream_ref{cudaStreamPerThread}, pol_with_resource) == old_stream); } } @@ -110,7 +106,8 @@ void test() test(cuda::execution::__cub_par_unseq); // Ensure that all works even if we have a stream attached - test(cuda::execution::__cub_par_unseq.with(cuda::get_stream, ::cuda::stream{cuda::device_ref{0}})); + ::cuda::stream stream{cuda::device_ref{0}}; + test(cuda::execution::__cub_par_unseq.with(cuda::get_stream, stream)); } int main(int, char**) diff --git a/libcudacxx/test/libcudacxx/cuda/execution/execution_policy/get_stream.pass.cpp b/libcudacxx/test/libcudacxx/cuda/execution/execution_policy/get_stream.pass.cpp index 312af77fd99..a95be950e0e 100644 --- a/libcudacxx/test/libcudacxx/cuda/execution/execution_policy/get_stream.pass.cpp +++ b/libcudacxx/test/libcudacxx/cuda/execution/execution_policy/get_stream.pass.cpp @@ -28,33 +28,30 @@ void test(Policy pol) namespace execution = cuda::std::execution; cuda::stream_ref default_stream{cudaStreamPerThread}; - { // Ensure that the plain policy returns a well defined stream + { // Ensure that the plain policy does not provide a stream assert(cuda::__call_or(::cuda::get_stream, default_stream, pol) == default_stream); } { // Ensure that we can attach a stream to an execution policy cuda::stream stream{cuda::device_ref{0}}; auto pol_with_stream = pol.with(cuda::get_stream, stream); - assert(cuda::__call_or(::cuda::get_stream, default_stream, pol_with_stream) == stream); + assert(cuda::get_stream(pol_with_stream) == stream); using stream_policy_t = decltype(pol_with_stream); - static_assert(noexcept(pol.with(cuda::get_stream, stream))); static_assert(cuda::std::is_execution_policy_v); } { // Ensure that attaching a stream multiple times just overwrites the old stream cuda::stream stream{cuda::device_ref{0}}; auto pol_with_stream = pol.with(cuda::get_stream, stream); - assert(cuda::__call_or(::cuda::get_stream, default_stream, pol_with_stream) == stream); + assert(cuda::get_stream(pol_with_stream) == stream); - using stream_policy_t = decltype(pol_with_stream); cuda::stream other_stream{cuda::device_ref{0}}; decltype(auto) pol_with_other_stream = pol_with_stream.with(cuda::get_stream, other_stream); - static_assert(cuda::std::is_same_v); // The original stream remains unchanged - assert(cuda::__call_or(::cuda::get_stream, default_stream, pol_with_stream) == stream); - assert(cuda::__call_or(::cuda::get_stream, default_stream, pol_with_other_stream) == other_stream); + assert(cuda::get_stream(pol_with_stream) == stream); + assert(cuda::get_stream(pol_with_other_stream) == other_stream); } } diff --git a/libcudacxx/test/libcudacxx/std/utilities/expol/environments.pass.cpp b/libcudacxx/test/libcudacxx/std/utilities/expol/environments.pass.cpp new file mode 100644 index 00000000000..2781496fac2 --- /dev/null +++ b/libcudacxx/test/libcudacxx/std/utilities/expol/environments.pass.cpp @@ -0,0 +1,278 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: nvrtc + +#include +#include +#include + +#include "test_macros.h" + +struct SomeValue +{ + int value; +}; + +struct SomeProperty +{ + _CCCL_TEMPLATE(class Env) + _CCCL_REQUIRES(cuda::std::execution::__queryable_with) + [[nodiscard]] constexpr SomeValue operator()(const Env& env) const + { + return env.query(SomeProperty{}); + } +}; + +struct test_resource +{ + __host__ __device__ void* allocate_sync(std::size_t, std::size_t) + { + return nullptr; + } + + __host__ __device__ void deallocate_sync(void* ptr, std::size_t, std::size_t) noexcept + { + // ensure that we did get the right inputs forwarded + _val = *static_cast(ptr); + } + + __host__ __device__ void* allocate(cuda::stream_ref, std::size_t, std::size_t) + { + return &_val; + } + + __host__ __device__ void deallocate(cuda::stream_ref, void* ptr, std::size_t, std::size_t) + { + // ensure that we did get the right inputs forwarded + _val = *static_cast(ptr); + } + + __host__ __device__ bool operator==(const test_resource& other) const + { + return _val == other._val; + } + __host__ __device__ bool operator!=(const test_resource& other) const + { + return _val != other._val; + } + + friend constexpr void get_property(const test_resource&, ::cuda::mr::device_accessible) noexcept {} + + int _val = 0; +}; +static_assert(::cuda::mr::resource); + +template +void test(const Policy& policy) +{ + { // the policy can take a stream + cuda::stream stream{cuda::device_ref{0}}; + static_assert(!cuda::std::__is_callable_v); + const auto new_policy = policy.with(cuda::get_stream, stream); + static_assert(cuda::std::__is_callable_v); + auto&& result = cuda::get_stream(new_policy); + static_assert(cuda::std::is_same_v); + assert(stream == result); + } + + { // the policy can take a stream_ref + cuda::stream stream{cuda::device_ref{0}}; + static_assert(!cuda::std::__is_callable_v); + const auto new_policy = policy.with(cuda::get_stream, ::cuda::stream_ref{stream}); + static_assert(cuda::std::__is_callable_v); + auto&& result = cuda::get_stream(new_policy); + static_assert(cuda::std::is_same_v); + assert(stream == result); + } + + { // the policy can take a cuStream_t + cuda::stream stream{cuda::device_ref{0}}; + static_assert(!cuda::std::__is_callable_v); + const auto new_policy = policy.with(cuda::get_stream, stream.get()); + static_assert(cuda::std::__is_callable_v); + auto&& result = cuda::get_stream(new_policy); + static_assert(cuda::std::is_same_v); + assert(stream == result); + } + + { // the policy can take a stream_ref as an environment + cuda::stream stream{cuda::device_ref{0}}; + static_assert(!cuda::std::__is_callable_v); + const auto new_policy = policy.with(cuda::stream_ref{stream}); + static_assert(cuda::std::__is_callable_v); + auto&& result = cuda::get_stream(new_policy); + static_assert(cuda::std::is_same_v); + assert(stream == result); + } + + { // the policy can take a cuda::stream as an environment + cuda::stream stream{cuda::device_ref{0}}; + static_assert(!cuda::std::__is_callable_v); + const auto new_policy = policy.with(stream); + static_assert(cuda::std::__is_callable_v); + auto&& result = cuda::get_stream(new_policy); + static_assert(cuda::std::is_same_v); + assert(stream == result); + } + + { // the policy can take a cuStream_t as an environment + cuda::stream stream{cuda::device_ref{0}}; + static_assert(!cuda::std::__is_callable_v); + const auto new_policy = policy.with(stream.get()); + static_assert(cuda::std::__is_callable_v); + auto&& result = cuda::get_stream(new_policy); + static_assert(cuda::std::is_same_v); + assert(stream == result); + } + + { // the policy can take a memory resource by lvalue + test_resource resource{}; + static_assert(!cuda::std::__is_callable_v); + const auto new_policy = policy.with(cuda::mr::get_memory_resource, resource); + static_assert(cuda::std::__is_callable_v); + auto&& result = cuda::mr::get_memory_resource(new_policy); + static_assert(cuda::std::is_same_v&>); + assert(resource == result); + } + + { // the policy can take a memory resource by prvalue -> need to own + static_assert(!cuda::std::__is_callable_v); + const auto new_policy = policy.with(cuda::mr::get_memory_resource, test_resource{}); + static_assert(cuda::std::__is_callable_v); + auto&& result = cuda::mr::get_memory_resource(new_policy); + static_assert(cuda::std::is_same_v&>); + } + + { // the policy can take a resource_ref by lvalue + test_resource resource{}; + cuda::mr::resource_ref<::cuda::mr::device_accessible> resource_ref{resource}; + static_assert(!cuda::std::__is_callable_v); + const auto new_policy = policy.with(cuda::mr::get_memory_resource, resource_ref); + static_assert(cuda::std::__is_callable_v); + auto&& result = cuda::mr::get_memory_resource(new_policy); + static_assert(cuda::std::is_same_v&>); + assert(resource == result); + } + + { // the policy can take a prvalue resource_ref -> remains resource_ref + test_resource resource{}; + static_assert(!cuda::std::__is_callable_v); + const auto new_policy = + policy.with(cuda::mr::get_memory_resource, cuda::mr::resource_ref<::cuda::mr::device_accessible>{resource}); + static_assert(cuda::std::__is_callable_v); + auto&& result = cuda::mr::get_memory_resource(new_policy); + static_assert(cuda::std::is_same_v&>); + } + + { // the policy can take a cuda::mr::any_resource by lvalue + cuda::mr::any_resource<::cuda::mr::device_accessible> resource{test_resource{}}; + static_assert(!cuda::std::__is_callable_v); + const auto new_policy = policy.with(cuda::mr::get_memory_resource, resource); + static_assert(cuda::std::__is_callable_v); + auto&& result = cuda::mr::get_memory_resource(new_policy); + static_assert(cuda::std::is_same_v&>); + assert(resource == result); + } + + { // the policy can take a prvalue cuda::mr::any_resource -> needs to own + static_assert(!cuda::std::__is_callable_v); + const auto new_policy = policy.with( + cuda::mr::get_memory_resource, cuda::mr::any_resource<::cuda::mr::device_accessible>{test_resource{}}); + static_assert(cuda::std::__is_callable_v); + auto&& result = cuda::mr::get_memory_resource(new_policy); + static_assert(cuda::std::is_same_v&>); + } + + { // the policy can take a memory resource by lvalue as an environment + test_resource resource{}; + static_assert(!cuda::std::__is_callable_v); + const auto new_policy = policy.with(resource); + static_assert(cuda::std::__is_callable_v); + auto&& result = cuda::mr::get_memory_resource(new_policy); + static_assert(cuda::std::is_same_v&>); + assert(resource == result); + } + + { // the policy can take a memory resource by prvalue as an environment -> need to own + static_assert(!cuda::std::__is_callable_v); + const auto new_policy = policy.with(test_resource{}); + static_assert(cuda::std::__is_callable_v); + auto&& result = cuda::mr::get_memory_resource(new_policy); + static_assert(cuda::std::is_same_v&>); + } + + { // the policy can take a resource_ref by lvalue as an environment + test_resource resource{}; + cuda::mr::resource_ref<::cuda::mr::device_accessible> resource_ref{resource}; + static_assert(!cuda::std::__is_callable_v); + const auto new_policy = policy.with(resource_ref); + static_assert(cuda::std::__is_callable_v); + auto&& result = cuda::mr::get_memory_resource(new_policy); + static_assert(cuda::std::is_same_v&>); + assert(resource == result); + } + + { // the policy can take a prvalue resource_ref as an environment -> remains resource_ref + test_resource resource{}; + static_assert(!cuda::std::__is_callable_v); + const auto new_policy = policy.with(cuda::mr::resource_ref<::cuda::mr::device_accessible>{resource}); + static_assert(cuda::std::__is_callable_v); + auto&& result = cuda::mr::get_memory_resource(new_policy); + static_assert(cuda::std::is_same_v&>); + } + + { // the policy can take a cuda::mr::any_resource by lvalue as an environment + cuda::mr::any_resource<::cuda::mr::device_accessible> resource{test_resource{}}; + static_assert(!cuda::std::__is_callable_v); + const auto new_policy = policy.with(resource); + static_assert(cuda::std::__is_callable_v); + auto&& result = cuda::mr::get_memory_resource(new_policy); + static_assert(cuda::std::is_same_v&>); + assert(resource == result); + } + + { // the policy can take a prvalue cuda::mr::any_resource as an environment -> needs to own + static_assert(!cuda::std::__is_callable_v); + const auto new_policy = policy.with( + cuda::mr::get_memory_resource, cuda::mr::any_resource<::cuda::mr::device_accessible>{test_resource{}}); + static_assert(cuda::std::__is_callable_v); + auto&& result = cuda::mr::get_memory_resource(new_policy); + static_assert(cuda::std::is_same_v&>); + } + + { // the policy can take an arbitrary tag that is queryable + const SomeProperty property{}; + static_assert(!cuda::std::__is_callable_v); + const auto new_policy = policy.with(property, SomeValue{42}); + static_assert(cuda::std::__is_callable_v); + auto&& result = property(new_policy); + assert(result.value == 42); + } +} + +bool test() +{ + test(cuda::std::execution::seq); + test(cuda::std::execution::par); + test(cuda::std::execution::par_unseq); + test(cuda::std::execution::unseq); + + // Cuda specific execution policy + test(cuda::execution::__cub_par_unseq); + + return true; +} + +int main(int, char**) +{ + NV_IF_TARGET(NV_IS_HOST, (test();)) + + return 0; +} diff --git a/libcudacxx/test/libcudacxx/std/utilities/expol/policies.compile.pass.cpp b/libcudacxx/test/libcudacxx/std/utilities/expol/policies.compile.pass.cpp index 247cd442a0e..5f522d535f3 100644 --- a/libcudacxx/test/libcudacxx/std/utilities/expol/policies.compile.pass.cpp +++ b/libcudacxx/test/libcudacxx/std/utilities/expol/policies.compile.pass.cpp @@ -24,33 +24,6 @@ #include "test_macros.h" -__host__ __device__ constexpr bool test() -{ - namespace execution = cuda::std::execution; - - assert(execution::seq == execution::seq); - assert(execution::par == execution::par); - assert(execution::par_unseq == execution::par_unseq); - assert(execution::unseq == execution::unseq); - - assert(!(execution::seq != execution::seq)); - assert(!(execution::par != execution::par)); - assert(!(execution::par_unseq != execution::par_unseq)); - assert(!(execution::unseq != execution::unseq)); - - assert(!(execution::seq == execution::unseq)); - assert(!(execution::par == execution::seq)); - assert(!(execution::par_unseq == execution::par)); - assert(!(execution::unseq == execution::par_unseq)); - - assert(execution::seq != execution::unseq); - assert(execution::par != execution::seq); - assert(execution::par_unseq != execution::par); - assert(execution::unseq != execution::par_unseq); - - return true; -} - template inline constexpr bool is_same_v = cuda::std::is_same_v, Policy>; @@ -61,7 +34,5 @@ static_assert(is_same_v