Skip to content

Commit 52591c2

Browse files
authored
Merge branch 'main' into stf_deprecate_scheduler
2 parents a7b05a9 + cf864f5 commit 52591c2

File tree

9 files changed

+16
-84
lines changed

9 files changed

+16
-84
lines changed

cudax/examples/stf/linear_algebra/cg_dense_2D.cu

Lines changed: 10 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -29,7 +29,8 @@ public:
2929
{
3030
h_addr.reset(new double[N * N]);
3131
cuda_safe_call(cudaHostRegister(h_addr.get(), N * N * sizeof(double), cudaHostRegisterPortable));
32-
handle = to_shared(ctx.logical_data(make_slice(h_addr.get(), std::tuple{N, N}, N)));
32+
handle = ::std::make_shared<logical_data<slice<double, 2>>>(
33+
ctx.logical_data(make_slice(h_addr.get(), std::tuple{N, N}, N)));
3334
}
3435

3536
void fill(const std::function<double(int, int)>& f)
@@ -68,7 +69,7 @@ public:
6869
for (size_t b = 0; b < nblocks; b++)
6970
{
7071
size_t bs = std::min(N - block_size * b, block_size);
71-
handles[b] = to_shared(ctx.logical_data(shape_of<slice<double>>(bs)));
72+
handles[b] = ::std::make_shared<logical_data<slice<double>>>(ctx.logical_data(shape_of<slice<double>>(bs)));
7273
}
7374
}
7475
else
@@ -77,8 +78,9 @@ public:
7778
cuda_safe_call(cudaHostRegister(h_addr.get(), N * sizeof(double), cudaHostRegisterPortable));
7879
for (size_t b = 0; b < nblocks; b++)
7980
{
80-
size_t bs = std::min(N - block_size * b, block_size);
81-
handles[b] = to_shared(ctx.logical_data(make_slice(&h_addr[block_size * b], bs)));
81+
size_t bs = std::min(N - block_size * b, block_size);
82+
handles[b] =
83+
::std::make_shared<logical_data<slice<double>>>(ctx.logical_data(make_slice(&h_addr[block_size * b], bs)));
8284
}
8385
}
8486
}
@@ -94,7 +96,7 @@ public:
9496
for (size_t b = 0; b < nblocks; b++)
9597
{
9698
size_t bs = std::min(N - block_size * b, block_size);
97-
handles[b] = to_shared(ctx.logical_data(shape_of<slice<double>>(bs)));
99+
handles[b] = ::std::make_shared<logical_data<slice<double>>>(ctx.logical_data(shape_of<slice<double>>(bs)));
98100

99101
ctx.task(handles[b]->write(), a.handles[b]->read())->*[bs](cudaStream_t stream, auto dthis, auto da) {
100102
// There are likely much more efficient ways.
@@ -154,13 +156,13 @@ public:
154156
if (is_tmp)
155157
{
156158
// There is no physical backing for this temporary vector
157-
handle = to_shared(ctx.logical_data(shape_of<slice<double>>(1)));
159+
handle = ::std::make_shared<logical_data<slice<double>>>(ctx.logical_data(shape_of<slice<double>>(1)));
158160
}
159161
else
160162
{
161163
h_addr.reset(new double);
162164
cuda_safe_call(cudaHostRegister(h_addr.get(), s, cudaHostRegisterPortable));
163-
handle = to_shared(ctx.logical_data(make_slice(h_addr.get(), 1)));
165+
handle = ::std::make_shared<logical_data<slice<double>>>(ctx.logical_data(make_slice(h_addr.get(), 1)));
164166
}
165167
}
166168

@@ -170,7 +172,7 @@ public:
170172
// Copy constructor
171173
scalar(const scalar& a)
172174
{
173-
handle = to_shared(ctx.logical_data(shape_of<slice<double>>(1)));
175+
handle = ::std::make_shared<logical_data<slice<double>>>(ctx.logical_data(shape_of<slice<double>>(1)));
174176

175177
ctx.task(handle->write(), a.handle->read())->*[](cudaStream_t stream, auto dthis, auto da) {
176178
// There are likely much more efficient ways.

cudax/include/cuda/experimental/__stf/internal/cooperative_group_system.cuh

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -20,7 +20,6 @@
2020
# pragma system_header
2121
#endif // no system header
2222

23-
#include <cuda/experimental/__stf/utility/cuda_attributes.cuh>
2423
#if _CCCL_CUDA_COMPILATION()
2524
# include <cooperative_groups.h>
2625
#endif // _CCCL_CUDA_COMPILATION()

cudax/include/cuda/experimental/__stf/internal/execution_policy.cuh

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -24,7 +24,6 @@
2424
#endif // no system header
2525

2626
#include <cuda/experimental/__stf/utility/core.cuh>
27-
#include <cuda/experimental/__stf/utility/cuda_attributes.cuh>
2827
#include <cuda/experimental/__stf/utility/cuda_safe_call.cuh>
2928

3029
#include <cassert>

cudax/include/cuda/experimental/__stf/internal/hashtable_linearprobing.cuh

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -27,7 +27,6 @@
2727
# pragma system_header
2828
#endif // no system header
2929

30-
#include <cuda/experimental/__stf/utility/cuda_attributes.cuh>
3130
#include <cuda/experimental/__stf/utility/cuda_safe_call.cuh>
3231
#include <cuda/experimental/__stf/utility/hash.cuh>
3332

cudax/include/cuda/experimental/__stf/internal/parallel_for_scope.cuh

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -61,9 +61,9 @@ __global__ void loop(const _CCCL_GRID_CONSTANT size_t n, shape_t shape, F f, tup
6161

6262
// This will explode the targs tuple into a pack of data
6363
// Help the compiler which may not detect that a device lambda is calling a device lambda
64-
CUDASTF_NO_DEVICE_STACK
64+
_CCCL_DIAG_SUPPRESS_NVHPC(no_device_stack)
6565
auto const explode_args = [&](auto&... data) {
66-
CUDASTF_NO_DEVICE_STACK
66+
_CCCL_DIAG_SUPPRESS_NVHPC(no_device_stack)
6767
auto const explode_coords = [&](auto&&... coords) {
6868
// No move/forward for `data` because it's used multiple times.
6969
f(::std::forward<decltype(coords)>(coords)..., data...);
@@ -303,9 +303,9 @@ __global__ void loop_redux(
303303
// This is used to build the arguments passed to the user-provided lambda function.
304304

305305
// Help the compiler which may not detect that a device lambda is calling a device lambda
306-
CUDASTF_NO_DEVICE_STACK
306+
_CCCL_DIAG_SUPPRESS_NVHPC(no_device_stack)
307307
const auto explode_args = [&](auto&&... data) {
308-
CUDASTF_NO_DEVICE_STACK
308+
_CCCL_DIAG_SUPPRESS_NVHPC(no_device_stack)
309309
const auto explode_coords = [&](auto&&... coords) {
310310
// No move/forward for `data` because it's used multiple times.
311311
f(::std::forward<decltype(coords)>(coords)..., data...);

cudax/include/cuda/experimental/__stf/utility/cartesian_iterator.cuh

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -20,7 +20,6 @@
2020
# pragma system_header
2121
#endif // no system header
2222

23-
#include <cuda/experimental/__stf/utility/cuda_attributes.cuh>
2423
#include <cuda/experimental/__stf/utility/unittest.cuh>
2524

2625
namespace cuda::experimental::stf::reserved

cudax/include/cuda/experimental/__stf/utility/core.cuh

Lines changed: 0 additions & 36 deletions
Original file line number | Diff line number | Diff line change
@@ -24,13 +24,9 @@
2424
# pragma system_header
2525
#endif // no system header
2626

27-
#include <cuda/experimental/__stf/utility/cuda_attributes.cuh>
28-
2927
#include <cstddef>
3028
#include <functional>
3129
#include <limits>
32-
#include <memory>
33-
#include <string>
3430
#include <tuple>
3531
#include <type_traits>
3632
#include <utility>
@@ -98,38 +94,6 @@ _CCCL_HOST_DEVICE constexpr decltype(auto) mv(T&& obj)
9894
}
9995
#endif // _CCCL_DOXYGEN_INVOKED
10096

101-
/**
102-
* @brief Creates a `std::shared_ptr` managing a copy of the given object.
103-
*
104-
* This function takes an object of any type and returns a `std::shared_ptr`
105-
* that manages a copy of that object. If the object is an lvalue reference,
106-
* it will be copied into the `shared_ptr`. If the object is an rvalue reference,
107-
* it will be moved into the `shared_ptr`.
108-
*
109-
* The type managed by the `shared_ptr` has all references and `const`/`volatile`
110-
* qualifiers removed from the original type.
111-
*
112-
* @tparam T The type of the object, deduced automatically. May be an lvalue or rvalue reference.
113-
* @param obj The object to copy into the instance managed by the `shared_ptr`.
114-
* @return A `std::shared_ptr` managing a new copy of the object.
115-
*
116-
* @note This function simplifies the creation of `std::shared_ptr`s by handling
117-
* the type deduction and appropriate forwarding of the object. It's particularly
118-
* useful when you want to create a `shared_ptr` from temporary objects or when
119-
* the object's type includes references or cv-qualifiers.
120-
*
121-
* @code
122-
* int value = 42;
123-
* auto sp1 = to_shared(value); // New shared_ptr<int>
124-
* assert(*sp1 == 42); // sp1 points to an int valued at 42
125-
* @endcode
126-
*/
127-
template <typename T>
128-
auto to_shared(T&& obj)
129-
{
130-
return ::std::make_shared<::std::remove_cv_t<::std::remove_reference_t<T>>>(::std::forward<T>(obj));
131-
}
132-
13397
/**
13498
* @brief Create an iterable range from 'from' to 'to'
13599
*

cudax/include/cuda/experimental/__stf/utility/cuda_attributes.cuh

Lines changed: 0 additions & 29 deletions
This file was deleted.

cudax/include/cuda/experimental/__stf/utility/dimensions.cuh

Lines changed: 2 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -25,7 +25,6 @@
2525
# pragma system_header
2626
#endif // no system header
2727

28-
#include <cuda/experimental/__stf/utility/cuda_attributes.cuh>
2928
#include <cuda/experimental/__stf/utility/hash.cuh>
3029
#include <cuda/experimental/__stf/utility/unittest.cuh>
3130

@@ -491,7 +490,7 @@ public:
491490
_CCCL_HOST_DEVICE coords_t index_to_coords(size_t index) const
492491
{
493492
// Help the compiler which may not detect that a device lambda is calling a device lambda
494-
CUDASTF_NO_DEVICE_STACK
493+
_CCCL_DIAG_SUPPRESS_NVHPC(no_device_stack)
495494
return make_tuple_indexwise<dimensions>([&](auto i) {
496495
// included
497496
const ::std::ptrdiff_t begin_i = get_begin(i);
@@ -500,7 +499,7 @@ public:
500499
index /= extent_i;
501500
return result;
502501
});
503-
CUDASTF_NO_DEVICE_STACK
502+
_CCCL_DIAG_SUPPRESS_NVHPC(no_device_stack)
504503
}
505504

506505
private:

0 commit comments

Comments
 (0)