5
5
#pragma once
6
6
7
7
#include < array>
8
+ #include < atomic>
8
9
#include < memory>
10
+ #include < mutex>
9
11
#include < variant>
10
12
#include < vector>
11
13
14
+ #include < cuda_runtime.h>
15
+
12
16
#include < rmm/device_buffer.hpp>
13
17
14
18
#include < rapidsmpf/error.hpp>
15
19
16
20
namespace rapidsmpf {
17
21
18
22
class BufferResource ;
23
// NOTE(review): this forward-declares `rapidsmpf::Event` at namespace scope, which
// is a different type from the nested `Buffer::Event` defined further down; inside
// `Buffer` the unqualified name `Event` resolves to the nested class. Confirm that
// a namespace-scope `Event` is actually needed.
+ class Event ;
19
24
20
25
// / @brief Enum representing the type of memory.
21
26
enum class MemoryType : int {
@@ -26,17 +31,81 @@ enum class MemoryType : int {
26
31
// / @brief Array of all the different memory types.
27
32
constexpr std::array<MemoryType, 2 > MEMORY_TYPES{{MemoryType::DEVICE, MemoryType::HOST}};
28
33
34
/**
 * @brief Helper for building an overload set out of several callables, typically
 * lambdas passed to `std::visit`.
 *
 * The helper inherits from every callable and pulls each `operator()` into its own
 * scope, so overload resolution picks the callable matching the visited alternative.
 *
 * @note Deliberately NOT placed in an unnamed namespace: this is a header, and an
 * unnamed namespace would give every translation unit its own distinct `overloaded`
 * type. Inline functions that use the helper would then have different definitions
 * in different translation units, which violates the one-definition rule.
 *
 * @tparam Ts Types of the callables to combine.
 */
template <class... Ts>
struct overloaded : Ts... {
    using Ts::operator()...;
};

/// @brief Explicit deduction guide so that `overloaded{f, g, ...}` deduces `Ts...`
/// from the constructor arguments (required in C++17).
template <class... Ts>
overloaded(Ts...) -> overloaded<Ts...>;
45
+
29
46
/* *
30
47
* @brief Buffer representing device or host memory.
31
48
*
32
49
* @note The constructors are private, use `BufferResource` to construct buffers.
33
50
* @note The memory type (e.g., host or device) is constant and cannot change during
34
51
* the buffer's lifetime.
52
+ * @note A buffer is a stream-ordered object. When passing it to a library which is
53
+ * not stream-aware, one must ensure that `is_ready` returns `true`; otherwise
54
+ * behaviour is undefined.
35
55
*/
36
56
class Buffer {
37
57
friend class BufferResource ;
38
58
39
59
public:
60
+ /* *
61
+ * @brief CUDA event to provide synchronization among a set of chunks.
62
+ *
63
+ * This event is used to serve as a synchronization point for a set of chunks
64
+ * given a user-specified stream.
65
+ *
66
+ * @note To prevent undefined behavior due to unfinished memory operations, events
67
+ * should be used in the following cases, if any of the operations below was
68
+ * performed *asynchronously with respect to the host*:
69
+ * 1. Before addressing a device buffer's allocation.
70
+ * 2. Before accessing a device buffer's data that has been copied from
71
+ * any location, or that has been processed by a CUDA kernel.
72
+ * 3. Before accessing a host buffer's data that has been copied from device,
73
+ * or processed by a CUDA kernel.
74
+ */
75
+ class Event {
76
+ public:
77
+ /* *
78
+ * @brief Construct a CUDA event for a given stream.
79
+ *
80
+ * @param stream CUDA stream used for device memory operations
81
+ */
82
+ Event (rmm::cuda_stream_view stream);
83
+
84
+ /* *
85
+ * @brief Destructor for Event.
86
+ *
87
+ * Cleans up the CUDA event if one was created.
88
+ */
89
+ ~Event ();
90
+
91
+ /* *
92
+ * @brief Check if the CUDA event has been completed.
93
+ *
94
+ * @return true if the event has been completed, false otherwise.
95
+ */
96
+ [[nodiscard]] bool is_ready ();
97
+
98
+ private:
99
+ cudaEvent_t event_; // /< CUDA event used to track device memory allocation
100
+ std::atomic<bool > done_{false
101
+ }; // /< Cache of the event status to avoid unnecessary queries.
102
+ mutable std::mutex mutex_; // /< Protects access to event_
103
+ std::atomic<bool > destroying_{false
104
+ }; // /< Flag to indicate destruction in progress
105
+ std::atomic<int > active_readers_{0
106
+ }; // /< Number of threads currently executing is_ready()
107
+ };
108
+
40
109
// / @brief Storage type for the device buffer.
41
110
using DeviceStorageT = std::unique_ptr<rmm::device_buffer>;
42
111
@@ -48,15 +117,6 @@ class Buffer {
48
117
*/
49
118
using StorageT = std::variant<DeviceStorageT, HostStorageT>;
50
119
51
- // / @brief Helper for overloaded lambdas for Storage types in StorageT
52
- template <class ... Ts>
53
- struct overloaded : Ts... {
54
- using Ts::operator ()...;
55
- };
56
- // / @brief Explicit deduction guide
57
- template <class ... Ts>
58
- overloaded (Ts...) -> overloaded<Ts...>;
59
-
60
120
/* *
61
121
* @brief Access the underlying host memory buffer (const).
62
122
*
@@ -112,7 +172,7 @@ class Buffer {
112
172
*
113
173
* @throws std::logic_error if the buffer is not initialized.
114
174
*/
115
- MemoryType constexpr mem_type () const {
175
+ [[nodiscard]] MemoryType constexpr mem_type () const {
116
176
return std::visit (
117
177
overloaded{
118
178
[](const HostStorageT&) -> MemoryType { return MemoryType::HOST; },
@@ -122,8 +182,16 @@ class Buffer {
122
182
);
123
183
}
124
184
125
- // / @brief Buffer has a move ctor but no copy or assign operator.
126
- Buffer (Buffer&&) = default ;
185
+ /* *
186
+ * @brief Check if the device memory operation has completed.
187
+ *
188
+ * @return true if the device memory operation has completed or no device
189
+ * memory operation was performed, false if it is still in progress.
190
+ */
191
+ [[nodiscard]] bool is_ready () const ;
192
+
193
+ // / @brief Delete move and copy constructors and assignment operators.
194
+ Buffer (Buffer&&) = delete ;
127
195
Buffer (Buffer const &) = delete ;
128
196
Buffer& operator =(Buffer& o) = delete ;
129
197
Buffer& operator =(Buffer&& o) = delete ;
@@ -143,13 +211,20 @@ class Buffer {
143
211
* @brief Construct a Buffer from device memory.
144
212
*
145
213
* @param device_buffer A unique pointer to a device buffer.
214
+ * @param stream CUDA stream used for the device buffer allocation.
146
215
* @param br Buffer resource for memory allocation.
216
+ * @param event The shared event to use for the buffer.
147
217
*
148
218
* @throws std::invalid_argument if `device_buffer` is null.
149
219
* @throws std::invalid_argument if `stream` or `br->mr` isn't the same as the one used by
150
220
* `device_buffer`.
151
221
*/
152
- Buffer (std::unique_ptr<rmm::device_buffer> device_buffer, BufferResource* br);
222
+ Buffer (
223
+ std::unique_ptr<rmm::device_buffer> device_buffer,
224
+ rmm::cuda_stream_view stream,
225
+ BufferResource* br,
226
+ std::shared_ptr<Event> event = nullptr
227
+ );
153
228
154
229
/* *
155
230
* @brief Access the underlying host memory buffer.
@@ -184,7 +259,7 @@ class Buffer {
184
259
/* *
185
260
* @brief Create a copy of this buffer using the same memory type.
186
261
*
187
- * @param stream CUDA stream used for device memory operations .
262
+ * @param stream CUDA stream used for the device buffer allocation and copy .
188
263
* @return A unique pointer to a new Buffer containing the copied data.
189
264
*/
190
265
[[nodiscard]] std::unique_ptr<Buffer> copy (rmm::cuda_stream_view stream) const ;
@@ -193,7 +268,7 @@ class Buffer {
193
268
* @brief Create a copy of this buffer using the specified memory type.
194
269
*
195
270
* @param target The target memory type.
196
- * @param stream CUDA stream used for device memory operations .
271
+ * @param stream CUDA stream used for device buffer allocation and copy .
197
272
* @return A unique pointer to a new Buffer containing the copied data.
198
273
*/
199
274
[[nodiscard]] std::unique_ptr<Buffer> copy (
@@ -208,6 +283,8 @@ class Buffer {
208
283
// / @brief The underlying storage host memory or device memory buffer (where
209
284
// / applicable).
210
285
StorageT storage_;
286
+ // / @brief CUDA event used to track copy operations
287
+ std::shared_ptr<Event> event_;
211
288
};
212
289
213
290
} // namespace rapidsmpf
0 commit comments