Skip to content

Commit 6d62b3d

Browse files
committed
Boolean flag per device
1 parent 5a160b2 commit 6d62b3d

File tree

2 files changed

+10
-5
lines changed

2 files changed

+10
-5
lines changed

HeterogeneousCore/CUDACore/interface/CUDAESManaged.h

+1 -1
Original file line number | Diff line number | Diff line change
@@ -51,7 +51,7 @@ class CUDAESManaged {
5151

5252
private:
5353
std::vector<std::pair<void *, size_t> > buffers_;
54-
mutable std::atomic<bool> prefetched_;
54+
mutable std::vector<std::atomic<bool>> prefetched_;
5555
};
5656

5757
#endif

HeterogeneousCore/CUDACore/src/CUDAESManaged.cc

+9 -4
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,11 @@
11
#include "HeterogeneousCore/CUDACore/interface/CUDAESManaged.h"
2+
#include "HeterogeneousCore/CUDAServices/interface/numberOfCUDADevices.h"
23

3-
4-
CUDAESManaged::CUDAESManaged(): prefetched_(false) {}
4+
CUDAESManaged::CUDAESManaged(): prefetched_(numberOfCUDADevices()) {
5+
for(auto& pref: prefetched_) {
6+
pref.store(false);
7+
}
8+
}
59

610
CUDAESManaged::~CUDAESManaged() {
711
for(auto& ptrSize: buffers_) {
@@ -19,12 +23,13 @@ void CUDAESManaged::prefetchAsync(cuda::stream_t<>& stream) const {
1923
// The boolean atomic is an optimization attempt, it doesn't really
2024
// matter if more than one thread/edm stream issues the prefetches
2125
// as long as most of the prefetches are avoided.
22-
if(prefetched_.load())
26+
auto& pref = prefetched_[stream.device_id()];
27+
if(pref.load())
2328
return;
2429

2530
for(const auto& ptrSize: buffers_) {
2631
cudaMemPrefetchAsync(ptrSize.first, ptrSize.second, stream.device_id(), stream.id());
2732
}
2833

29-
prefetched_.store(true);
34+
pref.store(true);
3035
}

0 commit comments

Comments
 (0)