-
Notifications
You must be signed in to change notification settings - Fork 4.4k
/
Copy pathTestCUDAProducerGPUEWTask.cc
136 lines (114 loc) · 6.8 KB
/
TestCUDAProducerGPUEWTask.cc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
#include "FWCore/Concurrency/interface/FunctorTask.h"
#include "FWCore/Concurrency/interface/WaitingTask.h"
#include "FWCore/Concurrency/interface/WaitingTaskHolder.h"
#include "FWCore/Framework/interface/Event.h"
#include "FWCore/Framework/interface/Frameworkfwd.h"
#include "FWCore/Framework/interface/MakerMacros.h"
#include "FWCore/Framework/interface/stream/EDProducer.h"
#include "FWCore/ParameterSet/interface/ParameterSet.h"
#include "FWCore/ParameterSet/interface/ParameterSetDescription.h"
#include "FWCore/ServiceRegistry/interface/Service.h"
#include "CUDADataFormats/Common/interface/Product.h"
#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"
#include "HeterogeneousCore/CUDACore/interface/ScopedContext.h"
#include "HeterogeneousCore/CUDACore/interface/ContextState.h"
#include "HeterogeneousCore/CUDAServices/interface/CUDAInterface.h"
#include "HeterogeneousCore/CUDATest/interface/Thing.h"
#include "HeterogeneousCore/CUDAUtilities/interface/host_noncached_unique_ptr.h"
#include "TestCUDAProducerGPUKernel.h"
#include <thread>
class TestCUDAProducerGPUEWTask : public edm::stream::EDProducer<edm::ExternalWork> {
public:
explicit TestCUDAProducerGPUEWTask(edm::ParameterSet const& iConfig);
~TestCUDAProducerGPUEWTask() override = default;
static void fillDescriptions(edm::ConfigurationDescriptions& descriptions);
void acquire(edm::Event const& iEvent,
edm::EventSetup const& iSetup,
edm::WaitingTaskWithArenaHolder waitingTaskHolder) override;
void produce(edm::Event& iEvent, const edm::EventSetup& iSetup) override;
private:
void addSimpleWork(edm::EventNumber_t eventID, edm::StreamID streamID, cms::cuda::ScopedContextTask& ctx);
std::string const label_;
edm::EDGetTokenT<cms::cuda::Product<cms::cudatest::Thing>> const srcToken_;
edm::EDPutTokenT<cms::cuda::Product<cms::cudatest::Thing>> const dstToken_;
TestCUDAProducerGPUKernel gpuAlgo_;
cms::cuda::ContextState ctxState_;
cms::cuda::device::unique_ptr<float[]> devicePtr_;
cms::cuda::host::noncached::unique_ptr<float> hostData_;
};
TestCUDAProducerGPUEWTask::TestCUDAProducerGPUEWTask(edm::ParameterSet const& iConfig)
: label_{iConfig.getParameter<std::string>("@module_label")},
srcToken_{consumes<cms::cuda::Product<cms::cudatest::Thing>>(iConfig.getParameter<edm::InputTag>("src"))},
dstToken_{produces<cms::cuda::Product<cms::cudatest::Thing>>()} {
edm::Service<CUDAInterface> cuda;
if (cuda and cuda->enabled()) {
hostData_ = cms::cuda::make_host_noncached_unique<float>();
}
}
void TestCUDAProducerGPUEWTask::fillDescriptions(edm::ConfigurationDescriptions& descriptions) {
edm::ParameterSetDescription desc;
desc.add<edm::InputTag>("src", edm::InputTag());
descriptions.addWithDefaultLabel(desc);
descriptions.setComment(
"This EDProducer is part of the TestCUDAProducer* family. It models a GPU algorithm this is not the first "
"algorithm in the chain of the GPU EDProducers, and that transfers some data from GPU to CPU multiple times "
"alternating the transfers and kernel executions (e.g. to decide which kernel to run next based on a value from "
"GPU). A synchronization between GPU and CPU is needed after each transfer. The synchronizations are implemented "
"with the ExternalWork extension and explicit TBB tasks within the module. Produces "
"cms::cuda::Product<cms::cudatest::Thing>.");
}
void TestCUDAProducerGPUEWTask::acquire(edm::Event const& iEvent,
edm::EventSetup const& iSetup,
edm::WaitingTaskWithArenaHolder waitingTaskHolder) {
edm::LogVerbatim("TestCUDAProducerGPUEWTask") << label_ << " TestCUDAProducerGPUEWTask::acquire begin event "
<< iEvent.id().event() << " stream " << iEvent.streamID();
auto const& in = iEvent.get(srcToken_);
cms::cuda::ScopedContextAcquire ctx{in, waitingTaskHolder, ctxState_};
cms::cudatest::Thing const& input = ctx.get(in);
devicePtr_ = gpuAlgo_.runAlgo(label_, input.get(), ctx.stream());
// Mimick the need to transfer some of the GPU data back to CPU to
// be used for something within this module, or to be put in the
// event.
cudaCheck(
cudaMemcpyAsync(hostData_.get(), devicePtr_.get() + 10, sizeof(float), cudaMemcpyDeviceToHost, ctx.stream()));
// Push a task to run addSimpleWork() after the asynchronous work
// (and acquire()) has finished instead of produce()
ctx.pushNextTask([iev = iEvent.id().event(), istr = iEvent.streamID(), this](cms::cuda::ScopedContextTask ctx) {
addSimpleWork(iev, istr, ctx);
});
edm::LogVerbatim("TestCUDAProducerGPUEWTask") << label_ << " TestCUDAProducerGPUEWTask::acquire end event "
<< iEvent.id().event() << " stream " << iEvent.streamID();
}
void TestCUDAProducerGPUEWTask::addSimpleWork(edm::EventNumber_t eventID,
edm::StreamID streamID,
cms::cuda::ScopedContextTask& ctx) {
if (*hostData_ < 13) {
edm::LogVerbatim("TestCUDAProducerGPUEWTask")
<< label_ << " TestCUDAProducerGPUEWTask::addSimpleWork begin event " << eventID << " stream " << streamID
<< " 10th element " << *hostData_ << " not satisfied, queueing more work";
cudaCheck(
cudaMemcpyAsync(hostData_.get(), devicePtr_.get() + 10, sizeof(float), cudaMemcpyDeviceToHost, ctx.stream()));
ctx.pushNextTask(
[eventID, streamID, this](cms::cuda::ScopedContextTask ctx) { addSimpleWork(eventID, streamID, ctx); });
gpuAlgo_.runSimpleAlgo(devicePtr_.get(), ctx.stream());
edm::LogVerbatim("TestCUDAProducerGPUEWTask")
<< label_ << " TestCUDAProducerGPUEWTask::addSimpleWork end event " << eventID << " stream " << streamID;
} else {
edm::LogVerbatim("TestCUDAProducerGPUEWTask")
<< label_ << " TestCUDAProducerGPUEWTask::addSimpleWork event " << eventID << " stream " << streamID
<< " 10th element " << *hostData_ << " not queueing more work";
}
}
void TestCUDAProducerGPUEWTask::produce(edm::Event& iEvent, edm::EventSetup const& iSetup) {
edm::LogVerbatim("TestCUDAProducerGPUEWTask")
<< label_ << " TestCUDAProducerGPUEWTask::produce begin event " << iEvent.id().event() << " stream "
<< iEvent.streamID() << " 10th element " << *hostData_;
if (*hostData_ != 13) {
throw cms::Exception("Assert") << "Expecting 10th element to be 13, got " << *hostData_;
}
cms::cuda::ScopedContextProduce ctx{ctxState_};
ctx.emplace(iEvent, dstToken_, std::move(devicePtr_));
edm::LogVerbatim("TestCUDAProducerGPUEWTask") << label_ << " TestCUDAProducerGPUEWTask::produce end event "
<< iEvent.id().event() << " stream " << iEvent.streamID();
}
DEFINE_FWK_MODULE(TestCUDAProducerGPUEWTask);