Skip to content

Commit 586c0c0

Browse files
committed
Synchronise with CMSSW_10_6_0_pre2
2 parents 1313262 + ea82bb5 commit 586c0c0

File tree

338 files changed

+28972
-384
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

338 files changed

+28972
-384
lines changed

CUDADataFormats/Common/BuildFile.xml

+7
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
<use name="cuda-api-wrappers"/>
2+
<use name="FWCore/ServiceRegistry"/>
3+
<use name="HeterogeneousCore/CUDAServices"/>
4+
5+
<export>
6+
<lib name="1"/>
7+
</export>
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
#ifndef CUDADataFormats_Common_CUDAProduct_h
2+
#define CUDADataFormats_Common_CUDAProduct_h
3+
4+
#include <memory>
5+
6+
#include <cuda/api_wrappers.h>
7+
8+
#include "CUDADataFormats/Common/interface/CUDAProductBase.h"
9+
10+
namespace edm {
11+
template <typename T> class Wrapper;
12+
}
13+
14+
/**
15+
* The purpose of this class is to wrap CUDA data to edm::Event in a
16+
* way which forces correct use of various utilities.
17+
*
18+
* The non-default construction has to be done with CUDAScopedContext
19+
* (in order to properly register the CUDA event).
20+
*
21+
* The default constructor is needed only for the ROOT dictionary generation.
22+
*
23+
* The CUDA event is in practice needed only for stream-stream
24+
* synchronization, but someone with long-enough lifetime has to own
25+
* it. Here is a somewhat natural place. If overhead is too much, we
26+
* can e.g. make CUDAService own them (creating them on demand) and
27+
* use them only where synchronization between streams is needed.
28+
*/
29+
template <typename T>
30+
class CUDAProduct: public CUDAProductBase {
31+
public:
32+
CUDAProduct() = default; // Needed only for ROOT dictionary generation
33+
34+
CUDAProduct(const CUDAProduct&) = delete;
35+
CUDAProduct& operator=(const CUDAProduct&) = delete;
36+
CUDAProduct(CUDAProduct&&) = default;
37+
CUDAProduct& operator=(CUDAProduct&&) = default;
38+
39+
private:
40+
friend class CUDAScopedContext;
41+
friend class edm::Wrapper<CUDAProduct<T>>;
42+
43+
explicit CUDAProduct(int device, std::shared_ptr<cuda::stream_t<>> stream, std::shared_ptr<cuda::event_t> event, T data):
44+
CUDAProductBase(device, std::move(stream), std::move(event)),
45+
data_(std::move(data))
46+
{}
47+
48+
T data_; //!
49+
};
50+
51+
#endif
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
#ifndef CUDADataFormats_Common_CUDAProductBase_h
2+
#define CUDADataFormats_Common_CUDAProductBase_h
3+
4+
#include <memory>
5+
6+
#include <cuda/api_wrappers.h>
7+
8+
/**
9+
* Base class for all instantiations of CUDA<T> to hold the
10+
* non-T-dependent members.
11+
*/
12+
class CUDAProductBase {
13+
public:
14+
CUDAProductBase() = default; // Needed only for ROOT dictionary generation
15+
16+
bool isValid() const { return stream_.get() != nullptr; }
17+
bool isAvailable() const;
18+
19+
int device() const { return device_; }
20+
21+
const cuda::stream_t<>& stream() const { return *stream_; }
22+
cuda::stream_t<>& stream() { return *stream_; }
23+
const std::shared_ptr<cuda::stream_t<>>& streamPtr() const { return stream_; }
24+
25+
const cuda::event_t *event() const { return event_.get(); }
26+
cuda::event_t *event() { return event_.get(); }
27+
28+
protected:
29+
explicit CUDAProductBase(int device, std::shared_ptr<cuda::stream_t<>> stream, std::shared_ptr<cuda::event_t> event);
30+
31+
private:
32+
// The cuda::stream_t is really shared among edm::Event products, so
33+
// using shared_ptr also here
34+
std::shared_ptr<cuda::stream_t<>> stream_; //!
35+
// shared_ptr because of caching in CUDAService
36+
std::shared_ptr<cuda::event_t> event_; //!
37+
38+
int device_ = -1; //!
39+
};
40+
41+
#endif
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
#include "CUDADataFormats/Common/interface/CUDAProductBase.h"
2+
3+
#include "FWCore/ServiceRegistry/interface/Service.h"
4+
#include "HeterogeneousCore/CUDAServices/interface/CUDAService.h"
5+
6+
CUDAProductBase::CUDAProductBase(int device, std::shared_ptr<cuda::stream_t<>> stream, std::shared_ptr<cuda::event_t> event):
7+
stream_(std::move(stream)),
8+
event_(std::move(event)),
9+
device_(device)
10+
{}
11+
12+
bool CUDAProductBase::isAvailable() const {
13+
// In absence of event, the product was available already at the end
14+
// of produce() of the producer.
15+
if(not event_) {
16+
return true;
17+
}
18+
return event_->has_occurred();
19+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
<bin file="test*.cc" name="testCUDADataFormatsCommon">
2+
<use name="HeterogeneousCore/CUDACore"/>
3+
<use name="catch2"/>
4+
<use name="cuda"/>
5+
</bin>
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
#include "catch.hpp"
2+
3+
#include "CUDADataFormats/Common/interface/CUDAProduct.h"
4+
#include "HeterogeneousCore/CUDACore/interface/CUDAScopedContext.h"
5+
#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"
6+
#include "HeterogeneousCore/CUDAUtilities/interface/exitSansCUDADevices.h"
7+
8+
#include <cuda_runtime_api.h>
9+
10+
namespace cudatest {
11+
class TestCUDAScopedContext {
12+
public:
13+
static
14+
CUDAScopedContext make(int dev, bool createEvent) {
15+
auto device = cuda::device::get(dev);
16+
std::unique_ptr<cuda::event_t> event;
17+
if(createEvent) {
18+
event = std::make_unique<cuda::event_t>(device.create_event());
19+
}
20+
return CUDAScopedContext(dev,
21+
std::make_unique<cuda::stream_t<>>(device.create_stream(cuda::stream::implicitly_synchronizes_with_default_stream)),
22+
std::move(event));
23+
}
24+
};
25+
}
26+
27+
TEST_CASE("Use of CUDAProduct template", "[CUDACore]") {
28+
SECTION("Default constructed") {
29+
auto foo = CUDAProduct<int>();
30+
REQUIRE(!foo.isValid());
31+
32+
auto bar = std::move(foo);
33+
}
34+
35+
exitSansCUDADevices();
36+
37+
constexpr int defaultDevice = 0;
38+
{
39+
auto ctx = cudatest::TestCUDAScopedContext::make(defaultDevice, true);
40+
std::unique_ptr<CUDAProduct<int>> dataPtr = ctx.wrap(10);
41+
auto& data = *dataPtr;
42+
43+
SECTION("Construct from CUDAScopedContext") {
44+
REQUIRE(data.isValid());
45+
REQUIRE(data.device() == defaultDevice);
46+
REQUIRE(data.stream().id() == ctx.stream().id());
47+
REQUIRE(data.event() != nullptr);
48+
}
49+
50+
SECTION("Move constructor") {
51+
auto data2 = CUDAProduct<int>(std::move(data));
52+
REQUIRE(data2.isValid());
53+
REQUIRE(!data.isValid());
54+
}
55+
56+
SECTION("Move assignment") {
57+
CUDAProduct<int> data2;
58+
data2 = std::move(data);
59+
REQUIRE(data2.isValid());
60+
REQUIRE(!data.isValid());
61+
}
62+
}
63+
64+
// Destroy and clean up all resources so that the next test can
65+
// assume to start from a clean state.
66+
cudaCheck(cudaSetDevice(defaultDevice));
67+
cudaCheck(cudaDeviceSynchronize());
68+
cudaDeviceReset();
69+
}
+2
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
#define CATCH_CONFIG_MAIN
2+
#include "catch.hpp"
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
<use name="FWCore/ServiceRegistry"/>
2+
<use name="HeterogeneousCore/CUDAServices"/>
3+
<use name="cuda-api-wrappers"/>
4+
<use name="rootcore"/>
5+
6+
<export>
7+
<lib name="1"/>
8+
</export>
9+
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
#ifndef CUDADataFormats_SiPixelCluster_interface_SiPixelClustersCUDA_h
2+
#define CUDADataFormats_SiPixelCluster_interface_SiPixelClustersCUDA_h
3+
4+
#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h"
5+
#include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h"
6+
7+
#include <cuda/api_wrappers.h>
8+
9+
class SiPixelClustersCUDA {
10+
public:
11+
SiPixelClustersCUDA() = default;
12+
explicit SiPixelClustersCUDA(size_t maxClusters, cuda::stream_t<>& stream);
13+
~SiPixelClustersCUDA() = default;
14+
15+
SiPixelClustersCUDA(const SiPixelClustersCUDA&) = delete;
16+
SiPixelClustersCUDA& operator=(const SiPixelClustersCUDA&) = delete;
17+
SiPixelClustersCUDA(SiPixelClustersCUDA&&) = default;
18+
SiPixelClustersCUDA& operator=(SiPixelClustersCUDA&&) = default;
19+
20+
void setNClusters(uint32_t nClusters) {
21+
nClusters_h = nClusters;
22+
}
23+
24+
uint32_t nClusters() const { return nClusters_h; }
25+
26+
uint32_t *moduleStart() { return moduleStart_d.get(); }
27+
uint32_t *clusInModule() { return clusInModule_d.get(); }
28+
uint32_t *moduleId() { return moduleId_d.get(); }
29+
uint32_t *clusModuleStart() { return clusModuleStart_d.get(); }
30+
31+
uint32_t const *moduleStart() const { return moduleStart_d.get(); }
32+
uint32_t const *clusInModule() const { return clusInModule_d.get(); }
33+
uint32_t const *moduleId() const { return moduleId_d.get(); }
34+
uint32_t const *clusModuleStart() const { return clusModuleStart_d.get(); }
35+
36+
uint32_t const *c_moduleStart() const { return moduleStart_d.get(); }
37+
uint32_t const *c_clusInModule() const { return clusInModule_d.get(); }
38+
uint32_t const *c_moduleId() const { return moduleId_d.get(); }
39+
uint32_t const *c_clusModuleStart() const { return clusModuleStart_d.get(); }
40+
41+
class DeviceConstView {
42+
public:
43+
DeviceConstView() = default;
44+
45+
#ifdef __CUDACC__
46+
__device__ __forceinline__ uint32_t moduleStart(int i) const { return __ldg(moduleStart_+i); }
47+
__device__ __forceinline__ uint32_t clusInModule(int i) const { return __ldg(clusInModule_+i); }
48+
__device__ __forceinline__ uint32_t moduleId(int i) const { return __ldg(moduleId_+i); }
49+
__device__ __forceinline__ uint32_t clusModuleStart(int i) const { return __ldg(clusModuleStart_+i); }
50+
#endif
51+
52+
friend SiPixelClustersCUDA;
53+
54+
private:
55+
uint32_t const *moduleStart_;
56+
uint32_t const *clusInModule_;
57+
uint32_t const *moduleId_;
58+
uint32_t const *clusModuleStart_;
59+
};
60+
61+
DeviceConstView *view() const { return view_d.get(); }
62+
63+
private:
64+
cudautils::device::unique_ptr<uint32_t[]> moduleStart_d; // index of the first pixel of each module
65+
cudautils::device::unique_ptr<uint32_t[]> clusInModule_d; // number of clusters found in each module
66+
cudautils::device::unique_ptr<uint32_t[]> moduleId_d; // module id of each module
67+
68+
// originally from rechits
69+
cudautils::device::unique_ptr<uint32_t[]> clusModuleStart_d;
70+
71+
cudautils::device::unique_ptr<DeviceConstView> view_d; // "me" pointer
72+
73+
uint32_t nClusters_h;
74+
};
75+
76+
#endif
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
#include "CUDADataFormats/SiPixelCluster/interface/SiPixelClustersCUDA.h"
2+
3+
#include "FWCore/ServiceRegistry/interface/Service.h"
4+
#include "HeterogeneousCore/CUDAServices/interface/CUDAService.h"
5+
#include "HeterogeneousCore/CUDAUtilities/interface/copyAsync.h"
6+
7+
SiPixelClustersCUDA::SiPixelClustersCUDA(size_t maxClusters, cuda::stream_t<>& stream) {
8+
edm::Service<CUDAService> cs;
9+
10+
moduleStart_d = cs->make_device_unique<uint32_t[]>(maxClusters+1, stream);
11+
clusInModule_d = cs->make_device_unique<uint32_t[]>(maxClusters, stream);
12+
moduleId_d = cs->make_device_unique<uint32_t[]>(maxClusters, stream);
13+
clusModuleStart_d = cs->make_device_unique<uint32_t[]>(maxClusters+1, stream);
14+
15+
auto view = cs->make_host_unique<DeviceConstView>(stream);
16+
view->moduleStart_ = moduleStart_d.get();
17+
view->clusInModule_ = clusInModule_d.get();
18+
view->moduleId_ = moduleId_d.get();
19+
view->clusModuleStart_ = clusModuleStart_d.get();
20+
21+
view_d = cs->make_device_unique<DeviceConstView>(stream);
22+
cudautils::copyAsync(view_d, view, stream);
23+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
#ifndef CUDADataFormats_SiPixelCluster_classes_h
2+
#define CUDADataFormats_SiPixelCluster_classes_h
3+
4+
#include "CUDADataFormats/Common/interface/CUDAProduct.h"
5+
#include "CUDADataFormats/SiPixelCluster/interface/SiPixelClustersCUDA.h"
6+
#include "DataFormats/Common/interface/Wrapper.h"
7+
8+
#endif
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
<lcgdict>
2+
<class name="CUDAProduct<SiPixelClustersCUDA>" persistent="false"/>
3+
<class name="edm::Wrapper<CUDAProduct<SiPixelClustersCUDA>>" persistent="false"/>
4+
</lcgdict>
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
<use name="DataFormats/SiPixelRawData"/>
2+
<use name="FWCore/ServiceRegistry"/>
3+
<use name="HeterogeneousCore/CUDAServices"/>
4+
<use name="cuda-api-wrappers"/>
5+
<use name="rootcore"/>
6+
7+
<export>
8+
<lib name="1"/>
9+
</export>
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
#ifndef CUDADataFormats_SiPixelDigi_interface_SiPixelDigiErrorsCUDA_h
2+
#define CUDADataFormats_SiPixelDigi_interface_SiPixelDigiErrorsCUDA_h
3+
4+
#include "DataFormats/SiPixelDigi/interface/PixelErrors.h"
5+
#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h"
6+
#include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h"
7+
#include "HeterogeneousCore/CUDAUtilities/interface/GPUSimpleVector.h"
8+
9+
#include <cuda/api_wrappers.h>
10+
11+
class SiPixelDigiErrorsCUDA {
12+
public:
13+
SiPixelDigiErrorsCUDA() = default;
14+
explicit SiPixelDigiErrorsCUDA(size_t maxFedWords, PixelFormatterErrors errors, cuda::stream_t<>& stream);
15+
~SiPixelDigiErrorsCUDA() = default;
16+
17+
SiPixelDigiErrorsCUDA(const SiPixelDigiErrorsCUDA&) = delete;
18+
SiPixelDigiErrorsCUDA& operator=(const SiPixelDigiErrorsCUDA&) = delete;
19+
SiPixelDigiErrorsCUDA(SiPixelDigiErrorsCUDA&&) = default;
20+
SiPixelDigiErrorsCUDA& operator=(SiPixelDigiErrorsCUDA&&) = default;
21+
22+
const PixelFormatterErrors& formatterErrors() const { return formatterErrors_h; }
23+
24+
GPU::SimpleVector<PixelErrorCompact> *error() { return error_d.get(); }
25+
GPU::SimpleVector<PixelErrorCompact> const *error() const { return error_d.get(); }
26+
GPU::SimpleVector<PixelErrorCompact> const *c_error() const { return error_d.get(); }
27+
28+
using HostDataError = std::pair<GPU::SimpleVector<PixelErrorCompact>, cudautils::host::unique_ptr<PixelErrorCompact[]>>;
29+
HostDataError dataErrorToHostAsync(cuda::stream_t<>& stream) const;
30+
31+
void copyErrorToHostAsync(cuda::stream_t<>& stream);
32+
33+
private:
34+
cudautils::device::unique_ptr<PixelErrorCompact[]> data_d;
35+
cudautils::device::unique_ptr<GPU::SimpleVector<PixelErrorCompact>> error_d;
36+
cudautils::host::unique_ptr<GPU::SimpleVector<PixelErrorCompact>> error_h;
37+
PixelFormatterErrors formatterErrors_h;
38+
};
39+
40+
#endif

0 commit comments

Comments
 (0)