Skip to content

Commit 8acba84

Browse files
committed
Migrate SiPixelFedCablingMapGPUWrapper to unified memory
1 parent f74155a commit 8acba84

6 files changed

+43
-77
lines changed

RecoLocalTracker/SiPixelClusterizer/interface/SiPixelFedCablingMapGPU.h

+7-7
Original file line numberDiff line numberDiff line change
@@ -15,13 +15,13 @@ namespace pixelgpudetails {
1515
// TODO: since this has more information than just cabling map, maybe we should invent a better name?
1616
struct SiPixelFedCablingMapGPU {
1717
unsigned int size = 0;
18-
unsigned int * fed = nullptr;
19-
unsigned int * link = nullptr;
20-
unsigned int * roc = nullptr;
21-
unsigned int * RawId = nullptr;
22-
unsigned int * rocInDet = nullptr;
23-
unsigned int * moduleId = nullptr;
24-
unsigned char * badRocs = nullptr;
18+
unsigned int const* fed = nullptr;
19+
unsigned int const* link = nullptr;
20+
unsigned int const* roc = nullptr;
21+
unsigned int const* RawId = nullptr;
22+
unsigned int const* rocInDet = nullptr;
23+
unsigned int const* moduleId = nullptr;
24+
unsigned char const* badRocs = nullptr;
2525
};
2626

2727
#endif

RecoLocalTracker/SiPixelClusterizer/interface/SiPixelFedCablingMapGPUWrapper.h

+10-16
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
#ifndef RecoLocalTracker_SiPixelClusterizer_SiPixelFedCablingMapGPUWrapper_h
22
#define RecoLocalTracker_SiPixelClusterizer_SiPixelFedCablingMapGPUWrapper_h
33

4-
#include "HeterogeneousCore/CUDACore/interface/CUDAESProduct.h"
4+
#include "HeterogeneousCore/CUDACore/interface/CUDAESManaged.h"
55
#include "HeterogeneousCore/CUDAUtilities/interface/CUDAHostAllocator.h"
66
#include "RecoLocalTracker/SiPixelClusterizer/interface/SiPixelFedCablingMapGPU.h"
77

@@ -24,7 +24,7 @@ class SiPixelFedCablingMapGPUWrapper {
2424
bool hasQuality() const { return hasQuality_; }
2525

2626
// returns pointer to GPU memory
27-
const SiPixelFedCablingMapGPU *getGPUProductAsync(cuda::stream_t<>& cudaStream) const;
27+
SiPixelFedCablingMapGPU getGPUProductAsync(cuda::stream_t<>& cudaStream) const;
2828

2929

3030
// Allocates host and device memory, converts data to host memory,
@@ -46,22 +46,16 @@ class SiPixelFedCablingMapGPUWrapper {
4646
};
4747

4848
private:
49-
std::vector<unsigned int, CUDAHostAllocator<unsigned int>> fedMap;
50-
std::vector<unsigned int, CUDAHostAllocator<unsigned int>> linkMap;
51-
std::vector<unsigned int, CUDAHostAllocator<unsigned int>> rocMap;
52-
std::vector<unsigned int, CUDAHostAllocator<unsigned int>> RawId;
53-
std::vector<unsigned int, CUDAHostAllocator<unsigned int>> rocInDet;
54-
std::vector<unsigned int, CUDAHostAllocator<unsigned int>> moduleId;
55-
std::vector<unsigned char, CUDAHostAllocator<unsigned char>> badRocs;
49+
CUDAESManaged helper_;
50+
unsigned int *fedMap = nullptr;
51+
unsigned int *linkMap = nullptr;
52+
unsigned int *rocMap = nullptr;
53+
unsigned int *RawId = nullptr;
54+
unsigned int *rocInDet = nullptr;
55+
unsigned int *moduleId = nullptr;
56+
unsigned char *badRocs = nullptr;
5657
unsigned int size;
5758
bool hasQuality_;
58-
59-
struct GPUData {
60-
~GPUData();
61-
SiPixelFedCablingMapGPU *cablingMapHost = nullptr; // internal pointers are to GPU, struct itself is on CPU
62-
SiPixelFedCablingMapGPU *cablingMapDevice = nullptr; // same internal pointers as above, struct itself is on GPU
63-
};
64-
CUDAESProduct<GPUData> gpuData_;
6559
};
6660

6761

RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu

+9-9
Original file line numberDiff line numberDiff line change
@@ -178,9 +178,9 @@ namespace pixelgpudetails {
178178
return (1==((rawId>>25)&0x7));
179179
}
180180

181-
__device__ pixelgpudetails::DetIdGPU getRawId(const SiPixelFedCablingMapGPU * Map, uint32_t fed, uint32_t link, uint32_t roc) {
181+
__device__ pixelgpudetails::DetIdGPU getRawId(SiPixelFedCablingMapGPU Map, uint32_t fed, uint32_t link, uint32_t roc) {
182182
uint32_t index = fed * MAX_LINK * MAX_ROC + (link-1) * MAX_ROC + roc;
183-
pixelgpudetails::DetIdGPU detId = { Map->RawId[index], Map->rocInDet[index], Map->moduleId[index] };
183+
pixelgpudetails::DetIdGPU detId = { Map.RawId[index], Map.rocInDet[index], Map.moduleId[index] };
184184
return detId;
185185
}
186186

@@ -313,7 +313,7 @@ namespace pixelgpudetails {
313313
return ((dcol < 26) & (2 <= pxid) & (pxid < 162));
314314
}
315315

316-
__device__ uint32_t checkROC(uint32_t errorWord, uint32_t fedId, uint32_t link, const SiPixelFedCablingMapGPU *Map, bool debug = false)
316+
__device__ uint32_t checkROC(uint32_t errorWord, uint32_t fedId, uint32_t link, SiPixelFedCablingMapGPU Map, bool debug = false)
317317
{
318318
int errorType = (errorWord >> pixelgpudetails::ROC_shift) & pixelgpudetails::ERROR_mask;
319319
if (errorType < 25) return false;
@@ -323,8 +323,8 @@ namespace pixelgpudetails {
323323
case(25) : {
324324
errorFound = true;
325325
uint32_t index = fedId * MAX_LINK * MAX_ROC + (link-1) * MAX_ROC + 1;
326-
if (index > 1 && index <= Map->size) {
327-
if (!(link == Map->link[index] && 1 == Map->roc[index])) errorFound = false;
326+
if (index > 1 && index <= Map.size) {
327+
if (!(link == Map.link[index] && 1 == Map.roc[index])) errorFound = false;
328328
}
329329
if (debug&errorFound) printf("Invalid ROC = 25 found (errorType = 25)\n");
330330
break;
@@ -377,7 +377,7 @@ namespace pixelgpudetails {
377377
return errorFound? errorType : 0;
378378
}
379379

380-
__device__ uint32_t getErrRawID(uint32_t fedId, uint32_t errWord, uint32_t errorType, const SiPixelFedCablingMapGPU *Map, bool debug = false)
380+
__device__ uint32_t getErrRawID(uint32_t fedId, uint32_t errWord, uint32_t errorType, SiPixelFedCablingMapGPU Map, bool debug = false)
381381
{
382382
uint32_t rID = 0xffffffff;
383383

@@ -487,7 +487,7 @@ namespace pixelgpudetails {
487487

488488

489489
// Kernel to perform Raw to Digi conversion
490-
__global__ void RawToDigi_kernel(const SiPixelFedCablingMapGPU *Map, const unsigned char *modToUnp,
490+
__global__ void RawToDigi_kernel(SiPixelFedCablingMapGPU Map, const unsigned char *modToUnp,
491491
const uint32_t wordCounter, const uint32_t *Word, const uint8_t *fedIds,
492492
uint16_t * XX, uint16_t * YY, uint16_t * ADC,
493493
uint32_t * pdigi, uint32_t *rawIdArr, uint16_t * moduleId,
@@ -540,7 +540,7 @@ namespace pixelgpudetails {
540540
uint32_t index = fedId * MAX_LINK * MAX_ROC + (link-1) * MAX_ROC + roc;
541541
if (useQualityInfo) {
542542

543-
skipROC = Map->badRocs[index];
543+
skipROC = Map.badRocs[index];
544544
if (skipROC) continue;
545545

546546
}
@@ -609,7 +609,7 @@ namespace pixelgpudetails {
609609

610610
// Interface to outside
611611
void SiPixelRawToClusterGPUKernel::makeClustersAsync(
612-
const SiPixelFedCablingMapGPU *cablingMap,
612+
const SiPixelFedCablingMapGPU& cablingMap,
613613
const unsigned char *modToUnp,
614614
const SiPixelGainForHLTonGPU *gains,
615615
const uint32_t wordCounter, const uint32_t fedCounter,

RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -170,7 +170,7 @@ namespace pixelgpudetails {
170170

171171
void initializeWordFed(int fedId, unsigned int wordCounterGPU, const cms_uint32_t *src, unsigned int length);
172172

173-
void makeClustersAsync(const SiPixelFedCablingMapGPU *cablingMap, const unsigned char *modToUnp,
173+
void makeClustersAsync(const SiPixelFedCablingMapGPU& cablingMap, const unsigned char *modToUnp,
174174
const SiPixelGainForHLTonGPU *gains,
175175
const uint32_t wordCounter, const uint32_t fedCounter, bool convertADCtoElectrons,
176176
bool useQualityInfo, bool includeErrors, bool transferToCPU_, bool debug,

RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterHeterogeneous.cc

+1-1
Original file line numberDiff line numberDiff line change
@@ -480,7 +480,7 @@ void SiPixelRawToClusterHeterogeneous::acquireGPUCuda(const edm::HeterogeneousEv
480480
throw cms::Exception("LogicError") << "UseQuality of the module (" << useQuality<< ") differs the one from SiPixelFedCablingMapGPUWrapper. Please fix your configuration.";
481481
}
482482
// get the GPU product already here so that the async transfer can begin
483-
const auto *gpuMap = hgpuMap->getGPUProductAsync(cudaStream);
483+
const auto gpuMap = hgpuMap->getGPUProductAsync(cudaStream);
484484

485485
edm::ESHandle<SiPixelGainCalibrationForHLTGPU> hgains;
486486
es.get<SiPixelGainCalibrationForHLTGPURcd>().get(hgains);

RecoLocalTracker/SiPixelClusterizer/src/SiPixelFedCablingMapGPUWrapper.cc

+15-43
Original file line numberDiff line numberDiff line change
@@ -20,11 +20,16 @@
2020
SiPixelFedCablingMapGPUWrapper::SiPixelFedCablingMapGPUWrapper(SiPixelFedCablingMap const& cablingMap,
2121
TrackerGeometry const& trackerGeom,
2222
SiPixelQuality const *badPixelInfo):
23-
fedMap(pixelgpudetails::MAX_SIZE), linkMap(pixelgpudetails::MAX_SIZE), rocMap(pixelgpudetails::MAX_SIZE),
24-
RawId(pixelgpudetails::MAX_SIZE), rocInDet(pixelgpudetails::MAX_SIZE), moduleId(pixelgpudetails::MAX_SIZE),
25-
badRocs(pixelgpudetails::MAX_SIZE),
2623
hasQuality_(badPixelInfo != nullptr)
2724
{
25+
helper_.allocate(&fedMap, pixelgpudetails::MAX_SIZE);
26+
helper_.allocate(&linkMap, pixelgpudetails::MAX_SIZE);
27+
helper_.allocate(&rocMap, pixelgpudetails::MAX_SIZE);
28+
helper_.allocate(&RawId, pixelgpudetails::MAX_SIZE);
29+
helper_.allocate(&rocInDet, pixelgpudetails::MAX_SIZE);
30+
helper_.allocate(&moduleId, pixelgpudetails::MAX_SIZE);
31+
helper_.allocate(&badRocs, pixelgpudetails::MAX_SIZE);
32+
2833
std::vector<unsigned int> const& fedIds = cablingMap.fedIds();
2934
std::unique_ptr<SiPixelFedCablingTree> const& cabling = cablingMap.cablingTree();
3035

@@ -90,37 +95,19 @@ SiPixelFedCablingMapGPUWrapper::SiPixelFedCablingMapGPUWrapper(SiPixelFedCabling
9095
}
9196

9297
size = index-1;
98+
helper_.advise();
9399
}
94100

95101

96102
SiPixelFedCablingMapGPUWrapper::~SiPixelFedCablingMapGPUWrapper() {}
97103

98104

99-
const SiPixelFedCablingMapGPU *SiPixelFedCablingMapGPUWrapper::getGPUProductAsync(cuda::stream_t<>& cudaStream) const {
100-
const auto& data = gpuData_.dataForCurrentDeviceAsync(cudaStream, [this](GPUData& data, cuda::stream_t<>& stream) {
101-
// allocate
102-
cudaCheck(cudaMallocHost((void**) & data.cablingMapHost, sizeof(SiPixelFedCablingMapGPU)));
103-
cudaCheck(cudaMalloc((void**) & data.cablingMapDevice, sizeof(SiPixelFedCablingMapGPU)));
104-
cudaCheck(cudaMalloc((void**) & data.cablingMapHost->fed, pixelgpudetails::MAX_SIZE_BYTE_INT));
105-
cudaCheck(cudaMalloc((void**) & data.cablingMapHost->link, pixelgpudetails::MAX_SIZE_BYTE_INT));
106-
cudaCheck(cudaMalloc((void**) & data.cablingMapHost->roc, pixelgpudetails::MAX_SIZE_BYTE_INT));
107-
cudaCheck(cudaMalloc((void**) & data.cablingMapHost->RawId, pixelgpudetails::MAX_SIZE_BYTE_INT));
108-
cudaCheck(cudaMalloc((void**) & data.cablingMapHost->rocInDet, pixelgpudetails::MAX_SIZE_BYTE_INT));
109-
cudaCheck(cudaMalloc((void**) & data.cablingMapHost->moduleId, pixelgpudetails::MAX_SIZE_BYTE_INT));
110-
cudaCheck(cudaMalloc((void**) & data.cablingMapHost->badRocs, pixelgpudetails::MAX_SIZE_BYTE_BOOL));
111-
112-
// transfer
113-
data.cablingMapHost->size = this->size;
114-
cudaCheck(cudaMemcpyAsync(data.cablingMapHost->fed, this->fedMap.data(), this->fedMap.size() * sizeof(unsigned int), cudaMemcpyDefault, stream.id()));
115-
cudaCheck(cudaMemcpyAsync(data.cablingMapHost->link, this->linkMap.data(), this->linkMap.size() * sizeof(unsigned int), cudaMemcpyDefault, stream.id()));
116-
cudaCheck(cudaMemcpyAsync(data.cablingMapHost->roc, this->rocMap.data(), this->rocMap.size() * sizeof(unsigned int), cudaMemcpyDefault, stream.id()));
117-
cudaCheck(cudaMemcpyAsync(data.cablingMapHost->RawId, this->RawId.data(), this->RawId.size() * sizeof(unsigned int), cudaMemcpyDefault, stream.id()));
118-
cudaCheck(cudaMemcpyAsync(data.cablingMapHost->rocInDet, this->rocInDet.data(), this->rocInDet.size() * sizeof(unsigned int), cudaMemcpyDefault, stream.id()));
119-
cudaCheck(cudaMemcpyAsync(data.cablingMapHost->moduleId, this->moduleId.data(), this->moduleId.size() * sizeof(unsigned int), cudaMemcpyDefault, stream.id()));
120-
cudaCheck(cudaMemcpyAsync(data.cablingMapHost->badRocs, this->badRocs.data(), this->badRocs.size() * sizeof(unsigned char), cudaMemcpyDefault, stream.id()));
121-
cudaCheck(cudaMemcpyAsync(data.cablingMapDevice, data.cablingMapHost, sizeof(SiPixelFedCablingMapGPU), cudaMemcpyDefault, stream.id()));
122-
});
123-
return data.cablingMapDevice;
105+
SiPixelFedCablingMapGPU SiPixelFedCablingMapGPUWrapper::getGPUProductAsync(cuda::stream_t<>& cudaStream) const {
106+
helper_.prefetchAsync(cudaStream);
107+
return SiPixelFedCablingMapGPU{size,
108+
fedMap, linkMap, rocMap,
109+
RawId, rocInDet, moduleId,
110+
badRocs};
124111
}
125112

126113
SiPixelFedCablingMapGPUWrapper::ModulesToUnpack::ModulesToUnpack():
@@ -156,18 +143,3 @@ void SiPixelFedCablingMapGPUWrapper::ModulesToUnpack::fillAsync(SiPixelFedCablin
156143

157144
cuda::memory::async::copy(modToUnpDevice.get(), modToUnpHost.data(), modToUnpHost.size() * sizeof(unsigned char), cudaStream.id());
158145
}
159-
160-
161-
SiPixelFedCablingMapGPUWrapper::GPUData::~GPUData() {
162-
if(cablingMapHost != nullptr) {
163-
cudaCheck(cudaFree(cablingMapHost->fed));
164-
cudaCheck(cudaFree(cablingMapHost->link));
165-
cudaCheck(cudaFree(cablingMapHost->roc));
166-
cudaCheck(cudaFree(cablingMapHost->RawId));
167-
cudaCheck(cudaFree(cablingMapHost->rocInDet));
168-
cudaCheck(cudaFree(cablingMapHost->moduleId));
169-
cudaCheck(cudaFree(cablingMapHost->badRocs));
170-
cudaCheck(cudaFreeHost(cablingMapHost));
171-
}
172-
cudaCheck(cudaFree(cablingMapDevice));
173-
}

0 commit comments

Comments
 (0)