Skip to content

Commit cbeb333

Browse files
committed
Back to GPU struct of pointers
1 parent 6d62b3d commit cbeb333

12 files changed, with 50 additions (+50) and 37 deletions (-37).

RecoLocalTracker/SiPixelClusterizer/interface/SiPixelFedCablingMapGPUWrapper.h

+2-8
Original file line number | Diff line number | Diff line change
@@ -25,7 +25,7 @@ class SiPixelFedCablingMapGPUWrapper {
2525
bool hasQuality() const { return hasQuality_; }
2626

2727
// returns pointer to GPU memory
28-
SiPixelFedCablingMapGPU getGPUProductAsync(cuda::stream_t<>& cudaStream) const;
28+
const SiPixelFedCablingMapGPU *getGPUProductAsync(cuda::stream_t<>& cudaStream) const;
2929

3030
// returns pointer to GPU memory
3131
const unsigned char *getModToUnpAllAsync(cuda::stream_t<>& cudaStream) const;
@@ -37,13 +37,7 @@ class SiPixelFedCablingMapGPUWrapper {
3737
unsigned char *modToUnpDefault = nullptr;
3838

3939
CUDAESManaged helper_;
40-
unsigned int *fedMap = nullptr;
41-
unsigned int *linkMap = nullptr;
42-
unsigned int *rocMap = nullptr;
43-
unsigned int *RawId = nullptr;
44-
unsigned int *rocInDet = nullptr;
45-
unsigned int *moduleId = nullptr;
46-
unsigned char *badRocs = nullptr;
40+
SiPixelFedCablingMapGPU *cablingGPU_ = nullptr;
4741
unsigned int size;
4842
bool hasQuality_;
4943
};

RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu

+9-9
Original file line number | Diff line number | Diff line change
@@ -84,9 +84,9 @@ namespace pixelgpudetails {
8484
return (1==((rawId>>25)&0x7));
8585
}
8686

87-
__device__ pixelgpudetails::DetIdGPU getRawId(SiPixelFedCablingMapGPU cablingMap, uint32_t fed, uint32_t link, uint32_t roc) {
87+
__device__ pixelgpudetails::DetIdGPU getRawId(const SiPixelFedCablingMapGPU *cablingMap, uint32_t fed, uint32_t link, uint32_t roc) {
8888
uint32_t index = fed * MAX_LINK * MAX_ROC + (link-1) * MAX_ROC + roc;
89-
pixelgpudetails::DetIdGPU detId = { cablingMap.RawId[index], cablingMap.rocInDet[index], cablingMap.moduleId[index] };
89+
pixelgpudetails::DetIdGPU detId = { cablingMap->RawId[index], cablingMap->rocInDet[index], cablingMap->moduleId[index] };
9090
return detId;
9191
}
9292

@@ -219,7 +219,7 @@ namespace pixelgpudetails {
219219
return ((dcol < 26) & (2 <= pxid) & (pxid < 162));
220220
}
221221

222-
__device__ uint32_t checkROC(uint32_t errorWord, uint32_t fedId, uint32_t link, SiPixelFedCablingMapGPU cablingMap, bool debug = false)
222+
__device__ uint32_t checkROC(uint32_t errorWord, uint32_t fedId, uint32_t link, const SiPixelFedCablingMapGPU *cablingMap, bool debug = false)
223223
{
224224
int errorType = (errorWord >> pixelgpudetails::ROC_shift) & pixelgpudetails::ERROR_mask;
225225
if (errorType < 25) return false;
@@ -229,8 +229,8 @@ namespace pixelgpudetails {
229229
case(25) : {
230230
errorFound = true;
231231
uint32_t index = fedId * MAX_LINK * MAX_ROC + (link-1) * MAX_ROC + 1;
232-
if (index > 1 && index <= cablingMap.size) {
233-
if (!(link == cablingMap.link[index] && 1 == cablingMap.roc[index])) errorFound = false;
232+
if (index > 1 && index <= cablingMap->size) {
233+
if (!(link == cablingMap->link[index] && 1 == cablingMap->roc[index])) errorFound = false;
234234
}
235235
if (debug&errorFound) printf("Invalid ROC = 25 found (errorType = 25)\n");
236236
break;
@@ -283,7 +283,7 @@ namespace pixelgpudetails {
283283
return errorFound? errorType : 0;
284284
}
285285

286-
__device__ uint32_t getErrRawID(uint32_t fedId, uint32_t errWord, uint32_t errorType, SiPixelFedCablingMapGPU cablingMap, bool debug = false)
286+
__device__ uint32_t getErrRawID(uint32_t fedId, uint32_t errWord, uint32_t errorType, const SiPixelFedCablingMapGPU *cablingMap, bool debug = false)
287287
{
288288
uint32_t rID = 0xffffffff;
289289

@@ -393,7 +393,7 @@ namespace pixelgpudetails {
393393

394394

395395
// Kernel to perform Raw to Digi conversion
396-
__global__ void RawToDigi_kernel(SiPixelFedCablingMapGPU cablingMap, const unsigned char *modToUnp,
396+
__global__ void RawToDigi_kernel(const SiPixelFedCablingMapGPU *cablingMap, const unsigned char *modToUnp,
397397
const uint32_t wordCounter, const uint32_t *word, const uint8_t *fedIds,
398398
uint16_t *xx, uint16_t *yy, uint16_t *adc,
399399
uint32_t *pdigi, uint32_t *rawIdArr, uint16_t *moduleId,
@@ -442,7 +442,7 @@ namespace pixelgpudetails {
442442

443443
uint32_t index = fedId * MAX_LINK * MAX_ROC + (link-1) * MAX_ROC + roc;
444444
if (useQualityInfo) {
445-
skipROC = cablingMap.badRocs[index];
445+
skipROC = cablingMap->badRocs[index];
446446
if (skipROC) continue;
447447
}
448448
skipROC = modToUnp[index];
@@ -510,7 +510,7 @@ namespace pixelgpudetails {
510510

511511
// Interface to outside
512512
void SiPixelRawToClusterGPUKernel::makeClustersAsync(
513-
const SiPixelFedCablingMapGPU& cablingMap,
513+
const SiPixelFedCablingMapGPU *cablingMap,
514514
const unsigned char *modToUnp,
515515
const SiPixelGainForHLTonGPU *gains,
516516
const WordFedAppender& wordFed,

RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h

+1-1
Original file line number | Diff line number | Diff line change
@@ -204,7 +204,7 @@ namespace pixelgpudetails {
204204
SiPixelRawToClusterGPUKernel& operator=(const SiPixelRawToClusterGPUKernel&) = delete;
205205
SiPixelRawToClusterGPUKernel& operator=(SiPixelRawToClusterGPUKernel&&) = delete;
206206

207-
void makeClustersAsync(const SiPixelFedCablingMapGPU& cablingMap, const unsigned char *modToUnp,
207+
void makeClustersAsync(const SiPixelFedCablingMapGPU *cablingMap, const unsigned char *modToUnp,
208208
const SiPixelGainForHLTonGPU *gains,
209209
const WordFedAppender& wordFed,
210210
const uint32_t wordCounter, const uint32_t fedCounter, bool convertADCtoElectrons,

RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterHeterogeneous.cc

+1-1
Original file line number | Diff line number | Diff line change
@@ -466,7 +466,7 @@ void SiPixelRawToClusterHeterogeneous::acquireGPUCuda(const edm::HeterogeneousEv
466466
throw cms::Exception("LogicError") << "UseQuality of the module (" << useQuality<< ") differs the one from SiPixelFedCablingMapGPUWrapper. Please fix your configuration.";
467467
}
468468
// get the GPU product already here so that the async transfer can begin
469-
const auto gpuMap = hgpuMap->getGPUProductAsync(cudaStream);
469+
const auto *gpuMap = hgpuMap->getGPUProductAsync(cudaStream);
470470

471471
edm::cuda::device::unique_ptr<unsigned char[]> modulesToUnpackRegional;
472472
const unsigned char *gpuModulesToUnpack;

RecoLocalTracker/SiPixelClusterizer/src/SiPixelFedCablingMapGPUWrapper.cc

+20-7
Original file line number | Diff line number | Diff line change
@@ -24,6 +24,14 @@ SiPixelFedCablingMapGPUWrapper::SiPixelFedCablingMapGPUWrapper(SiPixelFedCabling
2424
SiPixelQuality const *badPixelInfo):
2525
hasQuality_(badPixelInfo != nullptr)
2626
{
27+
unsigned int *fedMap = nullptr;
28+
unsigned int *linkMap = nullptr;
29+
unsigned int *rocMap = nullptr;
30+
unsigned int *RawId = nullptr;
31+
unsigned int *rocInDet = nullptr;
32+
unsigned int *moduleId = nullptr;
33+
unsigned char *badRocs = nullptr;
34+
2735
helper_.allocate(&fedMap, pixelgpudetails::MAX_SIZE);
2836
helper_.allocate(&linkMap, pixelgpudetails::MAX_SIZE);
2937
helper_.allocate(&rocMap, pixelgpudetails::MAX_SIZE);
@@ -95,12 +103,20 @@ SiPixelFedCablingMapGPUWrapper::SiPixelFedCablingMapGPUWrapper(SiPixelFedCabling
95103
LogDebug("SiPixelFedCablingMapGPU") << "----------------------------------------------------------------------------" << std::endl;
96104
LogDebug("SiPixelFedCablingMapGPU") << i << std::setw(20) << fedMap[i] << std::setw(20) << linkMap[i] << std::setw(20) << rocMap[i] << std::endl;
97105
LogDebug("SiPixelFedCablingMapGPU") << i << std::setw(20) << RawId[i] << std::setw(20) << rocInDet[i] << std::setw(20) << moduleId[i] << std::endl;
98-
LogDebug("SiPixelFedCablingMapGPU") << i << std::setw(20) << (bool)badRocs[i] << std::setw(20) << std::endl;
106+
LogDebug("SiPixelFedCablingMapGPU") << i << std::setw(20) << static_cast<bool>(badRocs[i]) << std::setw(20) << std::endl;
99107
LogDebug("SiPixelFedCablingMapGPU") << "----------------------------------------------------------------------------" << std::endl;
100108

101109
}
102110

103-
size = index-1;
111+
helper_.allocate(&cablingGPU_, 1);
112+
cablingGPU_->size = index-1;
113+
cablingGPU_->fed = fedMap;
114+
cablingGPU_->link = linkMap;
115+
cablingGPU_->roc = rocMap;
116+
cablingGPU_->RawId = RawId;
117+
cablingGPU_->rocInDet = rocInDet;
118+
cablingGPU_->moduleId = moduleId;
119+
cablingGPU_->badRocs = badRocs;
104120
helper_.advise();
105121
helperUnp_.advise();
106122
}
@@ -109,12 +125,9 @@ SiPixelFedCablingMapGPUWrapper::SiPixelFedCablingMapGPUWrapper(SiPixelFedCabling
109125
SiPixelFedCablingMapGPUWrapper::~SiPixelFedCablingMapGPUWrapper() {}
110126

111127

112-
SiPixelFedCablingMapGPU SiPixelFedCablingMapGPUWrapper::getGPUProductAsync(cuda::stream_t<>& cudaStream) const {
128+
const SiPixelFedCablingMapGPU *SiPixelFedCablingMapGPUWrapper::getGPUProductAsync(cuda::stream_t<>& cudaStream) const {
113129
helper_.prefetchAsync(cudaStream);
114-
return SiPixelFedCablingMapGPU{size,
115-
fedMap, linkMap, rocMap,
116-
RawId, rocInDet, moduleId,
117-
badRocs};
130+
return cablingGPU_;
118131
}
119132

120133
const unsigned char *SiPixelFedCablingMapGPUWrapper::getModToUnpAllAsync(cuda::stream_t<>& cudaStream) const {

RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFast.h

+2-1
Original file line number | Diff line number | Diff line change
@@ -46,7 +46,7 @@ class PixelCPEFast final : public PixelCPEBase
4646

4747
// The return value can only be used safely in kernels launched on
4848
// the same cudaStream, or after cudaStreamSynchronize.
49-
pixelCPEforGPU::ParamsOnGPU getGPUProductAsync(cuda::stream_t<>& cudaStream) const;
49+
const pixelCPEforGPU::ParamsOnGPU *getGPUProductAsync(cuda::stream_t<>& cudaStream) const;
5050

5151
private:
5252
ClusterParam * createClusterParam(const SiPixelCluster & cl) const override;
@@ -83,6 +83,7 @@ class PixelCPEFast final : public PixelCPEBase
8383
CUDAESManaged m_helper;
8484
pixelCPEforGPU::DetParams *m_detParamsGPU = nullptr;
8585
pixelCPEforGPU::CommonParams *m_commonParamsGPU = nullptr;
86+
pixelCPEforGPU::ParamsOnGPU *m_paramsGPU = nullptr;
8687

8788
void fillParamsForGpu();
8889
};

RecoLocalTracker/SiPixelRecHits/plugins/PixelRecHits.cu

+1-1
Original file line number | Diff line number | Diff line change
@@ -131,7 +131,7 @@ namespace pixelgpudetails {
131131

132132
void PixelRecHitGPUKernel::makeHitsAsync(const siPixelRawToClusterHeterogeneousProduct::GPUProduct& input,
133133
float const * bs,
134-
pixelCPEforGPU::ParamsOnGPU const& cpeParams,
134+
pixelCPEforGPU::ParamsOnGPU const *cpeParams,
135135
bool transferToCPU,
136136
cuda::stream_t<>& stream) {
137137
cudaCheck(cudaMemcpyAsync(gpu_.bs_d, bs, 3 * sizeof(float), cudaMemcpyDefault, stream.id()));

RecoLocalTracker/SiPixelRecHits/plugins/PixelRecHits.h

+1-1
Original file line number | Diff line number | Diff line change
@@ -33,7 +33,7 @@ namespace pixelgpudetails {
3333

3434
void makeHitsAsync(const siPixelRawToClusterHeterogeneousProduct::GPUProduct& input,
3535
float const * bs,
36-
pixelCPEforGPU::ParamsOnGPU const& cpeParams,
36+
pixelCPEforGPU::ParamsOnGPU const *cpeParams,
3737
bool transferToCPU,
3838
cuda::stream_t<>& stream);
3939

RecoLocalTracker/SiPixelRecHits/plugins/gpuPixelRecHits.h

+4-4
Original file line number | Diff line number | Diff line change
@@ -14,7 +14,7 @@ namespace gpuPixelRecHits {
1414

1515

1616

17-
__global__ void getHits(pixelCPEforGPU::ParamsOnGPU cpeParams,
17+
__global__ void getHits(pixelCPEforGPU::ParamsOnGPU const * __restrict__ cpeParams,
1818
float const * __restrict__ bs,
1919
uint16_t const * __restrict__ id,
2020
uint16_t const * __restrict__ x,
@@ -125,8 +125,8 @@ namespace gpuPixelRecHits {
125125

126126
assert(h < 2000*256);
127127

128-
pixelCPEforGPU::position(cpeParams.commonParams(), cpeParams.detParams(me), clusParams, ic);
129-
pixelCPEforGPU::errorFromDB(cpeParams.commonParams(), cpeParams.detParams(me), clusParams, ic);
128+
pixelCPEforGPU::position(cpeParams->commonParams(), cpeParams->detParams(me), clusParams, ic);
129+
pixelCPEforGPU::errorFromDB(cpeParams->commonParams(), cpeParams->detParams(me), clusParams, ic);
130130

131131
chargeh[h] = clusParams.charge[ic];
132132

@@ -141,7 +141,7 @@ namespace gpuPixelRecHits {
141141
mc[h]= clusParams.minCol[ic];
142142

143143
// to global and compute phi...
144-
cpeParams.detParams(me).frame.toGlobal(xl[h],yl[h], xg[h],yg[h],zg[h]);
144+
cpeParams->detParams(me).frame.toGlobal(xl[h],yl[h], xg[h],yg[h],zg[h]);
145145
// here correct for the beamspot...
146146
xg[h]-=bs[0];
147147
yg[h]-=bs[1];

RecoLocalTracker/SiPixelRecHits/plugins/siPixelRecHitsHeterogeneousProduct.h

+1-1
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,7 @@ namespace siPixelRecHitsHeterogeneousProduct {
1818
using hindex_type = uint16_t; // if above is <=2^16
1919

2020
struct HitsOnGPU{
21-
pixelCPEforGPU::ParamsOnGPU cpeParams; // forwarded from setup, NOT owned
21+
const pixelCPEforGPU::ParamsOnGPU *cpeParams; // forwarded from setup, NOT owned
2222
float * bs_d;
2323
const uint32_t * hitsModuleStart_d; // forwarded from clusters
2424
uint32_t * hitsLayerStart_d;

RecoLocalTracker/SiPixelRecHits/src/PixelCPEFast.cc

+7-2
Original file line number | Diff line number | Diff line change
@@ -66,9 +66,9 @@ PixelCPEFast::PixelCPEFast(edm::ParameterSet const & conf,
6666
fillParamsForGpu();
6767
}
6868

69-
pixelCPEforGPU::ParamsOnGPU PixelCPEFast::getGPUProductAsync(cuda::stream_t<>& cudaStream) const {
69+
const pixelCPEforGPU::ParamsOnGPU *PixelCPEFast::getGPUProductAsync(cuda::stream_t<>& cudaStream) const {
7070
m_helper.prefetchAsync(cudaStream);
71-
return pixelCPEforGPU::ParamsOnGPU{m_commonParamsGPU, m_detParamsGPU};
71+
return m_paramsGPU;
7272
}
7373

7474
void PixelCPEFast::fillParamsForGpu() {
@@ -196,6 +196,11 @@ void PixelCPEFast::fillParamsForGpu() {
196196
}
197197

198198
}
199+
200+
m_helper.allocate(&m_paramsGPU, 1);
201+
m_paramsGPU->m_commonParams = m_commonParamsGPU;
202+
m_paramsGPU->m_detParams = m_detParamsGPU;
203+
199204
m_helper.advise();
200205
}
201206

RecoPixelVertexing/PixelTriplets/plugins/RiemannFitOnGPU.cu

+1-1
Original file line number | Diff line number | Diff line change
@@ -53,7 +53,7 @@ void kernelFastFitAllHits(TuplesOnGPU::Container const * __restrict__ foundNtupl
5353
auto hit = hitId[i];
5454
// printf("Hit global: %f,%f,%f\n", hhp->xg_d[hit],hhp->yg_d[hit],hhp->zg_d[hit]);
5555
float ge[6];
56-
hhp->cpeParams.detParams(hhp->detInd_d[hit]).frame.toGlobal(hhp->xerr_d[hit], 0, hhp->yerr_d[hit], ge);
56+
hhp->cpeParams->detParams(hhp->detInd_d[hit]).frame.toGlobal(hhp->xerr_d[hit], 0, hhp->yerr_d[hit], ge);
5757
// printf("Error: %d: %f,%f,%f,%f,%f,%f\n",hhp->detInd_d[hit],ge[0],ge[1],ge[2],ge[3],ge[4],ge[5]);
5858

5959
hits.col(i) << hhp->xg_d[hit], hhp->yg_d[hit], hhp->zg_d[hit];

0 commit comments

Comments
 (0)