Skip to content

Commit 1f15f2f

Browse files
committed
Back to GPU struct of pointers
1 parent 3b16931 commit 1f15f2f

12 files changed

+49
-37
lines changed

RecoLocalTracker/SiPixelClusterizer/interface/SiPixelFedCablingMapGPUWrapper.h

+2-8
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ class SiPixelFedCablingMapGPUWrapper {
2424
bool hasQuality() const { return hasQuality_; }
2525

2626
// returns pointer to GPU memory
27-
SiPixelFedCablingMapGPU getGPUProductAsync(cuda::stream_t<>& cudaStream) const;
27+
const SiPixelFedCablingMapGPU *getGPUProductAsync(cuda::stream_t<>& cudaStream) const;
2828

2929

3030
// Allocates host and device memory, converts data to host memory,
@@ -47,13 +47,7 @@ class SiPixelFedCablingMapGPUWrapper {
4747

4848
private:
4949
CUDAESManaged helper_;
50-
unsigned int *fedMap = nullptr;
51-
unsigned int *linkMap = nullptr;
52-
unsigned int *rocMap = nullptr;
53-
unsigned int *RawId = nullptr;
54-
unsigned int *rocInDet = nullptr;
55-
unsigned int *moduleId = nullptr;
56-
unsigned char *badRocs = nullptr;
50+
SiPixelFedCablingMapGPU *cablingGPU_ = nullptr;
5751
unsigned int size;
5852
bool hasQuality_;
5953
};

RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu

+9-9
Original file line numberDiff line numberDiff line change
@@ -179,9 +179,9 @@ namespace pixelgpudetails {
179179
return (1==((rawId>>25)&0x7));
180180
}
181181

182-
__device__ pixelgpudetails::DetIdGPU getRawId(SiPixelFedCablingMapGPU Map, uint32_t fed, uint32_t link, uint32_t roc) {
182+
__device__ pixelgpudetails::DetIdGPU getRawId(const SiPixelFedCablingMapGPU *Map, uint32_t fed, uint32_t link, uint32_t roc) {
183183
uint32_t index = fed * MAX_LINK * MAX_ROC + (link-1) * MAX_ROC + roc;
184-
pixelgpudetails::DetIdGPU detId = { Map.RawId[index], Map.rocInDet[index], Map.moduleId[index] };
184+
pixelgpudetails::DetIdGPU detId = { Map->RawId[index], Map->rocInDet[index], Map->moduleId[index] };
185185
return detId;
186186
}
187187

@@ -314,7 +314,7 @@ namespace pixelgpudetails {
314314
return ((dcol < 26) & (2 <= pxid) & (pxid < 162));
315315
}
316316

317-
__device__ uint32_t checkROC(uint32_t errorWord, uint32_t fedId, uint32_t link, SiPixelFedCablingMapGPU Map, bool debug = false)
317+
__device__ uint32_t checkROC(uint32_t errorWord, uint32_t fedId, uint32_t link, const SiPixelFedCablingMapGPU *Map, bool debug = false)
318318
{
319319
int errorType = (errorWord >> pixelgpudetails::ROC_shift) & pixelgpudetails::ERROR_mask;
320320
if (errorType < 25) return false;
@@ -324,8 +324,8 @@ namespace pixelgpudetails {
324324
case(25) : {
325325
errorFound = true;
326326
uint32_t index = fedId * MAX_LINK * MAX_ROC + (link-1) * MAX_ROC + 1;
327-
if (index > 1 && index <= Map.size) {
328-
if (!(link == Map.link[index] && 1 == Map.roc[index])) errorFound = false;
327+
if (index > 1 && index <= Map->size) {
328+
if (!(link == Map->link[index] && 1 == Map->roc[index])) errorFound = false;
329329
}
330330
if (debug&errorFound) printf("Invalid ROC = 25 found (errorType = 25)\n");
331331
break;
@@ -378,7 +378,7 @@ namespace pixelgpudetails {
378378
return errorFound? errorType : 0;
379379
}
380380

381-
__device__ uint32_t getErrRawID(uint32_t fedId, uint32_t errWord, uint32_t errorType, SiPixelFedCablingMapGPU Map, bool debug = false)
381+
__device__ uint32_t getErrRawID(uint32_t fedId, uint32_t errWord, uint32_t errorType, const SiPixelFedCablingMapGPU *Map, bool debug = false)
382382
{
383383
uint32_t rID = 0xffffffff;
384384

@@ -488,7 +488,7 @@ namespace pixelgpudetails {
488488

489489

490490
// Kernel to perform Raw to Digi conversion
491-
__global__ void RawToDigi_kernel(SiPixelFedCablingMapGPU Map, const unsigned char *modToUnp,
491+
__global__ void RawToDigi_kernel(const SiPixelFedCablingMapGPU *Map, const unsigned char *modToUnp,
492492
const uint32_t wordCounter, const uint32_t *Word, const uint8_t *fedIds,
493493
uint16_t * XX, uint16_t * YY, uint16_t * ADC,
494494
uint32_t * pdigi, uint32_t *rawIdArr, uint16_t * moduleId,
@@ -541,7 +541,7 @@ namespace pixelgpudetails {
541541
uint32_t index = fedId * MAX_LINK * MAX_ROC + (link-1) * MAX_ROC + roc;
542542
if (useQualityInfo) {
543543

544-
skipROC = Map.badRocs[index];
544+
skipROC = Map->badRocs[index];
545545
if (skipROC) continue;
546546

547547
}
@@ -610,7 +610,7 @@ namespace pixelgpudetails {
610610

611611
// Interface to outside
612612
void SiPixelRawToClusterGPUKernel::makeClustersAsync(
613-
const SiPixelFedCablingMapGPU& cablingMap,
613+
const SiPixelFedCablingMapGPU *cablingMap,
614614
const unsigned char *modToUnp,
615615
const SiPixelGainForHLTonGPU *gains,
616616
const uint32_t wordCounter, const uint32_t fedCounter,

RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -170,7 +170,7 @@ namespace pixelgpudetails {
170170

171171
void initializeWordFed(int fedId, unsigned int wordCounterGPU, const cms_uint32_t *src, unsigned int length);
172172

173-
void makeClustersAsync(const SiPixelFedCablingMapGPU& cablingMap, const unsigned char *modToUnp,
173+
void makeClustersAsync(const SiPixelFedCablingMapGPU *cablingMap, const unsigned char *modToUnp,
174174
const SiPixelGainForHLTonGPU *gains,
175175
const uint32_t wordCounter, const uint32_t fedCounter, bool convertADCtoElectrons,
176176
bool useQualityInfo, bool includeErrors, bool transferToCPU_, bool debug,

RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterHeterogeneous.cc

+1-1
Original file line numberDiff line numberDiff line change
@@ -480,7 +480,7 @@ void SiPixelRawToClusterHeterogeneous::acquireGPUCuda(const edm::HeterogeneousEv
480480
throw cms::Exception("LogicError") << "UseQuality of the module (" << useQuality<< ") differs the one from SiPixelFedCablingMapGPUWrapper. Please fix your configuration.";
481481
}
482482
// get the GPU product already here so that the async transfer can begin
483-
const auto gpuMap = hgpuMap->getGPUProductAsync(cudaStream);
483+
const auto *gpuMap = hgpuMap->getGPUProductAsync(cudaStream);
484484

485485
edm::ESHandle<SiPixelGainCalibrationForHLTGPU> hgains;
486486
es.get<SiPixelGainCalibrationForHLTGPURcd>().get(hgains);

RecoLocalTracker/SiPixelClusterizer/src/SiPixelFedCablingMapGPUWrapper.cc

+20-7
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,14 @@ SiPixelFedCablingMapGPUWrapper::SiPixelFedCablingMapGPUWrapper(SiPixelFedCabling
2222
SiPixelQuality const *badPixelInfo):
2323
hasQuality_(badPixelInfo != nullptr)
2424
{
25+
unsigned int *fedMap = nullptr;
26+
unsigned int *linkMap = nullptr;
27+
unsigned int *rocMap = nullptr;
28+
unsigned int *RawId = nullptr;
29+
unsigned int *rocInDet = nullptr;
30+
unsigned int *moduleId = nullptr;
31+
unsigned char *badRocs = nullptr;
32+
2533
helper_.allocate(&fedMap, pixelgpudetails::MAX_SIZE);
2634
helper_.allocate(&linkMap, pixelgpudetails::MAX_SIZE);
2735
helper_.allocate(&rocMap, pixelgpudetails::MAX_SIZE);
@@ -89,25 +97,30 @@ SiPixelFedCablingMapGPUWrapper::SiPixelFedCablingMapGPUWrapper(SiPixelFedCabling
8997
LogDebug("SiPixelFedCablingMapGPU") << "----------------------------------------------------------------------------" << std::endl;
9098
LogDebug("SiPixelFedCablingMapGPU") << i << std::setw(20) << fedMap[i] << std::setw(20) << linkMap[i] << std::setw(20) << rocMap[i] << std::endl;
9199
LogDebug("SiPixelFedCablingMapGPU") << i << std::setw(20) << RawId[i] << std::setw(20) << rocInDet[i] << std::setw(20) << moduleId[i] << std::endl;
92-
LogDebug("SiPixelFedCablingMapGPU") << i << std::setw(20) << (bool)badRocs[i] << std::setw(20) << std::endl;
100+
LogDebug("SiPixelFedCablingMapGPU") << i << std::setw(20) << static_cast<bool>(badRocs[i]) << std::setw(20) << std::endl;
93101
LogDebug("SiPixelFedCablingMapGPU") << "----------------------------------------------------------------------------" << std::endl;
94102

95103
}
96104

97-
size = index-1;
105+
helper_.allocate(&cablingGPU_, 1);
106+
cablingGPU_->size = index-1;
107+
cablingGPU_->fed = fedMap;
108+
cablingGPU_->link = linkMap;
109+
cablingGPU_->roc = rocMap;
110+
cablingGPU_->RawId = RawId;
111+
cablingGPU_->rocInDet = rocInDet;
112+
cablingGPU_->moduleId = moduleId;
113+
cablingGPU_->badRocs = badRocs;
98114
helper_.advise();
99115
}
100116

101117

102118
SiPixelFedCablingMapGPUWrapper::~SiPixelFedCablingMapGPUWrapper() {}
103119

104120

105-
SiPixelFedCablingMapGPU SiPixelFedCablingMapGPUWrapper::getGPUProductAsync(cuda::stream_t<>& cudaStream) const {
121+
const SiPixelFedCablingMapGPU *SiPixelFedCablingMapGPUWrapper::getGPUProductAsync(cuda::stream_t<>& cudaStream) const {
106122
helper_.prefetchAsync(cudaStream);
107-
return SiPixelFedCablingMapGPU{size,
108-
fedMap, linkMap, rocMap,
109-
RawId, rocInDet, moduleId,
110-
badRocs};
123+
return cablingGPU_;
111124
}
112125

113126
SiPixelFedCablingMapGPUWrapper::ModulesToUnpack::ModulesToUnpack():

RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFast.h

+2-1
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ class PixelCPEFast final : public PixelCPEBase
4444

4545
// The return value can only be used safely in kernels launched on
4646
// the same cudaStream, or after cudaStreamSynchronize.
47-
pixelCPEforGPU::ParamsOnGPU getGPUProductAsync(cuda::stream_t<>& cudaStream) const;
47+
const pixelCPEforGPU::ParamsOnGPU *getGPUProductAsync(cuda::stream_t<>& cudaStream) const;
4848

4949
private:
5050
ClusterParam * createClusterParam(const SiPixelCluster & cl) const override;
@@ -79,6 +79,7 @@ class PixelCPEFast final : public PixelCPEBase
7979
CUDAESManaged m_helper;
8080
pixelCPEforGPU::DetParams *m_detParamsGPU = nullptr;
8181
pixelCPEforGPU::CommonParams *m_commonParamsGPU = nullptr;
82+
pixelCPEforGPU::ParamsOnGPU *m_paramsGPU = nullptr;
8283

8384
void fillParamsForGpu();
8485
};

RecoLocalTracker/SiPixelRecHits/plugins/PixelRecHits.cu

+1-1
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,7 @@ namespace pixelgpudetails {
124124

125125
void PixelRecHitGPUKernel::makeHitsAsync(const siPixelRawToClusterHeterogeneousProduct::GPUProduct& input,
126126
float const * bs,
127-
pixelCPEforGPU::ParamsOnGPU const& cpeParams,
127+
pixelCPEforGPU::ParamsOnGPU const *cpeParams,
128128
bool transferToCPU,
129129
cuda::stream_t<>& stream) {
130130
cudaCheck(cudaMemcpyAsync(gpu_.bs_d, bs, 3 * sizeof(float), cudaMemcpyDefault, stream.id()));

RecoLocalTracker/SiPixelRecHits/plugins/PixelRecHits.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ namespace pixelgpudetails {
3333

3434
void makeHitsAsync(const siPixelRawToClusterHeterogeneousProduct::GPUProduct& input,
3535
float const * bs,
36-
pixelCPEforGPU::ParamsOnGPU const& cpeParams,
36+
pixelCPEforGPU::ParamsOnGPU const *cpeParams,
3737
bool transferToCPU,
3838
cuda::stream_t<>& stream);
3939

RecoLocalTracker/SiPixelRecHits/plugins/gpuPixelRecHits.h

+4-4
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ namespace gpuPixelRecHits {
1414

1515

1616

17-
__global__ void getHits(pixelCPEforGPU::ParamsOnGPU cpeParams,
17+
__global__ void getHits(pixelCPEforGPU::ParamsOnGPU const * __restrict__ cpeParams,
1818
float const * __restrict__ bs,
1919
uint16_t const * __restrict__ id,
2020
uint16_t const * __restrict__ x,
@@ -125,8 +125,8 @@ namespace gpuPixelRecHits {
125125

126126
assert(h < 2000*256);
127127

128-
pixelCPEforGPU::position(cpeParams.commonParams(), cpeParams.detParams(me), clusParams, ic);
129-
pixelCPEforGPU::error(cpeParams.commonParams(), cpeParams.detParams(me), clusParams, ic);
128+
pixelCPEforGPU::position(cpeParams->commonParams(), cpeParams->detParams(me), clusParams, ic);
129+
pixelCPEforGPU::error(cpeParams->commonParams(), cpeParams->detParams(me), clusParams, ic);
130130

131131
chargeh[h] = clusParams.charge[ic];
132132

@@ -141,7 +141,7 @@ namespace gpuPixelRecHits {
141141
mc[h]= clusParams.minCol[ic];
142142

143143
// to global and compute phi...
144-
cpeParams.detParams(me).frame.toGlobal(xl[h],yl[h], xg[h],yg[h],zg[h]);
144+
cpeParams->detParams(me).frame.toGlobal(xl[h],yl[h], xg[h],yg[h],zg[h]);
145145
// here correct for the beamspot...
146146
xg[h]-=bs[0];
147147
yg[h]-=bs[1];

RecoLocalTracker/SiPixelRecHits/plugins/siPixelRecHitsHeterogeneousProduct.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ namespace siPixelRecHitsHeterogeneousProduct {
1818
using hindex_type = uint16_t; // if above is <=2^16
1919

2020
struct HitsOnGPU{
21-
pixelCPEforGPU::ParamsOnGPU cpeParams; // forwarded from setup, NOT owned
21+
const pixelCPEforGPU::ParamsOnGPU *cpeParams; // forwarded from setup, NOT owned
2222
float * bs_d;
2323
const uint32_t * hitsModuleStart_d; // forwarded from clusters
2424
uint32_t * hitsLayerStart_d;

RecoLocalTracker/SiPixelRecHits/src/PixelCPEFast.cc

+6-2
Original file line numberDiff line numberDiff line change
@@ -66,9 +66,9 @@ PixelCPEFast::PixelCPEFast(edm::ParameterSet const & conf,
6666
fillParamsForGpu();
6767
}
6868

69-
pixelCPEforGPU::ParamsOnGPU PixelCPEFast::getGPUProductAsync(cuda::stream_t<>& cudaStream) const {
69+
const pixelCPEforGPU::ParamsOnGPU *PixelCPEFast::getGPUProductAsync(cuda::stream_t<>& cudaStream) const {
7070
m_helper.prefetchAsync(cudaStream);
71-
return pixelCPEforGPU::ParamsOnGPU{m_commonParamsGPU, m_detParamsGPU};
71+
return m_paramsGPU;
7272
}
7373

7474
void PixelCPEFast::fillParamsForGpu() {
@@ -120,6 +120,10 @@ void PixelCPEFast::fillParamsForGpu() {
120120
g.frame = pixelCPEforGPU::Frame(vv.x(),vv.y(),vv.z(),rr);
121121
}
122122

123+
m_helper.allocate(&m_paramsGPU, 1);
124+
m_paramsGPU->m_commonParams = m_commonParamsGPU;
125+
m_paramsGPU->m_detParams = m_detParamsGPU;
126+
123127
m_helper.advise();
124128
}
125129

RecoPixelVertexing/PixelTriplets/plugins/CAHitQuadrupletGeneratorGPU.cu

+1-1
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ void kernelFastFitAllHits(GPU::SimpleVector<Quadruplet> * foundNtuplets,
4545
auto hit = (*foundNtuplets)[helix_start].hitId[i];
4646
// printf("Hit global_x: %f\n", hhp->xg_d[hit]);
4747
float ge[6];
48-
hhp->cpeParams.detParams(hhp->detInd_d[hit]).frame.toGlobal(hhp->xerr_d[hit], 0, hhp->yerr_d[hit], ge);
48+
hhp->cpeParams->detParams(hhp->detInd_d[hit]).frame.toGlobal(hhp->xerr_d[hit], 0, hhp->yerr_d[hit], ge);
4949
// printf("Error: %d: %f,%f,%f,%f,%f,%f\n",hhp->detInd_d[hit],ge[0],ge[1],ge[2],ge[3],ge[4],ge[5]);
5050

5151
hits[helix_start].col(i) << hhp->xg_d[hit], hhp->yg_d[hit], hhp->zg_d[hit];

0 commit comments

Comments
 (0)