 #include <cuda.h>

-SiPixelGainCalibrationForHLTGPU::SiPixelGainCalibrationForHLTGPU(const SiPixelGainCalibrationForHLT& gains, const TrackerGeometry& geom):
-  gains_(&gains)
+SiPixelGainCalibrationForHLTGPU::SiPixelGainCalibrationForHLTGPU(const SiPixelGainCalibrationForHLT& gains, const TrackerGeometry& geom)
 {
   // bizzarre logic (looking for fist strip-det) don't ask
   auto const & dus = geom.detUnits();
@@ -25,8 +24,7 @@ SiPixelGainCalibrationForHLTGPU::SiPixelGainCalibrationForHLTGPU(const SiPixelGa
   std::cout << "sizes " << sizeof(char) << ' ' << sizeof(uint8_t) << ' ' << sizeof(SiPixelGainForHLTonGPU::DecodingStructure) << std::endl;
   */

-  cudaCheck(cudaMallocHost((void**) &gainForHLTonHost_, sizeof(SiPixelGainForHLTonGPU)));
-  // gainForHLTonHost_->v_pedestals = gainDataOnGPU_; // how to do this?
+  helper_.allocate(&gainForHLT_, 1);

   // do not read back from the (possibly write-combined) memory buffer
   auto minPed = gains.getPedLow();
@@ -36,21 +34,21 @@ SiPixelGainCalibrationForHLTGPU::SiPixelGainCalibrationForHLTGPU(const SiPixelGa
   auto nBinsToUseForEncoding = 253;

   // we will simplify later (not everything is needed....)
-  gainForHLTonHost_->minPed_ = minPed;
-  gainForHLTonHost_->maxPed_ = maxPed;
-  gainForHLTonHost_->minGain_ = minGain;
-  gainForHLTonHost_->maxGain_ = maxGain;
+  gainForHLT_->minPed_ = minPed;
+  gainForHLT_->maxPed_ = maxPed;
+  gainForHLT_->minGain_ = minGain;
+  gainForHLT_->maxGain_ = maxGain;

-  gainForHLTonHost_->numberOfRowsAveragedOver_ = 80;
-  gainForHLTonHost_->nBinsToUseForEncoding_ = nBinsToUseForEncoding;
-  gainForHLTonHost_->deadFlag_ = 255;
-  gainForHLTonHost_->noisyFlag_ = 254;
+  gainForHLT_->numberOfRowsAveragedOver_ = 80;
+  gainForHLT_->nBinsToUseForEncoding_ = nBinsToUseForEncoding;
+  gainForHLT_->deadFlag_ = 255;
+  gainForHLT_->noisyFlag_ = 254;

-  gainForHLTonHost_->pedPrecision  = static_cast<float>(maxPed - minPed) / nBinsToUseForEncoding;
-  gainForHLTonHost_->gainPrecision = static_cast<float>(maxGain - minGain) / nBinsToUseForEncoding;
+  gainForHLT_->pedPrecision  = static_cast<float>(maxPed - minPed) / nBinsToUseForEncoding;
+  gainForHLT_->gainPrecision = static_cast<float>(maxGain - minGain) / nBinsToUseForEncoding;

   /*
-  std::cout << "precisions g " << gainForHLTonHost_->pedPrecision << ' ' << gainForHLTonHost_->gainPrecision << std::endl;
+  std::cout << "precisions g " << gainForHLT_->pedPrecision << ' ' << gainForHLT_->gainPrecision << std::endl;
   */

   // fill the index map
@@ -68,31 +66,21 @@ SiPixelGainCalibrationForHLTGPU::SiPixelGainCalibrationForHLTGPU(const SiPixelGa
     assert(0==p->iend%2);
     assert(p->ibegin!=p->iend);
     assert(p->ncols>0);
-    gainForHLTonHost_->rangeAndCols[i] = std::make_pair(SiPixelGainForHLTonGPU::Range(p->ibegin,p->iend), p->ncols);
+    gainForHLT_->rangeAndCols[i] = std::make_pair(SiPixelGainForHLTonGPU::Range(p->ibegin,p->iend), p->ncols);
     // if (ind[i].detid!=dus[i]->geographicalId()) std::cout << ind[i].detid<<"!="<<dus[i]->geographicalId() << std::endl;
-    // gainForHLTonHost_->rangeAndCols[i] = std::make_pair(SiPixelGainForHLTonGPU::Range(ind[i].ibegin,ind[i].iend), ind[i].ncols);
+    // gainForHLT_->rangeAndCols[i] = std::make_pair(SiPixelGainForHLTonGPU::Range(ind[i].ibegin,ind[i].iend), ind[i].ncols);
   }

-}
+  helper_.allocate(&(gainForHLT_->v_pedestals), gains.data().size(), sizeof(char)); // override the element size because essentially we reinterpret_cast on the fly
+  std::memcpy(gainForHLT_->v_pedestals, gains.data().data(), gains.data().size()*sizeof(char));

-SiPixelGainCalibrationForHLTGPU::~SiPixelGainCalibrationForHLTGPU() {
-  cudaCheck(cudaFreeHost(gainForHLTonHost_));
+  helper_.advise();
 }

-SiPixelGainCalibrationForHLTGPU::GPUData::~GPUData() {
-  cudaCheck(cudaFree(gainForHLTonGPU));
-  cudaCheck(cudaFree(gainDataOnGPU));
+SiPixelGainCalibrationForHLTGPU::~SiPixelGainCalibrationForHLTGPU() {
 }

 const SiPixelGainForHLTonGPU *SiPixelGainCalibrationForHLTGPU::getGPUProductAsync(cuda::stream_t<>& cudaStream) const {
-  const auto& data = gpuData_.dataForCurrentDeviceAsync(cudaStream, [this](GPUData& data, cuda::stream_t<>& stream) {
-      cudaCheck(cudaMalloc((void**) &data.gainForHLTonGPU, sizeof(SiPixelGainForHLTonGPU)));
-      cudaCheck(cudaMalloc((void**) &data.gainDataOnGPU, this->gains_->data().size())); // TODO: this could be changed to cuda::memory::device::unique_ptr<>
-      // gains.data().data() is used also for non-GPU code, we cannot allocate it on aligned and write-combined memory
-      cudaCheck(cudaMemcpyAsync(data.gainDataOnGPU, this->gains_->data().data(), this->gains_->data().size(), cudaMemcpyDefault, stream.id()));
-
-      cudaCheck(cudaMemcpyAsync(data.gainForHLTonGPU, this->gainForHLTonHost_, sizeof(SiPixelGainForHLTonGPU), cudaMemcpyDefault, stream.id()));
-      cudaCheck(cudaMemcpyAsync(&(data.gainForHLTonGPU->v_pedestals), &(data.gainDataOnGPU), sizeof(SiPixelGainForHLTonGPU_DecodingStructure*), cudaMemcpyDefault, stream.id()));
-    });
-  return data.gainForHLTonGPU;
+  helper_.prefetchAsync(cudaStream);
+  return gainForHLT_;
 }
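
The change replaces the explicit cudaMallocHost/cudaMalloc/cudaMemcpyAsync bookkeeping (separate host copy, per-device GPUData, pointer patch-up for v_pedestals) with a helper_ member that hands out buffers once in the constructor and later advises and prefetches them. The helper's definition is not part of this file; the sketch below is a minimal, hypothetical ManagedMemoryHelper based on CUDA unified memory that matches the allocate()/advise()/prefetchAsync() calls seen in the diff. The class name, members, and the use of a raw cudaStream_t (instead of the cuda::stream_t<> wrapper from cuda-api-wrappers that the real code passes) are assumptions for illustration only.

// Hypothetical sketch, not the actual CMSSW helper class.
#include <cuda_runtime.h>
#include <cstdio>
#include <cstdlib>
#include <vector>

#define CUDA_CHECK(call)                                              \
  do {                                                                \
    cudaError_t err__ = (call);                                       \
    if (err__ != cudaSuccess) {                                       \
      std::fprintf(stderr, "CUDA error %s at %s:%d\n",                \
                   cudaGetErrorString(err__), __FILE__, __LINE__);    \
      std::abort();                                                   \
    }                                                                 \
  } while (0)

class ManagedMemoryHelper {
public:
  // Allocate n elements in unified memory; elemSize can be overridden when the
  // buffer is filled through a reinterpret_cast (as done for v_pedestals above).
  template <typename T>
  void allocate(T** ptr, size_t n, size_t elemSize = sizeof(T)) {
    void* p = nullptr;
    CUDA_CHECK(cudaMallocManaged(&p, n * elemSize));
    *ptr = static_cast<T*>(p);
    allocations_.push_back({p, n * elemSize});
  }

  // Mark every allocation as read-mostly: the driver may then replicate the
  // read-only pages on each device instead of migrating them back and forth.
  void advise() const {
    for (auto const& a : allocations_) {
      CUDA_CHECK(cudaMemAdvise(a.ptr, a.bytes, cudaMemAdviseSetReadMostly, 0));
    }
  }

  // Asynchronously prefetch everything to the current device on the given stream,
  // so the first kernel that reads the payload does not pay the page-fault cost.
  void prefetchAsync(cudaStream_t stream) const {
    int device = 0;
    CUDA_CHECK(cudaGetDevice(&device));
    for (auto const& a : allocations_) {
      CUDA_CHECK(cudaMemPrefetchAsync(a.ptr, a.bytes, device, stream));
    }
  }

  ~ManagedMemoryHelper() {
    for (auto const& a : allocations_) {
      cudaFree(a.ptr);  // cudaFree also releases managed allocations
    }
  }

private:
  struct Allocation { void* ptr; size_t bytes; };
  std::vector<Allocation> allocations_;
};

Under this assumption the same pointer (gainForHLT_) is valid on both host and device, which is why getGPUProductAsync() can simply prefetch and return it instead of maintaining a separate host struct plus per-device copies and fixing up v_pedestals with an extra cudaMemcpyAsync.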