Skip to content

Commit 4f39fc2

Browse files
committed
Migrate SiPixelGainCalibrationForHLTGPU to unified memory
1 parent 8acba84 commit 4f39fc2

File tree

2 files changed

+24
-42
lines changed

2 files changed

+24
-42
lines changed

CalibTracker/SiPixelESProducers/interface/SiPixelGainCalibrationForHLTGPU.h

+3-9
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
#ifndef CalibTracker_SiPixelESProducers_SiPixelGainCalibrationForHLTGPU_H
22
#define CalibTracker_SiPixelESProducers_SiPixelGainCalibrationForHLTGPU_H
33

4-
#include "HeterogeneousCore/CUDACore/interface/CUDAESProduct.h"
4+
#include "HeterogeneousCore/CUDACore/interface/CUDAESManaged.h"
55
#include "CondFormats/SiPixelObjects/interface/SiPixelGainCalibrationForHLT.h"
66

77
#include <cuda/api_wrappers.h>
@@ -19,14 +19,8 @@ class SiPixelGainCalibrationForHLTGPU {
1919
const SiPixelGainForHLTonGPU *getGPUProductAsync(cuda::stream_t<>& cudaStream) const;
2020

2121
private:
22-
const SiPixelGainCalibrationForHLT *gains_ = nullptr;
23-
SiPixelGainForHLTonGPU *gainForHLTonHost_ = nullptr;
24-
struct GPUData {
25-
~GPUData();
26-
SiPixelGainForHLTonGPU *gainForHLTonGPU = nullptr;
27-
SiPixelGainForHLTonGPU_DecodingStructure *gainDataOnGPU = nullptr;
28-
};
29-
CUDAESProduct<GPUData> gpuData_;
22+
CUDAESManaged helper_;
23+
SiPixelGainForHLTonGPU *gainForHLT_ = nullptr;
3024
};
3125

3226
#endif

CalibTracker/SiPixelESProducers/src/SiPixelGainCalibrationForHLTGPU.cc

+21-33
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,7 @@
77

88
#include <cuda.h>
99

10-
SiPixelGainCalibrationForHLTGPU::SiPixelGainCalibrationForHLTGPU(const SiPixelGainCalibrationForHLT& gains, const TrackerGeometry& geom):
11-
gains_(&gains)
10+
SiPixelGainCalibrationForHLTGPU::SiPixelGainCalibrationForHLTGPU(const SiPixelGainCalibrationForHLT& gains, const TrackerGeometry& geom)
1211
{
1312
// bizzarre logic (looking for fist strip-det) don't ask
1413
auto const & dus = geom.detUnits();
@@ -25,8 +24,7 @@ SiPixelGainCalibrationForHLTGPU::SiPixelGainCalibrationForHLTGPU(const SiPixelGa
2524
std::cout << "sizes " << sizeof(char) << ' ' << sizeof(uint8_t) << ' ' << sizeof(SiPixelGainForHLTonGPU::DecodingStructure) << std::endl;
2625
*/
2726

28-
cudaCheck(cudaMallocHost((void**) & gainForHLTonHost_, sizeof(SiPixelGainForHLTonGPU)));
29-
//gainForHLTonHost_->v_pedestals = gainDataOnGPU_; // how to do this?
27+
helper_.allocate(&gainForHLT_, 1);
3028

3129
// do not read back from the (possibly write-combined) memory buffer
3230
auto minPed = gains.getPedLow();
@@ -36,21 +34,21 @@ SiPixelGainCalibrationForHLTGPU::SiPixelGainCalibrationForHLTGPU(const SiPixelGa
3634
auto nBinsToUseForEncoding = 253;
3735

3836
// we will simplify later (not everything is needed....)
39-
gainForHLTonHost_->minPed_ = minPed;
40-
gainForHLTonHost_->maxPed_ = maxPed;
41-
gainForHLTonHost_->minGain_= minGain;
42-
gainForHLTonHost_->maxGain_= maxGain;
37+
gainForHLT_->minPed_ = minPed;
38+
gainForHLT_->maxPed_ = maxPed;
39+
gainForHLT_->minGain_= minGain;
40+
gainForHLT_->maxGain_= maxGain;
4341

44-
gainForHLTonHost_->numberOfRowsAveragedOver_ = 80;
45-
gainForHLTonHost_->nBinsToUseForEncoding_ = nBinsToUseForEncoding;
46-
gainForHLTonHost_->deadFlag_ = 255;
47-
gainForHLTonHost_->noisyFlag_ = 254;
42+
gainForHLT_->numberOfRowsAveragedOver_ = 80;
43+
gainForHLT_->nBinsToUseForEncoding_ = nBinsToUseForEncoding;
44+
gainForHLT_->deadFlag_ = 255;
45+
gainForHLT_->noisyFlag_ = 254;
4846

49-
gainForHLTonHost_->pedPrecision = static_cast<float>(maxPed - minPed) / nBinsToUseForEncoding;
50-
gainForHLTonHost_->gainPrecision = static_cast<float>(maxGain - minGain) / nBinsToUseForEncoding;
47+
gainForHLT_->pedPrecision = static_cast<float>(maxPed - minPed) / nBinsToUseForEncoding;
48+
gainForHLT_->gainPrecision = static_cast<float>(maxGain - minGain) / nBinsToUseForEncoding;
5149

5250
/*
53-
std::cout << "precisions g " << gainForHLTonHost_->pedPrecision << ' ' << gainForHLTonHost_->gainPrecision << std::endl;
51+
std::cout << "precisions g " << gainForHLT_->pedPrecision << ' ' << gainForHLT_->gainPrecision << std::endl;
5452
*/
5553

5654
// fill the index map
@@ -68,31 +66,21 @@ SiPixelGainCalibrationForHLTGPU::SiPixelGainCalibrationForHLTGPU(const SiPixelGa
6866
assert(0==p->iend%2);
6967
assert(p->ibegin!=p->iend);
7068
assert(p->ncols>0);
71-
gainForHLTonHost_->rangeAndCols[i] = std::make_pair(SiPixelGainForHLTonGPU::Range(p->ibegin,p->iend), p->ncols);
69+
gainForHLT_->rangeAndCols[i] = std::make_pair(SiPixelGainForHLTonGPU::Range(p->ibegin,p->iend), p->ncols);
7270
// if (ind[i].detid!=dus[i]->geographicalId()) std::cout << ind[i].detid<<"!="<<dus[i]->geographicalId() << std::endl;
73-
// gainForHLTonHost_->rangeAndCols[i] = std::make_pair(SiPixelGainForHLTonGPU::Range(ind[i].ibegin,ind[i].iend), ind[i].ncols);
71+
// gainForHLT_->rangeAndCols[i] = std::make_pair(SiPixelGainForHLTonGPU::Range(ind[i].ibegin,ind[i].iend), ind[i].ncols);
7472
}
7573

76-
}
74+
helper_.allocate(&(gainForHLT_->v_pedestals), gains.data().size(), sizeof(char)); // override the element size because essentially we reinterpret_cast on the fly
75+
std::memcpy(gainForHLT_->v_pedestals, gains.data().data(), gains.data().size()*sizeof(char));
7776

78-
SiPixelGainCalibrationForHLTGPU::~SiPixelGainCalibrationForHLTGPU() {
79-
cudaCheck(cudaFreeHost(gainForHLTonHost_));
77+
helper_.advise();
8078
}
8179

82-
SiPixelGainCalibrationForHLTGPU::GPUData::~GPUData() {
83-
cudaCheck(cudaFree(gainForHLTonGPU));
84-
cudaCheck(cudaFree(gainDataOnGPU));
80+
SiPixelGainCalibrationForHLTGPU::~SiPixelGainCalibrationForHLTGPU() {
8581
}
8682

8783
const SiPixelGainForHLTonGPU *SiPixelGainCalibrationForHLTGPU::getGPUProductAsync(cuda::stream_t<>& cudaStream) const {
88-
const auto& data = gpuData_.dataForCurrentDeviceAsync(cudaStream, [this](GPUData& data, cuda::stream_t<>& stream) {
89-
cudaCheck(cudaMalloc((void**) & data.gainForHLTonGPU, sizeof(SiPixelGainForHLTonGPU)));
90-
cudaCheck(cudaMalloc((void**) & data.gainDataOnGPU, this->gains_->data().size())); // TODO: this could be changed to cuda::memory::device::unique_ptr<>
91-
// gains.data().data() is used also for non-GPU code, we cannot allocate it on aligned and write-combined memory
92-
cudaCheck(cudaMemcpyAsync(data.gainDataOnGPU, this->gains_->data().data(), this->gains_->data().size(), cudaMemcpyDefault, stream.id()));
93-
94-
cudaCheck(cudaMemcpyAsync(data.gainForHLTonGPU, this->gainForHLTonHost_, sizeof(SiPixelGainForHLTonGPU), cudaMemcpyDefault, stream.id()));
95-
cudaCheck(cudaMemcpyAsync(&(data.gainForHLTonGPU->v_pedestals), &(data.gainDataOnGPU), sizeof(SiPixelGainForHLTonGPU_DecodingStructure*), cudaMemcpyDefault, stream.id()));
96-
});
97-
return data.gainForHLTonGPU;
84+
helper_.prefetchAsync(cudaStream);
85+
return gainForHLT_;
9886
}

0 commit comments

Comments
 (0)