|
6 | 6 |
|
7 | 7 | #include "Geometry/phase1PixelTopology.h"
|
8 | 8 | #include "CUDACore/cudaCheck.h"
|
| 9 | +#include "CUDACore/deviceCount.h" |
| 10 | +#include "CUDACore/ScopedSetDevice.h" |
| 11 | +#include "CUDACore/StreamCache.h" |
9 | 12 | #include "CondFormats/PixelCPEFast.h"
|
10 | 13 |
|
11 |
| -// Services |
12 |
| -// this is needed to get errors from templates |
13 |
| - |
14 |
| -namespace { |
15 |
| - constexpr float micronsToCm = 1.0e-4; |
16 |
| -} |
17 |
| - |
18 | 14 | //-----------------------------------------------------------------------------
|
19 | 15 | //! The constructor.
|
20 | 16 | //-----------------------------------------------------------------------------
|
21 | 17 | PixelCPEFast::PixelCPEFast(std::string const &path) {
|
| 18 | + unsigned int ndetParams; |
| 19 | + |
| 20 | + cudaCheck(cudaMallocManaged(&m_params, sizeof(pixelCPEforGPU::ParamsOnGPU))); |
| 21 | + cudaCheck(cudaMallocManaged(&m_commonParams, sizeof(pixelCPEforGPU::CommonParams))); |
| 22 | + cudaCheck(cudaMallocManaged(&m_layerGeometry, sizeof(pixelCPEforGPU::LayerGeometry))); |
| 23 | + cudaCheck(cudaMallocManaged(&m_averageGeometry, sizeof(pixelCPEforGPU::AverageGeometry))); |
| 24 | + |
22 | 25 | {
|
23 | 26 | std::ifstream in(path, std::ios::binary);
|
24 | 27 | in.exceptions(std::ifstream::badbit | std::ifstream::failbit | std::ifstream::eofbit);
|
25 |
| - in.read(reinterpret_cast<char *>(&m_commonParamsGPU), sizeof(pixelCPEforGPU::CommonParams)); |
26 |
| - unsigned int ndetParams; |
| 28 | + in.read(reinterpret_cast<char *>(m_commonParams), sizeof(pixelCPEforGPU::CommonParams)); |
27 | 29 | in.read(reinterpret_cast<char *>(&ndetParams), sizeof(unsigned int));
|
28 |
| - m_detParamsGPU.resize(ndetParams); |
29 |
| - in.read(reinterpret_cast<char *>(m_detParamsGPU.data()), ndetParams * sizeof(pixelCPEforGPU::DetParams)); |
30 |
| - in.read(reinterpret_cast<char *>(&m_averageGeometry), sizeof(pixelCPEforGPU::AverageGeometry)); |
31 |
| - in.read(reinterpret_cast<char *>(&m_layerGeometry), sizeof(pixelCPEforGPU::LayerGeometry)); |
| 30 | + cudaCheck(cudaMallocManaged(&m_detParams, ndetParams * sizeof(pixelCPEforGPU::DetParams))); |
| 31 | + in.read(reinterpret_cast<char *>(m_detParams), ndetParams * sizeof(pixelCPEforGPU::DetParams)); |
| 32 | + in.read(reinterpret_cast<char *>(m_averageGeometry), sizeof(pixelCPEforGPU::AverageGeometry)); |
| 33 | + in.read(reinterpret_cast<char *>(m_layerGeometry), sizeof(pixelCPEforGPU::LayerGeometry)); |
32 | 34 | }
|
33 | 35 |
|
34 |
| - cpuData_ = { |
35 |
| - &m_commonParamsGPU, |
36 |
| - m_detParamsGPU.data(), |
37 |
| - &m_layerGeometry, |
38 |
| - &m_averageGeometry, |
39 |
| - }; |
40 |
| -} |
41 |
| - |
42 |
| -const pixelCPEforGPU::ParamsOnGPU *PixelCPEFast::getGPUProductAsync(cudaStream_t cudaStream) const { |
43 |
| - const auto &data = gpuData_.dataForCurrentDeviceAsync(cudaStream, [this](GPUData &data, cudaStream_t stream) { |
44 |
| - // and now copy to device... |
45 |
| - cudaCheck(cudaMalloc((void **)&data.h_paramsOnGPU.m_commonParams, sizeof(pixelCPEforGPU::CommonParams))); |
46 |
| - cudaCheck(cudaMalloc((void **)&data.h_paramsOnGPU.m_detParams, |
47 |
| - this->m_detParamsGPU.size() * sizeof(pixelCPEforGPU::DetParams))); |
48 |
| - cudaCheck(cudaMalloc((void **)&data.h_paramsOnGPU.m_averageGeometry, sizeof(pixelCPEforGPU::AverageGeometry))); |
49 |
| - cudaCheck(cudaMalloc((void **)&data.h_paramsOnGPU.m_layerGeometry, sizeof(pixelCPEforGPU::LayerGeometry))); |
50 |
| - cudaCheck(cudaMalloc((void **)&data.d_paramsOnGPU, sizeof(pixelCPEforGPU::ParamsOnGPU))); |
| 36 | + m_params->m_commonParams = m_commonParams; |
| 37 | + m_params->m_detParams = m_detParams; |
| 38 | + m_params->m_layerGeometry = m_layerGeometry; |
| 39 | + m_params->m_averageGeometry = m_averageGeometry; |
51 | 40 |
|
52 |
| - cudaCheck(cudaMemcpyAsync( |
53 |
| - data.d_paramsOnGPU, &data.h_paramsOnGPU, sizeof(pixelCPEforGPU::ParamsOnGPU), cudaMemcpyDefault, stream)); |
54 |
| - cudaCheck(cudaMemcpyAsync((void *)data.h_paramsOnGPU.m_commonParams, |
55 |
| - &this->m_commonParamsGPU, |
56 |
| - sizeof(pixelCPEforGPU::CommonParams), |
57 |
| - cudaMemcpyDefault, |
58 |
| - stream)); |
59 |
| - cudaCheck(cudaMemcpyAsync((void *)data.h_paramsOnGPU.m_averageGeometry, |
60 |
| - &this->m_averageGeometry, |
61 |
| - sizeof(pixelCPEforGPU::AverageGeometry), |
62 |
| - cudaMemcpyDefault, |
63 |
| - stream)); |
64 |
| - cudaCheck(cudaMemcpyAsync((void *)data.h_paramsOnGPU.m_layerGeometry, |
65 |
| - &this->m_layerGeometry, |
66 |
| - sizeof(pixelCPEforGPU::LayerGeometry), |
67 |
| - cudaMemcpyDefault, |
68 |
| - stream)); |
69 |
| - cudaCheck(cudaMemcpyAsync((void *)data.h_paramsOnGPU.m_detParams, |
70 |
| - this->m_detParamsGPU.data(), |
71 |
| - this->m_detParamsGPU.size() * sizeof(pixelCPEforGPU::DetParams), |
72 |
| - cudaMemcpyDefault, |
73 |
| - stream)); |
74 |
| - }); |
75 |
| - return data.d_paramsOnGPU; |
| 41 | + for (int device = 0, ndev = cms::cuda::deviceCount(); device < ndev; ++device) { |
| 42 | +#ifndef CUDAUVM_DISABLE_ADVICE |
| 43 | + cudaCheck(cudaMemAdvise(m_params, sizeof(pixelCPEforGPU::ParamsOnGPU), cudaMemAdviseSetReadMostly, device)); |
| 44 | + cudaCheck(cudaMemAdvise(m_commonParams, sizeof(pixelCPEforGPU::CommonParams), cudaMemAdviseSetReadMostly, device)); |
| 45 | + cudaCheck( |
| 46 | + cudaMemAdvise(m_detParams, ndetParams * sizeof(pixelCPEforGPU::DetParams), cudaMemAdviseSetReadMostly, device)); |
| 47 | + cudaCheck( |
| 48 | + cudaMemAdvise(m_layerGeometry, sizeof(pixelCPEforGPU::LayerGeometry), cudaMemAdviseSetReadMostly, device)); |
| 49 | + cudaCheck( |
| 50 | + cudaMemAdvise(m_averageGeometry, sizeof(pixelCPEforGPU::AverageGeometry), cudaMemAdviseSetReadMostly, device)); |
| 51 | +#endif |
| 52 | +#ifndef CUDAUVM_DISABLE_PREFETCH |
| 53 | + cms::cuda::ScopedSetDevice guard{device}; |
| 54 | + auto stream = cms::cuda::getStreamCache().get(); |
| 55 | + cudaCheck(cudaMemPrefetchAsync(m_params, sizeof(pixelCPEforGPU::ParamsOnGPU), device, stream.get())); |
| 56 | + cudaCheck(cudaMemPrefetchAsync(m_commonParams, sizeof(pixelCPEforGPU::CommonParams), device, stream.get())); |
| 57 | + cudaCheck(cudaMemPrefetchAsync(m_detParams, ndetParams * sizeof(pixelCPEforGPU::DetParams), device, stream.get())); |
| 58 | + cudaCheck(cudaMemPrefetchAsync(m_layerGeometry, sizeof(pixelCPEforGPU::LayerGeometry), device, stream.get())); |
| 59 | + cudaCheck(cudaMemPrefetchAsync(m_averageGeometry, sizeof(pixelCPEforGPU::AverageGeometry), device, stream.get())); |
| 60 | +#endif |
| 61 | + } |
76 | 62 | }
|
77 | 63 |
|
78 |
| -PixelCPEFast::GPUData::~GPUData() { |
79 |
| - if (d_paramsOnGPU != nullptr) { |
80 |
| - cudaFree((void *)h_paramsOnGPU.m_commonParams); |
81 |
| - cudaFree((void *)h_paramsOnGPU.m_detParams); |
82 |
| - cudaFree((void *)h_paramsOnGPU.m_averageGeometry); |
83 |
| - cudaFree(d_paramsOnGPU); |
84 |
| - } |
| 64 | +PixelCPEFast::~PixelCPEFast() { |
| 65 | + cudaFree(m_params); |
| 66 | + cudaFree(m_commonParams); |
| 67 | + cudaFree(m_detParams); |
| 68 | + cudaFree(m_layerGeometry); |
| 69 | + cudaFree(m_averageGeometry); |
85 | 70 | }
|
0 commit comments