Skip to content

Commit dd7156a

Browse files
authored
Merge pull request #49021 from AdrianoDee/digimoprh_sharedmemory_160X
Fixes for Digi Morphing: Limiting Histogram Size and Decoupling for `TrackerTraits`
2 parents c81b8a1 + a487e5e commit dd7156a

File tree

4 files changed

+52
-19
lines changed

4 files changed

+52
-19
lines changed

Geometry/CommonTopologies/interface/SimplePixelTopology.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -377,6 +377,9 @@ namespace pixelTopology {
377377
static constexpr uint16_t last_barrel_detIndex = 864;
378378

379379
static constexpr uint32_t maxPixInModule = 6000;
380+
static constexpr uint32_t maxPixInModuleForMorphing = 0;
381+
static constexpr uint32_t maxIterClustering = 16;
382+
380383
static constexpr uint32_t maxNumClustersPerModules = phase2PixelTopology::maxNumClustersPerModules;
381384
static constexpr uint32_t maxHitsInModule = phase2PixelTopology::maxNumClustersPerModules;
382385

@@ -471,6 +474,9 @@ namespace pixelTopology {
471474
static constexpr uint16_t last_barrel_detIndex = 1184;
472475

473476
static constexpr uint32_t maxPixInModule = 6000;
477+
static constexpr uint32_t maxPixInModuleForMorphing = maxPixInModule * 2 / 5;
478+
static constexpr uint32_t maxIterClustering = 24;
479+
474480
static constexpr uint32_t maxNumClustersPerModules = phase1PixelTopology::maxNumClustersPerModules;
475481
static constexpr uint32_t maxHitsInModule = phase1PixelTopology::maxNumClustersPerModules;
476482

@@ -582,6 +588,8 @@ namespace pixelTopology {
582588
static constexpr uint32_t maxNumberOfQuadruplets = maxNumberOfTuples;
583589

584590
static constexpr uint32_t maxPixInModule = 10000;
591+
static constexpr uint32_t maxPixInModuleForMorphing = maxPixInModule * 1 / 10;
592+
static constexpr uint32_t maxIterClustering = 32;
585593

586594
static constexpr uint32_t maxNumOfActiveDoublets =
587595
maxNumberOfDoublets / 4; // TODO need to think a better way to avoid this duplication

RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/PixelClustering.h

Lines changed: 27 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -174,11 +174,14 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::pixelClustering {
174174
template <typename TrackerTraits>
175175
struct FindClus {
176176
// assume that we can cover the whole module with up to TrackerTraits::maxIterClustering blockDimension-wide iterations
177-
static constexpr uint32_t maxIterGPU = 16;
178177

179-
// this must be larger than maxPixInModule / maxIterGPU, and should be a multiple of the warp size
178+
// this must be larger than maxPixInModule / maxIterClustering, and should be a multiple of the warp size
180179
static constexpr uint32_t maxElementsPerBlock =
181-
cms::alpakatools::round_up_by(TrackerTraits::maxPixInModule / maxIterGPU, 128);
180+
cms::alpakatools::round_up_by(TrackerTraits::maxPixInModule / TrackerTraits::maxIterClustering, 64);
181+
static constexpr uint32_t maxElementsPerBlockMorph = cms::alpakatools::round_up_by(
182+
(TrackerTraits::maxPixInModule + TrackerTraits::maxPixInModuleForMorphing) / TrackerTraits::maxIterClustering,
183+
64);
184+
static_assert(maxElementsPerBlockMorph >= maxElementsPerBlock);
182185

183186
ALPAKA_FN_ACC void operator()(Acc1D const& acc,
184187
SiPixelDigisSoAView digi_view,
@@ -259,11 +262,12 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::pixelClustering {
259262
}
260263
}
261264

262-
using Hist = cms::alpakatools::HistoContainer<uint16_t,
263-
TrackerTraits::clusterBinning,
264-
TrackerTraits::maxPixInModule,
265-
TrackerTraits::clusterBits,
266-
uint16_t>;
265+
using Hist =
266+
cms::alpakatools::HistoContainer<uint16_t,
267+
TrackerTraits::clusterBinning,
268+
TrackerTraits::maxPixInModule + TrackerTraits::maxPixInModuleForMorphing,
269+
TrackerTraits::clusterBits,
270+
uint16_t>;
267271
constexpr int warpSize = cms::alpakatools::warpSize;
268272
auto& hist = alpaka::declareSharedVar<Hist, __COUNTER__>(acc);
269273
auto& ws = alpaka::declareSharedVar<typename Hist::Counter[warpSize], __COUNTER__>(acc);
@@ -568,15 +572,25 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::pixelClustering {
568572
#endif
569573

570574
[[maybe_unused]] const uint32_t blockDimension = alpaka::getWorkDiv<alpaka::Block, alpaka::Elems>(acc)[0u];
571-
// assume that we can cover the whole module with up to maxIterGPU blockDimension-wide iterations
572-
ALPAKA_ASSERT_ACC((hist.size() / blockDimension) < maxIterGPU);
575+
// assume that we can cover the whole module with up to maxIterClustering blockDimension-wide iterations
576+
ALPAKA_ASSERT_ACC((hist.size() / blockDimension) < TrackerTraits::maxIterClustering);
573577

574578
// number of elements per thread
575-
constexpr uint32_t maxElements =
576-
cms::alpakatools::requires_single_thread_per_block_v<Acc1D> ? maxElementsPerBlock : 1;
579+
const uint32_t maxElements = cms::alpakatools::requires_single_thread_per_block_v<Acc1D>
580+
? (enableDigiMorphing ? maxElementsPerBlockMorph : maxElementsPerBlock)
581+
: 1;
582+
583+
#ifdef GPU_DEBUG
584+
const auto nElementsPerThread = alpaka::getWorkDiv<alpaka::Thread, alpaka::Elems>(acc)[0u];
585+
if (nElementsPerThread > maxElements)
586+
printf("This is WRONG: nElementsPerThread > maxElements : %d > %d\n", nElementsPerThread, maxElements);
587+
else if (thisModuleId % 500 == 1)
588+
printf("This is OK: nElementsPerThread <= maxElements : %d <= %d\n", nElementsPerThread, maxElements);
589+
#endif
590+
577591
ALPAKA_ASSERT_ACC((alpaka::getWorkDiv<alpaka::Thread, alpaka::Elems>(acc)[0u] <= maxElements));
578592

579-
constexpr unsigned int maxIter = maxIterGPU * maxElements;
593+
const unsigned int maxIter = TrackerTraits::maxIterClustering * maxElements;
580594

581595
// nearest neighbours (nn)
582596
constexpr int maxNeighbours = 8;

RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/SiPixelRawToCluster.cc

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -210,6 +210,14 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
210210
digiMorphingConfig_.applyDigiMorphing = iConfig.getParameter<bool>("DoDigiMorphing");
211211
digiMorphingConfig_.maxFakesInModule = iConfig.getParameter<uint32_t>("MaxFakesInModule");
212212

213+
if (digiMorphingConfig_.maxFakesInModule > TrackerTraits::maxPixInModuleForMorphing) {
214+
throw cms::Exception("Configuration")
215+
<< "[SiPixelDigiMorphing]:"
216+
<< " maxFakesInModule should be <= " << TrackerTraits::maxPixInModuleForMorphing
217+
<< " (TrackerTraits::maxPixInModuleForMorphing)"
218+
<< " while " << digiMorphingConfig_.maxFakesInModule << " was provided at config level.\n";
219+
}
220+
213221
// regions
214222
if (!iConfig.getParameter<edm::ParameterSet>("Regions").getParameterNames().empty()) {
215223
regions_ = std::make_unique<PixelUnpackingRegions>(iConfig, consumesCollector());
@@ -233,7 +241,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
233241
desc.add<double>("VCaltoElectronOffset", -60.f);
234242
desc.add<double>("VCaltoElectronOffset_L1", -670.f);
235243
desc.add<bool>("DoDigiMorphing", false);
236-
desc.add<uint32_t>("MaxFakesInModule", TrackerTraits::maxPixInModule * 2 / 5);
244+
desc.add<uint32_t>("MaxFakesInModule", TrackerTraits::maxPixInModuleForMorphing);
237245

238246
desc.add<edm::InputTag>("InputLabel", edm::InputTag("rawDataCollector"));
239247
{

RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/SiPixelRawToClusterKernel.dev.cc

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -570,20 +570,23 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
570570

571571
{
572572
const int blocks = 64;
573-
const auto elementsPerBlockFindClus = FindClus<TrackerTraits>::maxElementsPerBlock;
574-
const auto workDivMaxNumModules = cms::alpakatools::make_workdiv<Acc1D>(blocks, elementsPerBlockFindClus);
573+
574+
const auto elementsPerBlockFindClus = digiMorphingConfig.applyDigiMorphing
575+
? FindClus<TrackerTraits>::maxElementsPerBlockMorph
576+
: FindClus<TrackerTraits>::maxElementsPerBlock;
577+
const auto workDivFindClus = cms::alpakatools::make_workdiv<Acc1D>(blocks, elementsPerBlockFindClus);
575578

576579
// allocate a transient collection for the fake pixels recovered by the digi morphing algorithm
577580
auto fakes_d = SiPixelDigisSoACollection(blocks * digiMorphingConfig.maxFakesInModule, queue);
578-
579581
#ifdef GPU_DEBUG
580-
std::cout << " FindClus kernel launch with " << numberOfModules << " blocks of " << elementsPerBlockFindClus
582+
alpaka::wait(queue);
583+
std::cout << "FindClus kernel launch with " << blocks << " blocks of " << elementsPerBlockFindClus
581584
<< " threadsPerBlockOrElementsPerThread\n";
582585
#endif
583586

584587
// Use device buffer created by producer and the module count stored in digiMorphingConfig
585588
alpaka::exec<Acc1D>(queue,
586-
workDivMaxNumModules,
589+
workDivFindClus,
587590
FindClus<TrackerTraits>{},
588591
digis_d->view(),
589592
fakes_d.view(),

0 commit comments

Comments
 (0)