@@ -174,11 +174,14 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::pixelClustering {
174174 template <typename TrackerTraits>
175175 struct FindClus {
176176 // assume that we can cover the whole module with up to TrackerTraits::maxIterClustering blockDimension-wide iterations
177- static constexpr uint32_t maxIterGPU = 16 ;
178177
179- // this must be larger than maxPixInModule / maxIterGPU , and should be a multiple of the warp size
178+ // this must be larger than maxPixInModule / maxIterClustering, and should be a multiple of the warp size
180179 static constexpr uint32_t maxElementsPerBlock =
181- cms::alpakatools::round_up_by (TrackerTraits::maxPixInModule / maxIterGPU, 128 );
180+ cms::alpakatools::round_up_by (TrackerTraits::maxPixInModule / TrackerTraits::maxIterClustering, 64 );
181+ static constexpr uint32_t maxElementsPerBlockMorph = cms::alpakatools::round_up_by(
182+ (TrackerTraits::maxPixInModule + TrackerTraits::maxPixInModuleForMorphing) / TrackerTraits::maxIterClustering,
183+ 64 );
184+ static_assert (maxElementsPerBlockMorph >= maxElementsPerBlock);
182185
183186 ALPAKA_FN_ACC void operator ()(Acc1D const & acc,
184187 SiPixelDigisSoAView digi_view,
@@ -259,11 +262,12 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::pixelClustering {
259262 }
260263 }
261264
262- using Hist = cms::alpakatools::HistoContainer<uint16_t ,
263- TrackerTraits::clusterBinning,
264- TrackerTraits::maxPixInModule,
265- TrackerTraits::clusterBits,
266- uint16_t >;
265+ using Hist =
266+ cms::alpakatools::HistoContainer<uint16_t ,
267+ TrackerTraits::clusterBinning,
268+ TrackerTraits::maxPixInModule + TrackerTraits::maxPixInModuleForMorphing,
269+ TrackerTraits::clusterBits,
270+ uint16_t >;
267271 constexpr int warpSize = cms::alpakatools::warpSize;
268272 auto & hist = alpaka::declareSharedVar<Hist, __COUNTER__>(acc);
269273 auto & ws = alpaka::declareSharedVar<typename Hist::Counter[warpSize], __COUNTER__>(acc);
@@ -568,15 +572,25 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::pixelClustering {
568572#endif
569573
570574 [[maybe_unused]] const uint32_t blockDimension = alpaka::getWorkDiv<alpaka::Block, alpaka::Elems>(acc)[0u ];
571- // assume that we can cover the whole module with up to maxIterGPU blockDimension-wide iterations
572- ALPAKA_ASSERT_ACC ((hist.size () / blockDimension) < maxIterGPU );
575+ // assume that we can cover the whole module with up to maxIterClustering blockDimension-wide iterations
576+ ALPAKA_ASSERT_ACC ((hist.size () / blockDimension) < TrackerTraits::maxIterClustering );
573577
574578 // number of elements per thread
575- constexpr uint32_t maxElements =
576- cms::alpakatools::requires_single_thread_per_block_v<Acc1D> ? maxElementsPerBlock : 1 ;
579+ const uint32_t maxElements = cms::alpakatools::requires_single_thread_per_block_v<Acc1D>
580+ ? (enableDigiMorphing ? maxElementsPerBlockMorph : maxElementsPerBlock)
581+ : 1 ;
582+
583+ #ifdef GPU_DEBUG
584+ const auto nElementsPerThread = alpaka::getWorkDiv<alpaka::Thread, alpaka::Elems>(acc)[0u ];
585+ if (nElementsPerThread > maxElements)
586+ printf (" This is WRONG: nElementsPerThread > maxElements : %d > %d\n " , nElementsPerThread, maxElements);
587+ else if (thisModuleId % 500 == 1 )
588+ printf (" This is OK: nElementsPerThread <= maxElements : %d <= %d\n " , nElementsPerThread, maxElements);
589+ #endif
590+
577591 ALPAKA_ASSERT_ACC ((alpaka::getWorkDiv<alpaka::Thread, alpaka::Elems>(acc)[0u ] <= maxElements));
578592
579- constexpr unsigned int maxIter = maxIterGPU * maxElements;
593+ const unsigned int maxIter = TrackerTraits::maxIterClustering * maxElements;
580594
581595 // nearest neighbours (nn)
582596 constexpr int maxNeighbours = 8 ;
0 commit comments