Skip to content

Commit 1d8b0bb

Browse files
committed
LST CPU Optimizations - trig simplifications and redundant load removal
1 parent 6d30470 commit 1d8b0bb

File tree

10 files changed

+1019
-877
lines changed

10 files changed

+1019
-877
lines changed

HeterogeneousCore/AlpakaMath/interface/deltaPhi.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -22,7 +22,7 @@ namespace cms::alpakatools {
2222

2323
template <alpaka::concepts::Acc TAcc, typename T>
2424
ALPAKA_FN_HOST_ACC inline T deltaPhi(TAcc const& acc, T x1, T y1, T x2, T y2) {
25-
return reducePhiRange(acc, alpaka::math::atan2(acc, -y2, -x2) - alpaka::math::atan2(acc, -y1, -x1));
25+
return alpaka::math::atan2(acc, x1 * y2 - x2 * y1, x1 * x2 + y1 * y2);
2626
}
2727

2828
template <alpaka::concepts::Acc TAcc, typename T>

RecoTracker/LSTCore/interface/alpaka/Common.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -40,6 +40,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
4040
HOST_DEVICE_CONSTANT float kStripPSZpitch = 2.4;
4141
HOST_DEVICE_CONSTANT float kStrip2SZpitch = 5.0;
4242
HOST_DEVICE_CONSTANT float kWidth2S = 0.009;
43+
HOST_DEVICE_CONSTANT float kDisks2SMinRadius = 60.0;
4344
HOST_DEVICE_CONSTANT float kWidthPS = 0.01;
4445
HOST_DEVICE_CONSTANT float kPt_betaMax = 7.0;
4546
HOST_DEVICE_CONSTANT int kNTripletThreshold = 1000;

RecoTracker/LSTCore/src/alpaka/Kernels.h

Lines changed: 23 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -247,7 +247,10 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
247247
if (quintuplets.isDup()[ix] & 1)
248248
continue;
249249

250-
bool isPT5_ix = quintuplets.partOfPT5()[ix];
250+
const bool isPT5_ix = quintuplets.partOfPT5()[ix];
251+
const float eta1 = __H2F(quintuplets.eta()[ix]);
252+
const float phi1 = __H2F(quintuplets.phi()[ix]);
253+
const float score_rphisum1 = __H2F(quintuplets.score_rphisum()[ix]);
251254

252255
for (unsigned int jx1 = 0; jx1 < nQuintuplets_lowmod2; jx1++) {
253256
unsigned int jx = quintupletModuleIndices_lowmod2 + jx1;
@@ -257,31 +260,24 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
257260
if (quintuplets.isDup()[jx] & 1)
258261
continue;
259262

260-
bool isPT5_jx = quintuplets.partOfPT5()[jx];
263+
const bool isPT5_jx = quintuplets.partOfPT5()[jx];
261264

262265
if (isPT5_ix && isPT5_jx)
263266
continue;
264267

265-
float eta1 = __H2F(quintuplets.eta()[ix]);
266-
float phi1 = __H2F(quintuplets.phi()[ix]);
267-
float score_rphisum1 = __H2F(quintuplets.score_rphisum()[ix]);
268-
269-
float eta2 = __H2F(quintuplets.eta()[jx]);
270-
float phi2 = __H2F(quintuplets.phi()[jx]);
271-
float score_rphisum2 = __H2F(quintuplets.score_rphisum()[jx]);
272-
273-
float dEta = alpaka::math::abs(acc, eta1 - eta2);
274-
float dPhi = cms::alpakatools::deltaPhi(acc, phi1, phi2);
275-
268+
const float eta2 = __H2F(quintuplets.eta()[jx]);
269+
const float dEta = alpaka::math::abs(acc, eta1 - eta2);
276270
if (dEta > 0.1f)
277271
continue;
278272

273+
const float phi2 = __H2F(quintuplets.phi()[jx]);
274+
const float dPhi = cms::alpakatools::deltaPhi(acc, phi1, phi2);
279275
if (alpaka::math::abs(acc, dPhi) > 0.1f)
280276
continue;
281277

282-
float dR2 = dEta * dEta + dPhi * dPhi;
283-
int nMatched = checkHitsT5(ix, jx, quintuplets);
284-
const int minNHitsForDup_T5 = 5;
278+
const float dR2 = dEta * dEta + dPhi * dPhi;
279+
const int nMatched = checkHitsT5(ix, jx, quintuplets);
280+
constexpr int minNHitsForDup_T5 = 5;
285281

286282
float d2 = 0.f;
287283
CMS_UNROLL_LOOP
@@ -291,6 +287,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
291287
}
292288

293289
if (((dR2 < 0.001f || nMatched >= minNHitsForDup_T5) && d2 < 1.0f) || (dR2 < 0.02f && d2 < 0.1f)) {
290+
const float score_rphisum2 = __H2F(quintuplets.score_rphisum()[jx]);
294291
if (isPT5_jx || score_rphisum1 > score_rphisum2) {
295292
rmQuintupletFromMemory(quintuplets, ix, true);
296293
} else if (isPT5_ix || score_rphisum1 < score_rphisum2) {
@@ -458,11 +455,21 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
458455
ALPAKA_FN_ACC void operator()(Acc2D const& acc, PixelQuintuplets pixelQuintuplets) const {
459456
unsigned int nPixelQuintuplets = pixelQuintuplets.nPixelQuintuplets();
460457
for (unsigned int ix : cms::alpakatools::uniform_elements_y(acc, nPixelQuintuplets)) {
458+
float eta1 = __H2F(pixelQuintuplets.eta()[ix]);
459+
float phi1 = __H2F(pixelQuintuplets.phi()[ix]);
461460
float score1 = __H2F(pixelQuintuplets.score()[ix]);
462461
for (unsigned int jx : cms::alpakatools::uniform_elements_x(acc, nPixelQuintuplets)) {
463462
if (ix == jx)
464463
continue;
465464

465+
float eta2 = __H2F(pixelQuintuplets.eta()[jx]);
466+
if (alpaka::math::abs(acc, eta1 - eta2) > 0.2f)
467+
continue;
468+
469+
float phi2 = __H2F(pixelQuintuplets.phi()[jx]);
470+
if (alpaka::math::abs(acc, cms::alpakatools::deltaPhi(acc, phi1, phi2)) > 0.2f)
471+
continue;
472+
466473
int nMatched = checkHitspT5(ix, jx, pixelQuintuplets);
467474
float score2 = __H2F(pixelQuintuplets.score()[jx]);
468475
const int minNHitsForDup_pT5 = 7;

0 commit comments

Comments
 (0)