Skip to content

Commit 532d885

Browse files
authored
Merge pull request #596 from stephenswat/feat/thread_id
Add helper class for thread and block identifiers
2 parents 605538d + 69c0adc commit 532d885

File tree

9 files changed

+270
-57
lines changed

9 files changed

+270
-57
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
/**
2+
* traccc library, part of the ACTS project (R&D line)
3+
*
4+
* (c) 2024 CERN for the benefit of the ACTS project
5+
*
6+
* Mozilla Public License Version 2.0
7+
*/
8+
9+
#pragma once
10+
11+
#include <alpaka/alpaka.hpp>
12+
13+
#include "traccc/definitions/qualifiers.hpp"
14+
15+
namespace traccc::alpaka {
16+
template <typename Acc>
17+
struct thread_id1 {
18+
TRACCC_DEVICE thread_id1(const Acc& acc) : m_acc(acc) {}
19+
20+
auto inline TRACCC_DEVICE getLocalThreadId() const {
21+
return ::alpaka::getIdx<::alpaka::Block, ::alpaka::Threads>(m_acc)[0u];
22+
}
23+
24+
auto inline TRACCC_DEVICE getLocalThreadIdX() const {
25+
return getLocalThreadId();
26+
}
27+
28+
auto inline TRACCC_DEVICE getGlobalThreadId() const {
29+
return getLocalThreadId() + getBlockIdX() * getBlockDimX();
30+
}
31+
32+
auto inline TRACCC_DEVICE getGlobalThreadIdX() const {
33+
return getLocalThreadId() + getBlockIdX() * getBlockDimX();
34+
}
35+
36+
auto inline TRACCC_DEVICE getBlockIdX() const {
37+
return ::alpaka::getIdx<::alpaka::Grid, ::alpaka::Blocks>(m_acc)[0u];
38+
}
39+
40+
auto inline TRACCC_DEVICE getBlockDimX() const {
41+
return ::alpaka::getWorkDiv<::alpaka::Block, ::alpaka::Threads>(
42+
m_acc)[0u];
43+
}
44+
45+
auto inline TRACCC_DEVICE getGridDimX() const {
46+
return ::alpaka::getWorkDiv<::alpaka::Grid, ::alpaka::Blocks>(
47+
m_acc)[0u];
48+
}
49+
50+
private:
51+
const Acc& m_acc;
52+
};
53+
} // namespace traccc::alpaka

device/alpaka/src/clusterization/clusterization_algorithm.cpp

+7-11
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
#include "../utils/utils.hpp"
1313

1414
// Project include(s)
15+
#include "traccc/alpaka/utils/thread_id.hpp"
1516
#include "traccc/clusterization/clustering_config.hpp"
1617
#include "traccc/clusterization/device/ccl_kernel.hpp"
1718

@@ -35,12 +36,7 @@ struct CCLKernel {
3536
measurement_collection_types::view measurements_view,
3637
vecmem::data::vector_view<unsigned int> cell_links) const {
3738

38-
auto const localThreadIdx =
39-
::alpaka::getIdx<::alpaka::Block, ::alpaka::Threads>(acc)[0u];
40-
auto const localBlockIdx =
41-
::alpaka::getIdx<::alpaka::Grid, ::alpaka::Blocks>(acc)[0u];
42-
auto const blockExtent =
43-
::alpaka::getWorkDiv<::alpaka::Block, ::alpaka::Threads>(acc)[0u];
39+
traccc::alpaka::thread_id1 thread_id(acc);
4440

4541
auto& partition_start =
4642
::alpaka::declareSharedVar<std::size_t, __COUNTER__>(acc);
@@ -60,11 +56,11 @@ struct CCLKernel {
6056

6157
alpaka::barrier<TAcc> barry_r(&acc);
6258

63-
device::ccl_kernel(
64-
cfg, localThreadIdx, blockExtent, localBlockIdx, cells_view,
65-
modules_view, partition_start, partition_end, outi, f_view, gf_view,
66-
f_backup_view, gf_backup_view, adjc_backup_view, adjv_backup_view,
67-
backup_mutex, barry_r, measurements_view, cell_links);
59+
device::ccl_kernel(cfg, thread_id, cells_view, modules_view,
60+
partition_start, partition_end, outi, f_view,
61+
gf_view, f_backup_view, gf_backup_view,
62+
adjc_backup_view, adjv_backup_view, backup_mutex,
63+
barry_r, measurements_view, cell_links);
6864
}
6965
};
7066

device/common/include/traccc/clusterization/device/ccl_kernel.hpp

+5-6
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
#include "traccc/definitions/hints.hpp"
1414
#include "traccc/definitions/qualifiers.hpp"
1515
#include "traccc/device/concepts/barrier.hpp"
16+
#include "traccc/device/concepts/thread_id.hpp"
1617
#include "traccc/edm/cell.hpp"
1718
#include "traccc/edm/measurement.hpp"
1819
#include "traccc/edm/spacepoint.hpp"
@@ -29,9 +30,7 @@ namespace traccc::device {
2930
/// Function which reads raw detector cells and turns them into measurements.
3031
///
3132
/// @param[in] cfg clustering configuration
32-
/// @param[in] threadId current thread index
33-
/// @param[in] blckDim current thread block size
34-
/// @param[in] blckId current thread block index
33+
/// @param[in] thread_id a thread identifier object
3534
/// @param[in] cells_view collection of cells
3635
/// @param[in] modules_view collection of modules to which the cells are linked
3736
/// @param partition_start partition start point for this thread block
@@ -54,10 +53,10 @@ namespace traccc::device {
5453
/// @param[out] measurements_view collection of measurements
5554
/// @param[out] cell_links collection of links to measurements each cell is
5655
/// put into
57-
template <device::concepts::barrier barrier_t>
56+
template <device::concepts::barrier barrier_t,
57+
device::concepts::thread_id1 thread_id_t>
5858
TRACCC_DEVICE inline void ccl_kernel(
59-
const clustering_config cfg, details::index_t threadId,
60-
details::index_t blckDim, unsigned int blockId,
59+
const clustering_config cfg, const thread_id_t& thread_id,
6160
const cell_collection_types::const_view cells_view,
6261
const cell_module_collection_types::const_view modules_view,
6362
std::size_t& partition_start, std::size_t& partition_end, std::size_t& outi,

device/common/include/traccc/clusterization/device/impl/ccl_kernel.ipp

+39-27
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@
1313
#include "traccc/clusterization/device/aggregate_cluster.hpp"
1414
#include "traccc/clusterization/device/ccl_kernel_definitions.hpp"
1515
#include "traccc/clusterization/device/reduce_problem_cell.hpp"
16+
#include "traccc/device/concepts/barrier.hpp"
17+
#include "traccc/device/concepts/thread_id.hpp"
1618
#include "traccc/device/mutex.hpp"
1719
#include "traccc/device/unique_lock.hpp"
1820
#include "traccc/edm/cell.hpp"
@@ -40,13 +42,13 @@ namespace traccc::device {
4042
/// iteration.
4143
/// @param[in] barrier A generic object for block-wide synchronisation
4244
///
43-
template <device::concepts::barrier barrier_t>
44-
TRACCC_DEVICE void fast_sv_1(vecmem::device_vector<details::index_t>& f,
45+
template <device::concepts::barrier barrier_t,
46+
device::concepts::thread_id1 thread_id_t>
47+
TRACCC_DEVICE void fast_sv_1(const thread_id_t& thread_id,
48+
vecmem::device_vector<details::index_t>& f,
4549
vecmem::device_vector<details::index_t>& gf,
4650
unsigned char* adjc, details::index_t* adjv,
4751
details::index_t thread_cell_count,
48-
const details::index_t tid,
49-
const details::index_t blckDim,
5052
barrier_t& barrier) {
5153
/*
5254
* The algorithm finishes if an iteration leaves the arrays unchanged.
@@ -70,7 +72,8 @@ TRACCC_DEVICE void fast_sv_1(vecmem::device_vector<details::index_t>& f,
7072
* together.
7173
*/
7274
for (details::index_t tst = 0; tst < thread_cell_count; ++tst) {
73-
const details::index_t cid = tst * blckDim + tid;
75+
const details::index_t cid =
76+
tst * thread_id.getBlockDimX() + thread_id.getLocalThreadIdX();
7477

7578
TRACCC_ASSUME(adjc[tst] <= 8);
7679
for (unsigned char k = 0; k < adjc[tst]; ++k) {
@@ -90,7 +93,8 @@ TRACCC_DEVICE void fast_sv_1(vecmem::device_vector<details::index_t>& f,
9093
barrier.blockBarrier();
9194

9295
for (details::index_t tst = 0; tst < thread_cell_count; ++tst) {
93-
const details::index_t cid = tst * blckDim + tid;
96+
const details::index_t cid =
97+
tst * thread_id.getBlockDimX() + thread_id.getLocalThreadIdX();
9498
/*
9599
* The second stage is shortcutting, which is an optimisation that
96100
* allows us to look at any shortcuts in the cluster IDs that we
@@ -107,7 +111,8 @@ TRACCC_DEVICE void fast_sv_1(vecmem::device_vector<details::index_t>& f,
107111
barrier.blockBarrier();
108112

109113
for (details::index_t tst = 0; tst < thread_cell_count; ++tst) {
110-
const details::index_t cid = tst * blckDim + tid;
114+
const details::index_t cid =
115+
tst * thread_id.getBlockDimX() + thread_id.getLocalThreadIdX();
111116
/*
112117
* Update the array for the next generation, keeping track of any
113118
* changes we make.
@@ -128,11 +133,11 @@ TRACCC_DEVICE void fast_sv_1(vecmem::device_vector<details::index_t>& f,
128133
} while (barrier.blockOr(gf_changed));
129134
}
130135

131-
template <device::concepts::barrier barrier_t>
136+
template <device::concepts::barrier barrier_t,
137+
device::concepts::thread_id1 thread_id_t>
132138
TRACCC_DEVICE inline void ccl_core(
133-
const details::index_t threadId, const details::index_t blckDim,
134-
std::size_t& partition_start, std::size_t& partition_end,
135-
vecmem::device_vector<details::index_t> f,
139+
const thread_id_t& thread_id, std::size_t& partition_start,
140+
std::size_t& partition_end, vecmem::device_vector<details::index_t> f,
136141
vecmem::device_vector<details::index_t> gf,
137142
vecmem::data::vector_view<unsigned int> cell_links, details::index_t* adjv,
138143
unsigned char* adjc, const cell_collection_types::const_device cells_device,
@@ -145,20 +150,23 @@ TRACCC_DEVICE inline void ccl_core(
145150
assert(size <= gf.size());
146151

147152
details::index_t thread_cell_count =
148-
(size - threadId + blckDim - 1) / blckDim;
153+
(size - thread_id.getLocalThreadIdX() + thread_id.getBlockDimX() - 1) /
154+
thread_id.getBlockDimX();
149155

150156
for (details::index_t tst = 0; tst < thread_cell_count; ++tst) {
151157
/*
152158
* Look for adjacent cells to the current one.
153159
*/
154-
const details::index_t cid = tst * blckDim + threadId;
160+
const details::index_t cid =
161+
tst * thread_id.getBlockDimX() + thread_id.getLocalThreadIdX();
155162
adjc[tst] = 0;
156163
reduce_problem_cell(cells_device, cid, partition_start, partition_end,
157164
adjc[tst], &adjv[8 * tst]);
158165
}
159166

160167
for (details::index_t tst = 0; tst < thread_cell_count; ++tst) {
161-
const details::index_t cid = tst * blckDim + threadId;
168+
const details::index_t cid =
169+
tst * thread_id.getBlockDimX() + thread_id.getLocalThreadIdX();
162170
/*
163171
* At the start, the values of f and gf should be equal to the
164172
* ID of the cell.
@@ -177,12 +185,13 @@ TRACCC_DEVICE inline void ccl_core(
177185
* Run FastSV algorithm, which will update the father index to that of
178186
* the cell belonging to the same cluster with the lowest index.
179187
*/
180-
fast_sv_1(f, gf, adjc, adjv, thread_cell_count, threadId, blckDim, barrier);
188+
fast_sv_1(thread_id, f, gf, adjc, adjv, thread_cell_count, barrier);
181189

182190
barrier.blockBarrier();
183191

184192
for (details::index_t tst = 0; tst < thread_cell_count; ++tst) {
185-
const details::index_t cid = tst * blckDim + threadId;
193+
const details::index_t cid =
194+
tst * thread_id.getBlockDimX() + thread_id.getLocalThreadIdX();
186195
if (f.at(cid) == cid) {
187196
// Add a new measurement to the output buffer. Remembering its
188197
// position inside of the container.
@@ -196,10 +205,10 @@ TRACCC_DEVICE inline void ccl_core(
196205
}
197206
}
198207

199-
template <device::concepts::barrier barrier_t>
208+
template <device::concepts::barrier barrier_t,
209+
device::concepts::thread_id1 thread_id_t>
200210
TRACCC_DEVICE inline void ccl_kernel(
201-
const clustering_config cfg, const details::index_t threadId,
202-
const details::index_t blckDim, const unsigned int blockId,
211+
const clustering_config cfg, const thread_id_t& thread_id,
203212
const cell_collection_types::const_view cells_view,
204213
const cell_module_collection_types::const_view modules_view,
205214
std::size_t& partition_start, std::size_t& partition_end, std::size_t& outi,
@@ -237,8 +246,9 @@ TRACCC_DEVICE inline void ccl_kernel(
237246
* (to a later point in the array); start and end may be moved different
238247
* amounts.
239248
*/
240-
if (threadId == 0) {
241-
std::size_t start = blockId * cfg.target_partition_size();
249+
if (thread_id.getLocalThreadIdX() == 0) {
250+
std::size_t start =
251+
thread_id.getBlockIdX() * cfg.target_partition_size();
242252
assert(start < num_cells);
243253
std::size_t end =
244254
std::min(num_cells, start + cfg.target_partition_size());
@@ -313,24 +323,26 @@ TRACCC_DEVICE inline void ccl_kernel(
313323
* rare edge case.
314324
*/
315325
if (size > cfg.max_partition_size()) {
316-
if (threadId == 0) {
326+
if (thread_id.getLocalThreadIdX() == 0) {
317327
lock.lock();
318328
}
319329

320330
barrier.blockBarrier();
321331

322-
adjc = adjc_backup.data() + (threadId * cfg.max_cells_per_thread *
323-
cfg.backup_size_multiplier);
324-
adjv = adjv_backup.data() + (threadId * 8 * cfg.max_cells_per_thread *
325-
cfg.backup_size_multiplier);
332+
adjc = adjc_backup.data() +
333+
(thread_id.getLocalThreadIdX() * cfg.max_cells_per_thread *
334+
cfg.backup_size_multiplier);
335+
adjv = adjv_backup.data() +
336+
(thread_id.getLocalThreadIdX() * 8 * cfg.max_cells_per_thread *
337+
cfg.backup_size_multiplier);
326338
use_scratch = true;
327339
} else {
328340
adjc = _adjc;
329341
adjv = _adjv;
330342
use_scratch = false;
331343
}
332344

333-
ccl_core(threadId, blckDim, partition_start, partition_end,
345+
ccl_core(thread_id, partition_start, partition_end,
334346
use_scratch ? f_backup : f_primary,
335347
use_scratch ? gf_backup : gf_primary, cell_links, adjv, adjc,
336348
cells_device, modules_device, measurements_device, barrier);
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
/**
2+
* traccc library, part of the ACTS project (R&D line)
3+
*
4+
* (c) 2024 CERN for the benefit of the ACTS project
5+
*
6+
* Mozilla Public License Version 2.0
7+
*/
8+
9+
#pragma once
10+
11+
#include <concepts>
12+
13+
namespace traccc::device::concepts {
14+
/**
15+
* @brief Concept to ensure that a type behaves like a thread identification
16+
* type which allows us to access thread and block IDs. This concept assumes
17+
* one-dimensional grids.
18+
*
19+
* @tparam T The thread identifier-like type.
20+
*/
21+
template <typename T>
22+
concept thread_id1 = requires(T& i) {
23+
/*
24+
* This function should return the local thread identifier in a *flat* way,
25+
* e.g. compressing two or three dimensional blocks into one dimension.
26+
*/
27+
{ i.getLocalThreadId() }
28+
->std::integral;
29+
30+
/*
31+
* This function should return the local thread identifier in the X-axis.
32+
*/
33+
{ i.getLocalThreadIdX() }
34+
->std::integral;
35+
36+
/*
37+
* This function should return the global thread identifier in a *flat*
38+
* way, e.g. compressing two or three dimensional blocks into one
39+
* dimension.
40+
*/
41+
{ i.getGlobalThreadId() }
42+
->std::integral;
43+
44+
/*
45+
* This function should return the global thread identifier in the X-axis.
46+
*/
47+
{ i.getGlobalThreadIdX() }
48+
->std::integral;
49+
50+
/*
51+
* This function should return the block identifier in the X-axis.
52+
*/
53+
{ i.getBlockIdX() }
54+
->std::integral;
55+
56+
/*
57+
* This function should return the block size in the X-axis.
58+
*/
59+
{ i.getBlockIdX() }
60+
->std::integral;
61+
62+
/*
63+
* This function should return the grid identifier in the X-axis.
64+
*/
65+
{ i.getBlockIdX() }
66+
->std::integral;
67+
};
68+
} // namespace traccc::device::concepts

0 commit comments

Comments
 (0)