Skip to content

Commit 0f06710

Browse files
committed
add native support for OpenMP and SYCL
1 parent ebfbf1c commit 0f06710

4 files changed

Lines changed: 82 additions & 69 deletions

File tree

common/unified/components/bitvector.generic.hpp

Lines changed: 0 additions & 64 deletions
This file was deleted.

common/unified/components/bitvector.hpp

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -11,13 +11,10 @@
1111

1212
#if defined(GKO_COMPILING_CUDA) || defined(GKO_COMPILING_HIP)
1313
#include "common/cuda_hip/components/bitvector.hpp"
14-
#elif defined(GKO_COMPILING_DPCPP) || defined(GKO_COMPILING_OMP)
15-
#include "common/unified/components/bitvector.generic.hpp"
16-
#ifdef GKO_COMPILING_OMP
14+
#elif defined(GKO_COMPILING_OMP)
1715
#include "omp/components/bitvector.hpp"
18-
#else
16+
#elif defined(GKO_COMPILING_DPCPP)
1917
#include "dpcpp/components/bitvector.dp.hpp"
20-
#endif
2118
#else
2219
#error "This file should only be used inside Ginkgo device compilation"
2320
#endif

dpcpp/components/bitvector.dp.hpp

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,47 @@ namespace GKO_DEVICE_NAMESPACE {
2121
namespace bitvector {
2222

2323

24+
template <typename IndexType, typename DevicePredicate>
25+
gko::bitvector<IndexType> from_predicate(
26+
std::shared_ptr<const DefaultExecutor> exec, IndexType size,
27+
DevicePredicate device_predicate)
28+
{
29+
using storage_type = typename device_bitvector<IndexType>::storage_type;
30+
constexpr auto block_size = device_bitvector<IndexType>::block_size;
31+
const auto num_blocks = static_cast<size_type>(ceildiv(size, block_size));
32+
array<uint32> bit_array{exec, num_blocks};
33+
array<IndexType> rank_array{exec, num_blocks};
34+
const auto bits = bit_array.get_data();
35+
const auto ranks = rank_array.get_data();
36+
const auto queue = exec->get_queue();
37+
queue->submit([&](sycl::handler& cgh) {
38+
cgh.parallel_for(num_blocks, [=](sycl::id<1> block_i) {
39+
const auto base_i = static_cast<IndexType>(block_i) * block_size;
40+
storage_type mask{};
41+
if (base_i + block_size <= size) {
42+
for (int local_i = 0; local_i < block_size; local_i++) {
43+
const storage_type bit =
44+
device_predicate(base_i + local_i) ? 1 : 0;
45+
mask |= bit << local_i;
46+
}
47+
} else {
48+
for (int local_i = 0; base_i + local_i < size; local_i++) {
49+
const storage_type bit =
50+
device_predicate(base_i + local_i) ? 1 : 0;
51+
mask |= bit << local_i;
52+
}
53+
}
54+
bits[block_i] = mask;
55+
ranks[block_i] = gko::detail::popcount(mask);
56+
});
57+
});
58+
components::prefix_sum_nonnegative(exec, ranks, num_blocks);
59+
60+
return gko::bitvector<IndexType>{std::move(bit_array),
61+
std::move(rank_array), size};
62+
}
63+
64+
2465
template <typename IndexIterator>
2566
gko::bitvector<typename std::iterator_traits<IndexIterator>::value_type>
2667
from_sorted_indices(

omp/components/bitvector.hpp

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,45 @@ namespace omp {
1818
namespace bitvector {
1919

2020

21+
template <typename IndexType, typename DevicePredicate>
22+
gko::bitvector<IndexType> from_predicate(
23+
std::shared_ptr<const DefaultExecutor> exec, IndexType size,
24+
DevicePredicate device_predicate)
25+
{
26+
using storage_type = typename device_bitvector<IndexType>::storage_type;
27+
constexpr auto block_size = device_bitvector<IndexType>::block_size;
28+
const auto num_blocks = static_cast<size_type>(ceildiv(size, block_size));
29+
array<uint32> bit_array{exec, num_blocks};
30+
array<IndexType> rank_array{exec, num_blocks};
31+
const auto bits = bit_array.get_data();
32+
const auto ranks = rank_array.get_data();
33+
#pragma omp parallel for
34+
for (IndexType block_i = 0; block_i < num_blocks; block_i++) {
35+
const auto base_i = block_i * block_size;
36+
storage_type mask{};
37+
if (base_i + block_size <= size) {
38+
for (int local_i = 0; local_i < block_size; local_i++) {
39+
const storage_type bit =
40+
device_predicate(base_i + local_i) ? 1 : 0;
41+
mask |= bit << local_i;
42+
}
43+
} else {
44+
for (int local_i = 0; base_i + local_i < size; local_i++) {
45+
const storage_type bit =
46+
device_predicate(base_i + local_i) ? 1 : 0;
47+
mask |= bit << local_i;
48+
}
49+
}
50+
bits[block_i] = mask;
51+
ranks[block_i] = gko::detail::popcount(mask);
52+
}
53+
components::prefix_sum_nonnegative(exec, ranks, num_blocks);
54+
55+
return gko::bitvector<IndexType>{std::move(bit_array),
56+
std::move(rank_array), size};
57+
}
58+
59+
2160
template <typename IndexIterator>
2261
gko::bitvector<typename std::iterator_traits<IndexIterator>::value_type>
2362
from_sorted_indices(

0 commit comments

Comments
 (0)