Skip to content

Commit 2a810e4

Browse files
bernhardmgruber authored and j-stephan committed
refactor and fix CPU intrinsics
Fixes: #1715
1 parent 936b659 commit 2a810e4

File tree

2 files changed

+23
-44
lines changed

2 files changed

+23
-44
lines changed

include/alpaka/intrinsic/IntrinsicCpu.hpp

+21 -42
Original file line number | Diff line number | Diff line change
@@ -14,6 +14,7 @@
1414
#include <alpaka/intrinsic/Traits.hpp>
1515

1616
#include <bitset>
17+
#include <climits>
1718
#if __has_include(<bit>)
1819
# include <bit>
1920
#endif
@@ -34,71 +35,49 @@ namespace alpaka
3435
template<>
3536
struct Popcount<IntrinsicCpu>
3637
{
37-
static auto popcount(IntrinsicCpu const& /*intrinsic*/, std::uint32_t value) -> std::int32_t
38+
template<typename UnsignedIntegral>
39+
static auto popcount(IntrinsicCpu const& /*intrinsic*/, UnsignedIntegral value) -> std::int32_t
3840
{
3941
#ifdef __cpp_lib_bitops
4042
return std::popcount(value);
4143
#elif BOOST_COMP_GNUC || BOOST_COMP_CLANG
42-
return __builtin_popcount(value);
43-
#elif BOOST_COMP_MSVC
44-
return __popcnt(value);
45-
#else
46-
// Fallback to standard library
47-
return static_cast<std::int32_t>(std::bitset<32>(value).count());
48-
#endif
49-
}
50-
51-
static auto popcount(IntrinsicCpu const& /*intrinsic*/, std::uint64_t value) -> std::int32_t
52-
{
53-
#ifdef __cpp_lib_bitops
54-
return std::popcount(value);
55-
#elif BOOST_COMP_GNUC || BOOST_COMP_CLANG
56-
return __builtin_popcountll(value);
44+
if constexpr(sizeof(UnsignedIntegral) == 8)
45+
return __builtin_popcountll(value);
46+
else
47+
return __builtin_popcount(value);
5748
#elif BOOST_COMP_MSVC
58-
return static_cast<std::int32_t>(__popcnt64(value));
49+
if constexpr(sizeof(UnsignedIntegral) == 8)
50+
return static_cast<std::int32_t>(__popcnt64(value));
51+
else
52+
return __popcnt(value);
5953
#else
6054
// Fallback to standard library
61-
return static_cast<std::int32_t>(std::bitset<64>(value).count());
55+
return static_cast<std::int32_t>(std::bitset<sizeof(UnsignedIntegral) * CHAR_BIT>(value).count());
6256
#endif
6357
}
6458
};
6559

6660
template<>
6761
struct Ffs<IntrinsicCpu>
6862
{
69-
static auto ffs(IntrinsicCpu const& /*intrinsic*/, std::int32_t value) -> std::int32_t
63+
template<typename Integral>
64+
static auto ffs(IntrinsicCpu const& /*intrinsic*/, Integral value) -> std::int32_t
7065
{
7166
#ifdef __cpp_lib_bitops
72-
return std::countr_zero(value);
67+
return value == 0 ? 0 : std::countr_zero(static_cast<std::make_unsigned_t<Integral>>(value)) + 1;
7368
#elif BOOST_COMP_GNUC || BOOST_COMP_CLANG
74-
return __builtin_ffs(value);
75-
#elif BOOST_COMP_MSVC
76-
// Implementation based on
77-
// https://gitlab.freedesktop.org/cairo/cairo/commit/f5167dc2e1a13d8c4e5d66d7178a24b9b5e7ac7a
78-
unsigned long index = 0u;
79-
if(_BitScanForward(&index, value) != 0)
80-
return static_cast<std::int32_t>(index + 1u);
69+
if constexpr(sizeof(Integral) == 8)
70+
return __builtin_ffsll(value);
8171
else
82-
return 0;
83-
#else
84-
return alpaka::detail::ffsFallback(value);
85-
#endif
86-
}
87-
88-
static auto ffs(IntrinsicCpu const& /*intrinsic*/, std::int64_t value) -> std::int32_t
89-
{
90-
#ifdef __cpp_lib_bitops
91-
return std::countr_zero(value);
92-
#elif BOOST_COMP_GNUC || BOOST_COMP_CLANG
93-
return __builtin_ffsll(value);
72+
return __builtin_ffs(value);
9473
#elif BOOST_COMP_MSVC
9574
// Implementation based on
9675
// https://gitlab.freedesktop.org/cairo/cairo/commit/f5167dc2e1a13d8c4e5d66d7178a24b9b5e7ac7a
9776
unsigned long index = 0u;
98-
if(_BitScanForward64(&index, value) != 0)
99-
return static_cast<std::int32_t>(index + 1u);
77+
if constexpr(sizeof(Integral) == 8)
78+
return _BitScanForward64(&index, value) == 0 ? 0 : static_cast<std::int32_t>(index + 1u);
10079
else
101-
return 0;
80+
return _BitScanForward(&index, value) == 0 ? 0 : static_cast<std::int32_t>(index + 1u);
10281
#else
10382
return alpaka::detail::ffsFallback(value);
10483
#endif

include/alpaka/intrinsic/IntrinsicFallback.hpp

+2 -2
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,7 @@ namespace alpaka
1515
{
1616
namespace detail
1717
{
18-
//! Fallback implementaion of popcount.
18+
//! Fallback implementation of popcount.
1919
template<typename TValue>
2020
static auto popcountFallback(TValue value) -> std::int32_t
2121
{
@@ -28,7 +28,7 @@ namespace alpaka
2828
return static_cast<std::int32_t>(count);
2929
}
3030

31-
//! Fallback implementaion of ffs.
31+
//! Fallback implementation of ffs.
3232
template<typename TValue>
3333
static auto ffsFallback(TValue value) -> std::int32_t
3434
{

0 commit comments

Comments
 (0)