alpaka::math custom implementations for CPU backend (#2525)

fwyzard · mehmetyusufoglu · web-flow · commit 6017280cd967 · 2025-06-26T14:02:47.000+02:00
Co-authored-by: mehmet yusufoglu <mehmetyusufoglu01@gmail.com>
diff --git a/include/alpaka/core/BitCast.hpp b/include/alpaka/core/BitCast.hpp
@@ -0,0 +1,24 @@
+/* Copyright 2025 Andrea Bocci
+ * SPDX-License-Identifier: MPL-2.0
+ */
+
+#pragma once
+
+#include <cstring>
+#include <type_traits>
+
+namespace alpaka::core
+{
+    //! Reinterprets the object representation of \p src as a value of type To.
+    //!
+    //! C++17 stand-in for std::bit_cast, after https://en.cppreference.com/w/cpp/numeric/bit_cast.html
+    //! It participates in overload resolution only when To and From have the same size and are both trivially
+    //! copyable. Unlike std::bit_cast it is not constexpr.
+    //!
+    //! \tparam To destination type.
+    //! \tparam From source type (deduced from the argument).
+    //! \param src object whose bytes are copied.
+    //! \return a To made of the same sizeof(To) bytes as \p src.
+    template<class To, class From>
+    std::enable_if_t<
+        sizeof(To) == sizeof(From) && std::is_trivially_copyable_v<From> && std::is_trivially_copyable_v<To>,
+        To>
+    bit_cast(From const& src) noexcept
+    {
+        if constexpr(std::is_trivially_default_constructible_v<To>)
+        {
+            // Copy the bytes into a genuine To object, so the returned value is read from an object of the right
+            // type and no std::aligned_storage_t (deprecated in C++23) is required.
+            std::remove_cv_t<To> dst;
+            std::memcpy(&dst, &src, sizeof(To));
+            return dst;
+        }
+        else
+        {
+            // To is not trivially default-constructible, so a To object cannot be created here without running a
+            // constructor: stage the bytes in raw storage aligned for To and copy the result out of it.
+            alignas(To) unsigned char storage[sizeof(To)];
+            std::memcpy(storage, &src, sizeof(To));
+            return *reinterpret_cast<To const*>(storage);
+        }
+    }
+
+} // namespace alpaka::core
diff --git a/include/alpaka/math/MathStdLib.hpp b/include/alpaka/math/MathStdLib.hpp
@@ -5,9 +5,14 @@
 
 #pragma once
 
+#include "alpaka/core/BitCast.hpp"
 #include "alpaka/core/Decay.hpp"
+#include "alpaka/core/Unreachable.hpp"
 #include "alpaka/math/Traits.hpp"
 
+#include <cstdint>
+#include <type_traits>
+
 namespace alpaka::math
 {
     //! The standard library abs, implementation covered by the general template.
@@ -294,6 +299,99 @@ namespace alpaka::math
                 ALPAKA_UNREACHABLE(std::common_type_t<Tx, Ty>{});
             }
         };
+
+        //! isfinite for float and double, evaluated on the IEEE 754 bit pattern instead of through the standard
+        //! library: the std counterpart does not work correctly for some compiler flags on the CPU backend.
+        template<typename TArg>
+        struct Isfinite<IsfiniteStdLib, TArg, std::enable_if_t<std::is_floating_point_v<TArg>>>
+        {
+            //! \param arg value to classify.
+            //! \return true unless every exponent bit of \p arg is set (the encoding shared by infinities and NaNs).
+            auto operator()(IsfiniteStdLib const& /* ctx */, TArg const& arg) -> bool
+            {
+                if constexpr(std::is_same_v<TArg, double>)
+                {
+                    // binary64: the exponent field is bits 52..62.
+                    constexpr std::uint64_t exponentField = 0x7FF0'0000'0000'0000ULL;
+                    auto const raw = alpaka::core::bit_cast<std::uint64_t>(arg);
+                    return (raw & exponentField) != exponentField;
+                }
+                else if constexpr(std::is_same_v<TArg, float>)
+                {
+                    // binary32: the exponent field is bits 23..30.
+                    constexpr std::uint32_t exponentField = 0x7F80'0000;
+                    auto const raw = alpaka::core::bit_cast<std::uint32_t>(arg);
+                    return (raw & exponentField) != exponentField;
+                }
+                else
+                {
+                    // Any other floating-point type has no bit-level implementation here and is rejected at
+                    // compile time.
+                    static_assert(!sizeof(TArg), "Unsupported floating-point type");
+                }
+                ALPAKA_UNREACHABLE(false);
+            }
+        };
+
+        //! isinf for float and double, evaluated on the IEEE 754 bit pattern instead of through the standard
+        //! library: the std counterpart does not work correctly for some compiler flags on the CPU backend.
+        template<typename TArg>
+        struct Isinf<IsinfStdLib, TArg, std::enable_if_t<std::is_floating_point_v<TArg>>>
+        {
+            //! \param arg value to classify.
+            //! \return true when every exponent bit of \p arg is set and every fraction bit is clear; the sign bit
+            //! is ignored, so both +inf and -inf qualify.
+            auto operator()(IsinfStdLib const& /* ctx */, TArg const& arg) -> bool
+            {
+                if constexpr(std::is_same_v<TArg, double>)
+                {
+                    // binary64: exponent field in bits 52..62, fraction field in bits 0..51.
+                    constexpr std::uint64_t exponentField = 0x7FF0'0000'0000'0000ULL;
+                    constexpr std::uint64_t fractionField = 0x000F'FFFF'FFFF'FFFFULL;
+                    auto const raw = alpaka::core::bit_cast<std::uint64_t>(arg);
+                    return (raw & exponentField) == exponentField && (raw & fractionField) == 0;
+                }
+                else if constexpr(std::is_same_v<TArg, float>)
+                {
+                    // binary32: exponent field in bits 23..30, fraction field in bits 0..22.
+                    constexpr std::uint32_t exponentField = 0x7F80'0000;
+                    constexpr std::uint32_t fractionField = 0x007F'FFFF;
+                    auto const raw = alpaka::core::bit_cast<std::uint32_t>(arg);
+                    return (raw & exponentField) == exponentField && (raw & fractionField) == 0;
+                }
+                else
+                {
+                    // Any other floating-point type has no bit-level implementation here and is rejected at
+                    // compile time.
+                    static_assert(!sizeof(TArg), "Unsupported floating-point type");
+                }
+                ALPAKA_UNREACHABLE(false);
+            }
+        };
+
+        //! isnan for float and double, evaluated on the IEEE 754 bit pattern instead of through the standard
+        //! library: the std counterpart does not work correctly for some compiler flags on the CPU backend.
+        template<typename TArg>
+        struct Isnan<IsnanStdLib, TArg, std::enable_if_t<std::is_floating_point_v<TArg>>>
+        {
+            //! \param arg value to classify.
+            //! \return true when every exponent bit of \p arg is set and at least one fraction bit is set; the sign
+            //! bit is ignored.
+            auto operator()(IsnanStdLib const& /* ctx */, TArg const& arg) -> bool
+            {
+                if constexpr(std::is_same_v<TArg, double>)
+                {
+                    // binary64: exponent field in bits 52..62, fraction field in bits 0..51.
+                    constexpr std::uint64_t exponentField = 0x7FF0'0000'0000'0000ULL;
+                    constexpr std::uint64_t fractionField = 0x000F'FFFF'FFFF'FFFFULL;
+                    auto const raw = alpaka::core::bit_cast<std::uint64_t>(arg);
+                    return (raw & exponentField) == exponentField && (raw & fractionField) != 0;
+                }
+                else if constexpr(std::is_same_v<TArg, float>)
+                {
+                    // binary32: exponent field in bits 23..30, fraction field in bits 0..22.
+                    constexpr std::uint32_t exponentField = 0x7F80'0000;
+                    constexpr std::uint32_t fractionField = 0x007F'FFFF;
+                    auto const raw = alpaka::core::bit_cast<std::uint32_t>(arg);
+                    return (raw & exponentField) == exponentField && (raw & fractionField) != 0;
+                }
+                else
+                {
+                    // Any other floating-point type has no bit-level implementation here and is rejected at
+                    // compile time.
+                    static_assert(!sizeof(TArg), "Unsupported floating-point type");
+                }
+                ALPAKA_UNREACHABLE(false);
+            }
+        };
+
+
     } // namespace trait
 
 } // namespace alpaka::math
diff --git a/test/unit/math/src/DataGen.hpp b/test/unit/math/src/DataGen.hpp
@@ -185,14 +185,37 @@ namespace mathtest
                 args(2).arg[k] = std::numeric_limits<TData>::signaling_NaN();
                 args(3).arg[k] = std::numeric_limits<TData>::infinity();
                 args(4).arg[k] = -std::numeric_limits<TData>::infinity();
-                constexpr size_t nFixed = 5;
+                // One negative one positive value
+                if constexpr(std::is_same_v<TData, float>)
+                {
+                    args(5).arg[k] = 1.1f; // Use float literal
+                    args(6).arg[k] = -1.1f;
+                }
+                else if constexpr(std::is_same_v<TData, double>)
+                {
+                    args(5).arg[k] = 1.1; // Use double literal
+                    args(6).arg[k] = -1.1;
+                }
+                else if constexpr(std::is_same_v<TData, alpaka::Complex<float>>)
+                {
+                    args(5).arg[k] = alpaka::Complex<float>{1.1f, 2.1f}; // Complex float
+                    args(6).arg[k] = alpaka::Complex<float>{-1.1f, -2.1f};
+                }
+                else if constexpr(std::is_same_v<TData, alpaka::Complex<double>>)
+                {
+                    args(5).arg[k] = alpaka::Complex<double>{1.1, 2.1}; // Complex double
+                    args(6).arg[k] = alpaka::Complex<double>{-1.1, -2.1};
+                }
+
+                constexpr size_t nFixed = 7;
                 size_t i = nFixed;
                 // no need to test for denormal for now: not supported by CUDA
                 // for(; i < nFixed + (TArgs::capacity - nFixed) / 2; ++i)
                 // {
                 //     const TData v = rngWrapper.getNumber(dist, eng) *
                 //     std::numeric_limits<TData>::denorm_min(); args(i).arg[k] = (i % 2 == 0) ? v : -v;
                 // }
+                // Next values
                 for(; i < TArgs::capacity; ++i)
                 {
                     TData const v = rngWrapper.getNumber(dist, eng);
diff --git a/test/unit/math/src/TestTemplate.hpp b/test/unit/math/src/TestTemplate.hpp
@@ -53,6 +53,53 @@ namespace mathtest
         using type = T;
     };
 
+    //! Writes the hand-computed expected result of isnan / isinf / isfinite for the input stored at a given index.
+    //!
+    //! With some compile options for the CPU backends std::isnan, std::isinf and std::isfinite do not work
+    //! properly, so for the operators OpIsnan, OpIsinf and OpIsfinite the reference value cannot be taken from the
+    //! standard library. It is derived instead from the fixed values placed at the beginning of the test input
+    //! array:
+    //!   input[0]: [ 0 ]
+    //!   input[1]: [ nan ]
+    //!   input[2]: [ nan ]
+    //!   input[3]: [ inf ]
+    //!   input[4]: [ -inf ]
+    //! Every input at index 5 or above is taken to be finite.
+    //!
+    //! \tparam TFunctor operator under test; any functor other than OpIsnan, OpIsinf or OpIsfinite yields 0.
+    //! \tparam TData type used to store results; the boolean outcome of the unary operator is encoded as
+    //! TData(1) or TData(0) because all operation outputs are represented by the operand type.
+    //! \param stdExpectedResult receives the expected value.
+    //! \param idx index of the input in the input buffer.
+    template<typename TFunctor, typename TData>
+    void setExpectedResultForSpecificInput(TData& stdExpectedResult, size_t idx)
+    {
+        // Whether the predicate selected by TFunctor holds for the fixed input stored at position idx.
+        bool predicateHolds = false;
+
+        if constexpr(std::is_same_v<TFunctor, OpIsnan>)
+        {
+            // Slots 1 and 2 carry the NaN inputs.
+            predicateHolds = (idx == 1) || (idx == 2);
+        }
+        else if constexpr(std::is_same_v<TFunctor, OpIsinf>)
+        {
+            // Slots 3 and 4 carry +inf and -inf.
+            predicateHolds = (idx == 3) || (idx == 4);
+        }
+        else if constexpr(std::is_same_v<TFunctor, OpIsfinite>)
+        {
+            // Slot 0 is zero; everything after the NaN and inf slots is finite.
+            predicateHolds = (idx == 0) || (idx > 4);
+        }
+
+        stdExpectedResult = predicateHolds ? static_cast<TData>(1) : static_cast<TData>(0);
+    }
+
     //! Base test template for math unit tests
     //! @tparam TAcc Accelerator.
     //! @tparam TFunctor Functor defined in Functor.hpp.
@@ -140,9 +187,25 @@ namespace mathtest
 #endif
             for(size_t i = 0; i < Args::capacity; ++i)
             {
-                TData std_result = functor(args(i));
-                INFO("Idx i: " << i << " computed : " << results(i) << " vs expected: " << std_result);
-                REQUIRE(isApproxEqual(results(i), std_result));
+                TData stdExpectedResult{};
+
+                constexpr bool isSpecialCase = std::is_same_v<TFunctor, OpIsnan> || std::is_same_v<TFunctor, OpIsinf>
+                                               || std::is_same_v<TFunctor, OpIsfinite>;
+
+                // Only for specific operators, the results for the test inputs can only be verified by setting the
+                // expected specific result manually.
+                if constexpr((std::is_same_v<TData, float> || std::is_same_v<TData, double>) &&isSpecialCase)
+                {
+                    setExpectedResultForSpecificInput<TFunctor>(stdExpectedResult, i);
+                }
+                else
+                {
+                    // Calculated expected result using std functions
+                    stdExpectedResult = functor(args(i));
+                }
+                INFO("Idx i: " << i << " computed : " << results(i) << " vs expected: " << stdExpectedResult);
+                // Validate
+                REQUIRE(isApproxEqual(results(i), stdExpectedResult));
             }
         }