spcl · ChrisPattison · Nov 14, 2021 · Nov 26, 2021 · Nov 26, 2021 · Nov 26, 2021
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -1,6 +1,6 @@
 cmake_minimum_required(VERSION 3.0)
 project(apfp)
- 
+
 set(CMAKE_CXX_STANDARD 17)
 
 # Target options 
@@ -12,6 +12,8 @@ set(APFP_TILE_SIZE_M 32 CACHE STRING "Tile size in the M-dimension when running
 set(APFP_SEMANTICS "MPFR" CACHE STRING "Which semantics to use for floating point operations [GMP/MPFR].")
 set(APFP_PROFILING OFF CACHE BOOL "Enable profiling in the generated kernel.")
 set_property(CACHE APFP_SEMANTICS PROPERTY STRINGS GMP MPFR)
+set(APFP_INTERFACE_TYPE "MPFR" CACHE STRING "Which data types to use for the interface [GMP/MPFR].")
+set_property(CACHE APFP_INTERFACE_TYPE PROPERTY STRINGS GMP MPFR)  # selectable values offered by cmake-gui/ccmake
 
 # Validation and derived numbers
 math(EXPR APFP_ALIGNED "${APFP_BITS} % 512")
@@ -28,7 +30,7 @@ find_package(GMP REQUIRED)
 find_package(Threads REQUIRED)
 
 set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -Wpedantic -Wno-unused-label -DAPFP_${APFP_SEMANTICS}_SEMANTICS")
-include_directories(${CMAKE_BINARY_DIR} include SYSTEM hlslib/include ${Vitis_INCLUDE_DIRS} )
+include_directories(${CMAKE_BINARY_DIR} include interface SYSTEM hlslib/include ${Vitis_INCLUDE_DIRS})  # project dirs stay ahead of SYSTEM so warnings in our own headers are not suppressed
 
 configure_file(include/Config.h.in Config.h)
 
@@ -38,7 +40,7 @@ set(APFP_KERNEL_FILES device/MatrixMultiplication.cpp
 
 # Setup FPGA kernel targets
 add_vitis_kernel(MatrixMultiplication FILES ${APFP_KERNEL_FILES}
-                 INCLUDE_DIRS include hlslib/include ${CMAKE_BINARY_DIR}
+                 INCLUDE_DIRS include hlslib/include ${CMAKE_BINARY_DIR} ${GMP_INCLUDES}
                  HLS_FLAGS "-DAP_INT_MAX_W=${APFP_MAX_BITS} -DAPFP_${APFP_SEMANTICS}_SEMANTICS"
                  HLS_CONFIG "config_compile -pipeline_style frp\nconfig_dataflow -fifo_depth 16"
                  DEPENDS ${CMAKE_BINARY_DIR}/Config.h
@@ -61,7 +63,7 @@ add_library(simulation ${APFP_KERNEL_FILES})
 target_compile_options(simulation PRIVATE -Wno-unknown-pragmas -DAP_INT_MAX_W=${APFP_MAX_BITS})
 target_link_libraries(simulation ${CMAKE_THREAD_LIBS_INIT})
 
-add_library(ApfpHostlib SHARED interface/Apfp.cpp)
+add_library(ApfpHostlib SHARED interface/Apfp.cpp interface/ApfpBlas.cpp interface/ApfpInterfaceType.cpp)
 target_link_libraries(ApfpHostlib ${Vitis_LIBRARIES} ${GMP_LIBRARIES}) 
 target_compile_definitions(ApfpHostlib PRIVATE HLSLIB_SIMULATE_OPENCL)
 
@@ -79,7 +81,20 @@ enable_testing()
 add_test(TestSimulation TestSimulation 4 4 4)
 add_library(Catch host/Catch.cpp)
 add_executable(UnitTests host/UnitTests.cpp)
-target_link_libraries(UnitTests Catch ${GMP_LIBRARIES} ${MPFR_LIBRARIES} apfp simulation)
+target_link_libraries(UnitTests Catch ${GMP_LIBRARIES} ${MPFR_LIBRARIES} apfp ApfpHostlib simulation)
 add_test(UnitTests UnitTests)
 
-install(TARGETS ApfpHostlib)
+add_executable(BlasUnitTests host/BlasUnitTests.cpp)
+target_link_libraries(BlasUnitTests Catch ${GMP_LIBRARIES} ${MPFR_LIBRARIES} apfp ApfpHostlib simulation)
+
+install(TARGETS ApfpHostlib)
+install(FILES  # interface headers plus the generated Config.h, installed under include/apfp
+    interface/Apfp.h
+    interface/ApfpBlas.h
+    interface/ApfpInterfaceType.h
+    ${CMAKE_BINARY_DIR}/Config.h
+    DESTINATION include/apfp)
+install(FILES  # FPGA bitstreams produced by the hw / hw_emu kernel builds
+    ${CMAKE_BINARY_DIR}/MatrixMultiplication_hw.xclbin
+    ${CMAKE_BINARY_DIR}/MatrixMultiplication_hw_emu.xclbin
+    DESTINATION lib OPTIONAL)  # OPTIONAL: a bitstream that has not been built is skipped instead of failing the install
diff --git a/host/BlasUnitTests.cpp b/host/BlasUnitTests.cpp
@@ -0,0 +1,139 @@
+#include <catch.hpp>
+#include <iostream>
+#include <limits>
+
+#include "Config.h"
+
+// #include "ArithmeticOperations.h"
+// #include "Karatsuba.h"
+// #include "PackedFloat.h"
+#include "ApfpBlas.h"
+#include "Random.h"
+
+void ApfpSetup() {  // sets the default precision of the selected number library, then initialises APFP
+#if defined(APFP_GMP_INTERFACE_TYPE)
+    mpf_set_default_prec(kMantissaBits);
+#else
+    mpfr_set_default_prec(kMantissaBits);
+#endif
+    // The test aborts here unless Init reports success
+    REQUIRE(apfp::Init(kMantissaBits) == apfp::BlasError::success);
+}
+
+void ApfpTeardown() {  // called at the end of each test case to pair with ApfpSetup()
+    apfp::Finalize();
+}
+
+bool IsZero(apfp::interface::ConstPtr a) {  // true when the sign reported by mpf_sgn / mpfr_sgn is 0
+#ifdef APFP_GMP_INTERFACE_TYPE
+    return mpf_sgn(a) == 0;
+#else
+    return mpfr_sgn(a) == 0;
+#endif
+}
+
+bool IsClose(apfp::interface::ConstPtr a, apfp::interface::ConstPtr b) {
+    // Returns true when (a - b) / (a + b) has a binary exponent below the threshold at the bottom.
+    // A zero difference means the values are identical (this covers a = b = 0); a zero sum with a
+    // non-zero difference means a = -b, which is never close. Neither case may reach the division.
+    apfp::interface::Wrapper diff, sum, ratio;
+#ifdef APFP_GMP_INTERFACE_TYPE
+    mpf_sub(diff.get(), a, b);
+    mpf_add(sum.get(), a, b);
+    if (mpf_sgn(diff.get()) == 0 || mpf_sgn(sum.get()) == 0) return mpf_sgn(diff.get()) == 0;
+    mpf_div(ratio.get(), diff.get(), sum.get());
+    long exp;
+    mpf_get_d_2exp(&exp, ratio.get());
+#else
+    auto rounding_mode = mpfr_get_default_rounding_mode();
+    mpfr_sub(diff.get(), a, b, rounding_mode);
+    mpfr_add(sum.get(), a, b, rounding_mode);
+    if (mpfr_zero_p(diff.get()) || mpfr_zero_p(sum.get())) return mpfr_zero_p(diff.get()) != 0;
+    mpfr_div(ratio.get(), diff.get(), sum.get(), rounding_mode);
+    auto exp = mpfr_get_exp(ratio.get());
+#endif
+    // Require the numbers to match to the first 90% decimal places
+    return exp < -((kMantissaBits * 3 * 9) / 10);
+}
+
+TEST_CASE("Init_Teardown") {  // smoke test: setup followed directly by teardown, no BLAS call in between
+    ApfpSetup();
+    ApfpTeardown();
+}
+
+TEST_CASE("SYRK") {
+    ApfpSetup();
+
+    auto rng = RandomNumberGenerator();
+
+    unsigned long N = GENERATE(0, 1, 2, 8, 15, 16, 31, 32, 33);
+    unsigned long K = GENERATE(0, 1, 2, 8, 15, 16, 31, 32, 33);
+    auto mode = GENERATE(apfp::BlasTrans::normal, apfp::BlasTrans::transpose);
+    auto uplo_mode = GENERATE(apfp::BlasUplo::upper, apfp::BlasUplo::lower);
+    // Test SYRK
+    // In 'N' mode, we perform AA^T + C
+    // A is NxK (A : R^K -> R^N)
+    // C is NxN
+    // Matrices are stored column major because BLAS
+    {
+        std::vector<apfp::interface::Wrapper> a_matrix;
+        a_matrix.resize(N * K);
+        for (auto& v : a_matrix) {
+            rng.Generate(v.get());
+        }
+
+        std::vector<apfp::interface::Wrapper> c_matrix;
+        c_matrix.resize(N * N);
+        for (auto& v : c_matrix) {
+            rng.Generate(v.get());
+        }
+
+        std::vector<apfp::interface::Wrapper> ref_result;
+        ref_result.resize(N * N);
+
+        // Compute reference result
+        apfp::interface::Wrapper prod_temp;
+        for (unsigned long j = 0; j < N; ++j) {
+            // every (i, j) entry of the reference is computed, not just one triangle
+            for (unsigned long i = 0; i < N; ++i) {
+                auto r_idx = i + j * N;
+                apfp::interface::Set(ref_result.at(r_idx).get(), c_matrix.at(r_idx).get());
+
+                for (unsigned long k = 0; k < K; ++k) {
+                    // A is NxK if N, KxN if T
+                    if (mode == apfp::BlasTrans::normal) {
+                        // (A A^T)_ij = sum_k A(i,k) A(j,k)
+                        apfp::interface::Mul(prod_temp.get(), a_matrix.at(i + k * N).get(),
+                                             a_matrix.at(j + k * N).get());
+                    } else {
+                        // (A^T A)_ij = sum_k A(k,i) A(k,j)
+                        apfp::interface::Mul(prod_temp.get(), a_matrix.at(k + i * K).get(),
+                                             a_matrix.at(k + j * K).get());
+                    }
+                    apfp::interface::Add(ref_result.at(r_idx).get(), prod_temp.get(), ref_result.at(r_idx).get());
+                }
+            }
+        }
+
+        // Use APFP BLAS library
+        auto error_code = apfp::Syrk(
+            uplo_mode, mode, N, K, [&](unsigned long i) { return a_matrix.at(i).get(); },
+            mode == apfp::BlasTrans::normal ? N : K, [&](unsigned long i) { return c_matrix.at(i).get(); }, N);
+        REQUIRE(error_code == apfp::BlasError::success);
+
+        // Compare the Syrk output (written into c_matrix) against the reference, entry by entry.
+        // Index pairs with i <= j are visited; uplo_mode selects which index of each mirrored pair is read.
+        for (unsigned long j = 0; j < N; ++j) {
+            // i <= j: the diagonal belongs to both the upper and the lower triangle, so it is verified too
+            for (unsigned long i = 0; i <= j; ++i) {
+                auto ref_value = uplo_mode == apfp::BlasUplo::lower ? ref_result.at(i + j * N).get()
+                                                                    : ref_result.at(j + i * N).get();
+                auto test_value =
+                    uplo_mode == apfp::BlasUplo::lower ? c_matrix.at(i + j * N).get() : c_matrix.at(j + i * N).get();
+                REQUIRE(IsClose(ref_value, test_value));
+            }
+        }
+    }
+
+    ApfpTeardown();
+}
diff --git a/host/Random.cpp b/host/Random.cpp
@@ -29,12 +29,12 @@ __mpfr_struct RandomNumberGenerator::GenerateMpfr() {
     return num[0];
 }
 
-void RandomNumberGenerator::Generate(mpfr_t &num) {
+void RandomNumberGenerator::Generate(mpfr_ptr num) {
     std::unique_lock<std::mutex> lock(mutex_);
     mpfr_urandom(num, state_, kRoundingMode);
 }
 
-void RandomNumberGenerator::Generate(mpf_t &num) {
+void RandomNumberGenerator::Generate(mpf_ptr num) {
     std::unique_lock<std::mutex> lock(mutex_);
     mpf_urandomb(num, state_, kMantissaBits);
 }
diff --git a/include/Config.h.in b/include/Config.h.in
@@ -7,3 +7,5 @@ constexpr int kTileSizeN = ${APFP_TILE_SIZE_N};
 constexpr int kTileSizeM = ${APFP_TILE_SIZE_M};
 constexpr auto kBuildDir = "${CMAKE_BINARY_DIR}";
 static_assert(kBits % 8 == 0, "Number of bits must be byte-aligned.");
+
+#define APFP_${APFP_INTERFACE_TYPE}_INTERFACE_TYPE
diff --git a/include/PackedFloat.h b/include/PackedFloat.h
@@ -85,7 +85,7 @@ class PackedFloat {
         return *this;
     }
 
-    inline void ToGmp(mpf_ptr num) {
+    inline void ToGmp(mpf_ptr num) const {
         const size_t gmp_limbs = (mpf_get_prec(num) + 8 * sizeof(mp_limb_t) - 1) / (8 * sizeof(mp_limb_t));
         constexpr size_t kNumLimbs = kMantissaBytes / sizeof(Limb);
         // GMP does not allow graceful rounding, so we cannot handle having insufficient bits in the target GMP number
@@ -104,7 +104,7 @@ class PackedFloat {
         }
     }
 
-    inline void ToMpfr(mpfr_t num) {
+    inline void ToMpfr(mpfr_t num) const {
         // Copy the most significant bytes, padding zeros if necessary
         const auto mpfr_limbs = (mpfr_get_prec(num) + 8 * sizeof(mp_limb_t) - 1) / (8 * sizeof(mp_limb_t));
         const size_t mpfr_bytes = mpfr_limbs * sizeof(mp_limb_t);

diff --git a/include/Random.h b/include/Random.h
@@ -23,13 +23,13 @@ class RandomNumberGenerator {
     __mpf_struct GenerateGmp();
 
     /// Generate a random GMP number into the specified output variable.
-    void Generate(mpf_t &);
+    void Generate(mpf_ptr);
 
     /// Generate a random MPFR number.
     __mpfr_struct GenerateMpfr();
 
     /// Generate a random MPFR into the specified output variable.
-    void Generate(mpfr_t &);
+    void Generate(mpfr_ptr);
 
    private:
     gmp_randstate_t state_;