Make HDF5, MPI and OpenMP optional and build the CUDA benchmarks only on request.

* CMakeLists.txt: adds the HDF5Support, MPISupport and OpenMPSupport options (all OFF by default).
* benchmarks/CMakeLists.txt: defines NDA_CUDA_SUPPORT on every benchmark target when CudaSupport is set.
* benchmarks/*.cpp: registers the cu* (device) benchmarks only when NDA_CUDA_SUPPORT is defined.
* c++/nda/CMakeLists.txt: links itertools::itertools_c unconditionally; links h5::h5_c, adds the
  mpi subdirectory, and finds/links OpenMP only when the matching option is ON.
* c++/nda/h5.hpp: drops the leading "./" from the project-local includes.

Review note: text inside angle brackets (template parameter lists, system includes) appears to have
been stripped from this copy of the patch; those lines are reproduced as found.

diff --git a/CMakeLists.txt b/CMakeLists.txt
index a6913bd8f..fe8954708 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -57,6 +57,12 @@ endif()
 
 # Python Support
 option(PythonSupport "Build with Python support" OFF)
+# HDF5 support
+option(HDF5Support "Build with HDF5 support" OFF)
+# MPI support
+option(MPISupport "Build with MPI support" OFF)
+# OpenMP support
+option(OpenMPSupport "Build with OpenMP support" OFF)
 
 # Documentation
 option(Build_Documentation "Build documentation" OFF)
diff --git a/benchmarks/CMakeLists.txt b/benchmarks/CMakeLists.txt
index 00728b352..dcf10c852 100644
--- a/benchmarks/CMakeLists.txt
+++ b/benchmarks/CMakeLists.txt
@@ -7,6 +7,9 @@ foreach(bench ${all_benchs})
   add_executable(${bench_name} ${bench})
   target_link_libraries(${bench_name} ${PROJECT_NAME}::${PROJECT_NAME}_c ${PROJECT_NAME}_warnings benchmark_main)
   set_property(TARGET ${bench_name} PROPERTY RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/${bench_dir})
+  if(CudaSupport)
+    target_compile_definitions(${bench_name} PRIVATE NDA_CUDA_SUPPORT)
+  endif()
   #add_bench(NAME ${bench_name} COMMAND ${bench_name} WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/${bench_dir})
   # Run clang-tidy if found
   if(CLANG_TIDY_EXECUTABLE)
diff --git a/benchmarks/blas.cpp b/benchmarks/blas.cpp
index 2d1db358b..fd17ce450 100644
--- a/benchmarks/blas.cpp
+++ b/benchmarks/blas.cpp
@@ -22,7 +22,9 @@ static void DOT(benchmark::State &state) {
   state.counters["bytesize"] = double(NBytes);
 }
 BENCHMARK_TEMPLATE(DOT, nda::vector)->RangeMultiplier(2)->Range(Nmin, Nmax)->Unit(benchmark::kMicrosecond); // NOLINT
+#ifdef NDA_CUDA_SUPPORT
 BENCHMARK_TEMPLATE(DOT, nda::cuvector)->RangeMultiplier(2)->Range(Nmin, Nmax)->Unit(benchmark::kMicrosecond); // NOLINT
+#endif // NDA_CUDA_SUPPORT
 
 template
 static void GEMM(benchmark::State &state) {
@@ -36,7 +38,9 @@ static void GEMM(benchmark::State &state) {
   state.counters["bytesize"] = double(NBytes);
 }
 BENCHMARK_TEMPLATE(GEMM, nda::matrix)->RangeMultiplier(2)->Range(Nmin, Nmax)->Unit(benchmark::kMicrosecond); // NOLINT
+#ifdef NDA_CUDA_SUPPORT
 BENCHMARK_TEMPLATE(GEMM, nda::cumatrix)->RangeMultiplier(2)->Range(Nmin, Nmax)->Unit(benchmark::kMicrosecond); // NOLINT
+#endif // NDA_CUDA_SUPPORT
 
 template
 static void GER(benchmark::State &state) {
@@ -50,7 +54,9 @@ static void GER(benchmark::State &state) {
   state.counters["bytesize"] = double(NBytes);
 }
 BENCHMARK_TEMPLATE(GER, nda::vector, nda::matrix)->RangeMultiplier(2)->Range(Nmin, Nmax)->Unit(benchmark::kMicrosecond); // NOLINT
+#ifdef NDA_CUDA_SUPPORT
 BENCHMARK_TEMPLATE(GER, nda::cuvector, nda::cumatrix)
    ->RangeMultiplier(2)
    ->Range(Nmin, Nmax)
    ->Unit(benchmark::kMicrosecond); // NOLINT
+#endif // NDA_CUDA_SUPPORT
diff --git a/benchmarks/copy.cpp b/benchmarks/copy.cpp
index 8848d4471..856c4d40f 100644
--- a/benchmarks/copy.cpp
+++ b/benchmarks/copy.cpp
@@ -10,8 +10,10 @@ using value_t = double;
 
 template
 using array_t = nda::array;
+#ifdef NDA_CUDA_SUPPORT
 template
 using device_array_t = nda::cuarray;
+#endif // NDA_CUDA_SUPPORT
 
 const long KBmin = 8;
 const long KBmax = 1 << 15;
@@ -27,8 +29,9 @@ static void Copy(benchmark::State &state) {
   state.counters["processed"] = double(NBytes);
 }
 BENCHMARK_TEMPLATE(Copy, array_t<1>)->RangeMultiplier(8)->Range(KBmin, KBmax); // NOLINT
+#ifdef NDA_CUDA_SUPPORT
 BENCHMARK_TEMPLATE(Copy, device_array_t<1>)->RangeMultiplier(8)->Range(KBmin, KBmax); // NOLINT
-
+#endif // NDA_CUDA_SUPPORT
 template
 static void Copy1DStrided(benchmark::State &state) {
   long NBytes = state.range(0) * 1024;
@@ -44,7 +47,9 @@ static void Copy1DStrided(benchmark::State &state) {
   state.counters["step"] = double(step);
 }
 BENCHMARK_TEMPLATE(Copy1DStrided, array_t<1>)->RangeMultiplier(8)->Range(KBmin, KBmax); // NOLINT
+#ifdef NDA_CUDA_SUPPORT
 BENCHMARK_TEMPLATE(Copy1DStrided, device_array_t<1>)->RangeMultiplier(8)->Range(KBmin, KBmax); // NOLINT
+#endif // NDA_CUDA_SUPPORT
 
 template
 static void CopyBlockStrided(benchmark::State &state) {
@@ -63,6 +68,8 @@ static void CopyBlockStrided(benchmark::State &state) {
   state.counters["n_blocks"] = double(n_blocks);
 }
 BENCHMARK_TEMPLATE(CopyBlockStrided, array_t<2>, array_t<2>)->RangeMultiplier(8)->Range(KBmin, KBmax); // NOLINT
+#ifdef NDA_CUDA_SUPPORT
 BENCHMARK_TEMPLATE(CopyBlockStrided, device_array_t<2>, device_array_t<2>)->RangeMultiplier(8)->Range(KBmin, KBmax); // NOLINT
 BENCHMARK_TEMPLATE(CopyBlockStrided, array_t<2>, device_array_t<2>)->RangeMultiplier(8)->Range(KBmin, KBmax); // NOLINT
 BENCHMARK_TEMPLATE(CopyBlockStrided, device_array_t<2>, array_t<2>)->RangeMultiplier(8)->Range(KBmin, KBmax); // NOLINT
+#endif // NDA_CUDA_SUPPORT
diff --git a/benchmarks/gemm_batch.cpp b/benchmarks/gemm_batch.cpp
index ed2aef1b3..f58b5047c 100644
--- a/benchmarks/gemm_batch.cpp
+++ b/benchmarks/gemm_batch.cpp
@@ -27,7 +27,9 @@ static void GEMM_BATCH(benchmark::State &state) {
   state.counters["bytesize"] = double(NBytes);
 }
 BENCHMARK_TEMPLATE(GEMM_BATCH, nda::matrix)->RangeMultiplier(2)->Range(Nmin, Nmax)->Unit(benchmark::kMicrosecond); // NOLINT
+#ifdef NDA_CUDA_SUPPORT
 BENCHMARK_TEMPLATE(GEMM_BATCH, nda::cumatrix)->RangeMultiplier(2)->Range(Nmin, Nmax)->Unit(benchmark::kMicrosecond); // NOLINT
+#endif // NDA_CUDA_SUPPORT
 
 template
 static void GEMM_VBATCH(benchmark::State &state) {
@@ -45,7 +47,9 @@ static void GEMM_VBATCH(benchmark::State &state) {
   state.counters["bytesize"] = double(NBytes);
 }
 BENCHMARK_TEMPLATE(GEMM_VBATCH, nda::matrix)->RangeMultiplier(2)->Range(Nmin, Nmax)->Unit(benchmark::kMicrosecond); // NOLINT
+#ifdef NDA_CUDA_SUPPORT
 BENCHMARK_TEMPLATE(GEMM_VBATCH, nda::cumatrix)->RangeMultiplier(2)->Range(Nmin, Nmax)->Unit(benchmark::kMicrosecond); // NOLINT
+#endif // NDA_CUDA_SUPPORT
 
 template
 static void GEMM_BATCH_STRIDED(benchmark::State &state) {
@@ -63,4 +67,6 @@ static void GEMM_BATCH_STRIDED(benchmark::State &state) {
   state.counters["bytesize"] = double(NBytes);
 }
 BENCHMARK_TEMPLATE(GEMM_BATCH_STRIDED, nda::array)->RangeMultiplier(2)->Range(Nmin, Nmax)->Unit(benchmark::kMicrosecond); // NOLINT
+#ifdef NDA_CUDA_SUPPORT
 BENCHMARK_TEMPLATE(GEMM_BATCH_STRIDED, nda::cuarray)->RangeMultiplier(2)->Range(Nmin, Nmax)->Unit(benchmark::kMicrosecond); // NOLINT
+#endif // NDA_CUDA_SUPPORT
diff --git a/c++/nda/CMakeLists.txt b/c++/nda/CMakeLists.txt
index 4644eeea1..30cc6391b 100644
--- a/c++/nda/CMakeLists.txt
+++ b/c++/nda/CMakeLists.txt
@@ -30,17 +30,30 @@ install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} DESTINATION include FILES_MATCHING
 configure_file(version.hpp.in version.hpp @ONLY)
 install(FILES ${CMAKE_CURRENT_BINARY_DIR}/version.hpp DESTINATION include/${PROJECT_NAME})
 
+
+# ========= Itertools ==========
+target_link_libraries(${PROJECT_NAME}_c PUBLIC itertools::itertools_c)
+
 # ========= Additional Depdencies ==========
 
 # Link against HDF5 C++ Interface
-target_link_libraries(${PROJECT_NAME}_c PUBLIC h5::h5_c)
+if(HDF5Support)
+  message(STATUS "-------- HDF5 detection -------------")
+  target_link_libraries(${PROJECT_NAME}_c PUBLIC h5::h5_c)
+endif()
 
 # Link against MPI C++ Interface
-target_link_libraries(${PROJECT_NAME}_c PUBLIC mpi::mpi_c)
+if(MPISupport)
+  message(STATUS "-------- MPI detection -------------")
+  add_subdirectory(mpi)
+endif()
 
 # OpenMP
-find_package(OpenMP REQUIRED COMPONENTS CXX)
-target_link_libraries(${PROJECT_NAME}_c PUBLIC OpenMP::OpenMP_CXX)
+if(OpenMPSupport)
+  message(STATUS "-------- OpenMP detection -------------")
+  find_package(OpenMP REQUIRED COMPONENTS CXX)
+  target_link_libraries(${PROJECT_NAME}_c PUBLIC OpenMP::OpenMP_CXX)
+endif()
 
 # ========= Blas / Lapack ==========
 
diff --git a/c++/nda/h5.hpp b/c++/nda/h5.hpp
index 9225f1f8c..88833d281 100644
--- a/c++/nda/h5.hpp
+++ b/c++/nda/h5.hpp
@@ -10,12 +10,12 @@
 
 #pragma once
 
-#include "./concepts.hpp"
-#include "./declarations.hpp"
-#include "./exceptions.hpp"
-#include "./layout/for_each.hpp"
-#include "./layout/range.hpp"
-#include "./traits.hpp"
+#include "concepts.hpp"
+#include "declarations.hpp"
+#include "exceptions.hpp"
+#include "layout/for_each.hpp"
+#include "layout/range.hpp"
+#include "traits.hpp"
 
 #include
 
diff --git a/c++/nda/mpi.hpp b/c++/nda/mpi.hpp
index 8fe4bc6c8..9f52b8dc4 100644
--- a/c++/nda/mpi.hpp
+++ b/c++/nda/mpi.hpp
@@ -10,8 +10,8 @@
 
 #pragma once
 
-#include "./mpi/broadcast.hpp"
-#include "./mpi/gather.hpp"
-#include "./mpi/reduce.hpp"
-#include "./mpi/scatter.hpp"
-#include "./mpi/utils.hpp"
+#include "mpi/broadcast.hpp"
+#include "mpi/gather.hpp"
+#include "mpi/reduce.hpp"
+#include "mpi/scatter.hpp"
+#include "mpi/utils.hpp"
diff --git a/c++/nda/mpi/CMakeLists.txt b/c++/nda/mpi/CMakeLists.txt
new file mode 100644
index 000000000..cdeaf3abb
--- /dev/null
+++ b/c++/nda/mpi/CMakeLists.txt
@@ -0,0 +1,11 @@
+# Wire the MPI wrapper library into ${PROJECT_NAME}_c (this directory is added only when MPISupport is ON).
+target_link_libraries(${PROJECT_NAME}_c PUBLIC mpi::mpi_c)
+
+# NOTE(review): both generator expressions below are truncated in this copy of the patch;
+# restore them from the original commit before applying.
+# NOTE(review): sym_grp.hpp tests MPI_SUPPORT from a header. If that macro is defined here,
+# PRIVATE will not propagate it to targets that link ${PROJECT_NAME}_c -- confirm whether PUBLIC is intended.
+target_compile_definitions(${PROJECT_NAME}_c PRIVATE
+  $
+  $
+)
diff --git a/c++/nda/sym_grp.hpp b/c++/nda/sym_grp.hpp
index 225c4527e..051d20d01 100644
--- a/c++/nda/sym_grp.hpp
+++ b/c++/nda/sym_grp.hpp
@@ -11,10 +11,12 @@
 #pragma once
 
 #include "./nda.hpp"
-#include "./mpi.hpp"
+#ifdef MPI_SUPPORT
+#include "mpi.hpp"
+#endif
 
+#include
 #include
-#include
 #include
 #include
 
@@ -182,7 +184,7 @@ namespace nda {
       if (parallel) {
         // reset input array to allow for mpi reduction
         a() = 0.0;
-
+#ifdef MPI_SUPPORT
 #pragma omp parallel
         for (auto const &sym_class : itertools::omp_chunk(mpi::chunk(sym_classes))) {
           auto idx = a.indexmap().to_idx(sym_class[0].first);
@@ -190,9 +192,25 @@ namespace nda {
           std::apply(a, idx) = ref_val;
           for (auto const &[lin_idx, op] : sym_class) { std::apply(a, a.indexmap().to_idx(lin_idx)) = op(ref_val); }
         }
-
         // distribute data among all ranks
         a = mpi::all_reduce(a);
+#elif defined(_OPENMP) // no MPI: split the classes across OpenMP threads only
+#pragma omp parallel
+        for (auto const &sym_class : itertools::omp_chunk(sym_classes)) {
+          auto idx = a.indexmap().to_idx(sym_class[0].first);
+          auto ref_val = init_func(idx);
+          std::apply(a, idx) = ref_val;
+          for (auto const &[lin_idx, op] : sym_class) { std::apply(a, a.indexmap().to_idx(lin_idx)) = op(ref_val); }
+        }
+#else // neither MPI nor OpenMP: plain serial loop over all classes
+        for (auto const &sym_class : sym_classes) {
+          auto idx = a.indexmap().to_idx(sym_class[0].first);
+          auto ref_val = init_func(idx);
+          std::apply(a, idx) = ref_val;
+          for (auto const &[lin_idx, op] : sym_class) { std::apply(a, a.indexmap().to_idx(lin_idx)) = op(ref_val); }
+        }
+#endif // MPI_SUPPORT / _OPENMP
+
       } else {
         for (auto const &sym_class : sym_classes) {
           auto idx = a.indexmap().to_idx(sym_class[0].first);
diff --git a/deps/CMakeLists.txt b/deps/CMakeLists.txt
index 8101214c8..a5ef4fbcc 100644
--- a/deps/CMakeLists.txt
+++ b/deps/CMakeLists.txt
@@ -84,19 +84,21 @@ external_dependency(itertools
 )
 
 # -- h5 --
-external_dependency(h5
-  GIT_REPO https://github.com/TRIQS/h5
-  VERSION 1.3
-  GIT_TAG unstable
-)
-
+if(HDF5Support)
+  external_dependency(h5
+    GIT_REPO https://github.com/TRIQS/h5
+    VERSION 1.3
+    GIT_TAG unstable
+  )
+endif()
 # -- MPI --
+if(MPISupport)
 external_dependency(mpi
   GIT_REPO https://github.com/TRIQS/mpi
   VERSION 1.3
   GIT_TAG unstable
 )
-
+endif()
 ## Pybind 11
 #find_package(Python)
 #add_subdirectory(pybind11)
diff --git a/test/c++/CMakeLists.txt b/test/c++/CMakeLists.txt
index 9fcbe7481..a17b85d04 100644
--- a/test/c++/CMakeLists.txt
+++ b/test/c++/CMakeLists.txt
@@ -1,15 +1,26 @@
 # Copy h5 files to binary dir
-file(GLOB_RECURSE all_h5_ref_files RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} *.ref.h5)
-foreach(file ${all_h5_ref_files})
-  configure_file(${file} ${file} COPYONLY)
-endforeach()
+if(HDF5Support)
+  find_package(HDF5 REQUIRED COMPONENTS C HL)
+  file(GLOB_RECURSE all_h5_ref_files RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} *.ref.h5)
+  foreach(file ${all_h5_ref_files})
+    configure_file(${file} ${file} COPYONLY)
+  endforeach()
+endif()
 
 # List of all tests
 file(GLOB_RECURSE all_tests RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} *.cpp)
 if(NOT CudaSupport)
   list(FILTER all_tests EXCLUDE REGEX "nda_cu")
 endif()
 
+if(NOT MPISupport)
+  list(FILTER all_tests EXCLUDE REGEX "mpi")
+endif()
+
+if(NOT HDF5Support)
+  list(FILTER all_tests EXCLUDE REGEX "h5")
+endif()
+
 macro(SetUpAllTestWithMacroDef extension macrodef)
 foreach(test ${all_tests})
   get_filename_component(test_name ${test} NAME_WE)
@@ -17,7 +28,10 @@ foreach(test ${all_tests})
   #MESSAGE("${test_name} with option ${ARGV1}")
   get_filename_component(test_dir ${test} DIRECTORY)
   add_executable(${test_name} ${test})
-  target_link_libraries(${test_name} ${PROJECT_NAME}_c gtest_main ${PROJECT_NAME}_warnings hdf5::hdf5)
+  target_link_libraries(${test_name} ${PROJECT_NAME}_c gtest_main ${PROJECT_NAME}_warnings)
+  if(HDF5Support)
+    target_link_libraries(${test_name} hdf5::hdf5)
+  endif()
   target_compile_options(${test_name} PRIVATE "${ARGV1}")
   set_property(TARGET ${test_name} PROPERTY RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/${test_dir})
   add_test(NAME ${test_name} COMMAND ${test_name} WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/${test_dir})
diff --git a/test/c++/nda_h5.cpp b/test/c++/nda_h5.cpp
index 94e162278..a44323a49 100644
--- a/test/c++/nda_h5.cpp
+++ b/test/c++/nda_h5.cpp
@@ -13,6 +13,7 @@
 #include
 #include
 
+
 using namespace nda::clef::literals;
 using nda::ellipsis;
 using nda::range;