diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 1f322b7ba..510faebe0 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -34,137 +34,151 @@ jobs: runs-on: ${{ matrix.os }} steps: - - uses: actions/checkout@v4 - - - uses: actions/cache/restore@v4 - with: - path: ${{ env.CCACHE_DIR }} - key: ccache-${{ matrix.os }}-${{ matrix.cc }}-${{ github.run_id }} - restore-keys: - ccache-${{ matrix.os }}-${{ matrix.cc }}- - - - name: Install ubuntu dependencies - if: ${{ contains(matrix.os, 'ubuntu') }} - run: > - sudo apt-get update && - sudo apt-get install lsb-release wget software-properties-common && - sudo apt-get install - ccache - clang - g++ - gfortran - hdf5-tools - libblas-dev - libclang-dev - libc++-dev - libc++abi-dev - libomp-dev - libfftw3-dev - libgfortran5 - libgmp-dev - libhdf5-dev - liblapack-dev - libopenmpi-dev - openmpi-bin - openmpi-common - openmpi-doc - python3-clang - python3-dev - python3-mako - python3-mpi4py - python3-numpy - python3-pip - python3-scipy - python3-sphinx - python3-nbsphinx - - - name: Set up virtualenv - run: | - mkdir $HOME/.venv - python3 -m venv --system-site-packages $HOME/.venv/my_python - source $HOME/.venv/my_python/bin/activate - echo "VIRTUAL_ENV=$VIRTUAL_ENV" >> $GITHUB_ENV - echo "PATH=$PATH" >> $GITHUB_ENV - - - name: Install homebrew dependencies - if: ${{ contains(matrix.os, 'macos') }} - run: | - brew update - brew install ccache gcc llvm hdf5 open-mpi openblas doxygen - pip install mako numpy scipy mpi4py - pip install -r requirements.txt - echo "PATH=$(brew --prefix llvm)/bin:$(brew --prefix gcc)/bin:$PATH" >> $GITHUB_ENV - echo "PYTHONPATH=$(brew --prefix llvm)/lib/python3.13/site-packages" >> $GITHUB_ENV - - - name: Add clang CXXFLAGS - if: ${{ contains(matrix.cxx, 'clang') }} - run: | - echo "CXXFLAGS=-stdlib=libc++" >> $GITHUB_ENV - - - name: Add clang LDFLAGS for macos to link against brew's libc++ - if: ${{ contains(matrix.os, 'macos') && 
contains(matrix.cxx, 'clang') }} - run: | - echo 'LDFLAGS="-L$(brew --prefix llvm)/lib/c++ -L$(brew --prefix llvm)/lib -lunwind"' >> $GITHUB_ENV - - - name: Set up test coverage - if: matrix.cov == 'ON' - run: | - pip install gcovr - echo "CXXFLAGS=--coverage" >> $GITHUB_ENV - - - name: Prepare source files for doxygen - if: matrix.doc == 'ON' - working-directory: ./c++/nda - run: | - sed -e '/#include .*impl.*.hpp/{r _impl_basic_array_view_common.hpp' -e 'd' -e '}' basic_array.hpp > tmp_basic_array.hpp - sed -e '/#include .*impl.*.hpp/{r _impl_basic_array_view_common.hpp' -e 'd' -e '}' basic_array_view.hpp > tmp_basic_array_view.hpp - mv tmp_basic_array.hpp basic_array.hpp - mv tmp_basic_array_view.hpp basic_array_view.hpp - - - name: Build nda - env: - CC: ${{ matrix.cc }} - CXX: ${{ matrix.cxx }} - run: | - mkdir build && cd build && cmake .. -DCMAKE_INSTALL_PREFIX=$HOME/install -DPythonSupport=ON -DBuild_Documentation=${{ matrix.doc }} -DCMAKE_BUILD_TYPE=${{ matrix.cov == 'ON' && 'Debug' || 'Release' }} - make -j2 || make -j1 VERBOSE=1 - - - name: Test nda - env: - OPENBLAS_NUM_THREADS: "1" - run: | - cd build - ctest -j2 --output-on-failure - - - name: Generate test coverage HTML output - if: matrix.cov == 'ON' - run: | - cd build - mkdir coverage - gcovr --gcov-executable "gcov" --root ../c++ --html-details -o coverage/coverage.html . 
- - - name: ccache statistics - if: always() - run: ccache -sv - - - uses: actions/cache/save@v4 - if: always() - with: - path: ${{ env.CCACHE_DIR }} - key: ccache-${{ matrix.os }}-${{ matrix.cc }}-${{ github.run_id }} - - - name: Deploy documentation - if: matrix.doc == 'ON' && github.ref == 'refs/heads/unstable' - uses: JamesIves/github-pages-deploy-action@v4 - with: - folder: build/doc/html - branch: github.io - target-folder: docs/unstable - - - name: Deploy test coverage - if: matrix.cov == 'ON' && github.ref == 'refs/heads/unstable' - uses: JamesIves/github-pages-deploy-action@v4 - with: - folder: build/coverage - branch: github.io - target-folder: docs/coverage + - uses: actions/checkout@v4 + + - uses: actions/cache/restore@v4 + with: + path: ${{ env.CCACHE_DIR }} + key: ccache-${{ matrix.os }}-${{ matrix.cc }}-${{ github.run_id }} + restore-keys: + ccache-${{ matrix.os }}-${{ matrix.cc }}- + + - name: Install ubuntu dependencies + if: ${{ contains(matrix.os, 'ubuntu') }} + run: > + sudo apt-get update && + sudo apt-get install lsb-release wget software-properties-common && + sudo apt-get install + ccache + clang + g++ + gfortran + hdf5-tools + libblas-dev + libclang-dev + libc++-dev + libc++abi-dev + libomp-dev + libfftw3-dev + libgfortran5 + libgmp-dev + libhdf5-dev + liblapack-dev + libopenmpi-dev + openmpi-bin + openmpi-common + openmpi-doc + python3-clang + python3-dev + python3-mako + python3-mpi4py + python3-numpy + python3-pip + python3-scipy + python3-sphinx + python3-nbsphinx + + - name: Set up virtualenv + run: | + mkdir $HOME/.venv + python3 -m venv --system-site-packages $HOME/.venv/my_python + source $HOME/.venv/my_python/bin/activate + echo "VIRTUAL_ENV=$VIRTUAL_ENV" >> $GITHUB_ENV + echo "PATH=$PATH" >> $GITHUB_ENV + + - name: Install homebrew dependencies + if: ${{ contains(matrix.os, 'macos') }} + run: | + brew update + brew install ccache gcc llvm hdf5 open-mpi openblas doxygen + pip install mako numpy scipy mpi4py + pip install -r 
requirements.txt + echo "PATH=$(brew --prefix llvm)/bin:$(brew --prefix gcc)/bin:$PATH" >> $GITHUB_ENV + echo "PYTHONPATH=$(brew --prefix llvm)/lib/python3.13/site-packages" >> $GITHUB_ENV + + - name: Add clang CXXFLAGS + if: ${{ contains(matrix.cxx, 'clang') }} + run: | + echo "CXXFLAGS=-stdlib=libc++" >> $GITHUB_ENV + + - name: Add clang LDFLAGS for macos to link against brew's libc++ + if: ${{ contains(matrix.os, 'macos') && contains(matrix.cxx, 'clang') }} + run: | + echo 'LDFLAGS="-L$(brew --prefix llvm)/lib/c++ -L$(brew --prefix llvm)/lib -lunwind"' >> $GITHUB_ENV + + - name: Set up test coverage + if: matrix.cov == 'ON' + run: | + pip install gcovr + echo "CXXFLAGS=--coverage" >> $GITHUB_ENV + + - name: Prepare source files for doxygen + if: matrix.doc == 'ON' + working-directory: ./c++/nda + run: | + sed -e '/#include .*impl.*.hpp/{r _impl_basic_array_view_common.hpp' -e 'd' -e '}' basic_array.hpp > tmp_basic_array.hpp + sed -e '/#include .*impl.*.hpp/{r _impl_basic_array_view_common.hpp' -e 'd' -e '}' basic_array_view.hpp > tmp_basic_array_view.hpp + mv tmp_basic_array.hpp basic_array.hpp + mv tmp_basic_array_view.hpp basic_array_view.hpp + + - name: Test flag combinations + env: + CC: ${{ matrix.cc }} + CXX: ${{ matrix.cxx }} + OPENBLAS_NUM_THREADS: "1" + run: | + set -e + # Build and test all 8 HDF5/OpenMP/MPI combinations; all-ON runs last so the coverage and docs steps below read the full-featured build/ tree. + for hdf5 in OFF ON; do + for openmp in OFF ON; do + for mpi in OFF ON; do + echo "::group::[Config] HDF5=$hdf5 OMP=$openmp MPI=$mpi" + rm -rf build && mkdir build && cd build + cmake .. \ + -DCMAKE_INSTALL_PREFIX=$HOME/install \ + -DPythonSupport=ON \ + -DBuild_Documentation=${{ matrix.doc }} \ + -DCMAKE_BUILD_TYPE=${{ matrix.cov == 'ON' && 'Debug' || 'Release' }} \ + -DHDF5Support=$hdf5 \ + -DOpenMPSupport=$openmp \ + -DMPISupport=$mpi + make -j2 || make -j1 VERBOSE=1 + ctest -j2 --output-on-failure + cd .. 
+ echo "::endgroup::" + done + done + done + + - name: Generate test coverage HTML output + if: matrix.cov == 'ON' + run: | + cd build + mkdir coverage + gcovr --gcov-executable "gcov" --root ../c++ --html-details -o coverage/coverage.html . + + - name: ccache statistics + if: always() + run: ccache -sv + + - uses: actions/cache/save@v4 + if: always() + with: + path: ${{ env.CCACHE_DIR }} + key: ccache-${{ matrix.os }}-${{ matrix.cc }}-${{ github.run_id }} + + - name: Deploy documentation + if: matrix.doc == 'ON' && github.ref == 'refs/heads/unstable' + uses: JamesIves/github-pages-deploy-action@v4 + with: + folder: build/doc/html + branch: github.io + target-folder: docs/unstable + + - name: Deploy test coverage + if: matrix.cov == 'ON' && github.ref == 'refs/heads/unstable' + uses: JamesIves/github-pages-deploy-action@v4 + with: + folder: build/coverage + branch: github.io + target-folder: docs/coverage diff --git a/CMakeLists.txt b/CMakeLists.txt index a6913bd8f..fe8954708 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -57,6 +57,12 @@ endif() # Python Support option(PythonSupport "Build with Python support" OFF) +# H5 support +option(HDF5Support "Build with HDF5 support" OFF) +# MPI support +option(MPISupport "Build with MPI support" OFF) +# OpenMP support +option(OpenMPSupport "Build with OpenMP support" OFF) # Documentation option(Build_Documentation "Build documentation" OFF) diff --git a/benchmarks/CMakeLists.txt b/benchmarks/CMakeLists.txt index 00728b352..dcf10c852 100644 --- a/benchmarks/CMakeLists.txt +++ b/benchmarks/CMakeLists.txt @@ -7,6 +7,9 @@ foreach(bench ${all_benchs}) add_executable(${bench_name} ${bench}) target_link_libraries(${bench_name} ${PROJECT_NAME}::${PROJECT_NAME}_c ${PROJECT_NAME}_warnings benchmark_main) set_property(TARGET ${bench_name} PROPERTY RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/${bench_dir}) + if (CudaSupport) + target_compile_definitions(${bench_name} PRIVATE -DNDA_CUDA_SUPPORT) + endif() 
#add_bench(NAME ${bench_name} COMMAND ${bench_name} WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/${bench_dir}) # Run clang-tidy if found if(CLANG_TIDY_EXECUTABLE) diff --git a/benchmarks/blas.cpp b/benchmarks/blas.cpp index 2d1db358b..fd17ce450 100644 --- a/benchmarks/blas.cpp +++ b/benchmarks/blas.cpp @@ -22,7 +22,9 @@ static void DOT(benchmark::State &state) { state.counters["bytesize"] = double(NBytes); } BENCHMARK_TEMPLATE(DOT, nda::vector)->RangeMultiplier(2)->Range(Nmin, Nmax)->Unit(benchmark::kMicrosecond); // NOLINT +#ifdef NDA_CUDA_SUPPORT BENCHMARK_TEMPLATE(DOT, nda::cuvector)->RangeMultiplier(2)->Range(Nmin, Nmax)->Unit(benchmark::kMicrosecond); // NOLINT +#endif template static void GEMM(benchmark::State &state) { @@ -36,7 +38,9 @@ static void GEMM(benchmark::State &state) { state.counters["bytesize"] = double(NBytes); } BENCHMARK_TEMPLATE(GEMM, nda::matrix)->RangeMultiplier(2)->Range(Nmin, Nmax)->Unit(benchmark::kMicrosecond); // NOLINT +#ifdef NDA_CUDA_SUPPORT BENCHMARK_TEMPLATE(GEMM, nda::cumatrix)->RangeMultiplier(2)->Range(Nmin, Nmax)->Unit(benchmark::kMicrosecond); // NOLINT +#endif template static void GER(benchmark::State &state) { @@ -50,7 +54,9 @@ static void GER(benchmark::State &state) { state.counters["bytesize"] = double(NBytes); } BENCHMARK_TEMPLATE(GER, nda::vector, nda::matrix)->RangeMultiplier(2)->Range(Nmin, Nmax)->Unit(benchmark::kMicrosecond); // NOLINT +#ifdef NDA_CUDA_SUPPORT BENCHMARK_TEMPLATE(GER, nda::cuvector, nda::cumatrix) ->RangeMultiplier(2) ->Range(Nmin, Nmax) ->Unit(benchmark::kMicrosecond); // NOLINT +#endif diff --git a/benchmarks/copy.cpp b/benchmarks/copy.cpp index 8848d4471..856c4d40f 100644 --- a/benchmarks/copy.cpp +++ b/benchmarks/copy.cpp @@ -10,8 +10,10 @@ using value_t = double; template using array_t = nda::array; +#ifdef NDA_CUDA_SUPPORT template using device_array_t = nda::cuarray; +#endif const long KBmin = 8; const long KBmax = 1 << 15; @@ -27,8 +29,9 @@ static void Copy(benchmark::State &state) { 
state.counters["processed"] = double(NBytes); } BENCHMARK_TEMPLATE(Copy, array_t<1>)->RangeMultiplier(8)->Range(KBmin, KBmax); // NOLINT +#ifdef NDA_CUDA_SUPPORT BENCHMARK_TEMPLATE(Copy, device_array_t<1>)->RangeMultiplier(8)->Range(KBmin, KBmax); // NOLINT - +#endif template static void Copy1DStrided(benchmark::State &state) { long NBytes = state.range(0) * 1024; @@ -44,7 +47,9 @@ static void Copy1DStrided(benchmark::State &state) { state.counters["step"] = double(step); } BENCHMARK_TEMPLATE(Copy1DStrided, array_t<1>)->RangeMultiplier(8)->Range(KBmin, KBmax); // NOLINT +#ifdef NDA_CUDA_SUPPORT BENCHMARK_TEMPLATE(Copy1DStrided, device_array_t<1>)->RangeMultiplier(8)->Range(KBmin, KBmax); // NOLINT +#endif template static void CopyBlockStrided(benchmark::State &state) { @@ -63,6 +68,8 @@ static void CopyBlockStrided(benchmark::State &state) { state.counters["n_blocks"] = double(n_blocks); } BENCHMARK_TEMPLATE(CopyBlockStrided, array_t<2>, array_t<2>)->RangeMultiplier(8)->Range(KBmin, KBmax); // NOLINT +#ifdef NDA_CUDA_SUPPORT BENCHMARK_TEMPLATE(CopyBlockStrided, device_array_t<2>, device_array_t<2>)->RangeMultiplier(8)->Range(KBmin, KBmax); // NOLINT BENCHMARK_TEMPLATE(CopyBlockStrided, array_t<2>, device_array_t<2>)->RangeMultiplier(8)->Range(KBmin, KBmax); // NOLINT BENCHMARK_TEMPLATE(CopyBlockStrided, device_array_t<2>, array_t<2>)->RangeMultiplier(8)->Range(KBmin, KBmax); // NOLINT +#endif diff --git a/benchmarks/gemm_batch.cpp b/benchmarks/gemm_batch.cpp index ed2aef1b3..f58b5047c 100644 --- a/benchmarks/gemm_batch.cpp +++ b/benchmarks/gemm_batch.cpp @@ -27,7 +27,9 @@ static void GEMM_BATCH(benchmark::State &state) { state.counters["bytesize"] = double(NBytes); } BENCHMARK_TEMPLATE(GEMM_BATCH, nda::matrix)->RangeMultiplier(2)->Range(Nmin, Nmax)->Unit(benchmark::kMicrosecond); // NOLINT +#ifdef NDA_CUDA_SUPPORT BENCHMARK_TEMPLATE(GEMM_BATCH, nda::cumatrix)->RangeMultiplier(2)->Range(Nmin, Nmax)->Unit(benchmark::kMicrosecond); // NOLINT +#endif template static 
void GEMM_VBATCH(benchmark::State &state) { @@ -45,7 +47,9 @@ static void GEMM_VBATCH(benchmark::State &state) { state.counters["bytesize"] = double(NBytes); } BENCHMARK_TEMPLATE(GEMM_VBATCH, nda::matrix)->RangeMultiplier(2)->Range(Nmin, Nmax)->Unit(benchmark::kMicrosecond); // NOLINT +#ifdef NDA_CUDA_SUPPORT BENCHMARK_TEMPLATE(GEMM_VBATCH, nda::cumatrix)->RangeMultiplier(2)->Range(Nmin, Nmax)->Unit(benchmark::kMicrosecond); // NOLINT +#endif template static void GEMM_BATCH_STRIDED(benchmark::State &state) { @@ -63,4 +67,6 @@ static void GEMM_BATCH_STRIDED(benchmark::State &state) { state.counters["bytesize"] = double(NBytes); } BENCHMARK_TEMPLATE(GEMM_BATCH_STRIDED, nda::array)->RangeMultiplier(2)->Range(Nmin, Nmax)->Unit(benchmark::kMicrosecond); // NOLINT +#ifdef NDA_CUDA_SUPPORT BENCHMARK_TEMPLATE(GEMM_BATCH_STRIDED, nda::cuarray)->RangeMultiplier(2)->Range(Nmin, Nmax)->Unit(benchmark::kMicrosecond); // NOLINT +#endif diff --git a/c++/nda/CMakeLists.txt b/c++/nda/CMakeLists.txt index 4644eeea1..de2d1758f 100644 --- a/c++/nda/CMakeLists.txt +++ b/c++/nda/CMakeLists.txt @@ -30,17 +30,36 @@ install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} DESTINATION include FILES_MATCHING configure_file(version.hpp.in version.hpp @ONLY) install(FILES ${CMAKE_CURRENT_BINARY_DIR}/version.hpp DESTINATION include/${PROJECT_NAME}) + +# ========= Itertools ========== +target_link_libraries(${PROJECT_NAME}_c PUBLIC itertools::itertools_c) + # ========= Additional Depdencies ========== # Link against HDF5 C++ Interface -target_link_libraries(${PROJECT_NAME}_c PUBLIC h5::h5_c) +if(HDF5Support) + message(STATUS "-------- HDF5 detection -------------") + target_link_libraries(${PROJECT_NAME}_c PUBLIC h5::h5_c) +endif() # Link against MPI C++ Interface -target_link_libraries(${PROJECT_NAME}_c PUBLIC mpi::mpi_c) +if (MPISupport) + message(STATUS "-------- MPI detection -------------") + target_link_libraries(${PROJECT_NAME}_c PUBLIC mpi::mpi_c) + + target_compile_definitions(${PROJECT_NAME}_c 
PUBLIC + $ + $ + ) + +endif() # OpenMP -find_package(OpenMP REQUIRED COMPONENTS CXX) -target_link_libraries(${PROJECT_NAME}_c PUBLIC OpenMP::OpenMP_CXX) +if (OpenMPSupport) + message(STATUS "-------- OpenMP detection -------------") + find_package(OpenMP REQUIRED COMPONENTS CXX) + target_link_libraries(${PROJECT_NAME}_c PUBLIC OpenMP::OpenMP_CXX) +endif () # ========= Blas / Lapack ========== @@ -83,7 +102,10 @@ if(CudaSupport) message(STATUS "-------- CUDAToolkit detection -------------") find_package(CUDAToolkit REQUIRED) target_link_libraries(${PROJECT_NAME}_c PUBLIC CUDA::cudart CUDA::cublas CUDA::cusolver) - target_compile_definitions(${PROJECT_NAME}_c PUBLIC NDA_HAVE_CUDA NDA_HAVE_DEVICE) + target_compile_definitions(${PROJECT_NAME}_c PUBLIC + $ + $ + ) endif() # ========= Magma ========== diff --git a/c++/nda/h5.hpp b/c++/nda/h5.hpp index 9225f1f8c..88833d281 100644 --- a/c++/nda/h5.hpp +++ b/c++/nda/h5.hpp @@ -10,12 +10,12 @@ #pragma once -#include "./concepts.hpp" -#include "./declarations.hpp" -#include "./exceptions.hpp" -#include "./layout/for_each.hpp" -#include "./layout/range.hpp" -#include "./traits.hpp" +#include "concepts.hpp" +#include "declarations.hpp" +#include "exceptions.hpp" +#include "layout/for_each.hpp" +#include "layout/range.hpp" +#include "traits.hpp" #include
diff --git a/c++/nda/mpi.hpp b/c++/nda/mpi.hpp index 8fe4bc6c8..ac7bc02f3 100644 --- a/c++/nda/mpi.hpp +++ b/c++/nda/mpi.hpp @@ -10,8 +10,10 @@ #pragma once -#include "./mpi/broadcast.hpp" -#include "./mpi/gather.hpp" -#include "./mpi/reduce.hpp" -#include "./mpi/scatter.hpp" -#include "./mpi/utils.hpp" + +#include "mpi/broadcast.hpp" +#include "mpi/gather.hpp" +#include "mpi/reduce.hpp" +#include "mpi/scatter.hpp" +#include "mpi/utils.hpp" + diff --git a/c++/nda/mpi/broadcast.hpp b/c++/nda/mpi/broadcast.hpp index f9dd0ffa0..f8cda328c 100644 --- a/c++/nda/mpi/broadcast.hpp +++ b/c++/nda/mpi/broadcast.hpp @@ -10,7 +10,6 @@ #pragma once -#include "./utils.hpp" #include "../basic_functions.hpp" #include "../traits.hpp" diff --git a/c++/nda/mpi/gather.hpp b/c++/nda/mpi/gather.hpp index a96448a78..cf8308fb2 100644 --- a/c++/nda/mpi/gather.hpp +++ b/c++/nda/mpi/gather.hpp @@ -12,9 +12,7 @@ #include "./utils.hpp" #include "../basic_functions.hpp" -#include "../concepts.hpp" #include "../declarations.hpp" -#include "../layout/range.hpp" #include "../macros.hpp" #include "../stdutil/array.hpp" #include "../traits.hpp" diff --git a/c++/nda/mpi/reduce.hpp b/c++/nda/mpi/reduce.hpp index 401b72814..65b2df61a 100644 --- a/c++/nda/mpi/reduce.hpp +++ b/c++/nda/mpi/reduce.hpp @@ -10,11 +10,8 @@ #pragma once -#include "./utils.hpp" #include "../basic_functions.hpp" -#include "../concepts.hpp" #include "../declarations.hpp" -#include "../exceptions.hpp" #include "../macros.hpp" #include "../map.hpp" #include "../traits.hpp" diff --git a/c++/nda/mpi/scatter.hpp b/c++/nda/mpi/scatter.hpp index a8fb831ad..a33ed2249 100644 --- a/c++/nda/mpi/scatter.hpp +++ b/c++/nda/mpi/scatter.hpp @@ -11,12 +11,10 @@ #pragma once #include "./utils.hpp" -#include "../concepts.hpp" #include "../declarations.hpp" #include "../macros.hpp" #include "../traits.hpp" -#include #include #include @@ -25,7 +23,6 @@ #include #include #include -#include namespace nda::detail { diff --git a/c++/nda/sym_grp.hpp 
b/c++/nda/sym_grp.hpp index 225c4527e..bbe099aad 100644 --- a/c++/nda/sym_grp.hpp +++ b/c++/nda/sym_grp.hpp @@ -11,10 +11,12 @@ #pragma once #include "./nda.hpp" -#include "./mpi.hpp" +#ifdef MPI_SUPPORT +#include "mpi.hpp" +#endif +#include #include -#include #include #include @@ -165,6 +167,8 @@ namespace nda { */ [[nodiscard]] long num_classes() const { return sym_classes.size(); } + enum class Parallel { HYBRID, MPI, OMP, NONE }; + /** * @brief Initialize an nda::Array using an nda::NdaInitFunc. * @@ -176,24 +180,74 @@ namespace nda { * @param init_func Callable that is used to initialize the array. * @param parallel Parallelize using openmp and mpi. */ - template + private: + static constexpr Parallel compute_parallel_default() noexcept { +#if defined(MPI_SUPPORT) && defined(_OPENMP) + return Parallel::HYBRID; +#elif defined(MPI_SUPPORT) + return Parallel::MPI; +#elif defined(_OPENMP) + return Parallel::OMP; +#else + return Parallel::NONE; +#endif + } + + public: + template requires(NdaInitFunc) - void init(A &a, H const &init_func, bool parallel = false) const { - if (parallel) { - // reset input array to allow for mpi reduction - a() = 0.0; + void init(A &a, H const &init_func) const { + if constexpr (P == Parallel::HYBRID) { +#if defined(MPI_SUPPORT) && defined(_OPENMP) -#pragma omp parallel + a() = 0.0; + // itertools::omp_chunk splits the range per thread, so it needs a plain 'omp parallel' region, not 'parallel for' +#pragma omp parallel for (auto const &sym_class : itertools::omp_chunk(mpi::chunk(sym_classes))) { auto idx = a.indexmap().to_idx(sym_class[0].first); auto ref_val = init_func(idx); std::apply(a, idx) = ref_val; for (auto const &[lin_idx, op] : sym_class) { std::apply(a, a.indexmap().to_idx(lin_idx)) = op(ref_val); } } + a = mpi::all_reduce(a); +#else + static_assert(P != Parallel::HYBRID, "Parallel::HYBRID requires MPI and OpenMP support."); +#endif + + } else if constexpr (P == Parallel::MPI) { +#if defined(MPI_SUPPORT) - // distribute data among all ranks + a() = 0.0; + for (auto const &sym_class : 
mpi::chunk(sym_classes)) { + auto idx = a.indexmap().to_idx(sym_class[0].first); + auto ref_val = init_func(idx); + std::apply(a, idx) = ref_val; + for (auto const &[lin_idx, op] : sym_class) { std::apply(a, a.indexmap().to_idx(lin_idx)) = op(ref_val); } + } a = mpi::all_reduce(a); +#else + static_assert(P != Parallel::MPI, "Parallel::MPI requires MPI support."); +#endif + + } else if constexpr (P == Parallel::OMP) { +#if defined(_OPENMP) + + a() = 0.0; + // itertools::omp_chunk splits the range per thread, so it needs a plain 'omp parallel' region, not 'parallel for' +#pragma omp parallel + for (auto const &sym_class : itertools::omp_chunk(sym_classes)) { + auto idx = a.indexmap().to_idx(sym_class[0].first); + auto ref_val = init_func(idx); + std::apply(a, idx) = ref_val; + for (auto const &[lin_idx, op] : sym_class) { std::apply(a, a.indexmap().to_idx(lin_idx)) = op(ref_val); } + } +#else + static_assert(P != Parallel::OMP, "Parallel::OMP requires OpenMP support."); +#endif + } else { + // Sequential fallback + a() = 0.0; for (auto const &sym_class : sym_classes) { auto idx = a.indexmap().to_idx(sym_class[0].first); auto ref_val = init_func(idx); @@ -202,7 +256,6 @@ namespace nda { } } } - /** * @brief Symmetrize an array and return the maximum symmetry violation and its corresponding array index. 
* diff --git a/deps/CMakeLists.txt b/deps/CMakeLists.txt index 8101214c8..a5ef4fbcc 100644 --- a/deps/CMakeLists.txt +++ b/deps/CMakeLists.txt @@ -84,19 +84,21 @@ external_dependency(itertools ) # -- h5 -- -external_dependency(h5 - GIT_REPO https://github.com/TRIQS/h5 - VERSION 1.3 - GIT_TAG unstable -) - +if (HDF5Support) + external_dependency(h5 + GIT_REPO https://github.com/TRIQS/h5 + VERSION 1.3 + GIT_TAG unstable + ) +endif() # -- MPI -- +if (MPISupport) external_dependency(mpi GIT_REPO https://github.com/TRIQS/mpi VERSION 1.3 GIT_TAG unstable ) - +endif () ## Pybind 11 #find_package(Python) #add_subdirectory(pybind11) diff --git a/test/c++/CMakeLists.txt b/test/c++/CMakeLists.txt index 9fcbe7481..a17b85d04 100644 --- a/test/c++/CMakeLists.txt +++ b/test/c++/CMakeLists.txt @@ -1,15 +1,26 @@ # Copy h5 files to binary dir -file(GLOB_RECURSE all_h5_ref_files RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} *.ref.h5) -foreach(file ${all_h5_ref_files}) - configure_file(${file} ${file} COPYONLY) -endforeach() +if(HDF5Support) + find_package(HDF5 REQUIRED COMPONENTS C HL) + file(GLOB_RECURSE all_h5_ref_files RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} *.ref.h5) + foreach(file ${all_h5_ref_files}) + configure_file(${file} ${file} COPYONLY) + endforeach() +endif() # List of all tests file(GLOB_RECURSE all_tests RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} *.cpp) if(NOT CudaSupport) list(FILTER all_tests EXCLUDE REGEX "nda_cu") endif() +if(NOT MPISupport) + list(FILTER all_tests EXCLUDE REGEX "mpi") +endif() + +if(NOT HDF5Support) + list(FILTER all_tests EXCLUDE REGEX "h5") +endif() + macro(SetUpAllTestWithMacroDef extension macrodef) foreach(test ${all_tests}) get_filename_component(test_name ${test} NAME_WE) @@ -17,7 +28,10 @@ foreach(test ${all_tests}) #MESSAGE("${test_name} with option ${ARGV1}") get_filename_component(test_dir ${test} DIRECTORY) add_executable(${test_name} ${test}) - target_link_libraries(${test_name} ${PROJECT_NAME}_c gtest_main ${PROJECT_NAME}_warnings hdf5::hdf5) + 
target_link_libraries(${test_name} ${PROJECT_NAME}_c gtest_main ${PROJECT_NAME}_warnings) + if (HDF5Support) + target_link_libraries(${test_name} hdf5::hdf5) + endif() target_compile_options(${test_name} PRIVATE "${ARGV1}") set_property(TARGET ${test_name} PROPERTY RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/${test_dir}) add_test(NAME ${test_name} COMMAND ${test_name} WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/${test_dir}) diff --git a/test/c++/nda_h5.cpp b/test/c++/nda_h5.cpp index 94e162278..a44323a49 100644 --- a/test/c++/nda_h5.cpp +++ b/test/c++/nda_h5.cpp @@ -13,6 +13,7 @@ #include
#include + using namespace nda::clef::literals; using nda::ellipsis; using nda::range;