Skip to content

Commit e1be009

Browse files
authored
Merge pull request kokkos#163 from dutkalex/error-handling
2 parents f910a73 + 391cdaa commit e1be009

File tree

12 files changed

+133
-107
lines changed

12 files changed

+133
-107
lines changed

CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ include(cmake/mpi-vendor.cmake)
3434
option(KokkosComm_ENABLE_PERFTESTS "Build KokkosComm perf tests" OFF)
3535
option(KokkosComm_ENABLE_TESTS "Build KokkosComm tests" OFF)
3636
option(KokkosComm_ENABLE_MPI "Build KokkosComm with MPI transport" ON)
37+
option(KokkosComm_ABORT_ON_ERROR "Runtime error checks trigger a global abort" OFF)
3738

3839
# Resolve options
3940
set(KOKKOSCOMM_ENABLE_PERFTESTS ${KokkosComm_ENABLE_PERFTESTS} CACHE BOOL "" FORCE)

src/KokkosComm/CMakeLists.txt

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -76,10 +76,19 @@ if(KOKKOSCOMM_ENABLE_MPI)
7676
FILE_SET kokkoscomm_mpi_impl_headers
7777
TYPE HEADERS
7878
BASE_DIRS ${PROJECT_SOURCE_DIR}/src
79-
FILES mpi/impl/pack_traits.hpp mpi/impl/packer.hpp mpi/impl/tags.hpp mpi/impl/types.hpp
79+
FILES
80+
mpi/impl/pack_traits.hpp
81+
mpi/impl/packer.hpp
82+
mpi/impl/tags.hpp
83+
mpi/impl/types.hpp
84+
mpi/impl/error_handling.hpp
8085
)
8186
endif()
8287

88+
if(KokkosComm_ABORT_ON_ERROR)
89+
target_compile_definitions(KokkosComm INTERFACE KOKKOSCOMM_ABORT_ON_ERROR)
90+
endif()
91+
8392
# --- COMPILE FLAGS --- #
8493
include(CheckCXXCompilerFlag)
8594

src/KokkosComm/mpi/allgather.hpp

Lines changed: 11 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
#include <KokkosComm/traits.hpp>
2424

2525
#include "impl/types.hpp"
26+
#include "impl/error_handling.hpp"
2627

2728
namespace KokkosComm::mpi {
2829

@@ -36,12 +37,9 @@ void allgather(const SendView &sv, const RecvView &rv, MPI_Comm comm) {
3637
static_assert(KokkosComm::rank<SendView>() <= 1, "allgather for SendView::rank > 1 not supported");
3738
static_assert(KokkosComm::rank<RecvView>() <= 1, "allgather for RecvView::rank > 1 not supported");
3839

39-
if (!KokkosComm::is_contiguous(sv)) {
40-
throw std::runtime_error("low-level allgather requires contiguous send view");
41-
}
42-
if (!KokkosComm::is_contiguous(rv)) {
43-
throw std::runtime_error("low-level allgather requires contiguous recv view");
44-
}
40+
KokkosComm::mpi::fail_if(!KokkosComm::is_contiguous(sv), "low-level allgather requires contiguous send view");
41+
KokkosComm::mpi::fail_if(!KokkosComm::is_contiguous(rv), "low-level allgather requires contiguous recv view");
42+
4543
const int count = KokkosComm::span(sv); // all ranks send/recv same count
4644
MPI_Allgather(KokkosComm::data_handle(sv), count, KokkosComm::Impl::mpi_type_v<SendScalar>,
4745
KokkosComm::data_handle(rv), count, KokkosComm::Impl::mpi_type_v<RecvScalar>, comm);
@@ -58,9 +56,8 @@ void allgather(const ExecSpace &space, const RecvView &rv, const size_t recvCoun
5856

5957
static_assert(KokkosComm::rank<RecvView>() <= 1, "allgather for RecvView::rank > 1 not supported");
6058

61-
if (!KokkosComm::is_contiguous(rv)) {
62-
throw std::runtime_error("low-level allgather requires contiguous recv view");
63-
}
59+
KokkosComm::mpi::fail_if(!KokkosComm::is_contiguous(rv), "low-level allgather requires contiguous recv view");
60+
6461
space.fence("fence before allgather"); // work in space may have been used to produce send view data
6562
MPI_Allgather(MPI_IN_PLACE, 0 /*ignored*/, MPI_DATATYPE_NULL /*ignored*/, KokkosComm::data_handle(rv), recvCount,
6663
KokkosComm::Impl::mpi_type_v<RecvScalar>, comm);
@@ -72,12 +69,11 @@ template <KokkosExecutionSpace ExecSpace, KokkosView SendView, KokkosView RecvVi
7269
void allgather(const ExecSpace &space, const SendView &sv, const RecvView &rv, MPI_Comm comm) {
7370
Kokkos::Tools::pushRegion("KokkosComm::Mpi::allgather");
7471

75-
if (!KokkosComm::is_contiguous(sv) || !KokkosComm::is_contiguous(rv)) {
76-
throw std::runtime_error("allgather for non-contiguous views not implemented");
77-
} else {
78-
space.fence("fence before allgather"); // work in space may have been used to produce send view data
79-
allgather(sv, rv, comm);
80-
}
72+
KokkosComm::mpi::fail_if(!KokkosComm::is_contiguous(sv) || !KokkosComm::is_contiguous(rv),
73+
"allgather for non-contiguous views not implemented");
74+
75+
space.fence("fence before allgather"); // work in space may have been used to produce send view data
76+
allgather(sv, rv, comm);
8177

8278
Kokkos::Tools::popRegion();
8379
}

src/KokkosComm/mpi/allreduce.hpp

Lines changed: 11 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -38,15 +38,10 @@ void allreduce(SendView const &sv, RecvView const &rv, MPI_Op op, MPI_Comm comm)
3838
static_assert(KokkosComm::rank<SendView>() <= 1, "allreduce for SendView::rank > 1 not supported");
3939
static_assert(KokkosComm::rank<RecvView>() <= 1, "allreduce for RecvView::rank > 1 not supported");
4040

41-
if (!KokkosComm::is_contiguous(sv)) {
42-
throw std::runtime_error{"low-level allreduce requires contiguous send view"};
43-
}
44-
if (!KokkosComm::is_contiguous(rv)) {
45-
throw std::runtime_error{"low-level allreduce requires contiguous recv view"};
46-
}
47-
if (sv.size() != rv.size()) {
48-
throw std::runtime_error{"allreduce requires send and receive views to have the same size"};
49-
}
41+
KokkosComm::mpi::fail_if(!KokkosComm::is_contiguous(sv), "low-level allreduce requires contiguous send view");
42+
KokkosComm::mpi::fail_if(!KokkosComm::is_contiguous(rv), "low-level allreduce requires contiguous recv view");
43+
KokkosComm::mpi::fail_if(sv.size() != rv.size(), "allreduce requires send and receive views to have the same size");
44+
5045
int const count = sv.size();
5146
MPI_Allreduce(KokkosComm::data_handle(sv), KokkosComm::data_handle(rv), count,
5247
KokkosComm::Impl::mpi_type_v<SendScalar>, op, comm);
@@ -62,9 +57,8 @@ void allreduce(View const &v, MPI_Op op, MPI_Comm comm) {
6257

6358
static_assert(KokkosComm::rank<View>() <= 1, "allreduce for View::rank > 1 not supported");
6459

65-
if (!KokkosComm::is_contiguous(v)) {
66-
throw std::runtime_error("low-level allgather requires contiguous recv view");
67-
}
60+
KokkosComm::mpi::fail_if(!KokkosComm::is_contiguous(v), "low-level allgather requires contiguous recv view");
61+
6862
int const count = v.size();
6963
MPI_Allreduce(MPI_IN_PLACE, KokkosComm::data_handle(v), count, KokkosComm::Impl::mpi_type_v<Scalar>, op, comm);
7064

@@ -75,9 +69,9 @@ template <KokkosExecutionSpace ExecSpace, KokkosView SendView, KokkosView RecvVi
7569
void allreduce(ExecSpace const &space, SendView const &sv, RecvView const &rv, MPI_Op op, MPI_Comm comm) {
7670
Kokkos::Tools::pushRegion("KokkosComm::mpi::allreduce");
7771

78-
if (!KokkosComm::is_contiguous(sv) || !KokkosComm::is_contiguous(rv)) {
79-
throw std::runtime_error("allreduce for non-contiguous views not implemented");
80-
}
72+
KokkosComm::mpi::fail_if(!KokkosComm::is_contiguous(sv) || !KokkosComm::is_contiguous(rv),
73+
"allreduce for non-contiguous views not implemented");
74+
8175
space.fence("fence before allreduce"); // work in space may have been used to produce send view data
8276
allreduce(sv, rv, op, comm);
8377

@@ -88,9 +82,8 @@ template <KokkosExecutionSpace ExecSpace, KokkosView View>
8882
void allreduce(ExecSpace const &space, View const &v, MPI_Op op, MPI_Comm comm) {
8983
Kokkos::Tools::pushRegion("KokkosComm::mpi::allreduce");
9084

91-
if (!KokkosComm::is_contiguous(v)) {
92-
throw std::runtime_error("allreduce for non-contiguous views not implemented");
93-
}
85+
KokkosComm::mpi::fail_if(!KokkosComm::is_contiguous(v), "allreduce for non-contiguous views not implemented");
86+
9487
space.fence("fence before allreduce"); // work in space may have been used to produce send view data
9588
allreduce(v, op, comm);
9689

src/KokkosComm/mpi/alltoall.hpp

Lines changed: 34 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424

2525
#include "impl/pack_traits.hpp"
2626
#include "impl/types.hpp"
27+
#include "impl/error_handling.hpp"
2728

2829
namespace KokkosComm::Impl {
2930

@@ -41,28 +42,27 @@ void alltoall(const ExecSpace &space, const SendView &sv, const size_t sendCount
4142
// Make sure views are ready
4243
space.fence("KokkosComm::Impl::alltoall");
4344

44-
if (!KokkosComm::is_contiguous(sv) || !KokkosComm::is_contiguous(rv)) {
45-
throw std::runtime_error("alltoall for non-contiguous views not implemented");
46-
} else {
47-
int size;
48-
MPI_Comm_size(comm, &size);
49-
50-
if (sendCount * size > KokkosComm::extent(sv, 0)) {
51-
std::stringstream ss;
52-
ss << "alltoall sendCount * communicator size (" << sendCount << " * " << size
53-
<< ") is greater than send view size";
54-
throw std::runtime_error(ss.str());
55-
}
56-
if (recvCount * size > KokkosComm::extent(rv, 0)) {
57-
std::stringstream ss;
58-
ss << "alltoall recvCount * communicator size (" << recvCount << " * " << size
59-
<< ") is greater than recv view size";
60-
throw std::runtime_error(ss.str());
61-
}
62-
63-
MPI_Alltoall(KokkosComm::data_handle(sv), sendCount, mpi_type_v<SendScalar>, KokkosComm::data_handle(rv), recvCount,
64-
mpi_type_v<RecvScalar>, comm);
45+
KokkosComm::mpi::fail_if(!KokkosComm::is_contiguous(sv) || !KokkosComm::is_contiguous(rv),
46+
"alltoall for non-contiguous views not implemented");
47+
48+
int size;
49+
MPI_Comm_size(comm, &size);
50+
51+
if (sendCount * size > KokkosComm::extent(sv, 0)) {
52+
std::stringstream ss;
53+
ss << "alltoall sendCount * communicator size (" << sendCount << " * " << size
54+
<< ") is greater than send view size";
55+
KokkosComm::mpi::fail_if(true, ss.str().data());
6556
}
57+
if (recvCount * size > KokkosComm::extent(rv, 0)) {
58+
std::stringstream ss;
59+
ss << "alltoall recvCount * communicator size (" << recvCount << " * " << size
60+
<< ") is greater than recv view size";
61+
KokkosComm::mpi::fail_if(true, ss.str().data());
62+
}
63+
64+
MPI_Alltoall(KokkosComm::data_handle(sv), sendCount, mpi_type_v<SendScalar>, KokkosComm::data_handle(rv), recvCount,
65+
mpi_type_v<RecvScalar>, comm);
6666

6767
Kokkos::Tools::popRegion();
6868
}
@@ -79,23 +79,21 @@ void alltoall(const ExecSpace &space, const RecvView &rv, const size_t recvCount
7979
// Make sure views are ready
8080
space.fence("KokkosComm::Impl::alltoall");
8181

82-
if (!KokkosComm::is_contiguous(rv)) {
83-
throw std::runtime_error("alltoall for non-contiguous views not implemented");
84-
} else {
85-
int size;
86-
MPI_Comm_size(comm, &size);
87-
88-
if (recvCount * size > KokkosComm::extent(rv, 0)) {
89-
std::stringstream ss;
90-
ss << "alltoall recvCount * communicator size (" << recvCount << " * " << size
91-
<< ") is greater than recv view size";
92-
throw std::runtime_error(ss.str());
93-
}
94-
95-
MPI_Alltoall(MPI_IN_PLACE, 0 /*ignored*/, MPI_BYTE /*ignored*/, KokkosComm::data_handle(rv), recvCount,
96-
mpi_type_v<RecvScalar>, comm);
82+
KokkosComm::mpi::fail_if(!KokkosComm::is_contiguous(rv), "alltoall for non-contiguous views not implemented");
83+
84+
int size;
85+
MPI_Comm_size(comm, &size);
86+
87+
if (recvCount * size > KokkosComm::extent(rv, 0)) {
88+
std::stringstream ss;
89+
ss << "alltoall recvCount * communicator size (" << recvCount << " * " << size
90+
<< ") is greater than recv view size";
91+
KokkosComm::mpi::fail_if(true, ss.str().data());
9792
}
9893

94+
MPI_Alltoall(MPI_IN_PLACE, 0 /*ignored*/, MPI_BYTE /*ignored*/, KokkosComm::data_handle(rv), recvCount,
95+
mpi_type_v<RecvScalar>, comm);
96+
9997
Kokkos::Tools::popRegion();
10098
}
10199

src/KokkosComm/mpi/broadcast.hpp

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -32,9 +32,7 @@ void broadcast(View const& v, int root, MPI_Comm comm) {
3232

3333
using Scalar = typename View::value_type;
3434

35-
if (!KokkosComm::is_contiguous(v)) {
36-
throw std::runtime_error("low-level broadcast requires contiguous view");
37-
}
35+
KokkosComm::mpi::fail_if(!KokkosComm::is_contiguous(v), "low-level broadcast requires contiguous view");
3836

3937
MPI_Bcast(KokkosComm::data_handle(v), KokkosComm::span(v), KokkosComm::Impl::mpi_type_v<Scalar>, root, comm);
4038

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
//@HEADER
2+
// ************************************************************************
3+
//
4+
// Kokkos v. 4.0
5+
// Copyright (2022) National Technology & Engineering
6+
// Solutions of Sandia, LLC (NTESS).
7+
//
8+
// Under the terms of Contract DE-NA0003525 with NTESS,
9+
// the U.S. Government retains certain rights in this software.
10+
//
11+
// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
12+
// See https://kokkos.org/LICENSE for license information.
13+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
14+
//
15+
//@HEADER
16+
17+
#pragma once
18+
19+
#include <cstdlib>
#include <iostream>
#include <stdexcept>

#include <mpi.h>
21+
22+
namespace KokkosComm::mpi {

/// @brief Report a failed runtime check.
///
/// Does nothing when @p condition is false. When it is true:
///  - if KOKKOSCOMM_ABORT_ON_ERROR is defined, the message is written to
///    std::cerr and MPI_Abort is called on MPI_COMM_WORLD with EXIT_FAILURE;
///  - otherwise a std::runtime_error carrying the message is thrown, so the
///    caller can catch the failure and decide how to proceed.
///
/// @param condition  true when the error condition has been detected.
/// @param error_msg  null-terminated description of the failure.
/// @throws std::runtime_error when @p condition is true and
///         KOKKOSCOMM_ABORT_ON_ERROR is not defined.
inline void fail_if(bool condition, const char* error_msg) {
  if (!condition) {
    return;
  }
#ifdef KOKKOSCOMM_ABORT_ON_ERROR
  // std::endl flushes the stream so the message is emitted before the abort.
  std::cerr << error_msg << std::endl;
  MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
#else
  // Aborting is opt-in; the default path must stay recoverable by the caller.
  throw std::runtime_error(error_msg);
#endif
}

}  // namespace KokkosComm::mpi

src/KokkosComm/mpi/irecv.hpp

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
#include "impl/pack_traits.hpp"
2525
#include "impl/tags.hpp"
2626
#include "impl/types.hpp"
27+
#include "impl/error_handling.hpp"
2728

2829
namespace KokkosComm {
2930
namespace Impl {
@@ -63,12 +64,11 @@ template <KokkosView RecvView>
6364
void irecv(const RecvView &rv, int src, int tag, MPI_Comm comm, MPI_Request &req) {
6465
Kokkos::Tools::pushRegion("KokkosComm::mpi::irecv");
6566

66-
if (KokkosComm::is_contiguous(rv)) {
67-
using RecvScalar = typename RecvView::non_const_value_type;
68-
MPI_Irecv(KokkosComm::data_handle(rv), KokkosComm::span(rv), Impl::mpi_type_v<RecvScalar>, src, tag, comm, &req);
69-
} else {
70-
throw std::runtime_error("Only contiguous irecv viewsupported");
71-
}
67+
KokkosComm::mpi::fail_if(!KokkosComm::is_contiguous(rv), "Only contiguous irecv viewsupported");
68+
69+
using RecvScalar = typename RecvView::non_const_value_type;
70+
MPI_Irecv(KokkosComm::data_handle(rv), KokkosComm::span(rv), Impl::mpi_type_v<RecvScalar>, src, tag, comm, &req);
71+
7272
Kokkos::Tools::popRegion();
7373
}
7474

src/KokkosComm/mpi/isend.hpp

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
#include "impl/pack_traits.hpp"
2828
#include "impl/tags.hpp"
2929
#include "impl/types.hpp"
30+
#include "impl/error_handling.hpp"
3031

3132
namespace KokkosComm {
3233

@@ -92,12 +93,11 @@ template <KokkosView SendView>
9293
void isend(const SendView &sv, int dest, int tag, MPI_Comm comm, MPI_Request &req) {
9394
Kokkos::Tools::pushRegion("KokkosComm::Impl::isend");
9495

95-
if (KokkosComm::is_contiguous(sv)) {
96-
using SendScalar = typename SendView::non_const_value_type;
97-
MPI_Isend(KokkosComm::data_handle(sv), KokkosComm::span(sv), Impl::mpi_type_v<SendScalar>, dest, tag, comm, &req);
98-
} else {
99-
throw std::runtime_error("only contiguous views supported for low-level isend");
100-
}
96+
KokkosComm::mpi::fail_if(!KokkosComm::is_contiguous(sv), "only contiguous views supported for low-level isend");
97+
98+
using SendScalar = typename SendView::non_const_value_type;
99+
MPI_Isend(KokkosComm::data_handle(sv), KokkosComm::span(sv), Impl::mpi_type_v<SendScalar>, dest, tag, comm, &req);
100+
101101
Kokkos::Tools::popRegion();
102102
}
103103

src/KokkosComm/mpi/recv.hpp

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -24,20 +24,19 @@
2424

2525
#include "impl/pack_traits.hpp"
2626
#include "impl/types.hpp"
27+
#include "impl/error_handling.hpp"
2728

2829
namespace KokkosComm::mpi {
2930

3031
template <KokkosView RecvView>
3132
void recv(const RecvView &rv, int src, int tag, MPI_Comm comm, MPI_Status *status) {
3233
Kokkos::Tools::pushRegion("KokkosComm::mpi::recv");
3334

34-
if (KokkosComm::is_contiguous(rv)) {
35-
using ScalarType = typename RecvView::non_const_value_type;
36-
MPI_Recv(KokkosComm::data_handle(rv), KokkosComm::span(rv), KokkosComm::Impl::mpi_type_v<ScalarType>, src, tag,
37-
comm, status);
38-
} else {
39-
throw std::runtime_error("only contiguous views supported for low-level recv");
40-
}
35+
KokkosComm::mpi::fail_if(!KokkosComm::is_contiguous(rv), "only contiguous views supported for low-level recv");
36+
37+
using ScalarType = typename RecvView::non_const_value_type;
38+
MPI_Recv(KokkosComm::data_handle(rv), KokkosComm::span(rv), KokkosComm::Impl::mpi_type_v<ScalarType>, src, tag, comm,
39+
status);
4140

4241
Kokkos::Tools::popRegion();
4342
}

0 commit comments

Comments
 (0)