Skip to content

Commit d353cce

Browse files
committed
Merge branch 'warn_32_bit' into 'master'
Add a warning when exceeding the 32-bit MPI limit when distributing the tensor. See merge request etphipp/genten!64
2 parents b7a6dde + 249d825 commit d353cce

1 file changed

Lines changed: 12 additions & 3 deletions

File tree

src/Genten_DistTensorContext.cpp

Lines changed: 12 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -226,7 +226,10 @@ std::vector<ttb_real>
226226
distributeTensorToVectorsDense(const Tensor& dn_tensor_host, ttb_indx nnz,
227227
MPI_Comm comm, ttb_indx rank, ttb_indx nprocs,
228228
ttb_indx& offset) {
229-
constexpr ttb_indx dt_size = sizeof(ttb_real);
229+
// Send tensor as ttb_real instead of bytes to allow for (somewhat)
230+
// larger tensors that can fit within the 32-bit MPI limit
231+
const auto mpi_dtype = DistContext::toMpiType<ttb_real>();
232+
constexpr ttb_indx dt_size = 1;
230233
std::vector<ttb_real> Tvec;
231234
small_vector<ttb_indx> who_gets_what =
232235
detail::singleDimUniformBlocking(nnz, nprocs);
@@ -248,7 +251,7 @@ distributeTensorToVectorsDense(const Tensor& dn_tensor_host, ttb_indx nnz,
248251
total_sent += nelements;
249252

250253
const ttb_indx index_of_first_element = who_gets_what[i];
251-
MPI_Isend(Tvec.data() + index_of_first_element, nbytes, MPI_BYTE, i, i,
254+
MPI_Isend(Tvec.data() + index_of_first_element, nbytes, mpi_dtype, i, i,
252255
comm, &requests[i - 1]);
253256
}
254257
MPI_Waitall(requests.size(), requests.data(), statuses.data());
@@ -271,7 +274,13 @@ distributeTensorToVectorsDense(const Tensor& dn_tensor_host, ttb_indx nnz,
271274
const ttb_indx nelements = who_gets_what[rank + 1] - who_gets_what[rank];
272275
Tvec.resize(nelements);
273276
const ttb_indx nbytes = nelements * dt_size;
274-
MPI_Recv(Tvec.data(), nbytes, MPI_BYTE, 0, rank, comm, MPI_STATUS_IGNORE);
277+
if (nbytes > std::numeric_limits<int>::max()) {
278+
std::cout << "Warning on MPI processor " << rank << ":" << std::endl
279+
<< " The number of receives exceeds the maximum size of a 32-bit integer." << std::endl
280+
<< " This will likely fail with most MPI implementations!" << std::endl
281+
<< " Try distributing your tensor across more MPI processors." << std::endl;
282+
}
283+
MPI_Recv(Tvec.data(), nbytes, mpi_dtype, 0, rank, comm, MPI_STATUS_IGNORE);
275284
}
276285

277286
return Tvec;

0 commit comments

Comments
 (0)