Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions src/exchangeDonors.C
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ void tioga::exchangeDonors(void)
// and receiving
//
pc->getMap(&nsend,&nrecv,&sndMap,&rcvMap);
if (nsend == 0) return;
// if (nsend == 0) return;
//
// create packets to send and receive
// and initialize them to zero
Expand Down Expand Up @@ -74,7 +74,9 @@ void tioga::exchangeDonors(void)
//
// communicate donors (comm1)
//
pc->sendRecvPackets(sndPack,rcvPack);
MPI_Barrier(scomm);
pc->sendRecvPackets2(sndPack,rcvPack);
if (nsend == 0) return;
// Initialize linked lists and populate donor data from rcvPack
for (int ib=0;ib<nblocks;ib++) {
auto& mb = mblocks[ib];
Expand Down
135 changes: 135 additions & 0 deletions src/parallelComm.C
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ void parallelComm::sendRecvPacketsAll(PACKET *sndPack, PACKET *rcvPack)
scomm,
&real_request);

// FIXME: here and above I think I should move this a bit lower
MPI_Wait(&int_request, MPI_STATUS_IGNORE);
for(i=0;i<numprocs;i++){
if (rcvPack[i].nints > 0) {
Expand All @@ -116,6 +117,7 @@ void parallelComm::sendRecvPacketsAll(PACKET *sndPack, PACKET *rcvPack)
}
}

// FIXME: here and above I think I should move this a bit lower
MPI_Wait(&real_request, MPI_STATUS_IGNORE);
for (int i=0; i < numprocs; i++) {
int displ = rcv_int_displs[i];
Expand Down Expand Up @@ -212,6 +214,139 @@ void parallelComm::sendRecvPacketsAll(PACKET *sndPack, PACKET *rcvPack)
// TIOGA_FREE(status);
// }

void parallelComm::sendRecvPackets2(PACKET *sndPack,PACKET *rcvPack)
{
int i;
int *sint,*sreal,*rint,*rreal;
//
sint=(int *)malloc(sizeof(int)*numprocs);
sreal=(int *) malloc(sizeof(int)*numprocs);
rint=(int *)malloc(sizeof(int)*numprocs);
rreal=(int *) malloc(sizeof(int)*numprocs);
// remove when using stl vectors and just init the vectors to 0
for(i=0;i<numprocs;i++){
sint[i]=sreal[i]=0;
rint[i]=rreal[i]=0;
}
for(i=0;i<nsend;i++){
sint[sndMap[i]]=sndPack[i].nints;
sreal[sndMap[i]]=sndPack[i].nreals;
}
//
MPI_Alltoall(sint,1,MPI_INT,rint,1,MPI_INT,scomm);
MPI_Alltoall(sreal,1,MPI_INT,rreal,1,MPI_INT,scomm);
//
for(i=0;i<nrecv;i++) {
rcvPack[i].nints=rint[rcvMap[i]];
rcvPack[i].nreals=rreal[rcvMap[i]];
}

int all_snd_nints = std::accumulate(sint, sint + numprocs, 0);
int all_rcv_nints = std::accumulate(rint, rint + numprocs, 0);
int *all_snd_intData, *all_rcv_intData;
all_snd_intData=(int *) malloc(sizeof(int)*all_snd_nints);
all_rcv_intData=(int *) malloc(sizeof(int)*all_rcv_nints);
for (int i = 0; i < all_snd_nints; i++) {
all_snd_intData[i] = 0;
}
for (int i = 0; i < all_rcv_nints; i++) {
all_rcv_intData[i] = 0;
}
std::vector<int> snd_int_displs(numprocs+1, 0);
std::vector<int> rcv_int_displs(numprocs+1, 0);
for (int i=1; i <= numprocs; i++) {
snd_int_displs[i] = snd_int_displs[i-1] + sint[i-1];
rcv_int_displs[i] = rcv_int_displs[i-1] + rint[i-1];
}
for (int i=0; i < nsend; i++) {
int displ = snd_int_displs[sndMap[i]];
for(int j=0; j < sint[sndMap[i]]; j++){
all_snd_intData[displ+j] = sndPack[i].intData[j];
}
}
MPI_Request int_request;
MPI_Ialltoallv(all_snd_intData,
sint,
snd_int_displs.data(),
MPI_INT,
all_rcv_intData,
rint,
rcv_int_displs.data(),
MPI_INT,
scomm,
&int_request);

int all_snd_nreals = std::accumulate(sreal, sreal + numprocs, 0);
int all_rcv_nreals = std::accumulate(rreal, rreal + numprocs, 0);
REAL *all_snd_realData, *all_rcv_realData;
all_snd_realData=(REAL *) malloc(sizeof(REAL)*all_snd_nreals);
all_rcv_realData=(REAL *) malloc(sizeof(REAL)*all_rcv_nreals);
for (int i = 0; i < all_snd_nreals; i++) {
all_snd_realData[i] = 0;
}
for (int i = 0; i < all_rcv_nreals; i++) {
all_rcv_realData[i] = 0;
}
std::vector<int> snd_real_displs(numprocs+1, 0);
std::vector<int> rcv_real_displs(numprocs+1, 0);
for (int i=1; i <= numprocs; i++) {
snd_real_displs[i] = snd_real_displs[i-1] + sreal[i-1];
rcv_real_displs[i] = rcv_real_displs[i-1] + rreal[i-1];
}
for (int i=0; i < nsend; i++) {
int displ = snd_real_displs[sndMap[i]];
for(int j=0; j < sreal[sndMap[i]]; j++){
all_snd_realData[displ+j] = sndPack[i].realData[j];
}
}
MPI_Request real_request;
MPI_Ialltoallv(all_snd_realData,
sreal,
snd_real_displs.data(),
MPI_DOUBLE,
all_rcv_realData,
rreal,
rcv_real_displs.data(),
MPI_DOUBLE,
scomm,
&real_request);

// FIXME: here and above I think I should move this a bit lower
MPI_Wait(&int_request, MPI_STATUS_IGNORE);
for(i=0;i<nrecv;i++){
if (rcvPack[i].nints > 0) {
rcvPack[i].intData=(int *) malloc(sizeof(int)*rcvPack[i].nints);
}
if (rcvPack[i].nreals > 0) {
rcvPack[i].realData=(REAL *) malloc(sizeof(REAL)*rcvPack[i].nreals);
}
}

// FIXME: here and above I think I should move this a bit lower
MPI_Wait(&real_request, MPI_STATUS_IGNORE);
for (int i=0; i < nrecv; i++) {
int displ = rcv_int_displs[rcvMap[i]];
for(int j=0; j < rint[rcvMap[i]]; j++){
rcvPack[i].intData[j] = all_rcv_intData[displ+j];
}
}
for (int i=0; i < nrecv; i++) {
int displ = rcv_real_displs[rcvMap[i]];
for(int j=0; j < rreal[rcvMap[i]]; j++){
rcvPack[i].realData[j] = all_rcv_realData[displ+j];
}
}

TIOGA_FREE(all_snd_intData);
TIOGA_FREE(all_rcv_intData);
TIOGA_FREE(all_snd_realData);
TIOGA_FREE(all_rcv_realData);
TIOGA_FREE(sint);
TIOGA_FREE(sreal);
TIOGA_FREE(rint);
TIOGA_FREE(rreal);
}

void parallelComm::sendRecvPackets(PACKET *sndPack,PACKET *rcvPack)
{
int i;
Expand Down
2 changes: 2 additions & 0 deletions src/parallelComm.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,8 @@ class parallelComm

void sendRecvPacketsAll(PACKET *sndPack,PACKET *rcvPack);

void sendRecvPackets2(PACKET *sndPack,PACKET *rcvPack);

void sendRecvPackets(PACKET *sndPack,PACKET *rcvPack);

void sendRecvPacketsCheck(PACKET *sndPack,PACKET *rcvPack);
Expand Down