-
Notifications
You must be signed in to change notification settings - Fork 902
Han gatherv noncontiguous datatype fix #12439
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -55,6 +55,12 @@ | |
* to send the data in the correct order even if the processes are NOT mapped by core. | ||
* 2. In the send buffer, other than the root's node, data destined to the same node are contiguous | ||
* - it is ok if data to different nodes have gaps between them. | ||
* | ||
* Limitation: | ||
* The node leader acts as a broker between the Root and node followers, but it cannot match the | ||
* exact type signature of the followers; instead it forwards the intermediate data from the Root in | ||
* its packed form as MPI_BYTE. This works for Gatherv but NOT for Scatterv when the Root | ||
* has a different architecture, e.g. endianness, integer representation, etc. | ||
*/ | ||
int mca_coll_han_scatterv_intra(const void *sbuf, const int *scounts, const int *displs, | ||
struct ompi_datatype_t *sdtype, void *rbuf, int rcount, | ||
|
@@ -94,6 +100,14 @@ int mca_coll_han_scatterv_intra(const void *sbuf, const int *scounts, const int | |
return han_module->previous_scatterv(sbuf, scounts, displs, sdtype, rbuf, rcount, rdtype, | ||
root, comm, han_module->previous_scatterv_module); | ||
} | ||
if (han_module->is_heterogeneous) { | ||
OPAL_OUTPUT_VERBOSE((30, mca_coll_han_component.han_output, | ||
"han cannot handle scatterv with this communicator (heterogeneous). Fall " | ||
"back on another component\n")); | ||
HAN_LOAD_FALLBACK_COLLECTIVE(han_module, comm, scatterv); | ||
return han_module->previous_scatterv(sbuf, scounts, displs, sdtype, rbuf, rcount, rdtype, | ||
root, comm, han_module->previous_scatterv_module); | ||
} | ||
|
||
w_rank = ompi_comm_rank(comm); | ||
w_size = ompi_comm_size(comm); | ||
|
@@ -125,7 +139,6 @@ int mca_coll_han_scatterv_intra(const void *sbuf, const int *scounts, const int | |
int need_bounce_buf = 0, total_up_scounts = 0, *up_displs = NULL, *up_scounts = NULL, | ||
*up_peer_lb = NULL, *up_peer_ub = NULL; | ||
char *reorder_sbuf = (char *) sbuf, *bounce_buf = NULL; | ||
size_t sdsize; | ||
|
||
low_scounts = malloc(low_size * sizeof(int)); | ||
low_displs = malloc(low_size * sizeof(int)); | ||
|
@@ -144,8 +157,6 @@ int mca_coll_han_scatterv_intra(const void *sbuf, const int *scounts, const int | |
low_scounts[low_peer] = scounts[w_peer]; | ||
} | ||
|
||
ompi_datatype_type_size(sdtype, &sdsize); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Same comment as for gather except that you should use unpack to go from a packed buffer into the local type. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Currently inside ompi we need an extra data size check in order to use MPI_PACKED - it is currently possible that the total byte size of I think we should switch to explicit pack/unpack after large count support. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @bosilca I took time to come up with possible optimizations following our discussion on Monday. I was trying to take advantage of the type map, but soon realized that this is not generally useful between node leader and follower. For scatterv, the invariant is actually between Root and other processes:
When we focus on the node leader and its local neighbors, this information is not helpful, since they do not need to match in their respective There is a simplification (rather than optimization) opportunity though, if and only if the node leader's There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Because the typemap provided by the different processes must match when complemented with the count, we could use |
||
|
||
up_scounts = calloc(up_size, sizeof(int)); | ||
up_displs = malloc(up_size * sizeof(int)); | ||
up_peer_ub = calloc(up_size, sizeof(int)); | ||
|
@@ -201,11 +212,14 @@ int mca_coll_han_scatterv_intra(const void *sbuf, const int *scounts, const int | |
} | ||
|
||
if (need_bounce_buf) { | ||
bounce_buf = malloc(sdsize * total_up_scounts); | ||
ptrdiff_t ssize, sgap; | ||
ssize = opal_datatype_span(&rdtype->super, total_up_scounts, &sgap); | ||
bounce_buf = malloc(ssize); | ||
if (!bounce_buf) { | ||
err = OMPI_ERR_OUT_OF_RESOURCE; | ||
goto root_out; | ||
} | ||
reorder_sbuf = bounce_buf - sgap; | ||
|
||
/* Calculate displacements for the inter-node scatterv */ | ||
for (up_peer = 0; up_peer < up_size; ++up_peer) { | ||
|
@@ -214,7 +228,8 @@ int mca_coll_han_scatterv_intra(const void *sbuf, const int *scounts, const int | |
} | ||
|
||
/* Use a temp buffer to reorder the send buffer if needed */ | ||
ptrdiff_t offset = 0; | ||
ptrdiff_t offset = 0, sdext; | ||
ompi_datatype_type_extent(sdtype, &sdext); | ||
|
||
for (int i = 0; i < w_size; ++i) { | ||
up_peer = topo[2 * i]; | ||
|
@@ -225,13 +240,11 @@ int mca_coll_han_scatterv_intra(const void *sbuf, const int *scounts, const int | |
w_peer = topo[2 * i + 1]; | ||
|
||
ompi_datatype_copy_content_same_ddt(sdtype, (size_t) scounts[w_peer], | ||
bounce_buf + offset, | ||
reorder_sbuf + offset, | ||
(char *) sbuf | ||
+ (size_t) displs[w_peer] * sdsize); | ||
offset += sdsize * (size_t) scounts[w_peer]; | ||
+ (size_t) displs[w_peer] * sdext); | ||
offset += sdext * (size_t) scounts[w_peer]; | ||
} | ||
|
||
reorder_sbuf = bounce_buf; | ||
} | ||
|
||
/* Up Iscatterv */ | ||
|
Uh oh!
There was an error while loading. Please reload this page.