From fe16ae7692da83e9cd247782f596a46a1d010699 Mon Sep 17 00:00:00 2001
From: Carl Pearson
Date: Thu, 1 Apr 2021 20:52:12 -0600
Subject: [PATCH] add stream_swap to enable sorting, simplify stream_elision

---
 src/internal/types.cpp     | 91 +++++++++++++++++++++++++++++++++++++-
 src/neighbor_alltoallv.cpp |  2 +-
 support/type.cpp           | 32 ++++++++++++++
 support/type.hpp           | 16 +++++++
 test/type_commit.cpp       | 16 ++++++-
 test/type_equivalence.cpp  | 27 +++++++++++
 6 files changed, 180 insertions(+), 4 deletions(-)

diff --git a/src/internal/types.cpp b/src/internal/types.cpp
index 131b3a4..a33210d 100644
--- a/src/internal/types.cpp
+++ b/src/internal/types.cpp
@@ -362,6 +362,41 @@ Type traverse(MPI_Datatype datatype) {
   }
 };
 
+/*
+  if a stream's only child is also a stream, make the one with the larger
+  stride the parent (swapping the two StreamData), then recurse into the child.
+  returns true if anything was swapped.
+  NOTE(review): only the StreamData is swapped; child.extent is left as-is --
+  confirm it does not need to be recomputed after a swap.
+*/
+bool stream_swap(Type &type) {
+
+  bool changed = false;
+
+  // only works if I'm a stream and my child is stream
+  if (!std::holds_alternative<StreamData>(type.data)) {
+    return false;
+  }
+  assert(1 == type.children().size());
+  Type &child = type.children()[0];
+  if (!std::holds_alternative<StreamData>(child.data)) {
+    return false;
+  }
+
+  StreamData &pData = std::get<StreamData>(type.data);
+  StreamData &cData = std::get<StreamData>(child.data);
+
+  if (pData.stride < cData.stride) {
+    changed = true;
+    std::swap(pData, cData);
+  }
+
+  // descend into child
+  changed |= stream_swap(child);
+
+  return changed;
+}
+
 /* if a stream has a dense child, and that stream's stride is the same as the
    child's extent, then the stream is also dense
 */
@@ -409,6 +444,7 @@ bool stream_dense_fold(Type &type) {
 
 /* if nested streams, if the child count is 1, the parent's elements are just
    the child's element
+
  */
 bool stream_elision(Type &type) {
 
@@ -442,6 +478,38 @@ bool stream_elision(Type &type) {
   return changed;
 }
 
+/* if a stream has count 1, it can be removed: the stream is replaced by its
+   child, and the child takes over the stream's extent.
+   returns true if anything was removed, here or further down the tree
+ */
+bool stream_elision2(Type &type) {
+
+  bool changed = false;
+
+  // try to remove all size 1 children first
+  for (Type &child : type.children()) {
+    changed |= stream_elision2(child);
+  }
+
+  // only a stream can be removed, but still report removals among children
+  if (!std::holds_alternative<StreamData>(type.data)) {
+    return changed;
+  }
+  assert(1 == type.children().size());
+
+  const StreamData &tData = std::get<StreamData>(type.data);
+
+  if (1 == tData.count) {
+    changed = true;
+    // replace this guy with his child
+    Type child = type.children()[0];
+    child.extent = type.extent;
+    type = child;
+  }
+
+  return changed;
+}
+
 /* detect, for example, where two vectors of two blocks is just one vector of
    four blocks
 
@@ -505,17 +573,36 @@ Type simplify(const Type &type) {
     changed = false;
     ++iter;
     LOG_SPEW("optimization iter " << iter);
+
+    /* new, less-tested swapper with new stream elision
+       if problems, remove the swap and go to stream_elision
+    */
+    changed |= stream_swap(simp);
+    LOG_SPEW("after stream_swap");
+    LOG_SPEW("\n" + simp.str());
     changed |= stream_dense_fold(simp);
     LOG_SPEW("after stream_dense_fold");
     LOG_SPEW("\n" + simp.str());
     changed |= stream_flatten(simp);
     LOG_SPEW("after stream_flatten");
     LOG_SPEW("\n" + simp.str());
-    changed |= stream_elision(simp);
-    LOG_SPEW("after stream_elision");
+    changed |= stream_elision2(simp);
+    LOG_SPEW("after stream_elision2");
     LOG_SPEW("\n" + simp.str());
   }
 
+  /* TODO
+  Here, we sort by the stride after doing canonicalization
+  (placeholder: the loop below only bumps iter and logs, it does not sort yet)
+  */
+
+  changed = true;
+  while (changed) {
+    changed = false;
+    ++iter;
+    LOG_SPEW("sort iter " << iter);
+  }
+
   LOG_SPEW("simplify done. " << iter << " iterations");
   LOG_SPEW("type.height=" << simp.height());
   return simp;
diff --git a/src/neighbor_alltoallv.cpp b/src/neighbor_alltoallv.cpp
index c939074..841783b 100644
--- a/src/neighbor_alltoallv.cpp
+++ b/src/neighbor_alltoallv.cpp
@@ -11,7 +11,7 @@
 
 extern "C" int MPI_Neighbor_alltoallv(PARAMS_MPI_Neighbor_alltoallv) {
   if (environment::noTempi) {
-    return libmpi.MPI_Neighbor_alltoallv(ARGS_MPI_Alltoallv);
+    return libmpi.MPI_Neighbor_alltoallv(ARGS_MPI_Neighbor_alltoallv);
   }
 
   /* this call does not take ranks, so there is no need to handle
diff --git a/support/type.cpp b/support/type.cpp
index 7a85b07..43b3332 100644
--- a/support/type.cpp
+++ b/support/type.cpp
@@ -241,6 +241,38 @@ MPI_Datatype make_2d_byte_subarray(const int64_t numBlocks,
   return ty;
 }
 
+// make a 2d type by rows of blocks, then a stack of rows
+MPI_Datatype make_2d_hv_by_rows(int blockSize, int c1,
+                                int s1, // stride between blocks
+                                int c2,
+                                int s2 // stride between rows
+) {
+
+  MPI_Datatype bytes, row, ret;
+
+  MPI_Type_contiguous(blockSize, MPI_BYTE, &bytes);
+  MPI_Type_create_hvector(c1, 1, s1, bytes, &row);
+  MPI_Type_create_hvector(c2, 1, s2, row, &ret);
+
+  return ret;
+}
+
+// make a 2d type by a column of blocks, and then a stack of columns.
+MPI_Datatype make_2d_hv_by_cols(int blockSize, int c1,
+                                int s1, // stride between blocks
+                                int c2,
+                                int s2 // stride between rows
+) {
+
+  MPI_Datatype bytes, col, ret;
+
+  MPI_Type_contiguous(blockSize, MPI_BYTE, &bytes);
+  MPI_Type_create_hvector(c2, 1, s2, bytes, &col);
+  MPI_Type_create_hvector(c1, 1, s1, col, &ret);
+
+  return ret;
+}
+
 /*1D layouts
 
  */
diff --git a/support/type.hpp b/support/type.hpp
index 0bce2c1..896daa9 100644
--- a/support/type.hpp
+++ b/support/type.hpp
@@ -68,6 +68,22 @@ MPI_Datatype make_2d_byte_subarray(const int64_t numBlocks,
                                    const int64_t blockLength,
                                    const int64_t stride);
 
+
+// make a 2d type by rows of blocks, then a stack of rows
+MPI_Datatype make_2d_hv_by_rows(int blockSize, int c1,
+                                int s1, // stride between blocks
+                                int c2,
+                                int s2 // stride between rows
+);
+
+// make a 2d type by a column of blocks, and then a stack of columns.
+MPI_Datatype make_2d_hv_by_cols(int blockSize, int c1,
+                                int s1, // stride between blocks
+                                int c2,
+                                int s2 // stride between rows
+);
+
+
 // n contiguous bytes
 typedef MPI_Datatype (*TypeFactory1D)(int n);
 MPI_Datatype make_contiguous_byte_v1(int n);
diff --git a/test/type_commit.cpp b/test/type_commit.cpp
index fac37e9..1a98d51 100644
--- a/test/type_commit.cpp
+++ b/test/type_commit.cpp
@@ -13,6 +13,20 @@ int main(int argc, char **argv) {
   environment::noTempi = false;
   MPI_Init(&argc, &argv);
 
+  {
+    std::cerr << "TEST: make_2d_hv_by_rows\n";
+    MPI_Datatype ty = make_2d_hv_by_rows(13, 5, 16, 3, 53);
+    MPI_Type_commit(&ty);
+  }
+
+  {
+    std::cerr << "TEST: make_2d_hv_by_cols\n";
+    MPI_Datatype ty = make_2d_hv_by_cols(13, 5, 16, 3, 53);
+    MPI_Type_commit(&ty);
+  }
+  // MPI_Finalize();
+  // exit(0);
+
   Dim3 copyExt = {.x = 100, .y = 13, .z = 47};
   Dim3 allocExt = {.x = 256, .y = 512, .z = 1024};
 
@@ -24,7 +38,7 @@ int main(int argc, char **argv) {
 
   {
     std::cerr << "TEST: MPI_Type_create_subarray\n";
-    MPI_Datatype ty = make_off_subarray(copyExt, allocExt, Dim3(4,4,4));
+    MPI_Datatype ty = make_off_subarray(copyExt, allocExt, Dim3(4, 4, 4));
     MPI_Type_commit(&ty);
   }
 
diff --git a/test/type_equivalence.cpp b/test/type_equivalence.cpp
index 1af8578..9a57ad4 100644
--- a/test/type_equivalence.cpp
+++ b/test/type_equivalence.cpp
@@ -164,6 +164,33 @@ int main(int argc, char **argv) {
     REQUIRE(size == copyExt.flatten());
   }
 
+  {
+    std::cerr << "TEST: by rows / by cols for MPI equivalence\n";
+    MPI_Datatype t1 = make_2d_hv_by_rows(13, 3, 16, 5, 53);
+    MPI_Datatype t2 = make_2d_hv_by_cols(13, 3, 16, 5, 53);
+    MPI_Type_commit(&t1);
+    MPI_Type_commit(&t2);
+
+    {
+      int size;
+      MPI_Type_size(t1, &size);
+      REQUIRE(size == 15 * 13);
+      MPI_Type_size(t2, &size);
+      REQUIRE(size == 15 * 13);
+    }
+
+    {
+      MPI_Aint lb, ext;
+      MPI_Type_get_extent(t1, &lb, &ext);
+      REQUIRE(ext == 53 * 4 + 16 * 2 + 13);
+      MPI_Type_get_extent(t2, &lb, &ext);
+      REQUIRE(ext == 53 * 4 + 16 * 2 + 13);
+    }
+
+    MPI_Type_free(&t1);
+    MPI_Type_free(&t2);
+  }
+
   MPI_Finalize();
 
   unsetenv("TEMPI_DISABLE");