diff --git a/core/src/Cabana_CommunicationPlan.hpp b/core/src/Cabana_CommunicationPlan.hpp index ac011542a..70f035478 100644 --- a/core/src/Cabana_CommunicationPlan.hpp +++ b/core/src/Cabana_CommunicationPlan.hpp @@ -996,6 +996,44 @@ struct CommunicationDataAoSoA std::size_t _num_comp = 0; }; +/*! + \brief Store AoSoA send/receive buffers with separate destination. +*/ +template +struct CommunicationDataAoSoASeparate : public CommunicationDataAoSoA +{ + using base_type = CommunicationDataAoSoA; + //! Particle data type. + using particle_data_type = typename base_type::particle_data_type; + //! Kokkos memory space. + using memory_space = typename base_type::memory_space; + //! Communication data type. + using data_type = typename base_type::data_type; + //! Communication buffer type. + using buffer_type = typename base_type::buffer_type; + + /*! + Constructor + \param src The source AoSoA. + \param dst The destination AoSoA. + */ + CommunicationDataAoSoASeparate( const AoSoAType& src, const AoSoAType& dst ) + : base_type( src ) + , _dst_particles( dst ) + { + } + + //! Send buffer. + using base_type::_send_buffer; + //! Receive buffer. + using base_type::_recv_buffer; + //! Source AoSoA. + AoSoAType _src_particles = base_type::_particles; + //! Destination AoSoA. + AoSoAType _dst_particles; + //! Slice components. + using base_type::_num_comp; +}; + /*! \brief Store slice send/receive buffers. */ @@ -1057,8 +1095,47 @@ struct CommunicationDataSlice //! Slice components. std::size_t _num_comp; }; -//---------------------------------------------------------------------------// +/*! + \brief Store slice send/receive buffers with separate destination. +*/ +template +struct CommunicationDataSliceSeparate : public CommunicationDataSlice +{ + using base_type = CommunicationDataSlice; + //! Particle data type. + using particle_data_type = typename base_type::particle_data_type; + //! Kokkos memory space. + using memory_space = typename base_type::memory_space; + //! Communication data type. + using data_type = typename base_type::data_type; + //! Communication buffer type. + using buffer_type = typename base_type::buffer_type; + + /*! + Constructor + \param src The source slice. + \param dst The destination slice. + */ + CommunicationDataSliceSeparate( const particle_data_type& src, + const particle_data_type& dst ) + : base_type( src ) + , _dst_particles( dst ) + { + } + + //! Send buffer. + using base_type::_send_buffer; + //! Receive buffer. + using base_type::_recv_buffer; + //! Source slice. + particle_data_type _src_particles = base_type::_particles; + //! Destination slice. + particle_data_type _dst_particles; + //! Slice components. + using base_type::_num_comp; +}; + +//---------------------------------------------------------------------------// /*! \brief Store communication plan and communication buffers. */ @@ -1096,6 +1173,16 @@ class CommunicationData , _comm_data( CommDataType( particles ) ) , _overallocation( overallocation ) { + updateRangePolicy(); + } + /*! + \param comm_plan The communication plan. + \param comm_data The communication data holding the particles and buffers. + \param overallocation An optional factor to keep extra space in the + buffers to avoid frequent resizing. + */ + CommunicationData( const CommPlanType& comm_plan, + const CommDataType& comm_data, + const double overallocation = 1.0 ) + : _comm_plan( comm_plan ) + , _comm_data( comm_data ) + , _overallocation( overallocation ) + { + updateRangePolicy(); } //! Get the communication send buffer. @@ -1104,7 +1191,7 @@ class CommunicationData buffer_type getReceiveBuffer() const { return _comm_data._recv_buffer; } //! Get the particles to communicate.
- particle_data_type getData() const { return _comm_data._particles; } + particle_data_type getParticles() const { return _comm_data._particles; } //! Update particles to communicate. void setData( const particle_data_type& particles ) { @@ -1145,6 +1232,11 @@ class CommunicationData _comm_data.reallocateReceive( shrunk_recv_size ); } + //! Resize the particle data. + void resizeParticles( const std::size_t new_size ) + { + _comm_data._particles.resize( new_size ); + } + //! Perform the communication (migrate, gather, scatter). virtual void apply() = 0; @@ -1163,22 +1255,34 @@ class CommunicationData reserveImpl( comm_plan, particles, total_send, total_recv ); } void reserveImpl( const CommPlanType& comm_plan, - const particle_data_type particles, + const particle_data_type& particles, const std::size_t total_send, const std::size_t total_recv ) { _comm_plan = comm_plan; setData( particles ); + updateBuffers( total_send, total_recv ); + } + //! \endcond + + protected: + //! Reallocate the send and receive buffers if the requested sizes exceed + //! the current capacities. + void updateBuffers( const std::size_t total_send, + const std::size_t total_recv ) + { auto send_capacity = sendCapacity(); std::size_t new_send_size = total_send * _overallocation; if ( new_send_size > send_capacity ) _comm_data.reallocateSend( new_send_size ); auto recv_capacity = receiveCapacity(); std::size_t new_recv_size = total_recv * _overallocation; if ( new_recv_size > recv_capacity ) _comm_data.reallocateReceive( new_recv_size ); _send_size = total_send; _recv_size = total_recv; @@ -1186,9 +1290,7 @@ class CommunicationData // Update policies with new sizes. updateRangePolicy(); } - //! \endcond - protected: //! Update range policy based on new communication plan. void updateRangePolicy() { @@ -1215,6 +1317,103 @@ class CommunicationData std::size_t _recv_size; }; +/*! + \brief Store communication plan and communication buffers with separately + stored source and destination particle data. +*/ +template +class CommunicationDataSeparate + : public CommunicationData +{ + public: + using base_type = CommunicationData; + //! Communication plan type (Halo, Distributor) + using plan_type = typename base_type::plan_type; + //! Kokkos execution space. + using execution_space = typename base_type::execution_space; + //! Kokkos execution policy. + using policy_type = typename base_type::policy_type; + //! Communication data type. + using comm_data_type = typename base_type::comm_data_type; + //! Particle data type. + using particle_data_type = typename base_type::particle_data_type; + //! Kokkos memory space. + using memory_space = typename base_type::memory_space; + //! Communication data type. + using data_type = typename base_type::data_type; + //! Communication buffer type. + using buffer_type = typename base_type::buffer_type; + + /*! + \param comm_plan The communication plan. + \param src The source particle data (either AoSoA or slice). + \param dst The destination particle data (either AoSoA or slice). + \param overallocation An optional factor to keep extra space in the + buffers to avoid frequent resizing. + */ + CommunicationDataSeparate( const CommPlanType& comm_plan, + const particle_data_type& src, + const particle_data_type& dst, + const double overallocation = 1.0 ) + : base_type( comm_plan, CommDataType( src, dst ), overallocation ) + { + } + + //! Get the destination particles. + particle_data_type getDestinationParticles() const + { + return _comm_data._dst_particles; + } + //! Update particles to communicate.
+ void setParticles( const particle_data_type& src, + const particle_data_type& dst ) + { + _comm_data._src_particles = src; + _comm_data._dst_particles = dst; + } + + //! \cond Impl + void + reserveImpl( const CommPlanType& comm_plan, const particle_data_type& src, + const particle_data_type& dst, const std::size_t total_send, + const std::size_t total_recv, const double overallocation ) + { + if ( overallocation < 1.0 ) + throw std::runtime_error( "Cannot allocate buffers with less space " + "than data to communicate!" ); + _overallocation = overallocation; + + reserveImpl( comm_plan, src, dst, total_send, total_recv ); + } + void reserveImpl( const CommPlanType& comm_plan, + const particle_data_type& src, + const particle_data_type& dst, + const std::size_t total_send, + const std::size_t total_recv ) + { + _comm_plan = comm_plan; + setParticles( src, dst ); + this->updateRangePolicy(); + + this->updateBuffers( total_send, total_recv ); + } + //! \endcond + + protected: + //! Communication plan. + using base_type::_comm_plan; + //! Send range policy. + using base_type::_send_policy; + //! Receive range policy. + using base_type::_recv_policy; + //! Communication plan. + using base_type::_comm_data; + //! Overallocation factor. + using base_type::_overallocation; + //! Send sizes. + using base_type::_send_size; + //! Receive sizes. + using base_type::_recv_size; +}; + } // end namespace Cabana #endif // end CABANA_COMMUNICATIONPLAN_HPP diff --git a/core/src/Cabana_Distributor.hpp b/core/src/Cabana_Distributor.hpp index 6ab2f858b..ac92f0c03 100644 --- a/core/src/Cabana_Distributor.hpp +++ b/core/src/Cabana_Distributor.hpp @@ -100,6 +100,7 @@ class Distributor : public CommunicationPlan const std::vector& neighbor_ranks ) : CommunicationPlan( comm ) { + setRank(); auto neighbor_ids = this->createFromExportsAndTopology( element_export_ranks, neighbor_ranks ); this->createExportSteering( neighbor_ids, element_export_ranks ); @@ -136,9 +137,41 @@ class Distributor : public CommunicationPlan Distributor( MPI_Comm comm, const ViewType& element_export_ranks ) : CommunicationPlan( comm ) { + setRank(); auto neighbor_ids = this->createFromExportsOnly( element_export_ranks ); this->createExportSteering( neighbor_ids, element_export_ranks ); } + + //! Total migrate send size for this rank. + auto totalSend() const { return this->totalNumExport() - numStay(); } + //! Total migrate receive size for this rank. + auto totalReceive() const { return this->totalNumImport(); } + + //! Total migrate size staying on the current rank. + auto numStay() const + { + // Calculate the number of elements that are staying on this rank and + // therefore can be directly copied. If any of the neighbor ranks are + // this rank it will be stored in first position (i.e. the first + // neighbor in the local list is always yourself if you are sending to + // yourself). + return ( this->numNeighbor() > 0 && + this->neighborRank( 0 ) == _my_rank ) + ? this->numExport( 0 ) + : 0; + } + + //! Set the current MPI rank. + void setRank() + { + _my_rank = -1; + MPI_Comm_rank( this->comm(), &_my_rank ); + } + //! Get the current MPI rank. + auto getRank() { return _my_rank; } + + private: + int _my_rank; }; //---------------------------------------------------------------------------// @@ -162,158 +195,576 @@ struct is_distributor }; //---------------------------------------------------------------------------// -namespace Impl +/*! + \brief Ensure the particle size matches the distributor size. 
+ + \param distributor The distributor that will be used for the migrate. Used to + query import and export sizes. + + \param particles The particle data (either AoSoA or slice). Used to query the + total size. +*/ +template +bool distributorCheckValidSize( + const DistributorType& distributor, const ParticleData& particles, + typename std::enable_if<( is_distributor::value ), + int>::type* = 0 ) { -//! \cond Impl -//---------------------------------------------------------------------------// -// Synchronously move data between a source and destination AoSoA by executing -// the forward communication plan. -template -void distributeData( - const Distributor_t& distributor, const AoSoA_t& src, AoSoA_t& dst, - typename std::enable_if<( is_distributor::value && - is_aosoa::value ), + return ( particles.size() == distributor.exportSize() ); +} +/*! + \brief Ensure the particle size matches the distributor size. + + \param distributor The distributor that will be used for the migrate. Used to + query import and export sizes. + + \param src The source particle data (either AoSoA or slice). Used to query the + total size. + + \param dst The destination particle data (either AoSoA or slice). Used to + query the total size. +*/ +template +bool distributorCheckValidSize( + const DistributorType& distributor, const ParticleData& src, + const ParticleData& dst, + typename std::enable_if<( is_distributor::value ), int>::type* = 0 ) { - // Get the MPI rank we are currently on. - int my_rank = -1; - MPI_Comm_rank( distributor.comm(), &my_rank ); - - // Get the number of neighbors. - int num_n = distributor.numNeighbor(); - - // Calculate the number of elements that are staying on this rank and - // therefore can be directly copied. If any of the neighbor ranks are this - // rank it will be stored in first position (i.e. the first neighbor in - // the local list is always yourself if you are sending to yourself). - std::size_t num_stay = - ( num_n > 0 && distributor.neighborRank( 0 ) == my_rank ) - ? distributor.numExport( 0 ) - : 0; - - // Allocate a send buffer. - std::size_t num_send = distributor.totalNumExport() - num_stay; - Kokkos::View - send_buffer( Kokkos::ViewAllocateWithoutInitializing( - "distributor_send_buffer" ), - num_send ); - - // Allocate a receive buffer. - Kokkos::View - recv_buffer( Kokkos::ViewAllocateWithoutInitializing( - "distributor_recv_buffer" ), - distributor.totalNumImport() ); - - // Get the steering vector for the sends. - auto steering = distributor.getExportSteering(); - - // Gather the exports from the source AoSoA into the tuple-contiguous send - // buffer or the receive buffer if the data is staying. We know that the - // steering vector is ordered such that the data staying on this rank - // comes first. - auto build_send_buffer_func = KOKKOS_LAMBDA( const std::size_t i ) + return ( distributorCheckValidSize( distributor, src ) && + dst.size() == distributor.totalNumImport() ); +} + +//---------------------------------------------------------------------------// +template +class Migrate; + +/*! + \brief Synchronously migrate data between two different decompositions using + the distributor forward communication plan. AoSoA version. + + Migrate moves all data to a new distribution that is uniquely owned - each + element will only have a single destination rank.
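+
+ A typical usage pattern is to construct the Migrate object once and reuse
+ it as the communication plan changes, so that the send/receive buffers are
+ not reallocated on every call. The sketch below is illustrative only: it
+ assumes an existing Distributor `distributor`, an AoSoA `aosoa` sized for
+ its exports, and a hypothetical updated plan `new_distributor`.
+
+ \code
+ // Construct once; buffers are sized from the distributor counts.
+ auto migrate = Cabana::createMigrate( distributor, aosoa );
+ migrate.apply();
+
+ // Later, update for a new plan; existing buffers are reused when they
+ // are already large enough.
+ migrate.reserve( new_distributor, aosoa );
+ migrate.apply();
+ \endcode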
+*/ +template +class Migrate::value>::type> + : public CommunicationDataSeparate< + DistributorType, CommunicationDataAoSoASeparate> +{ + public: + static_assert( is_distributor::value, "" ); + + //! Base type. Note we use separate AoSoA even if it's in place. + using base_type = + CommunicationDataSeparate>; + //! Communication plan type (Distributor) + using plan_type = typename base_type::plan_type; + //! Kokkos execution space. + using execution_space = typename base_type::execution_space; + //! Kokkos memory space. + using memory_space = typename base_type::memory_space; + //! Communication data type. + using data_type = typename base_type::data_type; + //! Communication buffer type. + using buffer_type = typename base_type::buffer_type; + + /*! + \param distributor The Distributor to be used for the migrate. + + \param aosoa Upon input, must have the same number of elements as the + inputs used to construct the distributor. At output, it will be the same + size as the number of import elements on this rank provided by the + distributor. Before using this function, consider reserving enough memory + in the data structure so reallocating is not necessary. + + \param overallocation An optional factor to keep extra space in the + buffers to avoid frequent resizing. + */ + Migrate( DistributorType distributor, AoSoAType aosoa, + const double overallocation = 1.0 ) + : base_type( distributor, aosoa, aosoa, overallocation ) { - auto tpl = src.getTuple( steering( i ) ); - if ( i < num_stay ) - recv_buffer( i ) = tpl; - else - send_buffer( i - num_stay ) = tpl; - }; - Kokkos::RangePolicy - build_send_buffer_policy( 0, distributor.totalNumExport() ); - Kokkos::parallel_for( "Cabana::Impl::distributeData::build_send_buffer", - build_send_buffer_policy, build_send_buffer_func ); - Kokkos::fence(); - - // The distributor has its own communication space so choose any tag. - const int mpi_tag = 1234; - - // Post non-blocking receives. - std::vector requests; - requests.reserve( num_n ); - std::pair recv_range = { 0, 0 }; - for ( int n = 0; n < num_n; ++n ) + _my_rank = _distributor.getRank(); + reserve( _distributor, aosoa ); + } + + /*! + \param distributor The Distributor to be used for the migrate. + + \param src The AoSoA containing the data to be migrated. Must have the + same number of elements as the inputs used to construct the distributor. + + \param dst The AoSoA to which the migrated data will be written. Must be + the same size as the number of imports given by the distributor on this + rank. Call totalNumImport() on the distributor to get this size value. + + \param overallocation An optional factor to keep extra space in the + buffers to avoid frequent resizing. + */ + Migrate( DistributorType distributor, AoSoAType src, AoSoAType dst, + const double overallocation = 1.0 ) + : base_type( distributor, src, dst, overallocation ) + { + _my_rank = _distributor.getRank(); + reserve( _distributor, src, dst ); + } + + /*! + \brief Perform the migrate operation. + */ + void apply() override { - recv_range.second = recv_range.first + distributor.numImport( n ); + // Get the buffers (local copies for lambdas below). + auto send_buffer = this->getSendBuffer(); + auto recv_buffer = this->getReceiveBuffer(); + + // Get the particle data. Note that the src could be the same as dst. + auto src = this->getParticles(); + auto dst = this->getDestinationParticles(); + + // Get the number of neighbors.
+ int num_n = _distributor.numNeighbor(); + + // Number of elements that are staying on this rank. + auto num_stay = _distributor.numStay(); + + // Get the steering vector for the sends. + auto steering = _distributor.getExportSteering(); + + // Gather the exports from the source AoSoA into the + // tuple-contiguous send buffer or the receive buffer if the data is + // staying. We know that the steering vector is ordered such that + // the data staying on this rank comes first. + auto build_send_buffer_func = KOKKOS_LAMBDA( const std::size_t i ) + { + auto tpl = src.getTuple( steering( i ) ); + if ( i < num_stay ) + recv_buffer( i ) = tpl; + else + send_buffer( i - num_stay ) = tpl; + }; + Kokkos::parallel_for( "Cabana::migrate::build_send_buffer", + _send_policy, build_send_buffer_func ); + Kokkos::fence(); + + // The distributor has its own communication space so choose any + // tag. + const int mpi_tag = 1234; + + // Post non-blocking receives. + std::vector requests; + requests.reserve( num_n ); + std::pair recv_range = { 0, 0 }; + for ( int n = 0; n < num_n; ++n ) + { + recv_range.second = recv_range.first + _distributor.numImport( n ); - if ( ( distributor.numImport( n ) > 0 ) && - ( distributor.neighborRank( n ) != my_rank ) ) + if ( ( _distributor.numImport( n ) > 0 ) && + ( _distributor.neighborRank( n ) != _my_rank ) ) + { + auto recv_subview = Kokkos::subview( recv_buffer, recv_range ); + + requests.push_back( MPI_Request() ); + + MPI_Irecv( recv_subview.data(), + recv_subview.size() * sizeof( data_type ), MPI_BYTE, + _distributor.neighborRank( n ), mpi_tag, + _distributor.comm(), &( requests.back() ) ); + } + + recv_range.first = recv_range.second; + } + + // Do blocking sends. + std::pair send_range = { 0, 0 }; + for ( int n = 0; n < num_n; ++n ) { - auto recv_subview = Kokkos::subview( recv_buffer, recv_range ); + if ( ( _distributor.numExport( n ) > 0 ) && + ( _distributor.neighborRank( n ) != _my_rank ) ) + { + send_range.second = + send_range.first + _distributor.numExport( n ); + + auto send_subview = Kokkos::subview( send_buffer, send_range ); - requests.push_back( MPI_Request() ); + MPI_Send( send_subview.data(), + send_subview.size() * sizeof( data_type ), MPI_BYTE, + _distributor.neighborRank( n ), mpi_tag, + _distributor.comm() ); - MPI_Irecv( recv_subview.data(), - recv_subview.size() * - sizeof( typename AoSoA_t::tuple_type ), - MPI_BYTE, distributor.neighborRank( n ), mpi_tag, - distributor.comm(), &( requests.back() ) ); + send_range.first = send_range.second; + } } - recv_range.first = recv_range.second; + // Wait on non-blocking receives. + std::vector status( requests.size() ); + const int ec = + MPI_Waitall( requests.size(), requests.data(), status.data() ); + if ( MPI_SUCCESS != ec ) + throw std::logic_error( "Failed MPI Communication" ); + + // Extract the receive buffer into the destination AoSoA. + auto extract_recv_buffer_func = KOKKOS_LAMBDA( const std::size_t i ) + { + dst.setTuple( i, recv_buffer( i ) ); + }; + Kokkos::parallel_for( "Cabana::migrate::extract_recv_buffer", + _recv_policy, extract_recv_buffer_func ); + Kokkos::fence(); + + // Barrier before completing to ensure synchronization. + MPI_Barrier( _distributor.comm() ); + + // If the destination decomposition is smaller than the source + // decomposition resize after we have moved the data. + bool dst_is_bigger = + ( _distributor.totalNumImport() > _distributor.exportSize() ); + if ( !dst_is_bigger ) + this->resizeParticles( _distributor.totalNumImport() ); } - // Do blocking sends. 
- std::pair send_range = { 0, 0 }; - for ( int n = 0; n < num_n; ++n ) + /*! + \brief Update the distributor and AoSoA data for migration. + + \param distributor The Distributor to be used for the migrate. + \param aosoa The AoSoA on which to perform the migrate. + \param overallocation An optional factor to keep extra space in the + buffers to avoid frequent resizing. + */ + void reserve( const DistributorType& distributor, AoSoAType& aosoa, + const double overallocation = 0.0 ) { - if ( ( distributor.numExport( n ) > 0 ) && - ( distributor.neighborRank( n ) != my_rank ) ) + // Check that the AoSoA is the right size. + if ( !distributorCheckValidSize( distributor, aosoa ) ) + throw std::runtime_error( "AoSoA is the wrong size for migrate!" ); + + // Determine if the source of destination decomposition has more data on + // this rank. + bool dst_is_bigger = + ( distributor.totalNumImport() > distributor.exportSize() ); + + // If the destination decomposition is bigger than the source + // decomposition resize now so we have enough space to do the operation. + if ( dst_is_bigger ) + aosoa.resize( distributor.totalNumImport() ); + + if ( overallocation >= 1.0 ) + this->reserveImpl( distributor, aosoa, aosoa, + distributor.totalSend(), + distributor.totalReceive(), overallocation ); + else + this->reserveImpl( distributor, aosoa, aosoa, + distributor.totalSend(), + distributor.totalReceive() ); + } + + /*! + \brief Update the distributor and AoSoA data for migration. + + \param distributor The Distributor to be used for the migrate. + \param src The AoSoA containing the data to be migrated. Must have the + same number of elements as the inputs used to construct the distributor. + \param dst The AoSoA to which the migrated data will be written. Must be + the same size as the number of imports given by the distributor on this + rank. + \param overallocation An optional factor to keep extra space in the + buffers to avoid frequent resizing. + */ + void reserve( const DistributorType& distributor, const AoSoAType& src, + AoSoAType& dst, const double overallocation = 0.0 ) + { + // Check that src and dst are the right size. + if ( !distributorCheckValidSize( distributor, src, dst ) ) + throw std::runtime_error( "AoSoA is the wrong size for migrate!" ); + + if ( overallocation >= 1.0 ) + this->reserveImpl( distributor, src, dst, distributor.totalSend(), + distributor.totalReceive(), overallocation ); + else + this->reserveImpl( distributor, src, dst, distributor.totalSend(), + distributor.totalReceive() ); + } + + private: + plan_type _distributor = base_type::_comm_plan; + using base_type::_recv_policy; + using base_type::_send_policy; + + int _my_rank; +}; + +/*! + \brief Synchronously migrate data between two different decompositions using + the distributor forward communication plan. Slice version. + + Migrate moves all data to a new distribution that is uniquely owned - each + element will only have a single destination rank. +*/ +template +class Migrate::value>::type> + : public CommunicationDataSeparate< + DistributorType, CommunicationDataSliceSeparate> +{ + public: + static_assert( is_distributor::value, "" ); + + //! Base type. + using base_type = + CommunicationDataSeparate>; + //! Communication plan type (Distributor) + using plan_type = typename base_type::plan_type; + //! Kokkos execution space. + using execution_space = typename base_type::execution_space; + //! Kokkos memory space. + using memory_space = typename base_type::memory_space; + //! Communication data type. 
+ using data_type = typename base_type::data_type; + //! Communication buffer type. + using buffer_type = typename base_type::buffer_type; + + /*! + \param distributor The Distributor to be used for the migrate. + \param src The slice containing the data to be migrated. + \param dst The slice to which the migrated data will be written. + \param overallocation An optional factor to keep extra space in the + buffers to avoid frequent resizing. + */ + Migrate( const DistributorType& distributor, const SliceType& src, + SliceType& dst, const double overallocation = 1.0 ) + : base_type( distributor, src, dst, overallocation ) + { + _my_rank = _distributor.getRank(); + reserve( _distributor, src, dst ); + } + + /*! + \brief Perform the migrate operation. + */ + void apply() override + { + // Get the buffers (local copies for lambdas below). + auto send_buffer = this->getSendBuffer(); + auto recv_buffer = this->getReceiveBuffer(); + + // Get the number of components in the slices. + auto num_comp = this->getSliceComponents(); + + // Get the raw slice data. + auto src = this->getParticles(); + auto src_data = src.data(); + auto dst = this->getDestinationParticles(); + auto dst_data = dst.data(); + + // Get the number of neighbors. + int num_n = _distributor.numNeighbor(); + + // Number of elements that are staying on this rank. + auto num_stay = _distributor.numStay(); + + // Get the steering vector for the sends. + auto steering = _distributor.getExportSteering(); + + // Gather from the source Slice into the contiguous send buffer or, + // if it is part of the local copy, put it directly in the destination + // Slice. + auto build_send_buffer_func = KOKKOS_LAMBDA( const std::size_t i ) + { + auto s_src = SliceType::index_type::s( steering( i ) ); + auto a_src = SliceType::index_type::a( steering( i ) ); + std::size_t src_offset = s_src * src.stride( 0 ) + a_src; + if ( i < num_stay ) + for ( std::size_t n = 0; n < num_comp; ++n ) + recv_buffer( i, n ) = + src_data[src_offset + n * SliceType::vector_length]; + else + for ( std::size_t n = 0; n < num_comp; ++n ) + send_buffer( i - num_stay, n ) = + src_data[src_offset + n * SliceType::vector_length]; + }; + Kokkos::parallel_for( "Cabana::migrate::build_send_buffer", + _send_policy, build_send_buffer_func ); + Kokkos::fence(); + + // The distributor has its own communication space so choose any tag. + const int mpi_tag = 1234; + + // Post non-blocking receives.
+ std::vector requests; + requests.reserve( num_n ); + std::pair recv_range = { 0, 0 }; + for ( int n = 0; n < num_n; ++n ) { - send_range.second = send_range.first + distributor.numExport( n ); + recv_range.second = recv_range.first + _distributor.numImport( n ); + + if ( ( _distributor.numImport( n ) > 0 ) && + ( _distributor.neighborRank( n ) != _my_rank ) ) + { + auto recv_subview = + Kokkos::subview( recv_buffer, recv_range, Kokkos::ALL ); - auto send_subview = Kokkos::subview( send_buffer, send_range ); + requests.push_back( MPI_Request() ); - MPI_Send( send_subview.data(), - send_subview.size() * - sizeof( typename AoSoA_t::tuple_type ), - MPI_BYTE, distributor.neighborRank( n ), mpi_tag, - distributor.comm() ); + MPI_Irecv( recv_subview.data(), + recv_subview.size() * sizeof( data_type ), MPI_BYTE, + _distributor.neighborRank( n ), mpi_tag, + _distributor.comm(), &( requests.back() ) ); + } - send_range.first = send_range.second; + recv_range.first = recv_range.second; + } + + // Do blocking sends. + std::pair send_range = { 0, 0 }; + for ( int n = 0; n < num_n; ++n ) + { + if ( ( _distributor.numExport( n ) > 0 ) && + ( _distributor.neighborRank( n ) != _my_rank ) ) + { + send_range.second = + send_range.first + _distributor.numExport( n ); + + auto send_subview = + Kokkos::subview( send_buffer, send_range, Kokkos::ALL ); + + MPI_Send( send_subview.data(), + send_subview.size() * sizeof( data_type ), MPI_BYTE, + _distributor.neighborRank( n ), mpi_tag, + _distributor.comm() ); + + send_range.first = send_range.second; + } } + + // Wait on non-blocking receives. + std::vector status( requests.size() ); + const int ec = + MPI_Waitall( requests.size(), requests.data(), status.data() ); + if ( MPI_SUCCESS != ec ) + throw std::logic_error( "Failed MPI Communication" ); + + // Extract the data from the receive buffer into the destination Slice. + auto extract_recv_buffer_func = KOKKOS_LAMBDA( const std::size_t i ) + { + auto s = SliceType::index_type::s( i ); + auto a = SliceType::index_type::a( i ); + std::size_t dst_offset = s * dst.stride( 0 ) + a; + for ( std::size_t n = 0; n < num_comp; ++n ) + dst_data[dst_offset + n * SliceType::vector_length] = + recv_buffer( i, n ); + }; + Kokkos::parallel_for( "Cabana::migrate::extract_recv_buffer", + _recv_policy, extract_recv_buffer_func ); + Kokkos::fence(); + + // Barrier before completing to ensure synchronization. + MPI_Barrier( _distributor.comm() ); } - // Wait on non-blocking receives. - std::vector status( requests.size() ); - const int ec = - MPI_Waitall( requests.size(), requests.data(), status.data() ); - if ( MPI_SUCCESS != ec ) - throw std::logic_error( "Failed MPI Communication" ); + /*! + \brief Update the distributor and slice data for migrate. - // Extract the receive buffer into the destination AoSoA. - auto extract_recv_buffer_func = KOKKOS_LAMBDA( const std::size_t i ) + \param distributor The Distributor to be used for the migrate. + \param src The slice containing the data to be migrated. + \param dst The slice to which the migrated data will be written. + \param overallocation An optional factor to keep extra space in the + buffers to avoid frequent resizing. 
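+
+ Because slices cannot be resized by this class, the caller manages the
+ size of the underlying containers and rebinds the slices before reuse. A
+ minimal reuse sketch (illustrative only; `migrate` is an existing Migrate
+ object, `src` and `dst` are slices already sized to the distributor export
+ and import counts, and 1.1 is just an example overallocation factor):
+
+ \code
+ // Rebind to the updated slices and reuse the existing buffers when they
+ // are already large enough.
+ migrate.reserve( distributor, src, dst, 1.1 );
+ migrate.apply();
+ \endcode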
+ */ + void reserve( const DistributorType& distributor, const SliceType& src, + SliceType& dst, const double overallocation ) { - dst.setTuple( i, recv_buffer( i ) ); - }; - Kokkos::RangePolicy - extract_recv_buffer_policy( 0, distributor.totalNumImport() ); - Kokkos::parallel_for( "Cabana::Impl::distributeData::extract_recv_buffer", - extract_recv_buffer_policy, - extract_recv_buffer_func ); - Kokkos::fence(); - - // Barrier before completing to ensure synchronization. - MPI_Barrier( distributor.comm() ); -} + // Check that src and dst are the right size. + if ( !distributorCheckValidSize( distributor, src, dst ) ) + throw std::runtime_error( "AoSoA is the wrong size for migrate!" ); + + this->reserveImpl( distributor, src, dst, distributor.totalSend(), + distributor.totalReceive(), overallocation ); + } + /*! + \brief Update the distributor and slice data for migrate. + + \param distributor The Distributor to be used for the migrate. + \param src The slice containing the data to be migrated. + \param dst The slice to which the migrated data will be written. + */ + void reserve( const DistributorType& distributor, const SliceType& src, + SliceType& dst ) + { + // Check that src and dst are the right size. + if ( !distributorCheckValidSize( distributor, src, dst ) ) + throw std::runtime_error( "AoSoA is the wrong size for migrate!" ); + + this->reserveImpl( distributor, src, dst, distributor.totalSend(), + distributor.totalReceive() ); + } + + private: + plan_type _distributor = base_type::_comm_plan; + using base_type::_recv_policy; + using base_type::_send_policy; + + int _my_rank; +}; //---------------------------------------------------------------------------// -//! \endcond -} // end namespace Impl +/*! + \brief Create the migrate. + + \param distributor The distributor to use for the migrate. + \param data The data on which to perform the migrate. + \param overallocation An optional factor to keep extra space in the buffers to + avoid frequent resizing. +*/ +template +auto createMigrate( DistributorType distributor, ParticleDataType data, + const double overallocation = 1.0 ) +{ + return Migrate( distributor, data, + overallocation ); +} + +/*! + \brief Create the migrate. + + \param distributor The distributor to use for the migrate. + \param src The AoSoA containing the data to be migrated. + \param dst The AoSoA to which the migrated data will be written. + \param overallocation An optional factor to keep extra space in the buffers to + avoid frequent resizing. +*/ +template +auto createMigrate( const DistributorType& distributor, + const ParticleDataType& src, ParticleDataType& dst, + const double overallocation = 1.0 ) +{ + return Migrate( distributor, src, dst, + overallocation ); +} //---------------------------------------------------------------------------// /*! \brief Synchronously migrate data between two different decompositions using - the distributor forward communication plan. Multiple AoSoA version. + the distributor forward communication plan. Separate destination version. Migrate moves all data to a new distribution that is uniquely owned - each element will only have a single destination rank. - \tparam Distributor_t Distributor type - must be a distributor. - - \tparam AoSoA_t AoSoA type - must be an AoSoA. + \note This routine allocates send and receive buffers internally. This is + often not performant due to frequent buffer reallocations - consider creating + and reusing Migrate instead. \param distributor The distributor to use for the migration. 
@@ -324,22 +775,13 @@ void distributeData( same size as the number of imports given by the distributor on this rank. Call totalNumImport() on the distributor to get this size value. */ -template -void migrate( const Distributor_t& distributor, const AoSoA_t& src, - AoSoA_t& dst, - typename std::enable_if<( is_distributor::value && - is_aosoa::value ), - int>::type* = 0 ) +template +void migrate( const DistributorType& distributor, const ParticleDataType& src, + ParticleDataType& dst ) { - // Check that src and dst are the right size. - if ( src.size() != distributor.exportSize() ) - throw std::runtime_error( "Source is the wrong size for migration!" ); - if ( dst.size() != distributor.totalNumImport() ) - throw std::runtime_error( - "Destination is the wrong size for migration!" ); - - // Move the data. - Impl::distributeData( distributor, src, dst ); + auto migrate = createMigrate( distributor, src, dst ); + migrate.apply(); + dst = migrate.getDestinationParticles(); } //---------------------------------------------------------------------------// @@ -353,232 +795,30 @@ void migrate( const Distributor_t& distributor, const AoSoA_t& src, Migrate moves all data to a new distribution that is uniquely owned - each element will only have a single destination rank. - \tparam Distributor_t Distributor type - must be a distributor. - - \tparam AoSoA_t AoSoA type - must be an AoSoA. + \note This routine allocates send and receive buffers internally. This is + often not performant due to frequent buffer reallocations - consider creating + and reusing Migrate instead. \param distributor The distributor to use for the migration. \param aosoa The AoSoA containing the data to be migrated. Upon input, must have the same number of elements as the inputs used to construct the - destributor. At output, it will be the same size as th enumber of import + distributor. At output, it will be the same size as the number of import elements on this rank provided by the distributor. Before using this function, consider reserving enough memory in the data structure so reallocating is not necessary. */ -template -void migrate( const Distributor_t& distributor, AoSoA_t& aosoa, - typename std::enable_if<( is_distributor::value && - is_aosoa::value ), - int>::type* = 0 ) -{ - // Check that the AoSoA is the right size. - if ( aosoa.size() != distributor.exportSize() ) - throw std::runtime_error( "AoSoA is the wrong size for migration!" ); - - // Determine if the source of destination decomposition has more data on - // this rank. - bool dst_is_bigger = - ( distributor.totalNumImport() > distributor.exportSize() ); - - // If the destination decomposition is bigger than the source - // decomposition resize now so we have enough space to do the operation. - if ( dst_is_bigger ) - aosoa.resize( distributor.totalNumImport() ); - - // Move the data. - Impl::distributeData( distributor, aosoa, aosoa ); - - // If the destination decomposition is smaller than the source - // decomposition resize after we have moved the data. - if ( !dst_is_bigger ) - aosoa.resize( distributor.totalNumImport() ); -} - -//---------------------------------------------------------------------------// -/*! - \brief Synchronously migrate data between two different decompositions using - the distributor forward communication plan. Slice version. The user can do - this in-place with the same slice but they will need to manage the resizing - themselves as we can't resize slices. 
- - Migrate moves all data to a new distribution that is uniquely owned - each - element will only have a single destination rank. - - \tparam Distributor_t Distributor type - must be a distributor. - - \tparam Slice_t Slice type - must be an Slice. - - \param distributor The distributor to use for the migration. - - \param src The slice containing the data to be migrated. Must have the same - number of elements as the inputs used to construct the destributor. - - \param dst The slice to which the migrated data will be written. Must be the - same size as the number of imports given by the distributor on this - rank. Call totalNumImport() on the distributor to get this size value. -*/ -template -void migrate( const Distributor_t& distributor, const Slice_t& src, - Slice_t& dst, - typename std::enable_if<( is_distributor::value && - is_slice::value ), - int>::type* = 0 ) +template +void migrate( + const DistributorType& distributor, AoSoAType& aosoa, + typename std::enable_if<( is_distributor::value && + is_aosoa::value ), + int>::type* = 0 ) { - // Check that src and dst are the right size. - if ( src.size() != distributor.exportSize() ) - throw std::runtime_error( "Source is the wrong size for migration!" ); - if ( dst.size() != distributor.totalNumImport() ) - throw std::runtime_error( - "Destination is the wrong size for migration!" ); - - // Get the number of components in the slices. - size_t num_comp = 1; - for ( size_t d = 2; d < src.rank(); ++d ) - num_comp *= src.extent( d ); - - // Get the raw slice data. - auto src_data = src.data(); - auto dst_data = dst.data(); - - // Get the MPI rank we are currently on. - int my_rank = -1; - MPI_Comm_rank( distributor.comm(), &my_rank ); - - // Get the number of neighbors. - int num_n = distributor.numNeighbor(); - - // Calculate the number of elements that are staying on this rank and - // therefore can be directly copied. If any of the neighbor ranks are this - // rank it will be stored in first position (i.e. the first neighbor in - // the local list is always yourself if you are sending to yourself). - std::size_t num_stay = - ( num_n > 0 && distributor.neighborRank( 0 ) == my_rank ) - ? distributor.numExport( 0 ) - : 0; - - // Allocate a send buffer. Note this one is layout right so the components - // of each element are consecutive in memory. - std::size_t num_send = distributor.totalNumExport() - num_stay; - Kokkos::View - send_buffer( Kokkos::ViewAllocateWithoutInitializing( - "distributor_send_buffer" ), - num_send, num_comp ); - - // Allocate a receive buffer. Note this one is layout right so the - // components of each element are consecutive in memory. - Kokkos::View - recv_buffer( Kokkos::ViewAllocateWithoutInitializing( - "distributor_recv_buffer" ), - distributor.totalNumImport(), num_comp ); - - // Get the steering vector for the sends. - auto steering = distributor.getExportSteering(); - - // Gather from the source Slice into the contiguous send buffer or, - // if it is part of the local copy, put it directly in the destination - // Slice. 
- auto build_send_buffer_func = KOKKOS_LAMBDA( const std::size_t i ) - { - auto s_src = Slice_t::index_type::s( steering( i ) ); - auto a_src = Slice_t::index_type::a( steering( i ) ); - std::size_t src_offset = s_src * src.stride( 0 ) + a_src; - if ( i < num_stay ) - for ( std::size_t n = 0; n < num_comp; ++n ) - recv_buffer( i, n ) = - src_data[src_offset + n * Slice_t::vector_length]; - else - for ( std::size_t n = 0; n < num_comp; ++n ) - send_buffer( i - num_stay, n ) = - src_data[src_offset + n * Slice_t::vector_length]; - }; - Kokkos::RangePolicy - build_send_buffer_policy( 0, distributor.totalNumExport() ); - Kokkos::parallel_for( "Cabana::migrate::build_send_buffer", - build_send_buffer_policy, build_send_buffer_func ); - Kokkos::fence(); - - // The distributor has its own communication space so choose any tag. - const int mpi_tag = 1234; - - // Post non-blocking receives. - std::vector requests; - requests.reserve( num_n ); - std::pair recv_range = { 0, 0 }; - for ( int n = 0; n < num_n; ++n ) - { - recv_range.second = recv_range.first + distributor.numImport( n ); - - if ( ( distributor.numImport( n ) > 0 ) && - ( distributor.neighborRank( n ) != my_rank ) ) - { - auto recv_subview = - Kokkos::subview( recv_buffer, recv_range, Kokkos::ALL ); - - requests.push_back( MPI_Request() ); - - MPI_Irecv( recv_subview.data(), - recv_subview.size() * - sizeof( typename Slice_t::value_type ), - MPI_BYTE, distributor.neighborRank( n ), mpi_tag, - distributor.comm(), &( requests.back() ) ); - } - - recv_range.first = recv_range.second; - } - - // Do blocking sends. - std::pair send_range = { 0, 0 }; - for ( int n = 0; n < num_n; ++n ) - { - if ( ( distributor.numExport( n ) > 0 ) && - ( distributor.neighborRank( n ) != my_rank ) ) - { - send_range.second = send_range.first + distributor.numExport( n ); - - auto send_subview = - Kokkos::subview( send_buffer, send_range, Kokkos::ALL ); - - MPI_Send( send_subview.data(), - send_subview.size() * - sizeof( typename Slice_t::value_type ), - MPI_BYTE, distributor.neighborRank( n ), mpi_tag, - distributor.comm() ); - - send_range.first = send_range.second; - } - } - - // Wait on non-blocking receives. - std::vector status( requests.size() ); - const int ec = - MPI_Waitall( requests.size(), requests.data(), status.data() ); - if ( MPI_SUCCESS != ec ) - throw std::logic_error( "Failed MPI Communication" ); - - // Extract the data from the receive buffer into the destination Slice. - auto extract_recv_buffer_func = KOKKOS_LAMBDA( const std::size_t i ) - { - auto s = Slice_t::index_type::s( i ); - auto a = Slice_t::index_type::a( i ); - std::size_t dst_offset = s * dst.stride( 0 ) + a; - for ( std::size_t n = 0; n < num_comp; ++n ) - dst_data[dst_offset + n * Slice_t::vector_length] = - recv_buffer( i, n ); - }; - Kokkos::RangePolicy - extract_recv_buffer_policy( 0, distributor.totalNumImport() ); - Kokkos::parallel_for( "Cabana::migrate::extract_recv_buffer", - extract_recv_buffer_policy, - extract_recv_buffer_func ); - Kokkos::fence(); - - // Barrier before completing to ensure synchronization. 
- MPI_Barrier( distributor.comm() ); + auto migrate = createMigrate( distributor, aosoa ); + migrate.apply(); + aosoa = migrate.getParticles(); } - //---------------------------------------------------------------------------// } // end namespace Cabana diff --git a/core/src/Cabana_Halo.hpp b/core/src/Cabana_Halo.hpp index 3a3bf013f..61238cf69 100644 --- a/core/src/Cabana_Halo.hpp +++ b/core/src/Cabana_Halo.hpp @@ -288,7 +288,7 @@ class GathergetSendBuffer(); auto recv_buffer = this->getReceiveBuffer(); - auto aosoa = this->getData(); + auto aosoa = this->getParticles(); // Get the steering vector for the sends. auto steering = _halo.getExportSteering(); @@ -460,7 +460,7 @@ class GathergetSendBuffer(); auto recv_buffer = this->getReceiveBuffer(); - auto slice = this->getData(); + auto slice = this->getParticles(); // Get the number of components in the slice. std::size_t num_comp = this->getSliceComponents(); @@ -700,7 +700,7 @@ class Scatter // Get the buffers (local copies for lambdas below). auto send_buffer = this->getSendBuffer(); auto recv_buffer = this->getReceiveBuffer(); - auto slice = this->getData(); + auto slice = this->getParticles(); // Get the number of components in the slice. std::size_t num_comp = this->getSliceComponents(); diff --git a/core/unit_test/tstDistributor.hpp b/core/unit_test/tstDistributor.hpp index 2b0632081..1068a7e54 100644 --- a/core/unit_test/tstDistributor.hpp +++ b/core/unit_test/tstDistributor.hpp @@ -27,12 +27,89 @@ namespace Test { -//---------------------------------------------------------------------------// -void test1( const bool use_topology ) +struct DistributorData +{ + // Create an AoSoA of local data with space allocated for local data. + using DataTypes = Cabana::MemberTypes; + using AoSoA_t = Cabana::AoSoA; + using AoSoA_Host_t = Cabana::AoSoA; + AoSoA_t aosoa; + + DistributorData( const int num_data ) + { + aosoa = AoSoA_t( "src", num_data ); + } + + AoSoA_t createSrcData1( const int num_data, const int my_rank ) + { + auto slice_int_src = Cabana::slice<0>( aosoa ); + auto slice_dbl_src = Cabana::slice<1>( aosoa ); + + // Fill the data. + auto fill_func = KOKKOS_LAMBDA( const int i ) + { + slice_int_src( i ) = my_rank + i; + slice_dbl_src( i, 0 ) = my_rank + i; + slice_dbl_src( i, 1 ) = my_rank + i + 0.5; + }; + Kokkos::RangePolicy range_policy( 0, num_data ); + Kokkos::parallel_for( range_policy, fill_func ); + Kokkos::fence(); + return aosoa; + } + + AoSoA_t createSrcData2( const int num_data, const int my_rank ) + { + auto slice_int_src = Cabana::slice<0>( aosoa ); + auto slice_dbl_src = Cabana::slice<1>( aosoa ); + + // Fill the data. + auto fill_func = KOKKOS_LAMBDA( const int i ) + { + slice_int_src( i ) = my_rank; + slice_dbl_src( i, 0 ) = my_rank; + slice_dbl_src( i, 1 ) = my_rank + 0.5; + }; + Kokkos::RangePolicy range_policy( 0, num_data ); + Kokkos::parallel_for( range_policy, fill_func ); + Kokkos::fence(); + return aosoa; + } + + AoSoA_t createDstData( const int num_data ) + { + return AoSoA_t( "dst", num_data ); + } + + AoSoA_Host_t createHostData( const int num_data ) + { + // Create empty host copy. + AoSoA_Host_t aosoa_host( "data_host", num_data ); + return aosoa_host; + } +}; + +template +auto createDistributor( ExportRankView export_ranks, + const std::vector neighbor_ranks, + const int use_topology ) { - // Make a communication plan. std::shared_ptr> distributor; + // Create the plan. 
+ if ( use_topology ) + distributor = std::make_shared>( + MPI_COMM_WORLD, export_ranks, neighbor_ranks ); + else + distributor = std::make_shared>( + MPI_COMM_WORLD, export_ranks ); + + return distributor; +} + +//---------------------------------------------------------------------------// +void test1( const bool use_topology ) +{ // Get my rank. int my_rank = -1; MPI_Comm_rank( MPI_COMM_WORLD, &my_rank ); @@ -43,34 +120,15 @@ void test1( const bool use_topology ) Kokkos::deep_copy( export_ranks, my_rank ); std::vector neighbor_ranks( 1, my_rank ); - // Create the plan. - if ( use_topology ) - distributor = std::make_shared>( - MPI_COMM_WORLD, export_ranks, neighbor_ranks ); - else - distributor = std::make_shared>( - MPI_COMM_WORLD, export_ranks ); + auto distributor = + createDistributor( export_ranks, neighbor_ranks, use_topology ); // Make some data to migrate. - using DataTypes = Cabana::MemberTypes; - using AoSoA_t = Cabana::AoSoA; - AoSoA_t data_src( "src", num_data ); - auto slice_int_src = Cabana::slice<0>( data_src ); - auto slice_dbl_src = Cabana::slice<1>( data_src ); - - // Fill the data. - auto fill_func = KOKKOS_LAMBDA( const int i ) - { - slice_int_src( i ) = my_rank + i; - slice_dbl_src( i, 0 ) = my_rank + i; - slice_dbl_src( i, 1 ) = my_rank + i + 0.5; - }; - Kokkos::RangePolicy range_policy( 0, num_data ); - Kokkos::parallel_for( range_policy, fill_func ); - Kokkos::fence(); + DistributorData dist_data( num_data ); + auto data_src = dist_data.createSrcData1( num_data, my_rank ); // Create a second set of data to which we will migrate. - AoSoA_t data_dst( "dst", num_data ); + auto data_dst = dist_data.createDstData( num_data ); auto slice_int_dst = Cabana::slice<0>( data_dst ); auto slice_dbl_dst = Cabana::slice<1>( data_dst ); @@ -78,8 +136,7 @@ void test1( const bool use_topology ) Cabana::migrate( *distributor, data_src, data_dst ); // Check the migration. - Cabana::AoSoA data_dst_host( "data_dst_host", - num_data ); + auto data_dst_host = dist_data.createHostData( num_data ); auto slice_int_dst_host = Cabana::slice<0>( data_dst_host ); auto slice_dbl_dst_host = Cabana::slice<1>( data_dst_host ); Cabana::deep_copy( data_dst_host, data_dst ); @@ -99,9 +156,6 @@ void test1( const bool use_topology ) //---------------------------------------------------------------------------// void test2( const bool use_topology ) { - // Make a communication plan. - std::shared_ptr> distributor; - // Get my rank. int my_rank = -1; MPI_Comm_rank( MPI_COMM_WORLD, &my_rank ); @@ -117,44 +171,24 @@ void test2( const bool use_topology ) TEST_MEMSPACE(), export_ranks_host ); std::vector neighbor_ranks( 1, my_rank ); - // Create the plan - if ( use_topology ) - distributor = std::make_shared>( - MPI_COMM_WORLD, export_ranks, neighbor_ranks ); - else - distributor = std::make_shared>( - MPI_COMM_WORLD, export_ranks ); + auto distributor = + createDistributor( export_ranks, neighbor_ranks, use_topology ); // Make some data to migrate. - using DataTypes = Cabana::MemberTypes; - using AoSoA_t = Cabana::AoSoA; - AoSoA_t data( "data", num_data ); - auto slice_int = Cabana::slice<0>( data ); - auto slice_dbl = Cabana::slice<1>( data ); - - // Fill the data. 
- auto fill_func = KOKKOS_LAMBDA( const int i ) - { - slice_int( i ) = my_rank + i; - slice_dbl( i, 0 ) = my_rank + i; - slice_dbl( i, 1 ) = my_rank + i + 0.5; - }; - Kokkos::RangePolicy range_policy( 0, num_data ); - Kokkos::parallel_for( range_policy, fill_func ); - Kokkos::fence(); + DistributorData dist_data( num_data ); + auto data = dist_data.createSrcData1( num_data, my_rank ); // Do the migration in-place Cabana::migrate( *distributor, data ); // Get host copies of the migrated data. - Cabana::AoSoA data_host( "data_host", - num_data / 2 ); + auto data_host = dist_data.createHostData( num_data / 2 ); auto slice_int_host = Cabana::slice<0>( data_host ); auto slice_dbl_host = Cabana::slice<1>( data_host ); Cabana::deep_copy( data_host, data ); // Check the migration. We received less than we sent so this should have - // resized the aososa. + // resized the aosoa. auto steering = distributor->getExportSteering(); auto host_steering = Kokkos::create_mirror_view_and_copy( Kokkos::HostSpace(), steering ); @@ -172,9 +206,6 @@ void test2( const bool use_topology ) //---------------------------------------------------------------------------// void test3( const bool use_topology ) { - // Make a communication plan. - std::shared_ptr> distributor; - // Get my rank. int my_rank = -1; MPI_Comm_rank( MPI_COMM_WORLD, &my_rank ); @@ -192,34 +223,17 @@ void test3( const bool use_topology ) Kokkos::deep_copy( export_ranks, inverse_rank ); std::vector neighbor_ranks( 1, inverse_rank ); - // Create the plan with both export ranks and the topology. - if ( use_topology ) - distributor = std::make_shared>( - MPI_COMM_WORLD, export_ranks, neighbor_ranks ); - else - distributor = std::make_shared>( - MPI_COMM_WORLD, export_ranks ); + auto distributor = + createDistributor( export_ranks, neighbor_ranks, use_topology ); // Make some data to migrate. - using DataTypes = Cabana::MemberTypes; - using AoSoA_t = Cabana::AoSoA; - AoSoA_t data_src( "data_src", num_data ); + DistributorData dist_data( num_data ); + auto data_src = dist_data.createSrcData1( num_data, my_rank ); auto slice_int_src = Cabana::slice<0>( data_src ); auto slice_dbl_src = Cabana::slice<1>( data_src ); - // Fill the data. - auto fill_func = KOKKOS_LAMBDA( const int i ) - { - slice_int_src( i ) = my_rank + i; - slice_dbl_src( i, 0 ) = my_rank + i; - slice_dbl_src( i, 1 ) = my_rank + i + 0.5; - }; - Kokkos::RangePolicy range_policy( 0, num_data ); - Kokkos::parallel_for( range_policy, fill_func ); - Kokkos::fence(); - // Create a second set of data to which we will migrate. - AoSoA_t data_dst( "data_dst", num_data ); + auto data_dst = dist_data.createDstData( num_data ); auto slice_int_dst = Cabana::slice<0>( data_dst ); auto slice_dbl_dst = Cabana::slice<1>( data_dst ); @@ -246,8 +260,7 @@ void test3( const bool use_topology ) Kokkos::HostSpace(), inverse_steering ); // Check the migration. - Cabana::AoSoA data_dst_host( "data_dst_host", - num_data ); + auto data_dst_host = dist_data.createHostData( num_data ); Cabana::deep_copy( data_dst_host, data_dst ); auto slice_int_dst_host = Cabana::slice<0>( data_dst_host ); auto slice_dbl_dst_host = Cabana::slice<1>( data_dst_host ); @@ -264,9 +277,6 @@ void test3( const bool use_topology ) //---------------------------------------------------------------------------// void test4( const bool use_topology ) { - // Make a communication plan. - std::shared_ptr> distributor; - // Get my rank. 
int my_rank = -1; MPI_Comm_rank( MPI_COMM_WORLD, &my_rank ); @@ -289,34 +299,17 @@ void test4( const bool use_topology ) auto export_ranks = Kokkos::create_mirror_view_and_copy( TEST_MEMSPACE(), export_ranks_host ); - // Create the plan - if ( use_topology ) - distributor = std::make_shared>( - MPI_COMM_WORLD, export_ranks, neighbor_ranks ); - else - distributor = std::make_shared>( - MPI_COMM_WORLD, export_ranks ); + auto distributor = + createDistributor( export_ranks, neighbor_ranks, use_topology ); // Make some data to migrate. - using DataTypes = Cabana::MemberTypes; - using AoSoA_t = Cabana::AoSoA; - AoSoA_t data_src( "data_src", num_data ); + DistributorData dist_data( num_data ); + auto data_src = dist_data.createSrcData2( num_data, my_rank ); auto slice_int_src = Cabana::slice<0>( data_src ); auto slice_dbl_src = Cabana::slice<1>( data_src ); - // Fill the data. - auto fill_func = KOKKOS_LAMBDA( const int i ) - { - slice_int_src( i ) = my_rank; - slice_dbl_src( i, 0 ) = my_rank; - slice_dbl_src( i, 1 ) = my_rank + 0.5; - }; - Kokkos::RangePolicy range_policy( 0, num_data ); - Kokkos::parallel_for( range_policy, fill_func ); - Kokkos::fence(); - // Create a second set of data to which we will migrate. - AoSoA_t data_dst( "data_dst", num_data ); + auto data_dst = dist_data.createDstData( num_data ); auto slice_int_dst = Cabana::slice<0>( data_dst ); auto slice_dbl_dst = Cabana::slice<1>( data_dst ); @@ -324,8 +317,7 @@ void test4( const bool use_topology ) Cabana::migrate( *distributor, data_src, data_dst ); // Check the migration. - Cabana::AoSoA data_dst_host( "data_dst_host", - num_data ); + auto data_dst_host = dist_data.createHostData( num_data ); auto slice_int_dst_host = Cabana::slice<0>( data_dst_host ); auto slice_dbl_dst_host = Cabana::slice<1>( data_dst_host ); Cabana::deep_copy( data_dst_host, data_dst ); @@ -368,9 +360,6 @@ void test4( const bool use_topology ) //---------------------------------------------------------------------------// void test5( const bool use_topology ) { - // Make a communication plan. - std::shared_ptr> distributor; - // Get my rank. int my_rank = -1; MPI_Comm_rank( MPI_COMM_WORLD, &my_rank ); @@ -394,34 +383,17 @@ void test5( const bool use_topology ) auto export_ranks = Kokkos::create_mirror_view_and_copy( TEST_MEMSPACE(), export_ranks_host ); - // Create the plan - if ( use_topology ) - distributor = std::make_shared>( - MPI_COMM_WORLD, export_ranks, neighbor_ranks ); - else - distributor = std::make_shared>( - MPI_COMM_WORLD, export_ranks ); + auto distributor = + createDistributor( export_ranks, neighbor_ranks, use_topology ); // Make some data to migrate. - using DataTypes = Cabana::MemberTypes; - using AoSoA_t = Cabana::AoSoA; - AoSoA_t data_src( "data_src", num_data ); + DistributorData dist_data( num_data ); + auto data_src = dist_data.createSrcData2( num_data, my_rank ); auto slice_int_src = Cabana::slice<0>( data_src ); auto slice_dbl_src = Cabana::slice<1>( data_src ); - // Fill the data. - auto fill_func = KOKKOS_LAMBDA( const int i ) - { - slice_int_src( i ) = my_rank; - slice_dbl_src( i, 0 ) = my_rank; - slice_dbl_src( i, 1 ) = my_rank + 0.5; - }; - Kokkos::RangePolicy range_policy( 0, num_data ); - Kokkos::parallel_for( range_policy, fill_func ); - Kokkos::fence(); - // Create a second set of data to which we will migrate. 
- AoSoA_t data_dst( "data_dst", my_size ); + auto data_dst = dist_data.createDstData( my_size ); auto slice_int_dst = Cabana::slice<0>( data_dst ); auto slice_dbl_dst = Cabana::slice<1>( data_dst ); @@ -430,8 +402,7 @@ void test5( const bool use_topology ) Cabana::migrate( *distributor, slice_dbl_src, slice_dbl_dst ); // Check the migration. - Cabana::AoSoA data_host( "data_host", - my_size ); + auto data_host = dist_data.createHostData( my_size ); auto slice_int_host = Cabana::slice<0>( data_host ); auto slice_dbl_host = Cabana::slice<1>( data_host ); Cabana::deep_copy( data_host, data_dst ); @@ -462,9 +433,6 @@ void test5( const bool use_topology ) //---------------------------------------------------------------------------// void test6( const bool use_topology ) { - // Make a communication plan. - std::shared_ptr> distributor; - // Get my rank. int my_rank = -1; MPI_Comm_rank( MPI_COMM_WORLD, &my_rank ); @@ -488,32 +456,15 @@ void test6( const bool use_topology ) neighbor_ranks.assign( 1, 0 ); } - // Create the plan. - if ( use_topology ) - distributor = std::make_shared>( - MPI_COMM_WORLD, export_ranks, neighbor_ranks ); - else - distributor = std::make_shared>( - MPI_COMM_WORLD, export_ranks ); + auto distributor = + createDistributor( export_ranks, neighbor_ranks, use_topology ); // Make some data to migrate. - using DataTypes = Cabana::MemberTypes; - using AoSoA_t = Cabana::AoSoA; - AoSoA_t data( "data", num_data ); + DistributorData dist_data( num_data ); + auto data = dist_data.createSrcData2( num_data, my_rank ); auto slice_int = Cabana::slice<0>( data ); auto slice_dbl = Cabana::slice<1>( data ); - // Fill the data. - auto fill_func = KOKKOS_LAMBDA( const int i ) - { - slice_int( i ) = my_rank; - slice_dbl( i, 0 ) = my_rank; - slice_dbl( i, 1 ) = my_rank + 0.5; - }; - Kokkos::RangePolicy range_policy( 0, num_data ); - Kokkos::parallel_for( range_policy, fill_func ); - Kokkos::fence(); - // Do the migration Cabana::migrate( *distributor, data ); @@ -524,8 +475,7 @@ void test6( const bool use_topology ) EXPECT_EQ( data.size(), 0 ); // Check the migration. - Cabana::AoSoA data_host( - "data_host", distributor->totalNumImport() ); + auto data_host = dist_data.createHostData( distributor->totalNumImport() ); auto slice_int_host = Cabana::slice<0>( data_host ); auto slice_dbl_host = Cabana::slice<1>( data_host ); Cabana::deep_copy( data_host, data ); @@ -545,9 +495,6 @@ void test6( const bool use_topology ) //---------------------------------------------------------------------------// void test7( const bool use_topology ) { - // Make a communication plan. - std::shared_ptr> distributor; - // Get my rank. int my_rank = -1; MPI_Comm_rank( MPI_COMM_WORLD, &my_rank ); @@ -574,13 +521,8 @@ void test7( const bool use_topology ) neighbor_ranks.assign( 1, 0 ); } - // Create the plan. - if ( use_topology ) - distributor = std::make_shared>( - MPI_COMM_WORLD, export_ranks, neighbor_ranks ); - else - distributor = std::make_shared>( - MPI_COMM_WORLD, export_ranks ); + auto distributor = + createDistributor( export_ranks, neighbor_ranks, use_topology ); // Make some data to migrate. using DataTypes = Cabana::MemberTypes; @@ -619,9 +561,6 @@ void test7( const bool use_topology ) //---------------------------------------------------------------------------// void test8( const bool use_topology ) { - // Make a communication plan. - std::shared_ptr> distributor; - // Get my rank. 
int my_rank = -1; MPI_Comm_rank( MPI_COMM_WORLD, &my_rank ); @@ -653,13 +592,8 @@ void test8( const bool use_topology ) neighbor_ranks[1] = my_rank; neighbor_ranks[2] = ( my_rank == my_size - 1 ) ? 0 : my_rank + 1; - // Create the plan. - if ( use_topology ) - distributor = std::make_shared>( - MPI_COMM_WORLD, export_ranks, neighbor_ranks ); - else - distributor = std::make_shared>( - MPI_COMM_WORLD, export_ranks ); + auto distributor = + createDistributor( export_ranks, neighbor_ranks, use_topology ); // Make some data to migrate. using DataTypes = Cabana::MemberTypes; @@ -703,9 +637,6 @@ void test8( const bool use_topology ) //---------------------------------------------------------------------------// void test9( const bool use_topology ) { - // Make a communication plan. - std::shared_ptr> distributor; - // Edge case where all particles will be removed - nothing is kept, sent, or // received. int num_data = 2; @@ -716,18 +647,10 @@ void test9( const bool use_topology ) Kokkos::parallel_for( range_policy, fill_ranks ); Kokkos::fence(); - // Create the plan. - if ( use_topology ) - { - std::vector neighbor_ranks; - distributor = std::make_shared>( - MPI_COMM_WORLD, export_ranks, neighbor_ranks ); - } - else - { - distributor = std::make_shared>( - MPI_COMM_WORLD, export_ranks ); - } + std::vector neighbor_ranks; + + auto distributor = + createDistributor( export_ranks, neighbor_ranks, use_topology ); // Make empty data to migrate. using DataTypes = Cabana::MemberTypes;