diff --git a/Source/FieldSolver/ImplicitSolvers/WarpXSolverDOF.H b/Source/FieldSolver/ImplicitSolvers/WarpXSolverDOF.H index a2a2b9c5116..2c2b2005f01 100644 --- a/Source/FieldSolver/ImplicitSolvers/WarpXSolverDOF.H +++ b/Source/FieldSolver/ImplicitSolvers/WarpXSolverDOF.H @@ -24,9 +24,7 @@ class WarpX; struct WarpXSolverDOF { amrex::Vector,3>> m_array; - amrex::Vector,3>> m_array_lhs; amrex::Vector> m_scalar; - amrex::Vector> m_scalar_lhs; warpx::fields::FieldType m_array_type = warpx::fields::FieldType::None; warpx::fields::FieldType m_scalar_type = warpx::fields::FieldType::None; @@ -35,6 +33,9 @@ struct WarpXSolverDOF amrex::Long m_nDoFs_g = 0; /*!< Global nDOF */ void Define ( WarpX* const, int, const std::string&, const std::string&); + + void fill_local_dof (amrex::iMultiFab& dof, amrex::iMultiFab const& mask); + void fill_global_dof (); }; #endif diff --git a/Source/FieldSolver/ImplicitSolvers/WarpXSolverDOF.cpp b/Source/FieldSolver/ImplicitSolvers/WarpXSolverDOF.cpp index e521325a0d7..06b8737e927 100644 --- a/Source/FieldSolver/ImplicitSolvers/WarpXSolverDOF.cpp +++ b/Source/FieldSolver/ImplicitSolvers/WarpXSolverDOF.cpp @@ -11,7 +11,10 @@ #include #include +#include + using warpx::fields::FieldType; +using namespace amrex; void WarpXSolverDOF::Define ( WarpX* const a_WarpX, const int a_num_amr_levels, @@ -41,10 +44,7 @@ void WarpXSolverDOF::Define ( WarpX* const a_WarpX, m_array.resize(a_num_amr_levels); m_scalar.resize(a_num_amr_levels); - m_array_lhs.resize(a_num_amr_levels); - m_scalar_lhs.resize(a_num_amr_levels); - amrex::Long offset = 0; m_nDoFs_l = 0; // Define the 3D vector field data container @@ -62,26 +62,9 @@ void WarpXSolverDOF::Define ( WarpX* const a_WarpX, this_array[n]->DistributionMap(), 2*ncomp, // {local, global} for each comp this_array[n]->nGrowVect() ); - m_nDoFs_g += this_array[n]->boxArray().numPts()*ncomp; - - m_array[lev][n]->setVal(-1.0); - amrex::Long offset_mf = 0; - for (amrex::MFIter mfi(*m_array[lev][n]); mfi.isValid(); ++mfi) { - auto bx = mfi.tilebox(); - auto dof_arr = m_array[lev][n]->array(mfi); - ParallelFor( bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) - { - for (int v = 0; v < ncomp; v++) { - dof_arr(i,j,k,2*v) = bx.index(amrex::IntVect(AMREX_D_DECL(i, j, k))) * ncomp - + v - + offset_mf - + offset; - } - }); - offset_mf += bx.numPts()*ncomp; - } - offset += offset_mf; - m_nDoFs_l += offset_mf; + + auto* mask = a_WarpX->getFieldDotMaskPointer(m_array_type, lev, ablastr::fields::Direction{n}); + fill_local_dof(*m_array[lev][n], *mask); } } @@ -101,106 +84,137 @@ void WarpXSolverDOF::Define ( WarpX* const a_WarpX, this_mf->DistributionMap(), 2*ncomp, // {local, global} for each comp this_mf->nGrowVect() ); - m_nDoFs_g += this_mf->boxArray().numPts()*ncomp; - - m_scalar[lev]->setVal(-1.0); - amrex::Long offset_mf = 0; - for (amrex::MFIter mfi(*m_scalar[lev]); mfi.isValid(); ++mfi) { - auto bx = mfi.tilebox(); - auto dof_arr = m_scalar[lev]->array(mfi); - ParallelFor( bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) - { - for (int v = 0; v < ncomp; v++) { - dof_arr(i,j,k,2*v) = bx.index(amrex::IntVect(AMREX_D_DECL(i, j, k))) * ncomp - + v - + offset_mf - + offset; - } - }); - offset_mf += bx.numPts()*ncomp; - } - offset += offset_mf; - m_nDoFs_l += offset_mf; + + auto* mask = a_WarpX->getFieldDotMaskPointer(m_scalar_type, lev, ablastr::fields::Direction{0}); + fill_local_dof(*m_scalar[lev], *mask); } } - auto nDoFs_g = m_nDoFs_l; - amrex::ParallelDescriptor::ReduceLongSum(&nDoFs_g,1); - WARPX_ALWAYS_ASSERT_WITH_MESSAGE( - m_nDoFs_g == nDoFs_g, - "WarpXSolverDOF::Define(): something has gone wrong in DoF counting"); - - auto num_procs = amrex::ParallelDescriptor::NProcs(); - auto my_proc = amrex::ParallelDescriptor::MyProc(); - amrex::Vector dof_proc_arr(num_procs,0); - dof_proc_arr[my_proc] = m_nDoFs_l; - amrex::ParallelDescriptor::ReduceIntSum(dof_proc_arr.data(), num_procs); + fill_global_dof(); - int offset_global = 0; - for (int i = 0; i < my_proc; i++) { offset_global += dof_proc_arr[i]; } - - if (m_array_type != FieldType::None) { - for (int lev = 0; lev < a_num_amr_levels; ++lev) { - const ablastr::fields::VectorField this_array = a_WarpX->m_fields.get_alldirs(a_vector_type_name, lev); - for (int n = 0; n < 3; n++) { - auto ncomp = this_array[n]->nComp(); - for (amrex::MFIter mfi(*m_array[lev][n]); mfi.isValid(); ++mfi) { - auto bx = mfi.tilebox(); - auto dof_arr = m_array[lev][n]->array(mfi); - ParallelFor( bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) - { - for (int v = 0; v < ncomp; v++) { - dof_arr(i,j,k,2*v+1) = dof_arr(i,j,k,2*v) + offset_global; - } - }); + for (int lev = 0; lev < a_num_amr_levels; ++lev) { + for (int n = 0; n < 3; n++) { + if (auto* dof = m_array[lev][n].get()) { + for (int comp = 1; comp < dof->nComp(); comp += 2) { // Only call this on global id + dof->FillBoundaryAndSync(comp, 1, dof->nGrowVect(), a_WarpX->Geom(lev).periodicity()); } } } + if (auto* dof = m_scalar[lev].get()) { + for (int comp = 1; comp < dof->nComp(); comp += 2) { // Only call this on global id + dof->FillBoundaryAndSync(comp, 1, dof->nGrowVect(), a_WarpX->Geom(lev).periodicity()); + } + } } - if (m_scalar_type != FieldType::None) { - for (int lev = 0; lev < a_num_amr_levels; ++lev) { - const amrex::MultiFab* this_mf = a_WarpX->m_fields.get(a_scalar_type_name,lev); - auto ncomp = this_mf->nComp(); - for (amrex::MFIter mfi(*m_scalar[lev]); mfi.isValid(); ++mfi) { - auto bx = mfi.tilebox(); - auto dof_arr = m_scalar[lev]->array(mfi); - ParallelFor( bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) - { - for (int v = 0; v < ncomp; v++) { - dof_arr(i,j,k,2*v+1) = dof_arr(i,j,k,2*v) + offset_global; + + amrex::Print() << "Defined DOF object for linear solves (total DOFs = " << m_nDoFs_g << ").\n"; +} + +void WarpXSolverDOF::fill_local_dof (iMultiFab& dof, iMultiFab const& mask) +{ + int ncomp = dof.nComp() / 2; // /2 because both local and global ids are stored in dof + + AMREX_ALWAYS_ASSERT(dof.boxArray().numPts()*ncomp < static_cast(std::numeric_limits::max())); + + dof.setVal(std::numeric_limits::lowest()); + +#ifdef AMREX_USE_MPI + int nprocs = ParallelDescriptor::NProcs(); +#endif + + for (MFIter mfi(dof); mfi.isValid(); ++mfi) { + Box const& vbx = mfi.validbox(); + int npts = vbx.numPts(); + BoxIndexer boxindex(vbx); + auto const& m = mask.const_array(mfi); + auto const& d = dof.array(mfi); + auto start_id = m_nDoFs_l; + auto ndofs = Scan::PrefixSum( + npts, + [=] AMREX_GPU_DEVICE (int offset) -> int + { + auto [i,j,k] = boxindex(offset); + return m(i,j,k) ? 1 : 0; + }, + [=] AMREX_GPU_DEVICE (int offset, int ps) + { + auto [i,j,k] = boxindex(offset); + if (m(i,j,k)) { + d(i,j,k,0) = ps + start_id; +#ifdef AMREX_USE_MPI + if (nprocs == 1) +#endif + { + d(i,j,k,1) = ps + start_id; } - }); - } + } + }, + Scan::Type::exclusive, Scan::retSum); + if (ncomp > 1) { + ParallelFor(vbx, ncomp-1, [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) + { + if (m(i,j,k)) { + d(i,j,k,2*(n+1)) = d(i,j,k,0) + ndofs*(n+1); +#ifdef AMREX_USE_MPI + if (nprocs == 1) +#endif + { + d(i,j,k,2*(n+1)+1) = d(i,j,k,0) + ndofs*(n+1); + } + } + }); } + m_nDoFs_l += Long(ndofs)*ncomp; } +} - if (m_array_type != FieldType::None) { - for (int lev = 0; lev < a_num_amr_levels; ++lev) { - const auto& geom = a_WarpX->Geom(lev); - for (int n = 0; n < 3; n++) { - m_array_lhs[lev][n] = std::make_unique(m_array[lev][n]->boxArray(), - m_array[lev][n]->DistributionMap(), - m_array[lev][n]->nComp(), - 0 ); - amrex::iMultiFab::Copy(*m_array_lhs[lev][n], *m_array[lev][n], 0, 0, m_array[lev][n]->nComp(), 0); - m_array[lev][n]->FillBoundary(geom.periodicity()); - // do NOT call FillBoundary() on m_array_lhs +void WarpXSolverDOF::fill_global_dof () +{ +#ifndef AMREX_USE_MPI + m_nDoFs_g = m_nDoFs_l; +#else + int nprocs = ParallelDescriptor::NProcs(); + if (nprocs == 1) { + m_nDoFs_g = m_nDoFs_l; + } else { + Vector ndofs_allprocs(nprocs); + MPI_Allgather(&m_nDoFs_l, 1, ParallelDescriptor::Mpi_typemap::type(), + ndofs_allprocs.data(), 1, ParallelDescriptor::Mpi_typemap::type(), + ParallelDescriptor::Communicator()); + Long proc_begin = 0; + int myproc = ParallelDescriptor::MyProc(); + m_nDoFs_g = 0; + for (int iproc = 0; iproc < nprocs; ++iproc) { + if (iproc < myproc) { + proc_begin += ndofs_allprocs[iproc]; } + m_nDoFs_g += ndofs_allprocs[iproc]; } - } - if (m_scalar_type != FieldType::None) { - for (int lev = 0; lev < a_num_amr_levels; ++lev) { - m_scalar_lhs[lev] = std::make_unique(m_scalar[lev]->boxArray(), - m_scalar[lev]->DistributionMap(), - m_scalar[lev]->nComp(), - 0 ); - amrex::iMultiFab::Copy(*m_scalar_lhs[lev], *m_scalar[lev], 0, 0, m_scalar[lev]->nComp(), 0); - const auto& geom = a_WarpX->Geom(lev); - m_scalar[lev]->FillBoundary(geom.periodicity()); - // do NOT call FillBoundary() on m_scalar_lhs + for (auto& x : m_array) { + for (auto& y : x) { + if (y) { + auto const& dof = y->arrays(); + auto ncomp = y->nComp() / 2; + ParallelFor(*y, IntVect(0), ncomp, [=] AMREX_GPU_DEVICE (int b, int i, int j, int k, int n) + { + dof[b](i,j,k,2*n+1) = dof[b](i,j,k,2*n) + int(proc_begin); + }); + } + } + } + for (auto& x : m_scalar) { + if (x) { + auto const& dof = x->arrays(); + auto ncomp = x->nComp() / 2; + ParallelFor(*x, IntVect(0), ncomp, [=] AMREX_GPU_DEVICE (int b, int i, int j, int k, int n) + { + dof[b](i,j,k,2*n+1) = dof[b](i,j,k,2*n) + int(proc_begin); + }); + } } + Gpu::streamSynchronize(); } +#endif - amrex::Print() << "Defined DOF object for linear solves (total DOFs = " << m_nDoFs_g << ").\n"; } diff --git a/Source/FieldSolver/ImplicitSolvers/WarpXSolverVec.cpp b/Source/FieldSolver/ImplicitSolvers/WarpXSolverVec.cpp index 81738019f6c..030e7e3bfff 100644 --- a/Source/FieldSolver/ImplicitSolvers/WarpXSolverVec.cpp +++ b/Source/FieldSolver/ImplicitSolvers/WarpXSolverVec.cpp @@ -160,10 +160,13 @@ void WarpXSolverVec::copyFrom ( const amrex::Real* const a_arr) { for (int v = 0; v < ncomp; v++) { int dof = dof_arr(i,j,k,2*v); // local - data_arr(i,j,k,v) = a_arr[dof]; + if (dof >= 0) { + data_arr(i,j,k,v) = a_arr[dof]; + } } }); } + m_array_vec[lev][n]->FillBoundaryAndSync(m_WarpX->Geom(lev).periodicity()); } } if (m_scalar_type != FieldType::None) { @@ -176,10 +179,13 @@ void WarpXSolverVec::copyFrom ( const amrex::Real* const a_arr) { for (int v = 0; v < ncomp; v++) { int dof = dof_arr(i,j,k,2*v); // local - data_arr(i,j,k,v) = a_arr[dof]; + if (dof >= 0) { + data_arr(i,j,k,v) = a_arr[dof]; + } } }); } + m_scalar_vec[lev]->FillBoundaryAndSync(m_WarpX->Geom(lev).periodicity()); } } } @@ -205,7 +211,9 @@ void WarpXSolverVec::copyTo ( amrex::Real* const a_arr) const { for (int v = 0; v < ncomp; v++) { int dof = dof_arr(i,j,k,2*v); // local - a_arr[dof] = data_arr(i,j,k,v); + if (dof >= 0) { + a_arr[dof] = data_arr(i,j,k,v); + } } }); } @@ -221,7 +229,9 @@ void WarpXSolverVec::copyTo ( amrex::Real* const a_arr) const { for (int v = 0; v < ncomp; v++) { int dof = dof_arr(i,j,k,2*v); // local - a_arr[dof] = data_arr(i,j,k,v); + if (dof >= 0) { + a_arr[dof] = data_arr(i,j,k,v); + } } }); } diff --git a/Source/NonlinearSolvers/MatrixPC.H b/Source/NonlinearSolvers/MatrixPC.H index 7c96ceb9fa8..de54e940bbf 100644 --- a/Source/NonlinearSolvers/MatrixPC.H +++ b/Source/NonlinearSolvers/MatrixPC.H @@ -361,7 +361,6 @@ int MatrixPC::Assemble (const T& a_U) // Get DOF object from a_U const auto& dofs_obj = a_U.getDOFsObject(); const auto& dofs_mfarrvec = dofs_obj->m_array; - const auto& dofs_lhs_mfarrvec = dofs_obj->m_array_lhs; AMREX_ALWAYS_ASSERT(m_ndofs_l == dofs_obj->m_nDoFs_l); AMREX_ALWAYS_ASSERT(m_ndofs_g == dofs_obj->m_nDoFs_g); @@ -383,8 +382,6 @@ int MatrixPC::Assemble (const T& a_U) auto c_indices_g_ptr = m_c_indices_g.data(); auto a_ij_ptr = m_a_ij.data(); - const auto ndofs_l = m_ndofs_l; - const auto ndofs_g = m_ndofs_g; const auto nnz_max = m_pc_mat_nnz; auto nnz_actual = nnz_max; @@ -404,11 +401,9 @@ int MatrixPC::Assemble (const T& a_U) for (amrex::MFIter mfi(*dofs_mfarrvec[lev][dir]); mfi.isValid(); ++mfi) { auto bx = mfi.tilebox(); - auto dof_lhs_arr = dofs_lhs_mfarrvec[lev][dir]->const_array(mfi); - -#if defined(WARPX_DIM_1D_Z) auto dof_arr = dofs_mfarrvec[lev][dir]->const_array(mfi); -#elif defined(WARPX_DIM_RCYLINDER) + +#if defined(WARPX_DIM_RCYLINDER) amrex::ignore_unused(dxi); WARPX_ABORT_WITH_MESSAGE("MatrixPC::Assemble() not yet implemented for WARPX_DIM_RCYLINDER"); #elif defined(WARPX_DIM_RSPHERE) @@ -418,7 +413,6 @@ int MatrixPC::Assemble (const T& a_U) amrex::ignore_unused(dxi); WARPX_ABORT_WITH_MESSAGE("MatrixPC::Assemble() not yet implemented for WARPX_DIM_RZ"); #elif defined(WARPX_DIM_XZ) - auto dof_arr = dofs_mfarrvec[lev][dir]->const_array(mfi); int tdir = -1; if (dir == 0) { tdir = 2; } else if (dir == 2) { tdir = 0; } @@ -440,16 +434,16 @@ int MatrixPC::Assemble (const T& a_U) if (thetaDt > 0.0) { ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) { + int ridx_l = dof_arr(i,j,k,0); + if (ridx_l < 0) { return; } + int icol = 0; - int ridx_l = dof_lhs_arr(i,j,k,0); - int ridx_g = dof_lhs_arr(i,j,k,1); - AMREX_ALWAYS_ASSERT((ridx_l >= 0) && (ridx_l < ndofs_l)); - AMREX_ALWAYS_ASSERT((ridx_g >= 0) && (ridx_g < ndofs_g)); + int ridx_g = dof_arr(i,j,k,1); r_indices_g_ptr[ridx_l] = ridx_g; { - int cidx_g_lhs = dof_lhs_arr(i,j,k,1); + int cidx_g_lhs = dof_arr(i,j,k,1); amrex::Real val = 1.0; auto flag = MatrixPCUtils::insertOrAdd( cidx_g_lhs, val, &c_indices_g_ptr[ridx_l*nnz_max], @@ -638,12 +632,13 @@ int MatrixPC::Assemble (const T& a_U) ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) { - int ridx_l = dof_lhs_arr(i,j,k,0); - AMREX_ALWAYS_ASSERT((ridx_l >= 0) && (ridx_l < ndofs_l)); + int ridx_l = dof_arr(i,j,k,0); + if (ridx_l < 0) { return; } + int icol = num_nz_ptr[ridx_l]; { - int cidx_g_lhs = dof_lhs_arr(i,j,k,1); + int cidx_g_lhs = dof_arr(i,j,k,1); amrex::Real val = sigma_ii_arr(i,j,k,0) - 1.0; auto flag = MatrixPCUtils::insertOrAdd( cidx_g_lhs, val, &c_indices_g_ptr[ridx_l*nnz_max],