Fix solvability issue in the nodal solver RAP approach (#2783)

WeiqunZhang · web-flow · commit b78921a2d80d · 2022-05-27T10:46:12.000-07:00
In the RAP approach of the nodal solver, the RHS at Neumann boundaries only
includes the integral inside the domain.  That is, it is only half of its
"physical" value.  The usual way of subtracting a constant from the RHS
therefore does not work for RAP: we should subtract only half of the constant
offset at Neumann boundaries.  The computation of the constant offset needed
for the solvability fix is also affected by how the RHS is computed at
Neumann boundaries.  For EB, the computation of the constant offset is an
integral of the RHS multiplied by the volume fraction, and the subtraction
also needs to be weighted by the volume fraction.
diff --git a/Src/LinearSolvers/MLMG/AMReX_MLLinOp.H b/Src/LinearSolvers/MLMG/AMReX_MLLinOp.H
@@ -231,6 +231,8 @@ public:
     virtual void applyInhomogNeumannTerm (int /*amrlev*/, MultiFab& /*rhs*/) const {}
     virtual void applyOverset (int /*amlev*/, MultiFab& /*rhs*/) const {}
     virtual void scaleRHS (int /*amrlev*/, MultiFab& /*rhs*/) const {}
+    virtual Real getSolvabilityOffset (int /*amrlev*/, int /*mglev*/, MultiFab const& /*rhs*/) const { return 0._rt; } // Offset to subtract from rhs to make a singular problem solvable; this default returns 0. Only nodal solvers need it
+    virtual void fixSolvabilityByOffset (int /*amrlev*/, int /*mglev*/, MultiFab& /*rhs*/, Real /*offset*/) const {} // Applies the solvability offset to rhs; this default does nothing. Only nodal solvers need it
 
     virtual void prepareForSolve () = 0;
     virtual bool isSingular (int amrlev) const = 0;
diff --git a/Src/LinearSolvers/MLMG/AMReX_MLMG.H b/Src/LinearSolvers/MLMG/AMReX_MLMG.H
@@ -156,7 +156,6 @@ public:
     void computeVolInv ();
     void makeSolvable ();
     void makeSolvable (int amrlev, int mglev, MultiFab& mf);
-    Real getNodalSum (int amrlev, int mglev, MultiFab& mf) const;
 
 #if defined(AMREX_USE_HYPRE) && (AMREX_SPACEDIM > 1)
     void bottomSolveWithHypre (MultiFab& x, const MultiFab& b);
diff --git a/Src/LinearSolvers/MLMG/AMReX_MLMG.cpp b/Src/LinearSolvers/MLMG/AMReX_MLMG.cpp
@@ -1776,12 +1776,12 @@ MLMG::makeSolvable ()
     else
     {
         AMREX_ASSERT_WITH_MESSAGE(ncomp==1, "ncomp > 1 not supported for singular nodal problem");
-        Real offset = getNodalSum(0, 0, rhs[0]);
+        Real offset = linop.getSolvabilityOffset(0, 0, rhs[0]);
         if (verbose >= 4) {
             amrex::Print() << "MLMG: Subtracting " << offset << " from rhs\n";
         }
         for (int alev = 0; alev < namrlevs; ++alev) {
-            rhs[alev].plus(-offset, 0, 1);
+            linop.fixSolvabilityByOffset(alev, 0, rhs[alev], offset);
         }
     }
 }
@@ -1833,27 +1833,15 @@ MLMG::makeSolvable (int amrlev, int mglev, MultiFab& mf)
     else
     {
         AMREX_ASSERT_WITH_MESSAGE(ncomp==1, "ncomp > 1 not supported for singular nodal problem");
-        Real offset = getNodalSum(amrlev, mglev, mf);
+        Real offset = linop.getSolvabilityOffset(amrlev, mglev, mf);
         if (verbose >= 4) {
             amrex::Print() << "MLMG: Subtracting " << offset << " on level (" << amrlev << ", "
                            << mglev << ")\n";
         }
-        mf.plus(-offset, 0, 1);
+        linop.fixSolvabilityByOffset(amrlev, mglev, mf, offset);
     }
 }
 
-Real
-MLMG::getNodalSum (int amrlev, int mglev, MultiFab& mf) const
-{
-    MultiFab one(mf.boxArray(), mf.DistributionMap(), 1, 0, MFInfo(), mf.Factory());
-    one.setVal(Real(1.0));
-    const bool local = true;
-    Real s1 = linop.xdoty(amrlev, mglev, mf, one, local);
-    Real s2 = linop.xdoty(amrlev, mglev, one, one, local);
-    ParallelAllReduce::Sum<Real>({s1,s2}, ParallelContext::CommunicatorSub());
-    return s1/s2;
-}
-
 #if defined(AMREX_USE_HYPRE) && (AMREX_SPACEDIM > 1)
 void
 MLMG::bottomSolveWithHypre (MultiFab& x, const MultiFab& b)
diff --git a/Src/LinearSolvers/MLMG/AMReX_MLNodeLaplacian.H b/Src/LinearSolvers/MLMG/AMReX_MLNodeLaplacian.H
@@ -117,6 +117,8 @@ public :
     virtual void getFluxes (const Vector<MultiFab*>& a_flux,
                             const Vector<MultiFab*>& a_sol) const final override;
     virtual void unimposeNeumannBC (int amrlev, MultiFab& rhs) const final override;
+    virtual Real getSolvabilityOffset (int amrlev, int mglev, MultiFab const& rhs) const override;
+    virtual void fixSolvabilityByOffset (int amrlev, int mglev, MultiFab& rhs, Real offset) const override;
 
     virtual void compGrad (int /*amrlev*/, const Array<MultiFab*,AMREX_SPACEDIM>& /*grad*/,
                            MultiFab& /*sol*/, Location /*loc*/) const final override {
diff --git a/Src/LinearSolvers/MLMG/AMReX_MLNodeLaplacian.cpp b/Src/LinearSolvers/MLMG/AMReX_MLNodeLaplacian.cpp
@@ -171,6 +171,219 @@ MLNodeLaplacian::unimposeNeumannBC (int amrlev, MultiFab& rhs) const
     }
 }
 
+Real
+MLNodeLaplacian::getSolvabilityOffset (int amrlev, int mglev, MultiFab const& rhs) const
+{ // Returns the constant offset for the solvability fix: sum(mask*rhs) / sum(mask*scale) under RAP, base-class value otherwise.
+    amrex::ignore_unused(amrlev); // NOTE(review): presumably for builds where the assertion below compiles away; amrlev is also used further down
+    AMREX_ASSERT(amrlev==0); // only AMR level 0 is supported here
+    AMREX_ASSERT(mglev+1==m_num_mg_levels[0] || mglev==0); // only MG level 0 or the last MG level
+
+    if (m_coarsening_strategy == CoarseningStrategy::RAP) { // RAP: each node is weighted by `scale` rather than by 1
+#ifdef AMREX_USE_EB
+        auto factory = dynamic_cast<EBFArrayBoxFactory const*>(m_factory[amrlev][0].get()); // null unless the MG-level-0 factory is an EB factory
+        if (factory && !factory->isAllRegular()) { // EB path: weights come from volume fractions
+            if (mglev > 0) {
+                return 0._rt; // the EB path computes no offset on coarser MG levels
+            } else {
+                const MultiFab& vfrac = factory->getVolFrac();
+                const auto& vfrac_ma = vfrac.const_arrays();
+
+                Box dom = Geom(amrlev,mglev).Domain(); // box tested by dom.contains() in the kernel below
+                for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) {
+                    if (m_lobc[0][idim] != LinOpBCType::Neumann &&
+                        m_lobc[0][idim] != LinOpBCType::inflow)
+                    {
+                        dom.growLo(idim, 10); // low side is not Neumann/inflow: grow so cells just beyond it still pass dom.contains()
+                    }
+                    if (m_hibc[0][idim] != LinOpBCType::Neumann &&
+                        m_hibc[0][idim] != LinOpBCType::inflow)
+                    {
+                        dom.growHi(idim, 10); // same treatment for the high side
+                    }
+                }
+
+                const auto& mask = (mglev+1 == m_num_mg_levels[0]) ? m_bottom_dot_mask : m_coarse_dot_mask; // bottom mask if mglev is the last MG level, else the coarse mask
+                const auto& mask_ma = mask.const_arrays();
+                const auto& rhs_ma = rhs.const_arrays();
+                auto r = ParReduce(TypeList<ReduceOpSum,ReduceOpSum>{}, TypeList<Real,Real>{}, // one pass reducing two sums: mask*rhs and mask*scale
+                                   rhs, IntVect(0),
+                                   [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) noexcept
+                                       -> GpuTuple<Real,Real>
+                                   {
+                                       Real scale = 0.0_rt; // accumulates fac * vfrac over the neighboring cells that lie inside dom
+#if (AMREX_SPACEDIM == 3)
+                                       int const koff = 1; // 3D: visit cells k-1 and k
+                                       Real const fac = 0.125_rt; // 1/8 per cell of the 2x2x2 neighborhood
+#else
+                                       int const koff = 0; // otherwise: only cell index k is visited
+                                       Real const fac = 0.25_rt; // 1/4 per cell of the 2x2 neighborhood
+#endif
+                                       for (int kc = k-koff; kc <= k; ++kc) {
+                                       for (int jc = j-1   ; jc <= j; ++jc) {
+                                       for (int ic = i-1   ; ic <= i; ++ic) {
+                                           if (dom.contains(ic,jc,kc)) { // cells outside dom contribute nothing
+                                               scale += vfrac_ma[box_no](ic,jc,kc) * fac;
+                                           }
+                                       }}}
+                                       return { mask_ma[box_no](i,j,k) * rhs_ma[box_no](i,j,k),
+                                                mask_ma[box_no](i,j,k) * scale };
+                                   });
+
+                Real s1 = amrex::get<0>(r); // local sum of mask*rhs
+                Real s2 = amrex::get<1>(r); // local sum of mask*scale
+                ParallelAllReduce::Sum<Real>({s1,s2}, ParallelContext::CommunicatorSub()); // combine both sums over the sub-communicator
+                return s1/s2; // NOTE(review): no guard against s2 == 0
+            }
+        } else
+#endif
+        {
+            Box nddom = amrex::surroundingNodes(Geom(amrlev,mglev).Domain()); // nodal box whose ends are compared against (i,j,k) below
+            for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) {
+                if (m_lobc[0][idim] != LinOpBCType::Neumann &&
+                    m_lobc[0][idim] != LinOpBCType::inflow)
+                {
+                    nddom.growLo(idim, 10); // not Neumann/inflow: move this end away so that the equality test in ParReduce will fail
+                }
+                if (m_hibc[0][idim] != LinOpBCType::Neumann &&
+                    m_hibc[0][idim] != LinOpBCType::inflow)
+                {
+                    nddom.growHi(idim, 10); // same treatment for the high side
+                }
+            }
+
+            const auto& mask = (mglev+1 == m_num_mg_levels[0]) ? m_bottom_dot_mask : m_coarse_dot_mask; // bottom mask if mglev is the last MG level, else the coarse mask
+            const auto& mask_ma = mask.const_arrays();
+            const auto& rhs_ma = rhs.const_arrays();
+            auto r = ParReduce(TypeList<ReduceOpSum,ReduceOpSum>{}, TypeList<Real,Real>{}, // one pass reducing two sums: mask*rhs and mask*scale
+                               rhs, IntVect(0),
+                               [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) noexcept
+                                   -> GpuTuple<Real,Real>
+                               {
+                                   Real scale = 1.0_rt; // halved once per direction in which the node sits on an end of nddom
+                                   if (i == nddom.smallEnd(0) ||
+                                       i == nddom.bigEnd(0)) {
+                                       scale *= 0.5_rt;
+                                   }
+#if (AMREX_SPACEDIM >= 2)
+                                   if (j == nddom.smallEnd(1) ||
+                                       j == nddom.bigEnd(1)) {
+                                       scale *= 0.5_rt;
+                                   }
+#endif
+#if (AMREX_SPACEDIM == 3)
+                                   if (k == nddom.smallEnd(2) ||
+                                       k == nddom.bigEnd(2)) {
+                                       scale *= 0.5_rt;
+                                   }
+#endif
+                                   return { mask_ma[box_no](i,j,k) * rhs_ma[box_no](i,j,k),
+                                            mask_ma[box_no](i,j,k) * scale };
+                               });
+
+            Real s1 = amrex::get<0>(r); // local sum of mask*rhs
+            Real s2 = amrex::get<1>(r); // local sum of mask*scale
+            ParallelAllReduce::Sum<Real>({s1,s2}, ParallelContext::CommunicatorSub()); // combine both sums over the sub-communicator
+            return s1/s2; // NOTE(review): no guard against s2 == 0
+        }
+    } else {
+        return MLNodeLinOp::getSolvabilityOffset(amrlev, mglev, rhs); // non-RAP coarsening: use the base-class computation
+    }
+}
+
+void
+MLNodeLaplacian::fixSolvabilityByOffset (int amrlev, int mglev, MultiFab& rhs, Real offset) const
+{ // Subtracts offset from rhs in place: weighted per node by `scale` under RAP, uniformly otherwise.
+    if (m_coarsening_strategy == CoarseningStrategy::RAP) {
+#ifdef AMREX_USE_EB
+        auto factory = dynamic_cast<EBFArrayBoxFactory const*>(m_factory[amrlev][0].get()); // null unless the MG-level-0 factory is an EB factory
+        if (factory && !factory->isAllRegular()) { // EB path: weights come from volume fractions
+            if (mglev == 0) {
+                const MultiFab& vfrac = factory->getVolFrac();
+                const auto& vfrac_ma = vfrac.const_arrays();
+
+                Box dom = Geom(amrlev,mglev).Domain(); // box tested by dom.contains() in the kernel below
+                for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) {
+                    if (m_lobc[0][idim] != LinOpBCType::Neumann &&
+                        m_lobc[0][idim] != LinOpBCType::inflow)
+                    {
+                        dom.growLo(idim, 10); // low side is not Neumann/inflow: grow so cells just beyond it still pass dom.contains()
+                    }
+                    if (m_hibc[0][idim] != LinOpBCType::Neumann &&
+                        m_hibc[0][idim] != LinOpBCType::inflow)
+                    {
+                        dom.growHi(idim, 10); // same treatment for the high side
+                    }
+                }
+
+                auto const& rhs_ma = rhs.arrays();
+                ParallelFor(rhs, IntVect(0),
+                            [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) noexcept
+                            {
+                                Real scale = 0.0_rt; // accumulates fac * vfrac over the neighboring cells that lie inside dom
+#if (AMREX_SPACEDIM == 3)
+                                int const koff = 1; // 3D: visit cells k-1 and k
+                                Real const fac = 0.125_rt; // 1/8 per cell of the 2x2x2 neighborhood
+#else
+                                int const koff = 0; // otherwise: only cell index k is visited
+                                Real const fac = 0.25_rt; // 1/4 per cell of the 2x2 neighborhood
+#endif
+                                for (int kc = k-koff; kc <= k; ++kc) {
+                                for (int jc = j-1   ; jc <= j; ++jc) {
+                                for (int ic = i-1   ; ic <= i; ++ic) {
+                                    if (dom.contains(ic,jc,kc)) { // cells outside dom contribute nothing
+                                        scale += vfrac_ma[box_no](ic,jc,kc) * fac;
+                                    }
+                                }}}
+                                rhs_ma[box_no](i,j,k) -= offset * scale; // subtract the offset weighted by the accumulated volume fraction
+                            });
+            } // mglev != 0: rhs is left unchanged on the EB path
+        } else
+#endif
+        {
+            Box nddom = amrex::surroundingNodes(Geom(amrlev,mglev).Domain()); // nodal box whose ends are compared against (i,j,k) below
+            for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) {
+                if (m_lobc[0][idim] != LinOpBCType::Neumann &&
+                    m_lobc[0][idim] != LinOpBCType::inflow)
+                {
+                    nddom.growLo(idim, 10); // not Neumann/inflow: move this end away so that the equality test in ParallelFor will fail
+                }
+                if (m_hibc[0][idim] != LinOpBCType::Neumann &&
+                    m_hibc[0][idim] != LinOpBCType::inflow)
+                {
+                    nddom.growHi(idim, 10); // same treatment for the high side
+                }
+            }
+
+            auto const& rhs_ma = rhs.arrays();
+            ParallelFor(rhs, IntVect(0),
+                        [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) noexcept
+                        {
+                            Real scale = 1.0_rt; // halved once per direction in which the node sits on an end of nddom
+                            if (i == nddom.smallEnd(0) ||
+                                i == nddom.bigEnd(0)) {
+                                scale *= 0.5_rt;
+                            }
+#if (AMREX_SPACEDIM >= 2)
+                            if (j == nddom.smallEnd(1) ||
+                                j == nddom.bigEnd(1)) {
+                                scale *= 0.5_rt;
+                            }
+#endif
+#if (AMREX_SPACEDIM == 3)
+                            if (k == nddom.smallEnd(2) ||
+                                k == nddom.bigEnd(2)) {
+                                scale *= 0.5_rt;
+                            }
+#endif
+                            rhs_ma[box_no](i,j,k) -= offset * scale; // subtract the offset scaled by the boundary weight
+                        });
+        }
+        Gpu::streamSynchronize(); // NOTE(review): presumably waits for the ParallelFor launched above before returning — confirm
+    } else {
+        rhs.plus(-offset, 0, 1); // non-RAP: add -offset to rhs; NOTE(review): (0, 1) presumably selects component 0 only — confirm
+    }
+}
+
 void
 MLNodeLaplacian::setSigma (int amrlev, const MultiFab& a_sigma)
 {
diff --git a/Src/LinearSolvers/MLMG/AMReX_MLNodeLinOp.H b/Src/LinearSolvers/MLMG/AMReX_MLNodeLinOp.H
@@ -67,7 +67,10 @@ public:
         amrex::Abort("AMReX_MLNodeLinOp::fillSolutionBC::How did we get here?");
     }
 
-    virtual void applyInhomogNeumannTerm (int armlev, MultiFab& rhs) const override;
+    virtual void applyInhomogNeumannTerm (int amrlev, MultiFab& rhs) const override;
+
+    virtual Real getSolvabilityOffset (int amrlev, int mglev, MultiFab const& rhs) const override;
+    virtual void fixSolvabilityByOffset (int amrlev, int mglev, MultiFab& rhs, Real offset) const override;
 
     virtual void prepareForSolve () override {}
 
diff --git a/Src/LinearSolvers/MLMG/AMReX_MLNodeLinOp.cpp b/Src/LinearSolvers/MLMG/AMReX_MLNodeLinOp.cpp
@@ -182,6 +182,36 @@ MLNodeLinOp::applyInhomogNeumannTerm (int /*amrlev*/, MultiFab& /*rhs*/) const
 {
 }
 
+Real
+MLNodeLinOp::getSolvabilityOffset (int amrlev, int mglev, MultiFab const& rhs) const
+{ // Returns sum(mask*rhs) / sum(mask), with both sums combined over the sub-communicator.
+    amrex::ignore_unused(amrlev); // amrlev is only read by the assertion below
+    AMREX_ASSERT(amrlev==0); // only AMR level 0 is supported here
+    AMREX_ASSERT(mglev+1==m_num_mg_levels[0] || mglev==0); // only MG level 0 or the last MG level
+    const auto& mask = (mglev+1 == m_num_mg_levels[0]) ? m_bottom_dot_mask : m_coarse_dot_mask; // bottom mask if mglev is the last MG level, else the coarse mask
+    const auto& mask_ma = mask.const_arrays();
+    const auto& rhs_ma = rhs.const_arrays();
+    auto r = ParReduce(TypeList<ReduceOpSum,ReduceOpSum>{}, TypeList<Real,Real>{}, // one pass reducing two sums: mask*rhs and mask
+                       rhs, IntVect(0),
+                       [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) noexcept
+                           -> GpuTuple<Real,Real>
+                       {
+                           return { mask_ma[box_no](i,j,k) * rhs_ma[box_no](i,j,k),
+                                    mask_ma[box_no](i,j,k) };
+                       });
+
+    Real s1 = amrex::get<0>(r); // local sum of mask*rhs
+    Real s2 = amrex::get<1>(r); // local sum of mask
+    ParallelAllReduce::Sum<Real>({s1,s2}, ParallelContext::CommunicatorSub()); // combine both sums over the sub-communicator
+    return s1/s2; // NOTE(review): no guard against s2 == 0
+}
+
+void
+MLNodeLinOp::fixSolvabilityByOffset (int /*amrlev*/, int /*mglev*/, MultiFab& rhs, Real offset) const
+{ // Applies the solvability fix uniformly; the level arguments are unused.
+    rhs.plus(-offset, 0, 1); // adds -offset to rhs; NOTE(review): (0, 1) presumably selects component 0 only — confirm
+}
+
 namespace {
 
 void MLNodeLinOp_set_dot_mask (MultiFab& dot_mask, iMultiFab const& omask, Geometry const& geom,

Original file line number	Diff line number	Diff line change
`@@ -1776,12 +1776,12 @@ MLMG::makeSolvable ()`
`1776`	`1776`	`else`
`1777`	`1777`	`{`
`1778`	`1778`	`AMREX_ASSERT_WITH_MESSAGE(ncomp==1, "ncomp > 1 not supported for singular nodal problem");`
`1779`		`- Real offset = getNodalSum(0, 0, rhs[0]);`
	`1779`	`+ Real offset = linop.getSolvabilityOffset(0, 0, rhs[0]);`
`1780`	`1780`	`if (verbose >= 4) {`
`1781`	`1781`	`amrex::Print() << "MLMG: Subtracting " << offset << " from rhs\n";`
`1782`	`1782`	`}`
`1783`	`1783`	`for (int alev = 0; alev < namrlevs; ++alev) {`
`1784`		`- rhs[alev].plus(-offset, 0, 1);`
	`1784`	`+ linop.fixSolvabilityByOffset(alev, 0, rhs[alev], offset);`
`1785`	`1785`	`}`
`1786`	`1786`	`}`
`1787`	`1787`	`}`
`@@ -1833,27 +1833,15 @@ MLMG::makeSolvable (int amrlev, int mglev, MultiFab& mf)`
`1833`	`1833`	`else`
`1834`	`1834`	`{`
`1835`	`1835`	`AMREX_ASSERT_WITH_MESSAGE(ncomp==1, "ncomp > 1 not supported for singular nodal problem");`
`1836`		`- Real offset = getNodalSum(amrlev, mglev, mf);`
	`1836`	`+ Real offset = linop.getSolvabilityOffset(amrlev, mglev, mf);`
`1837`	`1837`	`if (verbose >= 4) {`
`1838`	`1838`	`amrex::Print() << "MLMG: Subtracting " << offset << " on level (" << amrlev << ", "`
`1839`	`1839`	`<< mglev << ")\n";`
`1840`	`1840`	`}`
`1841`		`- mf.plus(-offset, 0, 1);`
	`1841`	`+ linop.fixSolvabilityByOffset(amrlev, mglev, mf, offset);`
`1842`	`1842`	`}`
`1843`	`1843`	`}`
`1844`	`1844`
`1845`		`-Real`
`1846`		`-MLMG::getNodalSum (int amrlev, int mglev, MultiFab& mf) const`
`1847`		`-{`
`1848`		`- MultiFab one(mf.boxArray(), mf.DistributionMap(), 1, 0, MFInfo(), mf.Factory());`
`1849`		`- one.setVal(Real(1.0));`
`1850`		`- const bool local = true;`
`1851`		`- Real s1 = linop.xdoty(amrlev, mglev, mf, one, local);`
`1852`		`- Real s2 = linop.xdoty(amrlev, mglev, one, one, local);`
`1853`		`- ParallelAllReduce::Sum<Real>({s1,s2}, ParallelContext::CommunicatorSub());`
`1854`		`- return s1/s2;`
`1855`		`-}`
`1856`		`-`
`1857`	`1845`	`#if defined(AMREX_USE_HYPRE) && (AMREX_SPACEDIM > 1)`
`1858`	`1846`	`void`
`1859`	`1847`	`MLMG::bottomSolveWithHypre (MultiFab& x, const MultiFab& b)`
Original file line number	Diff line number	Diff line change
`@@ -67,7 +67,10 @@ public:`
`67`	`67`	`amrex::Abort("AMReX_MLNodeLinOp::fillSolutionBC::How did we get here?");`
`68`	`68`	`}`
`69`	`69`
`70`		`- virtual void applyInhomogNeumannTerm (int armlev, MultiFab& rhs) const override;`
	`70`	`+ virtual void applyInhomogNeumannTerm (int amrlev, MultiFab& rhs) const override;`
	`71`	`+`
	`72`	`+ virtual Real getSolvabilityOffset (int amrlev, int mglev, MultiFab const& rhs) const override;`
	`73`	`+ virtual void fixSolvabilityByOffset (int amrlev, int mglev, MultiFab& rhs, Real offset) const override;`
`71`	`74`
`72`	`75`	`virtual void prepareForSolve () override {}`
`73`	`76`