Skip to content

Commit 9751217

Browse files
authored
Curl Curl solver: 4-color Gauss-Seidel smoother (#3778)
This implements the 4-color Gauss-Seidel smoother of Li et. al. 2020. "An Efficient Preconditioner for 3-D Finite Difference Modeling of the Electromagnetic Diffusion Process in the Frequency Domain", IEEE Transactions on Geoscience and Remote Sensing, 58, 500-509.
1 parent 2ecafcf commit 9751217

File tree

10 files changed

+959
-333
lines changed

10 files changed

+959
-333
lines changed

Src/Base/AMReX_GpuMemory.H

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ private:
104104

105105
#else
106106

107-
DeviceScalar (T init_val) : d(init_val) {}
107+
DeviceScalar (T const& init_val) : d(init_val) {}
108108
DeviceScalar () = default;
109109
~DeviceScalar () = default;
110110

Src/Base/AMReX_LUSolver.H

Lines changed: 146 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,146 @@
1+
#ifndef AMREX_LU_SOLVER_H_
2+
#define AMREX_LU_SOLVER_H_
3+
#include <AMReX_Config.H>
4+
5+
#include <AMReX_Arena.H>
6+
#include <AMReX_Array.H>
7+
#include <cmath>
8+
#include <limits>
9+
10+
namespace amrex {
11+
12+
// https://en.wikipedia.org/wiki/LU_decomposition
13+
14+
template <int N, typename T>
15+
class LUSolver
16+
{
17+
public:
18+
19+
LUSolver () = default;
20+
21+
LUSolver (Array2D<T, 0, N-1, 0, N-1, Order::C> const& a_mat);
22+
23+
void define (Array2D<T, 0, N-1, 0, N-1, Order::C> const& a_mat);
24+
25+
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
26+
void operator() (T* AMREX_RESTRICT x, T const* AMREX_RESTRICT b) const
27+
{
28+
for (int i = 0; i < N; ++i) {
29+
x[i] = b[m_piv(i)];
30+
for (int k = 0; k < i; ++k) {
31+
x[i] -= m_mat(i,k) * x[k];
32+
}
33+
}
34+
35+
for (int i = N-1; i >= 0; --i) {
36+
for (int k = i+1; k < N; ++k) {
37+
x[i] -= m_mat(i,k) * x[k];
38+
}
39+
x[i] *= m_mat(i,i);
40+
}
41+
}
42+
43+
[[nodiscard]] AMREX_GPU_HOST_DEVICE
44+
Array2D<T,0,N-1,0,N-1,Order::C> invert () const
45+
{
46+
Array2D<T,0,N-1,0,N-1,Order::C> IA;
47+
for (int j = 0; j < N; ++j) {
48+
for (int i = 0; i < N; ++i) {
49+
IA(i,j) = (m_piv(i) == j) ? T(1.0) : T(0.0);
50+
for (int k = 0; k < i; ++k) {
51+
IA(i,j) -= m_mat(i,k) * IA(k,j);
52+
}
53+
}
54+
for (int i = N-1; i >= 0; --i) {
55+
for (int k = i+1; k < N; ++k) {
56+
IA(i,j) -= m_mat(i,k) * IA(k,j);
57+
}
58+
IA(i,j) *= m_mat(i,i);
59+
}
60+
}
61+
return IA;
62+
}
63+
64+
[[nodiscard]] AMREX_GPU_HOST_DEVICE
65+
T determinant () const
66+
{
67+
T det = m_mat(0,0);
68+
for (int i = 1; i < N; ++i) {
69+
det *= m_mat(i,i);
70+
}
71+
det = T(1.0) / det;
72+
return (m_npivs % 2 == 0) ? det : -det;
73+
}
74+
75+
private:
76+
77+
void define_innard ();
78+
79+
Array2D<T, 0, N-1, 0, N-1, Order::C> m_mat;
80+
Array1D<int, 0, N-1> m_piv;
81+
int m_npivs = 0;
82+
};
83+
84+
template <int N, typename T>
85+
LUSolver<N,T>::LUSolver (Array2D<T, 0, N-1, 0, N-1, Order::C> const& a_mat)
86+
: m_mat(a_mat)
87+
{
88+
define_innard();
89+
}
90+
91+
template <int N, typename T>
92+
void LUSolver<N,T>::define (Array2D<T, 0, N-1, 0, N-1, Order::C> const& a_mat)
93+
{
94+
m_mat = a_mat;
95+
define_innard();
96+
}
97+
98+
template <int N, typename T>
99+
void LUSolver<N,T>::define_innard ()
100+
{
101+
static_assert(N > 1);
102+
static_assert(std::is_floating_point_v<T>);
103+
104+
for (int i = 0; i < N; ++i) { m_piv(i) = i; }
105+
m_npivs = 0;
106+
107+
for (int i = 0; i < N; ++i) {
108+
T maxA = 0;
109+
int imax = i;
110+
111+
for (int k = i; k < N; ++k) {
112+
auto const absA = std::abs(m_mat(k,i));
113+
if (absA > maxA) {
114+
maxA = absA;
115+
imax = k;
116+
}
117+
}
118+
119+
if (maxA < std::numeric_limits<T>::min()) {
120+
amrex::Abort("LUSolver: matrix is degenerate");
121+
}
122+
123+
if (imax != i) {
124+
std::swap(m_piv(i), m_piv(imax));
125+
for (int j = 0; j < N; ++j) {
126+
std::swap(m_mat(i,j), m_mat(imax,j));
127+
}
128+
++m_npivs;
129+
}
130+
131+
for (int j = i+1; j < N; ++j) {
132+
m_mat(j,i) /= m_mat(i,i);
133+
for (int k = i+1; k < N; ++k) {
134+
m_mat(j,k) -= m_mat(j,i) * m_mat(i,k);
135+
}
136+
}
137+
}
138+
139+
for (int i = 0; i < N; ++i) {
140+
m_mat(i,i) = T(1) / m_mat(i,i);
141+
}
142+
}
143+
144+
}
145+
146+
#endif

Src/Base/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -270,6 +270,8 @@ foreach(D IN LISTS AMReX_SPACEDIM)
270270
Parser/amrex_iparser.tab.cpp
271271
Parser/amrex_iparser.tab.nolint.H
272272
Parser/amrex_iparser.tab.h
273+
# Dense linear algebra solver using LU decomposition
274+
AMReX_LUSolver.H
273275
# AMReX Hydro -----------------------------------------------------
274276
AMReX_Slopes_K.H
275277
# Forward declaration -----------------------------------------------------

Src/Base/Make.package

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -326,5 +326,8 @@ CEXE_sources += AMReX_IParser_Exe.cpp
326326
CEXE_headers += AMReX_IParser.H
327327
CEXE_sources += AMReX_IParser.cpp
328328

329+
# Dense linear algebra solver using LU decomposition
330+
CEXE_headers += AMReX_LUSolver.H
331+
329332
VPATH_LOCATIONS += $(AMREX_HOME)/Src/Base/Parser
330333
INCLUDE_LOCATIONS += $(AMREX_HOME)/Src/Base/Parser

Src/LinearSolvers/MLMG/AMReX_MLCurlCurl.H

Lines changed: 17 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
#include <AMReX_Config.H>
44

55
#include <AMReX_MLLinOp.H>
6+
#include <AMReX_MLCurlCurl_K.H>
67

78
namespace amrex {
89

@@ -12,8 +13,16 @@ namespace amrex {
1213
* Here E is an Array of 3 MultiFabs on staggered grid, alpha is a positive
1314
* scalar, and beta is a non-negative scalar.
1415
*
16+
* It's the caller's responsibility to make sure rhs has consistent nodal
17+
* data. If needed, one could use FabArray::OverrideSync to synchronize
18+
* nodal data.
19+
*
20+
* The smoother is based on the 4-color Gauss-Seidel smoother of Li
21+
* et. al. 2020. "An Efficient Preconditioner for 3-D Finite Difference
22+
* Modeling of the Electromagnetic Diffusion Process in the Frequency
23+
* Domain", IEEE Transactions on Geoscience and Remote Sensing, 58, 500-509.
24+
*
1525
* TODO: If beta is zero, the system could be singular.
16-
* TODO: Try different restriction & interpolation strategies.
1726
*/
1827
class MLCurlCurl
1928
: public MLLinOpT<Array<MultiFab,3> >
@@ -92,21 +101,20 @@ public:
92101

93102
// public for cuda
94103

95-
void smooth (int amrlev, int mglev, MF& sol, MultiFab const& rhs,
96-
int redblack) const;
104+
void smooth4 (int amrlev, int mglev, MF& sol, MF const& rhs, int color) const;
97105

98106
void compresid (int amrlev, int mglev, MF& resid, MF const& b) const;
99107

100-
void applyPhysBC (int amrlev, int mglev, MultiFab& mf) const;
108+
void applyPhysBC (int amrlev, int mglev, MultiFab& mf, CurlCurlStateType type) const;
101109

102110
private:
103111

104-
void applyBC (int amrlev, int mglev, MF& in) const;
105-
void applyBC (int amrlev, int mglev, MultiFab& mf) const;
112+
void applyBC (int amrlev, int mglev, MF& in, CurlCurlStateType type) const;
106113

107114
[[nodiscard]] iMultiFab const& getDotMask (int amrlev, int mglev, int idim) const;
108115

109-
[[nodiscard]] int getDirichlet (int amrlev, int mglev, int idim, int face) const;
116+
[[nodiscard]] CurlCurlDirichletInfo getDirichletInfo (int amrlev, int mglev) const;
117+
[[nodiscard]] CurlCurlSymmetryInfo getSymmetryInfo (int amrlev, int mglev) const;
110118

111119
RT m_alpha = std::numeric_limits<RT>::lowest();
112120
RT m_beta = std::numeric_limits<RT>::lowest();
@@ -118,9 +126,10 @@ private:
118126
{IntVect(0,1), IntVect(1,0), IntVect(1,1)};
119127
#endif
120128

121-
Vector<Vector<Array<std::unique_ptr<iMultiFab>,3>>> m_dirichlet_mask;
122129
mutable Vector<Vector<Array<std::unique_ptr<iMultiFab>,3>>> m_dotmask;
123130
static constexpr int m_ncomp = 1;
131+
Vector<Vector<std::unique_ptr<Gpu::DeviceScalar
132+
<LUSolver<AMREX_SPACEDIM*2,RT>>>>> m_lusolver;
124133
};
125134

126135
}

0 commit comments

Comments
 (0)