Skip to content

Commit 08de8fa

Browse files
committed
Refactor Array4: Compute strides on the fly
This reduces the size of Array4 from 64 to 40 bytes.
1 parent be801b9 commit 08de8fa

11 files changed

+92
-87
lines changed

Src/AmrCore/AMReX_MFInterp_C.H

Lines changed: 12 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -14,7 +14,7 @@ Real mf_compute_slopes_x (int i, int j, int k, Array4<Real const> const& u, int
1414
Real dc = Real(0.5) * (u(i+1,j,k,nu) - u(i-1,j,k,nu));
1515
if (i == domain.smallEnd(0) && (bc.lo(0) == BCType::ext_dir ||
1616
bc.lo(0) == BCType::hoextrap)) {
17-
if (i+2 < u.end.x) {
17+
if (i+2 < (u.begin.x+u.len.x)) {
1818
dc = -Real(16./15.)*u(i-1,j,k,nu) + Real(0.5)*u(i,j,k,nu)
1919
+ Real(2./3.)*u(i+1,j,k,nu) - Real(0.1)*u(i+2,j,k,nu);
2020
} else {
@@ -40,7 +40,7 @@ Real mf_compute_slopes_y (int i, int j, int k, Array4<Real const> const& u, int
4040
Real dc = Real(0.5) * (u(i,j+1,k,nu) - u(i,j-1,k,nu));
4141
if (j == domain.smallEnd(1) && (bc.lo(1) == BCType::ext_dir ||
4242
bc.lo(1) == BCType::hoextrap)) {
43-
if (j+2 < u.end.y) {
43+
if (j+2 < (u.begin.y+u.len.y)) {
4444
dc = -Real(16./15.)*u(i,j-1,k,nu) + Real(0.5)*u(i,j,k,nu)
4545
+ Real(2./3.)*u(i,j+1,k,nu) - Real(0.1)*u(i,j+2,k,nu);
4646
} else {
@@ -66,7 +66,7 @@ Real mf_compute_slopes_z (int i, int j, int k, Array4<Real const> const& u, int
6666
Real dc = Real(0.5) * (u(i,j,k+1,nu) - u(i,j,k-1,nu));
6767
if (k == domain.smallEnd(2) && (bc.lo(2) == BCType::ext_dir ||
6868
bc.lo(2) == BCType::hoextrap)) {
69-
if (k+2 < u.end.z) {
69+
if (k+2 < (u.begin.z+u.len.z)) {
7070
dc = -Real(16./15.)*u(i,j,k-1,nu) + Real(0.5)*u(i,j,k,nu)
7171
+ Real(2./3.)*u(i,j,k+1,nu) - Real(0.1)*u(i,j,k+2,nu);
7272
} else {
@@ -93,7 +93,7 @@ Real mf_cell_quadratic_compute_slopes_xx (int i, int j, int k,
9393
Real xx = u(i-1,j,k,nu) - 2.0_rt * u(i,j,k,nu) + u(i+1,j,k,nu);
9494
if (i == domain.smallEnd(0) && (bc.lo(0) == BCType::ext_dir ||
9595
bc.lo(0) == BCType::hoextrap)) {
96-
if (i+2 < u.end.x) {
96+
if (i+2 < (u.begin.x+u.len.x)) {
9797
xx = 0._rt;
9898
}
9999
}
@@ -114,7 +114,7 @@ Real mf_cell_quadratic_compute_slopes_yy (int i, int j, int k,
114114
Real yy = u(i,j-1,k,nu) - 2.0_rt * u(i,j,k,nu) + u(i,j+1,k,nu);
115115
if (j == domain.smallEnd(1) && (bc.lo(1) == BCType::ext_dir ||
116116
bc.lo(1) == BCType::hoextrap)) {
117-
if (j+2 < u.end.y) {
117+
if (j+2 < (u.begin.y+u.len.y)) {
118118
yy = 0._rt;
119119
}
120120
}
@@ -135,7 +135,7 @@ Real mf_cell_quadratic_compute_slopes_zz (int i, int j, int k,
135135
Real zz = u(i,j,k-1,nu) - 2.0_rt * u(i,j,k,nu) + u(i,j,k+1,nu);
136136
if (k == domain.smallEnd(2) && (bc.lo(2) == BCType::ext_dir ||
137137
bc.lo(2) == BCType::hoextrap)) {
138-
if (k+2 < u.end.z) {
138+
if (k+2 < (u.begin.z+u.len.z)) {
139139
zz = 0._rt;
140140
}
141141
}
@@ -157,7 +157,7 @@ Real mf_cell_quadratic_compute_slopes_xy (int i, int j, int k,
157157
- u(i-1,j+1,k,nu) + u(i+1,j+1,k,nu) );
158158
if (i == domain.smallEnd(0) && (bc.lo(0) == BCType::ext_dir ||
159159
bc.lo(0) == BCType::hoextrap)) {
160-
if (i+2 < u.end.x) {
160+
if (i+2 < (u.begin.x+u.len.x)) {
161161
xy = 0._rt;
162162
}
163163
}
@@ -169,7 +169,7 @@ Real mf_cell_quadratic_compute_slopes_xy (int i, int j, int k,
169169
}
170170
if (j == domain.smallEnd(1) && (bc.lo(1) == BCType::ext_dir ||
171171
bc.lo(1) == BCType::hoextrap)) {
172-
if (j+2 < u.end.y) {
172+
if (j+2 < (u.begin.y+u.len.y)) {
173173
xy = 0._rt;
174174
}
175175
}
@@ -191,7 +191,7 @@ Real mf_cell_quadratic_compute_slopes_xz (int i, int j, int k,
191191
- u(i-1,j,k+1,nu) + u(i+1,j,k+1,nu) );
192192
if (i == domain.smallEnd(0) && (bc.lo(0) == BCType::ext_dir ||
193193
bc.lo(0) == BCType::hoextrap)) {
194-
if (i+2 < u.end.x) {
194+
if (i+2 < (u.begin.x+u.len.x)) {
195195
xz = 0._rt;
196196
}
197197
}
@@ -203,7 +203,7 @@ Real mf_cell_quadratic_compute_slopes_xz (int i, int j, int k,
203203
}
204204
if (k == domain.smallEnd(2) && (bc.lo(2) == BCType::ext_dir ||
205205
bc.lo(2) == BCType::hoextrap)) {
206-
if (k+2 < u.end.z) {
206+
if (k+2 < (u.begin.z+u.len.z)) {
207207
xz = 0._rt;
208208
}
209209
}
@@ -225,7 +225,7 @@ Real mf_cell_quadratic_compute_slopes_yz (int i, int j, int k,
225225
- u(i,j+1,k-1,nu) + u(i,j+1,k+1,nu) );
226226
if (j == domain.smallEnd(1) && (bc.lo(1) == BCType::ext_dir ||
227227
bc.lo(1) == BCType::hoextrap)) {
228-
if (j+2 < u.end.y) {
228+
if (j+2 < (u.begin.y+u.len.y)) {
229229
yz = 0._rt;
230230
}
231231
}
@@ -237,7 +237,7 @@ Real mf_cell_quadratic_compute_slopes_yz (int i, int j, int k,
237237
}
238238
if (k == domain.smallEnd(2) && (bc.lo(2) == BCType::ext_dir ||
239239
bc.lo(2) == BCType::hoextrap)) {
240-
if (k+2 < u.end.z) {
240+
if (k+2 < (u.begin.z+u.len.z)) {
241241
yz = 0._rt;
242242
}
243243
}

Src/Base/AMReX_Array4.H

Lines changed: 46 additions & 41 deletions
Original file line number | Diff line number | Diff line change
@@ -60,11 +60,8 @@ namespace amrex {
6060
struct Array4
6161
{
6262
T* AMREX_RESTRICT p;
63-
Long jstride = 0;
64-
Long kstride = 0;
65-
Long nstride = 0;
6663
Dim3 begin{1,1,1};
67-
Dim3 end{0,0,0}; // end is hi + 1
64+
Dim3 len{0,0,0};
6865
int ncomp=0;
6966

7067
AMREX_GPU_HOST_DEVICE
@@ -74,22 +71,16 @@ namespace amrex {
7471
AMREX_GPU_HOST_DEVICE
7572
constexpr Array4 (Array4<std::remove_const_t<T>> const& rhs) noexcept
7673
: p(rhs.p),
77-
jstride(rhs.jstride),
78-
kstride(rhs.kstride),
79-
nstride(rhs.nstride),
8074
begin(rhs.begin),
81-
end(rhs.end),
75+
len(rhs.len),
8276
ncomp(rhs.ncomp)
8377
{}
8478

8579
AMREX_GPU_HOST_DEVICE
8680
constexpr Array4 (T* a_p, Dim3 const& a_begin, Dim3 const& a_end, int a_ncomp) noexcept
8781
: p(a_p),
88-
jstride(a_end.x-a_begin.x),
89-
kstride(jstride*(a_end.y-a_begin.y)),
90-
nstride(kstride*(a_end.z-a_begin.z)),
9182
begin(a_begin),
92-
end(a_end),
83+
len{a_end.x-a_begin.x, a_end.y-a_begin.y, a_end.z-a_begin.z},
9384
ncomp(a_ncomp)
9485
{}
9586

@@ -99,12 +90,9 @@ namespace amrex {
9990
std::remove_const_t<U>>,int> = 0>
10091
AMREX_GPU_HOST_DEVICE
10192
constexpr Array4 (Array4<U> const& rhs, int start_comp) noexcept
102-
: p((T*)(rhs.p+start_comp*rhs.nstride)),
103-
jstride(rhs.jstride),
104-
kstride(rhs.kstride),
105-
nstride(rhs.nstride),
93+
: p((T*)(rhs.p+start_comp*rhs.nstride())),
10694
begin(rhs.begin),
107-
end(rhs.end),
95+
len(rhs.len),
10896
ncomp(rhs.ncomp-start_comp)
10997
{}
11098

@@ -114,25 +102,31 @@ namespace amrex {
114102
std::remove_const_t<U>>,int> = 0>
115103
AMREX_GPU_HOST_DEVICE
116104
constexpr Array4 (Array4<U> const& rhs, int start_comp, int num_comps) noexcept
117-
: p((T*)(rhs.p+start_comp*rhs.nstride)),
118-
jstride(rhs.jstride),
119-
kstride(rhs.kstride),
120-
nstride(rhs.nstride),
105+
: p((T*)(rhs.p+start_comp*rhs.nstride())),
121106
begin(rhs.begin),
122-
end(rhs.end),
107+
len(rhs.len),
123108
ncomp(num_comps)
124109
{}
125110

126111
AMREX_GPU_HOST_DEVICE
127112
explicit operator bool() const noexcept { return p != nullptr; }
128113

114+
[[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
115+
Long jstride () const noexcept { return Long(len.x); }
116+
117+
[[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
118+
Long kstride () const noexcept { return Long(len.x)*Long(len.y); }
119+
120+
[[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
121+
Long nstride () const noexcept { return Long(len.x)*Long(len.y)*Long(len.z); }
122+
129123
template <class U=T, std::enable_if_t<!std::is_void_v<U>,int> = 0>
130124
[[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
131125
U& operator() (int i, int j, int k) const noexcept {
132126
#if defined(AMREX_DEBUG) || defined(AMREX_BOUND_CHECK)
133127
index_assert(i,j,k,0);
134128
#endif
135-
return p[(i-begin.x)+(j-begin.y)*jstride+(k-begin.z)*kstride];
129+
return p[(i-begin.x)+(j-begin.y)*jstride()+(k-begin.z)*kstride()];
136130
}
137131

138132
template <class U=T, std::enable_if_t<!std::is_void_v<U>,int> = 0>
@@ -141,7 +135,7 @@ namespace amrex {
141135
#if defined(AMREX_DEBUG) || defined(AMREX_BOUND_CHECK)
142136
index_assert(i,j,k,n);
143137
#endif
144-
return p[(i-begin.x)+(j-begin.y)*jstride+(k-begin.z)*kstride+n*nstride];
138+
return p[(i-begin.x)+(j-begin.y)*jstride()+(k-begin.z)*kstride()+n*nstride()];
145139
}
146140

147141
template <class U=T, std::enable_if_t<!std::is_void_v<U>,int> = 0>
@@ -150,7 +144,7 @@ namespace amrex {
150144
#if defined(AMREX_DEBUG) || defined(AMREX_BOUND_CHECK)
151145
index_assert(i,j,k,0);
152146
#endif
153-
return p + ((i-begin.x)+(j-begin.y)*jstride+(k-begin.z)*kstride);
147+
return p + ((i-begin.x)+(j-begin.y)*jstride()+(k-begin.z)*kstride());
154148
}
155149

156150
template <class U=T, std::enable_if_t<!std::is_void_v<U>,int> = 0>
@@ -159,7 +153,7 @@ namespace amrex {
159153
#if defined(AMREX_DEBUG) || defined(AMREX_BOUND_CHECK)
160154
index_assert(i,j,k,n);
161155
#endif
162-
return p + ((i-begin.x)+(j-begin.y)*jstride+(k-begin.z)*kstride+n*nstride);
156+
return p + ((i-begin.x)+(j-begin.y)*jstride()+(k-begin.z)*kstride()+n*nstride());
163157
}
164158

165159
template <class U=T, std::enable_if_t<!std::is_void_v<U>,int> = 0>
@@ -241,22 +235,22 @@ namespace amrex {
241235

242236
[[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
243237
std::size_t size () const noexcept {
244-
return this->nstride * this->ncomp;
238+
return this->nstride() * this->ncomp;
245239
}
246240

247241
[[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
248242
int nComp () const noexcept { return ncomp; }
249243

250244
[[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
251245
bool contains (int i, int j, int k) const noexcept {
252-
return (i>=begin.x && i<end.x && j>=begin.y && j<end.y && k>=begin.z && k<end.z);
246+
return (i>=begin.x && i<(begin.x+len.x) && j>=begin.y && j<(begin.y+len.y) && k>=begin.z && k<(begin.z+len.z));
253247
}
254248

255249
[[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
256250
bool contains (IntVect const& iv) const noexcept {
257-
return AMREX_D_TERM( iv[0]>=begin.x && iv[0]<end.x,
258-
&& iv[1]>=begin.y && iv[1]<end.y,
259-
&& iv[2]>=begin.z && iv[2]<end.z);
251+
return AMREX_D_TERM( iv[0]>=begin.x && iv[0]<(begin.x+len.x),
252+
&& iv[1]>=begin.y && iv[1]<(begin.y+len.y),
253+
&& iv[2]>=begin.z && iv[2]<(begin.z+len.z));
260254
}
261255

262256
[[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
@@ -268,21 +262,21 @@ namespace amrex {
268262
AMREX_GPU_HOST_DEVICE inline
269263
void index_assert (int i, int j, int k, int n) const
270264
{
271-
if (i<begin.x || i>=end.x || j<begin.y || j>=end.y || k<begin.z || k>=end.z
265+
if (i<begin.x || i>=(begin.x+len.x) || j<begin.y || j>=(begin.y+len.y) || k<begin.z || k>=(begin.z+len.z)
272266
|| n < 0 || n >= ncomp) {
273267
AMREX_IF_ON_DEVICE((
274268
AMREX_DEVICE_PRINTF(" (%d,%d,%d,%d) is out of bound (%d:%d,%d:%d,%d:%d,0:%d)\n",
275-
i, j, k, n, begin.x, end.x-1, begin.y, end.y-1,
276-
begin.z, end.z-1, ncomp-1);
269+
i, j, k, n, begin.x, (begin.x+len.x)-1, begin.y, (begin.y+len.y)-1,
270+
begin.z, (begin.z+len.z)-1, ncomp-1);
277271
amrex::Abort();
278272
))
279273
AMREX_IF_ON_HOST((
280274
std::stringstream ss;
281275
ss << " (" << i << "," << j << "," << k << "," << n
282276
<< ") is out of bound ("
283-
<< begin.x << ":" << end.x-1 << ","
284-
<< begin.y << ":" << end.y-1 << ","
285-
<< begin.z << ":" << end.z-1 << ","
277+
<< begin.x << ":" << (begin.x+len.x)-1 << ","
278+
<< begin.y << ":" << (begin.y+len.y)-1 << ","
279+
<< begin.z << ":" << (begin.z+len.z)-1 << ","
286280
<< "0:" << ncomp-1 << ")";
287281
amrex::Abort(ss.str());
288282
))
@@ -292,15 +286,19 @@ namespace amrex {
292286

293287
[[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
294288
CellData<T> cellData (int i, int j, int k) const noexcept {
295-
return CellData<T>{this->ptr(i,j,k), nstride, ncomp};
289+
return CellData<T>{this->ptr(i,j,k), nstride(), ncomp};
296290
}
297291
};
298292

299293
template <class Tto, class Tfrom>
300294
[[nodiscard]] AMREX_GPU_HOST_DEVICE
301295
Array4<Tto> ToArray4 (Array4<Tfrom> const& a_in) noexcept
302296
{
303-
return Array4<Tto>((Tto*)(a_in.p), a_in.begin, a_in.end, a_in.ncomp);
297+
return Array4<Tto>((Tto*)(a_in.p), a_in.begin,
298+
Dim3{a_in.begin.x + a_in.len.x,
299+
a_in.begin.y + a_in.len.y,
300+
a_in.begin.z + a_in.len.z},
301+
a_in.ncomp);
304302
}
305303

306304
template <class T>
@@ -314,14 +312,21 @@ namespace amrex {
314312
[[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
315313
Dim3 ubound (Array4<T> const& a) noexcept
316314
{
317-
return Dim3{a.end.x-1,a.end.y-1,a.end.z-1};
315+
return Dim3{a.begin.x+a.len.x-1,a.begin.y+a.len.y-1,a.begin.z+a.len.z-1};
316+
}
317+
318+
template <class T>
319+
[[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
320+
Dim3 end (Array4<T> const& a) noexcept
321+
{
322+
return Dim3{a.begin.x+a.len.x,a.begin.y+a.len.y,a.begin.z+a.len.z};
318323
}
319324

320325
template <class T>
321326
[[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
322327
Dim3 length (Array4<T> const& a) noexcept
323328
{
324-
return Dim3{a.end.x-a.begin.x,a.end.y-a.begin.y,a.end.z-a.begin.z};
329+
return a.len;
325330
}
326331

327332
template <typename T>

Src/Base/AMReX_BaseFab.H

Lines changed: 8 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -2013,32 +2013,32 @@ template<class T>
20132013
BaseFab<T>::BaseFab (Array4<T> const& a) noexcept
20142014
: dptr(a.p),
20152015
domain(IntVect(AMREX_D_DECL(a.begin.x,a.begin.y,a.begin.z)),
2016-
IntVect(AMREX_D_DECL(a.end.x-1,a.end.y-1,a.end.z-1))),
2017-
nvar(a.ncomp), truesize(a.ncomp*a.nstride)
2016+
IntVect(AMREX_D_DECL(a.begin.x+a.len.x-1,a.begin.y+a.len.y-1,a.begin.z+a.len.z-1))),
2017+
nvar(a.ncomp), truesize(a.ncomp*a.nstride())
20182018
{}
20192019

20202020
template<class T>
20212021
BaseFab<T>::BaseFab (Array4<T> const& a, IndexType t) noexcept
20222022
: dptr(a.p),
20232023
domain(IntVect(AMREX_D_DECL(a.begin.x,a.begin.y,a.begin.z)),
2024-
IntVect(AMREX_D_DECL(a.end.x-1,a.end.y-1,a.end.z-1)), t),
2025-
nvar(a.ncomp), truesize(a.ncomp*a.nstride)
2024+
IntVect(AMREX_D_DECL(a.begin.x+a.len.x-1,a.begin.y+a.len.y-1,a.begin.z+a.len.z-1)), t),
2025+
nvar(a.ncomp), truesize(a.ncomp*a.nstride())
20262026
{}
20272027

20282028
template<class T>
20292029
BaseFab<T>::BaseFab (Array4<T const> const& a) noexcept
20302030
: dptr(const_cast<T*>(a.p)),
20312031
domain(IntVect(AMREX_D_DECL(a.begin.x,a.begin.y,a.begin.z)),
2032-
IntVect(AMREX_D_DECL(a.end.x-1,a.end.y-1,a.end.z-1))),
2033-
nvar(a.ncomp), truesize(a.ncomp*a.nstride)
2032+
IntVect(AMREX_D_DECL(a.begin.x+a.len.x-1,a.begin.y+a.len.y-1,a.begin.z+a.len.z-1))),
2033+
nvar(a.ncomp), truesize(a.ncomp*a.nstride())
20342034
{}
20352035

20362036
template<class T>
20372037
BaseFab<T>::BaseFab (Array4<T const> const& a, IndexType t) noexcept
20382038
: dptr(const_cast<T*>(a.p)),
20392039
domain(IntVect(AMREX_D_DECL(a.begin.x,a.begin.y,a.begin.z)),
2040-
IntVect(AMREX_D_DECL(a.end.x-1,a.end.y-1,a.end.z-1)), t),
2041-
nvar(a.ncomp), truesize(a.ncomp*a.nstride)
2040+
IntVect(AMREX_D_DECL(a.begin.x+a.len.x-1,a.begin.y+a.len.y-1,a.begin.z+a.len.z-1)), t),
2041+
nvar(a.ncomp), truesize(a.ncomp*a.nstride())
20422042
{}
20432043

20442044
template <class T>

Src/Base/AMReX_Box.H

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -95,7 +95,7 @@ public:
9595
AMREX_GPU_HOST_DEVICE
9696
explicit BoxND (Array4<T> const& a) noexcept
9797
: smallend(a.begin),
98-
bigend(IntVectND<dim>(a.end) - 1)
98+
bigend(IntVectND<dim>(ubound(a)))
9999
{}
100100

101101
// dtor, copy-ctor, copy-op=, move-ctor, and move-op= are compiler generated.

Src/Base/AMReX_CudaGraph.H

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -39,16 +39,16 @@ makeCopyMemory (Array4<T> const& src, Array4<U> const& dst, int scomp, int ncomp
3939
mem.src = (void*)(src.p);
4040
mem.dst = (void*)(dst.p);
4141
mem.src_begin = src.begin;
42-
mem.src_end = src.end;
42+
mem.src_end = end(src);
4343
mem.dst_begin = dst.begin;
44-
mem.dst_end = dst.end;
44+
mem.dst_end = end(dst);
4545
mem.scomp = scomp;
4646
mem.ncomp = ncomp;
4747
return mem;
4848

4949
#else
5050

51-
return CopyMemory{ (void*)(src.p), (void*)(dst.p), src.begin, src.end, dst.begin, dst.end, scomp, ncomp };
51+
return CopyMemory{ (void*)(src.p), (void*)(dst.p), src.begin, end(src), dst.begin, end(dst), scomp, ncomp };
5252
#endif
5353
}
5454

0 commit comments

Comments
 (0)