Skip to content

Commit fd6d649

Browse files
authored
Kernel fusing in Geometry (#2606)
1 parent c62334c commit fd6d649

File tree

1 file changed

+85
-10
lines changed

1 file changed

+85
-10
lines changed

Src/Base/AMReX_Geometry.cpp

Lines changed: 85 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
#include <AMReX_MultiFab.H>
77
#include <AMReX_Utility.H>
88
#include <AMReX_SPACE.H>
9+
#include <AMReX_COORDSYS_C.H>
910

1011
#include <AMReX_OpenMP.H>
1112

@@ -214,14 +215,38 @@ Geometry::GetVolume (MultiFab& vol,
214215
}
215216

216217
void
217-
Geometry::GetVolume (MultiFab& vol) const
218+
Geometry::GetVolume (MultiFab& vol) const
218219
{
220+
const auto a_dx = CellSizeArray();
221+
if (IsCartesian()) {
222+
vol.setVal(AMREX_D_TERM(a_dx[0],*a_dx[1],*a_dx[2]), 0, 1, vol.nGrowVect());
223+
} else {
224+
#if (AMREX_SPACEDIM == 3)
225+
amrex::Abort("Geometry::GetVolume: for 3d, only Cartesian is supported");
226+
#else
227+
#ifdef AMREX_USE_GPU
228+
if (Gpu::inLaunchRegion()) {
229+
GpuArray<Real,AMREX_SPACEDIM> a_offset{{AMREX_D_DECL(offset[0],offset[1],offset[2])}};
230+
int coord = (int) c_sys;
231+
auto const& ma = vol.arrays();
232+
ParallelFor(vol, vol.nGrowVect(),
233+
[=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) noexcept
234+
{
235+
amrex_setvol(makeSingleCellBox(i,j,k), ma[box_no], a_offset, a_dx, coord);
236+
});
237+
Gpu::streamSynchronize();
238+
} else
239+
#endif
240+
{
219241
#ifdef AMREX_USE_OMP
220242
#pragma omp parallel if (Gpu::notInLaunchRegion())
221243
#endif
222-
for (MFIter mfi(vol,TilingIfNotGPU()); mfi.isValid(); ++mfi)
223-
{
224-
CoordSys::SetVolume(vol[mfi], mfi.growntilebox());
244+
for (MFIter mfi(vol,TilingIfNotGPU()); mfi.isValid(); ++mfi)
245+
{
246+
CoordSys::SetVolume(vol[mfi], mfi.growntilebox());
247+
}
248+
}
249+
#endif
225250
}
226251
}
227252

@@ -243,12 +268,29 @@ Geometry::GetDLogA (MultiFab& dloga,
243268
int ngrow) const
244269
{
245270
dloga.define(grds,dm,1,ngrow,MFInfo(),FArrayBoxFactory());
271+
272+
#ifdef AMREX_USE_GPU
273+
if (Gpu::inLaunchRegion()) {
274+
auto const& ma = dloga.arrays();
275+
GpuArray<Real,AMREX_SPACEDIM> a_offset{AMREX_D_DECL(offset[0],offset[1],offset[2])};
276+
GpuArray<Real,AMREX_SPACEDIM> a_dx {AMREX_D_DECL( dx[0], dx[1], dx[2])};
277+
int coord = (int) c_sys;
278+
ParallelFor(dloga, IntVect(ngrow),
279+
[=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) noexcept
280+
{
281+
amrex_setdloga(makeSingleCellBox(i,j,k), ma[box_no], a_offset, a_dx, dir, coord);
282+
});
283+
Gpu::streamSynchronize();
284+
} else
285+
#endif
286+
{
246287
#ifdef AMREX_USE_OMP
247288
#pragma omp parallel if (Gpu::notInLaunchRegion())
248289
#endif
249-
for (MFIter mfi(dloga,TilingIfNotGPU()); mfi.isValid(); ++mfi)
250-
{
251-
CoordSys::SetDLogA(dloga[mfi], mfi.growntilebox(), dir);
290+
for (MFIter mfi(dloga,TilingIfNotGPU()); mfi.isValid(); ++mfi)
291+
{
292+
CoordSys::SetDLogA(dloga[mfi], mfi.growntilebox(), dir);
293+
}
252294
}
253295
}
254296
#endif
@@ -271,12 +313,45 @@ void
271313
Geometry::GetFaceArea (MultiFab& area,
272314
int dir) const
273315
{
316+
const auto a_dx = CellSizeArray();
317+
318+
if (IsCartesian()) {
319+
#if (AMREX_SPACEDIM == 1)
320+
const Real a0 = 1._rt;
321+
#elif (AMREX_SPACEDIM == 2)
322+
const Real a0 = (dir == 0) ? a_dx[1] : a_dx[0];
323+
#else
324+
const Real a0 = (dir == 0) ? a_dx[1]*a_dx[2]
325+
: ((dir == 1) ? a_dx[0]*a_dx[2] : a_dx[0]*a_dx[1]);
326+
#endif
327+
area.setVal(a0, 0, 1, area.nGrowVect());
328+
} else {
329+
#if (AMREX_SPACEDIM == 3)
330+
amrex::Abort("Geometry::GetFaceArea:: for 3d, only Cartesian is supported");
331+
#else
332+
#ifdef AMREX_USE_GPU
333+
if (Gpu::inLaunchRegion()) {
334+
GpuArray<Real,AMREX_SPACEDIM> a_offset{AMREX_D_DECL(offset[0],offset[1],offset[2])};
335+
int coord = (int) c_sys;
336+
auto const& ma = area.arrays();
337+
ParallelFor(area, area.nGrowVect(),
338+
[=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) noexcept
339+
{
340+
amrex_setarea(makeSingleCellBox(i,j,k), ma[box_no], a_offset, a_dx, dir, coord);
341+
});
342+
Gpu::streamSynchronize();
343+
} else
344+
#endif
345+
{
274346
#ifdef AMREX_USE_OMP
275347
#pragma omp parallel if (Gpu::notInLaunchRegion())
276348
#endif
277-
for (MFIter mfi(area,TilingIfNotGPU()); mfi.isValid(); ++mfi)
278-
{
279-
CoordSys::SetFaceArea(area[mfi],mfi.growntilebox(),dir);
349+
for (MFIter mfi(area,TilingIfNotGPU()); mfi.isValid(); ++mfi)
350+
{
351+
CoordSys::SetFaceArea(area[mfi],mfi.growntilebox(),dir);
352+
}
353+
}
354+
#endif
280355
}
281356
}
282357

0 commit comments

Comments
 (0)