Skip to content

Commit 5181c27

Browse files
authored
ParallelCopy: Fix a launch region issue (#4481)
Fix a place in ParallelCopy (`ParallelCopy_nowait`) where the GPU launch region flag (`Gpu::inLaunchRegion()`) was not respected.
1 parent 78268f1 commit 5181c27

File tree

1 file changed

+40
-29
lines changed

1 file changed

+40
-29
lines changed

Src/Base/AMReX_FabArrayCommI.H

Lines changed: 40 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -344,46 +344,57 @@ FabArray<FAB>::ParallelCopy_nowait (const FabArray<FAB>& src,
344344
& amrex::grow(this->box(0),dnghost);
345345
if (op == FabArrayBase::COPY) {
346346
#ifdef AMREX_USE_GPU
347-
ParallelFor(box, ncomp,
347+
if (Gpu::inLaunchRegion()) {
348+
ParallelFor(box, ncomp,
348349
[=] AMREX_GPU_DEVICE (int i, int j, int k, int n) {
349-
da(i,j,k,n) = sa(i,j,k,n);
350-
});
351-
#else
352-
auto const& lo = amrex::lbound(box);
353-
auto const& hi = amrex::ubound(box);
350+
da(i,j,k,n) = sa(i,j,k,n);
351+
});
352+
if (!Gpu::inNoSyncRegion()) {
353+
Gpu::streamSynchronize();
354+
}
355+
} else
356+
#endif
357+
{
358+
auto const& lo = amrex::lbound(box);
359+
auto const& hi = amrex::ubound(box);
354360
#ifdef AMREX_USE_OMP
355361
#pragma omp parallel for collapse(3)
356362
#endif
357-
for (int n = 0; n < ncomp; ++n) {
358-
for (int k = lo.z; k <= hi.z; ++k) {
359-
for (int j = lo.y; j <= hi.y; ++j) {
360-
AMREX_PRAGMA_SIMD
361-
for (int i = lo.x; i <= hi.x; ++i) {
362-
da(i,j,k,n) = sa(i,j,k,n);
363-
}}}}
364-
#endif
363+
for (int n = 0; n < ncomp; ++n) {
364+
for (int k = lo.z; k <= hi.z; ++k) {
365+
for (int j = lo.y; j <= hi.y; ++j) {
366+
AMREX_PRAGMA_SIMD
367+
for (int i = lo.x; i <= hi.x; ++i) {
368+
da(i,j,k,n) = sa(i,j,k,n);
369+
}}}}
370+
}
365371
} else {
366372
#ifdef AMREX_USE_GPU
367-
ParallelFor(box, ncomp,
373+
if (Gpu::inLaunchRegion()) {
374+
ParallelFor(box, ncomp,
368375
[=] AMREX_GPU_DEVICE (int i, int j, int k, int n) {
369-
da(i,j,k,n) += sa(i,j,k,n);
370-
});
371-
#else
372-
auto const& lo = amrex::lbound(box);
373-
auto const& hi = amrex::ubound(box);
376+
da(i,j,k,n) += sa(i,j,k,n);
377+
});
378+
if (!Gpu::inNoSyncRegion()) {
379+
Gpu::streamSynchronize();
380+
}
381+
} else
382+
#endif
383+
{
384+
auto const& lo = amrex::lbound(box);
385+
auto const& hi = amrex::ubound(box);
374386
#ifdef AMREX_USE_OMP
375387
#pragma omp parallel for collapse(3)
376388
#endif
377-
for (int n = 0; n < ncomp; ++n) {
378-
for (int k = lo.z; k <= hi.z; ++k) {
379-
for (int j = lo.y; j <= hi.y; ++j) {
380-
AMREX_PRAGMA_SIMD
381-
for (int i = lo.x; i <= hi.x; ++i) {
382-
da(i,j,k,n) += sa(i,j,k,n);
383-
}}}}
384-
#endif
389+
for (int n = 0; n < ncomp; ++n) {
390+
for (int k = lo.z; k <= hi.z; ++k) {
391+
for (int j = lo.y; j <= hi.y; ++j) {
392+
AMREX_PRAGMA_SIMD
393+
for (int i = lo.x; i <= hi.x; ++i) {
394+
da(i,j,k,n) += sa(i,j,k,n);
395+
}}}}
396+
}
385397
}
386-
Gpu::streamSynchronize();
387398
}
388399
return;
389400
}

0 commit comments

Comments
 (0)