@@ -344,46 +344,57 @@ FabArray<FAB>::ParallelCopy_nowait (const FabArray<FAB>& src,
344344 & amrex::grow (this ->box (0 ),dnghost);
345345 if (op == FabArrayBase::COPY) {
346346#ifdef AMREX_USE_GPU
347- ParallelFor (box, ncomp,
347+ if (Gpu::inLaunchRegion ()) {
348+ ParallelFor (box, ncomp,
348349 [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) {
349- da (i,j,k,n) = sa (i,j,k,n);
350- });
351- #else
352- auto const & lo = amrex::lbound (box);
353- auto const & hi = amrex::ubound (box);
350+ da (i,j,k,n) = sa (i,j,k,n);
351+ });
352+ if (!Gpu::inNoSyncRegion ()) {
353+ Gpu::streamSynchronize ();
354+ }
355+ } else
356+ #endif
357+ {
358+ auto const & lo = amrex::lbound (box);
359+ auto const & hi = amrex::ubound (box);
354360#ifdef AMREX_USE_OMP
355361#pragma omp parallel for collapse(3)
356362#endif
357- for (int n = 0 ; n < ncomp; ++n) {
358- for (int k = lo.z ; k <= hi.z ; ++k) {
359- for (int j = lo.y ; j <= hi.y ; ++j) {
360- AMREX_PRAGMA_SIMD
361- for (int i = lo.x ; i <= hi.x ; ++i) {
362- da (i,j,k,n) = sa (i,j,k,n);
363- }}}}
364- # endif
363+ for (int n = 0 ; n < ncomp; ++n) {
364+ for (int k = lo.z ; k <= hi.z ; ++k) {
365+ for (int j = lo.y ; j <= hi.y ; ++j) {
366+ AMREX_PRAGMA_SIMD
367+ for (int i = lo.x ; i <= hi.x ; ++i) {
368+ da (i,j,k,n) = sa (i,j,k,n);
369+ }}}}
370+ }
365371 } else {
366372#ifdef AMREX_USE_GPU
367- ParallelFor (box, ncomp,
373+ if (Gpu::inLaunchRegion ()) {
374+ ParallelFor (box, ncomp,
368375 [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) {
369- da (i,j,k,n) += sa (i,j,k,n);
370- });
371- #else
372- auto const & lo = amrex::lbound (box);
373- auto const & hi = amrex::ubound (box);
376+ da (i,j,k,n) += sa (i,j,k,n);
377+ });
378+ if (!Gpu::inNoSyncRegion ()) {
379+ Gpu::streamSynchronize ();
380+ }
381+ } else
382+ #endif
383+ {
384+ auto const & lo = amrex::lbound (box);
385+ auto const & hi = amrex::ubound (box);
374386#ifdef AMREX_USE_OMP
375387#pragma omp parallel for collapse(3)
376388#endif
377- for (int n = 0 ; n < ncomp; ++n) {
378- for (int k = lo.z ; k <= hi.z ; ++k) {
379- for (int j = lo.y ; j <= hi.y ; ++j) {
380- AMREX_PRAGMA_SIMD
381- for (int i = lo.x ; i <= hi.x ; ++i) {
382- da (i,j,k,n) += sa (i,j,k,n);
383- }}}}
384- # endif
389+ for (int n = 0 ; n < ncomp; ++n) {
390+ for (int k = lo.z ; k <= hi.z ; ++k) {
391+ for (int j = lo.y ; j <= hi.y ; ++j) {
392+ AMREX_PRAGMA_SIMD
393+ for (int i = lo.x ; i <= hi.x ; ++i) {
394+ da (i,j,k,n) += sa (i,j,k,n);
395+ }}}}
396+ }
385397 }
386- Gpu::streamSynchronize ();
387398 }
388399 return ;
389400 }
0 commit comments