Commit 4290dbb

Move cast and NormHelper

1 parent 9e35dc1 commit 4290dbb

File tree: 1 file changed, +163 −152 lines changed

Src/Base/AMReX_MultiFabUtil.H

Lines changed: 163 additions & 152 deletions
```diff
@@ -226,25 +226,36 @@ namespace amrex
 
     //! example: auto mf = amrex::cast<MultiFab>(imf);
     template <typename T, typename U>
-    T cast (U const& mf_in)
-    {
-        T mf_out(mf_in.boxArray(), mf_in.DistributionMap(), mf_in.nComp(), mf_in.nGrowVect());
+    T cast (U const& mf_in);
 
-#ifdef AMREX_USE_OMP
-#pragma omp parallel if (Gpu::notInLaunchRegion())
-#endif
-        for (MFIter mfi(mf_in); mfi.isValid(); ++mfi)
-        {
-            const Long n = mfi.fabbox().numPts() * mf_in.nComp();
-            auto * pdst = mf_out[mfi].dataPtr();
-            auto const* psrc = mf_in [mfi].dataPtr();
-            AMREX_HOST_DEVICE_PARALLEL_FOR_1D ( n, i,
-            {
-                pdst[i] = static_cast<typename T::value_type>(psrc[i]); // NOLINT(bugprone-signed-char-misuse)
-            });
-        }
-        return mf_out;
-    }
+    /**
+     * \brief Returns part of a norm based on two MultiFabs.
+     *
+     * The MultiFabs MUST have the same underlying BoxArray.
+     * The function f is applied elementwise as f(x(i,j,k,n),y(i,j,k,n))
+     * inside the summation.
+     */
+    template <typename F>
+    Real NormHelper (const MultiFab& x, int xcomp,
+                     const MultiFab& y, int ycomp,
+                     F && f,
+                     int numcomp, IntVect nghost, bool local);
+
+    /**
+     * \brief Returns part of a norm based on three MultiFabs.
+     *
+     * The MultiFabs MUST have the same underlying BoxArray.
+     * The predicate pf is used to test the mask.
+     * The function f is applied elementwise as f(x(i,j,k,n),y(i,j,k,n))
+     * inside the summation (subject to a valid mask entry pf(mask(i,j,k,n))).
+     */
+    template <typename MMF, typename Pred, typename F>
+    Real NormHelper (const MMF& mask,
+                     const MultiFab& x, int xcomp,
+                     const MultiFab& y, int ycomp,
+                     Pred && pf,
+                     F && f,
+                     int numcomp, IntVect nghost, bool local);
 
     /**
      * \brief Reduce FabArray/MultiFab data to a plane.
```
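The `//! example` comment above the `cast` declaration is the header's own usage hint; below is a slightly fuller, hypothetical sketch of the same call. The function name `to_real` and the field `imf` are illustrative, and the includes assume a standard AMReX build.

```cpp
#include <AMReX_iMultiFab.H>
#include <AMReX_MultiFab.H>
#include <AMReX_MultiFabUtil.H>

using namespace amrex;

// Convert an integer field (e.g. a flag array) to Real for output.
// cast<> allocates the result on the same BoxArray/DistributionMapping
// with the same number of components and ghost cells, then copies
// element-by-element with a static_cast.
MultiFab to_real (iMultiFab const& imf)
{
    return cast<MultiFab>(imf);
}
```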
```diff
@@ -621,140 +632,6 @@ void average_down (const FabArray<FAB>& S_fine, FabArray<FAB>& S_crse,
     }
 }
 
-
-
-
-
-/**
- * \brief Returns part of a norm based on two MultiFabs
- * The MultiFabs MUST have the same underlying BoxArray.
- * The function f is applied elementwise as f(x(i,j,k,n),y(i,j,k,n))
- * inside the summation (subject to a valid mask entry pf(mask(i,j,k,n)
- */
-
-template <typename F>
-Real
-NormHelper (const MultiFab& x, int xcomp,
-            const MultiFab& y, int ycomp,
-            F && f,
-            int numcomp, IntVect nghost, bool local)
-{
-    BL_ASSERT(x.boxArray() == y.boxArray());
-    BL_ASSERT(x.DistributionMap() == y.DistributionMap());
-    BL_ASSERT(x.nGrowVect().allGE(nghost) && y.nGrowVect().allGE(nghost));
-
-    Real sm = Real(0.0);
-#ifdef AMREX_USE_GPU
-    if (Gpu::inLaunchRegion()) {
-        auto const& xma = x.const_arrays();
-        auto const& yma = y.const_arrays();
-        sm = ParReduce(TypeList<ReduceOpSum>{}, TypeList<Real>{}, x, nghost,
-        [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) noexcept -> GpuTuple<Real>
-        {
-            Real t = Real(0.0);
-            auto const& xfab = xma[box_no];
-            auto const& yfab = yma[box_no];
-            for (int n = 0; n < numcomp; ++n) {
-                t += f(xfab(i,j,k,xcomp+n) , yfab(i,j,k,ycomp+n));
-            }
-            return t;
-        });
-    } else
-#endif
-    {
-#ifdef AMREX_USE_OMP
-#pragma omp parallel if (!system::regtest_reduction) reduction(+:sm)
-#endif
-        for (MFIter mfi(x,true); mfi.isValid(); ++mfi)
-        {
-            Box const& bx = mfi.growntilebox(nghost);
-            Array4<Real const> const& xfab = x.const_array(mfi);
-            Array4<Real const> const& yfab = y.const_array(mfi);
-            AMREX_LOOP_4D(bx, numcomp, i, j, k, n,
-            {
-                sm += f(xfab(i,j,k,xcomp+n) , yfab(i,j,k,ycomp+n));
-            });
-        }
-    }
-
-    if (!local) {
-        ParallelAllReduce::Sum(sm, ParallelContext::CommunicatorSub());
-    }
-
-    return sm;
-}
-
-/**
- * \brief Returns part of a norm based on three MultiFabs
- * The MultiFabs MUST have the same underlying BoxArray.
- * The Predicate pf is used to test the mask
- * The function f is applied elementwise as f(x(i,j,k,n),y(i,j,k,n))
- * inside the summation (subject to a valid mask entry pf(mask(i,j,k,n)
- */
-
-template <typename MMF, typename Pred, typename F>
-Real
-NormHelper (const MMF& mask,
-            const MultiFab& x, int xcomp,
-            const MultiFab& y, int ycomp,
-            Pred && pf,
-            F && f,
-            int numcomp, IntVect nghost, bool local)
-{
-    BL_ASSERT(x.boxArray() == y.boxArray());
-    BL_ASSERT(x.boxArray() == mask.boxArray());
-    BL_ASSERT(x.DistributionMap() == y.DistributionMap());
-    BL_ASSERT(x.DistributionMap() == mask.DistributionMap());
-    BL_ASSERT(x.nGrowVect().allGE(nghost) && y.nGrowVect().allGE(nghost));
-    BL_ASSERT(mask.nGrowVect().allGE(nghost));
-
-    Real sm = Real(0.0);
-#ifdef AMREX_USE_GPU
-    if (Gpu::inLaunchRegion()) {
-        auto const& xma = x.const_arrays();
-        auto const& yma = y.const_arrays();
-        auto const& mma = mask.const_arrays();
-        sm = ParReduce(TypeList<ReduceOpSum>{}, TypeList<Real>{}, x, nghost,
-        [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) noexcept -> GpuTuple<Real>
-        {
-            Real t = Real(0.0);
-            if (pf(mma[box_no](i,j,k))) {
-                auto const& xfab = xma[box_no];
-                auto const& yfab = yma[box_no];
-                for (int n = 0; n < numcomp; ++n) {
-                    t += f(xfab(i,j,k,xcomp+n) , yfab(i,j,k,ycomp+n));
-                }
-            }
-            return t;
-        });
-    } else
-#endif
-    {
-#ifdef AMREX_USE_OMP
-#pragma omp parallel if (!system::regtest_reduction) reduction(+:sm)
-#endif
-        for (MFIter mfi(x,true); mfi.isValid(); ++mfi)
-        {
-            Box const& bx = mfi.growntilebox(nghost);
-            Array4<Real const> const& xfab = x.const_array(mfi);
-            Array4<Real const> const& yfab = y.const_array(mfi);
-            auto const& mfab = mask.const_array(mfi);
-            AMREX_LOOP_4D(bx, numcomp, i, j, k, n,
-            {
-                if (pf(mfab(i,j,k))) {
-                    sm += f(xfab(i,j,k,xcomp+n) , yfab(i,j,k,ycomp+n));
-                }
-            });
-        }
-    }
-
-    if (!local) {
-        ParallelAllReduce::Sum(sm, ParallelContext::CommunicatorSub());
-    }
-
-    return sm;
-}
-
 template <typename CMF, typename FMF,
           std::enable_if_t<IsFabArray_v<CMF> && IsFabArray_v<FMF>, int> FOO>
 void average_face_to_cellcenter (CMF& cc, int dcomp,
```
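For orientation while reading the deleted implementation: a minimal, hypothetical caller of the two-MultiFab overload, assuming `x` and `y` share a BoxArray. The elementwise lambda must be callable on both host and device, since either reduction path above may invoke it; the name `dot` is illustrative.

```cpp
#include <AMReX_MultiFab.H>
#include <AMReX_MultiFabUtil.H>

using namespace amrex;

// Dot product over component 0 of x and y, valid cells only.
// The lambda is the summand f(a,b); numcomp=1, nghost=0, and
// local=false performs the MPI reduction across ranks at the end.
Real dot (MultiFab const& x, MultiFab const& y)
{
    return NormHelper(x, 0, y, 0,
                      [=] AMREX_GPU_HOST_DEVICE (Real a, Real b) { return a*b; },
                      1, IntVect(0), false);
}
```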
```diff
@@ -1017,6 +894,140 @@ MF get_line_data (MF const& mf, int dir, IntVect const& cell)
     }
 }
 
+template <typename T, typename U>
+T cast (U const& mf_in)
+{
+    T mf_out(mf_in.boxArray(), mf_in.DistributionMap(), mf_in.nComp(), mf_in.nGrowVect());
+
+#ifdef AMREX_USE_OMP
+#pragma omp parallel if (Gpu::notInLaunchRegion())
+#endif
+    for (MFIter mfi(mf_in); mfi.isValid(); ++mfi)
+    {
+        const Long n = mfi.fabbox().numPts() * mf_in.nComp();
+        auto * pdst = mf_out[mfi].dataPtr();
+        auto const* psrc = mf_in [mfi].dataPtr();
+        AMREX_HOST_DEVICE_PARALLEL_FOR_1D ( n, i,
+        {
+            pdst[i] = static_cast<typename T::value_type>(psrc[i]); // NOLINT(bugprone-signed-char-misuse)
+        });
+    }
+    return mf_out;
+}
+
+template <typename F>
+Real NormHelper (const MultiFab& x, int xcomp,
+                 const MultiFab& y, int ycomp,
+                 F && f,
+                 int numcomp, IntVect nghost, bool local)
+{
+    BL_ASSERT(x.boxArray() == y.boxArray());
+    BL_ASSERT(x.DistributionMap() == y.DistributionMap());
+    BL_ASSERT(x.nGrowVect().allGE(nghost) && y.nGrowVect().allGE(nghost));
+
+    Real sm = Real(0.0);
+#ifdef AMREX_USE_GPU
+    if (Gpu::inLaunchRegion()) {
+        auto const& xma = x.const_arrays();
+        auto const& yma = y.const_arrays();
+        sm = ParReduce(TypeList<ReduceOpSum>{}, TypeList<Real>{}, x, nghost,
+        [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) noexcept -> GpuTuple<Real>
+        {
+            Real t = Real(0.0);
+            auto const& xfab = xma[box_no];
+            auto const& yfab = yma[box_no];
+            for (int n = 0; n < numcomp; ++n) {
+                t += f(xfab(i,j,k,xcomp+n) , yfab(i,j,k,ycomp+n));
+            }
+            return t;
+        });
+    } else
+#endif
+    {
+#ifdef AMREX_USE_OMP
+#pragma omp parallel if (!system::regtest_reduction) reduction(+:sm)
+#endif
+        for (MFIter mfi(x,true); mfi.isValid(); ++mfi)
+        {
+            Box const& bx = mfi.growntilebox(nghost);
+            Array4<Real const> const& xfab = x.const_array(mfi);
+            Array4<Real const> const& yfab = y.const_array(mfi);
+            AMREX_LOOP_4D(bx, numcomp, i, j, k, n,
+            {
+                sm += f(xfab(i,j,k,xcomp+n) , yfab(i,j,k,ycomp+n));
+            });
+        }
+    }
+
+    if (!local) {
+        ParallelAllReduce::Sum(sm, ParallelContext::CommunicatorSub());
+    }
+
+    return sm;
+}
+
+template <typename MMF, typename Pred, typename F>
+Real NormHelper (const MMF& mask,
+                 const MultiFab& x, int xcomp,
+                 const MultiFab& y, int ycomp,
+                 Pred && pf,
+                 F && f,
+                 int numcomp, IntVect nghost, bool local)
+{
+    BL_ASSERT(x.boxArray() == y.boxArray());
+    BL_ASSERT(x.boxArray() == mask.boxArray());
+    BL_ASSERT(x.DistributionMap() == y.DistributionMap());
+    BL_ASSERT(x.DistributionMap() == mask.DistributionMap());
+    BL_ASSERT(x.nGrowVect().allGE(nghost) && y.nGrowVect().allGE(nghost));
+    BL_ASSERT(mask.nGrowVect().allGE(nghost));
+
+    Real sm = Real(0.0);
+#ifdef AMREX_USE_GPU
+    if (Gpu::inLaunchRegion()) {
+        auto const& xma = x.const_arrays();
+        auto const& yma = y.const_arrays();
+        auto const& mma = mask.const_arrays();
+        sm = ParReduce(TypeList<ReduceOpSum>{}, TypeList<Real>{}, x, nghost,
+        [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) noexcept -> GpuTuple<Real>
+        {
+            Real t = Real(0.0);
+            if (pf(mma[box_no](i,j,k))) {
+                auto const& xfab = xma[box_no];
+                auto const& yfab = yma[box_no];
+                for (int n = 0; n < numcomp; ++n) {
+                    t += f(xfab(i,j,k,xcomp+n) , yfab(i,j,k,ycomp+n));
+                }
+            }
+            return t;
+        });
+    } else
+#endif
+    {
+#ifdef AMREX_USE_OMP
+#pragma omp parallel if (!system::regtest_reduction) reduction(+:sm)
+#endif
+        for (MFIter mfi(x,true); mfi.isValid(); ++mfi)
+        {
+            Box const& bx = mfi.growntilebox(nghost);
+            Array4<Real const> const& xfab = x.const_array(mfi);
+            Array4<Real const> const& yfab = y.const_array(mfi);
+            auto const& mfab = mask.const_array(mfi);
+            AMREX_LOOP_4D(bx, numcomp, i, j, k, n,
+            {
+                if (pf(mfab(i,j,k))) {
+                    sm += f(xfab(i,j,k,xcomp+n) , yfab(i,j,k,ycomp+n));
+                }
+            });
+        }
+    }
+
+    if (!local) {
+        ParallelAllReduce::Sum(sm, ParallelContext::CommunicatorSub());
+    }
+
+    return sm;
+}
+
 template <typename Op, typename T, typename FAB, typename F,
           std::enable_if_t<IsBaseFab<FAB>::value
 #ifndef AMREX_USE_CUDA
```
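And a matching hypothetical caller for the masked overload re-added above, using an `iMultiFab` mask whose nonzero entries mark cells to include. The mask type, field names, and predicate are assumptions for illustration, not part of the commit.

```cpp
#include <AMReX_iMultiFab.H>
#include <AMReX_MultiFab.H>
#include <AMReX_MultiFabUtil.H>

using namespace amrex;

// Masked dot product: cells where mask == 0 contribute nothing.
// pf receives one mask entry per (i,j,k); f is the summand applied
// to the masked cells of component 0 in x and y.
Real masked_dot (iMultiFab const& mask,
                 MultiFab const& x, MultiFab const& y)
{
    return NormHelper(mask, x, 0, y, 0,
                      [=] AMREX_GPU_HOST_DEVICE (int m) { return m != 0; },
                      [=] AMREX_GPU_HOST_DEVICE (Real a, Real b) { return a*b; },
                      1, IntVect(0), false);
}
```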

Comments (0)