@@ -794,6 +794,7 @@ std::shared_ptr<StateDescriptor> Initialize(ParameterInput *pin) {
794794 m = Metadata ({Metadata::Cell, Metadata::Derived, Metadata::OneCopy},
795795 std::vector<int >({nhydro + nscalars}), prim_labels);
796796 pkg->AddField (" prim" , m);
797+ pkg->AddField (" flx" , m);
797798
798799 const auto refine_str = pin->GetOrAddString (" refinement" , " type" , " unset" );
799800 if (refine_str == " pressure_gradient" ) {
@@ -1057,6 +1058,7 @@ TaskStatus CalculateFluxes(MeshData<Real> *u0_data, MeshData<Real> *u1_data,
10571058 auto const &u0_cons_pack = u0_data->PackVariables (std::vector<std::string>{" cons" });
10581059 auto const &u1_cons_pack = u1_data->PackVariables (std::vector<std::string>{" cons" });
10591060 auto const &u0_prim_pack = u0_data->PackVariables (std::vector<std::string>{" prim" });
1061+ auto &u0_flx_pack = u0_data->PackVariables (std::vector<std::string>{" flx" });
10601062 auto pkg = pmb->packages .Get (" Hydro" );
10611063 const auto nhydro = pkg->Param <int >(" nhydro" );
10621064 const auto nscalars = pkg->Param <int >(" nscalars" );
@@ -1095,39 +1097,26 @@ TaskStatus CalculateFluxes(MeshData<Real> *u0_data, MeshData<Real> *u1_data,
10951097 scratch_size_in_bytes, scratch_level, 0 , u0_cons_pack.GetDim (5 ) - 1 , kl, ku, jl, ju,
10961098 KOKKOS_LAMBDA (parthenon::team_mbr_t member, const int b, const int k, const int j) {
10971099 const auto &u0_prim = u0_prim_pack (b);
1100+ auto &flx = u0_flx_pack (b);
10981101 parthenon::ScratchPad2D<Real> wl (member.team_scratch (scratch_level),
10991102 num_scratch_vars, nx1);
11001103 parthenon::ScratchPad2D<Real> wr (member.team_scratch (scratch_level),
11011104 num_scratch_vars, nx1);
1102- parthenon::ScratchPad2D<Real> flx (member.team_scratch (scratch_level),
1103- num_scratch_vars, nx1);
1105+ parthenon::ScratchPad2D<Real> flxm1 (member.team_scratch (scratch_level),
1106+ num_scratch_vars, nx1);
11041107 // get reconstructed state on faces
11051108 Reconstruct<recon, X1DIR>(member, k, j, ib.s - 1 , ib.e + 1 , u0_prim, wl, wr);
11061109 // Sync all threads in the team so that scratch memory is consistent
11071110 member.team_barrier ();
11081111
1109- riemann.Solve (member, ib.s , ib.e + 1 , IV1, wl, wr, flx, eos, c_h);
1112+ riemann.Solve (member, k, j, ib.s , ib.e + 1 , IV1, wl, wr, flxm1 , flx, eos, c_h);
11101113 member.team_barrier ();
1111-
1112- // Now directly update
1113- const int Ni = ib.e - ib.s + 1 ;
1114- const int NvNi = u0_cons_pack.GetDim (4 ) * Ni;
1115- auto tvr = Kokkos::TeamVectorRange (member, NvNi);
1116- Kokkos::parallel_for (tvr, [&](const int idx) {
1117- const int v = idx / Ni;
1118- const int i = idx % Ni + ib.s ;
1119- const auto du = -(flx (v, i + 1 ) - flx (v, i)) / dx1;
1120-
1121- // WARNING: removing gam0 is specific to the VL2 integrator
1122- u0_cons_pack (b, v, k, j, i) = // gam0 * u0_cons_pack(b, v, k, j, i) +
1123- gam1 * u1_cons_pack (b, v, k, j, i) + beta_dt * du;
1124- });
11251114 });
11261115 // --------------------------------------------------------------------------------------
11271116 // j-direction
11281117 if (pmb->pmy_mesh ->ndim >= 2 ) {
11291118 scratch_size_in_bytes =
1130- parthenon::ScratchPad2D<Real>::shmem_size (num_scratch_vars, nx1) * 5 ;
1119+ parthenon::ScratchPad2D<Real>::shmem_size (num_scratch_vars, nx1) * 4 ;
11311120 // set the loop limits
11321121 il = ib.s - 1 , iu = ib.e + 1 , kl = kb.s , ku = kb.e ;
11331122 if (pmb->block_size .nx (X3DIR) == 1 ) // 2D
@@ -1141,52 +1130,30 @@ TaskStatus CalculateFluxes(MeshData<Real> *u0_data, MeshData<Real> *u1_data,
11411130 scratch_size_in_bytes, scratch_level, 0 , u0_cons_pack.GetDim (5 ) - 1 , kl, ku,
11421131 KOKKOS_LAMBDA (parthenon::team_mbr_t member, const int b, const int k) {
11431132 const auto &prim = u0_prim_pack (b);
1133+ auto &flx = u0_flx_pack (b);
11441134 parthenon::ScratchPad2D<Real> wl (member.team_scratch (scratch_level),
11451135 num_scratch_vars, nx1);
11461136 parthenon::ScratchPad2D<Real> wr (member.team_scratch (scratch_level),
11471137 num_scratch_vars, nx1);
11481138 parthenon::ScratchPad2D<Real> wlb (member.team_scratch (scratch_level),
11491139 num_scratch_vars, nx1);
1150- parthenon::ScratchPad2D<Real> flxl (member.team_scratch (scratch_level),
1151- num_scratch_vars, nx1);
1152- parthenon::ScratchPad2D<Real> flxr (member.team_scratch (scratch_level),
1153- num_scratch_vars, nx1);
1140+ parthenon::ScratchPad2D<Real> flxm1 (member.team_scratch (scratch_level),
1141+ num_scratch_vars, nx1);
11541142 for (int j = jb.s - 1 ; j <= jb.e + 1 ; ++j) {
11551143 // reconstruct L/R states at j
11561144 Reconstruct<recon, X2DIR>(member, k, j, il, iu, prim, wlb, wr);
11571145 // Sync all threads in the team so that scratch memory is consistent
11581146 member.team_barrier ();
11591147
11601148 if (j > jb.s - 1 ) {
1161- riemann.Solve (member, il, iu, IV2, wl, wr, flxr , eos, c_h);
1149+ riemann.Solve (member, k, j, il, iu, IV2, wl, wr, flxm1, flx , eos, c_h);
11621150 member.team_barrier ();
1163- if (j > jb.s ) {
1164- // Now directly update
1165- const int Ni = iu - il + 1 ;
1166- const int NvNi = u0_cons_pack.GetDim (4 ) * Ni;
1167- auto tvr = Kokkos::TeamVectorRange (member, NvNi);
1168- Kokkos::parallel_for (tvr, [&](const int idx) {
1169- const int v = idx / Ni;
1170- const int i = idx % Ni + il;
1171- const auto du = -(flxr (v, i) - flxl (v, i)) / dx2;
1172-
1173- // WARNING: this is specific to the VL2 integrator
1174- u0_cons_pack (b, v, k, j - 1 , i) +=
1175- // gam0 * u0_cons_pack(b, v, k, j, i) +
1176- // gam1 * u1_cons_pack(b, v, k, j, i) +
1177- beta_dt * du;
1178- });
1179- member.team_barrier ();
1180- }
11811151 }
11821152
11831153 // swap the arrays for the next step
11841154 auto *tmp = wl.data ();
11851155 wl.assign_data (wlb.data ());
11861156 wlb.assign_data (tmp);
1187- tmp = flxr.data ();
1188- flxr.assign_data (flxl.data ());
1189- flxl.assign_data (tmp);
11901157 }
11911158 });
11921159 }
@@ -1202,55 +1169,48 @@ TaskStatus CalculateFluxes(MeshData<Real> *u0_data, MeshData<Real> *u1_data,
12021169 scratch_size_in_bytes, scratch_level, 0 , u0_cons_pack.GetDim (5 ) - 1 , jl, ju,
12031170 KOKKOS_LAMBDA (parthenon::team_mbr_t member, const int b, const int j) {
12041171 const auto &prim = u0_prim_pack (b);
1172+ auto &flx = u0_flx_pack (b);
12051173 parthenon::ScratchPad2D<Real> wl (member.team_scratch (scratch_level),
12061174 num_scratch_vars, nx1);
12071175 parthenon::ScratchPad2D<Real> wr (member.team_scratch (scratch_level),
12081176 num_scratch_vars, nx1);
12091177 parthenon::ScratchPad2D<Real> wlb (member.team_scratch (scratch_level),
12101178 num_scratch_vars, nx1);
1211- parthenon::ScratchPad2D<Real> flxr (member.team_scratch (scratch_level),
1212- num_scratch_vars, nx1);
1213- parthenon::ScratchPad2D<Real> flxl (member.team_scratch (scratch_level),
1214- num_scratch_vars, nx1);
1179+ parthenon::ScratchPad2D<Real> flxm1 (member.team_scratch (scratch_level),
1180+ num_scratch_vars, nx1);
12151181 for (int k = kb.s - 1 ; k <= kb.e + 1 ; ++k) {
12161182 // reconstruct L/R states at j
12171183 Reconstruct<recon, X3DIR>(member, k, j, il, iu, prim, wlb, wr);
12181184 // Sync all threads in the team so that scratch memory is consistent
12191185 member.team_barrier ();
12201186
12211187 if (k > kb.s - 1 ) {
1222- riemann.Solve (member, il, iu, IV3, wl, wr, flxr , eos, c_h);
1188+ riemann.Solve (member, k, j, il, iu, IV3, wl, wr, flxm1, flx , eos, c_h);
12231189 member.team_barrier ();
1224- if (k > kb.s ) {
1225- // Now directly update
1226- const int Ni = iu - il + 1 ;
1227- const int NvNi = u0_cons_pack.GetDim (4 ) * Ni;
1228- auto tvr = Kokkos::TeamVectorRange (member, NvNi);
1229- Kokkos::parallel_for (tvr, [&](const int idx) {
1230- const int v = idx / Ni;
1231- const int i = idx % Ni + il;
1232- const auto du = -(flxr (v, i) - flxl (v, i)) / dx3;
1233-
1234- // WARNING: this is specific to the VL2 integrator
1235- u0_cons_pack (b, v, k - 1 , j, i) +=
1236- // gam0 * u0_cons_pack(b, v, k, j, i) +
1237- // gam1 * u1_cons_pack(b, v, k, j, i) +
1238- beta_dt * du;
1239- });
1240- member.team_barrier ();
1241- }
12421190 }
12431191 // swap the arrays for the next step
12441192 auto *tmp = wl.data ();
12451193 wl.assign_data (wlb.data ());
12461194 wlb.assign_data (tmp);
1247- tmp = flxr.data ();
1248- flxr.assign_data (flxl.data ());
1249- flxl.assign_data (tmp);
12501195 }
12511196 });
12521197 }
12531198
1199+ // Now directly update
1200+ ib = u0_data->GetBlockData (0 )->GetBoundsI (IndexDomain::interior);
1201+ jb = u0_data->GetBlockData (0 )->GetBoundsJ (IndexDomain::interior);
1202+ kb = u0_data->GetBlockData (0 )->GetBoundsK (IndexDomain::interior);
1203+ // Important, this assumes that dx1=dx2=dx3! (same in Riemann solve where flx is set)
1204+ pmb->par_for (
1205+ " UpdateFluxDiv" , 0 , u0_cons_pack.GetDim (5 ) - 1 , 0 , u0_cons_pack.GetDim (4 ) - 1 , kb.s ,
1206+ kb.e , jb.s , jb.e , ib.s , ib.e ,
1207+ KOKKOS_LAMBDA (const int b, const int v, const int k, const int j, const int i) {
1208+ // WARNING: removing gam0 is specific to the VL2 integrator
1209+ u0_cons_pack (b, v, k, j, i) = // gam0 * u0_cons_pack(b, v, k, j, i) +
1210+ gam1 * u1_cons_pack (b, v, k, j, i) -
1211+ beta_dt * u0_flx_pack (b, v, k, j, i) / dx1;
1212+ });
1213+
12541214 return TaskStatus::complete;
12551215}
12561216
0 commit comments