Skip to content

Commit 06ef6f4

Browse files
committed
WIP single flx var
1 parent 9b881ea commit 06ef6f4

3 files changed

Lines changed: 106 additions & 90 deletions

File tree

src/hydro/hydro.cpp

Lines changed: 30 additions & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -794,6 +794,7 @@ std::shared_ptr<StateDescriptor> Initialize(ParameterInput *pin) {
794794
m = Metadata({Metadata::Cell, Metadata::Derived, Metadata::OneCopy},
795795
std::vector<int>({nhydro + nscalars}), prim_labels);
796796
pkg->AddField("prim", m);
797+
pkg->AddField("flx", m);
797798

798799
const auto refine_str = pin->GetOrAddString("refinement", "type", "unset");
799800
if (refine_str == "pressure_gradient") {
@@ -1057,6 +1058,7 @@ TaskStatus CalculateFluxes(MeshData<Real> *u0_data, MeshData<Real> *u1_data,
10571058
auto const &u0_cons_pack = u0_data->PackVariables(std::vector<std::string>{"cons"});
10581059
auto const &u1_cons_pack = u1_data->PackVariables(std::vector<std::string>{"cons"});
10591060
auto const &u0_prim_pack = u0_data->PackVariables(std::vector<std::string>{"prim"});
1061+
auto &u0_flx_pack = u0_data->PackVariables(std::vector<std::string>{"flx"});
10601062
auto pkg = pmb->packages.Get("Hydro");
10611063
const auto nhydro = pkg->Param<int>("nhydro");
10621064
const auto nscalars = pkg->Param<int>("nscalars");
@@ -1095,39 +1097,26 @@ TaskStatus CalculateFluxes(MeshData<Real> *u0_data, MeshData<Real> *u1_data,
10951097
scratch_size_in_bytes, scratch_level, 0, u0_cons_pack.GetDim(5) - 1, kl, ku, jl, ju,
10961098
KOKKOS_LAMBDA(parthenon::team_mbr_t member, const int b, const int k, const int j) {
10971099
const auto &u0_prim = u0_prim_pack(b);
1100+
auto &flx = u0_flx_pack(b);
10981101
parthenon::ScratchPad2D<Real> wl(member.team_scratch(scratch_level),
10991102
num_scratch_vars, nx1);
11001103
parthenon::ScratchPad2D<Real> wr(member.team_scratch(scratch_level),
11011104
num_scratch_vars, nx1);
1102-
parthenon::ScratchPad2D<Real> flx(member.team_scratch(scratch_level),
1103-
num_scratch_vars, nx1);
1105+
parthenon::ScratchPad2D<Real> flxm1(member.team_scratch(scratch_level),
1106+
num_scratch_vars, nx1);
11041107
// get reconstructed state on faces
11051108
Reconstruct<recon, X1DIR>(member, k, j, ib.s - 1, ib.e + 1, u0_prim, wl, wr);
11061109
// Sync all threads in the team so that scratch memory is consistent
11071110
member.team_barrier();
11081111

1109-
riemann.Solve(member, ib.s, ib.e + 1, IV1, wl, wr, flx, eos, c_h);
1112+
riemann.Solve(member, k, j, ib.s, ib.e + 1, IV1, wl, wr, flxm1, flx, eos, c_h);
11101113
member.team_barrier();
1111-
1112-
// Now directly update
1113-
const int Ni = ib.e - ib.s + 1;
1114-
const int NvNi = u0_cons_pack.GetDim(4) * Ni;
1115-
auto tvr = Kokkos::TeamVectorRange(member, NvNi);
1116-
Kokkos::parallel_for(tvr, [&](const int idx) {
1117-
const int v = idx / Ni;
1118-
const int i = idx % Ni + ib.s;
1119-
const auto du = -(flx(v, i + 1) - flx(v, i)) / dx1;
1120-
1121-
// WARNING: removing gam0 is specific to the VL2 integrator
1122-
u0_cons_pack(b, v, k, j, i) = // gam0 * u0_cons_pack(b, v, k, j, i) +
1123-
gam1 * u1_cons_pack(b, v, k, j, i) + beta_dt * du;
1124-
});
11251114
});
11261115
//--------------------------------------------------------------------------------------
11271116
// j-direction
11281117
if (pmb->pmy_mesh->ndim >= 2) {
11291118
scratch_size_in_bytes =
1130-
parthenon::ScratchPad2D<Real>::shmem_size(num_scratch_vars, nx1) * 5;
1119+
parthenon::ScratchPad2D<Real>::shmem_size(num_scratch_vars, nx1) * 4;
11311120
// set the loop limits
11321121
il = ib.s - 1, iu = ib.e + 1, kl = kb.s, ku = kb.e;
11331122
if (pmb->block_size.nx(X3DIR) == 1) // 2D
@@ -1141,52 +1130,30 @@ TaskStatus CalculateFluxes(MeshData<Real> *u0_data, MeshData<Real> *u1_data,
11411130
scratch_size_in_bytes, scratch_level, 0, u0_cons_pack.GetDim(5) - 1, kl, ku,
11421131
KOKKOS_LAMBDA(parthenon::team_mbr_t member, const int b, const int k) {
11431132
const auto &prim = u0_prim_pack(b);
1133+
auto &flx = u0_flx_pack(b);
11441134
parthenon::ScratchPad2D<Real> wl(member.team_scratch(scratch_level),
11451135
num_scratch_vars, nx1);
11461136
parthenon::ScratchPad2D<Real> wr(member.team_scratch(scratch_level),
11471137
num_scratch_vars, nx1);
11481138
parthenon::ScratchPad2D<Real> wlb(member.team_scratch(scratch_level),
11491139
num_scratch_vars, nx1);
1150-
parthenon::ScratchPad2D<Real> flxl(member.team_scratch(scratch_level),
1151-
num_scratch_vars, nx1);
1152-
parthenon::ScratchPad2D<Real> flxr(member.team_scratch(scratch_level),
1153-
num_scratch_vars, nx1);
1140+
parthenon::ScratchPad2D<Real> flxm1(member.team_scratch(scratch_level),
1141+
num_scratch_vars, nx1);
11541142
for (int j = jb.s - 1; j <= jb.e + 1; ++j) {
11551143
// reconstruct L/R states at j
11561144
Reconstruct<recon, X2DIR>(member, k, j, il, iu, prim, wlb, wr);
11571145
// Sync all threads in the team so that scratch memory is consistent
11581146
member.team_barrier();
11591147

11601148
if (j > jb.s - 1) {
1161-
riemann.Solve(member, il, iu, IV2, wl, wr, flxr, eos, c_h);
1149+
riemann.Solve(member, k, j, il, iu, IV2, wl, wr, flxm1, flx, eos, c_h);
11621150
member.team_barrier();
1163-
if (j > jb.s) {
1164-
// Now directly update
1165-
const int Ni = iu - il + 1;
1166-
const int NvNi = u0_cons_pack.GetDim(4) * Ni;
1167-
auto tvr = Kokkos::TeamVectorRange(member, NvNi);
1168-
Kokkos::parallel_for(tvr, [&](const int idx) {
1169-
const int v = idx / Ni;
1170-
const int i = idx % Ni + il;
1171-
const auto du = -(flxr(v, i) - flxl(v, i)) / dx2;
1172-
1173-
// WARNING: this is specific to the VL2 integrator
1174-
u0_cons_pack(b, v, k, j - 1, i) +=
1175-
// gam0 * u0_cons_pack(b, v, k, j, i) +
1176-
// gam1 * u1_cons_pack(b, v, k, j, i) +
1177-
beta_dt * du;
1178-
});
1179-
member.team_barrier();
1180-
}
11811151
}
11821152

11831153
// swap the arrays for the next step
11841154
auto *tmp = wl.data();
11851155
wl.assign_data(wlb.data());
11861156
wlb.assign_data(tmp);
1187-
tmp = flxr.data();
1188-
flxr.assign_data(flxl.data());
1189-
flxl.assign_data(tmp);
11901157
}
11911158
});
11921159
}
@@ -1202,55 +1169,48 @@ TaskStatus CalculateFluxes(MeshData<Real> *u0_data, MeshData<Real> *u1_data,
12021169
scratch_size_in_bytes, scratch_level, 0, u0_cons_pack.GetDim(5) - 1, jl, ju,
12031170
KOKKOS_LAMBDA(parthenon::team_mbr_t member, const int b, const int j) {
12041171
const auto &prim = u0_prim_pack(b);
1172+
auto &flx = u0_flx_pack(b);
12051173
parthenon::ScratchPad2D<Real> wl(member.team_scratch(scratch_level),
12061174
num_scratch_vars, nx1);
12071175
parthenon::ScratchPad2D<Real> wr(member.team_scratch(scratch_level),
12081176
num_scratch_vars, nx1);
12091177
parthenon::ScratchPad2D<Real> wlb(member.team_scratch(scratch_level),
12101178
num_scratch_vars, nx1);
1211-
parthenon::ScratchPad2D<Real> flxr(member.team_scratch(scratch_level),
1212-
num_scratch_vars, nx1);
1213-
parthenon::ScratchPad2D<Real> flxl(member.team_scratch(scratch_level),
1214-
num_scratch_vars, nx1);
1179+
parthenon::ScratchPad2D<Real> flxm1(member.team_scratch(scratch_level),
1180+
num_scratch_vars, nx1);
12151181
for (int k = kb.s - 1; k <= kb.e + 1; ++k) {
12161182
// reconstruct L/R states at j
12171183
Reconstruct<recon, X3DIR>(member, k, j, il, iu, prim, wlb, wr);
12181184
// Sync all threads in the team so that scratch memory is consistent
12191185
member.team_barrier();
12201186

12211187
if (k > kb.s - 1) {
1222-
riemann.Solve(member, il, iu, IV3, wl, wr, flxr, eos, c_h);
1188+
riemann.Solve(member, k, j, il, iu, IV3, wl, wr, flxm1, flx, eos, c_h);
12231189
member.team_barrier();
1224-
if (k > kb.s) {
1225-
// Now directly update
1226-
const int Ni = iu - il + 1;
1227-
const int NvNi = u0_cons_pack.GetDim(4) * Ni;
1228-
auto tvr = Kokkos::TeamVectorRange(member, NvNi);
1229-
Kokkos::parallel_for(tvr, [&](const int idx) {
1230-
const int v = idx / Ni;
1231-
const int i = idx % Ni + il;
1232-
const auto du = -(flxr(v, i) - flxl(v, i)) / dx3;
1233-
1234-
// WARNING: this is specific to the VL2 integrator
1235-
u0_cons_pack(b, v, k - 1, j, i) +=
1236-
// gam0 * u0_cons_pack(b, v, k, j, i) +
1237-
// gam1 * u1_cons_pack(b, v, k, j, i) +
1238-
beta_dt * du;
1239-
});
1240-
member.team_barrier();
1241-
}
12421190
}
12431191
// swap the arrays for the next step
12441192
auto *tmp = wl.data();
12451193
wl.assign_data(wlb.data());
12461194
wlb.assign_data(tmp);
1247-
tmp = flxr.data();
1248-
flxr.assign_data(flxl.data());
1249-
flxl.assign_data(tmp);
12501195
}
12511196
});
12521197
}
12531198

1199+
// Now directly update
1200+
ib = u0_data->GetBlockData(0)->GetBoundsI(IndexDomain::interior);
1201+
jb = u0_data->GetBlockData(0)->GetBoundsJ(IndexDomain::interior);
1202+
kb = u0_data->GetBlockData(0)->GetBoundsK(IndexDomain::interior);
1203+
// Important, this assumes that dx1=dx2=dx3! (same in Riemann solve where flx is set)
1204+
pmb->par_for(
1205+
"UpdateFluxDiv", 0, u0_cons_pack.GetDim(5) - 1, 0, u0_cons_pack.GetDim(4) - 1, kb.s,
1206+
kb.e, jb.s, jb.e, ib.s, ib.e,
1207+
KOKKOS_LAMBDA(const int b, const int v, const int k, const int j, const int i) {
1208+
// WARNING: removing gam0 is specific to the VL2 integrator
1209+
u0_cons_pack(b, v, k, j, i) = // gam0 * u0_cons_pack(b, v, k, j, i) +
1210+
gam1 * u1_cons_pack(b, v, k, j, i) -
1211+
beta_dt * u0_flx_pack(b, v, k, j, i) / dx1;
1212+
});
1213+
12541214
return TaskStatus::complete;
12551215
}
12561216

src/hydro/rsolvers/glmmhd_hlld.hpp

Lines changed: 46 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -38,9 +38,10 @@ struct Cons1D {
3838
template <>
3939
struct Riemann<Fluid::glmmhd, RiemannSolver::hlld> {
4040
static KOKKOS_INLINE_FUNCTION void
41-
Solve(parthenon::team_mbr_t const &member, const int il, const int iu, const int ivx,
42-
const ScratchPad2D<Real> &wl, const ScratchPad2D<Real> &wr,
43-
ScratchPad2D<Real> &flx, const AdiabaticGLMMHDEOS &eos, const Real c_h) {
41+
Solve(parthenon::team_mbr_t const &member, const int k, const int j, const int il,
42+
const int iu, const int ivx, const ScratchPad2D<Real> &wl,
43+
const ScratchPad2D<Real> &wr, ScratchPad2D<Real> &flxm1, VariablePack<Real> &flx,
44+
const AdiabaticGLMMHDEOS &eos, const Real c_h) {
4445
const int ivy = IV1 + ((ivx - IV1) + 1) % 3;
4546
const int ivz = IV1 + ((ivx - IV1) + 2) % 3;
4647
const int iBx = ivx - 1 + NHYDRO;
@@ -381,15 +382,48 @@ struct Riemann<Fluid::glmmhd, RiemannSolver::hlld> {
381382
flxi[IB3] = fr.bz + urst.bz;
382383
}
383384

384-
flx(IDN, i) = flxi[IDN];
385-
flx(ivx, i) = flxi[IV1];
386-
flx(ivy, i) = flxi[IV2];
387-
flx(ivz, i) = flxi[IV3];
388-
flx(IEN, i) = flxi[IEN];
389-
flx(iBx, i) = flxi[IB1];
390-
flx(iBy, i) = flxi[IB2];
391-
flx(iBz, i) = flxi[IB3];
392-
flx(IPS, i) = flxi[IPS];
385+
// Important, this assumes that dx1=dx2=dx3! (same in Riemann solve where flx is
386+
// set)
387+
if (ivx == IV1) {
388+
flx(IDN, k, j, i - 1) = flxi[IDN] - flxm1(IDN, i);
389+
flx(ivx, k, j, i - 1) = flxi[IV1] - flxm1(IV1, i);
390+
flx(ivy, k, j, i - 1) = flxi[IV2] - flxm1(IV2, i);
391+
flx(ivz, k, j, i - 1) = flxi[IV3] - flxm1(IV3, i);
392+
flx(IEN, k, j, i - 1) = flxi[IEN] - flxm1(IEN, i);
393+
flx(iBx, k, j, i - 1) = flxi[IB1] - flxm1(IB1, i);
394+
flx(iBy, k, j, i - 1) = flxi[IB2] - flxm1(IB2, i);
395+
flx(iBz, k, j, i - 1) = flxi[IB3] - flxm1(IB3, i);
396+
flx(IPS, k, j, i - 1) = flxi[IPS] - flxm1(IPS, i);
397+
} else if (ivx == IV2) {
398+
flx(IDN, k, j - 1, i) += flxi[IDN] - flxm1(IDN, i);
399+
flx(ivx, k, j - 1, i) += flxi[IV1] - flxm1(IV1, i);
400+
flx(ivy, k, j - 1, i) += flxi[IV2] - flxm1(IV2, i);
401+
flx(ivz, k, j - 1, i) += flxi[IV3] - flxm1(IV3, i);
402+
flx(IEN, k, j - 1, i) += flxi[IEN] - flxm1(IEN, i);
403+
flx(iBx, k, j - 1, i) += flxi[IB1] - flxm1(IB1, i);
404+
flx(iBy, k, j - 1, i) += flxi[IB2] - flxm1(IB2, i);
405+
flx(iBz, k, j - 1, i) += flxi[IB3] - flxm1(IB3, i);
406+
flx(IPS, k, j - 1, i) += flxi[IPS] - flxm1(IPS, i);
407+
} else if (ivx == IV3) {
408+
flx(IDN, k - 1, j, i) += flxi[IDN] - flxm1(IDN, i);
409+
flx(ivx, k - 1, j, i) += flxi[IV1] - flxm1(IV1, i);
410+
flx(ivy, k - 1, j, i) += flxi[IV2] - flxm1(IV2, i);
411+
flx(ivz, k - 1, j, i) += flxi[IV3] - flxm1(IV3, i);
412+
flx(IEN, k - 1, j, i) += flxi[IEN] - flxm1(IEN, i);
413+
flx(iBx, k - 1, j, i) += flxi[IB1] - flxm1(IB1, i);
414+
flx(iBy, k - 1, j, i) += flxi[IB2] - flxm1(IB2, i);
415+
flx(iBz, k - 1, j, i) += flxi[IB3] - flxm1(IB3, i);
416+
flx(IPS, k - 1, j, i) += flxi[IPS] - flxm1(IPS, i);
417+
}
418+
flxm1(IDN, i) = flxi[IDN];
419+
flxm1(IV1, i) = flxi[IV1];
420+
flxm1(IV2, i) = flxi[IV2];
421+
flxm1(IV3, i) = flxi[IV3];
422+
flxm1(IEN, i) = flxi[IEN];
423+
flxm1(IB1, i) = flxi[IB1];
424+
flxm1(IB2, i) = flxi[IB2];
425+
flxm1(IB3, i) = flxi[IB3];
426+
flxm1(IPS, i) = flxi[IPS];
393427
});
394428
}
395429
};

src/hydro/rsolvers/hydro_hllc.hpp

Lines changed: 30 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -31,9 +31,10 @@
3131
template <>
3232
struct Riemann<Fluid::euler, RiemannSolver::hllc> {
3333
static KOKKOS_INLINE_FUNCTION void
34-
Solve(parthenon::team_mbr_t const &member, const int il, const int iu, const int ivx,
35-
const ScratchPad2D<Real> &wl, const ScratchPad2D<Real> &wr,
36-
ScratchPad2D<Real> &flx, const AdiabaticHydroEOS &eos, const Real c_h) {
34+
Solve(parthenon::team_mbr_t const &member, const int k, const int j, const int il,
35+
const int iu, const int ivx, const ScratchPad2D<Real> &wl,
36+
const ScratchPad2D<Real> &wr, ScratchPad2D<Real> &flxm1, VariablePack<Real> &flx,
37+
const AdiabaticHydroEOS &eos, const Real c_h) {
3738
int ivy = IV1 + ((ivx - IV1) + 1) % 3;
3839
int ivz = IV1 + ((ivx - IV1) + 2) % 3;
3940
Real gamma = eos.GetGamma();
@@ -146,11 +147,32 @@ struct Riemann<Fluid::euler, RiemannSolver::hllc> {
146147
flxi[IV3] = sl * fl[IV3] + sr * fr[IV3];
147148
flxi[IEN] = sl * fl[IEN] + sr * fr[IEN] + sm * cp * am;
148149

149-
flx(IDN, i) = flxi[IDN];
150-
flx(ivx, i) = flxi[IV1];
151-
flx(ivy, i) = flxi[IV2];
152-
flx(ivz, i) = flxi[IV3];
153-
flx(IEN, i) = flxi[IEN];
150+
// Important, this assumes that dx1=dx2=dx3! (same in Riemann solve where flx is
151+
// set)
152+
if (ivx == IV1) {
153+
flx(IDN, k, j, i - 1) = flxi[IDN] - flxm1(IDN, i);
154+
flx(ivx, k, j, i - 1) = flxi[IV1] - flxm1(IV1, i);
155+
flx(ivy, k, j, i - 1) = flxi[IV2] - flxm1(IV2, i);
156+
flx(ivz, k, j, i - 1) = flxi[IV3] - flxm1(IV3, i);
157+
flx(IEN, k, j, i - 1) = flxi[IEN] - flxm1(IEN, i);
158+
} else if (ivx == IV2) {
159+
flx(IDN, k, j - 1, i) += flxi[IDN] - flxm1(IDN, i);
160+
flx(ivx, k, j - 1, i) += flxi[IV1] - flxm1(IV1, i);
161+
flx(ivy, k, j - 1, i) += flxi[IV2] - flxm1(IV2, i);
162+
flx(ivz, k, j - 1, i) += flxi[IV3] - flxm1(IV3, i);
163+
flx(IEN, k, j - 1, i) += flxi[IEN] - flxm1(IEN, i);
164+
} else if (ivx == IV3) {
165+
flx(IDN, k - 1, j, i) += flxi[IDN] - flxm1(IDN, i);
166+
flx(ivx, k - 1, j, i) += flxi[IV1] - flxm1(IV1, i);
167+
flx(ivy, k - 1, j, i) += flxi[IV2] - flxm1(IV2, i);
168+
flx(ivz, k - 1, j, i) += flxi[IV3] - flxm1(IV3, i);
169+
flx(IEN, k - 1, j, i) += flxi[IEN] - flxm1(IEN, i);
170+
}
171+
flxm1(IDN, i) = flxi[IDN];
172+
flxm1(IV1, i) = flxi[IV1];
173+
flxm1(IV2, i) = flxi[IV2];
174+
flxm1(IV3, i) = flxi[IV3];
175+
flxm1(IEN, i) = flxi[IEN];
154176
});
155177
}
156178
};

0 commit comments

Comments
 (0)