Skip to content

Commit 9b881ea

Browse files
committed
Revert "Reduce register usage in hlld"
This reverts commit c344415.
1 parent bd1549f commit 9b881ea

1 file changed

Lines changed: 134 additions & 117 deletions

File tree

src/hydro/rsolvers/glmmhd_hlld.hpp

Lines changed: 134 additions & 117 deletions
Original file line numberDiff line numberDiff line change
@@ -55,92 +55,111 @@ struct Riemann<Fluid::glmmhd, RiemannSolver::hlld> {
5555
constexpr int NGLMMHD = 9;
5656

5757
parthenon::par_for_inner(member, il, iu, [&](const int i) {
58+
Real wli[NGLMMHD], wri[NGLMMHD], flxi[NGLMMHD];
5859
Real spd[5]; // signal speeds, left to right
5960
Cons1D ul, ur; // L/R states, conserved variables (computed)
6061
Cons1D ulst, uldst, urdst, urst; // Conserved variable for all states
6162
Cons1D fl, fr; // Fluxes for left & right states
6263

64+
//--- Step 1. Load L/R states into local variables
65+
66+
wli[IDN] = wl(IDN, i);
67+
wli[IV1] = wl(ivx, i);
68+
wli[IV2] = wl(ivy, i);
69+
wli[IV3] = wl(ivz, i);
70+
wli[IPR] = wl(IPR, i);
71+
wli[IB1] = wl(iBx, i);
72+
wli[IB2] = wl(iBy, i);
73+
wli[IB3] = wl(iBz, i);
74+
wli[IPS] = wl(IPS, i);
75+
76+
wri[IDN] = wr(IDN, i);
77+
wri[IV1] = wr(ivx, i);
78+
wri[IV2] = wr(ivy, i);
79+
wri[IV3] = wr(ivz, i);
80+
wri[IPR] = wr(IPR, i);
81+
wri[IB1] = wr(iBx, i);
82+
wri[IB2] = wr(iBy, i);
83+
wri[IB3] = wr(iBz, i);
84+
wri[IPS] = wr(IPS, i);
85+
6386
// first solve the decoupled state, see eq (24) in Mignone & Tzeferacos (2010)
64-
Real bxi = 0.5 * (wl(iBx, i) + wr(iBx, i)) - 0.5 / c_h * (wr(IPS, i) - wl(IPS, i));
65-
Real psii = 0.5 * (wl(IPS, i) + wr(IPS, i)) - 0.5 * c_h * (wr(iBx, i) - wl(iBx, i));
87+
Real bxi = 0.5 * (wli[IB1] + wri[IB1]) - 0.5 / c_h * (wri[IPS] - wli[IPS]);
88+
Real psii = 0.5 * (wli[IPS] + wri[IPS]) - 0.5 * c_h * (wri[IB1] - wli[IB1]);
6689
// and store flux
67-
flx(iBx, i) = psii;
68-
flx(IPS, i) = SQR(c_h) * bxi;
90+
flxi[IB1] = psii;
91+
flxi[IPS] = SQR(c_h) * bxi;
6992

7093
// Compute L/R states for selected conserved variables
7194
Real bxsq = bxi * bxi;
7295
// (KGF): group transverse vector components for floating-point associativity
7396
// symmetry
7497
Real pbl =
75-
0.5 * (bxsq + (SQR(wl(iBy, i)) + SQR(wl(iBz, i)))); // magnetic pressure (l/r)
76-
Real pbr = 0.5 * (bxsq + (SQR(wr(iBy, i)) + SQR(wr(iBz, i))));
77-
Real kel =
78-
0.5 * wl(IDN, i) * (SQR(wl(ivx, i)) + (SQR(wl(ivy, i)) + SQR(wl(ivz, i))));
79-
Real ker =
80-
0.5 * wr(IDN, i) * (SQR(wr(ivx, i)) + (SQR(wr(ivy, i)) + SQR(wr(ivz, i))));
81-
82-
ul.d = wl(IDN, i);
83-
ul.mx = wl(ivx, i) * ul.d;
84-
ul.my = wl(ivy, i) * ul.d;
85-
ul.mz = wl(ivz, i) * ul.d;
86-
ul.e = wl(IPR, i) * igm1 + kel + pbl;
87-
ul.by = wl(iBy, i);
88-
ul.bz = wl(iBz, i);
89-
90-
ur.d = wr(IDN, i);
91-
ur.mx = wr(ivx, i) * ur.d;
92-
ur.my = wr(ivy, i) * ur.d;
93-
ur.mz = wr(ivz, i) * ur.d;
94-
ur.e = wr(IPR, i) * igm1 + ker + pbr;
95-
ur.by = wr(iBy, i);
96-
ur.bz = wr(iBz, i);
98+
0.5 * (bxsq + (SQR(wli[IB2]) + SQR(wli[IB3]))); // magnetic pressure (l/r)
99+
Real pbr = 0.5 * (bxsq + (SQR(wri[IB2]) + SQR(wri[IB3])));
100+
Real kel = 0.5 * wli[IDN] * (SQR(wli[IV1]) + (SQR(wli[IV2]) + SQR(wli[IV3])));
101+
Real ker = 0.5 * wri[IDN] * (SQR(wri[IV1]) + (SQR(wri[IV2]) + SQR(wri[IV3])));
102+
103+
ul.d = wli[IDN];
104+
ul.mx = wli[IV1] * ul.d;
105+
ul.my = wli[IV2] * ul.d;
106+
ul.mz = wli[IV3] * ul.d;
107+
ul.e = wli[IPR] * igm1 + kel + pbl;
108+
ul.by = wli[IB2];
109+
ul.bz = wli[IB3];
110+
111+
ur.d = wri[IDN];
112+
ur.mx = wri[IV1] * ur.d;
113+
ur.my = wri[IV2] * ur.d;
114+
ur.mz = wri[IV3] * ur.d;
115+
ur.e = wri[IPR] * igm1 + ker + pbr;
116+
ur.by = wri[IB2];
117+
ur.bz = wri[IB3];
97118

98119
//--- Step 2. Compute L & R wave speeds according to Miyoshi & Kusano, eqn. (67)
99120

100-
const auto cfl = eos.FastMagnetosonicSpeed(wl(IDN, i), wl(IPR, i), wl(iBx, i),
101-
wl(iBy, i), wl(iBz, i));
102-
const auto cfr = eos.FastMagnetosonicSpeed(wr(IDN, i), wr(IPR, i), wr(iBx, i),
103-
wr(iBy, i), wr(iBz, i));
121+
const auto cfl =
122+
eos.FastMagnetosonicSpeed(wli[IDN], wli[IPR], wli[IB1], wli[IB2], wli[IB3]);
123+
const auto cfr =
124+
eos.FastMagnetosonicSpeed(wri[IDN], wri[IPR], wri[IB1], wri[IB2], wri[IB3]);
104125

105-
spd[0] = std::min(wl(ivx, i) - cfl, wr(ivx, i) - cfr);
106-
spd[4] = std::max(wl(ivx, i) + cfl, wr(ivx, i) + cfr);
126+
spd[0] = std::min(wli[IV1] - cfl, wri[IV1] - cfr);
127+
spd[4] = std::max(wli[IV1] + cfl, wri[IV1] + cfr);
107128

108129
// Real cfmax = std::max(cfl,cfr);
109-
// if (wl(ivx, i) <= wr(ivx, i)) {
110-
// spd[0] = wl(ivx, i) - cfmax;
111-
// spd[4] = wr(ivx, i) + cfmax;
130+
// if (wli[IV1] <= wri[IV1]) {
131+
// spd[0] = wli[IV1] - cfmax;
132+
// spd[4] = wri[IV1] + cfmax;
112133
// } else {
113-
// spd[0] = wr(ivx, i) - cfmax;
114-
// spd[4] = wl(ivx, i) + cfmax;
134+
// spd[0] = wri[IV1] - cfmax;
135+
// spd[4] = wli[IV1] + cfmax;
115136
// }
116137

117138
//--- Step 3. Compute L/R fluxes
118139

119-
Real ptl = wl(IPR, i) + pbl; // total pressures L,R
120-
Real ptr = wr(IPR, i) + pbr;
140+
Real ptl = wli[IPR] + pbl; // total pressures L,R
141+
Real ptr = wri[IPR] + pbr;
121142

122143
fl.d = ul.mx;
123-
fl.mx = ul.mx * wl(ivx, i) + ptl - bxsq;
124-
fl.my = ul.my * wl(ivx, i) - bxi * ul.by;
125-
fl.mz = ul.mz * wl(ivx, i) - bxi * ul.bz;
126-
fl.e = wl(ivx, i) * (ul.e + ptl - bxsq) -
127-
bxi * (wl(ivy, i) * ul.by + wl(ivz, i) * ul.bz);
128-
fl.by = ul.by * wl(ivx, i) - bxi * wl(ivy, i);
129-
fl.bz = ul.bz * wl(ivx, i) - bxi * wl(ivz, i);
144+
fl.mx = ul.mx * wli[IV1] + ptl - bxsq;
145+
fl.my = ul.my * wli[IV1] - bxi * ul.by;
146+
fl.mz = ul.mz * wli[IV1] - bxi * ul.bz;
147+
fl.e = wli[IV1] * (ul.e + ptl - bxsq) - bxi * (wli[IV2] * ul.by + wli[IV3] * ul.bz);
148+
fl.by = ul.by * wli[IV1] - bxi * wli[IV2];
149+
fl.bz = ul.bz * wli[IV1] - bxi * wli[IV3];
130150

131151
fr.d = ur.mx;
132-
fr.mx = ur.mx * wr(ivx, i) + ptr - bxsq;
133-
fr.my = ur.my * wr(ivx, i) - bxi * ur.by;
134-
fr.mz = ur.mz * wr(ivx, i) - bxi * ur.bz;
135-
fr.e = wr(ivx, i) * (ur.e + ptr - bxsq) -
136-
bxi * (wr(ivy, i) * ur.by + wr(ivz, i) * ur.bz);
137-
fr.by = ur.by * wr(ivx, i) - bxi * wr(ivy, i);
138-
fr.bz = ur.bz * wr(ivx, i) - bxi * wr(ivz, i);
152+
fr.mx = ur.mx * wri[IV1] + ptr - bxsq;
153+
fr.my = ur.my * wri[IV1] - bxi * ur.by;
154+
fr.mz = ur.mz * wri[IV1] - bxi * ur.bz;
155+
fr.e = wri[IV1] * (ur.e + ptr - bxsq) - bxi * (wri[IV2] * ur.by + wri[IV3] * ur.bz);
156+
fr.by = ur.by * wri[IV1] - bxi * wri[IV2];
157+
fr.bz = ur.bz * wri[IV1] - bxi * wri[IV3];
139158

140159
//--- Step 4. Compute middle and Alfven wave speeds
141160

142-
Real sdl = spd[0] - wl(ivx, i); // S_i-u_i (i=L or R)
143-
Real sdr = spd[4] - wr(ivx, i);
161+
Real sdl = spd[0] - wli[IV1]; // S_i-u_i (i=L or R)
162+
Real sdr = spd[4] - wri[IV1];
144163

145164
// S_M: eqn (38) of Miyoshi & Kusano
146165
// (KGF): group ptl, ptr terms for floating-point associativity symmetry
@@ -165,8 +184,8 @@ struct Riemann<Fluid::glmmhd, RiemannSolver::hlld> {
165184
//--- Step 5. Compute intermediate states
166185
// eqn (23) explicitly becomes eq (41) of Miyoshi & Kusano
167186
// TODO(felker): place an assertion that ptstl==ptstr
168-
Real ptstl = ptl + ul.d * sdl * (spd[2] - wl(ivx, i));
169-
Real ptstr = ptr + ur.d * sdr * (spd[2] - wr(ivx, i));
187+
Real ptstl = ptl + ul.d * sdl * (spd[2] - wli[IV1]);
188+
Real ptstr = ptr + ur.d * sdr * (spd[2] - wri[IV1]);
170189
// Real ptstl = ptl + ul.d*sdl*(sdl-sdml); // these equations had issues when
171190
// averaged Real ptstr = ptr + ur.d*sdr*(sdr-sdmr);
172191
Real ptst = 0.5 * (ptstr + ptstl); // total pressure (star state)
@@ -175,16 +194,16 @@ struct Riemann<Fluid::glmmhd, RiemannSolver::hlld> {
175194
ulst.mx = ulst.d * spd[2];
176195
if (std::abs(ul.d * sdl * sdml - bxsq) < (SMALL_NUMBER)*ptst) {
177196
// Degenerate case
178-
ulst.my = ulst.d * wl(ivy, i);
179-
ulst.mz = ulst.d * wl(ivz, i);
197+
ulst.my = ulst.d * wli[IV2];
198+
ulst.mz = ulst.d * wli[IV3];
180199

181200
ulst.by = ul.by;
182201
ulst.bz = ul.bz;
183202
} else {
184203
// eqns (44) and (46) of M&K
185204
Real tmp = bxi * (sdl - sdml) / (ul.d * sdl * sdml - bxsq);
186-
ulst.my = ulst.d * (wl(ivy, i) - ul.by * tmp);
187-
ulst.mz = ulst.d * (wl(ivz, i) - ul.bz * tmp);
205+
ulst.my = ulst.d * (wli[IV2] - ul.by * tmp);
206+
ulst.mz = ulst.d * (wli[IV3] - ul.bz * tmp);
188207

189208
// eqns (45) and (47) of M&K
190209
tmp = (ul.d * SQR(sdl) - bxsq) / (ul.d * sdl * sdml - bxsq);
@@ -196,25 +215,24 @@ struct Riemann<Fluid::glmmhd, RiemannSolver::hlld> {
196215
Real vbstl = (ulst.mx * bxi + (ulst.my * ulst.by + ulst.mz * ulst.bz)) * ulst_d_inv;
197216
// eqn (48) of M&K
198217
// (KGF): group transverse by, bz terms for floating-point associativity symmetry
199-
ulst.e =
200-
(sdl * ul.e - ptl * wl(ivx, i) + ptst * spd[2] +
201-
bxi * (wl(ivx, i) * bxi + (wl(ivy, i) * ul.by + wl(ivz, i) * ul.bz) - vbstl)) *
202-
sdml_inv;
218+
ulst.e = (sdl * ul.e - ptl * wli[IV1] + ptst * spd[2] +
219+
bxi * (wli[IV1] * bxi + (wli[IV2] * ul.by + wli[IV3] * ul.bz) - vbstl)) *
220+
sdml_inv;
203221

204222
// ur* - eqn (39) of M&K
205223
urst.mx = urst.d * spd[2];
206224
if (std::abs(ur.d * sdr * sdmr - bxsq) < (SMALL_NUMBER)*ptst) {
207225
// Degenerate case
208-
urst.my = urst.d * wr(ivy, i);
209-
urst.mz = urst.d * wr(ivz, i);
226+
urst.my = urst.d * wri[IV2];
227+
urst.mz = urst.d * wri[IV3];
210228

211229
urst.by = ur.by;
212230
urst.bz = ur.bz;
213231
} else {
214232
// eqns (44) and (46) of M&K
215233
Real tmp = bxi * (sdr - sdmr) / (ur.d * sdr * sdmr - bxsq);
216-
urst.my = urst.d * (wr(ivy, i) - ur.by * tmp);
217-
urst.mz = urst.d * (wr(ivz, i) - ur.bz * tmp);
234+
urst.my = urst.d * (wri[IV2] - ur.by * tmp);
235+
urst.mz = urst.d * (wri[IV3] - ur.bz * tmp);
218236

219237
// eqns (45) and (47) of M&K
220238
tmp = (ur.d * SQR(sdr) - bxsq) / (ur.d * sdr * sdmr - bxsq);
@@ -226,10 +244,9 @@ struct Riemann<Fluid::glmmhd, RiemannSolver::hlld> {
226244
Real vbstr = (urst.mx * bxi + (urst.my * urst.by + urst.mz * urst.bz)) * urst_d_inv;
227245
// eqn (48) of M&K
228246
// (KGF): group transverse by, bz terms for floating-point associativity symmetry
229-
urst.e =
230-
(sdr * ur.e - ptr * wr(ivx, i) + ptst * spd[2] +
231-
bxi * (wr(ivx, i) * bxi + (wr(ivy, i) * ur.by + wr(ivz, i) * ur.bz) - vbstr)) *
232-
sdmr_inv;
247+
urst.e = (sdr * ur.e - ptr * wri[IV1] + ptst * spd[2] +
248+
bxi * (wri[IV1] * bxi + (wri[IV2] * ur.by + wri[IV3] * ur.bz) - vbstr)) *
249+
sdmr_inv;
233250
// ul** and ur** - if Bx is near zero, same as *-states
234251
if (0.5 * bxsq < (SMALL_NUMBER)*ptst) {
235252
uldst = ulst;
@@ -310,69 +327,69 @@ struct Riemann<Fluid::glmmhd, RiemannSolver::hlld> {
310327

311328
if (spd[0] >= 0.0) {
312329
// return Fl if flow is supersonic
313-
flx(IDN, i) = fl.d;
314-
flx(ivx, i) = fl.mx;
315-
flx(ivy, i) = fl.my;
316-
flx(ivz, i) = fl.mz;
330+
flxi[IDN] = fl.d;
331+
flxi[IV1] = fl.mx;
332+
flxi[IV2] = fl.my;
333+
flxi[IV3] = fl.mz;
317334
flxi[IEN] = fl.e;
318-
flx(iBy, i) = fl.by;
319-
flx(iBz, i) = fl.bz;
335+
flxi[IB2] = fl.by;
336+
flxi[IB3] = fl.bz;
320337
} else if (spd[4] <= 0.0) {
321338
// return Fr if flow is supersonic
322-
flx(IDN, i) = fr.d;
323-
flx(ivx, i) = fr.mx;
324-
flx(ivy, i) = fr.my;
325-
flx(ivz, i) = fr.mz;
339+
flxi[IDN] = fr.d;
340+
flxi[IV1] = fr.mx;
341+
flxi[IV2] = fr.my;
342+
flxi[IV3] = fr.mz;
326343
flxi[IEN] = fr.e;
327-
flx(iBy, i) = fr.by;
328-
flx(iBz, i) = fr.bz;
344+
flxi[IB2] = fr.by;
345+
flxi[IB3] = fr.bz;
329346
} else if (spd[1] >= 0.0) {
330347
// return Fl*
331-
flx(IDN, i) = fl.d + ulst.d;
332-
flx(ivx, i) = fl.mx + ulst.mx;
333-
flx(ivy, i) = fl.my + ulst.my;
334-
flx(ivz, i) = fl.mz + ulst.mz;
348+
flxi[IDN] = fl.d + ulst.d;
349+
flxi[IV1] = fl.mx + ulst.mx;
350+
flxi[IV2] = fl.my + ulst.my;
351+
flxi[IV3] = fl.mz + ulst.mz;
335352
flxi[IEN] = fl.e + ulst.e;
336-
flx(iBy, i) = fl.by + ulst.by;
337-
flx(iBz, i) = fl.bz + ulst.bz;
353+
flxi[IB2] = fl.by + ulst.by;
354+
flxi[IB3] = fl.bz + ulst.bz;
338355
} else if (spd[2] >= 0.0) {
339356
// return Fl**
340-
flx(IDN, i) = fl.d + ulst.d + uldst.d;
341-
flx(ivx, i) = fl.mx + ulst.mx + uldst.mx;
342-
flx(ivy, i) = fl.my + ulst.my + uldst.my;
343-
flx(ivz, i) = fl.mz + ulst.mz + uldst.mz;
357+
flxi[IDN] = fl.d + ulst.d + uldst.d;
358+
flxi[IV1] = fl.mx + ulst.mx + uldst.mx;
359+
flxi[IV2] = fl.my + ulst.my + uldst.my;
360+
flxi[IV3] = fl.mz + ulst.mz + uldst.mz;
344361
flxi[IEN] = fl.e + ulst.e + uldst.e;
345-
flx(iBy, i) = fl.by + ulst.by + uldst.by;
346-
flx(iBz, i) = fl.bz + ulst.bz + uldst.bz;
362+
flxi[IB2] = fl.by + ulst.by + uldst.by;
363+
flxi[IB3] = fl.bz + ulst.bz + uldst.bz;
347364
} else if (spd[3] > 0.0) {
348365
// return Fr**
349-
flx(IDN, i) = fr.d + urst.d + urdst.d;
350-
flx(ivx, i) = fr.mx + urst.mx + urdst.mx;
351-
flx(ivy, i) = fr.my + urst.my + urdst.my;
352-
flx(ivz, i) = fr.mz + urst.mz + urdst.mz;
366+
flxi[IDN] = fr.d + urst.d + urdst.d;
367+
flxi[IV1] = fr.mx + urst.mx + urdst.mx;
368+
flxi[IV2] = fr.my + urst.my + urdst.my;
369+
flxi[IV3] = fr.mz + urst.mz + urdst.mz;
353370
flxi[IEN] = fr.e + urst.e + urdst.e;
354-
flx(iBy, i) = fr.by + urst.by + urdst.by;
355-
flx(iBz, i) = fr.bz + urst.bz + urdst.bz;
371+
flxi[IB2] = fr.by + urst.by + urdst.by;
372+
flxi[IB3] = fr.bz + urst.bz + urdst.bz;
356373
} else {
357374
// return Fr*
358-
flx(IDN, i) = fr.d + urst.d;
359-
flx(ivx, i) = fr.mx + urst.mx;
360-
flx(ivy, i) = fr.my + urst.my;
361-
flx(ivz, i) = fr.mz + urst.mz;
375+
flxi[IDN] = fr.d + urst.d;
376+
flxi[IV1] = fr.mx + urst.mx;
377+
flxi[IV2] = fr.my + urst.my;
378+
flxi[IV3] = fr.mz + urst.mz;
362379
flxi[IEN] = fr.e + urst.e;
363-
flx(iBy, i) = fr.by + urst.by;
364-
flx(iBz, i) = fr.bz + urst.bz;
380+
flxi[IB2] = fr.by + urst.by;
381+
flxi[IB3] = fr.bz + urst.bz;
365382
}
366383

367-
flx(IDN, i) = flx(IDN, i);
368-
flx(ivx, i) = flx(ivx, i);
369-
flx(ivy, i) = flx(ivy, i);
370-
flx(ivz, i) = flx(ivz, i);
384+
flx(IDN, i) = flxi[IDN];
385+
flx(ivx, i) = flxi[IV1];
386+
flx(ivy, i) = flxi[IV2];
387+
flx(ivz, i) = flxi[IV3];
371388
flx(IEN, i) = flxi[IEN];
372-
flx(iBx, i) = flx(iBx, i);
373-
flx(iBy, i) = flx(iBy, i);
374-
flx(iBz, i) = flx(iBz, i);
375-
flx(IPS, i) = flx(IPS, i);
389+
flx(iBx, i) = flxi[IB1];
390+
flx(iBy, i) = flxi[IB2];
391+
flx(iBz, i) = flxi[IB3];
392+
flx(IPS, i) = flxi[IPS];
376393
});
377394
}
378395
};

0 commit comments

Comments
 (0)