Skip to content

Commit c344415

Browse files
committed
Reduce register usage in hlld
1 parent 17747d1 commit c344415

1 file changed

Lines changed: 117 additions & 134 deletions

File tree

src/hydro/rsolvers/glmmhd_hlld.hpp

Lines changed: 117 additions & 134 deletions
Original file line numberDiff line numberDiff line change
@@ -55,111 +55,92 @@ struct Riemann<Fluid::glmmhd, RiemannSolver::hlld> {
5555
constexpr int NGLMMHD = 9;
5656

5757
parthenon::par_for_inner(member, il, iu, [&](const int i) {
58-
Real wli[NGLMMHD], wri[NGLMMHD], flxi[NGLMMHD];
5958
Real spd[5]; // signal speeds, left to right
6059
Cons1D ul, ur; // L/R states, conserved variables (computed)
6160
Cons1D ulst, uldst, urdst, urst; // Conserved variable for all states
6261
Cons1D fl, fr; // Fluxes for left & right states
6362

64-
//--- Step 1. Load L/R states into local variables
65-
66-
wli[IDN] = wl(IDN, i);
67-
wli[IV1] = wl(ivx, i);
68-
wli[IV2] = wl(ivy, i);
69-
wli[IV3] = wl(ivz, i);
70-
wli[IPR] = wl(IPR, i);
71-
wli[IB1] = wl(iBx, i);
72-
wli[IB2] = wl(iBy, i);
73-
wli[IB3] = wl(iBz, i);
74-
wli[IPS] = wl(IPS, i);
75-
76-
wri[IDN] = wr(IDN, i);
77-
wri[IV1] = wr(ivx, i);
78-
wri[IV2] = wr(ivy, i);
79-
wri[IV3] = wr(ivz, i);
80-
wri[IPR] = wr(IPR, i);
81-
wri[IB1] = wr(iBx, i);
82-
wri[IB2] = wr(iBy, i);
83-
wri[IB3] = wr(iBz, i);
84-
wri[IPS] = wr(IPS, i);
85-
8663
// first solve the decoupled state, see eq (24) in Mignone & Tzeferacos (2010)
87-
Real bxi = 0.5 * (wli[IB1] + wri[IB1]) - 0.5 / c_h * (wri[IPS] - wli[IPS]);
88-
Real psii = 0.5 * (wli[IPS] + wri[IPS]) - 0.5 * c_h * (wri[IB1] - wli[IB1]);
64+
Real bxi = 0.5 * (wl(iBx, i) + wr(iBx, i)) - 0.5 / c_h * (wr(IPS, i) - wl(IPS, i));
65+
Real psii = 0.5 * (wl(IPS, i) + wr(IPS, i)) - 0.5 * c_h * (wr(iBx, i) - wl(iBx, i));
8966
// and store flux
90-
flxi[IB1] = psii;
91-
flxi[IPS] = SQR(c_h) * bxi;
67+
flx(iBx, i) = psii;
68+
flx(IPS, i) = SQR(c_h) * bxi;
9269

9370
// Compute L/R states for selected conserved variables
9471
Real bxsq = bxi * bxi;
9572
// (KGF): group transverse vector components for floating-point associativity
9673
// symmetry
9774
Real pbl =
98-
0.5 * (bxsq + (SQR(wli[IB2]) + SQR(wli[IB3]))); // magnetic pressure (l/r)
99-
Real pbr = 0.5 * (bxsq + (SQR(wri[IB2]) + SQR(wri[IB3])));
100-
Real kel = 0.5 * wli[IDN] * (SQR(wli[IV1]) + (SQR(wli[IV2]) + SQR(wli[IV3])));
101-
Real ker = 0.5 * wri[IDN] * (SQR(wri[IV1]) + (SQR(wri[IV2]) + SQR(wri[IV3])));
102-
103-
ul.d = wli[IDN];
104-
ul.mx = wli[IV1] * ul.d;
105-
ul.my = wli[IV2] * ul.d;
106-
ul.mz = wli[IV3] * ul.d;
107-
ul.e = wli[IPR] * igm1 + kel + pbl;
108-
ul.by = wli[IB2];
109-
ul.bz = wli[IB3];
110-
111-
ur.d = wri[IDN];
112-
ur.mx = wri[IV1] * ur.d;
113-
ur.my = wri[IV2] * ur.d;
114-
ur.mz = wri[IV3] * ur.d;
115-
ur.e = wri[IPR] * igm1 + ker + pbr;
116-
ur.by = wri[IB2];
117-
ur.bz = wri[IB3];
75+
0.5 * (bxsq + (SQR(wl(iBy, i)) + SQR(wl(iBz, i)))); // magnetic pressure (l/r)
76+
Real pbr = 0.5 * (bxsq + (SQR(wr(iBy, i)) + SQR(wr(iBz, i))));
77+
Real kel =
78+
0.5 * wl(IDN, i) * (SQR(wl(ivx, i)) + (SQR(wl(ivy, i)) + SQR(wl(ivz, i))));
79+
Real ker =
80+
0.5 * wr(IDN, i) * (SQR(wr(ivx, i)) + (SQR(wr(ivy, i)) + SQR(wr(ivz, i))));
81+
82+
ul.d = wl(IDN, i);
83+
ul.mx = wl(ivx, i) * ul.d;
84+
ul.my = wl(ivy, i) * ul.d;
85+
ul.mz = wl(ivz, i) * ul.d;
86+
ul.e = wl(IPR, i) * igm1 + kel + pbl;
87+
ul.by = wl(iBy, i);
88+
ul.bz = wl(iBz, i);
89+
90+
ur.d = wr(IDN, i);
91+
ur.mx = wr(ivx, i) * ur.d;
92+
ur.my = wr(ivy, i) * ur.d;
93+
ur.mz = wr(ivz, i) * ur.d;
94+
ur.e = wr(IPR, i) * igm1 + ker + pbr;
95+
ur.by = wr(iBy, i);
96+
ur.bz = wr(iBz, i);
11897

11998
//--- Step 2. Compute L & R wave speeds according to Miyoshi & Kusano, eqn. (67)
12099

121-
const auto cfl =
122-
eos.FastMagnetosonicSpeed(wli[IDN], wli[IPR], wli[IB1], wli[IB2], wli[IB3]);
123-
const auto cfr =
124-
eos.FastMagnetosonicSpeed(wri[IDN], wri[IPR], wri[IB1], wri[IB2], wri[IB3]);
100+
const auto cfl = eos.FastMagnetosonicSpeed(wl(IDN, i), wl(IPR, i), wl(iBx, i),
101+
wl(iBy, i), wl(iBz, i));
102+
const auto cfr = eos.FastMagnetosonicSpeed(wr(IDN, i), wr(IPR, i), wr(iBx, i),
103+
wr(iBy, i), wr(iBz, i));
125104

126-
spd[0] = std::min(wli[IV1] - cfl, wri[IV1] - cfr);
127-
spd[4] = std::max(wli[IV1] + cfl, wri[IV1] + cfr);
105+
spd[0] = std::min(wl(ivx, i) - cfl, wr(ivx, i) - cfr);
106+
spd[4] = std::max(wl(ivx, i) + cfl, wr(ivx, i) + cfr);
128107

129108
// Real cfmax = std::max(cfl,cfr);
130-
// if (wli[IV1] <= wri[IV1]) {
131-
// spd[0] = wli[IV1] - cfmax;
132-
// spd[4] = wri[IV1] + cfmax;
109+
// if (wl(ivx, i) <= wr(ivx, i)) {
110+
// spd[0] = wl(ivx, i) - cfmax;
111+
// spd[4] = wr(ivx, i) + cfmax;
133112
// } else {
134-
// spd[0] = wri[IV1] - cfmax;
135-
// spd[4] = wli[IV1] + cfmax;
113+
// spd[0] = wr(ivx, i) - cfmax;
114+
// spd[4] = wl(ivx, i) + cfmax;
136115
// }
137116

138117
//--- Step 3. Compute L/R fluxes
139118

140-
Real ptl = wli[IPR] + pbl; // total pressures L,R
141-
Real ptr = wri[IPR] + pbr;
119+
Real ptl = wl(IPR, i) + pbl; // total pressures L,R
120+
Real ptr = wr(IPR, i) + pbr;
142121

143122
fl.d = ul.mx;
144-
fl.mx = ul.mx * wli[IV1] + ptl - bxsq;
145-
fl.my = ul.my * wli[IV1] - bxi * ul.by;
146-
fl.mz = ul.mz * wli[IV1] - bxi * ul.bz;
147-
fl.e = wli[IV1] * (ul.e + ptl - bxsq) - bxi * (wli[IV2] * ul.by + wli[IV3] * ul.bz);
148-
fl.by = ul.by * wli[IV1] - bxi * wli[IV2];
149-
fl.bz = ul.bz * wli[IV1] - bxi * wli[IV3];
123+
fl.mx = ul.mx * wl(ivx, i) + ptl - bxsq;
124+
fl.my = ul.my * wl(ivx, i) - bxi * ul.by;
125+
fl.mz = ul.mz * wl(ivx, i) - bxi * ul.bz;
126+
fl.e = wl(ivx, i) * (ul.e + ptl - bxsq) -
127+
bxi * (wl(ivy, i) * ul.by + wl(ivz, i) * ul.bz);
128+
fl.by = ul.by * wl(ivx, i) - bxi * wl(ivy, i);
129+
fl.bz = ul.bz * wl(ivx, i) - bxi * wl(ivz, i);
150130

151131
fr.d = ur.mx;
152-
fr.mx = ur.mx * wri[IV1] + ptr - bxsq;
153-
fr.my = ur.my * wri[IV1] - bxi * ur.by;
154-
fr.mz = ur.mz * wri[IV1] - bxi * ur.bz;
155-
fr.e = wri[IV1] * (ur.e + ptr - bxsq) - bxi * (wri[IV2] * ur.by + wri[IV3] * ur.bz);
156-
fr.by = ur.by * wri[IV1] - bxi * wri[IV2];
157-
fr.bz = ur.bz * wri[IV1] - bxi * wri[IV3];
132+
fr.mx = ur.mx * wr(ivx, i) + ptr - bxsq;
133+
fr.my = ur.my * wr(ivx, i) - bxi * ur.by;
134+
fr.mz = ur.mz * wr(ivx, i) - bxi * ur.bz;
135+
fr.e = wr(ivx, i) * (ur.e + ptr - bxsq) -
136+
bxi * (wr(ivy, i) * ur.by + wr(ivz, i) * ur.bz);
137+
fr.by = ur.by * wr(ivx, i) - bxi * wr(ivy, i);
138+
fr.bz = ur.bz * wr(ivx, i) - bxi * wr(ivz, i);
158139

159140
//--- Step 4. Compute middle and Alfven wave speeds
160141

161-
Real sdl = spd[0] - wli[IV1]; // S_i-u_i (i=L or R)
162-
Real sdr = spd[4] - wri[IV1];
142+
Real sdl = spd[0] - wl(ivx, i); // S_i-u_i (i=L or R)
143+
Real sdr = spd[4] - wr(ivx, i);
163144

164145
// S_M: eqn (38) of Miyoshi & Kusano
165146
// (KGF): group ptl, ptr terms for floating-point associativity symmetry
@@ -184,8 +165,8 @@ struct Riemann<Fluid::glmmhd, RiemannSolver::hlld> {
184165
//--- Step 5. Compute intermediate states
185166
// eqn (23) explicitly becomes eq (41) of Miyoshi & Kusano
186167
// TODO(felker): place an assertion that ptstl==ptstr
187-
Real ptstl = ptl + ul.d * sdl * (spd[2] - wli[IV1]);
188-
Real ptstr = ptr + ur.d * sdr * (spd[2] - wri[IV1]);
168+
Real ptstl = ptl + ul.d * sdl * (spd[2] - wl(ivx, i));
169+
Real ptstr = ptr + ur.d * sdr * (spd[2] - wr(ivx, i));
189170
// Real ptstl = ptl + ul.d*sdl*(sdl-sdml); // these equations had issues when
190171
// averaged Real ptstr = ptr + ur.d*sdr*(sdr-sdmr);
191172
Real ptst = 0.5 * (ptstr + ptstl); // total pressure (star state)
@@ -194,16 +175,16 @@ struct Riemann<Fluid::glmmhd, RiemannSolver::hlld> {
194175
ulst.mx = ulst.d * spd[2];
195176
if (std::abs(ul.d * sdl * sdml - bxsq) < (SMALL_NUMBER)*ptst) {
196177
// Degenerate case
197-
ulst.my = ulst.d * wli[IV2];
198-
ulst.mz = ulst.d * wli[IV3];
178+
ulst.my = ulst.d * wl(ivy, i);
179+
ulst.mz = ulst.d * wl(ivz, i);
199180

200181
ulst.by = ul.by;
201182
ulst.bz = ul.bz;
202183
} else {
203184
// eqns (44) and (46) of M&K
204185
Real tmp = bxi * (sdl - sdml) / (ul.d * sdl * sdml - bxsq);
205-
ulst.my = ulst.d * (wli[IV2] - ul.by * tmp);
206-
ulst.mz = ulst.d * (wli[IV3] - ul.bz * tmp);
186+
ulst.my = ulst.d * (wl(ivy, i) - ul.by * tmp);
187+
ulst.mz = ulst.d * (wl(ivz, i) - ul.bz * tmp);
207188

208189
// eqns (45) and (47) of M&K
209190
tmp = (ul.d * SQR(sdl) - bxsq) / (ul.d * sdl * sdml - bxsq);
@@ -215,24 +196,25 @@ struct Riemann<Fluid::glmmhd, RiemannSolver::hlld> {
215196
Real vbstl = (ulst.mx * bxi + (ulst.my * ulst.by + ulst.mz * ulst.bz)) * ulst_d_inv;
216197
// eqn (48) of M&K
217198
// (KGF): group transverse by, bz terms for floating-point associativity symmetry
218-
ulst.e = (sdl * ul.e - ptl * wli[IV1] + ptst * spd[2] +
219-
bxi * (wli[IV1] * bxi + (wli[IV2] * ul.by + wli[IV3] * ul.bz) - vbstl)) *
220-
sdml_inv;
199+
ulst.e =
200+
(sdl * ul.e - ptl * wl(ivx, i) + ptst * spd[2] +
201+
bxi * (wl(ivx, i) * bxi + (wl(ivy, i) * ul.by + wl(ivz, i) * ul.bz) - vbstl)) *
202+
sdml_inv;
221203

222204
// ur* - eqn (39) of M&K
223205
urst.mx = urst.d * spd[2];
224206
if (std::abs(ur.d * sdr * sdmr - bxsq) < (SMALL_NUMBER)*ptst) {
225207
// Degenerate case
226-
urst.my = urst.d * wri[IV2];
227-
urst.mz = urst.d * wri[IV3];
208+
urst.my = urst.d * wr(ivy, i);
209+
urst.mz = urst.d * wr(ivz, i);
228210

229211
urst.by = ur.by;
230212
urst.bz = ur.bz;
231213
} else {
232214
// eqns (44) and (46) of M&K
233215
Real tmp = bxi * (sdr - sdmr) / (ur.d * sdr * sdmr - bxsq);
234-
urst.my = urst.d * (wri[IV2] - ur.by * tmp);
235-
urst.mz = urst.d * (wri[IV3] - ur.bz * tmp);
216+
urst.my = urst.d * (wr(ivy, i) - ur.by * tmp);
217+
urst.mz = urst.d * (wr(ivz, i) - ur.bz * tmp);
236218

237219
// eqns (45) and (47) of M&K
238220
tmp = (ur.d * SQR(sdr) - bxsq) / (ur.d * sdr * sdmr - bxsq);
@@ -244,9 +226,10 @@ struct Riemann<Fluid::glmmhd, RiemannSolver::hlld> {
244226
Real vbstr = (urst.mx * bxi + (urst.my * urst.by + urst.mz * urst.bz)) * urst_d_inv;
245227
// eqn (48) of M&K
246228
// (KGF): group transverse by, bz terms for floating-point associativity symmetry
247-
urst.e = (sdr * ur.e - ptr * wri[IV1] + ptst * spd[2] +
248-
bxi * (wri[IV1] * bxi + (wri[IV2] * ur.by + wri[IV3] * ur.bz) - vbstr)) *
249-
sdmr_inv;
229+
urst.e =
230+
(sdr * ur.e - ptr * wr(ivx, i) + ptst * spd[2] +
231+
bxi * (wr(ivx, i) * bxi + (wr(ivy, i) * ur.by + wr(ivz, i) * ur.bz) - vbstr)) *
232+
sdmr_inv;
250233
// ul** and ur** - if Bx is near zero, same as *-states
251234
if (0.5 * bxsq < (SMALL_NUMBER)*ptst) {
252235
uldst = ulst;
@@ -327,69 +310,69 @@ struct Riemann<Fluid::glmmhd, RiemannSolver::hlld> {
327310

328311
if (spd[0] >= 0.0) {
329312
// return Fl if flow is supersonic
330-
flxi[IDN] = fl.d;
331-
flxi[IV1] = fl.mx;
332-
flxi[IV2] = fl.my;
333-
flxi[IV3] = fl.mz;
313+
flx(IDN, i) = fl.d;
314+
flx(ivx, i) = fl.mx;
315+
flx(ivy, i) = fl.my;
316+
flx(ivz, i) = fl.mz;
334317
flxi[IEN] = fl.e;
335-
flxi[IB2] = fl.by;
336-
flxi[IB3] = fl.bz;
318+
flx(iBy, i) = fl.by;
319+
flx(iBz, i) = fl.bz;
337320
} else if (spd[4] <= 0.0) {
338321
// return Fr if flow is supersonic
339-
flxi[IDN] = fr.d;
340-
flxi[IV1] = fr.mx;
341-
flxi[IV2] = fr.my;
342-
flxi[IV3] = fr.mz;
322+
flx(IDN, i) = fr.d;
323+
flx(ivx, i) = fr.mx;
324+
flx(ivy, i) = fr.my;
325+
flx(ivz, i) = fr.mz;
343326
flxi[IEN] = fr.e;
344-
flxi[IB2] = fr.by;
345-
flxi[IB3] = fr.bz;
327+
flx(iBy, i) = fr.by;
328+
flx(iBz, i) = fr.bz;
346329
} else if (spd[1] >= 0.0) {
347330
// return Fl*
348-
flxi[IDN] = fl.d + ulst.d;
349-
flxi[IV1] = fl.mx + ulst.mx;
350-
flxi[IV2] = fl.my + ulst.my;
351-
flxi[IV3] = fl.mz + ulst.mz;
331+
flx(IDN, i) = fl.d + ulst.d;
332+
flx(ivx, i) = fl.mx + ulst.mx;
333+
flx(ivy, i) = fl.my + ulst.my;
334+
flx(ivz, i) = fl.mz + ulst.mz;
352335
flxi[IEN] = fl.e + ulst.e;
353-
flxi[IB2] = fl.by + ulst.by;
354-
flxi[IB3] = fl.bz + ulst.bz;
336+
flx(iBy, i) = fl.by + ulst.by;
337+
flx(iBz, i) = fl.bz + ulst.bz;
355338
} else if (spd[2] >= 0.0) {
356339
// return Fl**
357-
flxi[IDN] = fl.d + ulst.d + uldst.d;
358-
flxi[IV1] = fl.mx + ulst.mx + uldst.mx;
359-
flxi[IV2] = fl.my + ulst.my + uldst.my;
360-
flxi[IV3] = fl.mz + ulst.mz + uldst.mz;
340+
flx(IDN, i) = fl.d + ulst.d + uldst.d;
341+
flx(ivx, i) = fl.mx + ulst.mx + uldst.mx;
342+
flx(ivy, i) = fl.my + ulst.my + uldst.my;
343+
flx(ivz, i) = fl.mz + ulst.mz + uldst.mz;
361344
flxi[IEN] = fl.e + ulst.e + uldst.e;
362-
flxi[IB2] = fl.by + ulst.by + uldst.by;
363-
flxi[IB3] = fl.bz + ulst.bz + uldst.bz;
345+
flx(iBy, i) = fl.by + ulst.by + uldst.by;
346+
flx(iBz, i) = fl.bz + ulst.bz + uldst.bz;
364347
} else if (spd[3] > 0.0) {
365348
// return Fr**
366-
flxi[IDN] = fr.d + urst.d + urdst.d;
367-
flxi[IV1] = fr.mx + urst.mx + urdst.mx;
368-
flxi[IV2] = fr.my + urst.my + urdst.my;
369-
flxi[IV3] = fr.mz + urst.mz + urdst.mz;
349+
flx(IDN, i) = fr.d + urst.d + urdst.d;
350+
flx(ivx, i) = fr.mx + urst.mx + urdst.mx;
351+
flx(ivy, i) = fr.my + urst.my + urdst.my;
352+
flx(ivz, i) = fr.mz + urst.mz + urdst.mz;
370353
flxi[IEN] = fr.e + urst.e + urdst.e;
371-
flxi[IB2] = fr.by + urst.by + urdst.by;
372-
flxi[IB3] = fr.bz + urst.bz + urdst.bz;
354+
flx(iBy, i) = fr.by + urst.by + urdst.by;
355+
flx(iBz, i) = fr.bz + urst.bz + urdst.bz;
373356
} else {
374357
// return Fr*
375-
flxi[IDN] = fr.d + urst.d;
376-
flxi[IV1] = fr.mx + urst.mx;
377-
flxi[IV2] = fr.my + urst.my;
378-
flxi[IV3] = fr.mz + urst.mz;
358+
flx(IDN, i) = fr.d + urst.d;
359+
flx(ivx, i) = fr.mx + urst.mx;
360+
flx(ivy, i) = fr.my + urst.my;
361+
flx(ivz, i) = fr.mz + urst.mz;
379362
flxi[IEN] = fr.e + urst.e;
380-
flxi[IB2] = fr.by + urst.by;
381-
flxi[IB3] = fr.bz + urst.bz;
363+
flx(iBy, i) = fr.by + urst.by;
364+
flx(iBz, i) = fr.bz + urst.bz;
382365
}
383366

384-
flx(IDN, i) = flxi[IDN];
385-
flx(ivx, i) = flxi[IV1];
386-
flx(ivy, i) = flxi[IV2];
387-
flx(ivz, i) = flxi[IV3];
367+
flx(IDN, i) = flx(IDN, i);
368+
flx(ivx, i) = flx(ivx, i);
369+
flx(ivy, i) = flx(ivy, i);
370+
flx(ivz, i) = flx(ivz, i);
388371
flx(IEN, i) = flxi[IEN];
389-
flx(iBx, i) = flxi[IB1];
390-
flx(iBy, i) = flxi[IB2];
391-
flx(iBz, i) = flxi[IB3];
392-
flx(IPS, i) = flxi[IPS];
372+
flx(iBx, i) = flx(iBx, i);
373+
flx(iBy, i) = flx(iBy, i);
374+
flx(iBz, i) = flx(iBz, i);
375+
flx(IPS, i) = flx(IPS, i);
393376
});
394377
}
395378
};

0 commit comments

Comments
 (0)