Skip to content

Commit 64cb627

Browse files
authored
some NSE table optimizations (#1914)
With C++20 we can switch from the M_LN10 macro to std::numbers::ln10.
1 parent 7d144e3 commit 64cb627

File tree

3 files changed

+26
-17
lines changed

3 files changed

+26
-17
lines changed

networks/aprox19/nse_table_size.H

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,5 +27,10 @@ namespace nse_table_size {
2727
constexpr amrex::Real ye_max = 0.5;
2828
constexpr amrex::Real dye = 0.0025;
2929

30+
constexpr amrex::Real inv_dlogT = 1.0_rt / dlogT;
31+
constexpr amrex::Real inv_dlogrho = 1.0_rt / dlogrho;
32+
constexpr amrex::Real inv_dye = 1.0_rt / dye;
33+
34+
3035
}
3136
#endif

nse_tabular/make_nse_table.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,10 @@ def output_header(Ts, rhos, yes):
119119
nse_h.write(f" constexpr amrex::Real ye_max = {yes.max()};\n")
120120
nse_h.write(f" constexpr amrex::Real dye = {(yes.max() - yes.min()) / (len(yes) - 1)};\n\n")
121121

122+
nse_h.write(" constexpr amrex::Real inv_dlogT = 1.0_rt / dlogT;\n")
123+
nse_h.write(" constexpr amrex::Real inv_dlogrho = 1.0_rt / dlogrho;\n")
124+
nse_h.write(" constexpr amrex::Real inv_dye = 1.0_rt / dye;\n")
125+
122126
nse_h.write("}\n")
123127
nse_h.write("#endif\n")
124128

nse_tabular/nse_table.H

Lines changed: 17 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ using namespace network_rp;
2222
///
2323
/// given a rho, T, and Ye index, return the 1-d index into the NSE table arrays
2424
///
25-
AMREX_GPU_HOST_DEVICE AMREX_INLINE
25+
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
2626
int nse_idx(const int ir, const int it, const int ic) {
2727
// this uses a 1-based indexing
2828
return (ir-1) * nse_table_size::ntemp * nse_table_size::nye + (it-1) * nse_table_size::nye + ic;
@@ -78,38 +78,38 @@ void init_nse() {
7878

7979
}
8080

81-
AMREX_GPU_HOST_DEVICE AMREX_INLINE
81+
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
8282
amrex::Real nse_table_logT(const int it) {
8383
return nse_table_size::logT_min + static_cast<amrex::Real>(it-1) * nse_table_size::dlogT;
8484
}
8585

86-
AMREX_GPU_HOST_DEVICE AMREX_INLINE
86+
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
8787
amrex::Real nse_table_logrho(const int ir) {
8888
return nse_table_size::logrho_min + static_cast<amrex::Real>(ir-1) * nse_table_size::dlogrho;
8989
}
9090

91-
AMREX_GPU_HOST_DEVICE AMREX_INLINE
91+
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
9292
amrex::Real nse_table_ye(const int ic) {
9393
return nse_table_size::ye_max - static_cast<amrex::Real>(ic-1) * nse_table_size::dye;
9494
}
9595

9696
// return the index in the table such that logrho[irho] < input density
9797
// note: this is a 1-based index
98-
AMREX_GPU_HOST_DEVICE AMREX_INLINE
98+
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
9999
int nse_get_logrho_index(const amrex::Real logrho) {
100100

101-
int ir0 = static_cast<int>((logrho - nse_table_size::logrho_min) /
102-
nse_table_size::dlogrho - 1.e-6_rt);
101+
int ir0 = static_cast<int>((logrho - nse_table_size::logrho_min) *
102+
nse_table_size::inv_dlogrho - 1.e-6_rt);
103103
return ir0 + 1;
104104
}
105105

106106
// return the index in the table such that logT[it] < input temperature
107107
// note: this is a 1-based index
108-
AMREX_GPU_HOST_DEVICE AMREX_INLINE
108+
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
109109
int nse_get_logT_index(const amrex::Real logT) {
110110

111-
int it0 = static_cast<int>((logT - nse_table_size::logT_min) /
112-
nse_table_size::dlogT - 1.e-6_rt);
111+
int it0 = static_cast<int>((logT - nse_table_size::logT_min) *
112+
nse_table_size::inv_dlogT - 1.e-6_rt);
113113
return it0 + 1;
114114
}
115115

@@ -118,8 +118,8 @@ int nse_get_logT_index(const amrex::Real logT) {
118118
AMREX_GPU_HOST_DEVICE AMREX_INLINE
119119
int nse_get_ye_index(const amrex::Real ye) {
120120

121-
int ic0 = static_cast<int>((nse_table_size::ye_max - ye) /
122-
nse_table_size::dye - 1.0e-6_rt);
121+
int ic0 = static_cast<int>((nse_table_size::ye_max - ye) *
122+
nse_table_size::inv_dye - 1.0e-6_rt);
123123
return ic0 + 1;
124124
}
125125

@@ -189,9 +189,9 @@ amrex::Real trilinear(const int ir1, const int it1, const int ic1,
189189
amrex::Real r0 = nse_table_logrho(ir1);
190190
amrex::Real x0 = nse_table_ye(ic1);
191191

192-
amrex::Real td = (temp - t0) / nse_table_size::dlogT;
193-
amrex::Real rd = (rho - r0) / nse_table_size::dlogrho;
194-
amrex::Real xd = (x0 - ye) / nse_table_size::dye;
192+
amrex::Real td = (temp - t0) * nse_table_size::inv_dlogT;
193+
amrex::Real rd = (rho - r0) * nse_table_size::inv_dlogrho;
194+
amrex::Real xd = (x0 - ye) * nse_table_size::inv_dye;
195195
xd = amrex::max(0.0_rt, xd);
196196

197197
amrex::Real omtd = 1.0_rt - td;
@@ -529,7 +529,7 @@ nse_interp_dT(const amrex::Real temp, const amrex::Real rho, const amrex::Real y
529529
// note: this is returning the derivative wrt log10(T), so we need to
530530
// convert to d/dT
531531

532-
amrex::Real ddatadT = tricubic_dT(ir0, it0, ic0, rholog, tlog, yet, data) / (std::log(10.0_rt) * temp);
532+
amrex::Real ddatadT = tricubic_dT(ir0, it0, ic0, rholog, tlog, yet, data) / (M_LN10 * temp);
533533

534534
return ddatadT;
535535

@@ -582,7 +582,7 @@ nse_interp_drho(const amrex::Real temp, const amrex::Real rho, const amrex::Real
582582
// note: this is returning the derivative wrt log10(rho), so we need to
583583
// convert to d/drho
584584

585-
amrex::Real ddatadrho = tricubic_drho(ir0, it0, ic0, rholog, tlog, yet, data) / (std::log(10.0_rt) * rho);
585+
amrex::Real ddatadrho = tricubic_drho(ir0, it0, ic0, rholog, tlog, yet, data) / (M_LN10 * rho);
586586

587587
return ddatadrho;
588588

0 commit comments

Comments
 (0)