Skip to content

Commit 51b6f8b

Browse files
authored
Merge pull request #434 from mwarusz/precision
avoiding float to double promotion
2 parents a07596d + b79e7f3 commit 51b6f8b

28 files changed

+233
-195
lines changed

libmpdata++/concurr/detail/sharedmem.hpp

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -32,9 +32,8 @@ namespace libmpdataxx
3232
static_assert(n_dims > 0, "n_dims <= 0");
3333
static_assert(n_tlev > 0, "n_tlev <= 0");
3434

35-
// TODO: T_sumtype (perhaps worh using double even if summing floats?)
3635
std::unique_ptr<blitz::Array<real_t, 1>> xtmtmp;
37-
std::unique_ptr<blitz::Array<real_t, 1>> sumtmp;
36+
std::unique_ptr<blitz::Array<double, 1>> sumtmp;
3837

3938
protected:
4039

@@ -93,12 +92,12 @@ namespace libmpdataxx
9392
throw std::runtime_error("number of subdomains greater than number of gridpoints");
9493

9594
if (n_dims != 1)
96-
sumtmp.reset(new blitz::Array<real_t, 1>(grid_size[0]));
95+
sumtmp.reset(new blitz::Array<double, 1>(grid_size[0]));
9796
xtmtmp.reset(new blitz::Array<real_t, 1>(size));
9897
}
9998

10099
/// @brief concurrency-aware summation of array elements
101-
real_t sum(const arr_t &arr, const idx_t<n_dims> &ijk, const bool sum_khn)
100+
double sum(const arr_t &arr, const idx_t<n_dims> &ijk, const bool sum_khn)
102101
{
103102
// doing a two-step sum to reduce numerical error
104103
// and make parallel results reproducible
@@ -114,7 +113,7 @@ namespace libmpdataxx
114113
(*sumtmp)(c) = blitz::sum(arr(slice_idx));
115114
}
116115
barrier();
117-
real_t result;
116+
double result;
118117
if (sum_khn)
119118
result = blitz::kahan_sum(*sumtmp);
120119
else
@@ -124,7 +123,7 @@ namespace libmpdataxx
124123
}
125124

126125
/// @brief concurrency-aware summation of a (element-wise) product of two arrays
127-
real_t sum(const arr_t &arr1, const arr_t &arr2, const idx_t<n_dims> &ijk, const bool sum_khn)
126+
double sum(const arr_t &arr1, const arr_t &arr2, const idx_t<n_dims> &ijk, const bool sum_khn)
128127
{
129128
// doing a two-step sum to reduce numerical error
130129
// and make parallel results reproducible
@@ -140,7 +139,7 @@ namespace libmpdataxx
140139
(*sumtmp)(c) = blitz::sum(arr1(slice_idx) * arr2(slice_idx));
141140
}
142141
barrier();
143-
real_t result;
142+
double result;
144143
if (sum_khn)
145144
result = blitz::kahan_sum(*sumtmp);
146145
else

libmpdata++/formulae/common.hpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,12 @@ namespace libmpdataxx
1313
{
1414
namespace formulae
1515
{
16+
// helper to cast floating literals to correct precision based on the blitz array underlaying type
17+
template<class arr_t>
18+
constexpr auto fconst(const double v)
19+
{
20+
return static_cast<typename arr_t::T_numtype>(v);
21+
}
1622
// overloads of abs/min/max/where that pick out the correct version based on ix_t
1723
template<class ix_t, class arg_t>
1824
forceinline_macro auto abs(const arg_t &a, typename std::enable_if<std::is_same<ix_t, int>::value>::type* = 0)

libmpdata++/formulae/mpdata/formulae_mpdata_1d.hpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -130,7 +130,7 @@ namespace libmpdataxx
130130
)
131131
{
132132
return return_helper<ix_t>(
133-
- 1.0 / 24 *
133+
- fconst<arr_1d_t>(1.0 / 24) *
134134
(
135135
4 * GC[0](i+h) * ndxx_psi<opts>(psi, i)
136136
+ 2 * ndx_psi<opts>(psi, i) * ndx_GC0(GC[0], i)
@@ -170,9 +170,9 @@ namespace libmpdataxx
170170
// spatial terms
171171
+ div_3rd_spatial<opts, sptl_intrp>(psi, GC, G, i)
172172
// mixed terms
173-
+ 0.5 * abs(GC[0](i+h)) * ndx_fdiv<opts>(psi, GC, G, i)
173+
+ fconst<arr_1d_t>(0.5) * abs(GC[0](i+h)) * ndx_fdiv<opts>(psi, GC, G, i)
174174
// temporal terms
175-
+ 1.0 / 24 *
175+
+ fconst<arr_1d_t>(1.0 / 24) *
176176
(
177177
- 8 * GC[0](i+h) * nfdiv_fdiv<opts>(psi, GC, G, i)
178178
+ div_3rd_temporal<opts, tmprl_extrp>(psi, ndtt_GC, i)

libmpdata++/formulae/mpdata/formulae_mpdata_2d.hpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -145,7 +145,7 @@ namespace libmpdataxx
145145
)
146146
{
147147
return return_helper<ix_t>(
148-
- 1.0 / 24 *
148+
- fconst<arr_2d_t>(1.0 / 24) *
149149
(
150150
4 * GC[dim](pi<dim>(i+h, j)) * ndxx_psi<opts, dim>(psi, i, j)
151151
+ 2 * ndx_psi<opts, dim>(psi, i, j) * ndx_GC0<dim>(GC[dim], i, j)
@@ -193,9 +193,9 @@ namespace libmpdataxx
193193
// spatial terms
194194
+ div_3rd_spatial<opts, dim, sptl_intrp>(psi_np1, GC, G, i, j)
195195
// mixed terms
196-
+ 0.5 * abs(GC[dim](pi<dim>(i+h, j))) * ndx_fdiv<opts, dim>(psi_np1, GC, G, i, j)
196+
+ fconst<arr_2d_t>(0.5) * abs(GC[dim](pi<dim>(i+h, j))) * ndx_fdiv<opts, dim>(psi_np1, GC, G, i, j)
197197
// temporal terms
198-
+ 1.0 / 24 *
198+
+ fconst<arr_2d_t>(1.0 / 24) *
199199
(
200200
- 8 * GC[dim](pi<dim>(i+h, j)) * nfdiv_fdiv<opts, dim>(psi_np1, GC, G, i, j)
201201
+ div_3rd_temporal<opts, dim, tmprl_extrp>(psi_np1, ndtt_GC, i, j)
@@ -226,9 +226,9 @@ namespace libmpdataxx
226226
// spatial terms
227227
+ div_3rd_spatial<opts, dim, sptl_intrp>(psi_np1, GC, G, i, j)
228228
// mixed terms
229-
- 0.5 * abs(GC[dim](pi<dim>(i+h, j))) * ndtx_psi<opts, dim>(psi_np1, psi_n, i, j)
229+
- fconst<arr_2d_t>(0.5) * abs(GC[dim](pi<dim>(i+h, j))) * ndtx_psi<opts, dim>(psi_np1, psi_n, i, j)
230230
// temporal terms
231-
+ 1.0 / 24 *
231+
+ fconst<arr_2d_t>(1.0 / 24) *
232232
(
233233
+ 8 * GC[dim](pi<dim>(i+h, j)) * nfdiv_dt<opts, dim>(psi_np1, psi_n, GC, G, i, j)
234234
+ 1 * ndtt_GC0<opts, dim>(psi_np1, ndtt_GC[dim], i, j)

libmpdata++/formulae/mpdata/formulae_mpdata_3d.hpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -154,7 +154,7 @@ namespace libmpdataxx
154154
)
155155
{
156156
return return_helper<ix_t>(
157-
- 1.0 / 24 *
157+
- fconst<arr_3d_t>(1.0 / 24) *
158158
(
159159
4 * GC[dim](pi<dim>(i+h, j, k)) * ndxx_psi<opts, dim>(psi, i, j, k)
160160
+ 2 * ndx_psi<opts, dim>(psi, i, j, k) * ndx_GC0<dim>(GC[dim], i, j, k)
@@ -204,9 +204,9 @@ namespace libmpdataxx
204204
// spatial terms
205205
+ div_3rd_spatial<opts, dim, sptl_intrp>(psi_np1, GC, G, i, j, k)
206206
// mixed terms
207-
+ 0.5 * abs(GC[dim](pi<dim>(i+h, j, k))) * ndx_fdiv<opts, dim>(psi_np1, GC, G, i, j, k)
207+
+ fconst<arr_3d_t>(0.5) * abs(GC[dim](pi<dim>(i+h, j, k))) * ndx_fdiv<opts, dim>(psi_np1, GC, G, i, j, k)
208208
// temporal terms
209-
+ 1.0 / 24 *
209+
+ fconst<arr_3d_t>(1.0 / 24) *
210210
(
211211
- 8 * GC[dim](pi<dim>(i+h, j, k)) * nfdiv_fdiv<opts, dim>(psi_np1, GC, G, i, j, k)
212212
+ div_3rd_temporal<opts, dim, tmprl_extrp>(psi_np1, ndtt_GC, i, j, k)
@@ -238,9 +238,9 @@ namespace libmpdataxx
238238
// spatial terms
239239
+ div_3rd_spatial<opts, dim, sptl_intrp>(psi_np1, GC, G, i, j, k)
240240
// mixed terms
241-
- 0.5 * abs(GC[dim](pi<dim>(i+h, j, k))) * ndtx_psi<opts, dim>(psi_np1, psi_n, i, j, k)
241+
- fconst<arr_3d_t>(0.5) * abs(GC[dim](pi<dim>(i+h, j, k))) * ndtx_psi<opts, dim>(psi_np1, psi_n, i, j, k)
242242
// temporal terms
243-
+ 1.0 / 24 *
243+
+ fconst<arr_3d_t>(1.0 / 24) *
244244
(
245245
+ 8 * GC[dim](pi<dim>(i+h, j, k)) * nfdiv_dt<opts, dim>(psi_np1, psi_n, GC, G, i, j, k)
246246
+ div_3rd_temporal<opts, dim, tmprl_extrp>(psi_np1, ndtt_GC, i, j, k)

libmpdata++/formulae/mpdata/formulae_mpdata_common.hpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,10 @@ namespace libmpdataxx
4040
using idxperm::pi;
4141
using opts::opts_t;
4242
using std::abs;
43+
44+
using blitz::pow2;
45+
using blitz::pow3;
46+
using blitz::pow4;
4347

4448
const int n_tlev = 2;
4549

libmpdata++/formulae/mpdata/formulae_mpdata_dfl_1d.hpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ namespace libmpdataxx
3838
)
3939
{
4040
return return_helper<ix_t>(
41-
- 0.5 * GC(i+h)
41+
- fconst<arr_1d_t>(0.5) * GC(i+h)
4242
/
4343
(formulae::G<opts>(G, i+1) + formulae::G<opts>(G, i))
4444
*
@@ -56,13 +56,13 @@ namespace libmpdataxx
5656
)
5757
{
5858
return return_helper<ix_t>(
59-
- 0.5 * GC(i+h)
59+
- fconst<arr_1d_t>(0.5) * GC(i+h)
6060
/
6161
(formulae::G<opts>(G, i+1) + formulae::G<opts>(G, i))
6262
*
6363
(GC((i+1)+h) - GC(i-h))
6464
*
65-
0.5 * (psi(i+1) + psi(i)) //to be compatible with iga formulation
65+
fconst<arr_1d_t>(0.5) * (psi(i+1) + psi(i)) //to be compatible with iga formulation
6666
);
6767
}
6868
} // namespace mpdata

libmpdata++/formulae/mpdata/formulae_mpdata_dfl_2d.hpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ namespace libmpdataxx
4242
)
4343
{
4444
return return_helper<ix_t>(
45-
- 0.25 * GC[dim](pi<dim>(i+h, j))
45+
- fconst<arr_2d_t>(0.25) * GC[dim](pi<dim>(i+h, j))
4646
/
4747
G_bar_x<opts, dim>(G, i, j)
4848
*
@@ -74,7 +74,7 @@ namespace libmpdataxx
7474
)
7575
{
7676
return return_helper<ix_t>(
77-
- 0.25 * GC[dim](pi<dim>(i+h, j))
77+
- fconst<arr_2d_t>(0.25) * GC[dim](pi<dim>(i+h, j))
7878
/
7979
G_bar_x<opts, dim>(G, i, j)
8080
*

libmpdata++/formulae/mpdata/formulae_mpdata_dfl_3d.hpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ namespace libmpdataxx
4444
)
4545
{
4646
return return_helper<ix_t>(
47-
- 0.25 * GC[dim](pi<dim>(i+h, j, k))
47+
- fconst<arr_3d_t>(0.25) * GC[dim](pi<dim>(i+h, j, k))
4848
/
4949
G_bar_x<opts, dim>(G, i, j, k)
5050
*
@@ -84,7 +84,7 @@ namespace libmpdataxx
8484
)
8585
{
8686
return return_helper<ix_t>(
87-
- 0.25 * GC[dim](pi<dim>(i+h, j, k))
87+
- fconst<arr_3d_t>(0.25) * GC[dim](pi<dim>(i+h, j, k))
8888
/
8989
G_bar_x<opts, dim>(G, i, j, k)
9090
*

libmpdata++/formulae/mpdata/formulae_mpdata_hot_1d.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ namespace libmpdataxx
2626
return return_helper<ix_t>(
2727
(
2828
3 * GC(i+h) * abs(GC(i+h)) / G_bar_x<opts>(G, i)
29-
- 2 * pow(GC(i+h), 3) / pow(G_bar_x<opts>(G, i), 2)
29+
- 2 * pow3(GC(i+h)) / pow2(G_bar_x<opts>(G, i))
3030
- GC(i+h)
3131
) / 6
3232
);

0 commit comments

Comments
 (0)