igfuw
diff --git a/‎libmpdata++/concurr/detail/sharedmem.hpp‎
Lines changed: 6 additions & 7 deletions b/‎libmpdata++/concurr/detail/sharedmem.hpp‎
Lines changed: 6 additions & 7 deletions
diff --git a/‎libmpdata++/formulae/common.hpp‎
Lines changed: 6 additions & 0 deletions b/‎libmpdata++/formulae/common.hpp‎
Lines changed: 6 additions & 0 deletions
diff --git a/‎libmpdata++/formulae/mpdata/formulae_mpdata_1d.hpp‎
Lines changed: 3 additions & 3 deletions b/‎libmpdata++/formulae/mpdata/formulae_mpdata_1d.hpp‎
Lines changed: 3 additions & 3 deletions
diff --git a/‎libmpdata++/formulae/mpdata/formulae_mpdata_2d.hpp‎
Lines changed: 5 additions & 5 deletions b/‎libmpdata++/formulae/mpdata/formulae_mpdata_2d.hpp‎
Lines changed: 5 additions & 5 deletions
diff --git a/‎libmpdata++/formulae/mpdata/formulae_mpdata_3d.hpp‎
Lines changed: 5 additions & 5 deletions b/‎libmpdata++/formulae/mpdata/formulae_mpdata_3d.hpp‎
Lines changed: 5 additions & 5 deletions
diff --git a/‎libmpdata++/formulae/mpdata/formulae_mpdata_common.hpp‎
Lines changed: 4 additions & 0 deletions b/‎libmpdata++/formulae/mpdata/formulae_mpdata_common.hpp‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎libmpdata++/formulae/mpdata/formulae_mpdata_dfl_1d.hpp‎
Lines changed: 3 additions & 3 deletions b/‎libmpdata++/formulae/mpdata/formulae_mpdata_dfl_1d.hpp‎
Lines changed: 3 additions & 3 deletions
diff --git a/‎libmpdata++/formulae/mpdata/formulae_mpdata_dfl_2d.hpp‎
Lines changed: 2 additions & 2 deletions b/‎libmpdata++/formulae/mpdata/formulae_mpdata_dfl_2d.hpp‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎libmpdata++/formulae/mpdata/formulae_mpdata_dfl_3d.hpp‎
Lines changed: 2 additions & 2 deletions b/‎libmpdata++/formulae/mpdata/formulae_mpdata_dfl_3d.hpp‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎libmpdata++/formulae/mpdata/formulae_mpdata_hot_1d.hpp‎
Lines changed: 1 addition & 1 deletion b/‎libmpdata++/formulae/mpdata/formulae_mpdata_hot_1d.hpp‎
Lines changed: 1 addition & 1 deletion
@@ -32,9 +32,8 @@ namespace libmpdataxx
 	static_assert(n_dims > 0, "n_dims <= 0");
 	static_assert(n_tlev > 0, "n_tlev <= 0");
 
-        // TODO: T_sumtype (perhaps worh using double even if summing floats?)
         std::unique_ptr<blitz::Array<real_t, 1>> xtmtmp; 
-        std::unique_ptr<blitz::Array<real_t, 1>> sumtmp;
+        std::unique_ptr<blitz::Array<double, 1>> sumtmp;
 
         protected:
 
@@ -93,12 +92,12 @@ namespace libmpdataxx
             throw std::runtime_error("number of subdomains greater than number of gridpoints");
 
           if (n_dims != 1) 
-            sumtmp.reset(new blitz::Array<real_t, 1>(grid_size[0]));
+            sumtmp.reset(new blitz::Array<double, 1>(grid_size[0]));
           xtmtmp.reset(new blitz::Array<real_t, 1>(size));
         }
 
         /// @brief concurrency-aware summation of array elements
-        real_t sum(const arr_t &arr, const idx_t<n_dims> &ijk, const bool sum_khn)
+        double sum(const arr_t &arr, const idx_t<n_dims> &ijk, const bool sum_khn)
         {
 	  // doing a two-step sum to reduce numerical error 
 	  // and make parallel results reproducible
@@ -114,7 +113,7 @@ namespace libmpdataxx
 	      (*sumtmp)(c) = blitz::sum(arr(slice_idx));
           }
           barrier();
-          real_t result;
+          double result;
           if (sum_khn)
             result = blitz::kahan_sum(*sumtmp);
           else
@@ -124,7 +123,7 @@ namespace libmpdataxx
         }
 
         /// @brief concurrency-aware summation of a (element-wise) product of two arrays
-        real_t sum(const arr_t &arr1, const arr_t &arr2, const idx_t<n_dims> &ijk, const bool sum_khn)
+        double sum(const arr_t &arr1, const arr_t &arr2, const idx_t<n_dims> &ijk, const bool sum_khn)
         {
 	  // doing a two-step sum to reduce numerical error 
 	  // and make parallel results reproducible
@@ -140,7 +139,7 @@ namespace libmpdataxx
 	      (*sumtmp)(c) = blitz::sum(arr1(slice_idx) * arr2(slice_idx)); 
           }
           barrier();
-          real_t result;
+          double result;
           if (sum_khn)
             result = blitz::kahan_sum(*sumtmp);
           else
 
@@ -13,6 +13,12 @@ namespace libmpdataxx
 {
   namespace formulae
   {
+    // helper casting a floating-point literal to the element type (T_numtype) of the blitz array type arr_t, so that constants match the array's precision
+    template<class arr_t>
+    constexpr auto fconst(const double v)
+    {
+      return static_cast<typename arr_t::T_numtype>(v);
+    }
     // overloads of abs/min/max/where that pick out the correct version based on ix_t
     template<class ix_t, class arg_t>
     forceinline_macro auto abs(const arg_t &a, typename std::enable_if<std::is_same<ix_t, int>::value>::type* = 0)
 
@@ -130,7 +130,7 @@ namespace libmpdataxx
       )
       {
         return return_helper<ix_t>(
-          - 1.0 / 24 *
+          - fconst<arr_1d_t>(1.0 / 24) *
           (
               4 * GC[0](i+h) * ndxx_psi<opts>(psi, i)
             + 2 * ndx_psi<opts>(psi, i) * ndx_GC0(GC[0], i)
@@ -170,9 +170,9 @@ namespace libmpdataxx
           // spatial terms
           + div_3rd_spatial<opts, sptl_intrp>(psi, GC, G, i)
           // mixed terms
-          + 0.5 * abs(GC[0](i+h)) * ndx_fdiv<opts>(psi, GC, G, i)
+          + fconst<arr_1d_t>(0.5) * abs(GC[0](i+h)) * ndx_fdiv<opts>(psi, GC, G, i)
           // temporal terms
-          + 1.0 / 24 *
+          + fconst<arr_1d_t>(1.0 / 24) *
           (
               - 8 * GC[0](i+h) *  nfdiv_fdiv<opts>(psi, GC, G, i)
               + div_3rd_temporal<opts, tmprl_extrp>(psi, ndtt_GC, i)
 
@@ -145,7 +145,7 @@ namespace libmpdataxx
       )
       {
         return return_helper<ix_t>(
-          - 1.0 / 24 *
+          - fconst<arr_2d_t>(1.0 / 24) *
           (
               4 * GC[dim](pi<dim>(i+h, j)) * ndxx_psi<opts, dim>(psi, i, j)
             + 2 * ndx_psi<opts, dim>(psi, i, j) * ndx_GC0<dim>(GC[dim], i, j)
@@ -193,9 +193,9 @@ namespace libmpdataxx
           // spatial terms
           + div_3rd_spatial<opts, dim, sptl_intrp>(psi_np1, GC, G, i, j)
           // mixed terms
-          + 0.5 * abs(GC[dim](pi<dim>(i+h, j))) * ndx_fdiv<opts, dim>(psi_np1, GC, G, i, j)
+          + fconst<arr_2d_t>(0.5) * abs(GC[dim](pi<dim>(i+h, j))) * ndx_fdiv<opts, dim>(psi_np1, GC, G, i, j)
           // temporal terms
-          + 1.0 / 24 *
+          + fconst<arr_2d_t>(1.0 / 24) *
           (
               - 8 * GC[dim](pi<dim>(i+h, j)) *  nfdiv_fdiv<opts, dim>(psi_np1, GC, G, i, j)
               + div_3rd_temporal<opts, dim, tmprl_extrp>(psi_np1, ndtt_GC, i, j)
@@ -226,9 +226,9 @@ namespace libmpdataxx
           // spatial terms
           + div_3rd_spatial<opts, dim, sptl_intrp>(psi_np1, GC, G, i, j)
           // mixed terms
-          - 0.5 * abs(GC[dim](pi<dim>(i+h, j))) * ndtx_psi<opts, dim>(psi_np1, psi_n, i, j)
+          - fconst<arr_2d_t>(0.5) * abs(GC[dim](pi<dim>(i+h, j))) * ndtx_psi<opts, dim>(psi_np1, psi_n, i, j)
           // temporal terms
-          + 1.0 / 24 *
+          + fconst<arr_2d_t>(1.0 / 24) *
           (
               + 8 * GC[dim](pi<dim>(i+h, j)) *  nfdiv_dt<opts, dim>(psi_np1, psi_n, GC, G, i, j)
               + 1 * ndtt_GC0<opts, dim>(psi_np1, ndtt_GC[dim], i, j)
 
@@ -154,7 +154,7 @@ namespace libmpdataxx
       )
       {
         return return_helper<ix_t>(
-          - 1.0 / 24 *
+          - fconst<arr_3d_t>(1.0 / 24) *
           (
               4 * GC[dim](pi<dim>(i+h, j, k)) * ndxx_psi<opts, dim>(psi, i, j, k)
             + 2 * ndx_psi<opts, dim>(psi, i, j, k) * ndx_GC0<dim>(GC[dim], i, j, k)
@@ -204,9 +204,9 @@ namespace libmpdataxx
           // spatial terms
           + div_3rd_spatial<opts, dim, sptl_intrp>(psi_np1, GC, G, i, j, k)
           // mixed terms
-          + 0.5 * abs(GC[dim](pi<dim>(i+h, j, k))) * ndx_fdiv<opts, dim>(psi_np1, GC, G, i, j, k)
+          + fconst<arr_3d_t>(0.5) * abs(GC[dim](pi<dim>(i+h, j, k))) * ndx_fdiv<opts, dim>(psi_np1, GC, G, i, j, k)
           // temporal terms
-          + 1.0 / 24 *
+          + fconst<arr_3d_t>(1.0 / 24) *
           (
               - 8 * GC[dim](pi<dim>(i+h, j, k)) *  nfdiv_fdiv<opts, dim>(psi_np1, GC, G, i, j, k)
               + div_3rd_temporal<opts, dim, tmprl_extrp>(psi_np1, ndtt_GC, i, j, k)
@@ -238,9 +238,9 @@ namespace libmpdataxx
           // spatial terms
           + div_3rd_spatial<opts, dim, sptl_intrp>(psi_np1, GC, G, i, j, k)
           // mixed terms
-          - 0.5 * abs(GC[dim](pi<dim>(i+h, j, k))) * ndtx_psi<opts, dim>(psi_np1, psi_n, i, j, k)
+          - fconst<arr_3d_t>(0.5) * abs(GC[dim](pi<dim>(i+h, j, k))) * ndtx_psi<opts, dim>(psi_np1, psi_n, i, j, k)
           // temporal terms
-          + 1.0 / 24 *
+          + fconst<arr_3d_t>(1.0 / 24) *
           (
               + 8 * GC[dim](pi<dim>(i+h, j, k)) *  nfdiv_dt<opts, dim>(psi_np1, psi_n, GC, G, i, j, k)
               + div_3rd_temporal<opts, dim, tmprl_extrp>(psi_np1, ndtt_GC, i, j, k)
 
@@ -40,6 +40,10 @@ namespace libmpdataxx
       using idxperm::pi;
       using opts::opts_t;
       using std::abs;
+      
+      using blitz::pow2;
+      using blitz::pow3;
+      using blitz::pow4;
 
       const int n_tlev = 2;
 
 
@@ -38,7 +38,7 @@ namespace libmpdataxx
       )
       {
         return return_helper<ix_t>(
-          - 0.5 * GC(i+h) 
+          - fconst<arr_1d_t>(0.5) * GC(i+h) 
           / 
           (formulae::G<opts>(G, i+1) + formulae::G<opts>(G, i)) 
           * 
@@ -56,13 +56,13 @@ namespace libmpdataxx
       )
       {
         return return_helper<ix_t>(
-          - 0.5 * GC(i+h) 
+          - fconst<arr_1d_t>(0.5) * GC(i+h) 
           / 
           (formulae::G<opts>(G, i+1) + formulae::G<opts>(G, i)) 
           * 
           (GC((i+1)+h) - GC(i-h))
           *
-          0.5 *  (psi(i+1) + psi(i)) //to be compatible with iga formulation
+          fconst<arr_1d_t>(0.5) *  (psi(i+1) + psi(i)) //to be compatible with iga formulation
         );
       }
     } // namespace mpdata
 
@@ -42,7 +42,7 @@ namespace libmpdataxx
       )
       {
         return return_helper<ix_t>(
-          - 0.25 * GC[dim](pi<dim>(i+h, j)) 
+          - fconst<arr_2d_t>(0.25) * GC[dim](pi<dim>(i+h, j)) 
           /
           G_bar_x<opts, dim>(G, i, j) 
           * 
@@ -74,7 +74,7 @@ namespace libmpdataxx
       )
       {
         return return_helper<ix_t>(
-          - 0.25 * GC[dim](pi<dim>(i+h, j)) 
+          - fconst<arr_2d_t>(0.25) * GC[dim](pi<dim>(i+h, j)) 
           /
           G_bar_x<opts, dim>(G, i, j) 
           * 
 
@@ -44,7 +44,7 @@ namespace libmpdataxx
       )
       {
         return return_helper<ix_t>(
-          - 0.25 * GC[dim](pi<dim>(i+h, j, k)) 
+          - fconst<arr_3d_t>(0.25) * GC[dim](pi<dim>(i+h, j, k)) 
           /
           G_bar_x<opts, dim>(G, i, j, k)
           * 
@@ -84,7 +84,7 @@ namespace libmpdataxx
       )
       {
         return return_helper<ix_t>(
-          - 0.25 * GC[dim](pi<dim>(i+h, j, k)) 
+          - fconst<arr_3d_t>(0.25) * GC[dim](pi<dim>(i+h, j, k)) 
           /
           G_bar_x<opts, dim>(G, i, j, k)
           * 
 
@@ -26,7 +26,7 @@ namespace libmpdataxx
         return return_helper<ix_t>(
           (
             3 * GC(i+h) * abs(GC(i+h)) / G_bar_x<opts>(G, i)
-            - 2 * pow(GC(i+h), 3) / pow(G_bar_x<opts>(G, i), 2)  
+            - 2 * pow3(GC(i+h)) / pow2(G_bar_x<opts>(G, i))
             - GC(i+h)
           ) / 6
         );
Original file line number	Diff line number	Diff line change
`@@ -13,6 +13,12 @@ namespace libmpdataxx`
`13`	`13`	`{`
`14`	`14`	`namespace formulae`
`15`	`15`	`{`
	`16`	`+ // helper to cast floating literals to correct precision based on the blitz array underlying type`
	`17`	`+ template<class arr_t>`
	`18`	`+ constexpr auto fconst(const double v)`
	`19`	`+ {`
	`20`	`+ return static_cast<typename arr_t::T_numtype>(v);`
	`21`	`+ }`
`16`	`22`	`// overloads of abs/min/max/where that pick out the correct version based on ix_t`
`17`	`23`	`template<class ix_t, class arg_t>`
`18`	`24`	`forceinline_macro auto abs(const arg_t &a, typename std::enable_if<std::is_same<ix_t, int>::value>::type* = 0)`
Original file line number	Diff line number	Diff line change
`@@ -130,7 +130,7 @@ namespace libmpdataxx`
`130`	`130`	`)`
`131`	`131`	`{`
`132`	`132`	`return return_helper<ix_t>(`
`133`		`- - 1.0 / 24 *`
	`133`	`+ - fconst<arr_1d_t>(1.0 / 24) *`
`134`	`134`	`(`
`135`	`135`	`4 * GC[0](i+h) * ndxx_psi<opts>(psi, i)`
`136`	`136`	`+ 2 * ndx_psi<opts>(psi, i) * ndx_GC0(GC[0], i)`
`@@ -170,9 +170,9 @@ namespace libmpdataxx`
`170`	`170`	`// spatial terms`
`171`	`171`	`+ div_3rd_spatial<opts, sptl_intrp>(psi, GC, G, i)`
`172`	`172`	`// mixed terms`
`173`		`- + 0.5 * abs(GC[0](i+h)) * ndx_fdiv<opts>(psi, GC, G, i)`
	`173`	`+ + fconst<arr_1d_t>(0.5) * abs(GC[0](i+h)) * ndx_fdiv<opts>(psi, GC, G, i)`
`174`	`174`	`// temporal terms`
`175`		`- + 1.0 / 24 *`
	`175`	`+ + fconst<arr_1d_t>(1.0 / 24) *`
`176`	`176`	`(`
`177`	`177`	`- 8 * GC[0](i+h) * nfdiv_fdiv<opts>(psi, GC, G, i)`
`178`	`178`	`+ div_3rd_temporal<opts, tmprl_extrp>(psi, ndtt_GC, i)`
Original file line number	Diff line number	Diff line change
`@@ -38,7 +38,7 @@ namespace libmpdataxx`
`38`	`38`	`)`
`39`	`39`	`{`
`40`	`40`	`return return_helper<ix_t>(`
`41`		`- - 0.5 * GC(i+h)`
	`41`	`+ - fconst<arr_1d_t>(0.5) * GC(i+h)`
`42`	`42`	`/`
`43`	`43`	`(formulae::G<opts>(G, i+1) + formulae::G<opts>(G, i))`
`44`	`44`	`*`
`@@ -56,13 +56,13 @@ namespace libmpdataxx`
`56`	`56`	`)`
`57`	`57`	`{`
`58`	`58`	`return return_helper<ix_t>(`
`59`		`- - 0.5 * GC(i+h)`
	`59`	`+ - fconst<arr_1d_t>(0.5) * GC(i+h)`
`60`	`60`	`/`
`61`	`61`	`(formulae::G<opts>(G, i+1) + formulae::G<opts>(G, i))`
`62`	`62`	`*`
`63`	`63`	`(GC((i+1)+h) - GC(i-h))`
`64`	`64`	`*`
`65`		`- 0.5 * (psi(i+1) + psi(i)) //to be compatible with iga formulation`
	`65`	`+ fconst<arr_1d_t>(0.5) * (psi(i+1) + psi(i)) //to be compatible with iga formulation`
`66`	`66`	`);`
`67`	`67`	`}`
`68`	`68`	`} // namespace mpdata`
Original file line number	Diff line number	Diff line change
`@@ -42,7 +42,7 @@ namespace libmpdataxx`
`42`	`42`	`)`
`43`	`43`	`{`
`44`	`44`	`return return_helper<ix_t>(`
`45`		`- - 0.25 * GC[dim](pi<dim>(i+h, j))`
	`45`	`+ - fconst<arr_2d_t>(0.25) * GC[dim](pi<dim>(i+h, j))`
`46`	`46`	`/`
`47`	`47`	`G_bar_x<opts, dim>(G, i, j)`
`48`	`48`	`*`
`@@ -74,7 +74,7 @@ namespace libmpdataxx`
`74`	`74`	`)`
`75`	`75`	`{`
`76`	`76`	`return return_helper<ix_t>(`
`77`		`- - 0.25 * GC[dim](pi<dim>(i+h, j))`
	`77`	`+ - fconst<arr_2d_t>(0.25) * GC[dim](pi<dim>(i+h, j))`
`78`	`78`	`/`
`79`	`79`	`G_bar_x<opts, dim>(G, i, j)`
`80`	`80`	`*`
Original file line number	Diff line number	Diff line change
`@@ -44,7 +44,7 @@ namespace libmpdataxx`
`44`	`44`	`)`
`45`	`45`	`{`
`46`	`46`	`return return_helper<ix_t>(`
`47`		`- - 0.25 * GC[dim](pi<dim>(i+h, j, k))`
	`47`	`+ - fconst<arr_3d_t>(0.25) * GC[dim](pi<dim>(i+h, j, k))`
`48`	`48`	`/`
`49`	`49`	`G_bar_x<opts, dim>(G, i, j, k)`
`50`	`50`	`*`
`@@ -84,7 +84,7 @@ namespace libmpdataxx`
`84`	`84`	`)`
`85`	`85`	`{`
`86`	`86`	`return return_helper<ix_t>(`
`87`		`- - 0.25 * GC[dim](pi<dim>(i+h, j, k))`
	`87`	`+ - fconst<arr_3d_t>(0.25) * GC[dim](pi<dim>(i+h, j, k))`
`88`	`88`	`/`
`89`	`89`	`G_bar_x<opts, dim>(G, i, j, k)`
`90`	`90`	`*`
Original file line number	Diff line number	Diff line change
`@@ -26,7 +26,7 @@ namespace libmpdataxx`
`26`	`26`	`return return_helper<ix_t>(`
`27`	`27`	`(`
`28`	`28`	`3 * GC(i+h) * abs(GC(i+h)) / G_bar_x<opts>(G, i)`
`29`		`- - 2 * pow(GC(i+h), 3) / pow(G_bar_x<opts>(G, i), 2)`
	`29`	`+ - 2 * pow3(GC(i+h)) / pow2(G_bar_x<opts>(G, i))`
`30`	`30`	`- GC(i+h)`
`31`	`31`	`) / 6`
`32`	`32`	`);`