igfuw
diff --git a/‎src/cases/CasesCommon.hpp‎
Lines changed: 2 additions & 0 deletions b/‎src/cases/CasesCommon.hpp‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎src/cases/DYCOMS.hpp‎
Lines changed: 2 additions & 0 deletions b/‎src/cases/DYCOMS.hpp‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎src/cases/RICO11.hpp‎
Lines changed: 2 additions & 0 deletions b/‎src/cases/RICO11.hpp‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎src/detail/exec_timer.hpp‎
Lines changed: 20 additions & 2 deletions b/‎src/detail/exec_timer.hpp‎
Lines changed: 20 additions & 2 deletions
diff --git a/‎src/detail/func_time.hpp‎
Lines changed: 86 additions & 0 deletions b/‎src/detail/func_time.hpp‎
Lines changed: 86 additions & 0 deletions
diff --git a/‎src/detail/setup.hpp‎
Lines changed: 15 additions & 4 deletions b/‎src/detail/setup.hpp‎
Lines changed: 15 additions & 4 deletions
diff --git a/‎src/detail/user_params.hpp‎
Lines changed: 2 additions & 1 deletion b/‎src/detail/user_params.hpp‎
Lines changed: 2 additions & 1 deletion
diff --git a/‎src/opts/opts_blk_2m.hpp‎
Lines changed: 2 additions & 2 deletions b/‎src/opts/opts_blk_2m.hpp‎
Lines changed: 2 additions & 2 deletions
@@ -116,6 +116,7 @@ namespace setup
 
     real_t div_LS = 0.; // large-scale wind divergence (same as ForceParameters::D), 0. to turn off large-scale subsidence of SDs, TODO: add a process switch in libcloudph++ like for coal/cond/etc
 
+    quantity<si::length, real_t> gccn_max_height; // GCCN added (at init and via relaxation) only up to this level
 
     template<bool enable_sgs = case_ct_params_t::enable_sgs>
     void setopts_sgs(rt_params_t &params,
@@ -218,6 +219,7 @@ namespace setup
       X = 0 * si::metres;
       Y = 0 * si::metres;
       Z = 0 * si::metres;
+      gccn_max_height = 0 * si::metres;
     }
 
     virtual ~CasesCommon() = default;
 
@@ -20,6 +20,7 @@ namespace setup
     const real_t z_abs = 1250;
     const real_t z_i[] = {/*RF1*/840, /*RF2*/795}; //initial inversion height
     const quantity<si::length, real_t> z_rlx = 25 * si::metres;
+    const quantity<si::length, real_t> gccn_max_height = 460 * si::metres; // below inversion
     const real_t D = 3.75e-6; // large-scale wind horizontal divergence [1/s], needed only in radiation procedure of DYCOMS
 
     // liquid water potential temperature at height z
@@ -285,6 +286,7 @@ namespace setup
         this->ForceParameters.D = D; // large-scale wind horizontal divergence [1/s], needed in the radiation procedure of DYCOMS
         this->Z = Z;
         this->z_rlx = z_rlx;
+        this->gccn_max_height = gccn_max_height;
       }
     };
 
 
@@ -30,6 +30,7 @@ namespace setup
     const real_t z_abs = 3000;
 //    const real_t z_i = 795; //initial inversion height
     const quantity<si::length, real_t> z_rlx = 100 * si::metres;
+    const quantity<si::length, real_t> gccn_max_height = 460 * si::metres; // below cloud base
 
     inline quantity<si::temperature, real_t> th_l_rico(const real_t &z)
     {
@@ -321,6 +322,7 @@ namespace setup
         this->X = X;
         this->Z = Z;
         this->z_rlx = z_rlx;
+        this->gccn_max_height = gccn_max_height;
       }
     };
 
 
@@ -54,7 +54,10 @@ class exec_timer : public solver_t
     parent_t::hook_ante_loop(nt);
     this->mem->barrier();
     if (this->rank == 0) 
+    {   
       tbeg_loop = parent_t::clock::now();
+      trecord_all = parent_t::timer::zero(); // reset to 0, because we only want record all done in loop, not the one in ante_loop 
+    }
     this->mem->barrier();
   }
 
@@ -86,11 +89,17 @@ class exec_timer : public solver_t
         tend_loop = parent_t::clock::now();
         tloop = std::chrono::duration_cast<std::chrono::milliseconds>( tend_loop - tbeg_loop );
 
+        // calculate CPU/GPU times and concurrency, valid only for async runs and not taking into account diagnostics in record_all
+        typename parent_t::timer tsync_in = parent_t::tsync,
+                                 tgpu = parent_t::tasync_wait_in_record_all + parent_t::tsync_wait + parent_t::tasync_wait + tsync_in, // time of pure GPU calculations (= wait time of CPU)
+                                 tcpugpu = tsync_in + parent_t::tasync_gpu + parent_t::tsync_gpu - tgpu, // time of concurrent CPU and GPU calculations (= total time of GPU calculations - tgpu)
+                                 tcpu = tloop - tgpu - tcpugpu;
+         
         std::cout <<  "wall time in milliseconds: " << std::endl
           << "loop:                            " << tloop.count() << std::endl
           << "  hook_ante_step:                  " << thas.count() << " ("<< setup::real_t(thas.count())/tloop.count()*100 <<"%)" << std::endl
-          << "    async_wait:                      " << parent_t::tasync_wait.count() << " ("<< setup::real_t(parent_t::tasync_wait.count())/tloop.count()*100 <<"%)" << std::endl
           << "    hook_mixed_rhs_ante_step:        " << thmas.count() << " ("<< setup::real_t(thmas.count())/tloop.count()*100 <<"%)" << std::endl
+          << "      async_wait:                    " << parent_t::tasync_wait.count() << " ("<< setup::real_t(parent_t::tasync_wait.count())/tloop.count()*100 <<"%)" << std::endl
           << "      sync:                            " << parent_t::tsync.count() << " ("<< setup::real_t(parent_t::tsync.count())/tloop.count()*100 <<"%)" << std::endl
           << "  step:                            " << thas_hads.count() << " ("<< setup::real_t(thas_hads.count())/tloop.count()*100 <<"%)" << std::endl
           << "  hook_ante_delayed_step:          " << thads.count() << " ("<< setup::real_t(thads.count())/tloop.count()*100 <<"%)" << std::endl
@@ -99,9 +108,18 @@ class exec_timer : public solver_t
           << "  delayed step:                    " << thads_hps.count() << " ("<< setup::real_t(thads_hps.count())/tloop.count()*100 <<"%)" << std::endl
           << "  hook_post_step:                  " << thps.count() << " ("<< setup::real_t(thps.count())/tloop.count()*100 <<"%)" << std::endl
           << "    hook_mixed_rhs_post_step:        " << thmps.count() << " ("<< setup::real_t(thmps.count())/tloop.count()*100 <<"%)" << std::endl
-          << "    record_all:                      " << trecord_all.count() << " ("<< setup::real_t(trecord_all.count())/tloop.count()*100 <<"%)" << std::endl
+          << "    record_all (in loop):            " << trecord_all.count() << " ("<< setup::real_t(trecord_all.count())/tloop.count()*100 <<"%)" << std::endl
           << "      async_wait in record_all:      " << parent_t::tasync_wait_in_record_all.count() << " ("<< setup::real_t(parent_t::tasync_wait_in_record_all.count())/tloop.count()*100 <<"%)" << std::endl
           << "  hook_post_step->hook_ante_step:  " << thps_has.count() << " ("<< setup::real_t(thps_has.count())/tloop.count()*100 <<"%)" << std::endl;
+
+          std::cout << std::endl
+          << "CPU/GPU concurrency stats, only make sense for async lgrngn runs" << std::endl
+          << "and does not take into account GPU time in record_all, so most accurate without diag:" << std::endl
+          << "  pure CPU calculations: " << tcpu.count() << " ("<< setup::real_t(tcpu.count())/tloop.count()*100 <<"%)" << std::endl
+          << "  pure GPU calculations: " << tgpu.count() << " ("<< setup::real_t(tgpu.count())/tloop.count()*100 <<"%)" << std::endl
+          << "  concurrent CPU&GPU:    " << tcpugpu.count() << " ("<< setup::real_t(tcpugpu.count())/tloop.count()*100 <<"%)" << std::endl
+          << "  tsync_gpu:  " << parent_t::tsync_gpu.count() << " ("<< setup::real_t(parent_t::tsync_gpu.count())/tloop.count()*100 <<"%)" << std::endl
+          << "  tasync_gpu: " << parent_t::tasync_gpu.count() << " ("<< setup::real_t(parent_t::tasync_gpu.count())/tloop.count()*100 <<"%)" << std::endl;
       }
     }
   }
 
@@ -0,0 +1,86 @@
+// helpers that measure the execution time of a member function invoked through a pointer-to-member on an object pointer, optionally launched asynchronously
+#pragma once
+
+// async_forwarder code taken from https://kholdstare.github.io/technical/2012/12/18/perfect-forwarding-to-async-2.html (C) Alexander Kondratskiy
+// Wrapper that lets an argument of either reference kind (lvalue or rvalue) be handed to std::async; this primary template handles non-reference T and owns the value
+template <typename T>
+class async_forwarder
+{
+    // The wrapped object is stored by value inside the wrapper
+    T val_;
+
+public:
+    /**
+     * Move an rvalue of T into the wrapper,
+     * so that constructing the wrapper makes no copy of t.
+     */
+    async_forwarder(T&& t) 
+     : val_(std::move(t)) { }
+
+    // copying is disabled so the wrapped value can never be duplicated
+    async_forwarder(async_forwarder const& other) = delete;
+
+    // move constructor: transfers the wrapped value out of other
+    async_forwarder(async_forwarder&& other)
+        : val_(std::move(other.val_)) { }
+
+    // Implicit conversion that moves the wrapped value out.
+    // Note: can only occur once! Afterwards val_ is in a moved-from state.
+    operator T&& ()       { return std::move(val_); }
+    operator T&& () const { return std::move(val_); } // NOTE(review): val_ is const in this overload, so std::move(val_) yields const T&&, which cannot bind to T&& for non-const T - looks ill-formed if ever instantiated; confirm it is unused
+};
+
+// Specialization for lvalue references: it only holds a reference to the
+// caller's object, so it is essentially std::ref
+template <typename T>
+class async_forwarder<T&>
+{
+    T& val_; // reference to the caller's object; nothing is owned or copied here
+
+public:
+    /**
+     * Wrap the reference when passed an lvalue reference, so that the
+     * reference (not a copy of the object) travels through std::async
+     */
+    async_forwarder(T& t) : val_(t) { }
+
+    // copying is disabled, mirroring the primary template
+    async_forwarder(async_forwarder const& other) = delete;
+
+    // move constructor: binds to the same referenced object as other (nothing is actually moved)
+    async_forwarder(async_forwarder&& other)
+        : val_(other.val_) { }
+
+    // User-defined conversions that hand back the wrapped reference:
+    // T& from a non-const wrapper, T const& from a const wrapper
+    operator T&       ()       { return val_; }
+    operator T const& () const { return val_; }
+};
+
+
+#if defined(UWLCM_TIMING) // timing build: make the call and measure how long it takes
+  template<class clock, class timer, class F, class ptr, typename... Args>
+  timer func_time(F func, ptr p, Args&&... args){ // calls (p->*func)(args...) and returns the elapsed time as a timer duration
+    auto t1=clock::now(); // time stamp taken immediately before the call
+    (p->*func)(std::forward<Args>(args)...); // the return value of the member function, if any, is discarded
+    return std::chrono::duration_cast<timer>(clock::now()-t1); // elapsed time, converted to the timer type
+  }
+#else // non-timing build: same signature and same call, but the clock is never read
+  template<class clock, class timer, class F, class ptr, typename... Args>
+  timer func_time(F func, ptr p, Args&&... args){ // clock is unused in this variant
+    (p->*func)(std::forward<Args>(args)...); // the call is still made, so side effects are the same as in the timing build
+    return timer(); // value-initialized timer; no measurement is taken
+  }
+#endif
+
+template<class clock, class timer, class F, class ptr, typename... Args>
+std::future<timer> async_timing_launcher(F func, ptr p, Args&&... args) // runs func_time via std::async and returns a future holding its result; func and p are pointers, so their copies are lightweight
+{
+  return std::async( // NOTE(review): needs <future>, but no #include is visible in this header - presumably supplied by the including file; confirm
+           std::launch::async, // explicit async launch policy (not deferred)
+           func_time<clock, timer, F, ptr, Args...>, // same Args pack, so func_time receives the same reference kinds as this function
+           func, 
+           p,
+           async_forwarder<Args>(std::forward<Args>(args))... // ATTENTION! lvalue args are wrapped as references (not copied), so the caller must keep them alive until the task has finished; rvalue args are moved into the wrapper
+         );
+}
@@ -84,12 +84,23 @@ namespace setup
   template <typename T>
   struct log_dry_radii_gccn : public libcloudphxx::common::unary_function<T>
   {
+    real_t lnrd_min, lnrd_max, conc_multiplier; // lnrd bounds outside which funval returns 0, and a factor applied to n3_stp
+
     T funval(const T lnrd) const
     {
-      return T((
-          lognormal::n_e(mean_rd3, sdev_rd3, n3_stp, quantity<si::dimensionless, real_t>(lnrd)) 
-        ) * si::cubic_metres
-      );
+      return 
+        lnrd < lnrd_min ? 0 : // below the lower bound: zero
+          lnrd > lnrd_max ? 0 : // above the upper bound: zero
+            T(( // inside [lnrd_min, lnrd_max]: lognormal value with the scaled concentration
+              lognormal::n_e(mean_rd3, sdev_rd3, conc_multiplier * n3_stp, quantity<si::dimensionless, real_t>(lnrd)) 
+            ) * si::cubic_metres
+            );
     }
+
+    log_dry_radii_gccn(const real_t lnrd_min = 0, const real_t lnrd_max = 1000000, const real_t conc_multiplier = 1): // NOTE(review): the default lnrd_min = 0 zeroes every negative lnrd; if lnrd is the log of a radius expressed in metres (presumably - confirm), that is every realistic value, so a default-constructed object would give an all-zero distribution
+      lnrd_min(lnrd_min),
+      lnrd_max(lnrd_max),
+      conc_multiplier(conc_multiplier)
+    {}
   };
 };
@@ -10,10 +10,11 @@ struct user_params_t
   int nt, outfreq, spinup, rng_seed, rng_seed_init;
   setup::real_t dt;
   std::string outdir, model_case;
-  bool th_src, rv_src, rc_src, rr_src, nc_src, nr_src, uv_src, w_src;
+  bool th_src, rv_src, rc_src, rr_src, nc_src, nr_src, uv_src, w_src, ccn_relax;
   setup::real_t sgs_delta;
   quantity<si::length, setup::real_t> mean_rd1, mean_rd2;		
   quantity<si::dimensionless, setup::real_t> sdev_rd1, sdev_rd2;		
   quantity<power_typeof_helper<si::length, static_rational<-3>>::type, setup::real_t> n1_stp, n2_stp;		
   quantity<si::dimensionless, setup::real_t> kappa1, kappa2;
+  quantity<si::dimensionless, setup::real_t> case_n_stp_multiplier;
 };
@@ -68,13 +68,13 @@ void setopts_micro(
     rt_params.cloudph_opts.dry_distros.push_back({
       .mean_rd = case_ptr->mean_rd1 / si::metres,
       .sdev_rd = case_ptr->sdev_rd1,
-      .N_stp   = case_ptr->n1_stp * si::cubic_metres,
+      .N_stp   = user_params.case_n_stp_multiplier * case_ptr->n1_stp * si::cubic_metres,
       .chem_b  = case_ptr->kappa
     });
     rt_params.cloudph_opts.dry_distros.push_back({
       .mean_rd = case_ptr->mean_rd2 / si::metres,
       .sdev_rd = case_ptr->sdev_rd2,
-      .N_stp   = case_ptr->n2_stp * si::cubic_metres,
+      .N_stp   = user_params.case_n_stp_multiplier * case_ptr->n2_stp * si::cubic_metres,
       .chem_b  = case_ptr->kappa
     });
   }
Original file line number	Diff line number	Diff line change
`@@ -30,6 +30,7 @@ namespace setup`
`30`	`30`	`const real_t z_abs = 3000;`
`31`	`31`	`// const real_t z_i = 795; //initial inversion height`
`32`	`32`	`const quantity<si::length, real_t> z_rlx = 100 * si::metres;`
	`33`	`+ const quantity<si::length, real_t> gccn_max_height = 460 * si::metres; // below cloud base`
`33`	`34`
`34`	`35`	`inline quantity<si::temperature, real_t> th_l_rico(const real_t &z)`
`35`	`36`	`{`
`@@ -321,6 +322,7 @@ namespace setup`
`321`	`322`	`this->X = X;`
`322`	`323`	`this->Z = Z;`
`323`	`324`	`this->z_rlx = z_rlx;`
	`325`	`+ this->gccn_max_height = gccn_max_height;`
`324`	`326`	`}`
`325`	`327`	`};`
`326`	`328`