Really fix TSC test passing criteria (#10)

mkstratos · web-flow · commit 9b721cc17f6a · 2022-07-19T16:30:30.000-04:00
* Add pool_size argument for debugging mode * Fix criteria for pass/fail of TSC test Update the pass/fall criteria so it falls in line with the original paper following discussion in E3SM-Project/E3SM#4759 First, timesteps are assessed for pass/fail, then an overall pass/fail is given. For overall FAIL, all timesteps within the inspection window must fail. For a timestep to fail, at least one variable has its null hypothesis rejected (i.e. RMSD difference has a p-value less than the threshold). * Enhance TSC plots with threshold lines and caption Add dashed lines to timeseries plots indicating time inspection window and move the PASS/FAIL text to be centered on that window. Also group the timeseries and box plots together so they work with LIVV3.x galleries * Revise word order in figure captions * Change to >=2 failed timesteps for overall failure TSC test fails when two or more timesteps meet failure criteria to rule out single timestep fluke issues
diff --git a/evv4esm/__init__.py b/evv4esm/__init__.py
@@ -28,7 +28,7 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 
-__version_info__ = (0, 3, 1)
+__version_info__ = (0, 3, 2)
 __version__ = '.'.join(str(vi) for vi in __version_info__)
 
 PASS_COLOR = '#389933'
diff --git a/evv4esm/__main__.py b/evv4esm/__main__.py
@@ -59,6 +59,14 @@ def parse_args(args=None):
                                        ])
                         )
 
+    parser.add_argument('-p', '--pool-size',
+                        nargs='?',
+                        type=int,
+                        default=(options.mp.cpu_count() - 1 or 1),
+                        help='The number of multiprocessing processes to run '
+                             'analyses in. If zero, processes will run serially '
+                             'outside of the multiprocessing module.')
+
     parser.add_argument('--version',
                         action='version',
                         version='EVV {}'.format(evv4esm.__version__),
@@ -73,7 +81,6 @@ def parse_args(args=None):
     from evv4esm import resources
     args.livv_resource_dir = livvkit.resource_dir
     livvkit.resource_dir = os.sep.join(resources.__path__)
-
     return args
 
 
@@ -106,6 +113,7 @@ def main(cl_args=None):
     from livvkit.util import functions
     from livvkit import elements
 
+    livvkit.pool_size = args.pool_size
     if args.extensions:
         functions.setup_output()
         summary_elements = []
diff --git a/evv4esm/extensions/tsc.py b/evv4esm/extensions/tsc.py
@@ -284,14 +284,23 @@ def main(args):
         null_hypothesis = ttest.applymap(lambda x: 'Reject' if x[1] < args.p_threshold else 'Accept')
 
         domains = (
-            null_hypothesis
+            # True for rejection of null_hypothesis for each variable at each time, by comparing
+            # index 1 (x[1]) of each column of tuples, which corresponds to the p-value to the
+            # threshold for p-values
+            ttest.applymap(lambda x: x[1] < args.p_threshold)
+            # Select only times in the inspection window
             .query(' seconds >= @args.inspect_times[0] & seconds <= @args.inspect_times[-1]')
-            .applymap(lambda x: x == 'Reject').all().transform(
-                lambda x: 'Fail' if x is True else 'Pass'
-            )
+            # Create groups of all variables at each time step in the window
+            .groupby("seconds")
+            # Are any variables failing at each time step in the inspection window?
+            .any()
+            # Since True -> 1 False -> 0, .sum() gets the number of timesteps for which
+            # the null hypothesis is rejected
+            .sum()
+            # If two or more time steps are failing then the domain [glob, lnd, ocn] is failing
+            .transform(lambda x: "Fail" if x >= 2 else "Pass")
         )
-
-        overall = 'Fail' if domains.apply(lambda x: x == 'Fail').any() else 'Pass'
+        overall = 'Fail' if domains[delta_columns].apply(lambda x: x == 'Fail').any() else 'Pass'
 
         ttest.reset_index(inplace=True)
         null_hypothesis.reset_index(inplace=True)
@@ -360,6 +369,17 @@ def pressure_layer_thickness(dataset):
     dp = np.expand_dims(da * p0, 1) + (np.expand_dims(db, 1) * np.expand_dims(ps, 0))
     return dp, ps
 
+def plot_thresholds(args, axis, x_thr=None, y_thr=None):
+    """Add vertical / horiziontal lines on axis indicating certain thresholds."""
+    if x_thr is None:
+        x_thr = [args.inspect_times[0], args.inspect_times[-1]]
+    if y_thr is None:
+        y_thr = args.p_threshold * 100
+
+    axis.axhline(y_thr, linestyle="--", linewidth=1, color="grey", zorder=-1)
+    for _time in x_thr:
+        axis.axvline(_time, linestyle="--", linewidth=1, color="grey", zorder=-1)
+
 
 def plot_bit_for_bit(args):
     failing_img_file = os.path.relpath(os.path.join(args.img_dir, 'failing_timeline.png'), os.getcwd())
@@ -369,6 +389,7 @@ def plot_bit_for_bit(args):
     xx = np.arange(0, args.time_slice[1] + args.time_slice[0],  args.time_slice[0])
     yy = np.zeros(xx.shape)
     ax.plot(xx, yy, linestyle='-', marker='o', color=pf_color_picker.get('pass'))
+    plot_thresholds(args, ax, y_thr=1)
 
     ax.set_ybound(-1, 20)
     ax.set_yticks(np.arange(0, 24, 4))
@@ -381,18 +402,29 @@ def plot_bit_for_bit(args):
     plt.savefig(failing_img_file, bbox_inches='tight')
     plt.close(fig)
 
-    failing_img_caption = 'The number of failing variables across both domains (land and ' \
-                          'ocean) as a function of model integration time.'
+    failing_img_caption = (
+        "The number of failing variables across both domains (land and ocean) as a "
+        "function of model integration time. The dashed horizontal line represents the "
+        "failing threshold of 1 variable, the dashed vertical lines represent the inspection "
+        f"window of {args.inspect_times[0]} - {args.inspect_times[-1]} s."
+    )
     failing_img_link = Path(*Path(args.img_dir).parts[-2:], Path(failing_img_file).name)
-    failing_img = el.Image('Timeline of failing variables', failing_img_caption, failing_img_link, height=300, relative_to="")
+    failing_img = el.Image(
+        'Timeline of failing variables',
+        failing_img_caption,
+        failing_img_link,
+        height=300,
+        relative_to="",
+        group="Timelines"
+    )
 
     pmin_img_file = os.path.relpath(os.path.join(args.img_dir, 'pmin_timeline.png'), os.getcwd())
     fig, ax = plt.subplots(figsize=(10, 8))
     plt.rc('font', family='serif')
 
     ax.semilogy(xx, yy + 1.0, linestyle='-', marker='o', color=pf_color_picker.get('pass'))
+    plot_thresholds(args, ax)
 
-    ax.plot(args.time_slice, [args.p_threshold * 100] * 2, 'k--')
     ax.text(np.mean(args.time_slice), 10 ** -1, 'Fail', fontsize=15, color=pf_color_picker.get('fail'),
             horizontalalignment='center')
     ax.text(np.mean(args.time_slice), 0.5 * 10 ** 1, 'Pass', fontsize=15, color=pf_color_picker.get('pass'),
@@ -411,14 +443,18 @@ def plot_bit_for_bit(args):
     plt.savefig(pmin_img_file, bbox_inches='tight')
     plt.close(fig)
 
-    pmin_img_caption = 'The minimum P value of all variables in both domains (land and ' \
-                       'ocean) as a function of model integration time plotted with ' \
-                       'a logarithmic y-scale. The dashed grey line indicates the ' \
-                       'threshold for assigning an overall pass or fail to a test ' \
-                       'ensemble; see Wan et al. (2017) eqn. 8.'
-    # pmin_img_link = os.path.join(os.path.basename(args.img_dir), os.path.basename(pmin_img_file))
+    pmin_img_caption = (
+        "The minimum P value of all variables in both domains (land and ocean) as a "
+        "function of model integration time plotted with a logarithmic y-scale. The "
+        "dashed horizontal grey line indicates the threshold for assigning an overall "
+        "pass or fail to a test ensemble; the dashed vertical lines represent the "
+        f"inspection window of {args.inspect_times[0]} - {args.inspect_times[-1]} s. "
+        "see Wan et al. (2017) eqn. 8"
+    )
     pmin_img_link = Path(*Path(args.img_dir).parts[-2:], Path(pmin_img_file).name)
-    pmin_img = el.Image('Timeline of P_{min}', pmin_img_caption, pmin_img_link, height=300, relative_to="")
+    pmin_img = el.Image(
+        'Timeline of P_{min}', pmin_img_caption, pmin_img_link, height=300, relative_to="", group="Timelines"
+    )
 
     return [failing_img, pmin_img]
 
@@ -439,16 +475,21 @@ def plot_failing_variables(args, null_hypothesis, img_file):
 
     ax.set_ylabel('Number of failing variables')
     ax.set_xlabel('Integration time (s)')
+    plot_thresholds(args, ax, y_thr=1)
 
     plt.tight_layout()
     plt.savefig(img_file, bbox_inches='tight')
     plt.close(fig)
 
-    img_caption = 'The number of failing variables across both domains (land and ' \
-                  'ocean) as a function of model integration time.'
+    img_caption = (
+        "The number of failing variables across both domains (land and ocean) as a "
+        "function of model integration time. The dashed horizontal line represents the "
+        "failing threshold of 1 variable, the dashed vertical lines represent the "
+        f"inspection window of {args.inspect_times[0]} - {args.inspect_times[-1]} s."
+    )
     img_link = Path(*Path(args.img_dir).parts[-2:], Path(img_file).name)
     img = el.Image(
-        'Timeline of failing variables', img_caption, img_link, height=300, relative_to=""
+        'Timeline of failing variables', img_caption, img_link, height=300, relative_to="", group="Timelines"
     )
     return img
 
@@ -468,14 +509,16 @@ def plot_pmin(args, ttest, img_file):
     elif fails.empty:
         passes.plot(logy=True, linestyle='-', marker='o', color=pf_color_picker.get('pass'))
     else:
-        first_fail = fails.index[0]
-        pdata.loc[:first_fail].plot(logy=True, linestyle='-', marker='o', color=pf_color_picker.get('pass'))
-        pdata.loc[first_fail:].plot(logy=True, linestyle='-', marker='o', color=pf_color_picker.get('fail'))
+        pdata.plot(logy=True, linestyle="-", color="black")
+        passes.plot(logy=True, linestyle="None", marker="o", color=pf_color_picker.get("pass"))
+        fails.plot(logy=True, linestyle="None", marker="o", color=pf_color_picker.get("fail"))
 
-    ax.plot(args.time_slice, [0.5, 0.5], 'k--')
-    ax.text(np.mean(args.time_slice), 10 ** -1, 'Fail', fontsize=15, color=pf_color_picker.get('fail'),
+    plot_thresholds(args, ax)
+
+    inspect_window = [args.inspect_times[0], args.inspect_times[-1]]
+    ax.text(np.mean(inspect_window), 10 ** -1, 'Fail', fontsize=15, color=pf_color_picker.get('fail'),
             horizontalalignment='center')
-    ax.text(np.mean(args.time_slice), 10 ** 0, 'Pass', fontsize=15, color=pf_color_picker.get('pass'),
+    ax.text(np.mean(inspect_window), 10 ** 0, 'Pass', fontsize=15, color=pf_color_picker.get('pass'),
             horizontalalignment='center')
 
     ax.set_ybound(100, 10 ** -15)
@@ -491,14 +534,18 @@ def plot_pmin(args, ttest, img_file):
     plt.savefig(img_file, bbox_inches='tight')
     plt.close(fig)
 
-    img_caption = 'The minimum P value of all variables in both domains (land and ' \
-                  'ocean) as a function of model integration time plotted with ' \
-                  'a logarithmic y-scale. The dashed grey line indicates the ' \
-                  'threshold for assigning an overall pass or fail to a test ' \
-                  'ensemble; see Wan et al. (2017) eqn. 8.'
+    img_caption = (
+        "The minimum P value of all variables in both domains (land and ocean) as a "
+        "function of model integration time plotted with a logarithmic y-scale. The "
+        "dashed horizontal grey line indicates the threshold for assigning an overall "
+        "pass or fail to a test ensemble; the dashed vertical lines represent the "
+        f"inspection window of {args.inspect_times[0]} - {args.inspect_times[-1]} s. "
+        "see Wan et al. (2017) eqn. 8"
+    )
+
     # img_link = os.path.join(os.path.basename(args.img_dir), os.path.basename(img_file))
     img_link = Path(*Path(args.img_dir).parts[-2:], Path(img_file).name)
-    img = el.Image('Timeline of P_{min}', img_caption, img_link, height=300, relative_to="")
+    img = el.Image('Timeline of P_{min}', img_caption, img_link, height=300, relative_to="", group="Timelines")
     return img
 
 
@@ -572,7 +619,7 @@ def boxplot_delta_rmsd(args, delta_rmsd, null_hypothesis, img_file_format):
                                                          cpass=human_color_names['pass'][0])
         img_link = Path(*Path(args.img_dir).parts[-2:], Path(img_file).name)
         img_list.append(el.Image('Boxplot of normalized ensemble ΔRMSD at {}s'.format(time),
-                                 img_caption, img_link, height=300, relative_to=""))
+                                 img_caption, img_link, height=300, relative_to="", group="Boxplots"))
     return img_list
 
 
@@ -686,7 +733,7 @@ def errorbars_delta_rmsd(args, delta_rmsd, null_hypothesis, img_file_format):
                                                          cpass=human_color_names['pass'][0])
         img_link = Path(*Path(args.img_dir).parts[-2:], Path(img_file).name)
         img_list.append(el.Image('Distribution of the ensemble ΔRMSD at {}s'.format(time),
-                                 img_caption, img_link, height=300, relative_to=""))
+                                 img_caption, img_link, height=300, relative_to="", group="Boxplots"))
     return img_list