Skip to content

Commit 9b721cc

Browse files
authored
Really fix TSC test passing criteria (#10)
* Add pool_size argument for debugging mode
* Fix criteria for pass/fail of TSC test. Update the pass/fail criteria so it falls in line with the original paper, following the discussion in E3SM-Project/E3SM#4759. First, timesteps are assessed for pass/fail, then an overall pass/fail is given. For overall FAIL, all timesteps within the inspection window must fail. For a timestep to fail, at least one variable has its null hypothesis rejected (i.e. RMSD difference has a p-value less than the threshold).
* Enhance TSC plots with threshold lines and caption. Add dashed lines to timeseries plots indicating the time inspection window and move the PASS/FAIL text to be centered on that window. Also group the timeseries and box plots together so they work with LIVV3.x galleries.
* Revise word order in figure captions
* Change to >=2 failed timesteps for overall failure. TSC test fails when two or more timesteps meet the failure criteria, to rule out single-timestep fluke issues.
1 parent dff65f8 commit 9b721cc

File tree

3 files changed

+91
-36
lines changed

3 files changed

+91
-36
lines changed

evv4esm/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@
2828
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
2929

3030

31-
__version_info__ = (0, 3, 1)
31+
__version_info__ = (0, 3, 2)
3232
__version__ = '.'.join(str(vi) for vi in __version_info__)
3333

3434
PASS_COLOR = '#389933'

evv4esm/__main__.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,14 @@ def parse_args(args=None):
5959
])
6060
)
6161

62+
parser.add_argument('-p', '--pool-size',
63+
nargs='?',
64+
type=int,
65+
default=(options.mp.cpu_count() - 1 or 1),
66+
help='The number of multiprocessing processes to run '
67+
'analyses in. If zero, processes will run serially '
68+
'outside of the multiprocessing module.')
69+
6270
parser.add_argument('--version',
6371
action='version',
6472
version='EVV {}'.format(evv4esm.__version__),
@@ -73,7 +81,6 @@ def parse_args(args=None):
7381
from evv4esm import resources
7482
args.livv_resource_dir = livvkit.resource_dir
7583
livvkit.resource_dir = os.sep.join(resources.__path__)
76-
7784
return args
7885

7986

@@ -106,6 +113,7 @@ def main(cl_args=None):
106113
from livvkit.util import functions
107114
from livvkit import elements
108115

116+
livvkit.pool_size = args.pool_size
109117
if args.extensions:
110118
functions.setup_output()
111119
summary_elements = []

evv4esm/extensions/tsc.py

Lines changed: 81 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -284,14 +284,23 @@ def main(args):
284284
null_hypothesis = ttest.applymap(lambda x: 'Reject' if x[1] < args.p_threshold else 'Accept')
285285

286286
domains = (
287-
null_hypothesis
287+
# True for rejection of null_hypothesis for each variable at each time, by comparing
288+
# index 1 (x[1]) of each column of tuples, which corresponds to the p-value to the
289+
# threshold for p-values
290+
ttest.applymap(lambda x: x[1] < args.p_threshold)
291+
# Select only times in the inspection window
288292
.query(' seconds >= @args.inspect_times[0] & seconds <= @args.inspect_times[-1]')
289-
.applymap(lambda x: x == 'Reject').all().transform(
290-
lambda x: 'Fail' if x is True else 'Pass'
291-
)
293+
# Create groups of all variables at each time step in the window
294+
.groupby("seconds")
295+
# Are any variables failing at each time step in the inspection window?
296+
.any()
297+
# Since True -> 1 False -> 0, .sum() gets the number of timesteps for which
298+
# the null hypothesis is rejected
299+
.sum()
300+
# If two or more time steps are failing then the domain [glob, lnd, ocn] is failing
301+
.transform(lambda x: "Fail" if x >= 2 else "Pass")
292302
)
293-
294-
overall = 'Fail' if domains.apply(lambda x: x == 'Fail').any() else 'Pass'
303+
overall = 'Fail' if domains[delta_columns].apply(lambda x: x == 'Fail').any() else 'Pass'
295304

296305
ttest.reset_index(inplace=True)
297306
null_hypothesis.reset_index(inplace=True)
@@ -360,6 +369,17 @@ def pressure_layer_thickness(dataset):
360369
dp = np.expand_dims(da * p0, 1) + (np.expand_dims(db, 1) * np.expand_dims(ps, 0))
361370
return dp, ps
362371

372+
def plot_thresholds(args, axis, x_thr=None, y_thr=None):
373+
"""Add vertical / horiziontal lines on axis indicating certain thresholds."""
374+
if x_thr is None:
375+
x_thr = [args.inspect_times[0], args.inspect_times[-1]]
376+
if y_thr is None:
377+
y_thr = args.p_threshold * 100
378+
379+
axis.axhline(y_thr, linestyle="--", linewidth=1, color="grey", zorder=-1)
380+
for _time in x_thr:
381+
axis.axvline(_time, linestyle="--", linewidth=1, color="grey", zorder=-1)
382+
363383

364384
def plot_bit_for_bit(args):
365385
failing_img_file = os.path.relpath(os.path.join(args.img_dir, 'failing_timeline.png'), os.getcwd())
@@ -369,6 +389,7 @@ def plot_bit_for_bit(args):
369389
xx = np.arange(0, args.time_slice[1] + args.time_slice[0], args.time_slice[0])
370390
yy = np.zeros(xx.shape)
371391
ax.plot(xx, yy, linestyle='-', marker='o', color=pf_color_picker.get('pass'))
392+
plot_thresholds(args, ax, y_thr=1)
372393

373394
ax.set_ybound(-1, 20)
374395
ax.set_yticks(np.arange(0, 24, 4))
@@ -381,18 +402,29 @@ def plot_bit_for_bit(args):
381402
plt.savefig(failing_img_file, bbox_inches='tight')
382403
plt.close(fig)
383404

384-
failing_img_caption = 'The number of failing variables across both domains (land and ' \
385-
'ocean) as a function of model integration time.'
405+
failing_img_caption = (
406+
"The number of failing variables across both domains (land and ocean) as a "
407+
"function of model integration time. The dashed horizontal line represents the "
408+
"failing threshold of 1 variable, the dashed vertical lines represent the inspection "
409+
f"window of {args.inspect_times[0]} - {args.inspect_times[-1]} s."
410+
)
386411
failing_img_link = Path(*Path(args.img_dir).parts[-2:], Path(failing_img_file).name)
387-
failing_img = el.Image('Timeline of failing variables', failing_img_caption, failing_img_link, height=300, relative_to="")
412+
failing_img = el.Image(
413+
'Timeline of failing variables',
414+
failing_img_caption,
415+
failing_img_link,
416+
height=300,
417+
relative_to="",
418+
group="Timelines"
419+
)
388420

389421
pmin_img_file = os.path.relpath(os.path.join(args.img_dir, 'pmin_timeline.png'), os.getcwd())
390422
fig, ax = plt.subplots(figsize=(10, 8))
391423
plt.rc('font', family='serif')
392424

393425
ax.semilogy(xx, yy + 1.0, linestyle='-', marker='o', color=pf_color_picker.get('pass'))
426+
plot_thresholds(args, ax)
394427

395-
ax.plot(args.time_slice, [args.p_threshold * 100] * 2, 'k--')
396428
ax.text(np.mean(args.time_slice), 10 ** -1, 'Fail', fontsize=15, color=pf_color_picker.get('fail'),
397429
horizontalalignment='center')
398430
ax.text(np.mean(args.time_slice), 0.5 * 10 ** 1, 'Pass', fontsize=15, color=pf_color_picker.get('pass'),
@@ -411,14 +443,18 @@ def plot_bit_for_bit(args):
411443
plt.savefig(pmin_img_file, bbox_inches='tight')
412444
plt.close(fig)
413445

414-
pmin_img_caption = 'The minimum P value of all variables in both domains (land and ' \
415-
'ocean) as a function of model integration time plotted with ' \
416-
'a logarithmic y-scale. The dashed grey line indicates the ' \
417-
'threshold for assigning an overall pass or fail to a test ' \
418-
'ensemble; see Wan et al. (2017) eqn. 8.'
419-
# pmin_img_link = os.path.join(os.path.basename(args.img_dir), os.path.basename(pmin_img_file))
446+
pmin_img_caption = (
447+
"The minimum P value of all variables in both domains (land and ocean) as a "
448+
"function of model integration time plotted with a logarithmic y-scale. The "
449+
"dashed horizontal grey line indicates the threshold for assigning an overall "
450+
"pass or fail to a test ensemble; the dashed vertical lines represent the "
451+
f"inspection window of {args.inspect_times[0]} - {args.inspect_times[-1]} s. "
452+
"see Wan et al. (2017) eqn. 8"
453+
)
420454
pmin_img_link = Path(*Path(args.img_dir).parts[-2:], Path(pmin_img_file).name)
421-
pmin_img = el.Image('Timeline of P_{min}', pmin_img_caption, pmin_img_link, height=300, relative_to="")
455+
pmin_img = el.Image(
456+
'Timeline of P_{min}', pmin_img_caption, pmin_img_link, height=300, relative_to="", group="Timelines"
457+
)
422458

423459
return [failing_img, pmin_img]
424460

@@ -439,16 +475,21 @@ def plot_failing_variables(args, null_hypothesis, img_file):
439475

440476
ax.set_ylabel('Number of failing variables')
441477
ax.set_xlabel('Integration time (s)')
478+
plot_thresholds(args, ax, y_thr=1)
442479

443480
plt.tight_layout()
444481
plt.savefig(img_file, bbox_inches='tight')
445482
plt.close(fig)
446483

447-
img_caption = 'The number of failing variables across both domains (land and ' \
448-
'ocean) as a function of model integration time.'
484+
img_caption = (
485+
"The number of failing variables across both domains (land and ocean) as a "
486+
"function of model integration time. The dashed horizontal line represents the "
487+
"failing threshold of 1 variable, the dashed vertical lines represent the "
488+
f"inspection window of {args.inspect_times[0]} - {args.inspect_times[-1]} s."
489+
)
449490
img_link = Path(*Path(args.img_dir).parts[-2:], Path(img_file).name)
450491
img = el.Image(
451-
'Timeline of failing variables', img_caption, img_link, height=300, relative_to=""
492+
'Timeline of failing variables', img_caption, img_link, height=300, relative_to="", group="Timelines"
452493
)
453494
return img
454495

@@ -468,14 +509,16 @@ def plot_pmin(args, ttest, img_file):
468509
elif fails.empty:
469510
passes.plot(logy=True, linestyle='-', marker='o', color=pf_color_picker.get('pass'))
470511
else:
471-
first_fail = fails.index[0]
472-
pdata.loc[:first_fail].plot(logy=True, linestyle='-', marker='o', color=pf_color_picker.get('pass'))
473-
pdata.loc[first_fail:].plot(logy=True, linestyle='-', marker='o', color=pf_color_picker.get('fail'))
512+
pdata.plot(logy=True, linestyle="-", color="black")
513+
passes.plot(logy=True, linestyle="None", marker="o", color=pf_color_picker.get("pass"))
514+
fails.plot(logy=True, linestyle="None", marker="o", color=pf_color_picker.get("fail"))
474515

475-
ax.plot(args.time_slice, [0.5, 0.5], 'k--')
476-
ax.text(np.mean(args.time_slice), 10 ** -1, 'Fail', fontsize=15, color=pf_color_picker.get('fail'),
516+
plot_thresholds(args, ax)
517+
518+
inspect_window = [args.inspect_times[0], args.inspect_times[-1]]
519+
ax.text(np.mean(inspect_window), 10 ** -1, 'Fail', fontsize=15, color=pf_color_picker.get('fail'),
477520
horizontalalignment='center')
478-
ax.text(np.mean(args.time_slice), 10 ** 0, 'Pass', fontsize=15, color=pf_color_picker.get('pass'),
521+
ax.text(np.mean(inspect_window), 10 ** 0, 'Pass', fontsize=15, color=pf_color_picker.get('pass'),
479522
horizontalalignment='center')
480523

481524
ax.set_ybound(100, 10 ** -15)
@@ -491,14 +534,18 @@ def plot_pmin(args, ttest, img_file):
491534
plt.savefig(img_file, bbox_inches='tight')
492535
plt.close(fig)
493536

494-
img_caption = 'The minimum P value of all variables in both domains (land and ' \
495-
'ocean) as a function of model integration time plotted with ' \
496-
'a logarithmic y-scale. The dashed grey line indicates the ' \
497-
'threshold for assigning an overall pass or fail to a test ' \
498-
'ensemble; see Wan et al. (2017) eqn. 8.'
537+
img_caption = (
538+
"The minimum P value of all variables in both domains (land and ocean) as a "
539+
"function of model integration time plotted with a logarithmic y-scale. The "
540+
"dashed horizontal grey line indicates the threshold for assigning an overall "
541+
"pass or fail to a test ensemble; the dashed vertical lines represent the "
542+
f"inspection window of {args.inspect_times[0]} - {args.inspect_times[-1]} s. "
543+
"see Wan et al. (2017) eqn. 8"
544+
)
545+
499546
# img_link = os.path.join(os.path.basename(args.img_dir), os.path.basename(img_file))
500547
img_link = Path(*Path(args.img_dir).parts[-2:], Path(img_file).name)
501-
img = el.Image('Timeline of P_{min}', img_caption, img_link, height=300, relative_to="")
548+
img = el.Image('Timeline of P_{min}', img_caption, img_link, height=300, relative_to="", group="Timelines")
502549
return img
503550

504551

@@ -572,7 +619,7 @@ def boxplot_delta_rmsd(args, delta_rmsd, null_hypothesis, img_file_format):
572619
cpass=human_color_names['pass'][0])
573620
img_link = Path(*Path(args.img_dir).parts[-2:], Path(img_file).name)
574621
img_list.append(el.Image('Boxplot of normalized ensemble ΔRMSD at {}s'.format(time),
575-
img_caption, img_link, height=300, relative_to=""))
622+
img_caption, img_link, height=300, relative_to="", group="Boxplots"))
576623
return img_list
577624

578625

@@ -686,7 +733,7 @@ def errorbars_delta_rmsd(args, delta_rmsd, null_hypothesis, img_file_format):
686733
cpass=human_color_names['pass'][0])
687734
img_link = Path(*Path(args.img_dir).parts[-2:], Path(img_file).name)
688735
img_list.append(el.Image('Distribution of the ensemble ΔRMSD at {}s'.format(time),
689-
img_caption, img_link, height=300, relative_to=""))
736+
img_caption, img_link, height=300, relative_to="", group="Boxplots"))
690737
return img_list
691738

692739

0 commit comments

Comments
 (0)