Skip to content

Commit b0d023d

Browse files
authored
Merge pull request #12 from LIVVkit/mkstratos/mvk-threshold
Implement FDR correction for MVK tests
2 parents 15693ca + db8aaed commit b0d023d

File tree

8 files changed

+488
-216
lines changed

8 files changed

+488
-216
lines changed

evv4esm/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# coding=utf-8
2-
# Copyright (c) 2018-2022 UT-BATTELLE, LLC
2+
# Copyright (c) 2018-2023 UT-BATTELLE, LLC
33
# All rights reserved.
44
#
55
# Redistribution and use in source and binary forms, with or without
@@ -28,7 +28,7 @@
2828
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
2929

3030

31-
__version_info__ = (0, 4, 0)
31+
__version_info__ = (0, 5, 0)
3232
__version__ = '.'.join(str(vi) for vi in __version_info__)
3333

3434
PASS_COLOR = '#389933'

evv4esm/ensembles/e3sm.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -72,8 +72,7 @@ def component_monthly_files(dir_, component, ninst, hist_name="h0", nmonth_max=1
7272
else:
7373
date_search = "????-??"
7474

75-
def component_monthly_files(dir_, component, ninst, hist_name="hist", nmonth_max=24, date_style="short"):
76-
base = "{d}/*{c}_????.{n}.????-??-??.nc".format(d=dir_, c=component, n=hist_name)
75+
base = "{d}/*{c}_????.{n}.{ds}.nc".format(d=dir_, c=component, n=hist_name, ds=date_search)
7776
search = os.path.normpath(base)
7877
result = sorted(glob.glob(search))
7978

evv4esm/ensembles/tools.py

Lines changed: 166 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
#!/usr/bin/env python
22
# coding=utf-8
3-
# Copyright (c) 2018 UT-BATTELLE, LLC
3+
# Copyright (c) 2018-2023 UT-BATTELLE, LLC
44
# All rights reserved.
55
#
66
# Redistribution and use in source and binary forms, with or without
@@ -29,19 +29,23 @@
2929
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
3030

3131
"""General tools for working with ensembles."""
32-
32+
import os
3333
import numpy as np
3434
import pandas as pd
3535
import matplotlib.pyplot as plt
36+
from matplotlib.gridspec import GridSpec
37+
38+
from evv4esm import pf_color_picker, light_pf_color_picker
3639

37-
from evv4esm import pf_color_picker
3840

39-
def monthly_to_annual_avg(var_data, cal='ignore'):
41+
def monthly_to_annual_avg(var_data, cal="ignore"):
4042
if len(var_data) != 12:
41-
raise ValueError('Error! There are 12 months in a year; '
42-
'you passed in {} monthly averages.'.format(len(var_data)))
43+
raise ValueError(
44+
"Error! There are 12 months in a year; "
45+
"you passed in {} monthly averages.".format(len(var_data))
46+
)
4347

44-
if cal == 'ignore':
48+
if cal == "ignore":
4549
# weight each month equally
4650
avg = np.average(var_data)
4751
else:
@@ -50,81 +54,142 @@ def monthly_to_annual_avg(var_data, cal='ignore'):
5054
return avg
5155

5256

53-
def prob_plot(test, ref, n_q, img_file, test_name='Test', ref_name='Ref.',
54-
thing='annual global averages', pf=None):
55-
# NOTE: Following the methods described in
56-
# https://stackoverflow.com/questions/43285752
57-
# to create the Q-Q and P-P plots
57+
def prob_plot(
58+
test,
59+
ref,
60+
n_q,
61+
img_file,
62+
test_name="Test",
63+
ref_name="Ref.",
64+
thing="annual global averages",
65+
pf=None,
66+
combine_hist=False,
67+
):
5868
q = np.linspace(0, 100, n_q + 1)
5969
all_ = np.concatenate((test, ref))
6070
min_ = np.min(all_)
6171
max_ = np.max(all_)
6272

6373
fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(10, 10))
64-
plt.rc('font', family='serif')
74+
axes = [ax1, ax2, ax3, ax4]
6575

66-
ax1.set_title('Q-Q Plot')
67-
ax1.set_xlabel('{} pdf'.format(ref_name))
68-
ax1.set_ylabel('{} pdf'.format(test_name))
76+
_var = os.path.split(img_file)[-1].split(".")[0]
77+
fig.suptitle(_var)
6978

70-
# NOTE: Axis switched here from Q-Q plot because cdf reflects about the 1-1 line
71-
ax2.set_title('P-P Plot')
72-
ax2.set_xlabel('{} cdf'.format(test_name))
73-
ax2.set_ylabel('{} cdf'.format(ref_name))
79+
plt.rc("font", family="serif")
7480

75-
ax3.set_title('{} pdf'.format(ref_name))
76-
ax3.set_xlabel('Unity-based normalization of {}'.format(thing))
77-
ax3.set_ylabel('Frequency')
81+
ax1.set_title("Q-Q Plot")
82+
ax1.set_xlabel("{} pdf".format(ref_name))
83+
ax1.set_ylabel("{} pdf".format(test_name))
7884

79-
ax4.set_title('{} pdf'.format(test_name))
80-
ax4.set_xlabel('Unity-based normalization of {}'.format(thing))
81-
ax4.set_ylabel('Frequency')
85+
# NOTE: Axis switched here from Q-Q plot because cdf reflects about the 1-1 line
86+
ax2.set_title("P-P Plot")
87+
ax2.set_xlabel("{} cdf".format(test_name))
88+
ax2.set_ylabel("{} cdf".format(ref_name))
8289

8390
norm_rng = [0.0, 1.0]
84-
ax1.plot(norm_rng, norm_rng, 'gray', zorder=1)
91+
ax1.plot(norm_rng, norm_rng, "gray", zorder=1)
8592
ax1.set_xlim(tuple(norm_rng))
8693
ax1.set_ylim(tuple(norm_rng))
8794
ax1.autoscale()
8895

89-
ax2.plot(norm_rng, norm_rng, 'gray', zorder=1)
96+
ax2.plot(norm_rng, norm_rng, "gray", zorder=1)
9097
ax2.set_xlim(tuple(norm_rng))
9198
ax2.set_ylim(tuple(norm_rng))
9299
ax2.autoscale()
93100

101+
if combine_hist:
102+
ax3.set_title("Ensemble histogram")
103+
ax4.set_title("Ensemble CDF")
104+
else:
105+
ax3.set_title("{} pdf".format(ref_name))
106+
107+
ax4.set_title("{} pdf".format(test_name))
108+
ax4.set_xlabel("Unity-based normalization of {}".format(thing))
109+
ax4.set_ylabel("Frequency")
110+
ax4.set_xlim(tuple(norm_rng))
111+
ax4.autoscale()
112+
113+
ax3.set_ylabel("Frequency")
114+
ax3.set_xlabel("Unity-based normalization of {}".format(thing))
115+
94116
ax3.set_xlim(tuple(norm_rng))
95117
ax3.autoscale()
96118

97-
ax4.set_xlim(tuple(norm_rng))
98-
ax4.autoscale()
119+
ax4.set_ylabel("N Ensemble members")
120+
ax4.set_xlabel("Unity-based normalization of {}".format(thing))
99121

100122
# NOTE: Produce unity-based normalization of data for the Q-Q plots because
101123
# matplotlib can't handle small absolute values or data ranges. See
102124
# https://github.com/matplotlib/matplotlib/issues/6015
103-
if not np.allclose(min_, max_, rtol=np.finfo(max_).eps):
104-
norm1 = (ref - min_) / (max_ - min_)
105-
norm2 = (test - min_) / (max_ - min_)
106-
107-
ax1.scatter(np.percentile(norm1, q), np.percentile(norm2, q),
108-
color=pf_color_picker.get(pf, '#1F77B4'), zorder=2)
109-
ax3.hist(norm1, bins=n_q, color=pf_color_picker.get(pf, '#1F77B4'), edgecolor="k")
110-
ax4.hist(norm2, bins=n_q, color=pf_color_picker.get(pf, '#1F77B4'), edgecolor="k")
111-
112-
# Check if these distributions are wildly different. If they are, use different
113-
# colours for the bottom axis? Otherwise set the scales to be the same [0, 1]
114-
if abs(norm1.mean() - norm2.mean()) >= 0.5:
115-
ax3.tick_params(axis="x", colors="C0")
116-
ax3.spines["bottom"].set_color("C0")
117-
118-
ax4.tick_params(axis="x", colors="C1")
119-
ax4.spines["bottom"].set_color("C1")
120-
else:
121-
ax3.set_xlim(tuple(norm_rng))
125+
bnds = np.linspace(min_, max_, n_q)
126+
if not np.allclose(
127+
bnds, bnds[0], rtol=np.finfo(bnds[0]).eps, atol=np.finfo(bnds[0]).eps
128+
):
129+
norm_ref = (ref - min_) / (max_ - min_)
130+
norm_test = (test - min_) / (max_ - min_)
131+
132+
# Create Q-Q plot
133+
ax1.scatter(
134+
np.percentile(norm_ref, q),
135+
np.percentile(norm_test, q),
136+
color=pf_color_picker.get(pf, "#1F77B4"),
137+
zorder=2,
138+
)
139+
if combine_hist:
140+
# Plot joint histogram (groups test / ref side-by-side for each bin)
141+
freq, bins, _ = ax3.hist(
142+
[norm_ref, norm_test],
143+
bins=n_q,
144+
edgecolor="k",
145+
label=[ref_name, test_name],
146+
color=[
147+
pf_color_picker.get(pf, "#1F77B4"),
148+
light_pf_color_picker.get(pf, "#B55D1F"),
149+
],
150+
zorder=5,
151+
)
152+
ax3.legend()
153+
154+
cdf = freq.cumsum(axis=1)
155+
156+
ax4.plot(
157+
bins,
158+
[0, *cdf[0]],
159+
color=pf_color_picker.get(pf, "#1F77B4"),
160+
label=ref_name,
161+
)
162+
ax4.plot(
163+
bins,
164+
[0, *cdf[1]],
165+
color=light_pf_color_picker.get(pf, "#B55D1F"),
166+
label=test_name,
167+
)
122168
ax4.set_xlim(tuple(norm_rng))
169+
ax4.legend()
123170

171+
else:
172+
ax3.hist(
173+
norm_ref, bins=n_q, color=pf_color_picker.get(pf, "#1F77B4"), edgecolor="k"
174+
)
175+
ax4.hist(
176+
norm_test, bins=n_q, color=pf_color_picker.get(pf, "#1F77B4"), edgecolor="k"
177+
)
124178

125-
# bin both series into equal bins and get cumulative counts for each bin
126-
bnds = np.linspace(min_, max_, n_q)
127-
if not np.allclose(bnds, bnds[0], rtol=np.finfo(bnds[0]).eps):
179+
# Check if these distributions are wildly different. If they are, use different
180+
# colours for the bottom axis? Otherwise set the scales to be the same [0, 1]
181+
if abs(norm_ref.mean() - norm_test.mean()) >= 0.5:
182+
ax3.tick_params(axis="x", colors="C0")
183+
ax3.spines["bottom"].set_color("C0")
184+
185+
ax4.tick_params(axis="x", colors="C1")
186+
ax4.spines["bottom"].set_color("C1")
187+
else:
188+
ax4.set_xlim(tuple(norm_rng))
189+
190+
ax3.set_xlim(tuple(norm_rng))
191+
192+
# bin both series into equal bins and get cumulative counts for each bin
128193
ppxb = pd.cut(ref, bnds)
129194
ppyb = pd.cut(test, bnds)
130195

@@ -134,12 +199,58 @@ def prob_plot(test, ref, n_q, img_file, test_name='Test', ref_name='Ref.',
134199
ppxh = np.cumsum(ppxh)
135200
ppyh = np.cumsum(ppyh)
136201

137-
ax2.scatter(ppyh.values, ppxh.values,
138-
color=pf_color_picker.get(pf, '#1F77B4'), zorder=2)
202+
ax2.scatter(
203+
ppyh.values, ppxh.values, color=pf_color_picker.get(pf, "#1F77B4"), zorder=2
204+
)
205+
else:
206+
# Define a text box if the data are not plottable
207+
const_axis_text = {
208+
"x": 0.5,
209+
"y": 0.5,
210+
"s": f"CONSTANT FIELD\nMIN: {min_:.4e}\nMAX: {max_:.4e}",
211+
"horizontalalignment": "center",
212+
"verticalalignment": "center",
213+
"backgroundcolor": ax1.get_facecolor(),
214+
}
215+
ax1.text(**const_axis_text)
216+
ax2.text(**const_axis_text)
217+
if combine_hist:
218+
ax3.hist(
219+
[test, ref],
220+
bins=n_q,
221+
edgecolor="k",
222+
label=[test_name, ref_name],
223+
color=[
224+
pf_color_picker.get(pf, "#1F77B4"),
225+
light_pf_color_picker.get(pf, "#B55D1F"),
226+
],
227+
zorder=5,
228+
)
229+
ax3.legend()
230+
ax4.legend()
231+
else:
232+
ax3.hist(
233+
test,
234+
bins=n_q,
235+
edgecolor="k",
236+
color=pf_color_picker.get(pf, "#1F77B4"),
237+
zorder=5,
238+
)
239+
ax4.hist(
240+
ref,
241+
bins=n_q,
242+
edgecolor="k",
243+
label=[test_name, ref_name],
244+
color=pf_color_picker.get(pf, "#1F77B4"),
245+
zorder=5,
246+
)
139247

248+
for axis in axes:
249+
axis.grid(visible=True, ls="--", lw=0.5, zorder=-1)
140250

141251
plt.tight_layout()
142-
plt.savefig(img_file, bbox_inches='tight')
252+
253+
plt.savefig(img_file, bbox_inches="tight")
143254

144255
plt.close(fig)
145256

0 commit comments

Comments
 (0)