
Commit 15693ca

Merge pull request #11 from LIVVkit/mkstratos/mvko
Add ocean K-S test
2 parents 9b721cc + 92ab3e9 commit 15693ca

6 files changed: +579 -30 lines changed
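The ocean K-S extension added by this merge is not reproduced in this excerpt; the diffs shown below are the supporting changes to the version string, the ensemble file helpers, and the probability plots. As a rough, hypothetical sketch of the kind of per-variable comparison a two-sample Kolmogorov-Smirnov test performs (the values, ensemble sizes, and names here are illustrative only, not the extension's actual code):

# Illustrative only: compare one ocean field between a baseline and a modified
# ensemble with a two-sample Kolmogorov-Smirnov test.
import numpy as np
from scipy import stats

rng = np.random.default_rng(0)
baseline = rng.normal(loc=15.0, scale=0.5, size=30)   # e.g. per-instance global means
modified = rng.normal(loc=15.0, scale=0.5, size=30)

result = stats.ks_2samp(baseline, modified)
# A small p-value suggests the two ensembles do not share a distribution for
# this variable; a real test repeats this check across many variables.
print(f"K-S statistic: {result.statistic:.3f}, p-value: {result.pvalue:.3f}")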


evv4esm/__init__.py

Lines changed: 1 addition & 1 deletion

@@ -28,7 +28,7 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 
-__version_info__ = (0, 3, 2)
+__version_info__ = (0, 4, 0)
 __version__ = '.'.join(str(vi) for vi in __version_info__)
 
 PASS_COLOR = '#389933'
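For reference, the join above turns the bumped tuple into the dotted version string; a minimal check, assuming nothing beyond the two lines shown:

__version_info__ = (0, 4, 0)
__version__ = '.'.join(str(vi) for vi in __version_info__)
assert __version__ == '0.4.0'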

evv4esm/ensembles/e3sm.py

Lines changed: 64 additions & 25 deletions

@@ -37,45 +37,71 @@
 import glob
 
 from collections import OrderedDict
+from functools import partial
 
 import numpy as np
 import pandas as pd
 from netCDF4 import Dataset
 
 
 def component_file_instance(component, case_file):
-    search_regex = r'{c}_[0-9]+'.format(c=component)
+    search_regex = r"{c}_[0-9]+".format(c=component)
     result = re.search(search_regex, case_file).group(0)
-    return int(result.replace('{}_'.format(component), ''))
+    return int(result.replace("{}_".format(component), ""))
 
 
-def file_date_str(case_file, style='short'):
-    if style == 'full':
-        search_regex = r'h0\.[0-9]+-[0-9]+-[0-9]+-[0-9]+.nc'
-    elif style == 'short':
-        search_regex = r'h0\.[0-9]+-[0-9]+.nc'
+def file_date_str(case_file, style="short", hist_name="h0"):
+    if style == "full":
+        search_regex = r"{}\.[0-9]+-[0-9]+-[0-9]+-[0-9]+.nc".format(hist_name)
+    elif style == "med":
+        search_regex = r"{}\.[0-9]+-[0-9]+-[0-9]+.nc".format(hist_name)
+    elif style == "short":
+        search_regex = r"{}\.[0-9]+-[0-9]+.nc".format(hist_name)
     else:
-        # FIXME: log warning here
-        search_regex = r'h0\.[0-9]+-[0-9]+.nc'
+        search_regex = r"{}\.[0-9]+-[0-9]+.nc".format(hist_name)
+
     result = re.search(search_regex, case_file).group(0)
-    return result.replace('h0.', '').replace('.nc', '')
+    return result.replace("{}.".format(hist_name), "").replace(".nc", "")
+
 
+def component_monthly_files(dir_, component, ninst, hist_name="h0", nmonth_max=12, date_style="short"):
+    if date_style == "full":
+        date_search = "????-??-??-??"
+    elif date_style == "med":
+        date_search = "????-??-??"
+    else:
+        date_search = "????-??"
 
-def component_monthly_files(dir_, component, ninst):
-    base = '{d}/*{c}_????.h0.????-??.nc'.format(d=dir_, c=component)
+def component_monthly_files(dir_, component, ninst, hist_name="hist", nmonth_max=24, date_style="short"):
+    base = "{d}/*{c}_????.{n}.????-??-??.nc".format(d=dir_, c=component, n=hist_name)
     search = os.path.normpath(base)
     result = sorted(glob.glob(search))
 
     instance_files = OrderedDict()
+    _file_date_str = partial(file_date_str, style=date_style, hist_name=hist_name)
     for ii in range(1, ninst + 1):
-        instance_files[ii] = sorted(filter(lambda x: component_file_instance(component, x) == ii, result),
-                                    key=file_date_str)
-        if len(instance_files[ii]) > 12:
-            instance_files[ii] = instance_files[ii][-12:]
+        instance_files[ii] = sorted(
+            filter(lambda x: component_file_instance(component, x) == ii, result),
+            key=_file_date_str,
+        )
+        if len(instance_files[ii]) > nmonth_max:
+            instance_files[ii] = instance_files[ii][-nmonth_max:]
 
     return instance_files
 
 
+def get_variable_meta(dataset, var_name):
+    try:
+        _name = f": {dataset.variables[var_name].getncattr('long_name')}"
+    except AttributeError:
+        _name = ""
+    try:
+        _units = f" [{dataset.variables[var_name].getncattr('units')}]"
+    except AttributeError:
+        _units = ""
+    return {"long_name": _name, "units": _units}
+
+
 def gather_monthly_averages(ensemble_files, variable_set=None):
     monthly_avgs = []
     for case, inst_dict in six.iteritems(ensemble_files):
@@ -101,16 +127,29 @@ def gather_monthly_averages(ensemble_files, variable_set=None):
                         continue
                     else:
                         m = np.mean(data.variables[var][0, ...])
-                    try:
-                        _name = f": {data.variables[var].getncattr('long_name')}"
-                    except AttributeError:
-                        _name = ""
-                    try:
-                        _units = f" [{data.variables[var].getncattr('units')}]"
-                    except AttributeError:
-                        _units = ""
-                    desc = f"{_name}{_units}"
+
+                    desc = "{long_name}{units}".format(**get_variable_meta(data, var))
                     monthly_avgs.append((case, var, '{:04}'.format(inst), date_str, m, desc))
 
     monthly_avgs = pd.DataFrame(monthly_avgs, columns=('case', 'variable', 'instance', 'date', 'monthly_mean', 'desc'))
     return monthly_avgs
+
+
+def load_mpas_climatology_ensemble(files, field_name, mask_value=None):
+    # Get the first file to set up ensemble array output
+    with Dataset(files[0], "r") as dset:
+        _field = dset.variables[field_name][:].squeeze()
+        var_desc = "{long_name}{units}".format(**get_variable_meta(dset, field_name))
+
+    dims = _field.shape
+    ens_out = np.ma.zeros([*dims, len(files)])
+    ens_out[..., 0] = _field
+    for idx, file_name in enumerate(files[1:]):
+        with Dataset(file_name, "r") as dset:
+            _field = dset.variables[field_name][:].squeeze()
+            ens_out[..., idx + 1] = _field
+
+    if mask_value:
+        ens_out = np.ma.masked_less(ens_out, mask_value)
+
+    return {"data": ens_out, "desc": var_desc}
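A hedged usage sketch of the helpers changed above; the run directory, component name, field name, ensemble size, and mask threshold are placeholders for illustration, and only the function signatures come from the diff:

# Hypothetical usage; paths and names below are placeholders, not repository values.
from evv4esm.ensembles.e3sm import component_monthly_files, load_mpas_climatology_ensemble

# Collect up to 24 dated history files per instance for a 20-member ensemble.
hist_files = component_monthly_files(
    "/path/to/case/run", "mpaso", ninst=20,
    hist_name="hist", nmonth_max=24, date_style="med",
)

# Stack one field from instance 1's files into a single masked ensemble array;
# values below mask_value are masked out (e.g. large negative fill values).
climo = load_mpas_climatology_ensemble(
    hist_files[1], "timeClimatology_avg_activeTracers_temperature", mask_value=-1.0e30,
)
print(climo["desc"], climo["data"].shape)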

evv4esm/ensembles/tools.py

Lines changed: 18 additions & 4 deletions

@@ -100,18 +100,31 @@ def prob_plot(test, ref, n_q, img_file, test_name='Test', ref_name='Ref.',
     # NOTE: Produce unity-based normalization of data for the Q-Q plots because
     # matplotlib can't handle small absolute values or data ranges. See
     # https://github.com/matplotlib/matplotlib/issues/6015
-    if not np.allclose(min_, max_, atol=np.finfo(max_).eps):
+    if not np.allclose(min_, max_, rtol=np.finfo(max_).eps):
         norm1 = (ref - min_) / (max_ - min_)
         norm2 = (test - min_) / (max_ - min_)
 
         ax1.scatter(np.percentile(norm1, q), np.percentile(norm2, q),
                     color=pf_color_picker.get(pf, '#1F77B4'), zorder=2)
-        ax3.hist(norm1, bins=n_q, color=pf_color_picker.get(pf, '#1F77B4'))
-        ax4.hist(norm2, bins=n_q, color=pf_color_picker.get(pf, '#1F77B4'))
+        ax3.hist(norm1, bins=n_q, color=pf_color_picker.get(pf, '#1F77B4'), edgecolor="k")
+        ax4.hist(norm2, bins=n_q, color=pf_color_picker.get(pf, '#1F77B4'), edgecolor="k")
+
+        # Check if these distributions are wildly different. If they are, use different
+        # colours for the bottom axis? Otherwise set the scales to be the same [0, 1]
+        if abs(norm1.mean() - norm2.mean()) >= 0.5:
+            ax3.tick_params(axis="x", colors="C0")
+            ax3.spines["bottom"].set_color("C0")
+
+            ax4.tick_params(axis="x", colors="C1")
+            ax4.spines["bottom"].set_color("C1")
+        else:
+            ax3.set_xlim(tuple(norm_rng))
+            ax4.set_xlim(tuple(norm_rng))
+
 
     # bin both series into equal bins and get cumulative counts for each bin
     bnds = np.linspace(min_, max_, n_q)
-    if not np.allclose(bnds, bnds[0], atol=np.finfo(bnds[0]).eps):
+    if not np.allclose(bnds, bnds[0], rtol=np.finfo(bnds[0]).eps):
         ppxb = pd.cut(ref, bnds)
         ppyb = pd.cut(test, bnds)
 
@@ -124,6 +137,7 @@ def prob_plot(test, ref, n_q, img_file, test_name='Test', ref_name='Ref.',
         ax2.scatter(ppyh.values, ppxh.values,
                     color=pf_color_picker.get(pf, '#1F77B4'), zorder=2)
 
+
     plt.tight_layout()
     plt.savefig(img_file, bbox_inches='tight')
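A small self-contained illustration of what the atol-to-rtol change above does; the numbers are made up, and the only fact relied on is that np.allclose compares abs(a - b) against atol + rtol * abs(b):

import numpy as np

min_, max_ = np.float64(1.0), np.float64(1.000001)  # made-up, nearly degenerate range

# Old check: atol is machine eps, but the default rtol of 1e-05 still applies,
# so this tiny range counts as "all close" and the normalization branch is skipped.
print(np.allclose(min_, max_, atol=np.finfo(max_).eps))  # True

# New check: rtol is machine eps with the default atol of 1e-08, so the same
# range is no longer treated as degenerate and the Q-Q normalization runs.
print(np.allclose(min_, max_, rtol=np.finfo(max_).eps))  # False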
