Skip to content

Commit fe14db0

Browse files
add jupyter notebooks documenting metrics diffs (#964)
1 parent 118cdb9 commit fe14db0

File tree

2 files changed

+396
-0
lines changed

2 files changed

+396
-0
lines changed
Lines changed: 186 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,186 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "code",
5+
"execution_count": 1,
6+
"id": "004762c2",
7+
"metadata": {},
8+
"outputs": [],
9+
"source": [
10+
"import xarray as xr\n",
11+
"import xcdat as xc\n",
12+
"import numpy as np\n",
13+
"import xskillscore as xs "
14+
]
15+
},
16+
{
17+
"cell_type": "markdown",
18+
"id": "f61a38ed",
19+
"metadata": {},
20+
"source": [
21+
"Description: the metrics table from e3sm_diags v3.0.0 shows a small difference in the regridded mean compared to e3sm_diags v2. \n",
22+
"\n",
23+
"\n",
24+
"V3:\n",
25+
"Variables\tUnit\t Test_mean\tRef._mean\tMean_Bias\tTest_STD\tRef._STD\tRMSE\tCorrelation\n",
26+
"\n",
27+
"SST global HadISST_CL\tdegC\t20.256\t18.777\t1.48\t8.178\t9.464\t1.055\t0.992\n",
28+
"\n",
29+
"SST global HadISST_PI\tdegC\t20.256\t19.058\t1.199\t8.178\t8.853\t1.233\t0.991\n",
30+
"\n",
31+
"SST global HadISST_PD\tdegC\t20.256\t18.885\t1.372\t8.178\t9.47\t1.082\t0.992\n",
32+
"\n",
33+
"V2:\n",
34+
"\n",
35+
"SST global HadISST_CL\tdegC\t20.256\t18.698\t1.559\t8.178\t9.536\t1.054\t0.992\n",
36+
"\n",
37+
"SST global HadISST_PI\tdegC\t20.256\t18.978\t1.279\t8.178\t8.933\t1.232\t0.991\n",
38+
"\n",
39+
"SST global HadISST_PD\tdegC\t20.256\t18.807\t1.45\t8.178\t9.543\t1.082\t0.992\n",
40+
"\n",
41+
"\n",
42+
"\n",
43+
"Summary: the small difference comes from a change in the regridding routine. Both versions use the bilinear method, but the new code base needs a mask explicitly added to the dataset before it is passed to the ESMF regridder. Otherwise, more data are treated as missing, i.e., missing data bleed into the regridded data. \n",
44+
"\n",
45+
"Solutions:\n",
46+
"1. In the xcdat regridder, add `mask` before passing data into xesmf. \n",
47+
"2. In e3sm_diags, add `mask` before calling xcdat.\n",
48+
"3. Use the `conservative_normed` method for SST, though this requires dropping the lat bounds from the HadISST data, which are in descending order (already fixed on the LCRC inputdata server); this is another issue the xcdat team is addressing.\n",
49+
"\n",
50+
"Test data are available from: https://web.lcrc.anl.gov/public/e3sm/zhang40/cdat-migration-fy24/test_data/"
51+
]
52+
},
53+
{
54+
"cell_type": "code",
55+
"execution_count": 2,
56+
"id": "76b832d7",
57+
"metadata": {},
58+
"outputs": [],
59+
"source": [
60+
"f_a = '/Users/zhang40/Downloads/HadISST_CL-SST-ANN-global_test.nc'\n",
61+
"f_b = '/Users/zhang40/Downloads/HadISST_CL-SST-ANN-global_ref.nc'"
62+
]
63+
},
64+
{
65+
"cell_type": "code",
66+
"execution_count": 3,
67+
"id": "2eb27519",
68+
"metadata": {},
69+
"outputs": [],
70+
"source": [
71+
"sst_a = xr.open_dataset(f_a)\n",
72+
"sst_b = xr.open_dataset(f_b)\n",
73+
"var = 'SST'"
74+
]
75+
},
76+
{
77+
"cell_type": "code",
78+
"execution_count": 4,
79+
"id": "559a37ce",
80+
"metadata": {},
81+
"outputs": [
82+
{
83+
"name": "stderr",
84+
"output_type": "stream",
85+
"text": [
86+
"/Users/zhang40/mambaforge/envs/e3sm-unified/lib/python3.10/site-packages/xarray/core/concat.py:546: FutureWarning: unique with argument that is not not a Series, Index, ExtensionArray, or np.ndarray is deprecated and will raise in a future version.\n",
87+
" common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))\n"
88+
]
89+
},
90+
{
91+
"name": "stdout",
92+
"output_type": "stream",
93+
"text": [
94+
"When no mask is explicitly added:\n",
95+
"weighted mean, bilinear: 18.77674568342201 1.4763235405423747\n",
96+
"weighted mean, conserve: 18.646808919906057 1.4764820110242953\n"
97+
]
98+
}
99+
],
100+
"source": [
101+
"sst_a = sst_a.bounds.add_missing_bounds()\n",
102+
"sst_b = sst_b.bounds.add_missing_bounds()\n",
103+
"\n",
104+
"weights = sst_a.spatial.get_weights([\"X\", \"Y\"], data_var=var)\n",
105+
"\n",
106+
"output_grid = sst_a.regridder.grid\n",
107+
"# Regridding without a mask\n",
108+
"sst_b_regrid_bilinear = sst_b.regridder.horizontal(\n",
109+
" var, output_grid, tool='xesmf', method='bilinear'\n",
110+
" )\n",
111+
"\n",
112+
"sst_b_regrid_conservative_normed = sst_b.regridder.horizontal(\n",
113+
" var, output_grid, tool='xesmf', method='conservative_normed'\n",
114+
" )\n",
115+
"result_xr1 = xs.rmse(sst_a[var], sst_b_regrid_bilinear[var], dim=[\"lat\", \"lon\"], weights=weights, skipna=True)\n",
116+
"result_xr2 = xs.rmse(sst_a[var], sst_b_regrid_conservative_normed[var], dim=[\"lat\", \"lon\"], weights=weights, skipna=True)\n",
117+
"\n",
118+
"\n",
119+
"print('When no mask is explicitly added:')\n",
120+
"print('weighted mean, bilinear:', sst_b_regrid_bilinear[var].weighted(weights).mean().values, result_xr1.values)\n",
121+
"print('weighted mean, conserve:', sst_b_regrid_conservative_normed[var].weighted(weights).mean().values, result_xr2.values)"
122+
]
123+
},
124+
{
125+
"cell_type": "code",
126+
"execution_count": 5,
127+
"id": "368d18da",
128+
"metadata": {},
129+
"outputs": [
130+
{
131+
"name": "stdout",
132+
"output_type": "stream",
133+
"text": [
134+
"With mask explicitly added:\n",
135+
"weighted mean and rmse, bilinear: 18.673915615671618 1.4764820110242953\n",
136+
"weighted mean and rmse, conserve: 18.646808919906057 1.4764820110242953\n"
137+
]
138+
}
139+
],
140+
"source": [
141+
"# Add a mask variable to the dataset to regrid with a mask. This helps\n",
142+
"# prevent missing values (`np.nan`) from bleeding into the\n",
143+
"# regridding.\n",
144+
"# https://xesmf.readthedocs.io/en/latest/notebooks/Masking.html#Regridding-with-a-mask\n",
145+
"# sst_b[\"mask\"] = xr.where(~np.isnan(sst_b[var]), 1, 0)\n",
146+
"# Below creates a True/False boolean mask, which may be faster and use less memory.\n",
147+
"sst_b[\"mask\"] = ~np.isnan(sst_b[var])\n",
148+
"sst_b_regrid_bilinear = sst_b.regridder.horizontal(\n",
149+
" var, output_grid, tool='xesmf', method='bilinear'\n",
150+
" )\n",
151+
"\n",
152+
"sst_b_regrid_conservative_normed = sst_b.regridder.horizontal(\n",
153+
" var, output_grid, tool='xesmf', method='conservative_normed'\n",
154+
" )\n",
155+
"result_xr1 = xs.rmse(sst_a[var], sst_b_regrid_bilinear[var], dim=[\"lat\", \"lon\"], weights=weights, skipna=True)\n",
156+
"result_xr2 = xs.rmse(sst_a[var], sst_b_regrid_conservative_normed[var], dim=[\"lat\", \"lon\"], weights=weights, skipna=True)\n",
157+
"\n",
158+
"print('With mask explicitly added:')\n",
159+
"print('weighted mean and rmse, bilinear:', sst_b_regrid_bilinear[var].weighted(weights).mean().values, result_xr1.values)\n",
160+
"print('weighted mean and rmse, conserve:', sst_b_regrid_conservative_normed[var].weighted(weights).mean().values, result_xr2.values)\n",
161+
"\n"
162+
]
163+
}
164+
],
165+
"metadata": {
166+
"kernelspec": {
167+
"display_name": "Python [conda env:e3sm-unified] *",
168+
"language": "python",
169+
"name": "conda-env-e3sm-unified-py"
170+
},
171+
"language_info": {
172+
"codemirror_mode": {
173+
"name": "ipython",
174+
"version": 3
175+
},
176+
"file_extension": ".py",
177+
"mimetype": "text/x-python",
178+
"name": "python",
179+
"nbconvert_exporter": "python",
180+
"pygments_lexer": "ipython3",
181+
"version": "3.10.15"
182+
}
183+
},
184+
"nbformat": 4,
185+
"nbformat_minor": 5
186+
}

0 commit comments

Comments
 (0)