E3SM-Project
diff --git a/‎auxiliary_tools/cdat_regression_testing/metrics_diffs/lat_lon_HadISST_CL-SST-metrics-diff.ipynb‎
Lines changed: 186 additions & 0 deletions b/‎auxiliary_tools/cdat_regression_testing/metrics_diffs/lat_lon_HadISST_CL-SST-metrics-diff.ipynb‎
Lines changed: 186 additions & 0 deletions
@@ -0,0 +1,186 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "004762c2",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import xarray as xr\n",
+    "import xcdat as xc\n",
+    "import numpy as np\n",
+    "import xskillscore as xs "
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "f61a38ed",
+   "metadata": {},
+   "source": [
+    "Description: in metrics table from e3sm_diags v3.0.0, it is shown a small difference for regridded mean compare to e3sm_diags v2. \n",
+    "\n",
+    "\n",
+    "V3:\n",
+    "Variables\tUnit\t          Test_mean\tRef._mean\tMean_Bias\tTest_STD\tRef._STD\tRMSE\tCorrelation\n",
+    "\n",
+    "SST global HadISST_CL\tdegC\t20.256\t18.777\t1.48\t8.178\t9.464\t1.055\t0.992\n",
+    "\n",
+    "SST global HadISST_PI\tdegC\t20.256\t19.058\t1.199\t8.178\t8.853\t1.233\t0.991\n",
+    "\n",
+    "SST global HadISST_PD\tdegC\t20.256\t18.885\t1.372\t8.178\t9.47\t1.082\t0.992\n",
+    "\n",
+    "V2:\n",
+    "\n",
+    "SST global HadISST_CL\tdegC\t20.256\t18.698\t1.559\t8.178\t9.536\t1.054\t0.992\n",
+    "\n",
+    "SST global HadISST_PI\tdegC\t20.256\t18.978\t1.279\t8.178\t8.933\t1.232\t0.991\n",
+    "\n",
+    "SST global HadISST_PD\tdegC\t20.256\t18.807\t1.45\t8.178\t9.543\t1.082\t0.992\n",
+    "\n",
+    "\n",
+    "\n",
+    "Summary: the small difference came from regridding routine change, both uses bilinear, but for the new code base, it needs explicitly add a mask to the dataset to pass into ESMF regridder. Otherwise, there will be more data treated as missing, a.k.a missing data bleeding into regridded data. \n",
+    "\n",
+    "Solutions:\n",
+    "1. in xcdat regridder, add `mask` before passing data into xesmf \n",
+    "2. in e3sm_diags add `mask` before calling xcdat\n",
+    "3. to use conservative_norm method for SST, though this requires the HadISST data drop the lat bounds which is in descending (already fixed in lcrc inputdata server), another issue xcdat team is addressing.\n",
+    "\n",
+    "Data for testing available from :https://web.lcrc.anl.gov/public/e3sm/zhang40/cdat-migration-fy24/test_data/"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "76b832d7",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "f_a = '/Users/zhang40/Downloads/HadISST_CL-SST-ANN-global_test.nc'\n",
+    "f_b = '/Users/zhang40/Downloads/HadISST_CL-SST-ANN-global_ref.nc'"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "2eb27519",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "sst_a = xr.open_dataset(f_a)\n",
+    "sst_b = xr.open_dataset(f_b)\n",
+    "var = 'SST'"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "559a37ce",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/Users/zhang40/mambaforge/envs/e3sm-unified/lib/python3.10/site-packages/xarray/core/concat.py:546: FutureWarning: unique with argument that is not not a Series, Index, ExtensionArray, or np.ndarray is deprecated and will raise in a future version.\n",
+      "  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "When no mask is explicitly added:\n",
+      "weighted mean, bilinear: 18.77674568342201 1.4763235405423747\n",
+      "weighted mean, conserve: 18.646808919906057 1.4764820110242953\n"
+     ]
+    }
+   ],
+   "source": [
+    "sst_a = sst_a.bounds.add_missing_bounds()\n",
+    "sst_b = sst_b.bounds.add_missing_bounds()\n",
+    "\n",
+    "weights = sst_a.spatial.get_weights([\"X\", \"Y\"], data_var=var)\n",
+    "\n",
+    "output_grid = sst_a.regridder.grid\n",
+    "# Regriding without mask\n",
+    "sst_b_regrid_bilinear = sst_b.regridder.horizontal(\n",
+    "            var, output_grid, tool='xesmf', method='bilinear'\n",
+    "        )\n",
+    "\n",
+    "sst_b_regrid_conservative_normed = sst_b.regridder.horizontal(\n",
+    "            var, output_grid, tool='xesmf', method='conservative_normed'\n",
+    "        )\n",
+    "result_xr1 = xs.rmse(sst_a[var], sst_b_regrid_bilinear[var], dim=[\"lat\", \"lon\"], weights=weights, skipna=True)\n",
+    "result_xr2 = xs.rmse(sst_a[var], sst_b_regrid_conservative_normed[var], dim=[\"lat\", \"lon\"], weights=weights, skipna=True)\n",
+    "\n",
+    "\n",
+    "print('When no mask is explicitly added:')\n",
+    "print('weighted mean, bilinear:', sst_b_regrid_bilinear[var].weighted(weights).mean().values, result_xr1.values)\n",
+    "print('weighted mean, conserve:', sst_b_regrid_conservative_normed[var].weighted(weights).mean().values, result_xr2.values)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "368d18da",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "With mask explicitly added:\n",
+      "weighted mean and rmse, bilinear: 18.673915615671618 1.4764820110242953\n",
+      "weighted mean and rmse, conserve: 18.646808919906057 1.4764820110242953\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Add a mask variable to the dataset to regrid with a mask. This helps\n",
+    "# prevent missing values (`np.nan`) from bleeding into the\n",
+    "# regridding.\n",
+    "# https://xesmf.readthedocs.io/en/latest/notebooks/Masking.html#Regridding-with-a-mask\n",
+    "# sst_b[\"mask\"] = xr.where(~np.isnan(sst_b[var]), 1, 0)\n",
+    "# Below creates a True/False boolean mask, which may be faster and use less memory.\n",
+    "sst_b[\"mask\"] = ~np.isnan(sst_b[var])\n",
+    "sst_b_regrid_bilinear = sst_b.regridder.horizontal(\n",
+    "            var, output_grid, tool='xesmf', method='bilinear'\n",
+    "        )\n",
+    "\n",
+    "sst_b_regrid_conservative_normed = sst_b.regridder.horizontal(\n",
+    "            var, output_grid, tool='xesmf', method='conservative_normed'\n",
+    "        )\n",
+    "result_xr1 = xs.rmse(sst_a[var], sst_b_regrid_bilinear[var], dim=[\"lat\", \"lon\"], weights=weights, skipna=True)\n",
+    "result_xr2 = xs.rmse(sst_a[var], sst_b_regrid_conservative_normed[var], dim=[\"lat\", \"lon\"], weights=weights, skipna=True)\n",
+    "\n",
+    "print('With mask explicitly added:')\n",
+    "print('weighted mean and rmse, bilinear:', sst_b_regrid_bilinear[var].weighted(weights).mean().values, result_xr1.values)\n",
+    "print('weighted mean and rmse, conserve:', sst_b_regrid_conservative_normed[var].weighted(weights).mean().values, result_xr2.values)\n",
+    "\n"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python [conda env:e3sm-unified] *",
+   "language": "python",
+   "name": "conda-env-e3sm-unified-py"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.15"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}