ClimateImpactLab
diff --git a/‎notebooks/downscaling_pipeline/pipeline_testing/check_bias_correction_two_methods.ipynb‎
Lines changed: 302 additions & 0 deletions b/‎notebooks/downscaling_pipeline/pipeline_testing/check_bias_correction_two_methods.ipynb‎
Lines changed: 302 additions & 0 deletions
@@ -0,0 +1,302 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "1a1c2a62-0daa-490c-8c27-d38b5082fa06",
+   "metadata": {
+    "tags": []
+   },
+   "source": [
+    "##### Short bias correction method validation exercise. where we check that we properly bias corrected the CMIP6 model output. #####\n",
+    "\n",
+    "##### Check (1) For each cell in a small selection we calculate a range of quantiles (across the temporal distribution of a given cell) of tasmax within the whole historical period and we compare that to the reference ERA-5 data. ######\n",
+    "\n",
+    "##### Check (2) Similarly, we calculate quantiles for the non-bias-corrected and bias-corrected future model output, compute the absolute change relative to historical and verify the changes are preserved after correction. ######"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "4b99f421-d6e0-423a-ab9f-5cc633247189",
+   "metadata": {
+    "jp-MarkdownHeadingCollapsed": true,
+    "tags": []
+   },
+   "source": [
+    "#### Set up "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "d4be1bb5-2c55-40ae-9c9c-803efaf697f1",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%%capture\n",
+    "! pip install xclim\n",
+    "import numpy as np\n",
+    "from urbanspoon import core\n",
+    "import json\n",
+    "import gcsfs\n",
+    "import xarray as xr"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "db87076f-dd75-49ef-8f19-2af23b5dadcf",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "gcm = 'GFDL-ESM4'\n",
+    "data_paths_file = '/home/jovyan/output/tasmax_gcms_data_paths.json'\n",
+    "with open(data_paths_file) as json_file:\n",
+    "    data_dict = json.load(json_file)[gcm]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "a0e3d0f5-a8c4-4a87-81a4-1d4ef0e8818a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def read_gcs_zarr(zarr_url, token='/opt/gcsfuse_tokens/impactlab-data.json', check=False, consolidated=True):\n",
+    "    \"\"\"\n",
+    "    takes in a GCSFS zarr url, bucket token, and returns a dataset \n",
+    "    Note that you will need to have the proper bucket authentication. \n",
+    "    \"\"\"\n",
+    "    fs = gcsfs.GCSFileSystem(token=token)\n",
+    "    store_path = fs.get_mapper(zarr_url, check=check) \n",
+    "    ds = xr.open_zarr(store_path, consolidated=consolidated) \n",
+    "    ds.close() \n",
+    "    return ds "
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "aeaacdc6-b6c8-49cb-92b4-33cf951c7540",
+   "metadata": {
+    "jp-MarkdownHeadingCollapsed": true,
+    "tags": []
+   },
+   "source": [
+    "#### Select a few grid cells and quantiles to look at ###"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "a44600a0-4870-4a79-b93e-07231040d278",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Andorra, Ohio, N Brazil\n",
+    "cells = [(41.5, 1.5),(39.5, -83.5),(-5.5, -49.5)]\n",
+    "myquants = [0.01, 0.05, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 0.99]\n",
+    "quants_results = {}"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "e8d58846-7dc5-4183-9789-eabe1a56c366",
+   "metadata": {
+    "jp-MarkdownHeadingCollapsed": true,
+    "tags": []
+   },
+   "source": [
+    "#### Compute quantiles for each dataset ###"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "id": "ccd7cce4-8a39-474b-83e4-ff241c1a6e36",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "data_files = [\n",
+    "    data_dict['coarse']['cmip6']['historical'],\n",
+    "    data_dict['coarse']['cmip6']['ssp370'],\n",
+    "    data_dict['coarse']['bias_corrected']['historical'],\n",
+    "    data_dict['coarse']['bias_corrected']['ssp370'],\n",
+    "    data_dict['coarse']['ERA-5']\n",
+    "]\n",
+    "for data_file in data_files:\n",
+    "    da = read_gcs_zarr(data_file)['tasmax'].chunk(dict(time=-1))\n",
+    "    if data_file == data_files[1] or data_file == data_files[3]:\n",
+    "        da = da.sel(time=slice('2080', '2100'))\n",
+    "    result = core.xr_quantiles_across_time_by_cell(da=da, q=myquants, cells=cells)\n",
+    "    for r,k in result.items():\n",
+    "        result[r] = k.compute()\n",
+    "    quants_results[data_file] = result"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "a0385778-08c7-4127-9316-8eaeaf1212e7",
+   "metadata": {
+    "jp-MarkdownHeadingCollapsed": true,
+    "tags": []
+   },
+   "source": [
+    "### Bias correction method check (1) ###"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "cb69b7c6-6dba-4807-8f8d-0115e7c1eeb7",
+   "metadata": {
+    "tags": []
+   },
+   "source": [
+    "##### Verify bias-corrected historical CMIP6 is consistent with ERA-5, while non-bias-corrected isn't. Correction should reduce the bias (so quantiles diffs should be closer to zero)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 67,
+   "id": "11080d9c-539a-4a08-bf7a-d7dd0ec9dbac",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "cell : (41.5, 1.5)\n",
+      "non corrected\n",
+      "[2.56773102 2.15558777 1.97580566 2.34085083 3.32855225 3.06008911\n",
+      " 2.49491577 2.06820374 1.76759369]\n",
+      "corrected\n",
+      "[1.12341064 0.98111115 0.84924927 0.58791351 0.47463989 0.6499939\n",
+      " 0.42756042 0.24011841 0.12395844]\n",
+      "cell : (39.5, -83.5)\n",
+      "non corrected\n",
+      "[0.24749176 0.93431091 2.05977478 4.80963898 4.96957397 3.92437744\n",
+      " 3.11152039 2.97287292 3.2295816 ]\n",
+      "corrected\n",
+      "[-0.3089743  -0.37416229 -0.40345154  0.03394318  0.18711853  0.39482117\n",
+      "  0.30922241  0.26907196  0.19752075]\n",
+      "cell : (-5.5, -49.5)\n",
+      "non corrected\n",
+      "[ 2.81440765  2.70875092  2.26695557  2.23838806  2.25622559 -0.85774231\n",
+      " -2.69372253 -3.00125885 -3.31266205]\n",
+      "corrected\n",
+      "[0.79361389 0.74092407 0.57475586 0.46287537 0.47612    0.40182495\n",
+      " 0.38529968 0.40956726 0.41493103]\n"
+     ]
+    }
+   ],
+   "source": [
+    "for c in cells:\n",
+    "    print(f'cell : {c}')\n",
+    "    print('non corrected')\n",
+    "    print(quants_results[data_dict['coarse']['ERA-5']][c].values-quants_results[data_dict['coarse']['cmip6']['historical']][c].values)\n",
+    "    print('corrected')\n",
+    "    print(quants_results[data_dict['coarse']['ERA-5']][c].values-quants_results[data_dict['coarse']['bias_corrected']['historical']][c].values)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "a1692629-b8b3-4bba-9b49-4697b968a643",
+   "metadata": {
+    "tags": []
+   },
+   "source": [
+    "### Bias correction method check (2) ###"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "73d77fb5-e9f1-402b-9666-77cac00b3d0e",
+   "metadata": {
+    "jp-MarkdownHeadingCollapsed": true,
+    "tags": []
+   },
+   "source": [
+    "##### Verify CMIP6 absolute changes in quantiles across time are preserved after QDM bias correction."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "id": "7b8e3b54-1cf2-45d5-b030-75eefcee81f6",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "cell : (41.5, 1.5)\n",
+      "non corrected\n",
+      "[3.68578522 3.20266876 3.14243774 3.14105988 3.70657349 4.42950439\n",
+      " 4.95620422 4.98266907 4.68912598]\n",
+      "corrected\n",
+      "[3.87143188 3.08726807 3.14093018 3.51397705 3.96615601 4.8757019\n",
+      " 5.29766846 5.19889526 4.94866333]\n",
+      "cell : (39.5, -83.5)\n",
+      "non corrected\n",
+      "[4.91748657 2.47857208 2.54364014 3.39523315 3.83833313 4.31138611\n",
+      " 4.07737732 3.99814301 3.65487091]\n",
+      "corrected\n",
+      "[5.15460327 2.65206299 2.66172485 3.64028931 4.08895874 4.36862183\n",
+      " 4.30161133 4.03612671 3.95448853]\n",
+      "cell : (-5.5, -49.5)\n",
+      "non corrected\n",
+      "[2.24838043 2.55540619 2.65269165 3.25076294 3.73539734 4.23408508\n",
+      " 3.90587769 3.66032257 3.33886932]\n",
+      "corrected\n",
+      "[2.55775635 2.8618042  3.00776978 3.49560547 4.03863525 4.00012207\n",
+      " 3.64265137 3.59101563 3.56733765]\n"
+     ]
+    }
+   ],
+   "source": [
+    "for c in cells:\n",
+    "    print(f'cell : {c}')\n",
+    "    print('non corrected')\n",
+    "    print(quants_results[data_dict['coarse']['cmip6']['ssp370']][c].values-quants_results[data_dict['coarse']['cmip6']['historical']][c].values)\n",
+    "    print('corrected')\n",
+    "    print(quants_results[data_dict['coarse']['bias_corrected']['ssp370']][c].values-quants_results[data_dict['coarse']['bias_corrected']['historical']][c].values)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "23668f48-76d6-4180-9eec-a850cd6cce4c",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.12"
+  },
+  "widgets": {
+   "application/vnd.jupyter.widget-state+json": {
+    "state": {},
+    "version_major": 2,
+    "version_minor": 0
+   }
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}