From b605b8b7a967b8d42ac5d86909d25df536b39659 Mon Sep 17 00:00:00 2001 From: Tom Vo Date: Tue, 10 Oct 2023 15:42:49 -0700 Subject: [PATCH 01/25] CDAT Migration Phase 2: Refactor core utilities and `lat_lon` set (#677) Refer to the PR for more information because the changelog is massive. Update build workflow to run on `cdat-migration-fy24` branch CDAT Migration Phase 2: Add CDAT regression test notebook template and fix GH Actions build (#743) - Add Makefile for quick access to multiple Python-based commands such as linting, testing, cleaning up cache and build files - Fix some lingering unit tests failure - Update `xcdat=0.6.0rc1` to `xcdat >=0.6.0` in `ci.yml`, `dev.yml` and `dev-nompi.yml` - Add `xskillscore` to `ci.yml` - Fix `pre-commit` issues CDAT Migration Phase 2: Regression testing for `lat_lon`, `lat_lon_land`, and `lat_lon_river` (#744) - Add Makefile that simplifies common development commands (building and installing, testing, etc.) - Write unit tests to cover all new code for utility functions - `dataset_xr.py`, `metrics.py`, `climo_xr.py`, `io.py`, `regrid.py` - Metrics comparison for `cdat-migration-fy24` `lat_lon` and `main` branch of `lat_lon` -- `NET_FLUX_SRF` and `RESTOM` have the highest spatial average diffs - Test run with 3D variables (`_run_3d_diags()`) - Fix Python 3.9 bug with using pipe command to represent Union -- doesn't work with `from __future__ import annotations` still - Fix subsetting syntax bug using ilev - Fix regridding bug where a single plev is passed and xCDAT does not allow generating bounds for coordinates of len <= 1 -- add conditional that just ignores adding new bounds for regridded output datasets, fix related tests - Fix accidentally calling save plots and metrics twice in `_get_metrics_by_region()` - Fix failing integration tests pass in CI/CD - Refactor `test_diags.py` -- replace unittest with pytest - Refactor `test_all_sets.py` -- replace unittest with pytest - Test climatology datasets -- tested with 3d 
variables using `test_all_sets.py` CDAT Migration Phase 2: Refactor utilities and CoreParameter methods for reusability across diagnostic sets (#746) - Move driver type annotations to `type_annotations.py` - Move `lat_lon_driver._save_data_metrics_and_plots()` to `io.py` - Update `_save_data_metrics_and_plots` args to accept `plot_func` callable - Update `metrics.spatial_avg` to optionally return an `xr.DataArray` with `as_list=False` - Move `parameter` arg to the top in `lat_lon_plot.plot` - Move `_set_param_output_attrs` and `_set_name_yr_attrs` from `lat_lon_driver` to `CoreParameter` class Regression testing for lat_lon variables `NET_FLUX_SRF` and `RESTOM` (#754) Update regression test notebook to show validation of all vars Add `subset_and_align_datasets()` to regrid.py (#776) Add template run scripts CDAT Migration Phase: Refactor `cosp_histogram` set (#748) - Refactor `cosp_histogram_driver.py` and `cosp_histogram_plot.py` - `formulas_cosp.py` (new file) - Includes refactored, Xarray-based `cosp_histogram_standard()` and `cosp_bin_sum()` functions - I wrote a lot of new code in `formulas_cosp.py` to clean up `derivations.py` and the old equivalent functions in `utils.py` - `derivations.py` - Cleaned up portions of `DERIVED_VARIABLES` dictionary - Removed unnecessary `OrderedDict` usage for `cosp_histogram` related variables (we should do this for the rest of the variables in #716) - Remove unnecessary `convert_units()` function calls - Move cloud levels passed to derived variable formulas to `formulas_cosp.CLOUD_BIN_SUM_MAP` - `utils.py` - Delete deprecated, CDAT-based `cosp_histogram` functions - `dataset_xr.py` - Add `dataset_xr.Dataset._open_climo_dataset()` method with a catch for dataset quality issues where "time" is a scalar variable that does not match the "time" dimension array length, drops this variable and replaces it with the correct coordinate - Update `_get_dataset_with_derivation_func()` to handle derivation functions that require the 
`xr.Dataset` and `target_var_key` args (e.g., `cosp_histogram_standardize()` and `cosp_bin_sum()`) - `io.py` - Update `_write_vars_to_netcdf()` to write test, ref, and diff variables to individual netCDF (required for easy comparison to CDAT-based code that does the same thing) - Add `cdat_migration_regression_test_netcdf.ipynb` validation notebook template for comparing `.nc` files CDAT Migration Phase 2: Refactor `zonal_mean_2d()` and `zonal_mean_2d_stratosphere()` sets (#774) Refactor 654 zonal mean xy (#752) Co-authored-by: Tom Vo CDAT Migration - Update run script output directory to NERSC public webserver (#793) [PR]: CDAT Migration: Refactor `aerosol_aeronet` set (#788) CDAT Migration: Test `lat_lon` set with run script and debug any issues (#794) CDAT Migration: Refactor `polar` set (#749) Co-authored-by: Tom Vo Align order of calls to `_set_param_output_attrs` CDAT Migration: Refactor `meridional_mean_2d` set (#795) CDAT Migration: Refactor `aerosol_budget` (#800) Add `acme.py` changes from PR #712 (#814) * Add `acme.py` changes from PR #712 * Replace unnecessary lambda call Refactor area_mean_time_series and add ccb slice flag feature (#750) Co-authored-by: Tom Vo [Refactor]: Validate fix in PR #750 for #759 (#815) CDAT Migration Phase 2: Refactor `diurnal_cycle` set (#819) CDAT Migration: Refactor annual_cycle_zonal_mean set (#798) * Refactor `annual_cycle_zonal_mean` set * Address PR review comments * Add lat lon regression testing * Add debugging scripts * Update `_open_climo_dataset()` to decode times as workaround to misaligned time coords - Update `annual_cycle_zonal_mean_plot.py` to convert time coordinates to month integers * Fix unit tests * Remove old plotter * Add script to debug decode_times=True and ncclimo file * Update plotter time values to month integers * Fix slow `.load()` and multiprocessing issue - Due to incorrectly updating `keep_bnds` logic - Add `_encode_time_coords()` to workaround cftime issue `ValueError: "months since" units 
only allowed for "360_day" calendar` * Update `_encode_time_coords()` docstring * Add AODVIS debug script * update AODVIS obs datasets; regression test results --------- Co-authored-by: Tom Vo CDAT Migration Phase 2: Refactor `qbo` set (#826) CDAT Migration Phase 2: Refactor tc_analysis set (#829) * start tc_analysis_refactor * update driver * update plotting * Clean up plotter - Remove unused variables - Make `plot_info` a constant called `PLOT_INFO`, which is now a dict of dicts - Reorder functions for top-down readability * Remove unused notebook --------- Co-authored-by: tomvothecoder CDAT Migration Phase 2: Refactor `enso_diags` set (#832) CDAT Migration Phase 2: Refactor `streamflow` set (#837) [Bug]: CDAT Migration Phase 2: enso_diags plot fixes (#841) [Refactor]: CDAT Migration Phase 3: testing and documentation update (#846) CDAT Migration Phase 3 - Port QBO Wavelet feature to Xarray/xCDAT codebase (#860) CDAT Migration Phase 2: Refactor arm_diags set (#842) Add performance benchmark material (#864) Add function to add CF axis attr to Z axis if missing for downstream xCDAT operations (#865) CDAT Migration Phase 3: Add Convective Precipitation Fraction in lat-lon (#875) CDAT Migration Phase 3: Fix LHFLX name and add catch for non-existent or empty TE stitch file (#876) Add support for time series datasets via glob and fix `enso_diags` set (#866) Add fix for checking `is_time_series()` property based on `data_type` attr (#881) CDAT migration: Fix African easterly wave density plots in TC analysis and convert H20LNZ units to ppm/volume (#882) CDAT Migration: Update `mp_partition_driver.py` to use Dataset from `dataset_xr.py` (#883) CDAT Migration - Port JJB tropical subseasonal diags to Xarray/xCDAT (#887) CDAT Migration: Prepare branch for merge to `main` (#885) [Refactor]: CDAT Migration - Update dependencies and remove Dataset._add_cf_attrs_to_z_axes() (#891) CDAT Migration Phase 2: Refactor core utilities and `lat_lon` set (#677) Refer to the PR for more 
information because the changelog is massive. Update build workflow to run on `cdat-migration-fy24` branch CDAT Migration Phase 2: Add CDAT regression test notebook template and fix GH Actions build (#743) - Add Makefile for quick access to multiple Python-based commands such as linting, testing, cleaning up cache and build files - Fix some lingering unit tests failure - Update `xcdat=0.6.0rc1` to `xcdat >=0.6.0` in `ci.yml`, `dev.yml` and `dev-nompi.yml` - Add `xskillscore` to `ci.yml` - Fix `pre-commit` issues CDAT Migration Phase 2: Regression testing for `lat_lon`, `lat_lon_land`, and `lat_lon_river` (#744) - Add Makefile that simplifies common development commands (building and installing, testing, etc.) - Write unit tests to cover all new code for utility functions - `dataset_xr.py`, `metrics.py`, `climo_xr.py`, `io.py`, `regrid.py` - Metrics comparison for `cdat-migration-fy24` `lat_lon` and `main` branch of `lat_lon` -- `NET_FLUX_SRF` and `RESTOM` have the highest spatial average diffs - Test run with 3D variables (`_run_3d_diags()`) - Fix Python 3.9 bug with using pipe command to represent Union -- doesn't work with `from __future__ import annotations` still - Fix subsetting syntax bug using ilev - Fix regridding bug where a single plev is passed and xCDAT does not allow generating bounds for coordinates of len <= 1 -- add conditional that just ignores adding new bounds for regridded output datasets, fix related tests - Fix accidentally calling save plots and metrics twice in `_get_metrics_by_region()` - Fix failing integration tests pass in CI/CD - Refactor `test_diags.py` -- replace unittest with pytest - Refactor `test_all_sets.py` -- replace unittest with pytest - Test climatology datasets -- tested with 3d variables using `test_all_sets.py` CDAT Migration Phase 2: Refactor utilities and CoreParameter methods for reusability across diagnostic sets (#746) - Move driver type annotations to `type_annotations.py` - Move 
`lat_lon_driver._save_data_metrics_and_plots()` to `io.py` - Update `_save_data_metrics_and_plots` args to accept `plot_func` callable - Update `metrics.spatial_avg` to return an optionally `xr.DataArray` with `as_list=False` - Move `parameter` arg to the top in `lat_lon_plot.plot` - Move `_set_param_output_attrs` and `_set_name_yr_attrs` from `lat_lon_driver` to `CoreParameter` class CDAT Migration Phase 2: Refactor `zonal_mean_2d()` and `zonal_mean_2d_stratosphere()` sets (#774) CDAT Migration Phase 2: Refactor `qbo` set (#826) --- .github/workflows/build_workflow.yml | 2 +- .../template_cdat_regression_test.ipynb | 1333 +++++++++++++++++ e3sm_diags/driver/aerosol_budget_driver.py | 1 + e3sm_diags/driver/qbo_driver.py | 5 + e3sm_diags/driver/utils/climo_xr.py | 2 +- e3sm_diags/driver/utils/dataset_xr.py | 2 +- e3sm_diags/metrics/metrics.py | 1 + .../plot/cartopy/aerosol_aeronet_plot.py | 132 ++ e3sm_diags/plot/cartopy/zonal_mean_2d_plot.py | 187 +++ .../zonal_mean_2d_stratosphere_plot.py | 15 + e3sm_diags/plot/deprecated_lat_lon_plot.py | 360 +++++ .../driver/utils/test_dataset_xr.py | 57 + tests/e3sm_diags/driver/utils/test_regrid.py | 1 - 13 files changed, 2094 insertions(+), 4 deletions(-) create mode 100644 auxiliary_tools/template_cdat_regression_test.ipynb create mode 100644 e3sm_diags/plot/cartopy/aerosol_aeronet_plot.py create mode 100644 e3sm_diags/plot/cartopy/zonal_mean_2d_plot.py create mode 100644 e3sm_diags/plot/cartopy/zonal_mean_2d_stratosphere_plot.py create mode 100644 e3sm_diags/plot/deprecated_lat_lon_plot.py diff --git a/.github/workflows/build_workflow.yml b/.github/workflows/build_workflow.yml index d19f29a5b..f3557732c 100644 --- a/.github/workflows/build_workflow.yml +++ b/.github/workflows/build_workflow.yml @@ -5,7 +5,7 @@ on: branches: [main] pull_request: - branches: [main] + branches: [main, cdat-migration-fy24] workflow_dispatch: diff --git a/auxiliary_tools/template_cdat_regression_test.ipynb 
b/auxiliary_tools/template_cdat_regression_test.ipynb new file mode 100644 index 000000000..8b4d00bd1 --- /dev/null +++ b/auxiliary_tools/template_cdat_regression_test.ipynb @@ -0,0 +1,1333 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# CDAT Migration Regression Test (FY24)\n", + "\n", + "This notebook is used to perform regression testing between the development and\n", + "production versions of a diagnostic set.\n", + "\n", + "## How it works\n", + "\n", + "It compares the relative differences (%) between two sets of `.json` files in two\n", + "separate directories, one for the refactored code and the other for the `main` branch.\n", + "\n", + "It will display metrics values with relative differences >= 2%. Relative differences are used instead of absolute differences because:\n", + "\n", + "- Relative differences are in percentages, which shows the scale of the differences.\n", + "- Absolute differences are just a raw number that doesn't factor in\n", + " floating point size (e.g., 100.00 vs. 0.0001), which can be misleading.\n", + "\n", + "## How to use\n", + "\n", + "PREREQUISITE: The diagnostic set's metrics stored in `.json` files in two directories\n", + "(dev and `main` branches).\n", + "\n", + "1. Make a copy of this notebook.\n", + "2. Run `mamba create -n cdat_regression_test -y -c conda-forge \"python<3.12\" pandas matplotlib-base ipykernel`\n", + "3. Run `mamba activate cdat_regression_test`\n", + "4. Update `DEV_PATH` and `PROD_PATH` in the copy of your notebook.\n", + "5. Run all cells IN ORDER.\n", + "6. 
Review results for any outstanding differences (>= 2%).\n", + " - Debug these differences (e.g., bug in metrics functions, incorrect variable references, etc.)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Setup Code\n" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import glob\n", + "import math\n", + "from typing import List\n", + "\n", + "import pandas as pd\n", + "\n", + "# TODO: Update DEV_PATH and PROD_PATH to your diagnostic sets.\n", + "DEV_PATH = \"/global/cfs/cdirs/e3sm/www/vo13/examples_658/ex1_modTS_vs_modTS_3years/lat_lon/model_vs_model\"\n", + "PROD_PATH = \"/global/cfs/cdirs/e3sm/www/vo13/examples/ex1_modTS_vs_modTS_3years/lat_lon/model_vs_model\"\n", + "\n", + "DEV_GLOB = sorted(glob.glob(DEV_PATH + \"/*.json\"))\n", + "PROD_GLOB = sorted(glob.glob(PROD_PATH + \"/*.json\"))\n", + "\n", + "# The names of the columns that store percentage difference values.\n", + "PERCENTAGE_COLUMNS = [\n", + " \"test DIFF (%)\",\n", + " \"ref DIFF (%)\",\n", + " \"test_regrid DIFF (%)\",\n", + " \"ref_regrid DIFF (%)\",\n", + " \"diff DIFF (%)\",\n", + " \"misc DIFF (%)\",\n", + "]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Core Functions\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "def get_metrics(filepaths: List[str]) -> pd.DataFrame:\n", + " \"\"\"Get the metrics using a glob of `.json` metric files in a directory.\n", + "\n", + " Parameters\n", + " ----------\n", + " filepaths : List[str]\n", + " The filepaths for metrics `.json` files.\n", + "\n", + " Returns\n", + " -------\n", + " pd.DataFrame\n", + " The DataFrame containing the metrics for all of the variables in\n", + " the results directory.\n", + " \"\"\"\n", + " metrics = []\n", + "\n", + " for filepath in filepaths:\n", + " df = pd.read_json(filepath)\n", + "\n", + " filename = 
filepath.split(\"/\")[-1]\n", + " var_key = filename.split(\"-\")[1]\n", + "\n", + " # Add the variable key to the MultiIndex and update the index\n", + " # before stacking to make the DataFrame easier to parse.\n", + " multiindex = pd.MultiIndex.from_product([[var_key], [*df.index]])\n", + " df = df.set_index(multiindex)\n", + " df.stack()\n", + "\n", + " metrics.append(df)\n", + "\n", + " df_final = pd.concat(metrics)\n", + "\n", + " # Reorder columns and drop \"unit\" column (string dtype breaks Pandas\n", + " # arithmetic).\n", + " df_final = df_final[[\"test\", \"ref\", \"test_regrid\", \"ref_regrid\", \"diff\", \"misc\"]]\n", + "\n", + " return df_final\n", + "\n", + "\n", + "def get_rel_diffs(df_actual: pd.DataFrame, df_reference: pd.DataFrame) -> pd.DataFrame:\n", + " \"\"\"Get the relative differences between two DataFrames.\n", + "\n", + " Formula: abs(actual - reference) / abs(actual)\n", + "\n", + " Parameters\n", + " ----------\n", + " df_actual : pd.DataFrame\n", + " The first DataFrame representing \"actual\" results (dev branch).\n", + " df_reference : pd.DataFrame\n", + " The second DataFrame representing \"reference\" results (main branch).\n", + "\n", + " Returns\n", + " -------\n", + " pd.DataFrame\n", + " The DataFrame containing absolute and relative differences between\n", + " the metrics DataFrames.\n", + " \"\"\"\n", + " df_diff = abs(df_actual - df_reference) / abs(df_actual)\n", + " df_diff = df_diff.add_suffix(\" DIFF (%)\")\n", + "\n", + " return df_diff\n", + "\n", + "\n", + "def sort_columns(df: pd.DataFrame) -> pd.DataFrame:\n", + " \"\"\"Sorts the order of the columns for the final DataFrame output.\n", + "\n", + " Parameters\n", + " ----------\n", + " df : pd.DataFrame\n", + " The final DataFrame output.\n", + "\n", + " Returns\n", + " -------\n", + " pd.DataFrame\n", + " The final DataFrame output with sorted columns.\n", + " \"\"\"\n", + " columns = [\n", + " \"test_dev\",\n", + " \"test_prod\",\n", + " \"test DIFF (%)\",\n", + " 
\"ref_dev\",\n", + " \"ref_prod\",\n", + " \"ref DIFF (%)\",\n", + " \"test_regrid_dev\",\n", + " \"test_regrid_prod\",\n", + " \"test_regrid DIFF (%)\",\n", + " \"ref_regrid_dev\",\n", + " \"ref_regrid_prod\",\n", + " \"ref_regrid DIFF (%)\",\n", + " \"diff_dev\",\n", + " \"diff_prod\",\n", + " \"diff DIFF (%)\",\n", + " \"misc_dev\",\n", + " \"misc_prod\",\n", + " \"misc DIFF (%)\",\n", + " ]\n", + "\n", + " df_new = df.copy()\n", + " df_new = df_new[columns]\n", + "\n", + " return df_new\n", + "\n", + "\n", + "def update_diffs_to_pct(df: pd.DataFrame):\n", + " \"\"\"Update relative diff columns from float to string percentage.\n", + "\n", + " Parameters\n", + " ----------\n", + " df : pd.DataFrame\n", + " The final DataFrame containing metrics and diffs (floats).\n", + "\n", + " Returns\n", + " -------\n", + " pd.DataFrame\n", + " The final DataFrame containing metrics and diffs (str percentage).\n", + " \"\"\"\n", + " df_new = df.copy()\n", + " df_new[PERCENTAGE_COLUMNS] = df_new[PERCENTAGE_COLUMNS].map(\n", + " lambda x: \"{0:.2f}%\".format(x * 100) if not math.isnan(x) else x\n", + " )\n", + "\n", + " return df_new" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 1. Get the DataFrame containing development and production metrics.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "df_metrics_dev = get_metrics(DEV_GLOB)\n", + "df_metrics_prod = get_metrics(PROD_GLOB)\n", + "df_metrics_all = pd.concat(\n", + " [df_metrics_dev.add_suffix(\"_dev\"), df_metrics_prod.add_suffix(\"_prod\")],\n", + " axis=1,\n", + " join=\"outer\",\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 2. 
Get DataFrame for differences >= 2%.\n", + "\n", + "- Get the relative differences for all metrics\n", + "- Filter down metrics to those with differences >= 2%\n", + " - If all cells in a row are NaN (< 2%), the entire row is dropped to make the results easier to parse.\n", + " - Any remaining NaN cells are differences < 2% and **should be ignored**.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "df_metrics_diffs = get_rel_diffs(df_metrics_dev, df_metrics_prod)\n", + "df_metrics_diffs_thres = df_metrics_diffs[df_metrics_diffs >= 0.02]\n", + "df_metrics_diffs_thres = df_metrics_diffs_thres.dropna(\n", + " axis=0, how=\"all\", ignore_index=False\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 3. Combine both DataFrames to get the final result.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "df_final = df_metrics_diffs_thres.join(df_metrics_all)\n", + "df_final = sort_columns(df_final)\n", + "df_final = update_diffs_to_pct(df_final)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 4. 
Display final DataFrame and review results.\n", + "\n", + "- Red cells are differences >= 2%\n", + "- `nan` cells are differences < 2% and **should be ignored**\n" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + 
" \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
 var_keymetrictest_devtest_prodtest DIFF (%)ref_devref_prodref DIFF (%)test_regrid_devtest_regrid_prodtest_regrid DIFF (%)ref_regrid_devref_regrid_prodref_regrid DIFF (%)diff_devdiff_proddiff DIFF (%)misc_devmisc_prodmisc DIFF (%)
0FLUTmax299.911864299.355074nan300.162128299.776167nan299.911864299.355074nan300.162128299.776167nan9.4923599.7888093.12%nannannan
1FLUTmin124.610884125.987072nan122.878196124.148986nan124.610884125.987072nan122.878196124.148986nan-15.505809-17.0323259.84%nannannan
2FSNSmax269.789702269.798166nan272.722362272.184917nan269.789702269.798166nan272.722362272.184917nan20.64792924.85985220.40%nannannan
3FSNSmin16.89742317.7608895.11%16.71013416.2370612.83%16.89742317.7608895.11%16.71013416.2370612.83%-28.822277-28.324921nannannannan
4FSNTOAmax360.624327360.209193nan362.188816361.778529nan360.624327360.209193nan362.188816361.778529nan18.60227622.62426621.62%nannannan
5FSNTOAmean239.859777240.001860nan241.439641241.544384nan239.859777240.001860nan241.439641241.544384nan-1.579864-1.5425242.36%nannannan
6FSNTOAmin44.90704148.2568187.46%47.22350250.3396086.60%44.90704148.2568187.46%47.22350250.3396086.60%-23.576184-23.171864nannannannan
7LHFLXmax282.280453289.0799402.41%275.792933276.297281nan282.280453289.0799402.41%275.792933276.297281nan47.53550353.16892411.85%nannannan
8LHFLXmean88.37960988.470270nan88.96955088.976266nan88.37960988.470270nan88.96955088.976266nan-0.589942-0.50599614.23%nannannan
9LHFLXmin-0.878371-0.54924837.47%-1.176561-0.94611019.59%-0.878371-0.54924837.47%-1.176561-0.94611019.59%-34.375924-33.902769nannannannan
10LWCFmax78.49365377.473220nan86.12195984.993825nan78.49365377.473220nan86.12195984.993825nan9.61605710.79610412.27%nannannan
11LWCFmean24.37322424.370539nan24.40669724.391579nan24.37322424.370539nan24.40669724.391579nan-0.033473-0.02104037.14%nannannan
12LWCFmin-0.667812-0.6171077.59%-1.360010-1.18178713.10%-0.667812-0.6171077.59%-1.360010-1.18178713.10%-10.574643-10.1451884.06%nannannan
13NETCFmax13.22460412.6218254.56%13.71543813.2327163.52%13.22460412.6218254.56%13.71543813.2327163.52%10.89934410.2848255.64%nannannan
14NETCFmin-66.633044-66.008633nan-64.832041-67.3980473.96%-66.633044-66.008633nan-64.832041-67.3980473.96%-17.923932-17.940099nannannannan
15NET_FLUX_SRFmax155.691338156.424180nan166.556120166.506173nan155.691338156.424180nan166.556120166.506173nan59.81944961.6728243.10%nannannan
16NET_FLUX_SRFmean0.3940160.51633031.04%-0.0681860.068584200.58%0.3940160.51633031.04%-0.0681860.068584200.58%0.4622020.4477463.13%nannannan
17NET_FLUX_SRFmin-284.505205-299.5050245.27%-280.893287-290.2029343.31%-284.505205-299.5050245.27%-280.893287-290.2029343.31%-75.857589-85.85208913.18%nannannan
18PRECTmax17.28995117.071276nan20.26486220.138274nan17.28995117.071276nan20.26486220.138274nan2.3441112.4066252.67%nannannan
19PRECTmean3.0538023.056760nan3.0748853.074978nan3.0538023.056760nan3.0748853.074978nan-0.021083-0.01821813.59%nannannan
20PSLmin970.981710971.390765nan973.198437973.235326nan970.981710971.390765nan973.198437973.235326nan-6.328677-6.1046103.54%nannannan
21PSLrmsenannannannannannannannannannannannannannannan1.0428840.9799816.03%
22RESTOMmax84.29550283.821906nan87.70794487.451262nan84.29550283.821906nan87.70794487.451262nan17.39628321.42361623.15%nannannan
23RESTOMmean0.4815490.65656036.34%0.0180410.162984803.40%0.4815490.65656036.34%0.0180410.162984803.40%0.4635080.4935766.49%nannannan
24RESTOMmin-127.667181-129.014673nan-127.417586-128.673508nan-127.667181-129.014673nan-127.417586-128.673508nan-15.226249-14.8696142.34%nannannan
25SHFLXmax114.036895112.859646nan116.870038116.432591nan114.036895112.859646nan116.870038116.432591nan28.32065627.5567552.70%nannannan
26SHFLXmin-88.650312-88.386947nan-85.809438-85.480377nan-88.650312-88.386947nan-85.809438-85.480377nan-27.776625-28.3630532.11%nannannan
27SSTmin-1.788055-1.788055nan-1.676941-1.676941nan-1.788055-1.788055nan-1.676941-1.676941nan-4.513070-2.99327233.68%nannannan
28SWCFmax-0.518025-0.5368443.63%-0.311639-0.3316166.41%-0.518025-0.5368443.63%-0.311639-0.3316166.41%11.66893912.0870773.58%nannannan
29SWCFmin-123.625017-122.042043nan-131.053537-130.430161nan-123.625017-122.042043nan-131.053537-130.430161nan-21.415249-20.8089732.83%nannannan
30TREFHTmax31.14150831.058424nan29.81921029.721868nan31.14150831.058424nan29.81921029.721868nan4.9817575.1261852.90%nannannan
31TREFHTmax31.14150831.058424nan29.81921029.721868nan31.14150831.058424nan29.81921029.721868nan4.8678555.1261852.90%nannannan
32TREFHTmax31.14150831.058424nan29.81921029.721868nan31.14150831.058424nan29.81921029.721868nan4.9817575.1261855.31%nannannan
33TREFHTmax31.14150831.058424nan29.81921029.721868nan31.14150831.058424nan29.81921029.721868nan4.8678555.1261855.31%nannannan
34TREFHTmean14.76994614.741707nan13.84201313.800258nan14.76994614.741707nan13.84201313.800258nan0.9279330.9414492.28%nannannan
35TREFHTmean9.2142249.114572nan8.0833497.957917nan9.2142249.114572nan8.0833497.957917nan1.1308761.1566552.28%nannannan
36TREFHTmin-56.266677-55.623001nan-58.159250-57.542053nan-56.266677-55.623001nan-58.159250-57.542053nan-0.681558-0.6243718.39%nannannan
37TREFHTmin-56.266677-55.623001nan-58.159250-57.542053nan-56.266677-55.623001nan-58.159250-57.542053nan-0.681558-0.6243718.39%nannannan
38TREFHTmin-56.266677-55.623001nan-58.159250-57.542053nan-56.266677-55.623001nan-58.159250-57.542053nan-0.681558-0.6243718.39%nannannan
39TREFHTmin-56.266677-55.623001nan-58.159250-57.542053nan-56.266677-55.623001nan-58.159250-57.542053nan-0.681558-0.6243718.39%nannannan
40TREFHTrmsenannannannannannannannannannannannannannannan1.1607181.1799952.68%
41TREFHTrmsenannannannannannannannannannannannannannannan1.3431691.3791412.68%
\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_final.reset_index(names=[\"var_key\", \"metric\"]).style.map(\n", + " lambda x: \"background-color : red\" if isinstance(x, str) else \"\",\n", + " subset=pd.IndexSlice[:, PERCENTAGE_COLUMNS],\n", + ")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "cdat_regression_test", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.6" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/e3sm_diags/driver/aerosol_budget_driver.py b/e3sm_diags/driver/aerosol_budget_driver.py index 9c1de7d00..faf0c4005 100644 --- a/e3sm_diags/driver/aerosol_budget_driver.py +++ b/e3sm_diags/driver/aerosol_budget_driver.py @@ -3,6 +3,7 @@ script is integrated in e3sm_diags by Jill Zhang, with input from Kai Zhang, Taufiq Hassan, Xue Zheng, Ziming Ke, Susannah Burrows, and Naser Mahfouz. 
""" + from __future__ import annotations import csv diff --git a/e3sm_diags/driver/qbo_driver.py b/e3sm_diags/driver/qbo_driver.py index 3379f4c46..5bc0c5ec2 100644 --- a/e3sm_diags/driver/qbo_driver.py +++ b/e3sm_diags/driver/qbo_driver.py @@ -125,6 +125,11 @@ def run_diag(parameter: QboParameter) -> QboParameter: test_dict["name"] = test_ds._get_test_name() ref_dict["name"] = ref_ds._get_ref_name() + try: + ref_dict["name"] = ref_ds._get_ref_name() + except AttributeError: + ref_dict["name"] = parameter.ref_name + _save_metrics_to_json(parameter, test_dict, "test") # type: ignore _save_metrics_to_json(parameter, ref_dict, "ref") # type: ignore diff --git a/e3sm_diags/driver/utils/climo_xr.py b/e3sm_diags/driver/utils/climo_xr.py index bb229048c..acbe73fa2 100644 --- a/e3sm_diags/driver/utils/climo_xr.py +++ b/e3sm_diags/driver/utils/climo_xr.py @@ -1,8 +1,8 @@ """This module stores climatology functions operating on Xarray objects. - This file will eventually be refactored to use xCDAT's climatology API. """ + from typing import Dict, List, Literal, get_args import numpy as np diff --git a/e3sm_diags/driver/utils/dataset_xr.py b/e3sm_diags/driver/utils/dataset_xr.py index 22cab378c..182618841 100644 --- a/e3sm_diags/driver/utils/dataset_xr.py +++ b/e3sm_diags/driver/utils/dataset_xr.py @@ -1,6 +1,5 @@ """This module stores the Dataset class, which is the primary class for I/O. - This Dataset class operates on `xr.Dataset` objects, which are created using netCDF files. These `xr.Dataset` contain either the reference or test variable. This variable can either be from a climatology file or a time series file. @@ -8,6 +7,7 @@ calculated. Reference and test variables can also be derived using other variables from dataset files. 
""" + from __future__ import annotations import collections diff --git a/e3sm_diags/metrics/metrics.py b/e3sm_diags/metrics/metrics.py index 333980643..d98fe519d 100644 --- a/e3sm_diags/metrics/metrics.py +++ b/e3sm_diags/metrics/metrics.py @@ -1,4 +1,5 @@ """This module stores functions to calculate metrics using Xarray objects.""" + from __future__ import annotations from typing import List, Literal diff --git a/e3sm_diags/plot/cartopy/aerosol_aeronet_plot.py b/e3sm_diags/plot/cartopy/aerosol_aeronet_plot.py new file mode 100644 index 000000000..765235095 --- /dev/null +++ b/e3sm_diags/plot/cartopy/aerosol_aeronet_plot.py @@ -0,0 +1,132 @@ +import os + +import cartopy.crs as ccrs +import matplotlib +import numpy as np + +from e3sm_diags.driver.utils.general import get_output_dir +from e3sm_diags.logger import custom_logger +from e3sm_diags.metrics import mean +from e3sm_diags.plot.cartopy.deprecated_lat_lon_plot import plot_panel + +matplotlib.use("Agg") +import matplotlib.pyplot as plt # isort:skip # noqa: E402 + +logger = custom_logger(__name__) + +plotTitle = {"fontsize": 11.5} +plotSideTitle = {"fontsize": 9.5} + + +def plot(test, test_site, ref_site, parameter): + # Plot scatter plot + # Position and sizes of subplot axes in page coordinates (0 to 1) + # (left, bottom, width, height) in page coordinates + panel = [ + (0.09, 0.40, 0.72, 0.30), + (0.19, 0.2, 0.62, 0.30), + ] + # Border padding relative to subplot axes for saving individual panels + # (left, bottom, right, top) in page coordinates + border = (-0.06, -0.03, 0.13, 0.03) + + fig = plt.figure(figsize=parameter.figsize, dpi=parameter.dpi) + fig.suptitle(parameter.var_id, x=0.5, y=0.97) + proj = ccrs.PlateCarree() + max1 = test.max() + min1 = test.min() + mean1 = mean(test) + # TODO: Replace this function call with `e3sm_diags.plot.utils._add_colormap()`. 
+ plot_panel( + 0, + fig, + proj, + test, + parameter.contour_levels, + parameter.test_colormap, + (parameter.test_name_yrs, None, None), + parameter, + stats=(max1, mean1, min1), + ) + + ax = fig.add_axes(panel[1]) + ax.set_title(f"{parameter.var_id} from AERONET sites") + + # define 1:1 line, and x y axis limits + + if parameter.var_id == "AODVIS": + x1 = np.arange(0.01, 3.0, 0.1) + y1 = np.arange(0.01, 3.0, 0.1) + plt.xlim(0.03, 1) + plt.ylim(0.03, 1) + else: + x1 = np.arange(0.0001, 1.0, 0.01) + y1 = np.arange(0.0001, 1.0, 0.01) + plt.xlim(0.001, 0.3) + plt.ylim(0.001, 0.3) + + plt.loglog(x1, y1, "-k", linewidth=0.5) + plt.loglog(x1, y1 * 0.5, "--k", linewidth=0.5) + plt.loglog(x1 * 0.5, y1, "--k", linewidth=0.5) + + corr = np.corrcoef(ref_site, test_site) + xmean = np.mean(ref_site) + ymean = np.mean(test_site) + ax.text( + 0.3, + 0.9, + f"Mean (test): {ymean:.3f} \n Mean (ref): {xmean:.3f}\n Corr: {corr[0, 1]:.2f}", + horizontalalignment="right", + verticalalignment="top", + transform=ax.transAxes, + ) + + # axis ticks + plt.tick_params(axis="both", which="major") + plt.tick_params(axis="both", which="minor") + + # axis labels + plt.xlabel(f"ref: {parameter.ref_name_yrs}") + plt.ylabel(f"test: {parameter.test_name_yrs}") + + plt.loglog(ref_site, test_site, "kx", markersize=3.0, mfc="none") + + # legend + plt.legend(frameon=False, prop={"size": 5}) + + # TODO: This section can be refactored to use `plot.utils._save_plot()`. + for f in parameter.output_format: + f = f.lower().split(".")[-1] + fnm = os.path.join( + get_output_dir(parameter.current_set, parameter), + f"{parameter.output_file}" + "." 
+ f, + ) + plt.savefig(fnm) + logger.info(f"Plot saved in: {fnm}") + + for f in parameter.output_format_subplot: + fnm = os.path.join( + get_output_dir(parameter.current_set, parameter), + parameter.output_file, + ) + page = fig.get_size_inches() + i = 0 + for p in panel: + # Extent of subplot + subpage = np.array(p).reshape(2, 2) + subpage[1, :] = subpage[0, :] + subpage[1, :] + subpage = subpage + np.array(border).reshape(2, 2) + subpage = list(((subpage) * page).flatten()) # type: ignore + extent = matplotlib.transforms.Bbox.from_extents(*subpage) + # Save subplot + fname = fnm + ".%i." % (i) + f + plt.savefig(fname, bbox_inches=extent) + + orig_fnm = os.path.join( + get_output_dir(parameter.current_set, parameter), + parameter.output_file, + ) + fname = orig_fnm + ".%i." % (i) + f + logger.info(f"Sub-plot saved in: {fname}") + + i += 1 diff --git a/e3sm_diags/plot/cartopy/zonal_mean_2d_plot.py b/e3sm_diags/plot/cartopy/zonal_mean_2d_plot.py new file mode 100644 index 000000000..a72bf5dce --- /dev/null +++ b/e3sm_diags/plot/cartopy/zonal_mean_2d_plot.py @@ -0,0 +1,187 @@ +from typing import List, Optional, Tuple + +import matplotlib +import numpy as np +import xarray as xr +import xcdat as xc + +from e3sm_diags.driver.utils.type_annotations import MetricsDict +from e3sm_diags.logger import custom_logger +from e3sm_diags.parameter.core_parameter import CoreParameter +from e3sm_diags.parameter.zonal_mean_2d_parameter import DEFAULT_PLEVS +from e3sm_diags.plot.utils import ( + DEFAULT_PANEL_CFG, + _add_colorbar, + _add_contour_plot, + _add_min_mean_max_text, + _add_rmse_corr_text, + _configure_titles, + _configure_x_and_y_axes, + _get_c_levels_and_norm, + _save_plot, +) + +matplotlib.use("Agg") +import matplotlib.pyplot as plt # isort:skip # noqa: E402 + +logger = custom_logger(__name__) + + +# Configs for x axis ticks and x axis limits. 
+X_TICKS = np.array([-90, -60, -30, 0, 30, 60, 90]) +X_LIM = -90, 90 + + +def plot( + parameter: CoreParameter, + da_test: xr.DataArray, + da_ref: xr.DataArray, + da_diff: xr.DataArray, + metrics_dict: MetricsDict, +): + """Plot the variable's metrics generated by the zonal_mean_2d set. + + Parameters + ---------- + parameter : CoreParameter + The CoreParameter object containing plot configurations. + da_test : xr.DataArray + The test data. + da_ref : xr.DataArray + The reference data. + da_diff : xr.DataArray + The difference between `da_test` and `da_ref` (both are regridded to + the lower resolution of the two beforehand). + metrics_dict : Metrics + The metrics. + """ + fig = plt.figure(figsize=parameter.figsize, dpi=parameter.dpi) + fig.suptitle(parameter.main_title, x=0.5, y=0.96, fontsize=18) + + # The variable units. + units = metrics_dict["units"] + + # Add the first subplot for test data. + min1 = metrics_dict["test"]["min"] # type: ignore + mean1 = metrics_dict["test"]["mean"] # type: ignore + max1 = metrics_dict["test"]["max"] # type: ignore + + _add_colormap( + 0, + da_test, + fig, + parameter, + parameter.test_colormap, + parameter.contour_levels, + title=(parameter.test_name_yrs, parameter.test_title, units), # type: ignore + metrics=(max1, mean1, min1), # type: ignore + ) + + # Add the second and third subplots for ref data and the differences, + # respectively. 
+ min2 = metrics_dict["ref"]["min"] # type: ignore + mean2 = metrics_dict["ref"]["mean"] # type: ignore + max2 = metrics_dict["ref"]["max"] # type: ignore + + _add_colormap( + 1, + da_ref, + fig, + parameter, + parameter.reference_colormap, + parameter.contour_levels, + title=(parameter.ref_name_yrs, parameter.reference_title, units), # type: ignore + metrics=(max2, mean2, min2), # type: ignore + ) + + min3 = metrics_dict["diff"]["min"] # type: ignore + mean3 = metrics_dict["diff"]["mean"] # type: ignore + max3 = metrics_dict["diff"]["max"] # type: ignore + r = metrics_dict["misc"]["rmse"] # type: ignore + c = metrics_dict["misc"]["corr"] # type: ignore + + _add_colormap( + 2, + da_diff, + fig, + parameter, + parameter.diff_colormap, + parameter.diff_levels, + title=(None, parameter.diff_title, da_diff.attrs["units"]), # + metrics=(max3, mean3, min3, r, c), # type: ignore + ) + + _save_plot(fig, parameter) + + plt.close() + + +def _add_colormap( + subplot_num: int, + var: xr.DataArray, + fig: plt.Figure, + parameter: CoreParameter, + color_map: str, + contour_levels: List[float], + title: Tuple[Optional[str], str, str], + metrics: Tuple[float, ...], +): + lat = xc.get_dim_coords(var, axis="Y") + plev = xc.get_dim_coords(var, axis="Z") + var = var.squeeze() + + # Configure contour levels + # -------------------------------------------------------------------------- + c_levels, norm = _get_c_levels_and_norm(contour_levels) + + # Add the contour plot + # -------------------------------------------------------------------------- + ax = fig.add_axes(DEFAULT_PANEL_CFG[subplot_num], projection=None) + + contour_plot = _add_contour_plot( + ax, parameter, var, lat, plev, color_map, None, norm, c_levels + ) + + # Configure the aspect ratio and plot titles. + # -------------------------------------------------------------------------- + ax.set_aspect("auto") + _configure_titles(ax, title) + + # Configure x and y axis. 
+ # -------------------------------------------------------------------------- + _configure_x_and_y_axes(ax, X_TICKS, None, None, parameter.current_set) + ax.set_xlim(X_LIM) + + if parameter.plot_log_plevs: + ax.set_yscale("log") + + if parameter.plot_plevs: + plev_ticks = parameter.plevs + plt.yticks(plev_ticks, plev_ticks) + + # For default plevs, specify the pressure axis and show the 50 mb tick + # at the top. + if ( + not parameter.plot_log_plevs + and not parameter.plot_plevs + and parameter.plevs == DEFAULT_PLEVS + ): + plev_ticks = parameter.plevs + new_ticks = [plev_ticks[0]] + plev_ticks[1::2] + new_ticks = [int(x) for x in new_ticks] + plt.yticks(new_ticks, new_ticks) + + plt.ylabel("pressure (mb)") + ax.invert_yaxis() + + # Add and configure the color bar. + # -------------------------------------------------------------------------- + _add_colorbar(fig, subplot_num, DEFAULT_PANEL_CFG, contour_plot, c_levels) + + # Add metrics text. + # -------------------------------------------------------------------------- + # Min, Mean, Max + _add_min_mean_max_text(fig, subplot_num, DEFAULT_PANEL_CFG, metrics) + + if len(metrics) == 5: + _add_rmse_corr_text(fig, subplot_num, DEFAULT_PANEL_CFG, metrics) diff --git a/e3sm_diags/plot/cartopy/zonal_mean_2d_stratosphere_plot.py b/e3sm_diags/plot/cartopy/zonal_mean_2d_stratosphere_plot.py new file mode 100644 index 000000000..004f3c93d --- /dev/null +++ b/e3sm_diags/plot/cartopy/zonal_mean_2d_stratosphere_plot.py @@ -0,0 +1,15 @@ +import xarray as xr + +from e3sm_diags.driver.utils.type_annotations import MetricsDict +from e3sm_diags.parameter.core_parameter import CoreParameter +from e3sm_diags.plot.cartopy.zonal_mean_2d_plot import plot as base_plot + + +def plot( + parameter: CoreParameter, + da_test: xr.DataArray, + da_ref: xr.DataArray, + da_diff: xr.DataArray, + metrics_dict: MetricsDict, +): + return base_plot(parameter, da_test, da_ref, da_diff, metrics_dict) diff --git 
a/e3sm_diags/plot/deprecated_lat_lon_plot.py b/e3sm_diags/plot/deprecated_lat_lon_plot.py new file mode 100644 index 000000000..4eaebcf80 --- /dev/null +++ b/e3sm_diags/plot/deprecated_lat_lon_plot.py @@ -0,0 +1,360 @@ +""" +WARNING: This module has been deprecated and replaced by +`e3sm_diags.plot.lat_lon_plot.py`. This file temporarily kept because +`e3sm_diags.plot.cartopy.aerosol_aeronet_plot.plot` references the +`plot_panel()` function. Once the aerosol_aeronet set is refactored, this +file can be deleted. +""" +from __future__ import print_function + +import os + +import cartopy.crs as ccrs +import cartopy.feature as cfeature +import cdutil +import matplotlib +import numpy as np +import numpy.ma as ma +from cartopy.mpl.ticker import LatitudeFormatter, LongitudeFormatter + +from e3sm_diags.derivations.default_regions import regions_specs +from e3sm_diags.driver.utils.general import get_output_dir +from e3sm_diags.logger import custom_logger +from e3sm_diags.plot import get_colormap + +matplotlib.use("Agg") +import matplotlib.colors as colors # isort:skip # noqa: E402 +import matplotlib.pyplot as plt # isort:skip # noqa: E402 + +logger = custom_logger(__name__) + +plotTitle = {"fontsize": 11.5} +plotSideTitle = {"fontsize": 9.5} + +# Position and sizes of subplot axes in page coordinates (0 to 1) +panel = [ + (0.1691, 0.6810, 0.6465, 0.2258), + (0.1691, 0.3961, 0.6465, 0.2258), + (0.1691, 0.1112, 0.6465, 0.2258), +] + +# Border padding relative to subplot axes for saving individual panels +# (left, bottom, right, top) in page coordinates +border = (-0.06, -0.03, 0.13, 0.03) + + +def add_cyclic(var): + lon = var.getLongitude() + return var(longitude=(lon[0], lon[0] + 360.0, "coe")) + + +def get_ax_size(fig, ax): + bbox = ax.get_window_extent().transformed(fig.dpi_scale_trans.inverted()) + width, height = bbox.width, bbox.height + width *= fig.dpi + height *= fig.dpi + return width, height + + +def determine_tick_step(degrees_covered): + if degrees_covered > 
180: + return 60 + if degrees_covered > 60: + return 30 + elif degrees_covered > 30: + return 10 + elif degrees_covered > 20: + return 5 + else: + return 1 + + +def plot_panel( # noqa: C901 + n, fig, proj, var, clevels, cmap, title, parameters, stats=None +): + var = add_cyclic(var) + lon = var.getLongitude() + lat = var.getLatitude() + var = ma.squeeze(var.asma()) + + # Contour levels + levels = None + norm = None + if len(clevels) > 0: + levels = [-1.0e8] + clevels + [1.0e8] + norm = colors.BoundaryNorm(boundaries=levels, ncolors=256) + + # ax.set_global() + region_str = parameters.regions[0] + region = regions_specs[region_str] + global_domain = True + full_lon = True + if "domain" in region.keys(): # type: ignore + # Get domain to plot + domain = region["domain"] # type: ignore + global_domain = False + else: + # Assume global domain + domain = cdutil.region.domain(latitude=(-90.0, 90, "ccb")) + kargs = domain.components()[0].kargs + lon_west, lon_east, lat_south, lat_north = (0, 360, -90, 90) + if "longitude" in kargs: + full_lon = False + lon_west, lon_east, _ = kargs["longitude"] + # Note cartopy Problem with gridlines across the dateline:https://github.com/SciTools/cartopy/issues/821. Region cross dateline is not supported yet. + if lon_west > 180 and lon_east > 180: + lon_west = lon_west - 360 + lon_east = lon_east - 360 + + if "latitude" in kargs: + lat_south, lat_north, _ = kargs["latitude"] + lon_covered = lon_east - lon_west + lon_step = determine_tick_step(lon_covered) + xticks = np.arange(lon_west, lon_east, lon_step) + # Subtract 0.50 to get 0 W to show up on the right side of the plot. + # If less than 0.50 is subtracted, then 0 W will overlap 0 E on the left side of the plot. + # If a number is added, then the value won't show up at all. 
+ if global_domain or full_lon: + xticks = np.append(xticks, lon_east - 0.50) + proj = ccrs.PlateCarree(central_longitude=180) + else: + xticks = np.append(xticks, lon_east) + lat_covered = lat_north - lat_south + lat_step = determine_tick_step(lat_covered) + yticks = np.arange(lat_south, lat_north, lat_step) + yticks = np.append(yticks, lat_north) + + # Contour plot + ax = fig.add_axes(panel[n], projection=proj) + ax.set_extent([lon_west, lon_east, lat_south, lat_north], crs=proj) + cmap = get_colormap(cmap, parameters) + p1 = ax.contourf( + lon, + lat, + var, + transform=ccrs.PlateCarree(), + norm=norm, + levels=levels, + cmap=cmap, + extend="both", + ) + + # ax.set_aspect('auto') + # Full world would be aspect 360/(2*180) = 1 + ax.set_aspect((lon_east - lon_west) / (2 * (lat_north - lat_south))) + ax.coastlines(lw=0.3) + if not global_domain and "RRM" in region_str: + ax.coastlines(resolution="50m", color="black", linewidth=1) + state_borders = cfeature.NaturalEarthFeature( + category="cultural", + name="admin_1_states_provinces_lakes", + scale="50m", + facecolor="none", + ) + ax.add_feature(state_borders, edgecolor="black") + if title[0] is not None: + ax.set_title(title[0], loc="left", fontdict=plotSideTitle) + if title[1] is not None: + ax.set_title(title[1], fontdict=plotTitle) + if title[2] is not None: + ax.set_title(title[2], loc="right", fontdict=plotSideTitle) + ax.set_xticks(xticks, crs=ccrs.PlateCarree()) + ax.set_yticks(yticks, crs=ccrs.PlateCarree()) + lon_formatter = LongitudeFormatter(zero_direction_label=True, number_format=".0f") + lat_formatter = LatitudeFormatter() + ax.xaxis.set_major_formatter(lon_formatter) + ax.yaxis.set_major_formatter(lat_formatter) + ax.tick_params(labelsize=8.0, direction="out", width=1) + ax.xaxis.set_ticks_position("bottom") + ax.yaxis.set_ticks_position("left") + + # Color bar + cbax = fig.add_axes((panel[n][0] + 0.6635, panel[n][1] + 0.0215, 0.0326, 0.1792)) + cbar = fig.colorbar(p1, cax=cbax) + w, h = 
get_ax_size(fig, cbax) + + if levels is None: + cbar.ax.tick_params(labelsize=9.0, length=0) + + else: + maxval = np.amax(np.absolute(levels[1:-1])) + if maxval < 0.2: + fmt = "%5.3f" + pad = 28 + elif maxval < 10.0: + fmt = "%5.2f" + pad = 25 + elif maxval < 100.0: + fmt = "%5.1f" + pad = 25 + elif maxval > 9999.0: + fmt = "%.0f" + pad = 40 + else: + fmt = "%6.1f" + pad = 30 + + cbar.set_ticks(levels[1:-1]) + labels = [fmt % level for level in levels[1:-1]] + cbar.ax.set_yticklabels(labels, ha="right") + cbar.ax.tick_params(labelsize=9.0, pad=pad, length=0) + + # Min, Mean, Max + fig.text( + panel[n][0] + 0.6635, + panel[n][1] + 0.2107, + "Max\nMean\nMin", + ha="left", + fontdict=plotSideTitle, + ) + + fmt_m = [] + # printing in scientific notation if value greater than 10^5 + for i in range(len(stats[0:3])): + fs = "1e" if stats[i] > 100000.0 else "2f" + fmt_m.append(fs) + fmt_metrics = f"%.{fmt_m[0]}\n%.{fmt_m[1]}\n%.{fmt_m[2]}" + + fig.text( + panel[n][0] + 0.7635, + panel[n][1] + 0.2107, + # "%.2f\n%.2f\n%.2f" % stats[0:3], + fmt_metrics % stats[0:3], + ha="right", + fontdict=plotSideTitle, + ) + + # RMSE, CORR + if len(stats) == 5: + fig.text( + panel[n][0] + 0.6635, + panel[n][1] - 0.0105, + "RMSE\nCORR", + ha="left", + fontdict=plotSideTitle, + ) + fig.text( + panel[n][0] + 0.7635, + panel[n][1] - 0.0105, + "%.2f\n%.2f" % stats[3:5], + ha="right", + fontdict=plotSideTitle, + ) + + # grid resolution info: + if n == 2 and "RRM" in region_str: + dlat = lat[2] - lat[1] + dlon = lon[2] - lon[1] + fig.text( + panel[n][0] + 0.4635, + panel[n][1] - 0.04, + "Resolution: {:.2f}x{:.2f}".format(dlat, dlon), + ha="left", + fontdict=plotSideTitle, + ) + + +def plot(reference, test, diff, metrics_dict, parameter): + # Create figure, projection + fig = plt.figure(figsize=parameter.figsize, dpi=parameter.dpi) + proj = ccrs.PlateCarree() + + # Figure title + fig.suptitle(parameter.main_title, x=0.5, y=0.96, fontsize=18) + + # First two panels + min1 = 
metrics_dict["test"]["min"] + mean1 = metrics_dict["test"]["mean"] + max1 = metrics_dict["test"]["max"] + + plot_panel( + 0, + fig, + proj, + test, + parameter.contour_levels, + parameter.test_colormap, + (parameter.test_name_yrs, parameter.test_title, test.units), + parameter, + stats=(max1, mean1, min1), + ) + + if not parameter.model_only: + min2 = metrics_dict["ref"]["min"] + mean2 = metrics_dict["ref"]["mean"] + max2 = metrics_dict["ref"]["max"] + + plot_panel( + 1, + fig, + proj, + reference, + parameter.contour_levels, + parameter.reference_colormap, + (parameter.ref_name_yrs, parameter.reference_title, reference.units), + parameter, + stats=(max2, mean2, min2), + ) + + # Third panel + min3 = metrics_dict["diff"]["min"] + mean3 = metrics_dict["diff"]["mean"] + max3 = metrics_dict["diff"]["max"] + r = metrics_dict["misc"]["rmse"] + c = metrics_dict["misc"]["corr"] + plot_panel( + 2, + fig, + proj, + diff, + parameter.diff_levels, + parameter.diff_colormap, + (None, parameter.diff_title, test.units), + parameter, + stats=(max3, mean3, min3, r, c), + ) + + # Save figure + for f in parameter.output_format: + f = f.lower().split(".")[-1] + fnm = os.path.join( + get_output_dir(parameter.current_set, parameter), + parameter.output_file + "." + f, + ) + plt.savefig(fnm) + logger.info(f"Plot saved in: {fnm}") + + # Save individual subplots + if parameter.ref_name == "": + panels = [panel[0]] + else: + panels = panel + + for f in parameter.output_format_subplot: + fnm = os.path.join( + get_output_dir(parameter.current_set, parameter), + parameter.output_file, + ) + page = fig.get_size_inches() + i = 0 + for p in panels: + # Extent of subplot + subpage = np.array(p).reshape(2, 2) + subpage[1, :] = subpage[0, :] + subpage[1, :] + subpage = subpage + np.array(border).reshape(2, 2) + subpage = list(((subpage) * page).flatten()) # type: ignore + extent = matplotlib.transforms.Bbox.from_extents(*subpage) + # Save subplot + fname = fnm + ".%i." 
% (i) + f + plt.savefig(fname, bbox_inches=extent) + + orig_fnm = os.path.join( + get_output_dir(parameter.current_set, parameter), + parameter.output_file, + ) + fname = orig_fnm + ".%i." % (i) + f + logger.info(f"Sub-plot saved in: {fname}") + + i += 1 + + plt.close() diff --git a/tests/e3sm_diags/driver/utils/test_dataset_xr.py b/tests/e3sm_diags/driver/utils/test_dataset_xr.py index bf071fa07..7653fe299 100644 --- a/tests/e3sm_diags/driver/utils/test_dataset_xr.py +++ b/tests/e3sm_diags/driver/utils/test_dataset_xr.py @@ -630,6 +630,63 @@ def test_returns_climo_dataset_using_test_file_variable_ref_name_and_season_nest @pytest.mark.xfail( reason="Need to figure out why to create dummy incorrect time scalar variable with Xarray." ) + def test_returns_climo_dataset_with_derived_variable(self): + # We will derive the "PRECT" variable using the "pr" variable. + ds_pr = xr.Dataset( + coords={ + **spatial_coords, + "time": xr.DataArray( + dims="time", + data=np.array( + [ + cftime.DatetimeGregorian( + 2000, 1, 16, 12, 0, 0, 0, has_year_zero=False + ), + ], + dtype=object, + ), + attrs={ + "axis": "T", + "long_name": "time", + "standard_name": "time", + "bounds": "time_bnds", + }, + ), + }, + data_vars={ + **spatial_bounds, + "pr": xr.DataArray( + xr.DataArray( + data=np.array( + [ + [[1.0, 1.0], [1.0, 1.0]], + ] + ), + dims=["time", "lat", "lon"], + attrs={"units": "mm/s"}, + ) + ), + }, + ) + + parameter = _create_parameter_object( + "ref", "climo", self.data_path, "2000", "2001" + ) + parameter.ref_file = "pr_200001_200112.nc" + ds_pr.to_netcdf(f"{self.data_path}/{parameter.ref_file}") + + ds = Dataset(parameter, data_type="ref") + + result = ds.get_climo_dataset("PRECT", season="ANN") + expected = ds_pr.copy() + expected = expected.squeeze(dim="time").drop_vars("time") + expected["PRECT"] = expected["pr"] * 3600 * 24 + expected["PRECT"].attrs["units"] = "mm/day" + expected = expected.drop_vars("pr") + + xr.testing.assert_identical(result, expected) + + 
@pytest.mark.xfail def test_returns_climo_dataset_using_derived_var_directly_from_dataset_and_replaces_scalar_time_var( self, ): diff --git a/tests/e3sm_diags/driver/utils/test_regrid.py b/tests/e3sm_diags/driver/utils/test_regrid.py index 6dc33fcda..c02451345 100644 --- a/tests/e3sm_diags/driver/utils/test_regrid.py +++ b/tests/e3sm_diags/driver/utils/test_regrid.py @@ -231,7 +231,6 @@ def test_regrids_to_first_dataset_with_equal_latitude_points(self, tool): expected_a = ds_a.copy() expected_b = ds_a.copy() - if tool in ["esmf", "xesmf"]: expected_b.so.attrs["regrid_method"] = "conservative" From edad9ac6119114585cb309b7ced9eca580fc176c Mon Sep 17 00:00:00 2001 From: ChengzhuZhang Date: Thu, 24 Oct 2024 14:15:47 -0700 Subject: [PATCH 02/25] update 2d 3d vars --- e3sm_diags/derivations/derivations.py | 40 +++++++++++++++++++++------ 1 file changed, 31 insertions(+), 9 deletions(-) diff --git a/e3sm_diags/derivations/derivations.py b/e3sm_diags/derivations/derivations.py index 14eef3104..8c3dd6823 100644 --- a/e3sm_diags/derivations/derivations.py +++ b/e3sm_diags/derivations/derivations.py @@ -109,6 +109,7 @@ ("pr",): lambda pr: qflxconvert_units(rename(pr)), ("PRECC", "PRECL"): lambda precc, precl: prect(precc, precl), ("sat_gauge_precip",): rename, + ("precip_liq_surf_mass_flux", "precip_ice_surf_mass_flux"): prect, # EAMxx ("PrecipLiqSurfMassFlux", "PrecipIceSurfMassFlux"): prect, # EAMxx }, "PRECST": { @@ -148,6 +149,7 @@ "TMQ": OrderedDict( [ (("PREH2O",), rename), + (("VapWaterPath",), rename), # EAMxx ( ("prw",), lambda prw: convert_units(rename(prw), target_units="kg/m2"), @@ -159,10 +161,6 @@ ("ALBEDO",): rename, ("SOLIN", "FSNTOA"): lambda solin, fsntoa: albedo(solin, solin - fsntoa), ("rsdt", "rsut"): albedo, - ( - "SW_flux_up_at_model_top", - "SW_clrsky_flux_up_at_model_top", - ): swcf, # EAMxx }, "ALBEDOC": OrderedDict( [ @@ -202,6 +200,7 @@ lambda fsntoa, fsntoac: swcf(fsntoa, fsntoac), ), (("rsut", "rsutcs"), lambda rsutcs, rsut: swcf(rsut, rsutcs)), 
+ (("ShortwaveCloudForcing",), rename), # EAMxx ] ), "SWCFSRF": OrderedDict( @@ -236,6 +235,7 @@ lambda flntoa, flntoac: lwcf(flntoa, flntoac), ), (("rlut", "rlutcs"), lambda rlutcs, rlut: lwcf(rlut, rlutcs)), + (("LongwaveCloudForcing",), rename), # EAMxx ] ), "LWCFSRF": OrderedDict( @@ -474,7 +474,7 @@ "T": { ("ta",): rename, ("T",): lambda t: convert_units(t, target_units="K"), - ("T_2m",): lambda t: convert_units(t, target_units="DegC"), # EAMxx + ("T_mid",): lambda t: convert_units(t, target_units="K"), # EAMxx }, "U": OrderedDict( [ @@ -496,6 +496,7 @@ lambda t: convert_units(t, target_units="DegC"), ), (("tas",), lambda t: convert_units(t, target_units="DegC")), + (("T_2m",), lambda t: convert_units(t, target_units="DegC")), # EAMxx ] ), # Surface water flux: kg/((m^2)*s) @@ -693,14 +694,21 @@ "RELHUM": { ("hur",): lambda hur: convert_units(hur, target_units="%"), ("RELHUM",): lambda relhum: convert_units(relhum, target_units="%"), + ("RelativeHumidity",): lambda relhum: convert_units( + relhum, target_units="%" + ), # EAMxx }, "OMEGA": { ("wap",): lambda wap: convert_units(wap, target_units="mbar/day"), ("OMEGA",): lambda omega: convert_units(omega, target_units="mbar/day"), + ("omega",): lambda omega: convert_units( + omega, target_units="mbar/day" + ), # EAMxx }, "Q": { ("hus",): lambda q: convert_units(rename(q), target_units="g/kg"), ("Q",): lambda q: convert_units(rename(q), target_units="g/kg"), + ("qv",): lambda q: convert_units(rename(q), target_units="g/kg"), # EAMxx ("SHUM",): lambda shum: convert_units(shum, target_units="g/kg"), }, "H2OLNZ": { @@ -739,9 +747,14 @@ ("surf_radiative_T",): rename, # EAMxx }, "PS": {("ps",): rename}, - "U10": {("sfcWind",): rename}, + "U10": { + ("sfcWind",): rename, + ("wind_speed_10m",): rename, # EAMxx + ("si10",): rename, + }, "QREFHT": { ("QREFHT",): lambda q: convert_units(q, target_units="g/kg"), + ("qv_2m",): lambda q: convert_units(q, target_units="g/kg"), # EAMxx ("huss",): lambda q: convert_units(q, 
target_units="g/kg"), ("d2m", "sp"): qsat, }, @@ -754,9 +767,18 @@ ("surf_mom_flux_V",): lambda tauv: -tauv, # EAMxx }, "CLDICE": {("cli",): rename}, - "TGCLDIWP": {("clivi",): rename}, - "CLDLIQ": {("clw",): rename}, - "TGCLDCWP": {("clwvi",): rename}, + "TGCLDIWP": { + ("clivi",): rename, + ("IceWaterPath",): rename, # EAMxx + }, + "CLDLIQ": { + ("clw",): rename, + ("qc",): rename, # EAMxx + }, + "TGCLDCWP": { + ("clwvi",): rename, + ("LiqWaterPath",): rename, # EAMxx Check if rain water is inlcuded? + }, "O3": {("o3",): rename}, "PminusE": { ("PminusE",): pminuse_convert_units, From 14f3c9b425377f89d58807a4c917a86a4bb0c33a Mon Sep 17 00:00:00 2001 From: ChengzhuZhang Date: Fri, 25 Oct 2024 16:24:32 -0700 Subject: [PATCH 03/25] add more derived vars --- e3sm_diags/derivations/derivations.py | 142 ++++++++++++++++++++++++-- 1 file changed, 134 insertions(+), 8 deletions(-) diff --git a/e3sm_diags/derivations/derivations.py b/e3sm_diags/derivations/derivations.py index 8c3dd6823..0e42b0c00 100644 --- a/e3sm_diags/derivations/derivations.py +++ b/e3sm_diags/derivations/derivations.py @@ -143,6 +143,14 @@ lower_limit=0.9, ), ), + ( + ("surf_radiative_T", "ocnfrac"), + lambda ts, ocnfrac: _apply_land_sea_mask( + convert_units(ts, target_units="degC"), + ocnfrac, + lower_limit=0.9, + ), + ), (("SST",), lambda sst: convert_units(sst, target_units="degC")), ] ), @@ -161,6 +169,7 @@ ("ALBEDO",): rename, ("SOLIN", "FSNTOA"): lambda solin, fsntoa: albedo(solin, solin - fsntoa), ("rsdt", "rsut"): albedo, + ("SW_flux_dn_at_model_top", "SW_flux_up_at_model_top"): albedo, # EAMxx }, "ALBEDOC": OrderedDict( [ @@ -170,6 +179,10 @@ lambda solin, fsntoac: albedoc(solin, solin - fsntoac), ), (("rsdt", "rsutcs"), lambda rsdt, rsutcs: albedoc(rsdt, rsutcs)), + ( + ("SW_flux_dn_at_model_top", "SW_clrsky_flux_up_at_model_top"), + lambda rsdt, rsutcs: albedoc(rsdt, rsutcs), + ), # EAMxx ] ), "ALBEDO_SRF": OrderedDict( @@ -180,6 +193,10 @@ ("FSDS", "FSNS"), lambda fsds, fsns: 
albedo_srf(fsds, fsds - fsns), ), + ( + ("SW_flux_dn_at_model_bot", "SW_flux_up_at_model_bot"), + lambda rsds, rsus: albedo_srf(rsds, rsus), + ), # EAMxx ] ), # Pay attention to the positive direction of SW and LW fluxes @@ -216,6 +233,15 @@ ), (("sfc_cre_net_sw_mon",), rename), (("FSNS", "FSNSC"), lambda fsns, fsnsc: swcfsrf(fsns, fsnsc)), + ( + ( + "SW_flux_dn_at_model_bot", + "SW_flux_up_at_model_bot", + "SW_clrsky_flux_dn_at_model_bot", + "SW_clrsky_flux_up_at_model_bot", + ), + lambda fsds, fsus, fsdsc, fsusc: swcfsrf(fsds - fsus, fsdsc - fsusc), + ), # EAMxx ] ), "LWCF": OrderedDict( @@ -251,6 +277,15 @@ ), (("sfc_cre_net_lw_mon",), rename), (("FLNS", "FLNSC"), lambda flns, flnsc: lwcfsrf(flns, flnsc)), + ( + ( + "LW_flux_dn_at_model_bot", + "LW_flux_up_at_model_bot", + "LW_clrsky_flux_dn_at_model_bot", + "LW_clrsky_flux_up_at_model_bot", + ), + lambda flds, flus, fldsc, flusc: lwcfsrf(flds - flus, fldsc - flusc), + ), # EAMxx ] ), "NETCF": OrderedDict( @@ -282,6 +317,10 @@ lambda swcf, lwcf: netcf2(swcf, lwcf), ), (("SWCF", "LWCF"), lambda swcf, lwcf: netcf2(swcf, lwcf)), + ( + ("ShortwaveCloudForcing", "LongwaveCloudForcing"), + lambda swcf, lwcf: netcf2(swcf, lwcf), + ), # EAMxx ( ("FSNTOA", "FSNTOAC", "FLNTOA", "FLNTOAC"), lambda fsntoa, fsntoac, flntoa, flntoac: netcf4( @@ -322,6 +361,21 @@ ("FSNS", "FSNSC", "FLNSC", "FLNS"), lambda fsns, fsnsc, flnsc, flns: netcf4srf(fsns, fsnsc, flnsc, flns), ), + ( + ( + "SW_flux_dn_at_model_bot", + "SW_flux_up_at_model_bot", + "SW_clrsky_flux_dn_at_model_bot", + "SW_clrsky_flux_up_at_model_bot", + "LW_clrsky_flux_up_at_model_bot", + "LW_clrsky_flux_dn_at_model_bot", + "LW_flux_up_at_model_bot", + "LW_flux_dn_at_model_bot", + ), + lambda fsds, fsus, fsdsc, fsusc, flusc, fldsc, flus, flds: netcf4srf( + fsds - fsus, fsdsc - fsusc, flusc - fldsc, flus - flds + ), + ), # EAMxx ] ), "FLNS": OrderedDict( @@ -331,6 +385,10 @@ lambda sfc_net_lw_all_mon: -sfc_net_lw_all_mon, ), (("rlds", "rlus"), lambda rlds, rlus: netlw(rlds, 
rlus)), + ( + ("LW_flux_dn_at_model_bot", "LW_flux_up_at_model_bot"), + lambda rlds, rlus: netlw(rlds, rlus), + ), ] ), "FLNSC": OrderedDict( @@ -343,18 +401,24 @@ ("sfc_net_lw_clr_t_mon",), lambda sfc_net_lw_clr_mon: -sfc_net_lw_clr_mon, ), + ( + ("LW_clrsky_flux_dn_at_model_bot", "LW_clrsky_flux_up_at_model_bot"), + lambda rlds, rlus: netlw(rlds, rlus), + ), # EAMxx ] ), - "FLDS": OrderedDict([(("rlds",), rename)]), + "FLDS": OrderedDict([(("rlds",), rename), (("LW_flux_dn_at_model_bot",), rename)]), "FLUS": OrderedDict( [ (("rlus",), rename), + (("LW_flux_up_at_model_bot",), rename), # EAMxx (("FLDS", "FLNS"), lambda FLDS, FLNS: flus(FLDS, FLNS)), ] ), "FLDSC": OrderedDict( [ (("rldscs",), rename), + (("LW_clrsky_flux_dn_at_model_bot",), rename), # EAMxx (("TS", "FLNSC"), lambda ts, flnsc: fldsc(ts, flnsc)), ] ), @@ -362,23 +426,42 @@ [ (("sfc_net_sw_all_mon",), rename), (("rsds", "rsus"), lambda rsds, rsus: netsw(rsds, rsus)), + ( + ("SW_flux_dn_at_model_bot", "SW_flux_dn_at_model_bot"), + lambda rsds, rsus: netsw(rsds, rsus), + ), # EAMxx ] ), "FSNSC": OrderedDict( [ (("sfc_net_sw_clr_mon",), rename), (("sfc_net_sw_clr_t_mon",), rename), + ( + ("SW_clrsky_flux_dn_at_model_bot", "SW_clrsky_flux_dn_at_model_bot"), + lambda rsds, rsus: netsw(rsds, rsus), + ), # EAMxx ] ), - "FSDS": OrderedDict([(("rsds",), rename)]), + "FSDS": OrderedDict( + [(("rsds",), rename), (("SW_flux_dn_at_model_bot",), rename)], + ), "FSUS": OrderedDict( [ (("rsus",), rename), + (("SW_flux_up_at_model_bot",), rename), # EAMxx (("FSDS", "FSNS"), lambda FSDS, FSNS: fsus(FSDS, FSNS)), ] ), - "FSUSC": OrderedDict([(("rsuscs",), rename)]), - "FSDSC": OrderedDict([(("rsdscs",), rename), (("rsdsc",), rename)]), + "FSUSC": OrderedDict( + [(("rsuscs",), rename), (("SW_clrsky_flux_up_at_model_bot",), rename)] + ), + "FSDSC": OrderedDict( + [ + (("rsdscs",), rename), + (("rsdsc",), rename), + (("SW_clrsky_flux_dn_at_model_bot",), rename), + ] + ), # Net surface heat flux: W/(m^2) "NET_FLUX_SRF": 
OrderedDict( [ @@ -408,12 +491,25 @@ rsds, rsus, rlds, rlus, hfls, hfss ), ), + ( + ( + "SW_flux_dn_at_model_bot", + "SW_flux_up_at_model_bot", + "LW_flux_dn_at_model_bot", + "LW_flux_up_at_model_bot", + "surface_upward_latent_heat_flux", + "surf_sens_flux", + ), + lambda rsds, rsus, rlds, rlus, hfls, hfss: netflux6( + rsds, rsus, rlds, rlus, hfls, hfss + ), # EAMxx + ), ] ), "FLUT": {("rlut",): rename, ("LW_flux_up_at_model_top",): rename}, - "FSUTOA": {("rsut",): rename}, - "FSUTOAC": {("rsutcs",): rename}, - "FLNT": {("FLNT",): rename}, + "FSUTOA": {("rsut",): rename, ("SW_flux_up_at_model_top",): rename}, + "FSUTOAC": {("rsutcs",): rename, ("SW_clrsky_flux_up_at_model_top",): rename}, + "FLNT": {("FLNT",): rename, ("LW_flux_up_at_model_top",): rename}, "FLUTC": {("rlutcs",): rename, ("LW_clrsky_flux_up_at_model_top",): rename}, "FSNTOA": { ("FSNTOA",): rename, @@ -453,6 +549,12 @@ prect(precc, precl), landfrac, lower_limit=0.5 ), ), + ( + ("precip_liq_surf_mass_flux", "precip_ice_surf_mass_flux", "landfrac"), + lambda precc, precl, landfrac: _apply_land_sea_mask( + prect(precc, precl), landfrac, lower_limit=0.5 + ), # EAMxx + ), ] ), "Z3": OrderedDict( @@ -462,6 +564,10 @@ lambda zg: convert_units(rename(zg), target_units="hectometer"), ), (("Z3",), lambda z3: convert_units(z3, target_units="hectometer")), + ( + ("z_mid",), + lambda z3: convert_units(z3, target_units="hectometer"), + ), # EAMxx ? 
] ), "PSL": { @@ -529,6 +635,14 @@ lower_limit=0.65, ), ), + ( + ("LiqWaterPath", "ocnfrac"), + lambda tgcldlwp, ocnfrac: _apply_land_sea_mask( + convert_units(tgcldlwp, target_units="g/m^2"), + ocnfrac, + lower_limit=0.65, + ), # EAMxx + ), ] ), "PRECT_OCN": OrderedDict( @@ -545,6 +659,14 @@ lower_limit=0.65, ), ), + ( + ("precip_liq_surf_mass_flux", "precip_liq_surf_mass_flux", "ocnfrac"), + lambda a, b, ocnfrac: _apply_land_sea_mask( + aplusb(a, b, target_units="mm/day"), + ocnfrac, + lower_limit=0.65, + ), # EAMxx + ), ] ), "PREH2O_OCN": OrderedDict( @@ -583,6 +705,10 @@ ("CLOUD",), lambda cldtot: convert_units(cldtot, target_units="%"), ), + ( + ("cldfrac_tot_for_analysis",), + lambda cldtot: convert_units(cldtot, target_units="%"), + ), ] ), # below for COSP output @@ -749,7 +875,7 @@ "PS": {("ps",): rename}, "U10": { ("sfcWind",): rename, - ("wind_speed_10m",): rename, # EAMxx + ("wind_speed_10m",): rename, # EAMxx ? ("si10",): rename, }, "QREFHT": { From cae36218a98fc44e84e5efa2a3515180f5672c06 Mon Sep 17 00:00:00 2001 From: ChengzhuZhang Date: Tue, 29 Oct 2024 13:39:01 -0700 Subject: [PATCH 04/25] fix attrs errors --- e3sm_diags/derivations/formulas.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/e3sm_diags/derivations/formulas.py b/e3sm_diags/derivations/formulas.py index 78db38f99..ec43c3d4e 100644 --- a/e3sm_diags/derivations/formulas.py +++ b/e3sm_diags/derivations/formulas.py @@ -143,8 +143,8 @@ def so4_mass_sum(a1: xr.DataArray, a2: xr.DataArray): with xr.set_options(keep_attrs=True): var = (a1 + a2) * AIR_DENS * 1e9 var.name = "so4_mass" - var.units = "\u03bcg/m3" - var.long_name = "SO4 mass conc." + var.attrs["units"] = "\u03bcg/m3" + var.attrs["long_name"] = "SO4 mass conc." 
return var @@ -344,6 +344,7 @@ def swcfsrf(fsns: xr.DataArray, fsnsc: xr.DataArray): var = fsns - fsnsc var.name = "SCWFSRF" + var.attrs["units"] = "W/m2" var.attrs["long_name"] = "Surface shortwave cloud forcing" return var @@ -454,7 +455,7 @@ def restom3(swdn: xr.DataArray, swup: xr.DataArray, lwup: xr.DataArray): with xr.set_options(keep_attrs=True): var = swdn - swup - lwup - var.long_name = "TOM(top of model) Radiative flux" + var.attrs["long_name"] = "TOM(top of model) Radiative flux" return var From a346098471c2a37bd76854a4e5a4cc96694fdafe Mon Sep 17 00:00:00 2001 From: ChengzhuZhang Date: Wed, 30 Oct 2024 18:44:27 -0700 Subject: [PATCH 05/25] more informative log --- e3sm_diags/driver/utils/dataset_xr.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/e3sm_diags/driver/utils/dataset_xr.py b/e3sm_diags/driver/utils/dataset_xr.py index 182618841..022a22da9 100644 --- a/e3sm_diags/driver/utils/dataset_xr.py +++ b/e3sm_diags/driver/utils/dataset_xr.py @@ -727,7 +727,7 @@ def _get_dataset_with_derived_climo_var(self, ds: xr.Dataset) -> xr.Dataset: return ds raise IOError( - f"The dataset file has no matching source variables for {target_var}" + f"Neither does {target_var}, nor the variables in {list(target_var_map.keys())} exist in the dataset file." 
) def _get_matching_climo_src_vars( From 3ab50ae06debe76ad879adce088b6dcdcf4f1535 Mon Sep 17 00:00:00 2001 From: tomvothecoder Date: Mon, 4 Nov 2024 11:47:04 -0800 Subject: [PATCH 06/25] Remove inadvertant rebase diffs --- .../template_cdat_regression_test.ipynb | 1333 ----------------- e3sm_diags/driver/utils/dataset_xr.py | 2 +- .../plot/cartopy/aerosol_aeronet_plot.py | 132 -- e3sm_diags/plot/cartopy/zonal_mean_2d_plot.py | 187 --- .../zonal_mean_2d_stratosphere_plot.py | 15 - 5 files changed, 1 insertion(+), 1668 deletions(-) delete mode 100644 auxiliary_tools/template_cdat_regression_test.ipynb delete mode 100644 e3sm_diags/plot/cartopy/aerosol_aeronet_plot.py delete mode 100644 e3sm_diags/plot/cartopy/zonal_mean_2d_plot.py delete mode 100644 e3sm_diags/plot/cartopy/zonal_mean_2d_stratosphere_plot.py diff --git a/auxiliary_tools/template_cdat_regression_test.ipynb b/auxiliary_tools/template_cdat_regression_test.ipynb deleted file mode 100644 index 8b4d00bd1..000000000 --- a/auxiliary_tools/template_cdat_regression_test.ipynb +++ /dev/null @@ -1,1333 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# CDAT Migration Regression Test (FY24)\n", - "\n", - "This notebook is used to perform regression testing between the development and\n", - "production versions of a diagnostic set.\n", - "\n", - "## How it works\n", - "\n", - "It compares the relative differences (%) between two sets of `.json` files in two\n", - "separate directories, one for the refactored code and the other for the `main` branch.\n", - "\n", - "It will display metrics values with relative differences >= 2%. Relative differences are used instead of absolute differences because:\n", - "\n", - "- Relative differences are in percentages, which shows the scale of the differences.\n", - "- Absolute differences are just a raw number that doesn't factor in\n", - " floating point size (e.g., 100.00 vs. 
0.0001), which can be misleading.\n", - "\n", - "## How to use\n", - "\n", - "PREREQUISITE: The diagnostic set's metrics stored in `.json` files in two directories\n", - "(dev and `main` branches).\n", - "\n", - "1. Make a copy of this notebook.\n", - "2. Run `mamba create -n cdat_regression_test -y -c conda-forge \"python<3.12\" pandas matplotlib-base ipykernel`\n", - "3. Run `mamba activate cdat_regression_test`\n", - "4. Update `DEV_PATH` and `PROD_PATH` in the copy of your notebook.\n", - "5. Run all cells IN ORDER.\n", - "6. Review results for any outstanding differences (>= 2%).\n", - " - Debug these differences (e.g., bug in metrics functions, incorrect variable references, etc.)\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Setup Code\n" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "import glob\n", - "import math\n", - "from typing import List\n", - "\n", - "import pandas as pd\n", - "\n", - "# TODO: Update DEV_RESULTS and PROD_RESULTS to your diagnostic sets.\n", - "DEV_PATH = \"/global/cfs/cdirs/e3sm/www/vo13/examples_658/ex1_modTS_vs_modTS_3years/lat_lon/model_vs_model\"\n", - "PROD_PATH = \"/global/cfs/cdirs/e3sm/www/vo13/examples/ex1_modTS_vs_modTS_3years/lat_lon/model_vs_model\"\n", - "\n", - "DEV_GLOB = sorted(glob.glob(DEV_PATH + \"/*.json\"))\n", - "PROD_GLOB = sorted(glob.glob(PROD_PATH + \"/*.json\"))\n", - "\n", - "# The names of the columns that store percentage difference values.\n", - "PERCENTAGE_COLUMNS = [\n", - " \"test DIFF (%)\",\n", - " \"ref DIFF (%)\",\n", - " \"test_regrid DIFF (%)\",\n", - " \"ref_regrid DIFF (%)\",\n", - " \"diff DIFF (%)\",\n", - " \"misc DIFF (%)\",\n", - "]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Core Functions\n" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "def get_metrics(filepaths: List[str]) -> 
pd.DataFrame:\n", - " \"\"\"Get the metrics using a glob of `.json` metric files in a directory.\n", - "\n", - " Parameters\n", - " ----------\n", - " filepaths : List[str]\n", - " The filepaths for metrics `.json` files.\n", - "\n", - " Returns\n", - " -------\n", - " pd.DataFrame\n", - " The DataFrame containing the metrics for all of the variables in\n", - " the results directory.\n", - " \"\"\"\n", - " metrics = []\n", - "\n", - " for filepath in filepaths:\n", - " df = pd.read_json(filepath)\n", - "\n", - " filename = filepath.split(\"/\")[-1]\n", - " var_key = filename.split(\"-\")[1]\n", - "\n", - " # Add the variable key to the MultiIndex and update the index\n", - " # before stacking to make the DataFrame easier to parse.\n", - " multiindex = pd.MultiIndex.from_product([[var_key], [*df.index]])\n", - " df = df.set_index(multiindex)\n", - " df.stack()\n", - "\n", - " metrics.append(df)\n", - "\n", - " df_final = pd.concat(metrics)\n", - "\n", - " # Reorder columns and drop \"unit\" column (string dtype breaks Pandas\n", - " # arithmetic).\n", - " df_final = df_final[[\"test\", \"ref\", \"test_regrid\", \"ref_regrid\", \"diff\", \"misc\"]]\n", - "\n", - " return df_final\n", - "\n", - "\n", - "def get_rel_diffs(df_actual: pd.DataFrame, df_reference: pd.DataFrame) -> pd.DataFrame:\n", - " \"\"\"Get the relative differences between two DataFrames.\n", - "\n", - " Formula: abs(actual - reference) / abs(actual)\n", - "\n", - " Parameters\n", - " ----------\n", - " df_actual : pd.DataFrame\n", - " The first DataFrame representing \"actual\" results (dev branch).\n", - " df_reference : pd.DataFrame\n", - " The second DataFrame representing \"reference\" results (main branch).\n", - "\n", - " Returns\n", - " -------\n", - " pd.DataFrame\n", - " The DataFrame containing absolute and relative differences between\n", - " the metrics DataFrames.\n", - " \"\"\"\n", - " df_diff = abs(df_actual - df_reference) / abs(df_actual)\n", - " df_diff = df_diff.add_suffix(\" DIFF 
(%)\")\n", - "\n", - " return df_diff\n", - "\n", - "\n", - "def sort_columns(df: pd.DataFrame) -> pd.DataFrame:\n", - " \"\"\"Sorts the order of the columns for the final DataFrame output.\n", - "\n", - " Parameters\n", - " ----------\n", - " df : pd.DataFrame\n", - " The final DataFrame output.\n", - "\n", - " Returns\n", - " -------\n", - " pd.DataFrame\n", - " The final DataFrame output with sorted columns.\n", - " \"\"\"\n", - " columns = [\n", - " \"test_dev\",\n", - " \"test_prod\",\n", - " \"test DIFF (%)\",\n", - " \"ref_dev\",\n", - " \"ref_prod\",\n", - " \"ref DIFF (%)\",\n", - " \"test_regrid_dev\",\n", - " \"test_regrid_prod\",\n", - " \"test_regrid DIFF (%)\",\n", - " \"ref_regrid_dev\",\n", - " \"ref_regrid_prod\",\n", - " \"ref_regrid DIFF (%)\",\n", - " \"diff_dev\",\n", - " \"diff_prod\",\n", - " \"diff DIFF (%)\",\n", - " \"misc_dev\",\n", - " \"misc_prod\",\n", - " \"misc DIFF (%)\",\n", - " ]\n", - "\n", - " df_new = df.copy()\n", - " df_new = df_new[columns]\n", - "\n", - " return df_new\n", - "\n", - "\n", - "def update_diffs_to_pct(df: pd.DataFrame):\n", - " \"\"\"Update relative diff columns from float to string percentage.\n", - "\n", - " Parameters\n", - " ----------\n", - " df : pd.DataFrame\n", - " The final DataFrame containing metrics and diffs (floats).\n", - "\n", - " Returns\n", - " -------\n", - " pd.DataFrame\n", - " The final DataFrame containing metrics and diffs (str percentage).\n", - " \"\"\"\n", - " df_new = df.copy()\n", - " df_new[PERCENTAGE_COLUMNS] = df_new[PERCENTAGE_COLUMNS].map(\n", - " lambda x: \"{0:.2f}%\".format(x * 100) if not math.isnan(x) else x\n", - " )\n", - "\n", - " return df_new" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 1. 
Get the DataFrame containing development and production metrics.\n" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "df_metrics_dev = get_metrics(DEV_GLOB)\n", - "df_metrics_prod = get_metrics(PROD_GLOB)\n", - "df_metrics_all = pd.concat(\n", - " [df_metrics_dev.add_suffix(\"_dev\"), df_metrics_prod.add_suffix(\"_prod\")],\n", - " axis=1,\n", - " join=\"outer\",\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 2. Get DataFrame for differences >= 2%.\n", - "\n", - "- Get the relative differences for all metrics\n", - "- Filter down metrics to those with differences >= 2%\n", - " - If all cells in a row are NaN (< 2%), the entire row is dropped to make the results easier to parse.\n", - " - Any remaining NaN cells are below < 2% difference and **should be ignored**.\n" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [], - "source": [ - "df_metrics_diffs = get_rel_diffs(df_metrics_dev, df_metrics_prod)\n", - "df_metrics_diffs_thres = df_metrics_diffs[df_metrics_diffs >= 0.02]\n", - "df_metrics_diffs_thres = df_metrics_diffs_thres.dropna(\n", - " axis=0, how=\"all\", ignore_index=False\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 3. Combine both DataFrames to get the final result.\n" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [], - "source": [ - "df_final = df_metrics_diffs_thres.join(df_metrics_all)\n", - "df_final = sort_columns(df_final)\n", - "df_final = update_diffs_to_pct(df_final)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 4. 
Display final DataFrame and review results.\n", - "\n", - "- Red cells are differences >= 2%\n", - "- `nan` cells are differences < 2% and **should be ignored**\n" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - 
" \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
 var_keymetrictest_devtest_prodtest DIFF (%)ref_devref_prodref DIFF (%)test_regrid_devtest_regrid_prodtest_regrid DIFF (%)ref_regrid_devref_regrid_prodref_regrid DIFF (%)diff_devdiff_proddiff DIFF (%)misc_devmisc_prodmisc DIFF (%)
0FLUTmax299.911864299.355074nan300.162128299.776167nan299.911864299.355074nan300.162128299.776167nan9.4923599.7888093.12%nannannan
1FLUTmin124.610884125.987072nan122.878196124.148986nan124.610884125.987072nan122.878196124.148986nan-15.505809-17.0323259.84%nannannan
2FSNSmax269.789702269.798166nan272.722362272.184917nan269.789702269.798166nan272.722362272.184917nan20.64792924.85985220.40%nannannan
3FSNSmin16.89742317.7608895.11%16.71013416.2370612.83%16.89742317.7608895.11%16.71013416.2370612.83%-28.822277-28.324921nannannannan
4FSNTOAmax360.624327360.209193nan362.188816361.778529nan360.624327360.209193nan362.188816361.778529nan18.60227622.62426621.62%nannannan
5FSNTOAmean239.859777240.001860nan241.439641241.544384nan239.859777240.001860nan241.439641241.544384nan-1.579864-1.5425242.36%nannannan
6FSNTOAmin44.90704148.2568187.46%47.22350250.3396086.60%44.90704148.2568187.46%47.22350250.3396086.60%-23.576184-23.171864nannannannan
7LHFLXmax282.280453289.0799402.41%275.792933276.297281nan282.280453289.0799402.41%275.792933276.297281nan47.53550353.16892411.85%nannannan
8LHFLXmean88.37960988.470270nan88.96955088.976266nan88.37960988.470270nan88.96955088.976266nan-0.589942-0.50599614.23%nannannan
9LHFLXmin-0.878371-0.54924837.47%-1.176561-0.94611019.59%-0.878371-0.54924837.47%-1.176561-0.94611019.59%-34.375924-33.902769nannannannan
10LWCFmax78.49365377.473220nan86.12195984.993825nan78.49365377.473220nan86.12195984.993825nan9.61605710.79610412.27%nannannan
11LWCFmean24.37322424.370539nan24.40669724.391579nan24.37322424.370539nan24.40669724.391579nan-0.033473-0.02104037.14%nannannan
12LWCFmin-0.667812-0.6171077.59%-1.360010-1.18178713.10%-0.667812-0.6171077.59%-1.360010-1.18178713.10%-10.574643-10.1451884.06%nannannan
13NETCFmax13.22460412.6218254.56%13.71543813.2327163.52%13.22460412.6218254.56%13.71543813.2327163.52%10.89934410.2848255.64%nannannan
14NETCFmin-66.633044-66.008633nan-64.832041-67.3980473.96%-66.633044-66.008633nan-64.832041-67.3980473.96%-17.923932-17.940099nannannannan
15NET_FLUX_SRFmax155.691338156.424180nan166.556120166.506173nan155.691338156.424180nan166.556120166.506173nan59.81944961.6728243.10%nannannan
16NET_FLUX_SRFmean0.3940160.51633031.04%-0.0681860.068584200.58%0.3940160.51633031.04%-0.0681860.068584200.58%0.4622020.4477463.13%nannannan
17NET_FLUX_SRFmin-284.505205-299.5050245.27%-280.893287-290.2029343.31%-284.505205-299.5050245.27%-280.893287-290.2029343.31%-75.857589-85.85208913.18%nannannan
18PRECTmax17.28995117.071276nan20.26486220.138274nan17.28995117.071276nan20.26486220.138274nan2.3441112.4066252.67%nannannan
19PRECTmean3.0538023.056760nan3.0748853.074978nan3.0538023.056760nan3.0748853.074978nan-0.021083-0.01821813.59%nannannan
20PSLmin970.981710971.390765nan973.198437973.235326nan970.981710971.390765nan973.198437973.235326nan-6.328677-6.1046103.54%nannannan
21PSLrmsenannannannannannannannannannannannannannannan1.0428840.9799816.03%
22RESTOMmax84.29550283.821906nan87.70794487.451262nan84.29550283.821906nan87.70794487.451262nan17.39628321.42361623.15%nannannan
23RESTOMmean0.4815490.65656036.34%0.0180410.162984803.40%0.4815490.65656036.34%0.0180410.162984803.40%0.4635080.4935766.49%nannannan
24RESTOMmin-127.667181-129.014673nan-127.417586-128.673508nan-127.667181-129.014673nan-127.417586-128.673508nan-15.226249-14.8696142.34%nannannan
25SHFLXmax114.036895112.859646nan116.870038116.432591nan114.036895112.859646nan116.870038116.432591nan28.32065627.5567552.70%nannannan
26SHFLXmin-88.650312-88.386947nan-85.809438-85.480377nan-88.650312-88.386947nan-85.809438-85.480377nan-27.776625-28.3630532.11%nannannan
27SSTmin-1.788055-1.788055nan-1.676941-1.676941nan-1.788055-1.788055nan-1.676941-1.676941nan-4.513070-2.99327233.68%nannannan
28SWCFmax-0.518025-0.5368443.63%-0.311639-0.3316166.41%-0.518025-0.5368443.63%-0.311639-0.3316166.41%11.66893912.0870773.58%nannannan
29SWCFmin-123.625017-122.042043nan-131.053537-130.430161nan-123.625017-122.042043nan-131.053537-130.430161nan-21.415249-20.8089732.83%nannannan
30TREFHTmax31.14150831.058424nan29.81921029.721868nan31.14150831.058424nan29.81921029.721868nan4.9817575.1261852.90%nannannan
31TREFHTmax31.14150831.058424nan29.81921029.721868nan31.14150831.058424nan29.81921029.721868nan4.8678555.1261852.90%nannannan
32TREFHTmax31.14150831.058424nan29.81921029.721868nan31.14150831.058424nan29.81921029.721868nan4.9817575.1261855.31%nannannan
33TREFHTmax31.14150831.058424nan29.81921029.721868nan31.14150831.058424nan29.81921029.721868nan4.8678555.1261855.31%nannannan
34TREFHTmean14.76994614.741707nan13.84201313.800258nan14.76994614.741707nan13.84201313.800258nan0.9279330.9414492.28%nannannan
35TREFHTmean9.2142249.114572nan8.0833497.957917nan9.2142249.114572nan8.0833497.957917nan1.1308761.1566552.28%nannannan
36TREFHTmin-56.266677-55.623001nan-58.159250-57.542053nan-56.266677-55.623001nan-58.159250-57.542053nan-0.681558-0.6243718.39%nannannan
37TREFHTmin-56.266677-55.623001nan-58.159250-57.542053nan-56.266677-55.623001nan-58.159250-57.542053nan-0.681558-0.6243718.39%nannannan
38TREFHTmin-56.266677-55.623001nan-58.159250-57.542053nan-56.266677-55.623001nan-58.159250-57.542053nan-0.681558-0.6243718.39%nannannan
39TREFHTmin-56.266677-55.623001nan-58.159250-57.542053nan-56.266677-55.623001nan-58.159250-57.542053nan-0.681558-0.6243718.39%nannannan
40TREFHTrmsenannannannannannannannannannannannannannannan1.1607181.1799952.68%
41TREFHTrmsenannannannannannannannannannannannannannannan1.3431691.3791412.68%
\n" - ], - "text/plain": [ - "" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df_final.reset_index(names=[\"var_key\", \"metric\"]).style.map(\n", - " lambda x: \"background-color : red\" if isinstance(x, str) else \"\",\n", - " subset=pd.IndexSlice[:, PERCENTAGE_COLUMNS],\n", - ")" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "cdat_regression_test", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.6" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/e3sm_diags/driver/utils/dataset_xr.py b/e3sm_diags/driver/utils/dataset_xr.py index 022a22da9..182618841 100644 --- a/e3sm_diags/driver/utils/dataset_xr.py +++ b/e3sm_diags/driver/utils/dataset_xr.py @@ -727,7 +727,7 @@ def _get_dataset_with_derived_climo_var(self, ds: xr.Dataset) -> xr.Dataset: return ds raise IOError( - f"Neither does {target_var}, nor the variables in {list(target_var_map.keys())} exist in the dataset file." 
+ f"The dataset file has no matching source variables for {target_var}" ) def _get_matching_climo_src_vars( diff --git a/e3sm_diags/plot/cartopy/aerosol_aeronet_plot.py b/e3sm_diags/plot/cartopy/aerosol_aeronet_plot.py deleted file mode 100644 index 765235095..000000000 --- a/e3sm_diags/plot/cartopy/aerosol_aeronet_plot.py +++ /dev/null @@ -1,132 +0,0 @@ -import os - -import cartopy.crs as ccrs -import matplotlib -import numpy as np - -from e3sm_diags.driver.utils.general import get_output_dir -from e3sm_diags.logger import custom_logger -from e3sm_diags.metrics import mean -from e3sm_diags.plot.cartopy.deprecated_lat_lon_plot import plot_panel - -matplotlib.use("Agg") -import matplotlib.pyplot as plt # isort:skip # noqa: E402 - -logger = custom_logger(__name__) - -plotTitle = {"fontsize": 11.5} -plotSideTitle = {"fontsize": 9.5} - - -def plot(test, test_site, ref_site, parameter): - # Plot scatter plot - # Position and sizes of subplot axes in page coordinates (0 to 1) - # (left, bottom, width, height) in page coordinates - panel = [ - (0.09, 0.40, 0.72, 0.30), - (0.19, 0.2, 0.62, 0.30), - ] - # Border padding relative to subplot axes for saving individual panels - # (left, bottom, right, top) in page coordinates - border = (-0.06, -0.03, 0.13, 0.03) - - fig = plt.figure(figsize=parameter.figsize, dpi=parameter.dpi) - fig.suptitle(parameter.var_id, x=0.5, y=0.97) - proj = ccrs.PlateCarree() - max1 = test.max() - min1 = test.min() - mean1 = mean(test) - # TODO: Replace this function call with `e3sm_diags.plot.utils._add_colormap()`. 
- plot_panel( - 0, - fig, - proj, - test, - parameter.contour_levels, - parameter.test_colormap, - (parameter.test_name_yrs, None, None), - parameter, - stats=(max1, mean1, min1), - ) - - ax = fig.add_axes(panel[1]) - ax.set_title(f"{parameter.var_id} from AERONET sites") - - # define 1:1 line, and x y axis limits - - if parameter.var_id == "AODVIS": - x1 = np.arange(0.01, 3.0, 0.1) - y1 = np.arange(0.01, 3.0, 0.1) - plt.xlim(0.03, 1) - plt.ylim(0.03, 1) - else: - x1 = np.arange(0.0001, 1.0, 0.01) - y1 = np.arange(0.0001, 1.0, 0.01) - plt.xlim(0.001, 0.3) - plt.ylim(0.001, 0.3) - - plt.loglog(x1, y1, "-k", linewidth=0.5) - plt.loglog(x1, y1 * 0.5, "--k", linewidth=0.5) - plt.loglog(x1 * 0.5, y1, "--k", linewidth=0.5) - - corr = np.corrcoef(ref_site, test_site) - xmean = np.mean(ref_site) - ymean = np.mean(test_site) - ax.text( - 0.3, - 0.9, - f"Mean (test): {ymean:.3f} \n Mean (ref): {xmean:.3f}\n Corr: {corr[0, 1]:.2f}", - horizontalalignment="right", - verticalalignment="top", - transform=ax.transAxes, - ) - - # axis ticks - plt.tick_params(axis="both", which="major") - plt.tick_params(axis="both", which="minor") - - # axis labels - plt.xlabel(f"ref: {parameter.ref_name_yrs}") - plt.ylabel(f"test: {parameter.test_name_yrs}") - - plt.loglog(ref_site, test_site, "kx", markersize=3.0, mfc="none") - - # legend - plt.legend(frameon=False, prop={"size": 5}) - - # TODO: This section can be refactored to use `plot.utils._save_plot()`. - for f in parameter.output_format: - f = f.lower().split(".")[-1] - fnm = os.path.join( - get_output_dir(parameter.current_set, parameter), - f"{parameter.output_file}" + "." 
+ f, - ) - plt.savefig(fnm) - logger.info(f"Plot saved in: {fnm}") - - for f in parameter.output_format_subplot: - fnm = os.path.join( - get_output_dir(parameter.current_set, parameter), - parameter.output_file, - ) - page = fig.get_size_inches() - i = 0 - for p in panel: - # Extent of subplot - subpage = np.array(p).reshape(2, 2) - subpage[1, :] = subpage[0, :] + subpage[1, :] - subpage = subpage + np.array(border).reshape(2, 2) - subpage = list(((subpage) * page).flatten()) # type: ignore - extent = matplotlib.transforms.Bbox.from_extents(*subpage) - # Save subplot - fname = fnm + ".%i." % (i) + f - plt.savefig(fname, bbox_inches=extent) - - orig_fnm = os.path.join( - get_output_dir(parameter.current_set, parameter), - parameter.output_file, - ) - fname = orig_fnm + ".%i." % (i) + f - logger.info(f"Sub-plot saved in: {fname}") - - i += 1 diff --git a/e3sm_diags/plot/cartopy/zonal_mean_2d_plot.py b/e3sm_diags/plot/cartopy/zonal_mean_2d_plot.py deleted file mode 100644 index a72bf5dce..000000000 --- a/e3sm_diags/plot/cartopy/zonal_mean_2d_plot.py +++ /dev/null @@ -1,187 +0,0 @@ -from typing import List, Optional, Tuple - -import matplotlib -import numpy as np -import xarray as xr -import xcdat as xc - -from e3sm_diags.driver.utils.type_annotations import MetricsDict -from e3sm_diags.logger import custom_logger -from e3sm_diags.parameter.core_parameter import CoreParameter -from e3sm_diags.parameter.zonal_mean_2d_parameter import DEFAULT_PLEVS -from e3sm_diags.plot.utils import ( - DEFAULT_PANEL_CFG, - _add_colorbar, - _add_contour_plot, - _add_min_mean_max_text, - _add_rmse_corr_text, - _configure_titles, - _configure_x_and_y_axes, - _get_c_levels_and_norm, - _save_plot, -) - -matplotlib.use("Agg") -import matplotlib.pyplot as plt # isort:skip # noqa: E402 - -logger = custom_logger(__name__) - - -# Configs for x axis ticks and x axis limits. 
-X_TICKS = np.array([-90, -60, -30, 0, 30, 60, 90]) -X_LIM = -90, 90 - - -def plot( - parameter: CoreParameter, - da_test: xr.DataArray, - da_ref: xr.DataArray, - da_diff: xr.DataArray, - metrics_dict: MetricsDict, -): - """Plot the variable's metrics generated by the zonal_mean_2d set. - - Parameters - ---------- - parameter : CoreParameter - The CoreParameter object containing plot configurations. - da_test : xr.DataArray - The test data. - da_ref : xr.DataArray - The reference data. - da_diff : xr.DataArray - The difference between `da_test` and `da_ref` (both are regridded to - the lower resolution of the two beforehand). - metrics_dict : Metrics - The metrics. - """ - fig = plt.figure(figsize=parameter.figsize, dpi=parameter.dpi) - fig.suptitle(parameter.main_title, x=0.5, y=0.96, fontsize=18) - - # The variable units. - units = metrics_dict["units"] - - # Add the first subplot for test data. - min1 = metrics_dict["test"]["min"] # type: ignore - mean1 = metrics_dict["test"]["mean"] # type: ignore - max1 = metrics_dict["test"]["max"] # type: ignore - - _add_colormap( - 0, - da_test, - fig, - parameter, - parameter.test_colormap, - parameter.contour_levels, - title=(parameter.test_name_yrs, parameter.test_title, units), # type: ignore - metrics=(max1, mean1, min1), # type: ignore - ) - - # Add the second and third subplots for ref data and the differences, - # respectively. 
- min2 = metrics_dict["ref"]["min"] # type: ignore - mean2 = metrics_dict["ref"]["mean"] # type: ignore - max2 = metrics_dict["ref"]["max"] # type: ignore - - _add_colormap( - 1, - da_ref, - fig, - parameter, - parameter.reference_colormap, - parameter.contour_levels, - title=(parameter.ref_name_yrs, parameter.reference_title, units), # type: ignore - metrics=(max2, mean2, min2), # type: ignore - ) - - min3 = metrics_dict["diff"]["min"] # type: ignore - mean3 = metrics_dict["diff"]["mean"] # type: ignore - max3 = metrics_dict["diff"]["max"] # type: ignore - r = metrics_dict["misc"]["rmse"] # type: ignore - c = metrics_dict["misc"]["corr"] # type: ignore - - _add_colormap( - 2, - da_diff, - fig, - parameter, - parameter.diff_colormap, - parameter.diff_levels, - title=(None, parameter.diff_title, da_diff.attrs["units"]), # - metrics=(max3, mean3, min3, r, c), # type: ignore - ) - - _save_plot(fig, parameter) - - plt.close() - - -def _add_colormap( - subplot_num: int, - var: xr.DataArray, - fig: plt.Figure, - parameter: CoreParameter, - color_map: str, - contour_levels: List[float], - title: Tuple[Optional[str], str, str], - metrics: Tuple[float, ...], -): - lat = xc.get_dim_coords(var, axis="Y") - plev = xc.get_dim_coords(var, axis="Z") - var = var.squeeze() - - # Configure contour levels - # -------------------------------------------------------------------------- - c_levels, norm = _get_c_levels_and_norm(contour_levels) - - # Add the contour plot - # -------------------------------------------------------------------------- - ax = fig.add_axes(DEFAULT_PANEL_CFG[subplot_num], projection=None) - - contour_plot = _add_contour_plot( - ax, parameter, var, lat, plev, color_map, None, norm, c_levels - ) - - # Configure the aspect ratio and plot titles. - # -------------------------------------------------------------------------- - ax.set_aspect("auto") - _configure_titles(ax, title) - - # Configure x and y axis. 
- # -------------------------------------------------------------------------- - _configure_x_and_y_axes(ax, X_TICKS, None, None, parameter.current_set) - ax.set_xlim(X_LIM) - - if parameter.plot_log_plevs: - ax.set_yscale("log") - - if parameter.plot_plevs: - plev_ticks = parameter.plevs - plt.yticks(plev_ticks, plev_ticks) - - # For default plevs, specify the pressure axis and show the 50 mb tick - # at the top. - if ( - not parameter.plot_log_plevs - and not parameter.plot_plevs - and parameter.plevs == DEFAULT_PLEVS - ): - plev_ticks = parameter.plevs - new_ticks = [plev_ticks[0]] + plev_ticks[1::2] - new_ticks = [int(x) for x in new_ticks] - plt.yticks(new_ticks, new_ticks) - - plt.ylabel("pressure (mb)") - ax.invert_yaxis() - - # Add and configure the color bar. - # -------------------------------------------------------------------------- - _add_colorbar(fig, subplot_num, DEFAULT_PANEL_CFG, contour_plot, c_levels) - - # Add metrics text. - # -------------------------------------------------------------------------- - # Min, Mean, Max - _add_min_mean_max_text(fig, subplot_num, DEFAULT_PANEL_CFG, metrics) - - if len(metrics) == 5: - _add_rmse_corr_text(fig, subplot_num, DEFAULT_PANEL_CFG, metrics) diff --git a/e3sm_diags/plot/cartopy/zonal_mean_2d_stratosphere_plot.py b/e3sm_diags/plot/cartopy/zonal_mean_2d_stratosphere_plot.py deleted file mode 100644 index 004f3c93d..000000000 --- a/e3sm_diags/plot/cartopy/zonal_mean_2d_stratosphere_plot.py +++ /dev/null @@ -1,15 +0,0 @@ -import xarray as xr - -from e3sm_diags.driver.utils.type_annotations import MetricsDict -from e3sm_diags.parameter.core_parameter import CoreParameter -from e3sm_diags.plot.cartopy.zonal_mean_2d_plot import plot as base_plot - - -def plot( - parameter: CoreParameter, - da_test: xr.DataArray, - da_ref: xr.DataArray, - da_diff: xr.DataArray, - metrics_dict: MetricsDict, -): - return base_plot(parameter, da_test, da_ref, da_diff, metrics_dict) From c3dbdd6ce05e584a9609e2cd15cdb9beb4e49579 Mon 
Sep 17 00:00:00 2001 From: tomvothecoder Date: Mon, 4 Nov 2024 13:43:12 -0800 Subject: [PATCH 07/25] Update IOError --- e3sm_diags/driver/utils/dataset_xr.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/e3sm_diags/driver/utils/dataset_xr.py b/e3sm_diags/driver/utils/dataset_xr.py index 182618841..33aa3ebb7 100644 --- a/e3sm_diags/driver/utils/dataset_xr.py +++ b/e3sm_diags/driver/utils/dataset_xr.py @@ -727,7 +727,8 @@ def _get_dataset_with_derived_climo_var(self, ds: xr.Dataset) -> xr.Dataset: return ds raise IOError( - f"The dataset file has no matching source variables for {target_var}" + f"The dataset file has no matching source variables for {target_var} and " + f"could not be derived using {list(target_var_map.keys())}." ) def _get_matching_climo_src_vars( From bfc51a1415b0ff3856b9abd24cbb39f5e3728fd6 Mon Sep 17 00:00:00 2001 From: tomvothecoder Date: Mon, 4 Nov 2024 15:53:22 -0800 Subject: [PATCH 08/25] Add bounds after calculating climo for time series --- e3sm_diags/driver/utils/dataset_xr.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/e3sm_diags/driver/utils/dataset_xr.py b/e3sm_diags/driver/utils/dataset_xr.py index 33aa3ebb7..ee40074cf 100644 --- a/e3sm_diags/driver/utils/dataset_xr.py +++ b/e3sm_diags/driver/utils/dataset_xr.py @@ -392,7 +392,9 @@ def get_climo_dataset(self, var: str, season: ClimoFreq) -> xr.Dataset: if self.is_time_series: ds = self.get_time_series_dataset(var) + ds_climo = climo(ds, self.var, season).to_dataset() + ds_climo = ds_climo.bounds.add_missing_bounds(axes=["X", "Y"]) return ds_climo From f49f6062651b27130acb66519d35415586aa7409 Mon Sep 17 00:00:00 2001 From: tomvothecoder Date: Mon, 4 Nov 2024 16:02:28 -0800 Subject: [PATCH 09/25] Fix unit test --- tests/e3sm_diags/driver/utils/test_dataset_xr.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/e3sm_diags/driver/utils/test_dataset_xr.py b/tests/e3sm_diags/driver/utils/test_dataset_xr.py index 7653fe299..d64dc443e 100644 --- 
a/tests/e3sm_diags/driver/utils/test_dataset_xr.py +++ b/tests/e3sm_diags/driver/utils/test_dataset_xr.py @@ -876,6 +876,7 @@ def test_returns_climo_dataset_using_climo_of_time_series_files(self): # Set all of the correct attributes. expected = expected.assign(**spatial_coords) # type: ignore expected = expected.drop_dims("time") + expected = expected.bounds.add_missing_bounds(axes=["X", "Y"]) xr.testing.assert_identical(result, expected) From 849ea26a35edffda7d2bc93f9a099c57519306ad Mon Sep 17 00:00:00 2001 From: ChengzhuZhang Date: Tue, 5 Nov 2024 10:40:22 -0800 Subject: [PATCH 10/25] fixing radiation fluxes and units --- e3sm_diags/derivations/derivations.py | 15 ++++++++------- e3sm_diags/derivations/formulas.py | 18 ++++++++++++++++++ 2 files changed, 26 insertions(+), 7 deletions(-) diff --git a/e3sm_diags/derivations/derivations.py b/e3sm_diags/derivations/derivations.py index 0e42b0c00..467357798 100644 --- a/e3sm_diags/derivations/derivations.py +++ b/e3sm_diags/derivations/derivations.py @@ -110,6 +110,9 @@ ("PRECC", "PRECL"): lambda precc, precl: prect(precc, precl), ("sat_gauge_precip",): rename, ("precip_liq_surf_mass_flux", "precip_ice_surf_mass_flux"): prect, # EAMxx + ("precip_total_surf_mass_flux",): lambda pr: convert_units( + rename(pr), target_units="mm/day" + ), # EAMxx ("PrecipLiqSurfMassFlux", "PrecipIceSurfMassFlux"): prect, # EAMxx }, "PRECST": { @@ -280,11 +283,9 @@ ( ( "LW_flux_dn_at_model_bot", - "LW_flux_up_at_model_bot", "LW_clrsky_flux_dn_at_model_bot", - "LW_clrsky_flux_up_at_model_bot", ), - lambda flds, flus, fldsc, flusc: lwcfsrf(flds - flus, fldsc - flusc), + lambda flds, fldsc: -lwcfsrf(flds, fldsc), ), # EAMxx ] ), @@ -367,7 +368,7 @@ "SW_flux_up_at_model_bot", "SW_clrsky_flux_dn_at_model_bot", "SW_clrsky_flux_up_at_model_bot", - "LW_clrsky_flux_up_at_model_bot", + "LW_flux_up_at_model_bot", "LW_clrsky_flux_dn_at_model_bot", "LW_flux_up_at_model_bot", "LW_flux_dn_at_model_bot", @@ -402,7 +403,7 @@ lambda sfc_net_lw_clr_mon: 
-sfc_net_lw_clr_mon, ), ( - ("LW_clrsky_flux_dn_at_model_bot", "LW_clrsky_flux_up_at_model_bot"), + ("LW_clrsky_flux_dn_at_model_bot", "LW_flux_up_at_model_bot"), lambda rlds, rlus: netlw(rlds, rlus), ), # EAMxx ] @@ -427,7 +428,7 @@ (("sfc_net_sw_all_mon",), rename), (("rsds", "rsus"), lambda rsds, rsus: netsw(rsds, rsus)), ( - ("SW_flux_dn_at_model_bot", "SW_flux_dn_at_model_bot"), + ("SW_flux_dn_at_model_bot", "SW_flux_up_at_model_bot"), lambda rsds, rsus: netsw(rsds, rsus), ), # EAMxx ] @@ -437,7 +438,7 @@ (("sfc_net_sw_clr_mon",), rename), (("sfc_net_sw_clr_t_mon",), rename), ( - ("SW_clrsky_flux_dn_at_model_bot", "SW_clrsky_flux_dn_at_model_bot"), + ("SW_clrsky_flux_dn_at_model_bot", "SW_clrsky_flux_up_at_model_bot"), lambda rsds, rsus: netsw(rsds, rsus), ), # EAMxx ] diff --git a/e3sm_diags/derivations/formulas.py b/e3sm_diags/derivations/formulas.py index ec43c3d4e..f6200986f 100644 --- a/e3sm_diags/derivations/formulas.py +++ b/e3sm_diags/derivations/formulas.py @@ -324,6 +324,7 @@ def rst(rsdt: xr.DataArray, rsut: xr.DataArray): var = rsdt - rsut var.name = "FSNTOA" + var.attrs["units"] = "W/m2" var.attrs["long_name"] = "TOA net shortwave flux" return var @@ -334,6 +335,7 @@ def rstcs(rsdt: xr.DataArray, rsutcs: xr.DataArray): var = rsdt - rsutcs var.name = "FSNTOAC" + var.attrs["units"] = "W/m2" var.attrs["long_name"] = "TOA net shortwave flux clear-sky" return var @@ -355,6 +357,7 @@ def lwcfsrf(flns: xr.DataArray, flnsc: xr.DataArray): var = -(flns - flnsc) var.name = "LCWFSRF" + var.attrs["units"] = "W/m2" var.attrs["long_name"] = "Surface longwave cloud forcing" return var @@ -365,6 +368,7 @@ def swcf(fsntoa: xr.DataArray, fsntoac: xr.DataArray): var = fsntoa - fsntoac var.name = "SWCF" + var.attrs["units"] = "W/m2" var.attrs["long_name"] = "TOA shortwave cloud forcing" return var @@ -375,6 +379,7 @@ def lwcf(flntoa: xr.DataArray, flntoac: xr.DataArray): var = flntoa - flntoac var.name = "LWCF" + var.attrs["units"] = "W/m2" var.attrs["long_name"] = 
"TOA longwave cloud forcing" return var @@ -385,6 +390,7 @@ def netcf2(swcf: xr.DataArray, lwcf: xr.DataArray): var = swcf + lwcf var.name = "NETCF" + var.attrs["units"] = "W/m2" var.attrs["long_name"] = "TOA net cloud forcing" return var @@ -400,6 +406,7 @@ def netcf4( var = fsntoa - fsntoac + flntoa - flntoac var.name = "NETCF" + var.attrs["units"] = "W/m2" var.attrs["long_name"] = "TOA net cloud forcing" return var @@ -410,6 +417,7 @@ def netcf2srf(swcf: xr.DataArray, lwcf: xr.DataArray): var = swcf + lwcf var.name = "NETCF_SRF" + var.attrs["units"] = "W/m2" var.attrs["long_name"] = "Surface net cloud forcing" return var @@ -425,6 +433,7 @@ def netcf4srf( var = fsntoa - fsntoac + flntoa - flntoac var.name = "NETCF4SRF" + var.attrs["units"] = "W/m2" var.attrs["long_name"] = "Surface net cloud forcing" return var @@ -446,6 +455,7 @@ def restom(fsnt: xr.DataArray, flnt: xr.DataArray): var = fsnt - flnt var.name = "RESTOM" + var.attrs["units"] = "W/m2" var.attrs["long_name"] = "TOM(top of model) Radiative flux" return var @@ -455,6 +465,7 @@ def restom3(swdn: xr.DataArray, swup: xr.DataArray, lwup: xr.DataArray): with xr.set_options(keep_attrs=True): var = swdn - swup - lwup + var.attrs["units"] = "W/m2" var.attrs["long_name"] = "TOM(top of model) Radiative flux" return var @@ -466,6 +477,7 @@ def restoa(fsnt: xr.DataArray, flnt: xr.DataArray): var = fsnt - flnt var.name = "RESTOA" + var.attrs["units"] = "W/m2" var.attrs["long_name"] = "TOA(top of atmosphere) Radiative flux" return var @@ -476,6 +488,7 @@ def flus(flds: xr.DataArray, flns: xr.DataArray): var = flns + flds var.name = "FLUS" + var.attrs["units"] = "W/m2" var.attrs["long_name"] = "Upwelling longwave flux at surface" return var @@ -486,6 +499,7 @@ def fsus(fsds: xr.DataArray, fsns: xr.DataArray): var = fsds - fsns var.name = "FSUS" + var.attrs["units"] = "W/m2" var.attrs["long_name"] = "Upwelling shortwave flux at surface" return var @@ -496,6 +510,7 @@ def netsw(rsds: xr.DataArray, rsus: xr.DataArray): 
var = rsds - rsus var.name = "FSNS" + var.attrs["units"] = "W/m2" var.attrs["long_name"] = "Surface SW Radiative flux" return var @@ -506,6 +521,7 @@ def netlw(rlds: xr.DataArray, rlus: xr.DataArray): var = -(rlds - rlus) var.name = "NET_FLUX_SRF" + var.attrs["units"] = "W/m2" var.attrs["long_name"] = "Surface LW Radiative flux" return var @@ -518,6 +534,7 @@ def netflux4( var = fsns - flns - lhflx - shflx var.name = "NET_FLUX_SRF" + var.attrs["units"] = "W/m2" var.attrs["long_name"] = "Surface Net flux" return var @@ -535,6 +552,7 @@ def netflux6( var = rsds - rsus + (rlds - rlus) - hfls - hfss var.name = "NET_FLUX_SRF" + var.attrs["units"] = "W/m2" var.attrs["long_name"] = "Surface Net flux" return var From f323ecd40d8daabd63b889148dc5e83e2af2de04 Mon Sep 17 00:00:00 2001 From: tomvothecoder Date: Tue, 5 Nov 2024 15:41:36 -0800 Subject: [PATCH 11/25] Add support for more land ocean var keys --- e3sm_diags/driver/__init__.py | 34 ++------------- e3sm_diags/driver/utils/dataset_xr.py | 62 ++++++++++++++++++++++----- e3sm_diags/driver/utils/regrid.py | 40 +++++++++++++++-- 3 files changed, 90 insertions(+), 46 deletions(-) diff --git a/e3sm_diags/driver/__init__.py b/e3sm_diags/driver/__init__.py index 7ceab7b9b..7bc491cf9 100644 --- a/e3sm_diags/driver/__init__.py +++ b/e3sm_diags/driver/__init__.py @@ -1,5 +1,7 @@ import os +import xarray as xr + from e3sm_diags import INSTALL_PATH # The path to the land ocean mask file, which is bundled with the installation @@ -8,34 +10,4 @@ # The keys for the land and ocean fraction variables in the # `LAND_OCEAN_MASK_PATH` file. -LAND_FRAC_KEY = "LANDFRAC" -OCEAN_FRAC_KEY = "OCNFRAC" - - -def _get_region_mask_var_key(region: str): - """Get the region's mask variable key. - - This variable key can be used to map the the variable data in a sdataset. - Only land and ocean regions are supported. - - Parameters - ---------- - region : str - The region. - - Returns - ------- - str - The variable key, either "LANDFRAC" or "OCNFRAC". 
- - Raises - ------ - ValueError - If the region passed is not land or ocean. - """ - if "land" in region: - return LAND_FRAC_KEY - elif "ocean" in region: - return OCEAN_FRAC_KEY - - raise ValueError(f"Only land and ocean regions are supported, not '{region}'.") +FRAC_REGION_KEYS = {"land": ("LANDFRAC", "landfrac"), "ocean": ("OCNFRAC", "ocnfrac")} diff --git a/e3sm_diags/driver/utils/dataset_xr.py b/e3sm_diags/driver/utils/dataset_xr.py index ee40074cf..21cd34bb1 100644 --- a/e3sm_diags/driver/utils/dataset_xr.py +++ b/e3sm_diags/driver/utils/dataset_xr.py @@ -29,7 +29,7 @@ DerivedVariableMap, DerivedVariablesMap, ) -from e3sm_diags.driver import LAND_FRAC_KEY, LAND_OCEAN_MASK_PATH, OCEAN_FRAC_KEY +from e3sm_diags.driver import FRAC_REGION_KEYS, LAND_OCEAN_MASK_PATH from e3sm_diags.driver.utils.climo_xr import CLIMO_FREQS, ClimoFreq, climo from e3sm_diags.driver.utils.regrid import HYBRID_SIGMA_KEYS from e3sm_diags.logger import custom_logger @@ -1457,18 +1457,58 @@ def _get_land_sea_mask(self, season: str) -> xr.Dataset: The xr.Dataset object containing the land sea mask variables "LANDFRAC" and "OCNFRAC". """ - try: - ds_land_frac = self.get_climo_dataset(LAND_FRAC_KEY, season) # type: ignore - ds_ocean_frac = self.get_climo_dataset(OCEAN_FRAC_KEY, season) # type: ignore - except IOError as e: - logger.info( - f"{e}. Using default land sea mask located at `{LAND_OCEAN_MASK_PATH}`." - ) + datasets = self._get_land_sea_datasets(season) - ds_mask = xr.open_dataset(LAND_OCEAN_MASK_PATH) - ds_mask = squeeze_time_dim(ds_mask) + if len(datasets) == 2: + ds_mask = xr.merge(datasets) else: - ds_mask = xr.merge([ds_land_frac, ds_ocean_frac]) + logger.info("No land sea mask datasets were found for the given season.") + ds_mask = self._get_default_land_sea_mask() + + return ds_mask + + def _get_land_sea_datasets(self, season: str) -> List[xr.Dataset]: + """Get the land and sea datasets for the given season. 
+ + Parameters + ---------- + season : str + The season to subset on. + + Returns + ------- + List[xr.Dataset] + The list of datasets containing the land and sea fraction variables. + """ + land_keys = FRAC_REGION_KEYS["land"] + ocn_keys = FRAC_REGION_KEYS["ocean"] + + datasets = [] + + for land_key, ocn_key in zip(land_keys, ocn_keys): + try: + ds_land = self.get_climo_dataset(land_key, season) # type: ignore + ds_ocn = self.get_climo_dataset(ocn_key, season) # type: ignore + except IOError: + pass + else: + datasets.append(ds_land) + datasets.append(ds_ocn) + + return datasets + + def _get_default_land_sea_mask(self) -> xr.Dataset: + """Get the default land sea mask dataset. + + Returns + ------- + xr.Dataset + The default land sea mask dataset. + """ + logger.info(f"Using default land sea mask located at `{LAND_OCEAN_MASK_PATH}`.") + + ds_mask = xr.open_dataset(LAND_OCEAN_MASK_PATH) + ds_mask = squeeze_time_dim(ds_mask) return ds_mask diff --git a/e3sm_diags/driver/utils/regrid.py b/e3sm_diags/driver/utils/regrid.py index 98e30a619..62016e7f1 100644 --- a/e3sm_diags/driver/utils/regrid.py +++ b/e3sm_diags/driver/utils/regrid.py @@ -6,7 +6,7 @@ import xcdat as xc from e3sm_diags.derivations.default_regions_xr import REGION_SPECS -from e3sm_diags.driver import _get_region_mask_var_key +from e3sm_diags.driver import FRAC_REGION_KEYS from e3sm_diags.logger import custom_logger if TYPE_CHECKING: @@ -189,8 +189,7 @@ def _apply_land_sea_mask( ds: xr.Dataset The dataset containing the variable. ds_mask : xr.Dataset - The dataset containing the land sea region mask variables, "LANDFRAC" - and "OCEANFRAC". + The dataset containing the land sea region mask variable(s). 
var_key : str The key the variable region : Literal["land", "ocean"] @@ -243,7 +242,7 @@ def _apply_land_sea_mask( ds_new = ds.copy() ds_new = _drop_unused_ilev_axis(ds) output_grid = ds_new.regridder.grid - mask_var_key = _get_region_mask_var_key(region) + mask_var_key = _get_region_mask_var_key(ds_mask, region) ds_mask_new = _drop_unused_ilev_axis(ds_mask) ds_mask_regrid = ds_mask_new.regridder.horizontal( @@ -457,6 +456,39 @@ def _drop_unused_ilev_axis(ds: xr.Dataset) -> xr.Dataset: return ds_new +def _get_region_mask_var_key(ds_mask: xr.Dataset, region: str): + """Get the region's mask variable key. + + This variable key can be used to map the variable data in a dataset. + Only land and ocean regions are supported. + + Parameters + ---------- + ds_mask : xr.Dataset + The dataset containing the land and ocean mask variables. + region : str + The region. + + Returns + ------- + str | None + The first land or ocean fraction key found in `ds_mask`, or None. + + Raises + ------ + ValueError + If the region passed is not land or ocean.
+ """ + region_keys = FRAC_REGION_KEYS.get(region) + + if region_keys is None: + raise ValueError(f"Only land and ocean regions are supported, not '{region}'.") + + for key in region_keys: + if key in ds_mask.data_vars: + return key + + def regrid_z_axis_to_plevs( dataset: xr.Dataset, var_key: str, From b34caaf2f2f9a2fa93a54912ac0f6b9ba01a01a6 Mon Sep 17 00:00:00 2001 From: tomvothecoder Date: Wed, 6 Nov 2024 09:55:36 -0800 Subject: [PATCH 12/25] Refactor land sea mask methods --- e3sm_diags/driver/utils/dataset_xr.py | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/e3sm_diags/driver/utils/dataset_xr.py b/e3sm_diags/driver/utils/dataset_xr.py index 21cd34bb1..b740a8ec1 100644 --- a/e3sm_diags/driver/utils/dataset_xr.py +++ b/e3sm_diags/driver/utils/dataset_xr.py @@ -1457,18 +1457,16 @@ def _get_land_sea_mask(self, season: str) -> xr.Dataset: The xr.Dataset object containing the land sea mask variables "LANDFRAC" and "OCNFRAC". """ - datasets = self._get_land_sea_datasets(season) + ds_mask = self._get_land_sea_mask_dataset(season) - if len(datasets) == 2: - ds_mask = xr.merge(datasets) - else: + if ds_mask is None: logger.info("No land sea mask datasets were found for the given season.") - ds_mask = self._get_default_land_sea_mask() + ds_mask = self._get_default_land_sea_mask_dataset() return ds_mask - def _get_land_sea_datasets(self, season: str) -> List[xr.Dataset]: - """Get the land and sea datasets for the given season. + def _get_land_sea_mask_dataset(self, season: str) -> xr.Dataset | None: + """Get the land sea mask dataset for the given season. Parameters ---------- @@ -1477,8 +1475,9 @@ def _get_land_sea_datasets(self, season: str) -> List[xr.Dataset]: Returns ------- - List[xr.Dataset] - The list of datasets containing the land and sea fraction variables. + xr.Dataset | None + The land sea mask dataset for the given season, or None if not + found. 
""" land_keys = FRAC_REGION_KEYS["land"] ocn_keys = FRAC_REGION_KEYS["ocean"] @@ -1495,9 +1494,12 @@ def _get_land_sea_datasets(self, season: str) -> List[xr.Dataset]: datasets.append(ds_land) datasets.append(ds_ocn) - return datasets + if len(datasets) == 2: + return xr.merge(datasets) + + return None - def _get_default_land_sea_mask(self) -> xr.Dataset: + def _get_default_land_sea_mask_dataset(self) -> xr.Dataset: """Get the default land sea mask dataset. Returns From 5115c6acdfa281291dbdfb8d539fdb80fcb65dd9 Mon Sep 17 00:00:00 2001 From: ChengzhuZhang Date: Thu, 7 Nov 2024 17:17:57 -0800 Subject: [PATCH 13/25] fixing for regions i.e. land_60N60S --- e3sm_diags/driver/utils/regrid.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/e3sm_diags/driver/utils/regrid.py b/e3sm_diags/driver/utils/regrid.py index 62016e7f1..d8c04561b 100644 --- a/e3sm_diags/driver/utils/regrid.py +++ b/e3sm_diags/driver/utils/regrid.py @@ -479,7 +479,9 @@ def _get_region_mask_var_key(ds_mask: xr.Dataset, region: str): ValueError If the region passed is not land or ocean. 
""" - region_keys = FRAC_REGION_KEYS.get(region) + for region_prefix in ["land", "ocean"]: + if region_prefix in region: + region_keys = FRAC_REGION_KEYS.get(region_prefix) if region_keys is None: raise ValueError(f"Only land and ocean regions are supported, not '{region}'.") From 35fbf20f4b7881acbbf5fd9d7854e9040a9f2371 Mon Sep 17 00:00:00 2001 From: tomvothecoder Date: Fri, 8 Nov 2024 12:31:34 -0800 Subject: [PATCH 14/25] Update order of time series subsetting to improve performance - The time slice should occur before loading the dataset into memory, otherwise the entire time series will be loaded which can be a large dataset Add testing scripts Remove accidental rebase changes --- .../892-bottleneck/run_script.cfg | 13 + .../892-bottleneck/run_script.py | 35 ++ e3sm_diags/driver/__init__.py | 2 - e3sm_diags/driver/utils/dataset_xr.py | 4 + e3sm_diags/plot/deprecated_lat_lon_plot.py | 360 ------------------ 5 files changed, 52 insertions(+), 362 deletions(-) create mode 100644 auxiliary_tools/cdat_regression_testing/892-bottleneck/run_script.cfg create mode 100644 auxiliary_tools/cdat_regression_testing/892-bottleneck/run_script.py delete mode 100644 e3sm_diags/plot/deprecated_lat_lon_plot.py diff --git a/auxiliary_tools/cdat_regression_testing/892-bottleneck/run_script.cfg b/auxiliary_tools/cdat_regression_testing/892-bottleneck/run_script.cfg new file mode 100644 index 000000000..3d06adcde --- /dev/null +++ b/auxiliary_tools/cdat_regression_testing/892-bottleneck/run_script.cfg @@ -0,0 +1,13 @@ +[#] +sets = ["lat_lon"] +case_id = "ERA5" +variables = ["U"] +ref_name = "ERA5" +reference_name = "ERA5 Reanalysis" +seasons = ["ANN", "01", "02", "03", "04", "05", "06", "07", "08", "09", "10", "11", "12", "DJF", "MAM", "JJA", "SON"] +plevs = [850.0] +test_colormap = "PiYG_r" +reference_colormap = "PiYG_r" +contour_levels = [-20, -15, -10, -8, -5, -3, -1, 1, 3, 5, 8, 10, 15, 20] +diff_levels = [-8, -6, -5, -4, -3, -2, -1, 1, 2, 3, 4, 5, 6, 8] +regrid_method = 
"bilinear" \ No newline at end of file diff --git a/auxiliary_tools/cdat_regression_testing/892-bottleneck/run_script.py b/auxiliary_tools/cdat_regression_testing/892-bottleneck/run_script.py new file mode 100644 index 000000000..a26cefb2e --- /dev/null +++ b/auxiliary_tools/cdat_regression_testing/892-bottleneck/run_script.py @@ -0,0 +1,35 @@ +import os +from e3sm_diags.parameter.core_parameter import CoreParameter +from e3sm_diags.run import runner + +param = CoreParameter() + + +param.reference_data_path = ( + "/global/cfs/cdirs/e3sm/diagnostics/observations/Atm/time-series" +) +param.test_data_path = "/global/cfs/cdirs/e3sm/chengzhu/eamxx/post/data/rgr" +param.test_name = "eamxx_decadal" +param.seasons = ["ANN"] +# param.save_netcdf = True + +param.ref_timeseries_input = True +# Years to slice the ref data, base this off the years in the filenames. +param.ref_start_yr = "1996" +param.ref_end_yr = "1996" + +prefix = "/global/cfs/cdirs/e3sm/www/cdat-migration-fy24/877-attr-err" +param.results_dir = os.path.join(prefix, "eamxx_decadal_1996_1107_edv3") + +runner.sets_to_run = [ + "lat_lon", + "zonal_mean_xy", + "zonal_mean_2d", + "zonal_mean_2d_stratosphere", + "polar", + "cosp_histogram", + "meridional_mean_2d", + "annual_cycle_zonal_mean", +] + +runner.run_diags([param]) diff --git a/e3sm_diags/driver/__init__.py b/e3sm_diags/driver/__init__.py index 7bc491cf9..630d0c539 100644 --- a/e3sm_diags/driver/__init__.py +++ b/e3sm_diags/driver/__init__.py @@ -1,7 +1,5 @@ import os -import xarray as xr - from e3sm_diags import INSTALL_PATH # The path to the land ocean mask file, which is bundled with the installation diff --git a/e3sm_diags/driver/utils/dataset_xr.py b/e3sm_diags/driver/utils/dataset_xr.py index b740a8ec1..464226adc 100644 --- a/e3sm_diags/driver/utils/dataset_xr.py +++ b/e3sm_diags/driver/utils/dataset_xr.py @@ -1157,12 +1157,16 @@ def _subset_time_series_dataset(self, ds: xr.Dataset, var: str) -> xr.Dataset: """ time_slice = self._get_time_slice(ds) 
ds_sub = ds.sel(time=time_slice).squeeze() + time_slice = self._get_time_slice(ds) + ds_sub = ds.sel(time=time_slice).squeeze() if self.is_sub_monthly: ds_sub = self._exclude_sub_monthly_coord_spanning_year(ds_sub) ds_sub = self._subset_vars_and_load(ds_sub, var) + ds_sub = self._subset_vars_and_load(ds_sub, var) + return ds_sub def _get_time_slice(self, ds: xr.Dataset) -> slice: diff --git a/e3sm_diags/plot/deprecated_lat_lon_plot.py b/e3sm_diags/plot/deprecated_lat_lon_plot.py deleted file mode 100644 index 4eaebcf80..000000000 --- a/e3sm_diags/plot/deprecated_lat_lon_plot.py +++ /dev/null @@ -1,360 +0,0 @@ -""" -WARNING: This module has been deprecated and replaced by -`e3sm_diags.plot.lat_lon_plot.py`. This file temporarily kept because -`e3sm_diags.plot.cartopy.aerosol_aeronet_plot.plot` references the -`plot_panel()` function. Once the aerosol_aeronet set is refactored, this -file can be deleted. -""" -from __future__ import print_function - -import os - -import cartopy.crs as ccrs -import cartopy.feature as cfeature -import cdutil -import matplotlib -import numpy as np -import numpy.ma as ma -from cartopy.mpl.ticker import LatitudeFormatter, LongitudeFormatter - -from e3sm_diags.derivations.default_regions import regions_specs -from e3sm_diags.driver.utils.general import get_output_dir -from e3sm_diags.logger import custom_logger -from e3sm_diags.plot import get_colormap - -matplotlib.use("Agg") -import matplotlib.colors as colors # isort:skip # noqa: E402 -import matplotlib.pyplot as plt # isort:skip # noqa: E402 - -logger = custom_logger(__name__) - -plotTitle = {"fontsize": 11.5} -plotSideTitle = {"fontsize": 9.5} - -# Position and sizes of subplot axes in page coordinates (0 to 1) -panel = [ - (0.1691, 0.6810, 0.6465, 0.2258), - (0.1691, 0.3961, 0.6465, 0.2258), - (0.1691, 0.1112, 0.6465, 0.2258), -] - -# Border padding relative to subplot axes for saving individual panels -# (left, bottom, right, top) in page coordinates -border = (-0.06, -0.03, 0.13, 
0.03) - - -def add_cyclic(var): - lon = var.getLongitude() - return var(longitude=(lon[0], lon[0] + 360.0, "coe")) - - -def get_ax_size(fig, ax): - bbox = ax.get_window_extent().transformed(fig.dpi_scale_trans.inverted()) - width, height = bbox.width, bbox.height - width *= fig.dpi - height *= fig.dpi - return width, height - - -def determine_tick_step(degrees_covered): - if degrees_covered > 180: - return 60 - if degrees_covered > 60: - return 30 - elif degrees_covered > 30: - return 10 - elif degrees_covered > 20: - return 5 - else: - return 1 - - -def plot_panel( # noqa: C901 - n, fig, proj, var, clevels, cmap, title, parameters, stats=None -): - var = add_cyclic(var) - lon = var.getLongitude() - lat = var.getLatitude() - var = ma.squeeze(var.asma()) - - # Contour levels - levels = None - norm = None - if len(clevels) > 0: - levels = [-1.0e8] + clevels + [1.0e8] - norm = colors.BoundaryNorm(boundaries=levels, ncolors=256) - - # ax.set_global() - region_str = parameters.regions[0] - region = regions_specs[region_str] - global_domain = True - full_lon = True - if "domain" in region.keys(): # type: ignore - # Get domain to plot - domain = region["domain"] # type: ignore - global_domain = False - else: - # Assume global domain - domain = cdutil.region.domain(latitude=(-90.0, 90, "ccb")) - kargs = domain.components()[0].kargs - lon_west, lon_east, lat_south, lat_north = (0, 360, -90, 90) - if "longitude" in kargs: - full_lon = False - lon_west, lon_east, _ = kargs["longitude"] - # Note cartopy Problem with gridlines across the dateline:https://github.com/SciTools/cartopy/issues/821. Region cross dateline is not supported yet. 
- if lon_west > 180 and lon_east > 180: - lon_west = lon_west - 360 - lon_east = lon_east - 360 - - if "latitude" in kargs: - lat_south, lat_north, _ = kargs["latitude"] - lon_covered = lon_east - lon_west - lon_step = determine_tick_step(lon_covered) - xticks = np.arange(lon_west, lon_east, lon_step) - # Subtract 0.50 to get 0 W to show up on the right side of the plot. - # If less than 0.50 is subtracted, then 0 W will overlap 0 E on the left side of the plot. - # If a number is added, then the value won't show up at all. - if global_domain or full_lon: - xticks = np.append(xticks, lon_east - 0.50) - proj = ccrs.PlateCarree(central_longitude=180) - else: - xticks = np.append(xticks, lon_east) - lat_covered = lat_north - lat_south - lat_step = determine_tick_step(lat_covered) - yticks = np.arange(lat_south, lat_north, lat_step) - yticks = np.append(yticks, lat_north) - - # Contour plot - ax = fig.add_axes(panel[n], projection=proj) - ax.set_extent([lon_west, lon_east, lat_south, lat_north], crs=proj) - cmap = get_colormap(cmap, parameters) - p1 = ax.contourf( - lon, - lat, - var, - transform=ccrs.PlateCarree(), - norm=norm, - levels=levels, - cmap=cmap, - extend="both", - ) - - # ax.set_aspect('auto') - # Full world would be aspect 360/(2*180) = 1 - ax.set_aspect((lon_east - lon_west) / (2 * (lat_north - lat_south))) - ax.coastlines(lw=0.3) - if not global_domain and "RRM" in region_str: - ax.coastlines(resolution="50m", color="black", linewidth=1) - state_borders = cfeature.NaturalEarthFeature( - category="cultural", - name="admin_1_states_provinces_lakes", - scale="50m", - facecolor="none", - ) - ax.add_feature(state_borders, edgecolor="black") - if title[0] is not None: - ax.set_title(title[0], loc="left", fontdict=plotSideTitle) - if title[1] is not None: - ax.set_title(title[1], fontdict=plotTitle) - if title[2] is not None: - ax.set_title(title[2], loc="right", fontdict=plotSideTitle) - ax.set_xticks(xticks, crs=ccrs.PlateCarree()) - ax.set_yticks(yticks, 
crs=ccrs.PlateCarree()) - lon_formatter = LongitudeFormatter(zero_direction_label=True, number_format=".0f") - lat_formatter = LatitudeFormatter() - ax.xaxis.set_major_formatter(lon_formatter) - ax.yaxis.set_major_formatter(lat_formatter) - ax.tick_params(labelsize=8.0, direction="out", width=1) - ax.xaxis.set_ticks_position("bottom") - ax.yaxis.set_ticks_position("left") - - # Color bar - cbax = fig.add_axes((panel[n][0] + 0.6635, panel[n][1] + 0.0215, 0.0326, 0.1792)) - cbar = fig.colorbar(p1, cax=cbax) - w, h = get_ax_size(fig, cbax) - - if levels is None: - cbar.ax.tick_params(labelsize=9.0, length=0) - - else: - maxval = np.amax(np.absolute(levels[1:-1])) - if maxval < 0.2: - fmt = "%5.3f" - pad = 28 - elif maxval < 10.0: - fmt = "%5.2f" - pad = 25 - elif maxval < 100.0: - fmt = "%5.1f" - pad = 25 - elif maxval > 9999.0: - fmt = "%.0f" - pad = 40 - else: - fmt = "%6.1f" - pad = 30 - - cbar.set_ticks(levels[1:-1]) - labels = [fmt % level for level in levels[1:-1]] - cbar.ax.set_yticklabels(labels, ha="right") - cbar.ax.tick_params(labelsize=9.0, pad=pad, length=0) - - # Min, Mean, Max - fig.text( - panel[n][0] + 0.6635, - panel[n][1] + 0.2107, - "Max\nMean\nMin", - ha="left", - fontdict=plotSideTitle, - ) - - fmt_m = [] - # printing in scientific notation if value greater than 10^5 - for i in range(len(stats[0:3])): - fs = "1e" if stats[i] > 100000.0 else "2f" - fmt_m.append(fs) - fmt_metrics = f"%.{fmt_m[0]}\n%.{fmt_m[1]}\n%.{fmt_m[2]}" - - fig.text( - panel[n][0] + 0.7635, - panel[n][1] + 0.2107, - # "%.2f\n%.2f\n%.2f" % stats[0:3], - fmt_metrics % stats[0:3], - ha="right", - fontdict=plotSideTitle, - ) - - # RMSE, CORR - if len(stats) == 5: - fig.text( - panel[n][0] + 0.6635, - panel[n][1] - 0.0105, - "RMSE\nCORR", - ha="left", - fontdict=plotSideTitle, - ) - fig.text( - panel[n][0] + 0.7635, - panel[n][1] - 0.0105, - "%.2f\n%.2f" % stats[3:5], - ha="right", - fontdict=plotSideTitle, - ) - - # grid resolution info: - if n == 2 and "RRM" in region_str: - dlat 
= lat[2] - lat[1] - dlon = lon[2] - lon[1] - fig.text( - panel[n][0] + 0.4635, - panel[n][1] - 0.04, - "Resolution: {:.2f}x{:.2f}".format(dlat, dlon), - ha="left", - fontdict=plotSideTitle, - ) - - -def plot(reference, test, diff, metrics_dict, parameter): - # Create figure, projection - fig = plt.figure(figsize=parameter.figsize, dpi=parameter.dpi) - proj = ccrs.PlateCarree() - - # Figure title - fig.suptitle(parameter.main_title, x=0.5, y=0.96, fontsize=18) - - # First two panels - min1 = metrics_dict["test"]["min"] - mean1 = metrics_dict["test"]["mean"] - max1 = metrics_dict["test"]["max"] - - plot_panel( - 0, - fig, - proj, - test, - parameter.contour_levels, - parameter.test_colormap, - (parameter.test_name_yrs, parameter.test_title, test.units), - parameter, - stats=(max1, mean1, min1), - ) - - if not parameter.model_only: - min2 = metrics_dict["ref"]["min"] - mean2 = metrics_dict["ref"]["mean"] - max2 = metrics_dict["ref"]["max"] - - plot_panel( - 1, - fig, - proj, - reference, - parameter.contour_levels, - parameter.reference_colormap, - (parameter.ref_name_yrs, parameter.reference_title, reference.units), - parameter, - stats=(max2, mean2, min2), - ) - - # Third panel - min3 = metrics_dict["diff"]["min"] - mean3 = metrics_dict["diff"]["mean"] - max3 = metrics_dict["diff"]["max"] - r = metrics_dict["misc"]["rmse"] - c = metrics_dict["misc"]["corr"] - plot_panel( - 2, - fig, - proj, - diff, - parameter.diff_levels, - parameter.diff_colormap, - (None, parameter.diff_title, test.units), - parameter, - stats=(max3, mean3, min3, r, c), - ) - - # Save figure - for f in parameter.output_format: - f = f.lower().split(".")[-1] - fnm = os.path.join( - get_output_dir(parameter.current_set, parameter), - parameter.output_file + "." 
+ f, - ) - plt.savefig(fnm) - logger.info(f"Plot saved in: {fnm}") - - # Save individual subplots - if parameter.ref_name == "": - panels = [panel[0]] - else: - panels = panel - - for f in parameter.output_format_subplot: - fnm = os.path.join( - get_output_dir(parameter.current_set, parameter), - parameter.output_file, - ) - page = fig.get_size_inches() - i = 0 - for p in panels: - # Extent of subplot - subpage = np.array(p).reshape(2, 2) - subpage[1, :] = subpage[0, :] + subpage[1, :] - subpage = subpage + np.array(border).reshape(2, 2) - subpage = list(((subpage) * page).flatten()) # type: ignore - extent = matplotlib.transforms.Bbox.from_extents(*subpage) - # Save subplot - fname = fnm + ".%i." % (i) + f - plt.savefig(fname, bbox_inches=extent) - - orig_fnm = os.path.join( - get_output_dir(parameter.current_set, parameter), - parameter.output_file, - ) - fname = orig_fnm + ".%i." % (i) + f - logger.info(f"Sub-plot saved in: {fname}") - - i += 1 - - plt.close() From 4ddf616ac1ba97eedd01b3fcde6e78ebc54962a0 Mon Sep 17 00:00:00 2001 From: Tom Vo Date: Thu, 5 Dec 2024 15:06:17 -0800 Subject: [PATCH 15/25] Revert accidental rebase changes --- .github/workflows/build_workflow.yml | 4 ++-- e3sm_diags/driver/qbo_driver.py | 5 ----- tests/e3sm_diags/driver/utils/test_regrid.py | 1 + 3 files changed, 3 insertions(+), 7 deletions(-) diff --git a/.github/workflows/build_workflow.yml b/.github/workflows/build_workflow.yml index f3557732c..a183e6572 100644 --- a/.github/workflows/build_workflow.yml +++ b/.github/workflows/build_workflow.yml @@ -5,7 +5,7 @@ on: branches: [main] pull_request: - branches: [main, cdat-migration-fy24] + branches: [main] workflow_dispatch: @@ -51,7 +51,7 @@ jobs: strategy: matrix: python-version: ["3.9", "3.10", "3.11", "3.12"] - container: + container: image: ghcr.io/e3sm-project/containers-e3sm-diags-test-data:e3sm-diags-test-data-0.0.2 steps: - id: skip_check diff --git a/e3sm_diags/driver/qbo_driver.py b/e3sm_diags/driver/qbo_driver.py index 
5bc0c5ec2..3379f4c46 100644 --- a/e3sm_diags/driver/qbo_driver.py +++ b/e3sm_diags/driver/qbo_driver.py @@ -125,11 +125,6 @@ def run_diag(parameter: QboParameter) -> QboParameter: test_dict["name"] = test_ds._get_test_name() ref_dict["name"] = ref_ds._get_ref_name() - try: - ref_dict["name"] = ref_ds._get_ref_name() - except AttributeError: - ref_dict["name"] = parameter.ref_name - _save_metrics_to_json(parameter, test_dict, "test") # type: ignore _save_metrics_to_json(parameter, ref_dict, "ref") # type: ignore diff --git a/tests/e3sm_diags/driver/utils/test_regrid.py b/tests/e3sm_diags/driver/utils/test_regrid.py index c02451345..6dc33fcda 100644 --- a/tests/e3sm_diags/driver/utils/test_regrid.py +++ b/tests/e3sm_diags/driver/utils/test_regrid.py @@ -231,6 +231,7 @@ def test_regrids_to_first_dataset_with_equal_latitude_points(self, tool): expected_a = ds_a.copy() expected_b = ds_a.copy() + if tool in ["esmf", "xesmf"]: expected_b.so.attrs["regrid_method"] = "conservative" From 64629a059ebf8ec9dd8b95a612e896fc393e9b5f Mon Sep 17 00:00:00 2001 From: tomvothecoder Date: Mon, 16 Dec 2024 12:08:53 -0800 Subject: [PATCH 16/25] Add run script for bottleneck --- .../cdat_regression_testing/892-bottleneck/run_script.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/auxiliary_tools/cdat_regression_testing/892-bottleneck/run_script.py b/auxiliary_tools/cdat_regression_testing/892-bottleneck/run_script.py index a26cefb2e..7ed286279 100644 --- a/auxiliary_tools/cdat_regression_testing/892-bottleneck/run_script.py +++ b/auxiliary_tools/cdat_regression_testing/892-bottleneck/run_script.py @@ -18,7 +18,7 @@ param.ref_start_yr = "1996" param.ref_end_yr = "1996" -prefix = "/global/cfs/cdirs/e3sm/www/cdat-migration-fy24/877-attr-err" +prefix = "/global/cfs/cdirs/e3sm/www/cdat-migration-fy24/892-bottleneck" param.results_dir = os.path.join(prefix, "eamxx_decadal_1996_1107_edv3") runner.sets_to_run = [ From 0009dea6dcfe06143486c3ff19f19332994a0e48 Mon Sep 17 
00:00:00 2001 From: tomvothecoder Date: Mon, 16 Dec 2024 15:21:05 -0800 Subject: [PATCH 17/25] Add FIXME comments for performance bottleneck --- .../cdat_regression_testing/892-bottleneck/run_script.py | 4 ++++ e3sm_diags/driver/lat_lon_driver.py | 2 ++ e3sm_diags/driver/utils/dataset_xr.py | 1 + 3 files changed, 7 insertions(+) diff --git a/auxiliary_tools/cdat_regression_testing/892-bottleneck/run_script.py b/auxiliary_tools/cdat_regression_testing/892-bottleneck/run_script.py index 7ed286279..452901cd6 100644 --- a/auxiliary_tools/cdat_regression_testing/892-bottleneck/run_script.py +++ b/auxiliary_tools/cdat_regression_testing/892-bottleneck/run_script.py @@ -1,3 +1,4 @@ +import sys import os from e3sm_diags.parameter.core_parameter import CoreParameter from e3sm_diags.run import runner @@ -21,6 +22,9 @@ prefix = "/global/cfs/cdirs/e3sm/www/cdat-migration-fy24/892-bottleneck" param.results_dir = os.path.join(prefix, "eamxx_decadal_1996_1107_edv3") +cfg_path = "auxiliary_tools/cdat_regression_testing/892-bottleneck/run_script.cfg" +sys.argv.extend(["--diags", cfg_path]) + runner.sets_to_run = [ "lat_lon", "zonal_mean_xy", diff --git a/e3sm_diags/driver/lat_lon_driver.py b/e3sm_diags/driver/lat_lon_driver.py index 1d36631bd..e93cd287d 100755 --- a/e3sm_diags/driver/lat_lon_driver.py +++ b/e3sm_diags/driver/lat_lon_driver.py @@ -73,6 +73,8 @@ def run_diag(parameter: CoreParameter) -> CoreParameter: parameter._set_name_yrs_attrs(test_ds, ref_ds, season) ds_test = test_ds.get_climo_dataset(var_key, season) + + # FIXME: `ds_ref` is slow with `_subset_vars_and_load()` ds_ref = _get_ref_climo_dataset(ref_ds, var_key, season) ds_land_sea_mask: xr.Dataset = test_ds._get_land_sea_mask(season) diff --git a/e3sm_diags/driver/utils/dataset_xr.py b/e3sm_diags/driver/utils/dataset_xr.py index 464226adc..30c22b162 100644 --- a/e3sm_diags/driver/utils/dataset_xr.py +++ b/e3sm_diags/driver/utils/dataset_xr.py @@ -1561,6 +1561,7 @@ def _subset_vars_and_load(self, ds: xr.Dataset, 
var: str | List[str]) -> xr.Data ds = ds[var + keep_vars] + # FIXME: `ds.load()` on `ds_ref` causes deadlock. ds.load(scheduler="sync") return ds From 9d00cbffa1e476472228551b090b82153e9329fe Mon Sep 17 00:00:00 2001 From: tomvothecoder Date: Tue, 17 Dec 2024 15:16:00 -0800 Subject: [PATCH 18/25] Add debug_ref_u script --- .../892-bottleneck/debug_ref_u.py | 78 +++++++++++++++++++ 1 file changed, 78 insertions(+) create mode 100644 auxiliary_tools/cdat_regression_testing/892-bottleneck/debug_ref_u.py diff --git a/auxiliary_tools/cdat_regression_testing/892-bottleneck/debug_ref_u.py b/auxiliary_tools/cdat_regression_testing/892-bottleneck/debug_ref_u.py new file mode 100644 index 000000000..f5229cd94 --- /dev/null +++ b/auxiliary_tools/cdat_regression_testing/892-bottleneck/debug_ref_u.py @@ -0,0 +1,78 @@ +""" +This script is used to debug the bottleneck issue in the reference u variable. +""" + +# %% +import timeit + +import xcdat as xc + +filepaths = [ + "/global/cfs/cdirs/e3sm/diagnostics/observations/Atm/time-series/ERA5/ua_197901_201912.nc" +] +time_slice = slice("1996-01-15", "1997-01-15", None) + +# %% +# Test case 1 - OPEN_MFDATASET() + "ua" dataset (76 GB) + subsetting + `.load()` +# Result: .load() hangs when using `open_mfdataset` +# ------------------------------------------------------------------------------ +ds_ua_omfd = xc.open_mfdataset( + filepaths[0], + add_bounds=["X", "Y", "T"], + decode_times=True, + use_cftime=True, + coords="minimal", + compat="override", +) +ds_ua_omfd_sub = ds_ua_omfd.sel(time=time_slice) + +# %% +start_time = timeit.default_timer() +ds_ua_omfd_sub.load() +elapsed = timeit.default_timer() - start_time +print(f"Time taken to load ds_xc_sub: {elapsed} seconds") + +# %% +# Test case 2 - OPEN_DATASET() + "ua" dataset (76 GB) + subsetting + `.load()` +# Result: load() works fine when using `open_dataset` +# ------------------------------------------------------------------------------ +ds_ua_od = xc.open_dataset( + filepaths[0], 
+ add_bounds=["X", "Y", "T"], + decode_times=True, + use_cftime=True, + # coords="minimal", + # compat="override", +) +ds_ua_od_sub = ds_ua_od.sel(time=time_slice) + +# %% +start_time = timeit.default_timer() +ds_ua_od_sub.load() +elapsed = timeit.default_timer() - start_time +print(f"Time taken to load ds_xc_sub: {elapsed} seconds") + +# %% +# Test case 3 - OPEN_MFDATASET() + "pr" dataset (2 GB) + subsetting + `.load()` +# Result: ds.load() works fine with pr variable, but not with ua variable +# Notes: pr is a 3D variable (time, lat, lon), ua is a 4D variable (time, lat, lon, plev). +# ------------------------------------------------------------------------------ +filepaths_pr = [ + "/global/cfs/cdirs/e3sm/diagnostics/observations/Atm/time-series/ERA5/pr_197901_201912.nc" +] +ds_pr = xc.open_mfdataset( + filepaths_pr, + add_bounds=["X", "Y", "T"], + decode_times=True, + use_cftime=True, + coords="minimal", + compat="override", +) + +# %% +# pr dataset is ~2 GB without subsetting. There is no need to subset.
+start_time = timeit.default_timer() +ds_pr.load() +elapsed = timeit.default_timer() - start_time +print(f"Time taken to load ds_xc_sub_0: {elapsed} seconds") +# %% From 3ba6ec5ec0aa72a48bf91d8af3974f1d359b1e47 Mon Sep 17 00:00:00 2001 From: tomvothecoder Date: Thu, 19 Dec 2024 15:28:19 -0600 Subject: [PATCH 19/25] Add debug scripts --- .../892-bottleneck/debug_ref_u.py | 17 +++++-- .../892-bottleneck/xr_mvce_e3sm_data.py | 20 ++++++++ .../892-bottleneck/xr_mvce_gh.py | 48 +++++++++++++++++++ 3 files changed, 80 insertions(+), 5 deletions(-) create mode 100644 auxiliary_tools/cdat_regression_testing/892-bottleneck/xr_mvce_e3sm_data.py create mode 100644 auxiliary_tools/cdat_regression_testing/892-bottleneck/xr_mvce_gh.py diff --git a/auxiliary_tools/cdat_regression_testing/892-bottleneck/debug_ref_u.py b/auxiliary_tools/cdat_regression_testing/892-bottleneck/debug_ref_u.py index f5229cd94..88787d4ef 100644 --- a/auxiliary_tools/cdat_regression_testing/892-bottleneck/debug_ref_u.py +++ b/auxiliary_tools/cdat_regression_testing/892-bottleneck/debug_ref_u.py @@ -5,10 +5,18 @@ # %% import timeit -import xcdat as xc +import xarray as xr +# Perlmutter +# ---------- +# filepaths = [ +# "/global/cfs/cdirs/e3sm/diagnostics/observations/Atm/time-series/ERA5/ua_197901_201912.nc" +# ] + +# LCRC +# ----- filepaths = [ - "/global/cfs/cdirs/e3sm/diagnostics/observations/Atm/time-series/ERA5/ua_197901_201912.nc" + "/lcrc/group/e3sm/diagnostics/observations/Atm/time-series/ERA5/ua_197901_201912.nc" ] time_slice = slice("1996-01-15", "1997-01-15", None) @@ -16,9 +24,8 @@ # Test case 1 - OPEN_MFDATASET() + "ua" dataset (76 GB) + subsetting + `.load()` # Result: .load() hangs when using `open_mfdataset` # ------------------------------------------------------------------------------ -ds_ua_omfd = xc.open_mfdataset( - filepaths[0], - add_bounds=["X", "Y", "T"], +ds_ua_omfd = xr.open_mfdataset( + filepaths, decode_times=True, use_cftime=True, coords="minimal", diff --git 
a/auxiliary_tools/cdat_regression_testing/892-bottleneck/xr_mvce_e3sm_data.py b/auxiliary_tools/cdat_regression_testing/892-bottleneck/xr_mvce_e3sm_data.py new file mode 100644 index 000000000..119f869b8 --- /dev/null +++ b/auxiliary_tools/cdat_regression_testing/892-bottleneck/xr_mvce_e3sm_data.py @@ -0,0 +1,20 @@ +# %% +import timeit + +import xarray as xr + +filepaths = [ + "/lcrc/group/e3sm/diagnostics/observations/Atm/time-series/ERA5/ua_197901_201912.nc" +] + +ds = xr.open_mfdataset(filepaths) + +ds_sub = ds.sel(time=slice("1996-01-15", "1997-01-15", None)) + +# %% +start_time = timeit.default_timer() +ds_sub.ua.load() +elapsed = timeit.default_timer() - start_time +print(f"Time taken to load ds_xc_sub: {elapsed} seconds") + +# %% diff --git a/auxiliary_tools/cdat_regression_testing/892-bottleneck/xr_mvce_gh.py b/auxiliary_tools/cdat_regression_testing/892-bottleneck/xr_mvce_gh.py new file mode 100644 index 000000000..2baf2ef65 --- /dev/null +++ b/auxiliary_tools/cdat_regression_testing/892-bottleneck/xr_mvce_gh.py @@ -0,0 +1,48 @@ +# %% +import numpy as np +import pandas as pd +import xarray as xr +import timeit + +import dask.array as da + +# %% +# Define the dimensions +time = 12 +plev = 37 +lat = 721 +lon = 1440 + +# Create the data arrays using dask. +data = da.random.random(size=(time, plev, lat, lon), chunks=(12, 37, 721, 1440)).astype( + np.float32 +) + +# Create the coordinates. +times = pd.date_range("2000-01-01", periods=time) +plevs = np.linspace(100000, 10, plev) +lats = np.linspace(-90, 90, lat) +lons = np.linspace(0, 360, lon, endpoint=False) + +# Create the dataset and write out to a file. +ds = xr.Dataset( + {"data": (["time", "plev", "lat", "lon"], data)}, + coords={"time": times, "plev": plevs, "lat": lats, "lon": lons}, +) +# %% +ds.to_netcdf("dask_bottleneck.nc") + +# %% +# Open the dataset. 
+ds_open = xr.open_mfdataset("dask_bottleneck.nc") + +# %% +# Load the file-backed dataset (`ds_open`, not the in-memory `ds`) into memory +start_time = timeit.default_timer() +ds_open.load() +end_time = timeit.default_timer() + +print(f"Time taken to load the dataset: {end_time - start_time} seconds") + + +# %% From e52c7da0830e6dbca93458797f7a45be20cb1d02 Mon Sep 17 00:00:00 2001 From: ChengzhuZhang Date: Thu, 12 Dec 2024 16:43:01 -0800 Subject: [PATCH 20/25] add run scripts examples --- examples/e3sm_diags_for_eamxx/README.md | 20 +++++++++++ examples/e3sm_diags_for_eamxx/nco.sh | 20 +++++++++++ .../run_e3sm_diags_1996.py | 34 +++++++++++++++++++ .../run_e3sm_diags_climo.py | 29 ++++++++++++++++ 4 files changed, 103 insertions(+) create mode 100644 examples/e3sm_diags_for_eamxx/README.md create mode 100644 examples/e3sm_diags_for_eamxx/nco.sh create mode 100644 examples/e3sm_diags_for_eamxx/run_e3sm_diags_1996.py create mode 100644 examples/e3sm_diags_for_eamxx/run_e3sm_diags_climo.py diff --git a/examples/e3sm_diags_for_eamxx/README.md b/examples/e3sm_diags_for_eamxx/README.md new file mode 100644 index 000000000..110abc495 --- /dev/null +++ b/examples/e3sm_diags_for_eamxx/README.md @@ -0,0 +1,20 @@ +# Initial Instruction to Run E3SM Diags on EAMxx output (e.g. monthly ne30pg2 output) + +0. Secure an interactive compute node and to activate the E3SM-Unified enviroment: + +salloc --nodes 1 --qos interactive --time 02:00:00 --constraint cpu --account e3sm +source /global/common/software/e3sm/anaconda_envs/load_latest_e3sm_unified_pm-cpu.sh +(The version of E3SM Diags (v3) that has EAMxx variable support is available in E3SM-Unified v1.11 (Mid Feb 2025 release. ) + +1. To remap monthly ne30pg2 data to regular lat-lon data to prepare for E3SM Diags run. An example usage based on a EAMxx decadal run is provided in following script ``nco.sh``. To run the script: + +bash nco.sh + +2. Generate a python script for running E3SM Diags. 
Two example is provided here: + +python run_e3sm_diags_1996.py: to compare 1996 climatology from EAMxx to available 1990 obs climatology +python run_e3sm_diags_climo.py: to compare 1996 climatology from EAMxx to pre-calculated obs climatology + + + + diff --git a/examples/e3sm_diags_for_eamxx/nco.sh b/examples/e3sm_diags_for_eamxx/nco.sh new file mode 100644 index 000000000..22be04ead --- /dev/null +++ b/examples/e3sm_diags_for_eamxx/nco.sh @@ -0,0 +1,20 @@ +#!/bin/bash + +source /global/common/software/e3sm/anaconda_envs/load_latest_e3sm_unified_pm-cpu.sh + +drc_in=/global/cfs/cdirs/e3sm/chengzhu/eamxx/run +drc_out=/global/cfs/cdirs/e3sm/chengzhu/eamxx/post/data +caseid=output.scream.decadal.monthlyAVG_ne30pg2.AVERAGE.nmonths_x1 + +# spoofed climatology files with data from 1995-09 to 1996-08 + +# create climatology files +cd ${drc_in};ls ${caseid}*1996-0[1-8]*.nc ${caseid}*1995-09*.nc ${caseid}*1995-1[0-2]*.nc | ncclimo -P eamxx --fml_nm=eamxx_decadal --yr_srt=1996 --yr_end=1996 --drc_out=$drc_out + + +map=/global/cfs/projectdirs/e3sm/zender/maps/map_ne30pg2_to_cmip6_180x360_traave.20231201.nc +# remaping climo files to regular lat-lon +cd $drc_out;ls *.nc | ncremap -P eamxx --prm_opt=time,lwband,swband,ilev,lev,plev,cosp_tau,cosp_cth,cosp_prs,dim2,ncol --map=${map} --drc_out=${drc_out}/rgr + +exit + diff --git a/examples/e3sm_diags_for_eamxx/run_e3sm_diags_1996.py b/examples/e3sm_diags_for_eamxx/run_e3sm_diags_1996.py new file mode 100644 index 000000000..3a44ebd06 --- /dev/null +++ b/examples/e3sm_diags_for_eamxx/run_e3sm_diags_1996.py @@ -0,0 +1,34 @@ +import os +from e3sm_diags.parameter.core_parameter import CoreParameter +from e3sm_diags.run import runner + +param = CoreParameter() + +#param.reference_data_path = '/global/cfs/cdirs/e3sm/diagnostics/observations/Atm/climatology' +#param.test_data_path = '/global/cfs/cdirs/e3sm/zhang40/e3sm_diags_for_EAMxx/data/Cess' +#param.reference_data_path = 
'/global/cfs/cdirs/e3sm/diagnostics/observations/Atm/climatology' +param.reference_data_path = '/global/cfs/cdirs/e3sm/diagnostics/observations/Atm/time-series' +param.test_data_path = '/global/cfs/cdirs/e3sm/chengzhu/eamxx/post/data/rgr' +param.test_name = 'eamxx_decadal' +param.seasons = ["ANN"] +#param.save_netcdf = True + +param.ref_timeseries_input = True +# Years to slice the ref data, base this off the years in the filenames. +param.ref_start_yr = "1996" +param.ref_end_yr = "1996" + +prefix = '/global/cfs/cdirs/e3sm/www/zhang40/tests/eamxx' +param.results_dir = os.path.join(prefix, 'eamxx_decadal_1996_1212_edv3') + +runner.sets_to_run = ["lat_lon", + "zonal_mean_xy", + "zonal_mean_2d", + "zonal_mean_2d_stratosphere", + "polar", + "cosp_histogram", + "meridional_mean_2d", + "annual_cycle_zonal_mean",] + +runner.run_diags([param]) + diff --git a/examples/e3sm_diags_for_eamxx/run_e3sm_diags_climo.py b/examples/e3sm_diags_for_eamxx/run_e3sm_diags_climo.py new file mode 100644 index 000000000..f8a84fe1d --- /dev/null +++ b/examples/e3sm_diags_for_eamxx/run_e3sm_diags_climo.py @@ -0,0 +1,29 @@ +import os +from e3sm_diags.parameter.core_parameter import CoreParameter +from e3sm_diags.run import runner + +param = CoreParameter() + +#param.reference_data_path = '/global/cfs/cdirs/e3sm/diagnostics/observations/Atm/climatology' +#param.test_data_path = '/global/cfs/cdirs/e3sm/zhang40/e3sm_diags_for_EAMxx/data/Cess' +param.reference_data_path = '/global/cfs/cdirs/e3sm/diagnostics/observations/Atm/climatology' +param.test_data_path = '/global/cfs/cdirs/e3sm/chengzhu/eamxx/post/data/rgr' +param.test_name = 'eamxx_decadal' +param.seasons = ["ANN"] +#param.save_netcdf = True + +prefix = '/global/cfs/cdirs/e3sm/www/zhang40/tests/eamxx' +param.results_dir = os.path.join(prefix, 'eamxx_decadal_1212') + +runner.sets_to_run = ["lat_lon", + "zonal_mean_xy", + "zonal_mean_2d", + "zonal_mean_2d_stratosphere", + "polar", + "cosp_histogram", + "meridional_mean_2d", + 
"annual_cycle_zonal_mean", + ] + +runner.run_diags([param]) + From 02b96e0f24a849842a35e8c35860f0a988f80a81 Mon Sep 17 00:00:00 2001 From: ChengzhuZhang Date: Fri, 20 Dec 2024 14:11:04 -0800 Subject: [PATCH 21/25] address review; clean up --- e3sm_diags/derivations/derivations.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/e3sm_diags/derivations/derivations.py b/e3sm_diags/derivations/derivations.py index 467357798..a176987f4 100644 --- a/e3sm_diags/derivations/derivations.py +++ b/e3sm_diags/derivations/derivations.py @@ -568,7 +568,7 @@ ( ("z_mid",), lambda z3: convert_units(z3, target_units="hectometer"), - ), # EAMxx ? + ), # EAMxx ] ), "PSL": { @@ -616,7 +616,7 @@ "LHFLX": { ("hfls",): rename, ("QFLX",): qflx_convert_to_lhflx_approxi, - ("surface_upward_latent_heat_flux",): rename, # EAMxx "s^-3 kg" + ("surface_upward_latent_heat_flux",): rename, # EAMxx }, "SHFLX": { ("hfss",): rename, @@ -876,7 +876,7 @@ "PS": {("ps",): rename}, "U10": { ("sfcWind",): rename, - ("wind_speed_10m",): rename, # EAMxx ? + ("wind_speed_10m",): rename, # EAMxx ("si10",): rename, }, "QREFHT": { @@ -904,7 +904,7 @@ }, "TGCLDCWP": { ("clwvi",): rename, - ("LiqWaterPath",): rename, # EAMxx Check if rain water is inlcuded? + ("LiqWaterPath",): rename, # EAMxx }, "O3": {("o3",): rename}, "PminusE": { From 1a9152dad07273d74faee9aa6630bc7123297524 Mon Sep 17 00:00:00 2001 From: Jill Chengzhu Zhang Date: Wed, 8 Jan 2025 13:28:22 -0800 Subject: [PATCH 22/25] Update README.md --- examples/e3sm_diags_for_eamxx/README.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/examples/e3sm_diags_for_eamxx/README.md b/examples/e3sm_diags_for_eamxx/README.md index 110abc495..2aa41d565 100644 --- a/examples/e3sm_diags_for_eamxx/README.md +++ b/examples/e3sm_diags_for_eamxx/README.md @@ -3,7 +3,9 @@ 0. 
Secure an interactive compute node and to activate the E3SM-Unified enviroment: salloc --nodes 1 --qos interactive --time 02:00:00 --constraint cpu --account e3sm + source /global/common/software/e3sm/anaconda_envs/load_latest_e3sm_unified_pm-cpu.sh + (The version of E3SM Diags (v3) that has EAMxx variable support is available in E3SM-Unified v1.11 (Mid Feb 2025 release. ) 1. To remap monthly ne30pg2 data to regular lat-lon data to prepare for E3SM Diags run. An example usage based on a EAMxx decadal run is provided in following script ``nco.sh``. To run the script: @@ -13,6 +15,7 @@ bash nco.sh 2. Generate a python script for running E3SM Diags. Two example is provided here: python run_e3sm_diags_1996.py: to compare 1996 climatology from EAMxx to available 1990 obs climatology + python run_e3sm_diags_climo.py: to compare 1996 climatology from EAMxx to pre-calculated obs climatology From 3bbc697c2b2052d43708e8b74dcae5e32722c61e Mon Sep 17 00:00:00 2001 From: Tom Vo Date: Fri, 10 Jan 2025 11:23:02 -0800 Subject: [PATCH 23/25] Update e3sm_diags/driver/lat_lon_driver.py --- e3sm_diags/driver/lat_lon_driver.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/e3sm_diags/driver/lat_lon_driver.py b/e3sm_diags/driver/lat_lon_driver.py index e93cd287d..1d36631bd 100755 --- a/e3sm_diags/driver/lat_lon_driver.py +++ b/e3sm_diags/driver/lat_lon_driver.py @@ -73,8 +73,6 @@ def run_diag(parameter: CoreParameter) -> CoreParameter: parameter._set_name_yrs_attrs(test_ds, ref_ds, season) ds_test = test_ds.get_climo_dataset(var_key, season) - - # FIXME: `ds_ref` is slow with `_subset_vars_and_load()` ds_ref = _get_ref_climo_dataset(ref_ds, var_key, season) ds_land_sea_mask: xr.Dataset = test_ds._get_land_sea_mask(season) From c70efd113ae69694c76877766c614b1e1bdbdbff Mon Sep 17 00:00:00 2001 From: Tom Vo Date: Wed, 15 Jan 2025 14:41:51 -0800 Subject: [PATCH 24/25] Apply suggestions from code review --- e3sm_diags/driver/utils/dataset_xr.py | 5 ----- 1 file changed, 5 deletions(-) 
diff --git a/e3sm_diags/driver/utils/dataset_xr.py b/e3sm_diags/driver/utils/dataset_xr.py index 30c22b162..b740a8ec1 100644 --- a/e3sm_diags/driver/utils/dataset_xr.py +++ b/e3sm_diags/driver/utils/dataset_xr.py @@ -1157,16 +1157,12 @@ def _subset_time_series_dataset(self, ds: xr.Dataset, var: str) -> xr.Dataset: """ time_slice = self._get_time_slice(ds) ds_sub = ds.sel(time=time_slice).squeeze() - time_slice = self._get_time_slice(ds) - ds_sub = ds.sel(time=time_slice).squeeze() if self.is_sub_monthly: ds_sub = self._exclude_sub_monthly_coord_spanning_year(ds_sub) ds_sub = self._subset_vars_and_load(ds_sub, var) - ds_sub = self._subset_vars_and_load(ds_sub, var) - return ds_sub def _get_time_slice(self, ds: xr.Dataset) -> slice: @@ -1561,7 +1557,6 @@ def _subset_vars_and_load(self, ds: xr.Dataset, var: str | List[str]) -> xr.Data ds = ds[var + keep_vars] - # FIXME: `ds.load()` on `ds_ref` causes deadlock. ds.load(scheduler="sync") return ds From 8609b10de76e5a94954ecfb5a6280378c156acb9 Mon Sep 17 00:00:00 2001 From: tomvothecoder Date: Wed, 15 Jan 2025 16:49:01 -0600 Subject: [PATCH 25/25] Fix pre-commit issues --- e3sm_diags/derivations/derivations.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/e3sm_diags/derivations/derivations.py b/e3sm_diags/derivations/derivations.py index a176987f4..b4da31044 100644 --- a/e3sm_diags/derivations/derivations.py +++ b/e3sm_diags/derivations/derivations.py @@ -568,7 +568,7 @@ ( ("z_mid",), lambda z3: convert_units(z3, target_units="hectometer"), - ), # EAMxx + ), # EAMxx ] ), "PSL": { @@ -616,7 +616,7 @@ "LHFLX": { ("hfls",): rename, ("QFLX",): qflx_convert_to_lhflx_approxi, - ("surface_upward_latent_heat_flux",): rename, # EAMxx + ("surface_upward_latent_heat_flux",): rename, # EAMxx }, "SHFLX": { ("hfss",): rename, @@ -876,7 +876,7 @@ "PS": {("ps",): rename}, "U10": { ("sfcWind",): rename, - ("wind_speed_10m",): rename, # EAMxx + ("wind_speed_10m",): rename, # EAMxx ("si10",): rename, }, 
"QREFHT": { @@ -904,7 +904,7 @@ }, "TGCLDCWP": { ("clwvi",): rename, - ("LiqWaterPath",): rename, # EAMxx + ("LiqWaterPath",): rename, # EAMxx }, "O3": {("o3",): rename}, "PminusE": {