diff --git a/docs/source/usage.rst b/docs/source/usage.rst index 17ef3deb..4b3ce986 100644 --- a/docs/source/usage.rst +++ b/docs/source/usage.rst @@ -71,6 +71,10 @@ Please be aware that some arguments are required or optional based on how ``e3sm (WARNING: NOT WORKING AS OF 1.8.2) -s, --serial Run in serial mode (by default parallel). Useful for debugging purposes. + --on-var-failure {ignore,fail,stop} Behavior when a variable fails: + ignore - continue and exit 0 (default) + fail - process all variables, exit 1 if any failed + stop - exit immediately on first failure, useful for debugging optional arguments (run settings): --realm The realm to process. Must be atm, lnd, mpaso or mpassi. Default is atm. @@ -162,8 +166,11 @@ when the output is intended for analysis, but is not suited for publication. Serial ^^^^^^ -For debugging purposes, or when running in a resource constrained environment, the "--serial" or "-s" boolean flag can be used to cause the conversion process -to be run in serial, using the main process. +For debugging purposes, or when running in a resource constrained environment, the "--serial" or "-s" boolean flag can be used to cause the conversion process to be run in serial, using the main process. + +On-var-failure +^^^^^^^^^^^^^^^ +This optional flag controls the behavior of the tool when a variable fails to process. The default behavior is to ignore the failure and continue processing the remaining variables, exiting with a return code of 0. The "fail" option will cause the tool to continue processing all variables, but exit with a return code of 1 if any variable failed. The "stop" option will cause the tool to exit immediately on the first variable failure, which is useful for debugging. 
Optional arguments (run settings) --------------------------------- diff --git a/e3sm_to_cmip/argparser.py b/e3sm_to_cmip/argparser.py index db5a9503..8fe53d8e 100644 --- a/e3sm_to_cmip/argparser.py +++ b/e3sm_to_cmip/argparser.py @@ -11,6 +11,7 @@ def setup_argparser() -> argparse.ArgumentParser: prog="e3sm_to_cmip", usage="%(prog)s [-h]", add_help=False, + formatter_class=argparse.RawTextHelpFormatter, ) # Argument groups to organize the numerous arguments printed by --help. @@ -38,19 +39,16 @@ def setup_argparser() -> argparse.ArgumentParser: "--info", action="store_true", help=( - "Produce information about the CMIP6 variables passed in the --var-list " - "argument and exit without doing any processing. There are three modes " - "for getting the info. (Mode 1) If you just pass the --info flag with the " - "--var-list then it will print out the handler information as yaml data for " - "the requested variable to your default output path (or to a file designated " - "by the --info-out path). (Mode 2) If the --freq is passed " - "along with the --tables-path, then the variable handler information will " - "only be output if the requested variables are present in the CMIP6 table matching the freq. " - "NOTE: For MPAS data, one must also include --realm mpaso (or mpassi) and --map no_map. " - "(Mode 3) For non-MPAS data, if the --freq is passed with the --tables-path, and the " - "--input-path, and the input-path points to raw unprocessed E3SM files, " - "then an additional check will me made for if the required raw " - "variables are present in the E3SM native output. " + "Produce information about the CMIP6 variables passed in the " + "--var-list argument and exit without processing. Modes:\n" + " 1) Default: Print handler info as YAML for requested variables " + "to the default output path, or to a file specified by --info-out.\n" + " 2) With --freq and --tables-path: Output handler info " + "only if variables are in the CMIP6 table matching the frequency. 
" + "For MPAS data, include --realm mpaso/mpassi and --map no_map.\n" + " 3) For non-MPAS data: With --freq , --tables-path, " + "and --input-path pointing to raw E3SM files, check if required raw " + "variables are present in the E3SM native output." ), ) optional_mode.add_argument( @@ -68,6 +66,18 @@ def setup_argparser() -> argparse.ArgumentParser: action="store_true", ) + optional_mode.add_argument( + "--on-var-failure", + choices=["ignore", "fail", "stop"], + default="ignore", + help=( + "Behavior when a variable fails:\n" + " 1) 'ignore' - continue and exit 0 (default)\n" + " 2) 'fail' - process all variables, exit 1 if any failed\n" + " 3) 'stop' - exit immediately on first failure, useful for debugging\n" + ), + ) + # ====================================================================== # Run settings. # ====================================================================== diff --git a/e3sm_to_cmip/cmor_handlers/handler.py b/e3sm_to_cmip/cmor_handlers/handler.py index f5a53e1e..823bc26d 100644 --- a/e3sm_to_cmip/cmor_handlers/handler.py +++ b/e3sm_to_cmip/cmor_handlers/handler.py @@ -26,6 +26,9 @@ # handled appropriately. TIME_DIMS = ["time", "time1", "time2"] +# Type alias for the dictionary representation of a VarHandler object. +VarHandlerDict = dict[str, Any] + class BaseVarHandler: def __init__( @@ -155,7 +158,7 @@ def __eq__(self, other): def __str__(self): return yaml.dump(self.__dict__, default_flow_style=False, sort_keys=False) - def to_dict(self) -> dict[str, Any]: + def to_dict(self) -> VarHandlerDict: """ Return __dict__ with additional entries to support existing e3sm_to_cmip functions. @@ -166,7 +169,7 @@ def to_dict(self) -> dict[str, Any]: Returns ------- - dict[str, Any] + VarHandlerDict __dict__ with additional entries. 
""" # TODO: Remove this method e3sm_to_cmip functions parse VarHandler diff --git a/e3sm_to_cmip/cmor_handlers/utils.py b/e3sm_to_cmip/cmor_handlers/utils.py index 4f1f73f6..7fad0463 100644 --- a/e3sm_to_cmip/cmor_handlers/utils.py +++ b/e3sm_to_cmip/cmor_handlers/utils.py @@ -1,8 +1,9 @@ import copy +import importlib.util import os +import sys from collections import defaultdict -from importlib.machinery import SourceFileLoader -from typing import Any, Literal, get_args +from typing import Literal, get_args import pandas as pd import yaml @@ -13,7 +14,7 @@ MPAS_HANDLER_DIR_PATH, ) from e3sm_to_cmip._logger import _setup_child_logger -from e3sm_to_cmip.cmor_handlers.handler import VarHandler +from e3sm_to_cmip.cmor_handlers.handler import VarHandler, VarHandlerDict from e3sm_to_cmip.util import FREQUENCY_TO_CMIP_TABLES, _get_table_for_non_monthly_freq logger = _setup_child_logger(__name__) @@ -33,7 +34,7 @@ def load_all_handlers( realm: Realm | MPASRealm, cmip_vars: list[str] -) -> list[dict[str, Any]]: +) -> tuple[list[VarHandlerDict], list[str]]: """Loads variable handlers based on a list of variable names. This function is used specifically for printing out the handler information @@ -48,20 +49,16 @@ def load_all_handlers( Returns ------- - list[dict[str, Any]]: - A list of the dictionary representation of VarHandler objects. - - Raises - ------ - KeyError - If no handlers are defined for a CMIP6 variable in `handlers.yaml`. + tuple[list[VarHandlerDict], list[str]]: + A list of the dictionary representation of VarHandler objects + and a list of variable names that are missing handlers if any. 
""" - handlers_by_var: dict[str, list[dict[str, Any]]] = _get_handlers_by_var() + handlers_by_var: dict[str, list[VarHandlerDict]] = _get_handlers_by_var() missing_handlers: list[str] = [] if realm in REALMS: - handlers: list[dict[str, Any]] = [] + handlers: list[VarHandlerDict] = [] for var in cmip_vars: var_handler = handlers_by_var.get(var) @@ -72,19 +69,13 @@ def load_all_handlers( handlers = handlers + var_handler - if len(missing_handlers) > 0: - logger.warning( - f"No handlers are defined for the variables: {missing_handlers}. " - "Make sure at least one variable handler is defined for each of these " - f"variables in `{HANDLER_DEFINITIONS_PATH}`." - ) else: - handlers = _get_mpas_handlers(cmip_vars) + handlers, missing_handlers = _get_mpas_handlers(cmip_vars) - return handlers + return handlers, missing_handlers -def _get_mpas_handlers(cmip_vars: list[str]): +def _get_mpas_handlers(cmip_vars: list[str]) -> tuple[list[VarHandlerDict], list[str]]: """Get MPAS variable handlers using the list of CMIP variables. All current MPAS variable handlers are defined as modules and there is only @@ -97,12 +88,13 @@ def _get_mpas_handlers(cmip_vars: list[str]): Returns ------- - KeyError - If no handlers are defined for the MPAS CMIP6 variable. + tuple[list[VarHandlerDict], list[str]]: + A list of the dictionary representation of VarHandler objects and + a list of variable names that are missing handlers if any. """ handlers = _get_handlers_from_modules(MPAS_HANDLER_DIR_PATH) - derived_handlers: list[dict[str, Any]] = [] + derived_handlers: list[VarHandlerDict] = [] missing_handlers: list[str] = [] for var in cmip_vars: @@ -121,7 +113,7 @@ def _get_mpas_handlers(cmip_vars: list[str]): f"`{MPAS_HANDLER_DIR_PATH}`." 
) - return derived_handlers + return derived_handlers, missing_handlers def derive_handlers( @@ -130,7 +122,7 @@ def derive_handlers( e3sm_vars: list[str], freq: Frequency, realm: Realm | MPASRealm, -) -> list[dict[str, Any]]: +) -> tuple[list[VarHandlerDict], list[str], list[str]]: """Derives the appropriate handler for each CMIP variable. For each CMIP variable the user wants to CMORize (`cmip_vars`), a variable @@ -162,25 +154,21 @@ def derive_handlers( Returns ------- - list[dict[str, Any]]: - A list of the dictionary representation of VarHandler objects. + tuple[list[VarHandlerDict], list[str], list[str]]: + A list of the dictionary representation of VarHandler objects, a list of + variable names that are missing handlers (if any), and a list of + variable names that could not be derived using the input E3SM variables + (if any). - Raises - ------ - KeyError - If no handlers are defined for a CMIP6 variable in `handlers.yaml`. - KeyError - If a handler could not be derived for a CMIP6 variable using the existing - E3SM variables. """ # TODO: Refactor the function parameters. - handlers_by_var: dict[str, list[dict[str, Any]]] = _get_handlers_by_var() - derived_handlers: list[dict[str, Any]] = [] + handlers_by_var: dict[str, list[VarHandlerDict]] = _get_handlers_by_var() + derived_handlers: list[VarHandlerDict] = [] # Stores variable names that are missing handlers or the handler cannot # be derived using the input E3SM variables. missing_handlers: list[str] = [] - cannot_derive: list[str] = [] + non_derivable_handlers: list[str] = [] for var in cmip_vars: var_handlers = handlers_by_var.get(var) @@ -197,33 +185,20 @@ def derive_handlers( var_handlers, freq, realm, cmip_tables_path, e3sm_vars ) - # If no handler could be derived, add it to the cannot_derive list. - # This can happen if the handler has no matching CMIP table for the - # requested frequency, or if the handler's raw E3SM variables do not - # match the input E3SM variables. 
+ # If a var handler is defined but could not be derived, add it to + # the non_derivable_handlers list. This can happen if the handler has no + # matching CMIP table for the requested frequency, or if the handler's + # raw E3SM variables do not match the input E3SM variables. if derived_handler is None: - cannot_derive.append(var) + non_derivable_handlers.append(var) continue derived_handlers.append(derived_handler) - if len(missing_handlers) > 0: - logger.warning( - f"No handlers are defined for the variables: {missing_handlers}. " - "Make sure handlers are defined for these variables in `handlers.yaml`." - ) - - if len(cannot_derive) > 0: - logger.warning( - f"No handlers could be derived for the variables: {cannot_derive}. " - "Make sure the input E3SM datasets have the variables needed for " - "derivation." - ) + return derived_handlers, missing_handlers, non_derivable_handlers - return derived_handlers - -def _get_handlers_by_var() -> dict[str, list[dict[str, Any]]]: +def _get_handlers_by_var() -> dict[str, list[VarHandlerDict]]: """Retrieve all variable handlers from YAML and legacy module sources. This function combines handlers loaded from a YAML configuration and from @@ -232,7 +207,7 @@ def _get_handlers_by_var() -> dict[str, list[dict[str, Any]]]: Returns ------- - dict[str, list[dict[str, Any]]] + dict[str, list[VarHandlerDict]] A dictionary mapping variable names to a list of handler definitions, where each handler is represented as a dictionary containing handler metadata and logic. @@ -244,12 +219,12 @@ def _get_handlers_by_var() -> dict[str, list[dict[str, Any]]]: return all_handlers -def _get_handlers_from_yaml() -> dict[str, list[dict[str, Any]]]: +def _get_handlers_from_yaml() -> dict[str, list[VarHandlerDict]]: """Get VarHandler objects using the `handlers.yaml` file. 
Returns ------- - dict[str, list[dict[str, Any]]] + dict[str, list[VarHandlerDict]] A dictionary, with the key being the CMIP6 variable ID and the value being a list of VarHandler objects. """ @@ -275,7 +250,7 @@ def _get_handlers_from_yaml() -> dict[str, list[dict[str, Any]]]: return dict(handlers) # type: ignore -def _get_handlers_from_modules(path: str) -> dict[str, list[dict[str, Any]]]: +def _get_handlers_from_modules(path: str) -> dict[str, list[VarHandlerDict]]: """Gets variable handlers defined in Python modules. A Python module variable handler defines information about a variable, @@ -315,7 +290,7 @@ def _get_handlers_from_modules(path: str) -> dict[str, list[dict[str, Any]]]: Returns ------- - dict[str, list[dict[str, Any]]] + dict[str, list[VarHandlerDict]] A dictionary of a list of dictionaries, with each dictionary defining a handler. """ @@ -363,8 +338,7 @@ def _get_handler_module(module_name: str, module_path: str): Parameters ---------- module_name : str - The name of the module, which should be the key of the variable (e.g., - "orog"). + The name of the module, which should be the key of the variable (e.g., "orog"). module_path : str The absolute path to the variable handler Python module. @@ -372,19 +346,30 @@ def _get_handler_module(module_name: str, module_path: str): ------- module The module. + + Raises + ------ + ImportError + If the module cannot be loaded from the specified path. 
""" - module = SourceFileLoader(module_name, module_path).load_module() + spec = importlib.util.spec_from_file_location(module_name, module_path) + if spec is None or spec.loader is None: + raise ImportError(f"Cannot load module {module_name} from {module_path}") + + module = importlib.util.module_from_spec(spec) + sys.modules[module_name] = module + spec.loader.exec_module(module) return module def _derive_handler( - var_handlers: list[dict[str, Any]], + var_handlers: list[VarHandlerDict], freq: Frequency, realm: Realm | MPASRealm, cmip_tables_path: str, e3sm_vars: list[str], -) -> dict[str, Any] | None: +) -> VarHandlerDict | None: """Attempts to derive a handler for a CMIP variable. The function first filters the handlers to those compatible with the @@ -395,7 +380,7 @@ def _derive_handler( Parameters ---------- - var_handlers : list[dict[str, Any]] + var_handlers : list[VarHandlerDict] List of variable handler dictionaries. freq : Frequency The requested output frequency. @@ -408,7 +393,7 @@ def _derive_handler( Returns ------- - dict[str, Any] | None + VarHandlerDict | None The derived handler dictionary if found, otherwise None. """ # Step 1: Filter handlers by frequency and attempt to derive a handler. @@ -436,8 +421,8 @@ def _derive_handler( def _select_handlers_for_freq( - handlers: list[dict[str, Any]], freq: Frequency -) -> list[dict[str, Any]]: + handlers: list[VarHandlerDict], freq: Frequency +) -> list[VarHandlerDict]: """ Filters a list of variable handlers to include only those with CMIP tables that are compatible with the requested frequency. @@ -450,14 +435,14 @@ def _select_handlers_for_freq( Parameters ---------- - handlers : list[dict[str, Any]] + handlers : list[VarHandlerDict] The list of variable handlers to filter. freq : Frequency The requested output frequency (e.g., "mon", "day", "1hr", etc.). Returns ------- - list[dict[str, Any]] + list[VarHandlerDict] A filtered list of variable handlers that match the requested frequency. 
""" handlers_filtered = [] @@ -500,8 +485,8 @@ def table_matches_freq(freq: str, handler_table: str) -> bool: def _find_handler_by_e3sm_vars( - e3sm_vars: list[str], handlers: list[dict[str, Any]] -) -> dict[str, Any] | None: + e3sm_vars: list[str], handlers: list[VarHandlerDict] +) -> VarHandlerDict | None: """Finds a handler a CMIP variable based on the input E3SM variables. This function loops through a list of VarHandler objects defined for a @@ -513,13 +498,13 @@ def _find_handler_by_e3sm_vars( ---------- e3sm_vars : list[str] The list of E3SM variables from the input files to use for CMORizing. - handlers : list[dict[str, Any]] + handlers : list[VarHandlerDict] The list of VarHandler objects as dictionaries, defined for a CMIP6 variable. Returns ------- - dict[str, Any] | None + VarHandlerDict | None A derived handler. Raises @@ -539,11 +524,11 @@ def _find_handler_by_e3sm_vars( def _adjust_handlers_cmip_table_for_freq( - var_handlers: list[dict[str, Any]], + var_handlers: list[VarHandlerDict], freq: Frequency, realm: Realm | MPASRealm, cmip_tables_path: str, -) -> list[dict[str, Any]] | None: +) -> list[VarHandlerDict] | None: """Update the 'table' field of each handler for the requested frequency and realm. This function is used as a fallback when no handler matches the requested @@ -553,7 +538,7 @@ def _adjust_handlers_cmip_table_for_freq( Parameters ---------- - var_handlers : list[dict[str, Any]] + var_handlers : list[VarHandlerDict] List of handler dictionaries. freq : Frequency Requested output frequency. @@ -564,7 +549,7 @@ def _adjust_handlers_cmip_table_for_freq( Returns ------- - list[dict[str, Any]] | None + list[VarHandlerDict] | None Handlers with updated 'table' fields, if any handlers could be adjusted. If no handlers could be adjusted, returns None. 
""" diff --git a/e3sm_to_cmip/runner.py b/e3sm_to_cmip/runner.py index 9c370a05..52445059 100755 --- a/e3sm_to_cmip/runner.py +++ b/e3sm_to_cmip/runner.py @@ -8,15 +8,13 @@ """ import argparse -import concurrent import os import signal import subprocess -import sys import tempfile import threading +from concurrent.futures import Future, as_completed from concurrent.futures import ProcessPoolExecutor as Pool -from concurrent.futures import as_completed from dataclasses import dataclass from datetime import datetime, timezone from pathlib import Path @@ -30,6 +28,7 @@ from e3sm_to_cmip import ROOT_HANDLERS_DIR, __version__, resources from e3sm_to_cmip._logger import _add_filehandler, _setup_child_logger from e3sm_to_cmip.argparser import parse_args +from e3sm_to_cmip.cmor_handlers.handler import VarHandlerDict from e3sm_to_cmip.cmor_handlers.utils import ( MPAS_REALMS, REALMS, @@ -43,6 +42,8 @@ from e3sm_to_cmip.util import ( _get_table_info, add_metadata, + exit_failure, + exit_success, find_atm_files, find_mpas_files, get_handler_info_msg, @@ -67,6 +68,7 @@ class CLIArguments: simple: bool serial: bool info: bool + on_var_failure: Literal["ignore", "fail", "stop"] # Run settings. num_proc: int @@ -110,6 +112,9 @@ def __init__(self, args: argparse.Namespace | list[str] | None = None): self.simple_mode: bool = parsed_args.simple self.serial_mode: bool = parsed_args.serial self.info_mode: bool = parsed_args.info + self.on_var_failure: Literal["ignore", "fail", "stop"] = ( + parsed_args.on_var_failure + ) # ====================================================================== # Run settings. 
@@ -165,23 +170,27 @@ def __init__(self, args: argparse.Namespace | list[str] | None = None): if self.serial_mode else "Parallel" ), - "Variable List": self.var_list, - "Input Path": self.input_path, - "Output Path": self.output_path, - "Precheck Path": self.precheck_path, - "Log Path": self.log_path, - "CMOR Log Path": self.cmor_log_dir, - "CMIP Metadata Path": self.new_metadata_path, + "Variable Failure Behavior (--on-var-failure)": self.on_var_failure, + "Variable List (--var-list)": f"{self.var_list} ({len(self.var_list)})", + "Input Path (--input-path)": self.input_path, + "Output Path (--output-path)": self.output_path, + "Precheck Path (--precheck)": self.precheck_path, + "Log Path (--logdir)": self.log_path, + "CMOR Log Path (--logdir)": self.cmor_log_dir, + "CMIP Metadata Path (--user-metadata)": self.new_metadata_path, "Temp Path for Processing MPAS Files": self.temp_path, - "Frequency": self.freq, - "Realm": self.realm, + "Frequency (--freq)": self.freq, + "Realm (--realm)": self.realm, } for key, value in config_details.items(): - logger.info(f" * {key}: {value}") + logger.info(f" * {key}: {value}") # Load the CMOR handlers based on the realm and variable list. - self.handlers = self._get_handlers() + self.handlers, self.missing_handlers, self.non_derivable_handlers = ( + self._get_handlers() + ) + self._validate_handlers() def _get_version_info(self) -> str: """Retrieve version information for the current codebase. @@ -228,7 +237,7 @@ def run(self): # ====================================================================== if self.info_mode: self._run_info_mode() - sys.exit(0) + exit_success() # Run e3sm_to_cmip to CMORize serially or in parallel. 
# ====================================================================== @@ -250,89 +259,10 @@ def run(self): if timer is not None: timer.cancel() - def _get_handlers(self): - if self.info_mode: - handlers = load_all_handlers(self.realm, self.var_list) - elif not self.info_mode and self.input_path is not None: - e3sm_vars = self._get_e3sm_vars(self.input_path) - logger.debug(f"Input dataset variables: {e3sm_vars}") - - if self.realm in REALMS: - handlers = derive_handlers( - cmip_tables_path=self.tables_path, - cmip_vars=self.var_list, - e3sm_vars=e3sm_vars, - freq=self.freq, - realm=self.realm, - ) - - cmip_to_e3sm_vars = { - handler["name"]: handler["raw_variables"] for handler in handlers - } - - logger.info("--------------------------------------") - logger.info("| Derived CMIP6 Variable Handlers") - logger.info("--------------------------------------") - for k, v in cmip_to_e3sm_vars.items(): - logger.info(f" * '{k}' -> {v}") - - elif self.realm in MPAS_REALMS: - handlers = _get_mpas_handlers(self.var_list) - - if len(handlers) == 0: - logger.error( - "No CMIP6 variable handlers were derived from the variables found " - "in using the E3SM input datasets." - ) - sys.exit(1) - - return handlers - - def _get_e3sm_vars(self, input_path: str) -> list[str]: - """Gets all E3SM variables from the input files to derive CMIP variables. - - This method walks through the input file path and reads each `.nc` file - into a xr.Dataset to retrieve the `data_vars` keys. These `data_vars` keys - are appended to a list, which is returned. - - NOTE: This method is not used to derive CMIP variables from MPAS input - files. - - Parameters - ---------- - input_path: str - The path to the input `.nc` files. - - Returns - ------- - list[str] - List of data variables in the input files. - - Raises - ------ - IndexError - If no data variables were found in the input files. 
- """ - paths: list[str] = [] - e3sm_vars: list[str] = [] - - for root, _, files in os.walk(input_path): - for filename in files: - if ".nc" in filename: - paths.append(str(Path(root, filename).absolute())) - - for path in paths: - ds = xr.open_dataset(path, decode_timedelta=True) - data_vars = list(ds.data_vars.keys()) - - e3sm_vars = e3sm_vars + data_vars - - if len(e3sm_vars) == 0: - raise IndexError( - f"No variables were found in the input file(s) at '{input_path}'." - ) - - return e3sm_vars + # NOTE: If the run was not successful with --on-var-failure=stop, + # or --on-var-failure=fail, the process would have already + # exited with sys.exit(1) in _finalize_failure_exit(). + exit_success() def _get_var_list(self, input_var_list: list[str]) -> list[str]: if len(input_var_list) == 1 and " " in input_var_list[0]: @@ -497,105 +427,317 @@ def _copy_user_metadata(self): fin.close() fout.close() + def _get_handlers(self) -> tuple[list[VarHandlerDict], list[str], list[str]]: + handlers: list[VarHandlerDict] = [] + missing_handlers: list[str] = [] + non_derivable_handlers: list[str] = [] + + if self.info_mode: + handlers, missing_handlers = load_all_handlers(self.realm, self.var_list) + elif not self.info_mode and self.input_path is not None: + e3sm_vars = self._get_e3sm_vars(self.input_path) + logger.debug(f"Input dataset variables: {e3sm_vars}") + + if self.realm in REALMS: + handlers, missing_handlers, non_derivable_handlers = derive_handlers( + cmip_tables_path=self.tables_path, + cmip_vars=self.var_list, + e3sm_vars=e3sm_vars, + freq=self.freq, + realm=self.realm, + ) + + elif self.realm in MPAS_REALMS: + handlers, missing_handlers = _get_mpas_handlers(self.var_list) + + return handlers, missing_handlers, non_derivable_handlers + + def _get_e3sm_vars(self, input_path: str) -> list[str]: + """Gets all E3SM variables from the input files to derive CMIP variables. 
+ + This method walks through the input file path and reads each `.nc` file + into a xr.Dataset to retrieve the `data_vars` keys. These `data_vars` keys + are appended to a list, which is returned. + + NOTE: This method is not used to derive CMIP variables from MPAS input + files. + + Parameters + ---------- + input_path: str + The path to the input `.nc` files. + + Returns + ------- + list[str] + List of data variables in the input files. + + Raises + ------ + IndexError + If no data variables were found in the input files. + """ + paths: list[str] = [] + e3sm_vars: list[str] = [] + + for root, _, files in os.walk(input_path): + for filename in files: + if ".nc" in filename: + paths.append(str(Path(root, filename).absolute())) + + for path in paths: + ds = xr.open_dataset(path, decode_timedelta=True) + data_vars = list(ds.data_vars.keys()) + + e3sm_vars = e3sm_vars + data_vars + + if len(e3sm_vars) == 0: + raise IndexError( + f"No variables were found in the input file(s) at '{input_path}'." + ) + + return e3sm_vars + + def _validate_handlers(self): + """Validates the derived CMOR handlers and logs a summary. + + If there are any missing or non-derivable handlers, they are logged + as errors. Depending on the `on_var_failure` setting, the program may + exit with a failure code ``sys.exit(1)`` if such issues are detected. + """ + self._log_handler_summary() + + if self._exit_due_to_handler_issues(): + exit_failure() + + def _log_handler_summary(self): + """ + Logs a summary of the derived CMOR handlers, including any missing or + non-derivable handlers. 
+ """ + if self.handlers: + cmip_to_e3sm_vars = { + handler["name"]: handler["raw_variables"] for handler in self.handlers + } + + logger.info("--------------------------------------") + logger.info("| SUCCESS: Derived Variable Handlers") + logger.info("--------------------------------------") + logger.info(f" * Count: {len(self.handlers)}") + logger.info(" * Variable Mappings (CMIP to E3SM):") + for k, v in cmip_to_e3sm_vars.items(): + logger.info(f" * '{k}' -> {v}") + + if self.missing_handlers: + logger.error("--------------------------------------") + logger.error("| NOTICE: Missing Handlers") + logger.error("---------------------------------------") + logger.error( + "Solution: Make sure handlers for these variables are defined " + "in `handlers.yaml`." + ) + logger.error(f" * Count: {len(self.missing_handlers)}") + logger.error(f" * Variables: {self.missing_handlers}") + + if self.non_derivable_handlers: + logger.error("--------------------------------------") + logger.error("| NOTICE: Non-derivable Handlers") + logger.error("---------------------------------------") + logger.error( + "Handlers were defined for these variables, but they could not " + "be derived using the input E3SM datasets." + ) + logger.error( + "Possible Reasons: 1) No matching CMIP table was found for the " + "requested frequency or 2) The input E3SM datasets don't have " + "the required variables." + ) + logger.error(f" * Count: {len(self.non_derivable_handlers)}") + logger.error(f" * Variables: {self.non_derivable_handlers}") + + def _exit_due_to_handler_issues(self) -> bool: + """ + Determines if the program should exit due to missing or non-derivable + handlers based on the ``on_var_failure`` setting. + + Returns + ------- + bool + True if the program should exit, False otherwise. + """ + if not self.handlers: + logger.error( + "No variable handlers are defined or derivable from the raw " + "variables found in the E3SM input datasets." 
+ ) + return True + + if self.missing_handlers or self.non_derivable_handlers: + if self.on_var_failure in ["stop", "fail"]: + logger.error( + "Exiting due to missing or non-derivable handlers with " + f"--on-var-failure={self.on_var_failure}." + ) + + return True + + return False + def _run_info_mode(self): # noqa: C901 - messages = [] + """ + Executes the "info mode" logic for the runner, providing information + about variable handlers, their inclusion in CMIP tables, and dataset + consistency. - # if the user just asked for the handler info - if self.freq == "mon" and not self.input_path and not self.tables_path: - for handler in self.handlers: - hand_msg = get_handler_info_msg(handler) - messages.append(hand_msg) - - # if the user asked if the variable is included in the table - # but didnt ask about the files in the inpath - elif self.freq and self.tables_path and not self.input_path: # info mode 2 - for handler in self.handlers: - table_info = _get_table_info(self.tables_path, handler["table"]) - if handler["name"] not in table_info["variable_entry"]: - logger.error( - f"Variable {handler['name']} is not included in the table " - f"{handler['table']}" - ) + The method operates in three modes based on the provided inputs: - continue - else: - if self.freq == "mon" and handler["table"] == "CMIP6_day.json": - continue - if (self.freq == "day" or self.freq == "3hr") and handler[ - "table" - ] == "CMIP6_Amon.json": - continue + 1. **Handler Information Mode**: Lists handler info when frequency is + "mon" and no input or table paths are provided. + 2. **Variable Inclusion Mode**: Checks if variables are in CMIP tables + when frequency and table paths are provided, but no input path. + 3. **Table-Dataset Consistency Mode**: Validates dataset and CMIP table + consistency when frequency, table paths, and input paths are given. + Logs errors for unsupported variables, missing raw variables, or invalid + frequency-table combinations. 
Outputs results to a YAML file or prints + them to the console. + + Raises + ------ + Exception + Logs and handles unexpected errors during execution. + + Notes + ----- + - Uses `self.handlers` to iterate over variable handlers. + - Outputs are written to `self.info_out_path` or `self.output_path` if + specified. + - Finalizes failure behavior using `_finalize_on_failure`. + """ + messages = [] + failed_handlers: list[str] = [] + + try: + # Info mode 1: only show handler info + # Use case: when a user just asks for the handler information. + if self.freq == "mon" and not self.input_path and not self.tables_path: + for handler in self.handlers: hand_msg = get_handler_info_msg(handler) messages.append(hand_msg) - elif self.freq and self.tables_path and self.input_path: # info mode 3 - file_path = next(Path(self.input_path).glob("*.nc")) - - with xr.open_dataset(file_path) as ds: + # Info mode 2: check variable inclusion in tables. + # Use case: when a user asked if the variable is included in the + # table but did not ask about the files in the inpath. + elif self.freq and self.tables_path and not self.input_path: for handler in self.handlers: + # FIXME: This check is duplicated in mode 3 below. Refactor. 
+ # --- DUPLICATE CODE --- table_info = _get_table_info(self.tables_path, handler["table"]) if handler["name"] not in table_info["variable_entry"]: - continue - - raw_vars = handler["raw_variables"] - has_vars = True - - for raw_var in raw_vars: - if raw_var not in ds.data_vars: - has_vars = False - - logger.error( - f"Variable {handler['name']} is not present in the input dataset" - ) + logger.error( + f"Variable {handler['name']} is not included in the table " + f"{handler['table']}" + ) - break + failed_handlers.append(handler["name"]) + self._stop_with_failed_handler(handler["name"]) - if not has_vars: continue + # --- DUPLICATE CODE --- - # We test here against the input "freq", because atmos mon - # data satisfies BOTH CMIP6_day.json AND CMIP6_mon.json, but - # we only want the latter in the "hand_msg" output. The vars - # "hass" and "rlut" have multiple freqs. + # FIXME: This check is duplicated in mode 3 below. Refactor. + # --- DUPLICATE CODE --- + # Skip irrelevant table-frequency combos. if self.freq == "mon" and handler["table"] == "CMIP6_day.json": continue - if (self.freq == "day" or self.freq == "3hr") and handler[ + if (self.freq in ["day", "3hr"]) and handler[ "table" ] == "CMIP6_Amon.json": continue + # --- DUPLICATE CODE --- - hand_msg = None - stat_msg = None + hand_msg = get_handler_info_msg(handler) + messages.append(hand_msg) - raw_vars = [] - raw_vars.extend(handler["raw_variables"]) + # Info mode 3: check table + dataset consistency + elif self.freq and self.tables_path and self.input_path: + filepath = next(Path(self.input_path).glob("*.nc")) + + with xr.open_dataset(filepath) as ds: + for handler in self.handlers: + # FIXME: This check is duplicated in mode 2 above. Refactor. + # --- DUPLICATE CODE --- + table_info = _get_table_info(self.tables_path, handler["table"]) + + # If the variable is not in the table, it is not supported + # and therefore logged as a failure. 
+ if handler["name"] not in table_info["variable_entry"]: + logger.error( + f"Variable {handler['name']} is not included in the table " + f"{handler['table']}" + ) - allpass = True - for raw_var in raw_vars: - if raw_var in ds.data_vars: + failed_handlers.append(handler["name"]) + self._stop_with_failed_handler(handler["name"]) + + continue + # --- DUPLICATE CODE --- + + # FIXME: This check is duplicated in mode 2 above. Refactor. + # --- DUPLICATE CODE --- + # Skip invalid frequency-table pairs. + # We test here against the input "freq", because atmos mon + # data satisfies BOTH CMIP6_day.json AND CMIP6_mon.json, but + # we only want the latter in the "hand_msg" output. The vars + # "hass" and "rlut" have multiple freqs. + if self.freq == "mon" and handler["table"] == "CMIP6_day.json": continue - allpass = False + if (self.freq in ["day", "3hr"]) and handler[ + "table" + ] == "CMIP6_Amon.json": + continue + # --- DUPLICATE CODE --- - if allpass: - stat_msg = f"Table={handler['table']}:Variable={handler['name']}:DataSupport=TRUE" - hand_msg = get_handler_info_msg(handler) - messages.append(hand_msg) - else: - stat_msg = f"Table={handler['table']}:Variable={handler['name']}:DataSupport=FALSE" - logger.info(stat_msg) + raw_vars = list(handler["raw_variables"]) + missing_vars = [v for v in raw_vars if v not in ds.data_vars] - if self.info_out_path is not None: - with open(self.info_out_path, "w") as outstream: - yaml.dump(messages, outstream) - elif self.output_path is not None: - yaml_filepath = os.path.join(self.output_path, "info.yaml") + if missing_vars: + logger.error( + f"Variable {handler['name']} is missing raw vars " + f"{missing_vars} in the input dataset" + ) + failed_handlers.append(handler["name"]) + self._stop_with_failed_handler(handler["name"]) - with open(yaml_filepath, "w") as outstream: - yaml.dump(messages, outstream) - else: - pprint(messages) + continue + + if not missing_vars: + # Passed all checks → supported + stat_msg = 
f"Table={handler['table']}:Variable={handler['name']}:DataSupport=TRUE" + hand_msg = get_handler_info_msg(handler) + messages.append(hand_msg) + else: + # Missing raw vars → not supported + stat_msg = f"Table={handler['table']}:Variable={handler['name']}:DataSupport=FALSE" + + logger.info(stat_msg) + + # Output log messages. + if self.info_out_path is not None: + with open(self.info_out_path, "w") as outstream: + yaml.dump(messages, outstream) + elif self.output_path is not None: + yaml_filepath = os.path.join(self.output_path, "info.yaml") + with open(yaml_filepath, "w") as outstream: + yaml.dump(messages, outstream) + else: + pprint(messages) + + except Exception as e: + logger.error(f"Unexpected error in info mode: {e}") + + self._finalize_on_failure(failed_handlers) def _run_by_mode(self) -> bool: """ @@ -622,15 +764,25 @@ def _run_by_mode(self) -> bool: return result - def _run_serial(self) -> bool: - """Run each of the handlers one at a time on the main process + def _run_serial(self) -> Literal[True]: + """Run each of the handlers one at a time on the main process. + + This method processes each handler sequentially and logs the status of + each handler. + + The behavior depends on the `self.on_var_failure` setting: + + - "ignore": Continues processing even if some handlers fail. + Always returns True. + - "fail": Exits with a status code of 1 if any handler fails. + - "stop": Terminates immediately upon the first failure and exits with a + status code of 1. Returns ------- - bool - True if the run was successful (even with failed handlers), - False if there was an exception raised beyond the CMORization - process. + Literal[True] + Always True, even if some handlers fail, unless `self.on_var_failure` + is set to "fail" or "stop", in which case the process may terminate early. 
""" num_handlers = len(self.handlers) num_success = 0 @@ -638,7 +790,7 @@ def _run_serial(self) -> bool: try: if self.realm != "atm": - pbar = tqdm(total=len(self.handlers)) + pbar = tqdm(total=num_handlers) logger.info("========== STARTING CMORIZING PROCESS ==========") for index, handler in enumerate(self.handlers): @@ -669,9 +821,6 @@ def _run_serial(self) -> bool: self.cmor_log_dir, handler_table, ) - except TypeError as te: - logger.error(f"TypeError in handler '{handler['name']}': {te}") - is_cmor_successful = False except Exception as e: logger.error(f"Exception in handler '{handler['name']}': {e}") is_cmor_successful = False @@ -684,6 +833,9 @@ def _run_serial(self) -> bool: failed_handlers, ) + if not is_cmor_successful: + self._stop_with_failed_handler(handler["name"]) + if self.realm != "atm": pbar.update(1) @@ -693,33 +845,36 @@ def _run_serial(self) -> bool: except Exception as error: logger.error(error) - return False - self._log_final_result(num_handlers, num_success, failed_handlers) + self._finalize_on_failure(failed_handlers) return True - def _run_parallel(self) -> Literal[True]: - """Run all the handlers in parallel using multiprocessing.Pool. + def _run_parallel(self) -> Literal[True]: # noqa: C901 + """Run all handlers in parallel using ProcessPoolExecutor. + + This method processes handlers concurrently, tracks their success or failure, + and logs the results. - Note, this method will always return True even if a handler fails to - cmorize. This is because the handlers are run in parallel and - the main process does not wait for them to finish. Instead, it - returns immediately after starting the handlers. The handlers - will log their own success or failure messages. + The behavior depends on the `self.on_var_failure` setting: - If the user wants to check if all handlers succeeded, they should - check the console output and/or log files in the output directory. + - "ignore": Continues processing even if some handlers fail. 
+ Always returns True. + - "fail": Exits with a status code of 1 if any handler fails. + - "stop": Terminates immediately upon the first failure and exits with a + status code of 1. + + TODO: Refactor this method to reduce its complexity (C901). Returns - -------- + ------- Literal[True] - Always True, even with failed handlers. Failed jobs are logged - for the user to debug. + True if the process completes, unless terminated early due to "fail" or "stop". """ pool = Pool(max_workers=self.num_proc) - jobs: list[concurrent.futures.Future] = [] - future_to_name = {} # Map each future to its handler name + futures: list[Future[bool]] = [] + # Map each future to its handler name + future_to_name = {} pbar = tqdm(total=len(self.handlers)) num_handlers = len(self.handlers) @@ -727,7 +882,7 @@ def _run_parallel(self) -> Literal[True]: failed_handlers: list[str] = [] logger.info("========== STARTING CMORIZING PROCESS ==========") - for _, handler in enumerate(self.handlers): + for handler in self.handlers: handler_method = handler["method"] handler_variables = handler["raw_variables"] handler_table = handler["table"] @@ -735,7 +890,7 @@ def _run_parallel(self) -> Literal[True]: try: if self.realm in MPAS_REALMS: - future = pool.submit( + future: Future[bool] = pool.submit( handler_method, vars_to_filepaths, self.tables_path, @@ -757,30 +912,36 @@ def _run_parallel(self) -> Literal[True]: ) continue - jobs.append(future) - # Map future job to handler name for progress tracking as they - # complete + futures.append(future) + # Map future job to handler name for progress tracking as they complete future_to_name[future] = handler.get("name", "unknown") # Execute the jobs in the pool and log their status as they complete. 
- for future in as_completed(jobs): - job_result = None - handler_name = future_to_name[future] # Get the correct handler name + for future in as_completed(futures): + handler_name = future_to_name[future] + future_result = None try: - job_result = future.result() + future_result = future.result() except Exception as e: - logger.error(e) + logger.error(f"Handler '{handler_name}' raised an exception: {e}") + future_result = False num_success, failed_handlers = self._log_handler_status( - job_result, handler_name, num_handlers, num_success, failed_handlers + future_result, handler_name, num_handlers, num_success, failed_handlers ) + if not future_result: + self._stop_with_failed_handler_parallel( + handler_name, pool, pbar, futures + ) + pbar.update(1) pbar.close() pool.shutdown() self._log_final_result(num_handlers, num_success, failed_handlers) + self._finalize_on_failure(failed_handlers) return True @@ -868,11 +1029,14 @@ def _log_handler_status( logger.info("STATUS UPDATE:") logger.info(f" * Successful handlers: {num_success} of {num_handlers}") logger.info(f" * Failed handlers: {len(failed_handlers)}") + if failed_handlers: logger.info(f" - Failed handler names: {', '.join(failed_handlers)}") else: logger.info(" - No failed handlers so far.") + logger.info("=" * 60) + return num_success, failed_handlers def _log_final_result( @@ -890,22 +1054,129 @@ def _log_final_result( failed_handlers : list[str] A list of handler names that failed during processing. 
""" - logger.info("========== FINAL RUN RESULTS ==========") - logger.info(f"* {num_successes} of {num_handlers} handlers succeeded.") + logger.info("") + logger.info("=======================================") + logger.info("| FINAL RUN SUMMARY") + logger.info("---------------------------------------") + logger.info(f" * Total variables (--var-list): {len(self.var_list)}") + logger.info(f" * Total handlers successfully derived: {num_handlers}") + logger.info( + f" * Total handlers successfully cmorized: {num_successes} / {num_handlers}" + ) if failed_handlers: logger.error( - "* The following handlers failed: " - + ", ".join(str(h) for h in failed_handlers) + f" * Total handlers failed to cmorize: {len(failed_handlers)}" ) - else: - logger.info("* All handlers completed successfully.") + logger.error(f" - Failed variables: {failed_handlers}") + + if self.missing_handlers: + logger.error( + f" * Total handlers missing (not defined in handlers.yaml): " + f"{len(self.missing_handlers)}" + ) + logger.error(f" - Includes: {self.missing_handlers}") + + if self.non_derivable_handlers: + logger.error( + f" * Total handlers non-derivable (defined but not derivable): " + f"{len(self.non_derivable_handlers)}" + ) + logger.error(f" - Includes: {self.non_derivable_handlers}") + logger.info("=======================================") def _timeout_exit(self): logger.info("Hit timeout limit, exiting") os.kill(os.getpid(), signal.SIGINT) + def _stop_with_failed_handler(self, handler_name: str) -> None: + """Gracefully stop with a failed handler in serial or info mode. + + If ``self.on_var_failure`` is set to "stop", the program will log an + error message and terminate execution immediately (exit code 1). + + Parameters + ---------- + handler_name : str + The name of the handler that failed. 
+ + Returns + ------- + None + """ + if self.on_var_failure == "stop": + logger.error( + f"Stopping immediately due to --on-var-failure=stop " + f"(failed at handler: '{handler_name}')" + ) + + exit_failure() + + def _stop_with_failed_handler_parallel( + self, handler_name: str, pool: Pool, pbar: tqdm, futures: list[Future[bool]] + ) -> None: + """Gracefully stop parallel processing when a handler fails. + + This method is triggered when a handler fails during parallel processing. + It logs the failure, shuts down the processing pool, closes the progress + bar, and waits for active futures to settle before exiting with a + failure code. + + The function ensures that pending jobs are canceled gracefully while + allowing running jobs to complete. Active futures are given a brief + timeout to settle before the process exits. The process exits with a + failure code after handling the failure. + + Parameters + ---------- + handler_name : str + The name of the handler that failed. + pool : Pool + The multiprocessing pool managing parallel tasks. + pbar : tqdm + The progress bar instance to be closed. + futures : list[Future[bool]] + A collection of futures representing the parallel tasks. + """ + logger.error( + f"Stopping immediately due to --on-var-failure=stop " + f"(failed handler: '{handler_name}')" + ) + # Gracefully cancel pending jobs, allow running ones to complete + pool.shutdown(cancel_futures=False) + pbar.close() + + # Wait briefly for active futures to settle (optional safety) + for f in futures: + if not f.done(): + try: + f.result(timeout=2) + except Exception: + pass + + exit_failure() + + def _finalize_on_failure(self, failed_handlers: list[str]) -> None: + """Finalize exit behavior based on --on-var-failure mode "fail". + + This method finalizes the process by checking for failed handlers and + exiting if necessary if ``self.on_var_failure`` is set to "fail" + (exit code 1). 
+ + Parameters + ---------- + failed_handlers : list[str] + A list of handler names that failed during processing. + """ + if failed_handlers and self.on_var_failure == "fail": + logger.error( + f"{len(failed_handlers)} handler(s) failed. " + f"Exiting with code 1 (--on-var-failure=fail)." + ) + + exit_failure() + def convert_parsed_args_to_data_class( self, parsed_args: argparse.Namespace ) -> CLIArguments: diff --git a/e3sm_to_cmip/util.py b/e3sm_to_cmip/util.py index 32f2a977..7ff34f35 100644 --- a/e3sm_to_cmip/util.py +++ b/e3sm_to_cmip/util.py @@ -2,7 +2,6 @@ import os import re import sys -import traceback from pathlib import Path from pprint import pprint @@ -76,11 +75,16 @@ } -def print_debug(e): - # TODO: Deprecate this function. We use Python logger now. - _, _, tb = sys.exc_info() - traceback.print_tb(tb) - print(e) +def exit_success(): + """A convenience function to exit with a success code (0).""" + logger.info("Exiting with success code (0).") + sys.exit(0) + + +def exit_failure(): + """A convenience function to exit with a failure code (1).""" + logger.error("Exiting with failure code (1).") + sys.exit(1) class colors: diff --git a/tests/cmor_handlers/test_utils.py b/tests/cmor_handlers/test_utils.py index 99ee9881..448dd03e 100644 --- a/tests/cmor_handlers/test_utils.py +++ b/tests/cmor_handlers/test_utils.py @@ -58,7 +58,7 @@ def test_prints_logger_warning_if_mpas_handler_does_not_exist_for_var(self, capl assert record.levelname == "WARNING" def test_updates_CMIP_table_for_variable_based_on_freq_param(self): - result = load_all_handlers("lnd", cmip_vars=["pr"]) + result, missing_handlers = load_all_handlers("lnd", cmip_vars=["pr"]) expected = [ dict( name="pr", @@ -93,9 +93,10 @@ def test_updates_CMIP_table_for_variable_based_on_freq_param(self): handler["method"] = handler["method"].__func__ assert result == expected + assert not missing_handlers def test_returns_handlers_based_on_var_list(self): - result = load_all_handlers("lnd", 
cmip_vars=["orog", "sftlf"]) + result, missing_handlers = load_all_handlers("lnd", cmip_vars=["orog", "sftlf"]) expected = [ { "name": "orog", @@ -124,9 +125,10 @@ def test_returns_handlers_based_on_var_list(self): handler["method"] = handler["method"].__name__ assert result == expected + assert not missing_handlers def test_returns_mpas_var_handlers_based_on_var_list(self): - result = load_all_handlers("Omon", cmip_vars=["so", "uo"]) + result, missing_handlers = load_all_handlers("Omon", cmip_vars=["so", "uo"]) expected = [ { "name": "so", @@ -155,6 +157,7 @@ def test_returns_mpas_var_handlers_based_on_var_list(self): handler["method"] = handler["method"].__name__ assert result == expected + assert not missing_handlers class TestDeriveHandlers: @@ -246,7 +249,7 @@ def test_raises_error_when_table_does_not_exist(self): def test_returns_handler_with_updated_referenced_CMIP6_table_based_on_freq_arg( self, ): - result = derive_handlers( + result, missing_handlers, non_derivable_handlers = derive_handlers( self.tables_path, cmip_vars=["pr"], e3sm_vars=["PRECL", "PRECC"], @@ -275,9 +278,11 @@ def test_returns_handler_with_updated_referenced_CMIP6_table_based_on_freq_arg( handler["method"] = handler["method"].__func__ assert result == expected + assert not missing_handlers + assert not non_derivable_handlers def test_returns_handler_objects_for_Amon_freq_based_on_existing_e3sm_vars(self): - result = derive_handlers( + result, missing_handlers, non_derivable_handlers = derive_handlers( self.tables_path, cmip_vars=["pr"], e3sm_vars=["PRECL", "PRECC"], @@ -305,9 +310,11 @@ def test_returns_handler_objects_for_Amon_freq_based_on_existing_e3sm_vars(self) handler["method"] = handler["method"].__func__ assert result == expected + assert not missing_handlers + assert not non_derivable_handlers def test_returns_handler_objects_for_day_freq_based_on_existing_e3sm_vars(self): - result = derive_handlers( + result, missing_handlers, non_derivable_handlers = derive_handlers( 
self.tables_path, cmip_vars=["pr"], e3sm_vars=["PRECT"], @@ -335,9 +342,11 @@ def test_returns_handler_objects_for_day_freq_based_on_existing_e3sm_vars(self): handler["method"] = handler["method"].__func__ assert result == expected + assert not missing_handlers + assert not non_derivable_handlers def test_loads_handler_from_module(self): - result = derive_handlers( + result, missing_handlers, non_derivable_handlers = derive_handlers( self.tables_path, cmip_vars=["orog", "sftlf"], e3sm_vars=["PHIS", "LANDFRAC"], @@ -372,23 +381,29 @@ def test_loads_handler_from_module(self): handler["method"] = handler["method"].__name__ assert result == expected + assert not missing_handlers + assert not non_derivable_handlers def test_returns_empty_list_when_no_cmip_vars_given(self): - result = derive_handlers( + result, missing_handlers, non_derivable_handlers = derive_handlers( self.tables_path, cmip_vars=[], e3sm_vars=["PRECT"], freq="mon", realm="atm", ) - assert result == [] + assert not result + assert not missing_handlers + assert not non_derivable_handlers def test_returns_empty_list_when_no_e3sm_vars_given(self): - result = derive_handlers( + result, missing_handlers, non_derivable_handlers = derive_handlers( self.tables_path, cmip_vars=["pr"], e3sm_vars=[], freq="mon", realm="atm", ) - assert result == [] + assert not result + assert not missing_handlers + assert "pr" in non_derivable_handlers