From 41f1220e2009129034becad6f1761ef11c6ec5db Mon Sep 17 00:00:00 2001 From: tomvothecoder Date: Tue, 18 Mar 2025 15:03:35 -0500 Subject: [PATCH 01/16] Initial work on logger revamp --- e3sm_to_cmip/__main__.py | 56 +++++++-- e3sm_to_cmip/_logger.py | 144 +++++++++++++++------- e3sm_to_cmip/cmor_handlers/vars/README.md | 1 - 3 files changed, 147 insertions(+), 54 deletions(-) diff --git a/e3sm_to_cmip/__main__.py b/e3sm_to_cmip/__main__.py index 5ecf3504..51d42e0b 100755 --- a/e3sm_to_cmip/__main__.py +++ b/e3sm_to_cmip/__main__.py @@ -6,6 +6,7 @@ import argparse import os +import pathlib import signal import sys import tempfile @@ -22,7 +23,13 @@ from tqdm import tqdm from e3sm_to_cmip import ROOT_HANDLERS_DIR, __version__, resources -from e3sm_to_cmip._logger import _setup_logger, _setup_root_logger +from e3sm_to_cmip._logger import ( + LOG_DIR, + LOG_FILENAME, + TIMESTAMP, + _setup_logger, + _update_root_logger_filepath, +) from e3sm_to_cmip.cmor_handlers.utils import ( MPAS_REALMS, REALMS, @@ -46,14 +53,18 @@ print_message, ) -os.environ["CDAT_ANONYMOUS_LOG"] = "false" - -warnings.filterwarnings("ignore") - +# TODO: This doesn't work. +# Suppress specific warnings related to ESMF and ESMPy version mismatch +warnings.filterwarnings( + "ignore", + message=r"ESMF installation version .*?, ESMPy version .*?", + category=UserWarning, + module=r".*?esmpy\.interface\.loadESMF", +) -# Setup the root logger and this module's logger. -log_filename = _setup_root_logger() -logger = _setup_logger(__name__, propagate=True) +# Set up a module level logger object. This logger object is a child of the +# root logger. +logger = _setup_logger(__name__) @dataclass @@ -136,6 +147,25 @@ def __init__(self, args: Optional[List[str]] = None): self.cmor_log_dir: Optional[str] = parsed_args.logdir self.user_metadata: Optional[str] = parsed_args.user_metadata self.custom_metadata: Optional[str] = parsed_args.custom_metadata + + # Sets a default output_path if it is not set. + if self.output_path is not None: + self.output_path = os.path.abspath(self.output_path) + else: + self.output_path = os.path.join("runs", f"run_{TIMESTAMP}") + + os.makedirs(self.output_path, exist_ok=True) + + # Make sure cmor log directory saves in the output directory. + self.cmor_log_dir = os.path.join(self.output_path, self.cmor_log_dir) # type: ignore + + # Make the provenance directory to store the log file. + prov_dir = os.path.join(self.output_path, LOG_DIR) + pathlib.Path(prov_dir).mkdir(parents=True, exist_ok=True) + + log_path = os.path.join(prov_dir, LOG_FILENAME) + _update_root_logger_filepath(log_path) + # Run the pre-check to determine if any of the variables have already # been CMORized. if self.precheck_path is not None: @@ -150,7 +180,8 @@ def __init__(self, args: Optional[List[str]] = None): logger.info(f" * precheck_path='{self.precheck_path}'") logger.info(f" * freq='{self.freq}'") logger.info(f" * realm='{self.realm}'") - logger.info(f" * Writing log output file to: {log_filename}") + logger.info(f" * Log Path: {log_path}") + logger.info(f" * CMOR log path: {self.cmor_log_dir}") self.handlers = self._get_handlers() @@ -497,8 +528,11 @@ def _setup_argparser(self) -> argparse.ArgumentParser: optional.add_argument( "--logdir", type=str, - default="./cmor_logs", - help="Where to put the logging output from CMOR.", + default="cmor_logs", + help=( + "The sub-directory that stores the CMOR logs. This sub-directory will " + "be stored under --output-path." + ), ) required_no_simple.add_argument( "-u", diff --git a/e3sm_to_cmip/_logger.py b/e3sm_to_cmip/_logger.py index 2e14df7c..5101de4e 100644 --- a/e3sm_to_cmip/_logger.py +++ b/e3sm_to_cmip/_logger.py @@ -1,68 +1,128 @@ +"""Logger module for setting up a custom logger.""" + import logging +import logging.handlers import os -import time -from datetime import datetime - -from pytz import UTC - +from datetime import datetime, timezone -def _setup_root_logger() -> str: # pragma: no cover - """Sets up the root logger. +TIMESTAMP = datetime.now(timezone.utc).strftime("%Y%m%d_%H%M%S_%f") +LOG_DIR = "prov" +LOG_FILENAME = f"{TIMESTAMP}.log" +LOG_FORMAT = ( + "%(asctime)s [%(levelname)s]: %(filename)s(%(funcName)s:%(lineno)s) >> %(message)s" +) +LOG_FILEMODE = "w" +LOG_LEVEL = logging.INFO - The logger module will write to a log file and stream the console - simultaneously. - The log files are saved in a `/logs` directory relative to where - `e3sm_to_cmip` is executed. - - Returns - ------- - str - The name of the logfile. - """ - os.makedirs("logs", exist_ok=True) - filename = f'logs/{UTC.localize(datetime.utcnow()).strftime("%Y%m%d_%H%M%S_%f")}' - log_format = "%(asctime)s_%(msecs)03d:%(levelname)s:%(funcName)s:%(message)s" +# Setup the root logger with a default log file. +# `force` is set to `True` to automatically remove root handlers whenever +# `basicConfig` called. This is required for cases where multiple e3sm_to_cmip +# runs are executed. Otherwise, the logger objects attempt to share the same +# root file reference (which gets deleted between runs), resulting in +# `FileNotFoundError: [Errno 2] No such file or directory: 'e3sm_diags_run.log'`. +# More info here: https://stackoverflow.com/a/49202811 +logging.basicConfig( + format=LOG_FORMAT, + filename=LOG_FILENAME, + filemode=LOG_FILEMODE, + level=LOG_LEVEL, + force=True, +) +logging.captureWarnings(True) - # Setup the logging module. - logging.basicConfig( - filename=filename, - format=log_format, - datefmt="%Y%m%d_%H%M%S", - level=logging.DEBUG, - ) - logging.captureWarnings(True) - logging.Formatter.converter = time.gmtime +# FIXME: A logger file is made initially then esmpy logs it to the file. +# Adding the below will result in duplicate console messages. +# # Add a console handler to display warnings in the console. This is useful +# # for when other package loggers raise warnings (e.g, NumPy, Xarray). +# console_handler = logging.StreamHandler() +# console_handler.setLevel(logging.INFO) +# console_handler.setFormatter(logging.Formatter(LOG_FORMAT)) +# logging.getLogger().addHandler(console_handler) - # Configure and add a console stream handler. - console_handler = logging.StreamHandler() - console_handler.setLevel(logging.INFO) - log_formatter = logging.Formatter(log_format) - console_handler.setFormatter(log_formatter) - logging.getLogger().addHandler(console_handler) - return filename +def _setup_logger(name: str, propagate: bool = True) -> logging.Logger: + """Sets up a custom logger that is a child of the root logger. + This custom logger inherits the root logger's handlers. -def _setup_logger(name, propagate=True) -> logging.Logger: - """Sets up a logger object. - - This function is intended to be used at the top-level of a module. + The log files are saved in a `/logs` directory relative to where + `e3sm_to_cmip` is executed. Parameters ---------- name : str Name of the file where this function is called. propagate : bool, optional - Propogate this logger module's messages to the root logger or not, by - default True. + Whether to propagate logger messages or not, by default True. Returns ------- logging.Logger The logger. + + Examples + --------- + Detailed information, typically of interest only when diagnosing problems: + + >>> logger.debug("") + + Confirmation that things are working as expected: + + >>> logger.info("") + + An indication that something unexpected happened, or indicative of some + problem in the near future: + + >>> logger.warning("") + + The software has not been able to perform some function due to a more + serious problem: + + >>> logger.error("") + + Similar to ``logger.error()``, but also outputs stack trace: + + >>> logger.exception("", exc_info=True) + + A serious error, indicating that the program itself may be unable to + continue running: + + >>> logger.critical("") """ logger = logging.getLogger(name) logger.propagate = propagate return logger + + +def _update_root_logger_filepath(log_path: str): + """Updates the log file path to the provenance directory. + + This method changes the log file path to a subdirectory named 'prov' + or a specified path. It updates the filename of the existing file handler + to the new path. + + Parameters + ---------- + log_path : str + The path to the log file.. + + Notes + ----- + - The method assumes that a logging file handler is already configured. + - The log file is closed and reopened at the new location. + - The log file mode is determined by the constant `LOG_FILEMODE`. + - The log file name is determined by the constant `LOG_FILENAME`. + """ + for handler in logging.root.handlers: + if isinstance(handler, logging.FileHandler): + if os.path.exists(log_path): + # Move the existing log file to the new path + os.rename(handler.baseFilename, log_path) + + handler.baseFilename = log_path + handler.stream.close() + handler.stream = open(log_path, LOG_FILEMODE) # type: ignore + + break diff --git a/e3sm_to_cmip/cmor_handlers/vars/README.md b/e3sm_to_cmip/cmor_handlers/vars/README.md index e83aec11..39232d52 100644 --- a/e3sm_to_cmip/cmor_handlers/vars/README.md +++ b/e3sm_to_cmip/cmor_handlers/vars/README.md @@ -6,4 +6,3 @@ to `handlers.yaml`. For example: - Some contain legacy `handle_simple()` functions that have since been refactored as a single `handle_simple()` function -- `phalf.py` and `pfull.py` still use `cdms2` and `cdutil` From 65a5d105a961a16a207c8b3b267e88d484ddd224 Mon Sep 17 00:00:00 2001 From: tomvothecoder Date: Tue, 18 Mar 2025 17:50:17 -0500 Subject: [PATCH 02/16] Fix info mode writing out yaml to `output_path` --- e3sm_to_cmip/__main__.py | 108 +++++++++++++++++---------------------- e3sm_to_cmip/_logger.py | 29 ++++------- 2 files changed, 57 insertions(+), 80 deletions(-) diff --git a/e3sm_to_cmip/__main__.py b/e3sm_to_cmip/__main__.py index 51d42e0b..f566c3e4 100755 --- a/e3sm_to_cmip/__main__.py +++ b/e3sm_to_cmip/__main__.py @@ -2,16 +2,12 @@ A python command line tool to turn E3SM model output into CMIP6 compatable data. """ -from __future__ import absolute_import, division, print_function, unicode_literals - import argparse import os -import pathlib import signal import sys import tempfile import threading -import warnings from concurrent.futures import ProcessPoolExecutor as Pool from dataclasses import dataclass from pathlib import Path @@ -24,9 +20,7 @@ from e3sm_to_cmip import ROOT_HANDLERS_DIR, __version__, resources from e3sm_to_cmip._logger import ( - LOG_DIR, LOG_FILENAME, - TIMESTAMP, _setup_logger, _update_root_logger_filepath, ) @@ -53,15 +47,6 @@ print_message, ) -# TODO: This doesn't work. -# Suppress specific warnings related to ESMF and ESMPy version mismatch -warnings.filterwarnings( - "ignore", - message=r"ESMF installation version .*?, ESMPy version .*?", - category=UserWarning, - module=r".*?esmpy\.interface\.loadESMF", -) - # Set up a module level logger object. This logger object is a child of the # root logger. logger = _setup_logger(__name__) @@ -148,56 +133,51 @@ def __init__(self, args: Optional[List[str]] = None): self.user_metadata: Optional[str] = parsed_args.user_metadata self.custom_metadata: Optional[str] = parsed_args.custom_metadata - # Sets a default output_path if it is not set. if self.output_path is not None: self.output_path = os.path.abspath(self.output_path) - else: - self.output_path = os.path.join("runs", f"run_{TIMESTAMP}") - os.makedirs(self.output_path, exist_ok=True) - # Make sure cmor log directory saves in the output directory. - self.cmor_log_dir = os.path.join(self.output_path, self.cmor_log_dir) # type: ignore - - # Make the provenance directory to store the log file. - prov_dir = os.path.join(self.output_path, LOG_DIR) - pathlib.Path(prov_dir).mkdir(parents=True, exist_ok=True) - - log_path = os.path.join(prov_dir, LOG_FILENAME) - _update_root_logger_filepath(log_path) + # Setup directories using the CLI argument paths (e.g., output dir). + # ====================================================================== + self._setup_dirs_with_paths() # Run the pre-check to determine if any of the variables have already # been CMORized. + # ====================================================================== if self.precheck_path is not None: self._run_precheck() + # Setup logger information and print out e3sm_to_cmip CLI arguments. + # ====================================================================== logger.info("--------------------------------------") logger.info("| E3SM to CMIP Configuration") logger.info("--------------------------------------") - logger.info(f" * var_list='{self.var_list}'") - logger.info(f" * input_path='{self.input_path}'") - logger.info(f" * output_path='{self.output_path}'") - logger.info(f" * precheck_path='{self.precheck_path}'") - logger.info(f" * freq='{self.freq}'") - logger.info(f" * realm='{self.realm}'") - logger.info(f" * Log Path: {log_path}") - logger.info(f" * CMOR log path: {self.cmor_log_dir}") + config_details = { + "Mode": ( + "Info" + if self.info_mode + else "Serial" + if self.serial_mode + else "Parallel" + ), + "Variable List": self.var_list, + "Input Path": self.input_path, + "Output Path": self.output_path, + "Precheck Path": self.precheck_path, + "Log Path": self.log_path, + "CMOR Log Path": self.cmor_log_dir, + "Frequency": self.freq, + "Realm": self.realm, + } + + for key, value in config_details.items(): + logger.info(f" * {key}: {value}") + + # Load the CMOR handlers based on the realm and variable list. self.handlers = self._get_handlers() def run(self): - # Setup logger information and print out e3sm_to_cmip CLI arguments. - # ====================================================================== - if self.output_path is not None: - self.new_metadata_path = os.path.join( - self.output_path, "user_metadata.json" - ) - - # Setup directories using the CLI argument paths (e.g., output dir). - # ====================================================================== - if not self.info_mode: - self._setup_dirs_with_paths() - # Run e3sm_to_cmip with info mode. # ====================================================================== if self.info_mode: @@ -663,9 +643,22 @@ def _run_precheck(self): self.var_list = new_var_list def _setup_dirs_with_paths(self): - # Create the output directory if it doesn't exist. - if not os.path.exists(self.output_path): # type: ignore - os.makedirs(self.output_path) # type: ignore + """Sets up various directories and paths required for e3sm_to_cmip. + + This method initializes paths for metadata, logs, and temporary storage. + It also updates the root logger's file path and copies user metadata + to the output directory if not in simple mode. + + Notes + ----- + If the environment variable `TMPDIR` is not set, a temporary directory + is created under the output path. + """ + self.new_metadata_path = os.path.join(self.output_path, "user_metadata.json") # type: ignore + self.cmor_log_dir = os.path.join(self.output_path, self.cmor_log_dir) # type: ignore + + self.log_path = os.path.join(self.output_path, LOG_FILENAME) # type: ignore + _update_root_logger_filepath(self.log_path) # Copy the user's metadata json file with the updated output directory if not self.simple_mode: @@ -681,10 +674,6 @@ def _setup_dirs_with_paths(self): tempfile.tempdir = temp_path def _run_info_mode(self): # noqa: C901 - logger.info("--------------------------------------") - logger.info("| Running E3SM to CMIP in Info Mode") - logger.info("--------------------------------------") - messages = [] # if the user just asked for the handler info @@ -771,23 +760,20 @@ def _run_info_mode(self): # noqa: C901 with open(self.info_out_path, "w") as outstream: yaml.dump(messages, outstream) elif self.output_path is not None: - with open(self.output_path, "w") as outstream: + yaml_filepath = os.path.join(self.output_path, "info.yaml") + + with open(yaml_filepath, "w") as outstream: yaml.dump(messages, outstream) else: pprint(messages) def _run(self): if self.serial_mode: - mode_str = "Serial" run_func = self._run_serial else: - mode_str = "Parallel" run_func = self._run_parallel try: - logger.info("--------------------------------------") - logger.info(f"| Running E3SM to CMIP in {mode_str}") - logger.info("--------------------------------------") status = run_func() except KeyboardInterrupt: print_message(" -- keyboard interrupt -- ", "error") diff --git a/e3sm_to_cmip/_logger.py b/e3sm_to_cmip/_logger.py index 5101de4e..c034b73e 100644 --- a/e3sm_to_cmip/_logger.py +++ b/e3sm_to_cmip/_logger.py @@ -2,7 +2,7 @@ import logging import logging.handlers -import os +import shutil from datetime import datetime, timezone TIMESTAMP = datetime.now(timezone.utc).strftime("%Y%m%d_%H%M%S_%f") @@ -11,10 +11,9 @@ LOG_FORMAT = ( "%(asctime)s [%(levelname)s]: %(filename)s(%(funcName)s:%(lineno)s) >> %(message)s" ) -LOG_FILEMODE = "w" +LOG_FILEMODE = "a" LOG_LEVEL = logging.INFO - # Setup the root logger with a default log file. # `force` is set to `True` to automatically remove root handlers whenever # `basicConfig` called. This is required for cases where multiple e3sm_to_cmip @@ -29,16 +28,8 @@ level=LOG_LEVEL, force=True, ) -logging.captureWarnings(True) -# FIXME: A logger file is made initially then esmpy logs it to the file. -# Adding the below will result in duplicate console messages. -# # Add a console handler to display warnings in the console. This is useful -# # for when other package loggers raise warnings (e.g, NumPy, Xarray). -# console_handler = logging.StreamHandler() -# console_handler.setLevel(logging.INFO) -# console_handler.setFormatter(logging.Formatter(LOG_FORMAT)) -# logging.getLogger().addHandler(console_handler) +logging.captureWarnings(True) def _setup_logger(name: str, propagate: bool = True) -> logging.Logger: @@ -99,14 +90,13 @@ def _setup_logger(name: str, propagate: bool = True) -> logging.Logger: def _update_root_logger_filepath(log_path: str): """Updates the log file path to the provenance directory. - This method changes the log file path to a subdirectory named 'prov' - or a specified path. It updates the filename of the existing file handler - to the new path. + This function updates the filename of the existing file handler to the new + path. Parameters ---------- log_path : str - The path to the log file.. + The path to the log file. Notes ----- @@ -117,9 +107,10 @@ def _update_root_logger_filepath(log_path: str): """ for handler in logging.root.handlers: if isinstance(handler, logging.FileHandler): - if os.path.exists(log_path): - # Move the existing log file to the new path - os.rename(handler.baseFilename, log_path) + # Move the log file to the new directory because it might contain + # warnings that are raised before this function is called + # (e.g., esmpy VersionWarning). + shutil.move(handler.baseFilename, log_path) handler.baseFilename = log_path handler.stream.close() From 11900c7d1669c3412dfcbec3b4941215effe2d26 Mon Sep 17 00:00:00 2001 From: tomvothecoder Date: Tue, 18 Mar 2025 18:07:49 -0500 Subject: [PATCH 03/16] Update `_cmor_write()` to capture errors in log file --- e3sm_to_cmip/cmor_handlers/handler.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/e3sm_to_cmip/cmor_handlers/handler.py b/e3sm_to_cmip/cmor_handlers/handler.py index c7e4510f..f0f11f79 100644 --- a/e3sm_to_cmip/cmor_handlers/handler.py +++ b/e3sm_to_cmip/cmor_handlers/handler.py @@ -645,7 +645,10 @@ def _cmor_write( """ output_data = self._get_output_data(ds) - cmor.write(var_id=cmor_var_id, data=output_data) + try: + cmor.write(var_id=cmor_var_id, data=output_data) + except Exception as e: + logger.error(f"Error writing variable {self.name} to file: {e}") def _cmor_write_with_time( self, From 064c35e8ca70200ab912a8e634dfc9084134e731 Mon Sep 17 00:00:00 2001 From: tomvothecoder Date: Wed, 19 Mar 2025 13:31:11 -0500 Subject: [PATCH 04/16] Encapsulate root logger instantiation in `_setup_root_logger()` - Rename `_setup_logger()` to `_setup_child_logger()` - Replace `_update_root_logger_filepath()` with `_add_filehandler()` - Encapsulating the root logger means no duplicate StreamHandler is added upon import which prevents duplicate messages from appearing --- e3sm_to_cmip/__main__.py | 19 +++-- e3sm_to_cmip/_logger.py | 78 +++++++++----------- e3sm_to_cmip/cmor_handlers/handler.py | 4 +- e3sm_to_cmip/cmor_handlers/utils.py | 4 +- e3sm_to_cmip/cmor_handlers/vars/areacella.py | 4 +- e3sm_to_cmip/cmor_handlers/vars/clisccp.py | 4 +- e3sm_to_cmip/cmor_handlers/vars/orog.py | 4 +- e3sm_to_cmip/cmor_handlers/vars/sftlf.py | 4 +- e3sm_to_cmip/mpas.py | 2 +- e3sm_to_cmip/util.py | 4 +- 10 files changed, 62 insertions(+), 65 deletions(-) diff --git a/e3sm_to_cmip/__main__.py b/e3sm_to_cmip/__main__.py index f566c3e4..d23cb8e2 100755 --- a/e3sm_to_cmip/__main__.py +++ b/e3sm_to_cmip/__main__.py @@ -21,8 +21,9 @@ from e3sm_to_cmip import ROOT_HANDLERS_DIR, __version__, resources from e3sm_to_cmip._logger import ( LOG_FILENAME, - _setup_logger, - _update_root_logger_filepath, + _add_filehandler, + _setup_child_logger, + _setup_root_logger, ) from e3sm_to_cmip.cmor_handlers.utils import ( MPAS_REALMS, @@ -47,9 +48,11 @@ print_message, ) -# Set up a module level logger object. This logger object is a child of the -# root logger. -logger = _setup_logger(__name__) +# Set up the root logger and module level logger. The module level logger is +# a child of the root logger. +_setup_root_logger() + +logger = _setup_child_logger(__name__) @dataclass @@ -657,8 +660,12 @@ def _setup_dirs_with_paths(self): self.new_metadata_path = os.path.join(self.output_path, "user_metadata.json") # type: ignore self.cmor_log_dir = os.path.join(self.output_path, self.cmor_log_dir) # type: ignore + # NOTE: Any warnings that appear before the log filehandler is + # instantiated will not be captured (e.g,. esmpy VersionWarning). + # However, they will still be captured by the console via a + # StreamHandler. self.log_path = os.path.join(self.output_path, LOG_FILENAME) # type: ignore - _update_root_logger_filepath(self.log_path) + _add_filehandler(self.log_path) # Copy the user's metadata json file with the updated output directory if not self.simple_mode: diff --git a/e3sm_to_cmip/_logger.py b/e3sm_to_cmip/_logger.py index c034b73e..aa014c72 100644 --- a/e3sm_to_cmip/_logger.py +++ b/e3sm_to_cmip/_logger.py @@ -2,43 +2,44 @@ import logging import logging.handlers -import shutil from datetime import datetime, timezone TIMESTAMP = datetime.now(timezone.utc).strftime("%Y%m%d_%H%M%S_%f") -LOG_DIR = "prov" LOG_FILENAME = f"{TIMESTAMP}.log" LOG_FORMAT = ( "%(asctime)s [%(levelname)s]: %(filename)s(%(funcName)s:%(lineno)s) >> %(message)s" ) -LOG_FILEMODE = "a" +LOG_FILEMODE = "w" LOG_LEVEL = logging.INFO -# Setup the root logger with a default log file. -# `force` is set to `True` to automatically remove root handlers whenever -# `basicConfig` called. This is required for cases where multiple e3sm_to_cmip -# runs are executed. Otherwise, the logger objects attempt to share the same -# root file reference (which gets deleted between runs), resulting in -# `FileNotFoundError: [Errno 2] No such file or directory: 'e3sm_diags_run.log'`. -# More info here: https://stackoverflow.com/a/49202811 -logging.basicConfig( - format=LOG_FORMAT, - filename=LOG_FILENAME, - filemode=LOG_FILEMODE, - level=LOG_LEVEL, - force=True, -) -logging.captureWarnings(True) +def _setup_root_logger(): + """Configures the root logger. + + This function sets up the root logger with a predefined format and log level. + It also enables capturing of warnings issued by the `warnings` module and + redirects them to the logging system. + + Notes + ----- + - The `force=True` parameter ensures that any existing logging configuration + is overridden. + - The file handler is added dynamically to the root logger later in the + E3SMtoCMIP class once the log file path is known. + """ + logging.basicConfig( + format=LOG_FORMAT, + level=LOG_LEVEL, + force=True, + ) + logging.captureWarnings(True) -def _setup_logger(name: str, propagate: bool = True) -> logging.Logger: - """Sets up a custom logger that is a child of the root logger. - This custom logger inherits the root logger's handlers. +def _setup_child_logger(name: str, propagate: bool = True) -> logging.Logger: + """Sets up a logger that is a child of the root logger. - The log files are saved in a `/logs` directory relative to where - `e3sm_to_cmip` is executed. + This child logger inherits the root logger's handlers. Parameters ---------- @@ -50,7 +51,7 @@ def _setup_logger(name: str, propagate: bool = True) -> logging.Logger: Returns ------- logging.Logger - The logger. + The child logger. Examples --------- @@ -87,11 +88,10 @@ def _setup_logger(name: str, propagate: bool = True) -> logging.Logger: return logger -def _update_root_logger_filepath(log_path: str): - """Updates the log file path to the provenance directory. +def _add_filehandler(log_path: str): + """Adds a file handler to the root logger dynamically. - This function updates the filename of the existing file handler to the new - path. + Adding the file handler will also create the log file automatically. Parameters ---------- @@ -100,20 +100,10 @@ def _update_root_logger_filepath(log_path: str): Notes ----- - - The method assumes that a logging file handler is already configured. - - The log file is closed and reopened at the new location. - - The log file mode is determined by the constant `LOG_FILEMODE`. - - The log file name is determined by the constant `LOG_FILENAME`. + Any warnings that appear before the log filehandler is instantiated will not + be captured (e.g,. esmpy VersionWarning). However, they will still be + captured by the console via the default StreamHandler. """ - for handler in logging.root.handlers: - if isinstance(handler, logging.FileHandler): - # Move the log file to the new directory because it might contain - # warnings that are raised before this function is called - # (e.g., esmpy VersionWarning). - shutil.move(handler.baseFilename, log_path) - - handler.baseFilename = log_path - handler.stream.close() - handler.stream = open(log_path, LOG_FILEMODE) # type: ignore - - break + file_handler = logging.FileHandler(log_path, mode=LOG_FILEMODE) + file_handler.setFormatter(logging.Formatter(LOG_FORMAT)) + logging.root.addHandler(file_handler) diff --git a/e3sm_to_cmip/cmor_handlers/handler.py b/e3sm_to_cmip/cmor_handlers/handler.py index f0f11f79..8d90bceb 100644 --- a/e3sm_to_cmip/cmor_handlers/handler.py +++ b/e3sm_to_cmip/cmor_handlers/handler.py @@ -11,11 +11,11 @@ import xcdat as xc import yaml -from e3sm_to_cmip._logger import _setup_logger +from e3sm_to_cmip._logger import _setup_child_logger from e3sm_to_cmip.cmor_handlers import FILL_VALUE, _formulas from e3sm_to_cmip.util import _get_table_for_non_monthly_freq -logger = _setup_logger(__name__) +logger = _setup_child_logger(__name__) # The names for valid hybrid sigma levels. HYBRID_SIGMA_LEVEL_NAMES = [ diff --git a/e3sm_to_cmip/cmor_handlers/utils.py b/e3sm_to_cmip/cmor_handlers/utils.py index 8f946e8e..b6f23f2e 100644 --- a/e3sm_to_cmip/cmor_handlers/utils.py +++ b/e3sm_to_cmip/cmor_handlers/utils.py @@ -11,11 +11,11 @@ LEGACY_HANDLER_DIR_PATH, MPAS_HANDLER_DIR_PATH, ) -from e3sm_to_cmip._logger import _setup_logger +from e3sm_to_cmip._logger import _setup_child_logger from e3sm_to_cmip.cmor_handlers.handler import VarHandler from e3sm_to_cmip.util import _get_table_for_non_monthly_freq -logger = _setup_logger(__name__) +logger = _setup_child_logger(__name__) # Type aliases Frequency = Literal["mon", "day", "6hrLev", "6hrPlev", "6hrPlevPt", "3hr", "1hr"] diff --git a/e3sm_to_cmip/cmor_handlers/vars/areacella.py b/e3sm_to_cmip/cmor_handlers/vars/areacella.py index 9f0d50b8..5014304d 100644 --- a/e3sm_to_cmip/cmor_handlers/vars/areacella.py +++ b/e3sm_to_cmip/cmor_handlers/vars/areacella.py @@ -13,11 +13,11 @@ import xarray as xr from e3sm_to_cmip import resources -from e3sm_to_cmip._logger import _setup_logger +from e3sm_to_cmip._logger import _setup_child_logger from e3sm_to_cmip.mpas import write_netcdf from e3sm_to_cmip.util import print_message, setup_cmor -logger = _setup_logger(__name__) +logger = _setup_child_logger(__name__) # list of raw variable names needed RAW_VARIABLES = [str("area")] diff --git a/e3sm_to_cmip/cmor_handlers/vars/clisccp.py b/e3sm_to_cmip/cmor_handlers/vars/clisccp.py index 5402ef72..9ce9b0ea 100644 --- a/e3sm_to_cmip/cmor_handlers/vars/clisccp.py +++ b/e3sm_to_cmip/cmor_handlers/vars/clisccp.py @@ -13,10 +13,10 @@ import numpy as np import xarray as xr -from e3sm_to_cmip._logger import _setup_logger +from e3sm_to_cmip._logger import _setup_child_logger from e3sm_to_cmip.util import print_message, setup_cmor -logger = _setup_logger(__name__) +logger = _setup_child_logger(__name__) # list of raw variable names needed RAW_VARIABLES = [str("FISCCP1_COSP")] diff --git a/e3sm_to_cmip/cmor_handlers/vars/orog.py b/e3sm_to_cmip/cmor_handlers/vars/orog.py index 7de757df..2ed2b167 100644 --- a/e3sm_to_cmip/cmor_handlers/vars/orog.py +++ b/e3sm_to_cmip/cmor_handlers/vars/orog.py @@ -13,11 +13,11 @@ import xarray as xr from e3sm_to_cmip import resources -from e3sm_to_cmip._logger import _setup_logger +from e3sm_to_cmip._logger import _setup_child_logger from e3sm_to_cmip.mpas import write_netcdf from e3sm_to_cmip.util import print_message, setup_cmor -logger = _setup_logger(__name__) +logger = _setup_child_logger(__name__) # list of raw variable names needed RAW_VARIABLES = [str("PHIS")] diff --git a/e3sm_to_cmip/cmor_handlers/vars/sftlf.py b/e3sm_to_cmip/cmor_handlers/vars/sftlf.py index 127f7788..246ca2aa 100644 --- a/e3sm_to_cmip/cmor_handlers/vars/sftlf.py +++ b/e3sm_to_cmip/cmor_handlers/vars/sftlf.py @@ -13,11 +13,11 @@ import xarray as xr from e3sm_to_cmip import resources -from e3sm_to_cmip._logger import _setup_logger +from e3sm_to_cmip._logger import _setup_child_logger from e3sm_to_cmip.mpas import write_netcdf from e3sm_to_cmip.util import print_message, setup_cmor -logger = _setup_logger(__name__) +logger = _setup_child_logger(__name__) # list of raw variable names needed RAW_VARIABLES = [str("LANDFRAC")] diff --git a/e3sm_to_cmip/mpas.py b/e3sm_to_cmip/mpas.py index b3f101fa..0fd77a29 100644 --- a/e3sm_to_cmip/mpas.py +++ b/e3sm_to_cmip/mpas.py @@ -728,7 +728,7 @@ def _compute_moc_time_series( lat_bnds = np.zeros((len(lat) - 1, 2)) lat_bnds[:, 0] = lat[0:-1] lat_bnds[:, 1] = lat[1:] - lat = 0.5 * (lat_bnds[:, 0] + lat_bnds[:, 1]) + lat = 0.5 * (lat_bnds[:, 0] + lat_bnds[:, 1]) # type: ignore lat_bnds = xarray.DataArray(lat_bnds, dims=("lat", "nbnd")) # type: ignore lat = xarray.DataArray(lat, dims=("lat",)) # type: ignore diff --git a/e3sm_to_cmip/util.py b/e3sm_to_cmip/util.py index 3627d9e5..56665f24 100644 --- a/e3sm_to_cmip/util.py +++ b/e3sm_to_cmip/util.py @@ -14,9 +14,9 @@ import yaml from tqdm import tqdm -from e3sm_to_cmip._logger import _setup_logger +from e3sm_to_cmip._logger import _setup_child_logger -logger = _setup_logger(__name__) +logger = _setup_child_logger(__name__) ATMOS_TABLES = [ From b44e78d314741f0a948f966a78b2ad558ce193d4 Mon Sep 17 00:00:00 2001 From: tomvothecoder Date: Wed, 19 Mar 2025 13:57:33 -0500 Subject: [PATCH 05/16] Replace `print_message()` and `print_debug()` with logger - Colors from `print_message()` cannot be captured in log files --- e3sm_to_cmip/__main__.py | 34 +++++++++++++++++++--------------- e3sm_to_cmip/util.py | 16 +++++++++++----- tests/run_script.py | 31 ++++++++++++++++++++++--------- 3 files changed, 52 insertions(+), 29 deletions(-) diff --git a/e3sm_to_cmip/__main__.py b/e3sm_to_cmip/__main__.py index d23cb8e2..49054497 100755 --- a/e3sm_to_cmip/__main__.py +++ b/e3sm_to_cmip/__main__.py @@ -44,8 +44,6 @@ find_mpas_files, get_handler_info_msg, precheck, - print_debug, - print_message, ) # Set up the root logger and module level logger. The module level logger is @@ -197,7 +195,7 @@ def run(self): status = self._run() if status != 0: - print_message( + logger.error( f"Error running handlers: { ' '.join([x['name'] for x in self.handlers]) }" ) return 1 @@ -637,12 +635,12 @@ def _run_precheck(self): self.input_path, self.precheck_path, self.var_list, self.realm ) if not new_var_list: - print("All variables previously computed") + logger.info("All variables previously computed") if self.output_path is not None: os.mkdir(os.path.join(self.output_path, "CMIP6")) return 0 else: - print_message(f"Setting up conversion for {' '.join(new_var_list)}", "ok") + logger.info(f"Setting up conversion for {' '.join(new_var_list)}", "ok") self.var_list = new_var_list def _setup_dirs_with_paths(self): @@ -695,8 +693,11 @@ def _run_info_mode(self): # noqa: C901 for handler in self.handlers: table_info = _get_table_info(self.tables_path, handler["table"]) if handler["name"] not in table_info["variable_entry"]: - msg = f"Variable {handler['name']} is not included in the table {handler['table']}" - print_message(msg, status="error") + logger.error( + "Variable {handler['name']} is not included in the table " + f"{handler['table']}" + ) + continue else: if self.freq == "mon" and handler["table"] == "CMIP6_day.json": @@ -705,6 +706,7 @@ def _run_info_mode(self): # noqa: C901 "table" ] == "CMIP6_Amon.json": continue + hand_msg = get_handler_info_msg(handler) messages.append(hand_msg) @@ -714,6 +716,7 @@ def _run_info_mode(self): # noqa: C901 with xr.open_dataset(file_path) as ds: for handler in self.handlers: table_info = _get_table_info(self.tables_path, handler["table"]) + if handler["name"] not in table_info["variable_entry"]: continue @@ -724,8 +727,9 @@ def _run_info_mode(self): # noqa: C901 if raw_var not in ds.data_vars: has_vars = False - msg = f"Variable {handler['name']} is not present in the input dataset" - print_message(msg, status="error") + logger.error( + f"Variable {handler['name']} is not present in the input dataset" + ) break @@ -783,10 +787,10 @@ def _run(self): try: status = run_func() except KeyboardInterrupt: - print_message(" -- keyboard interrupt -- ", "error") + logger.error(" -- keyboard interrupt -- ") return 1 except Exception as e: - print_debug(e) + logger.error(e) return 1 return status @@ -857,7 +861,7 @@ def _run_serial(self) -> int: # noqa: C901 self.new_metadata_path, ) except Exception as e: - print_debug(e) + logger.error(e) if name is not None: num_success += 1 @@ -873,7 +877,7 @@ def _run_serial(self) -> int: # noqa: C901 pbar.close() except Exception as error: - print_debug(error) + logger.error(error) return 1 else: msg = f"{num_success} of {num_handlers} handlers complete" @@ -964,7 +968,7 @@ def _run_parallel(self) -> int: # noqa: C901 logger.info(msg) except Exception as e: - print_debug(e) + logger.error(e) pbar.update(1) pbar.close() @@ -981,7 +985,7 @@ def _run_parallel(self) -> int: # noqa: C901 return 0 def _timeout_exit(self): - print_message("Hit timeout limit, exiting") + logger.info("Hit timeout limit, exiting") os.kill(os.getpid(), signal.SIGINT) diff --git a/e3sm_to_cmip/util.py b/e3sm_to_cmip/util.py index 56665f24..3486c2a9 100644 --- a/e3sm_to_cmip/util.py +++ b/e3sm_to_cmip/util.py @@ -50,6 +50,7 @@ def print_debug(e): + # TODO: Deprecate this function. We use Python logger now. _, _, tb = sys.exc_info() traceback.print_tb(tb) print(e) @@ -73,11 +74,16 @@ def print_message(message, status="error"): """ Prints a message with either a green + or a red - + # TODO: Deprecate this function. We use Python logger now. Colors can't + # be captured in log files. + Parameters: message (str): the message to print - status (str): th""" + status (str): the status message. + """ + if status == "error": - print( + logger.error( colors.FAIL + "[-] " + colors.ENDC @@ -86,11 +92,11 @@ def print_message(message, status="error"): + colors.ENDC ) elif status == "ok": - print(colors.OKGREEN + "[+] " + colors.ENDC + str(message)) + logger.info(colors.OKGREEN + "[+] " + colors.ENDC + str(message)) elif status == "info": - print(str(message)) + logger.info(str(message)) elif status == "debug": - print( + logger.info( colors.OKBLUE + "[*] " + colors.ENDC diff --git a/tests/run_script.py b/tests/run_script.py index 99d262f0..50134f94 100644 --- a/tests/run_script.py +++ b/tests/run_script.py @@ -9,20 +9,25 @@ NOTE: This script can only be executed on LCRC machines. """ -import datetime +import os from e3sm_to_cmip.__main__ import main # The list of variables to process. Update as needed. -VAR_LIST = ( - "pfull, phalf, tas, ts, psl, ps, sfcWind, huss, pr, prc, prsn, evspsbl, tauu, " - "tauv, hfls, clt, rlds, rlus, rsds, rsus, hfss, cl, clw, cli, clivi, clwvi, prw, " - "rldscs, rlut, rlutcs, rsdt, rsuscs, rsut, rsutcs, rtmt, abs550aer, od550aer " - "rsdscs, hur" -) +# VAR_LIST = ( +# "pfull, phalf, tas, ts, psl, ps, sfcWind, huss, pr, prc, prsn, evspsbl, tauu, " +# "tauv, hfls, clt, rlds, rlus, rsds, rsus, hfss, cl, clw, cli, clivi, clwvi, prw, " +# "rldscs, rlut, rlutcs, rsdt, rsuscs, rsut, rsutcs, rtmt, abs550aer, od550aer " +# "rsdscs, hur" +# )1 + +VAR_LIST = "pfull, phalf, tas" # The output path for CMORized datasets. Update as needed. -OUTPUT_PATH = f"../qa/run_{datetime.datetime.now().strftime('%Y%m%d_%H%M%S')}" +OUTPUT_PATH = ( + "/lcrc/group/e3sm/public_html/e3sm_to_cmip/feature-274-redesign-logger-info" +) + args = [ "--var-list", @@ -35,9 +40,17 @@ "/lcrc/group/e3sm/e3sm_to_cmip/cmip6-cmor-tables/Tables/", "--user-metadata", "/lcrc/group/e3sm/e3sm_to_cmip/CMIP6-Metadata/template.json", - "--serial", + "--info", ] # `main()` creates an `E3SMtoCMIP` object and passes `args` to it, which sets # the object parameters to execute a run. main(args) + +# Ensure the path and its contents have the correct permissions recursively +for root, dirs, files in os.walk(OUTPUT_PATH): + os.chmod(root, 0o505) # o=rx (read and execute for others) + for d in dirs: + os.chmod(os.path.join(root, d), 0o505) + for f in files: + os.chmod(os.path.join(root, f), 0o404) # o=r (read for others) From a00af10cb463a25f77bc25320536c78219e03d96 Mon Sep 17 00:00:00 2001 From: tomvothecoder Date: Wed, 19 Mar 2025 16:00:48 -0500 Subject: [PATCH 06/16] Add verison info and timestamp to metadata printed out for a run --- e3sm_to_cmip/__main__.py | 55 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/e3sm_to_cmip/__main__.py b/e3sm_to_cmip/__main__.py index 49054497..f19fe952 100755 --- a/e3sm_to_cmip/__main__.py +++ b/e3sm_to_cmip/__main__.py @@ -5,6 +5,7 @@ import argparse import os import signal +import subprocess import sys import tempfile import threading @@ -21,6 +22,7 @@ from e3sm_to_cmip import ROOT_HANDLERS_DIR, __version__, resources from e3sm_to_cmip._logger import ( LOG_FILENAME, + TIMESTAMP, _add_filehandler, _setup_child_logger, _setup_root_logger, @@ -137,6 +139,11 @@ def __init__(self, args: Optional[List[str]] = None): if self.output_path is not None: self.output_path = os.path.abspath(self.output_path) os.makedirs(self.output_path, exist_ok=True) + elif self.output_path is None: + self.output_path = os.path.join( + os.getcwd(), f"e3sm_to_cmip_run_{TIMESTAMP}" + ) + os.makedirs(self.output_path, exist_ok=True) # Setup directories using the CLI argument paths (e.g., output dir). # ====================================================================== @@ -155,6 +162,8 @@ def __init__(self, args: Optional[List[str]] = None): logger.info("--------------------------------------") config_details = { + "Timestamp": TIMESTAMP, + "Version Info": self._get_version_info(), "Mode": ( "Info" if self.info_mode @@ -178,6 +187,52 @@ def __init__(self, args: Optional[List[str]] = None): # Load the CMOR handlers based on the realm and variable list. self.handlers = self._get_handlers() + def _get_version_info(self) -> str: + """Retrieve version information for the current codebase. + + This method attempts to determine the current Git branch name and commit + hash of the repository containing this file. If the Git information + cannot be retrieved, it falls back to using the `__version__` variable. + + Returns + ------- + str + A string containing the Git branch name and commit hash in the + format "branch with commit ", or the + fallback version string in the format "version <__version__>" if Git + information is unavailable. + + Raises + ------ + subprocess.CalledProcessError + If there is an error executing the Git commands to retrieve branch + or commit information. + """ + try: + branch_name = ( + subprocess.check_output( + ["git", "rev-parse", "--abbrev-ref", "HEAD"], + cwd=os.path.dirname(__file__), + stderr=subprocess.DEVNULL, + ) + .strip() + .decode("utf-8") + ) + commit_hash = ( + subprocess.check_output( + ["git", "rev-parse", "HEAD"], + cwd=os.path.dirname(__file__), + stderr=subprocess.DEVNULL, + ) + .strip() + .decode("utf-8") + ) + version_info = f"branch {branch_name} with commit {commit_hash}" + except subprocess.CalledProcessError: + version_info = f"version {__version__}" + + return version_info + def run(self): # Run e3sm_to_cmip with info mode. # ====================================================================== From 477a3cbfe859ab5044093c9161c9895813bd6fb8 Mon Sep 17 00:00:00 2001 From: tomvothecoder Date: Mon, 14 Apr 2025 16:27:11 -0500 Subject: [PATCH 07/16] Override log formatter to show microseconds up to 6 digits --- e3sm_to_cmip/_logger.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/e3sm_to_cmip/_logger.py b/e3sm_to_cmip/_logger.py index aa014c72..f41c1a61 100644 --- a/e3sm_to_cmip/_logger.py +++ b/e3sm_to_cmip/_logger.py @@ -13,6 +13,14 @@ LOG_LEVEL = logging.INFO +class CustomFormatter(logging.Formatter): + def formatTime(self, record, datefmt=None): + # Includes microseconds up to 6 digits + dt = datetime.fromtimestamp(record.created) + + return dt.strftime("%Y-%m-%d %H:%M:%S.%f") + + def _setup_root_logger(): """Configures the root logger. @@ -27,10 +35,14 @@ def _setup_root_logger(): - The file handler is added dynamically to the root logger later in the E3SMtoCMIP class once the log file path is known. """ + custom_formatter = CustomFormatter(LOG_FORMAT) + console_handler = logging.StreamHandler() + console_handler.setFormatter(custom_formatter) + logging.basicConfig( - format=LOG_FORMAT, level=LOG_LEVEL, force=True, + handlers=[console_handler], ) logging.captureWarnings(True) From c34bc081f6b2669e1fad2833e2757ee85dc5c09a Mon Sep 17 00:00:00 2001 From: tomvothecoder Date: Mon, 14 Apr 2025 16:42:54 -0500 Subject: [PATCH 08/16] Set custom formatter in `_add_filehandler()` - Add `_cleanup_temp_dir()` to remove temp dir at the end of a successful run --- e3sm_to_cmip/__main__.py | 15 +++++++++++++++ e3sm_to_cmip/_logger.py | 5 ++++- 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/e3sm_to_cmip/__main__.py b/e3sm_to_cmip/__main__.py index f19fe952..79590199 100755 --- a/e3sm_to_cmip/__main__.py +++ b/e3sm_to_cmip/__main__.py @@ -4,6 +4,7 @@ import argparse import os +import shutil import signal import subprocess import sys @@ -265,6 +266,9 @@ def run(self): if timer is not None: timer.cancel() + # Clean up temporary directory + self._cleanup_temp_dir() + return 0 def _get_handlers(self): @@ -1043,6 +1047,17 @@ def _timeout_exit(self): logger.info("Hit timeout limit, exiting") os.kill(os.getpid(), signal.SIGINT) + def _cleanup_temp_dir(self): + """Deletes the temporary directory created in the tempfile module.""" + temp_path = tempfile.gettempdir() + + if os.path.exists(temp_path): + try: + shutil.rmtree(temp_path) + logger.info(f"Temporary directory '{temp_path}' deleted successfully.") + except Exception as e: + logger.error(f"Failed to delete temporary directory '{temp_path}': {e}") + def main(args: Optional[List[str]] = None): app = E3SMtoCMIP(args) diff --git a/e3sm_to_cmip/_logger.py b/e3sm_to_cmip/_logger.py index f41c1a61..cee5044b 100644 --- a/e3sm_to_cmip/_logger.py +++ b/e3sm_to_cmip/_logger.py @@ -117,5 +117,8 @@ def _add_filehandler(log_path: str): captured by the console via the default StreamHandler. """ file_handler = logging.FileHandler(log_path, mode=LOG_FILEMODE) - file_handler.setFormatter(logging.Formatter(LOG_FORMAT)) + + custom_formatter = CustomFormatter(LOG_FORMAT) + file_handler.setFormatter(custom_formatter) + logging.root.addHandler(file_handler) From 0b69b1a7a325e3daf1648fce24bbb383d4d4afed Mon Sep 17 00:00:00 2001 From: tomvothecoder Date: Tue, 15 Apr 2025 15:06:29 -0500 Subject: [PATCH 09/16] Prevent appending log messages to previous log files - Make timestamp unique per run by moving timestamp initialization to `app.__init__` --- e3sm_to_cmip/__main__.py | 21 ++++++++++++++++----- e3sm_to_cmip/_logger.py | 5 +---- 2 files changed, 17 insertions(+), 9 deletions(-) diff --git a/e3sm_to_cmip/__main__.py b/e3sm_to_cmip/__main__.py index 79590199..8088dce2 100755 --- a/e3sm_to_cmip/__main__.py +++ b/e3sm_to_cmip/__main__.py @@ -3,6 +3,7 @@ """ import argparse +import logging import os import shutil import signal @@ -12,6 +13,7 @@ import threading from concurrent.futures import ProcessPoolExecutor as Pool from dataclasses import dataclass +from datetime import datetime, timezone from pathlib import Path from pprint import pprint from typing import List, Optional, Union @@ -22,8 +24,6 @@ from e3sm_to_cmip import ROOT_HANDLERS_DIR, __version__, resources from e3sm_to_cmip._logger import ( - LOG_FILENAME, - TIMESTAMP, _add_filehandler, _setup_child_logger, _setup_root_logger, @@ -98,6 +98,9 @@ class CLIArguments: class E3SMtoCMIP: def __init__(self, args: Optional[List[str]] = None): + self.timestamp = datetime.now(timezone.utc).strftime("%Y%m%d_%H%M%S_%f") + self.log_filename = f"{self.timestamp}.log" + # A dictionary of command line arguments. parsed_args = self._parse_args(args) @@ -142,7 +145,7 @@ def __init__(self, args: Optional[List[str]] = None): os.makedirs(self.output_path, exist_ok=True) elif self.output_path is None: self.output_path = os.path.join( - os.getcwd(), f"e3sm_to_cmip_run_{TIMESTAMP}" + os.getcwd(), f"e3sm_to_cmip_run_{self.timestamp}" ) os.makedirs(self.output_path, exist_ok=True) @@ -163,7 +166,7 @@ def __init__(self, args: Optional[List[str]] = None): logger.info("--------------------------------------") config_details = { - "Timestamp": TIMESTAMP, + "Timestamp": self.timestamp, "Version Info": self._get_version_info(), "Mode": ( "Info" @@ -721,7 +724,7 @@ def _setup_dirs_with_paths(self): # instantiated will not be captured (e.g,. esmpy VersionWarning). # However, they will still be captured by the console via a # StreamHandler. - self.log_path = os.path.join(self.output_path, LOG_FILENAME) # type: ignore + self.log_path = os.path.join(self.output_path, self.log_filename) # type: ignore _add_filehandler(self.log_path) # Copy the user's metadata json file with the updated output directory @@ -1061,6 +1064,14 @@ def _cleanup_temp_dir(self): def main(args: Optional[List[str]] = None): app = E3SMtoCMIP(args) + + # Remove any existing filehandlers from the root logger. This prevents + # multiple filehandlers from being added to the root logger, which can + # cause log messages from newer runs to be written log files from older + # runs. + for handler in logging.root.handlers[:]: + logging.root.removeHandler(handler) + app.run() diff --git a/e3sm_to_cmip/_logger.py b/e3sm_to_cmip/_logger.py index cee5044b..5f7c320c 100644 --- a/e3sm_to_cmip/_logger.py +++ b/e3sm_to_cmip/_logger.py @@ -1,11 +1,8 @@ """Logger module for setting up a custom logger.""" import logging -import logging.handlers -from datetime import datetime, timezone +from datetime import datetime -TIMESTAMP = datetime.now(timezone.utc).strftime("%Y%m%d_%H%M%S_%f") -LOG_FILENAME = f"{TIMESTAMP}.log" LOG_FORMAT = ( "%(asctime)s [%(levelname)s]: %(filename)s(%(funcName)s:%(lineno)s) >> %(message)s" ) From a4b9f45cd8e2ae5632952a0631b190bd36134be1 Mon Sep 17 00:00:00 2001 From: tomvothecoder Date: Tue, 15 Apr 2025 15:20:46 -0500 Subject: [PATCH 10/16] Prevent `copy_user_metadata()` function call with info mode --- e3sm_to_cmip/__main__.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/e3sm_to_cmip/__main__.py b/e3sm_to_cmip/__main__.py index 8088dce2..6e95da1c 100755 --- a/e3sm_to_cmip/__main__.py +++ b/e3sm_to_cmip/__main__.py @@ -728,17 +728,18 @@ def _setup_dirs_with_paths(self): _add_filehandler(self.log_path) # Copy the user's metadata json file with the updated output directory - if not self.simple_mode: + if not self.simple_mode and not self.info_mode: copy_user_metadata(self.user_metadata, self.output_path) - # Setup temp storage directory - temp_path = os.environ.get("TMPDIR") - if temp_path is None: - temp_path = f"{self.output_path}/tmp" - if not os.path.exists(temp_path): - os.makedirs(temp_path) + if not self.info_mode: + # Setup temp storage directory + temp_path = os.environ.get("TMPDIR") + if temp_path is None: + temp_path = f"{self.output_path}/tmp" + if not os.path.exists(temp_path): + os.makedirs(temp_path) - tempfile.tempdir = temp_path + tempfile.tempdir = temp_path def _run_info_mode(self): # noqa: C901 messages = [] From 4b4c788fd9c904de3ca8f3e02035a463488c791d Mon Sep 17 00:00:00 2001 From: Tom Vo Date: Tue, 15 Apr 2025 13:35:16 -0700 Subject: [PATCH 11/16] Apply suggestions from code review --- e3sm_to_cmip/__main__.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/e3sm_to_cmip/__main__.py b/e3sm_to_cmip/__main__.py index 6e95da1c..b0065958 100755 --- a/e3sm_to_cmip/__main__.py +++ b/e3sm_to_cmip/__main__.py @@ -205,12 +205,6 @@ def _get_version_info(self) -> str: format "branch with commit ", or the fallback version string in the format "version <__version__>" if Git information is unavailable. - - Raises - ------ - subprocess.CalledProcessError - If there is an error executing the Git commands to retrieve branch - or commit information. """ try: branch_name = ( From 6ca5b1ce87357116b7d2917d7e18712c3e9da25d Mon Sep 17 00:00:00 2001 From: Tom Vo Date: Wed, 23 Apr 2025 10:32:56 -0700 Subject: [PATCH 12/16] Apply suggestions from code review --- e3sm_to_cmip/__main__.py | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/e3sm_to_cmip/__main__.py b/e3sm_to_cmip/__main__.py index b0065958..4519b89f 100755 --- a/e3sm_to_cmip/__main__.py +++ b/e3sm_to_cmip/__main__.py @@ -263,9 +263,6 @@ def run(self): if timer is not None: timer.cancel() - # Clean up temporary directory - self._cleanup_temp_dir() - return 0 def _get_handlers(self): @@ -1045,17 +1042,6 @@ def _timeout_exit(self): logger.info("Hit timeout limit, exiting") os.kill(os.getpid(), signal.SIGINT) - def _cleanup_temp_dir(self): - """Deletes the temporary directory created in the tempfile module.""" - temp_path = tempfile.gettempdir() - - if os.path.exists(temp_path): - try: - shutil.rmtree(temp_path) - logger.info(f"Temporary directory '{temp_path}' deleted successfully.") - except Exception as e: - logger.error(f"Failed to delete temporary directory '{temp_path}': {e}") - def main(args: Optional[List[str]] = None): app = E3SMtoCMIP(args) From ae373faea75161dc5610dab2c5c9f41bfcc12081 Mon Sep 17 00:00:00 2001 From: Tom Vo Date: Wed, 23 Apr 2025 10:41:03 -0700 Subject: [PATCH 13/16] Update e3sm_to_cmip/__main__.py --- e3sm_to_cmip/__main__.py | 1 - 1 file changed, 1 deletion(-) diff --git a/e3sm_to_cmip/__main__.py b/e3sm_to_cmip/__main__.py index 4519b89f..f695011a 100755 --- a/e3sm_to_cmip/__main__.py +++ b/e3sm_to_cmip/__main__.py @@ -5,7 +5,6 @@ import argparse import logging import os -import shutil import signal import subprocess import sys From bcfa40ac860213092bf6c2405364edcae46e6b21 Mon Sep 17 00:00:00 2001 From: Tom Vo Date: Wed, 23 Apr 2025 15:29:15 -0700 Subject: [PATCH 14/16] Make tmp dir timestamp unique and restore clean up code --- e3sm_to_cmip/__main__.py | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/e3sm_to_cmip/__main__.py b/e3sm_to_cmip/__main__.py index f695011a..38b97902 100755 --- a/e3sm_to_cmip/__main__.py +++ b/e3sm_to_cmip/__main__.py @@ -5,6 +5,7 @@ import argparse import logging import os +import shutil import signal import subprocess import sys @@ -262,6 +263,9 @@ def run(self): if timer is not None: timer.cancel() + # Clean up temporary directory + self._cleanup_temp_dir() + return 0 def _get_handlers(self): @@ -722,10 +726,11 @@ def _setup_dirs_with_paths(self): copy_user_metadata(self.user_metadata, self.output_path) if not self.info_mode: - # Setup temp storage directory temp_path = os.environ.get("TMPDIR") + if temp_path is None: - temp_path = f"{self.output_path}/tmp" + temp_path = f"{self.output_path}/tmp_{self.timestamp}" + if not os.path.exists(temp_path): os.makedirs(temp_path) @@ -1041,6 +1046,17 @@ def _timeout_exit(self): logger.info("Hit timeout limit, exiting") os.kill(os.getpid(), signal.SIGINT) + def _cleanup_temp_dir(self): + """Deletes the temporary directory created in the tempfile module.""" + temp_path = tempfile.gettempdir() + + if os.path.exists(temp_path): + try: + shutil.rmtree(temp_path) + logger.info(f"Temporary directory '{temp_path}' deleted successfully.") + except Exception as e: + logger.error(f"Failed to delete temporary directory '{temp_path}': {e}") + def main(args: Optional[List[str]] = None): app = E3SMtoCMIP(args) From 7564099c18772ace70a150b32bd4819e93fc5232 Mon Sep 17 00:00:00 2001 From: Tom Vo Date: Thu, 24 Apr 2025 11:17:41 -0700 Subject: [PATCH 15/16] Remove temp dir clean up code and output temp dir path --- e3sm_to_cmip/__main__.py | 28 +++++++--------------------- 1 file changed, 7 insertions(+), 21 deletions(-) diff --git a/e3sm_to_cmip/__main__.py b/e3sm_to_cmip/__main__.py index 38b97902..120aa8e9 100755 --- a/e3sm_to_cmip/__main__.py +++ b/e3sm_to_cmip/__main__.py @@ -5,7 +5,6 @@ import argparse import logging import os -import shutil import signal import subprocess import sys @@ -181,6 +180,7 @@ def __init__(self, args: Optional[List[str]] = None): "Precheck Path": self.precheck_path, "Log Path": self.log_path, "CMOR Log Path": self.cmor_log_dir, + "Temp Path for Processing MPAS Files": self.temp_path, "Frequency": self.freq, "Realm": self.realm, } @@ -263,9 +263,6 @@ def run(self): if timer is not None: timer.cancel() - # Clean up temporary directory - self._cleanup_temp_dir() - return 0 def _get_handlers(self): @@ -726,15 +723,15 @@ def _setup_dirs_with_paths(self): copy_user_metadata(self.user_metadata, self.output_path) if not self.info_mode: - temp_path = os.environ.get("TMPDIR") + self.temp_path = os.environ.get("TMPDIR") - if temp_path is None: - temp_path = f"{self.output_path}/tmp_{self.timestamp}" + if self.temp_path is None: + self.temp_path = f"{self.output_path}/tmp" - if not os.path.exists(temp_path): - os.makedirs(temp_path) + if not os.path.exists(self.temp_path): + os.makedirs(self.temp_path) - tempfile.tempdir = temp_path + tempfile.tempdir = self.temp_path def _run_info_mode(self): # noqa: C901 messages = [] @@ -1046,17 +1043,6 @@ def _timeout_exit(self): logger.info("Hit timeout limit, exiting") os.kill(os.getpid(), signal.SIGINT) - def _cleanup_temp_dir(self): - """Deletes the temporary directory created in the tempfile module.""" - temp_path = tempfile.gettempdir() - - if os.path.exists(temp_path): - try: - shutil.rmtree(temp_path) - logger.info(f"Temporary directory '{temp_path}' deleted successfully.") - except Exception as e: - logger.error(f"Failed to delete temporary directory '{temp_path}': {e}") - def main(args: Optional[List[str]] = None): app = E3SMtoCMIP(args) From 66abd3af1f8e5039f0cee43541b919c4a98cad2b Mon Sep 17 00:00:00 2001 From: Tom Vo Date: Thu, 24 Apr 2025 16:16:48 -0700 Subject: [PATCH 16/16] Make `self.temp_path = None` if info mode --- e3sm_to_cmip/__main__.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/e3sm_to_cmip/__main__.py b/e3sm_to_cmip/__main__.py index 120aa8e9..7276ebf9 100755 --- a/e3sm_to_cmip/__main__.py +++ b/e3sm_to_cmip/__main__.py @@ -732,6 +732,8 @@ def _setup_dirs_with_paths(self): os.makedirs(self.temp_path) tempfile.tempdir = self.temp_path + else: + self.temp_path = None def _run_info_mode(self): # noqa: C901 messages = []