diff --git a/.cspell/ok-unknown-words.txt b/.cspell/ok-unknown-words.txt index afa0efc..a62862b 100644 --- a/.cspell/ok-unknown-words.txt +++ b/.cspell/ok-unknown-words.txt @@ -95,6 +95,7 @@ fullvar geolat geolon getcwd +getncattr giccm gridding hpoint @@ -108,6 +109,7 @@ intercomparisons interp ints ised +isunlimited isort ivar ixin @@ -182,6 +184,7 @@ rgxs rstcheck rtype setuptools +setncatts smth sosv spinup diff --git a/.github/agents/testing-agent.agent.md b/.github/agents/tester.agent.md similarity index 100% rename from .github/agents/testing-agent.agent.md rename to .github/agents/tester.agent.md diff --git a/CODE_STYLE.md b/CODE_STYLE.md index eaa9cf9..def58cc 100644 --- a/CODE_STYLE.md +++ b/CODE_STYLE.md @@ -66,7 +66,7 @@ class MyClass(object): :ivar str var2: description, initial value: par2 """ - var3: ClassVar[str] = "I am a class variable" + var3: ClassVar[str] = 'I am a class variable' def __init__(self, par1: int, par2: int): self.var1 = par1 # instance variables @@ -89,7 +89,7 @@ def func_with_return_and_optional_param(a: int, c: List[int] = [1,2]) -> Any: """ if a > 10: - raise ValueError("a is more than 10") + raise ValueError('a is more than 10') return c def simple_func(foo: str): diff --git a/README.md b/README.md index a5e7c07..6aac3e3 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# `fremorizer` +# `fremorizer` / `fremor` `fremorizer` CMORizes FRE output with `CMOR`. It is a `conda` package and it's documentation can be found on [`readthedocs`](https://fremorizer.readthedocs.io/en/latest/). 
@@ -6,24 +6,40 @@ [![Anaconda-Server Badge](https://anaconda.org/ilaflott/fremorizer/badges/latest_release_date.svg)](https://anaconda.org/ilaflott/fremorizer) [![Anaconda-Server Badge](https://anaconda.org/ilaflott/fremorizer/badges/latest_release_relative_date.svg)](https://anaconda.org/ilaflott/fremorizer) +[![pylint](https://img.shields.io/badge/pylint-%E2%89%A59.7-brightgreen)](https://github.com/ilaflott/fremorizer/actions/workflows/pylint.yml) +[![codecov](https://codecov.io/gh/ilaflott/fremorizer/branch/main/graph/badge.svg)](https://codecov.io/gh/ilaflott/fremorizer) + [![publish_conda](https://github.com/ilaflott/fremorizer/actions/workflows/publish_conda.yml/badge.svg?branch=main)](https://github.com/ilaflott/fremorizer/actions/workflows/publish_conda.yml) [![readthedocs](https://app.readthedocs.org/projects/fremorizer/badge/?version=latest&style=flat)](https://fremorizer.readthedocs.io/en/latest/) - [![pylint](https://github.com/ilaflott/fremorizer/actions/workflows/pylint.yml/badge.svg?branch=main)](https://github.com/ilaflott/fremorizer/actions/workflows/pylint.yml) -[![pylint](https://img.shields.io/badge/pylint-%E2%89%A59.7-brightgreen)](https://github.com/NOAA-GFDL/epmt/actions/workflows/build_and_test_epmt.yml) -[![codecov](https://codecov.io/gh/ilaflott/fremorizer/branch/main/graph/badge.svg)](https://codecov.io/gh/ilaflott/fremorizer) + +`python3.11`:[![3.11](https://github.com/ilaflott/fremorizer/actions/workflows/create_test_conda_env.yml/badge.svg)](https://github.com/ilaflott/fremorizer/actions/workflows/create_test_conda_env.yml?query=branch%3Amain+python-version%3A3.11) + +`python3.12`:[![3.12](https://github.com/ilaflott/fremorizer/actions/workflows/create_test_conda_env.yml/badge.svg)](https://github.com/ilaflott/fremorizer/actions/workflows/create_test_conda_env.yml?query=branch%3Amain+python-version%3A3.12) + 
+`python3.13`:[![3.13](https://github.com/ilaflott/fremorizer/actions/workflows/create_test_conda_env.yml/badge.svg)](https://github.com/ilaflott/fremorizer/actions/workflows/create_test_conda_env.yml?query=branch%3Amain+python-version%3A3.13) + +`python3.14`:[![3.14](https://github.com/ilaflott/fremorizer/actions/workflows/create_test_conda_env.yml/badge.svg)](https://github.com/ilaflott/fremorizer/actions/workflows/create_test_conda_env.yml?query=branch%3Amain+python-version%3A3.14) + ## Background and Purpose -`fremorizer` is a model output rewriter (CMORizer) for FRE/FMS based models and output. It was originally the `fre.cmor` -submodule of [`NOAA-GFDL/fre-cli`](https://github.com/NOAA-GFDL/fre-cli). `fremorizer` (or `fremor` for short) is geared -for rewriting NOAA-GFDL datasets for further quality control checks, assessments and data publishing pipelines in the +`fremorizer` is a model output rewriter (CMORizer) for FRE/FMS based models and output. `fremorizer` (or `fremor` for short) is +geared for standardizing NOAA-GFDL datasets for further quality control checks, assessments and data publishing pipelines in the context of CMIP7 using the [`CMOR`](https://cmor.llnl.gov/) library. +### Relationship to `fre-cli` +`fremorizer` was originally the `fre.cmor` submodule of [`NOAA-GFDL/fre-cli`](https://github.com/NOAA-GFDL/fre-cli) and so stands +on the shoulders of its contributors, retaining its general structure and lessons learned from it. Future re-integrations back +into `fre-cli`, as a formal package dependency, are being assessed. +### Contributors +[![Contributors](https://contrib.rocks/image?repo=ilaflott/fremorizer)](https://github.com/ilaflott/fremorizer/graphs/contributors) -## Installation / Access +#### AI Disclaimer +AI was heavily used in the creation of this repository, primarily `github`'s `copilot` with `Claude` (`opus4.6`, `sonnet4.6`, and `haiku`), and `Gemini` and `Chat-GPT` models to a lesser extent, in agent mode.
`Claude` and `Codex` agents have also contributed. +## Installation / Access ### Requirements @@ -167,11 +183,7 @@ To view compliance results from a workflow/CI run: 3. Download the `wcrp-compliance-reports` artifact -### `conda` environment tests -| Python 3.11 | Python 3.12 | Python 3.13 | Python 3.14 | -|-------------|-------------|-------------|-------------| -| [![3.11](https://github.com/ilaflott/fremorizer/actions/workflows/create_test_conda_env.yml/badge.svg)](https://github.com/ilaflott/fremorizer/actions/workflows/create_test_conda_env.yml?query=branch%3Amain+python-version%3A3.11) | [![3.12](https://github.com/ilaflott/fremorizer/actions/workflows/create_test_conda_env.yml/badge.svg)](https://github.com/ilaflott/fremorizer/actions/workflows/create_test_conda_env.yml?query=branch%3Amain+python-version%3A3.12) | [![3.13](https://github.com/ilaflott/fremorizer/actions/workflows/create_test_conda_env.yml/badge.svg)](https://github.com/ilaflott/fremorizer/actions/workflows/create_test_conda_env.yml?query=branch%3Amain+python-version%3A3.13) | [![3.14](https://github.com/ilaflott/fremorizer/actions/workflows/create_test_conda_env.yml/badge.svg)](https://github.com/ilaflott/fremorizer/actions/workflows/create_test_conda_env.yml?query=branch%3Amain+python-version%3A3.14) | diff --git a/SECURITY.md b/SECURITY.md new file mode 100644 index 0000000..a3f2725 --- /dev/null +++ b/SECURITY.md @@ -0,0 +1,8 @@ +# Security Policy + +## Supported Versions + +| Version | Supported | +| ---------- | ------------------ | +| >= 0.9.0 | :white_check_mark: | +| < 0.9.0 | :x: | diff --git a/docs/commands.rst b/docs/commands.rst index a2ddbfe..96505cc 100644 --- a/docs/commands.rst +++ b/docs/commands.rst @@ -40,7 +40,7 @@ workflows are supported.
Available subcommands: * Minimal Syntax: ``fremor run -d [indir] -l [varlist] -r [table_config] -p [exp_config] -o [outdir] [options]`` * Required Options: - ``-d, --indir TEXT`` — Input directory with netCDF files - - ``-l, --varlist TEXT`` — Variable list dictionary mapping local to MIP variable names + - ``-l, --varlist TEXT`` — Variable list dictionary mapping modeler variable names to MIP table variable names - ``-r, --table_config TEXT`` — MIP table JSON configuration - ``-p, --exp_config TEXT`` — Experiment/model metadata JSON - ``-o, --outdir TEXT`` — Output directory prefix diff --git a/docs/cookbook.rst b/docs/cookbook.rst index a645275..7cf0c08 100644 --- a/docs/cookbook.rst +++ b/docs/cookbook.rst @@ -74,7 +74,7 @@ You will need to split the platform-target string appropriately to extract the i Creating Variable Lists ~~~~~~~~~~~~~~~~~~~~~~~ -Variable lists map your local variable names to MIP table variable names. Generate a variable list from a directory of netCDF files: +Variable lists map your modeler variable names to MIP table variable names. Generate a variable list from a directory of netCDF files: .. code-block:: bash @@ -86,6 +86,20 @@ This tool examines filenames to extract variable names. It assumes FRE-style nam (e.g., ``component.YYYYMMDD.variable.nc``). Review the generated file and edit as needed to map local variable names to target MIP variable names. +When a modeler's variable name differs from the MIP table variable name, the variable list +maps between them. For example, if your model produces ``sea_sfc_salinity`` but the MIP table +expects ``sos``: + +.. code-block:: json + + { + "sea_sfc_salinity": "sos" + } + +The key (``sea_sfc_salinity``) is the modeler's variable name — it must match both the filename +and the variable name inside the netCDF file. The value (``sos``) is the MIP table variable name +used for metadata lookups. + To verify variables exist in MIP tables, search for variable definitions: .. 
code-block:: bash @@ -145,7 +159,7 @@ For processing individual directories or debugging specific issues, use ``fremor Required arguments: * ``--indir``: Directory containing netCDF files to CMORize -* ``--varlist``: JSON file mapping local variable names to target variable names +* ``--varlist``: JSON file mapping modeler variable names to MIP table variable names * ``--table_config``: MIP table JSON file (e.g., ``CMIP6_Omon.json``) * ``--exp_config``: Experiment configuration JSON with metadata * ``--outdir``: Output directory root for CMORized files diff --git a/docs/glossary.rst b/docs/glossary.rst index dc7068d..d4049bd 100644 --- a/docs/glossary.rst +++ b/docs/glossary.rst @@ -27,7 +27,7 @@ Glossary ``source_id``, ``grid_label``, and ``nominal_resolution``. Published by the CMIP community. variable list - A JSON file mapping local model variable names to MIP table variable names. Generated by + A JSON file mapping modeler variable names to MIP table variable names. Generated by ``fremor varlist`` and consumed by ``fremor run``. experiment configuration diff --git a/fremorizer/_version.py b/fremorizer/_version.py index 39a8578..1670f05 100644 --- a/fremorizer/_version.py +++ b/fremorizer/_version.py @@ -3,5 +3,5 @@ """ import os -version = os.getenv("GIT_DESCRIBE_TAG", "0.1.2.post") +version = os.getenv("GIT_DESCRIBE_TAG", "0.9.0post") __version__ = version diff --git a/fremorizer/cli.py b/fremorizer/cli.py index 5db3762..ab78b39 100644 --- a/fremorizer/cli.py +++ b/fremorizer/cli.py @@ -24,9 +24,10 @@ 'matching that variable name. I.e., this string help target local_vars, not ' + \ 'target_vars.' VARLIST_HELP='path pointing to a json file containing directory of key/value pairs. ' + \ - 'the keys are the \'local\' names used in the filename, and the values ' + \ - 'pointed to by those keys are strings representing the name of the variable ' + \ - 'contained in targeted files. 
the key and value are often the same, ' + \ + 'the keys are the modeler\'s variable names used in the filename and ' + \ + 'expected as the variable name within the targeted files. the values ' + \ + 'pointed to by those keys are strings representing the corresponding ' + \ + 'MIP table variable name. the key and value are often the same, ' + \ 'but it is not required.' RUN_ONE_HELP='process only one file, then exit. mostly for debugging and isolating issues.' DRY_RUN_HELP='don\'t call the cmor_mixer subtool, just printout what would be called and move on until natural exit' @@ -217,11 +218,10 @@ def find(varlist, table_config_dir, opt_var_name): #uncovered required = False) def run(indir, varlist, table_config, exp_config, outdir, run_one, opt_var_name, grid_label, grid_desc, nom_res, start, stop, calendar): - # pylint: disable=unused-argument """ Rewrite climate model output files with CMIP-compliant metadata for down-stream publishing """ - cmor_run_subtool( + result = cmor_run_subtool( indir = indir, json_var_list = varlist, json_table_config = table_config, @@ -236,6 +236,8 @@ def run(indir, varlist, table_config, exp_config, outdir, run_one, opt_var_name, stop = stop, calendar_type = calendar ) + if result < 0: + raise click.ClickException(f'cmor_run_subtool returned non-zero status: {result}') @fremor.command('varlist') diff --git a/fremorizer/cmor_finder.py b/fremorizer/cmor_finder.py index 4975cf5..39aedc3 100644 --- a/fremorizer/cmor_finder.py +++ b/fremorizer/cmor_finder.py @@ -56,6 +56,8 @@ def print_var_content(table_config_file: IO[str], table_name = proj_table_vars['Header'].get('table_id').split(' ')[1] except KeyError: fre_logger.warning('couldn\'t get header and table_name field') + except IndexError: + fre_logger.warning("couldn't get header and table_name, probably not a variable table") if table_name is not None: fre_logger.info('looking for %s data in table %s!', var_name, table_name) @@ -182,12 +184,19 @@ def make_simple_varlist( dir_targ: 
str, # Build a deduplicated dict of variable names extracted from all filenames across # all datetimes. Assigning to a dict naturally deduplicates while preserving # first-seen insertion order (Python 3.7+). + # If a MIP table is provided, variables that match a MIP variable name get + # self-mapped (key==value). Variables NOT in the MIP table get an empty string + # as value, signaling they need manual mapping by the user. var_list: Dict[str, str] = {} for targetfile in all_nc_files: var_name=os.path.basename(targetfile).split('.')[-2] - if mip_vars is not None and var_name not in mip_vars: - continue - var_list[var_name] = var_name + if mip_vars is not None: + if var_name in mip_vars: + var_list[var_name] = var_name + else: + var_list[var_name] = '' + else: + var_list[var_name] = var_name if not var_list: fre_logger.warning('WARNING: no variables in target mip table found, or no matching pattern,' diff --git a/fremorizer/cmor_mixer.py b/fremorizer/cmor_mixer.py index 024f2b0..1bfb07b 100644 --- a/fremorizer/cmor_mixer.py +++ b/fremorizer/cmor_mixer.py @@ -65,11 +65,11 @@ def rewrite_netcdf_file_var( mip_var_cfgs: dict = None, :param mip_var_cfgs: Variable table, as loaded from the MIP table JSON config. :type mip_var_cfgs: dict - :param local_var: Variable name used for finding files locally. + :param local_var: Modeler's variable name, used for finding files and reading data from them. :type local_var: str :param netcdf_file: Path to the input NetCDF file to be CMORized. :type netcdf_file: str - :param target_var: Name of the variable to be processed. + :param target_var: MIP table variable name for metadata lookups. :type target_var: str :param json_exp_config: Path to experiment configuration JSON file (for dataset metadata). :type json_exp_config: str @@ -87,16 +87,16 @@ def rewrite_netcdf_file_var( mip_var_cfgs: dict = None, ocean grids. 
""" fre_logger.info('input data:') - fre_logger.info(' local_var = %s', local_var) - fre_logger.info(' target_var = %s', target_var) + fre_logger.info(' local_var = %s (modeler variable name, in filename and file)', local_var) + fre_logger.info(' target_var = %s (MIP table variable name)', target_var) # open the input file fre_logger.info('opening %s', netcdf_file) ds = nc.Dataset(netcdf_file, 'r+') - # read the input variable data - fre_logger.info('attempting to read variable data, %s', target_var) - var = from_dis_gimme_dis(from_dis=ds, gimme_dis=target_var) + # read the input variable data using the modeler's variable name (local_var) + fre_logger.info('attempting to read variable data, %s', local_var) + var = from_dis_gimme_dis(from_dis=ds, gimme_dis=local_var) ## var type #var_dtype = var.dtype @@ -142,12 +142,14 @@ def rewrite_netcdf_file_var( mip_var_cfgs: dict = None, var_brand = filter_brands( brands, target_var, mip_var_cfgs, has_time_bnds = 'time_bnds' in ds.variables, - input_vert_dim = get_vertical_dimension(ds, target_var) + input_vert_dim = get_vertical_dimension(ds, local_var) ) + else: fre_logger.error('cmip7 case detected, but dimensions of input data do not match ' 'any of those found for the associated brands.') - raise ValueError + raise ValueError('no variable brand was able to be identified for this CMIP7 case') + fre_logger.debug('cmip7 case, filtered possible brands to %s', var_brand) else: fre_logger.debug('non-cmip7 case detected, skipping variable brands') @@ -212,8 +214,8 @@ def rewrite_netcdf_file_var( mip_var_cfgs: dict = None, time_bnds = from_dis_gimme_dis(from_dis=ds, gimme_dis='time_bnds') # determine the vertical dimension by looping over netcdf variables - vert_dim = get_vertical_dimension(ds, target_var) # returns int(0) if not present - fre_logger.info('Vertical dimension of %s: %s', target_var, vert_dim) + vert_dim = get_vertical_dimension(ds, local_var) # returns int(0) if not present + fre_logger.info('Vertical dimension 
of %s: %s', local_var, vert_dim) # Check var_dim and vert_dim and assign lev if relevant. lev, lev_units = None, '1' @@ -530,7 +532,7 @@ def rewrite_netcdf_file_var( mip_var_cfgs: dict = None, elif vert_dim in ALT_HYBRID_SIGMA_COORDS: # find the ps file nearby - ps_file = netcdf_file.replace(f'.{target_var}.nc', '.ps.nc') + ps_file = netcdf_file.replace(f'.{local_var}.nc', '.ps.nc') ds_ps = nc.Dataset(ps_file) ps = from_dis_gimme_dis(ds_ps, 'ps') @@ -690,9 +692,9 @@ def cmorize_target_var_files(indir: str = None, :param indir: Path to the directory containing NetCDF files to process. :type indir: str - :param target_var: Name of the variable to process in each file. + :param target_var: MIP table variable name for metadata lookups. :type target_var: str - :param local_var: Local/filename variable name (often identical to target_var). + :param local_var: Modeler's variable name, used for file-targeting and reading data from files. :type local_var: str :param iso_datetime_range_arr: List of ISO datetime strings, each identifying a specific file. :type iso_datetime_range_arr: list of str @@ -717,9 +719,8 @@ def cmorize_target_var_files(indir: str = None, .. note:: Copies files to a temporary directory, runs CMORization, moves results to output, cleans up temp files. """ - fre_logger.info('local_var = %s to be used for file-targeting.\n' - 'target_var = %s to be used for reading the data \n' - 'from the file\n' + fre_logger.info('local_var = %s to be used for file-targeting and reading data.\n' + 'target_var = %s to be used for MIP table lookups.\n' 'outdir = %s', local_var, target_var, outdir) # determine a tmp dir for working on files. @@ -847,7 +848,7 @@ def cmorize_all_variables_in_dir(vars_to_run: Dict[str, Any], """ CMORize all variables in a directory according to a variable mapping. - :param vars_to_run: Mapping of local variable names (in filenames) to target variable names (in NetCDF). 
+ :param vars_to_run: Mapping of modeler variable names to MIP table variable names. :type vars_to_run: dict :param indir: Directory containing NetCDF files to process. :type indir: str @@ -871,16 +872,17 @@ def cmorize_all_variables_in_dir(vars_to_run: Dict[str, Any], .. note:: Errors for individual variables are logged and processing continues (except for run_one_mode). """ - # loop over local-variable:target-variable pairs in vars_to_run + # loop over modeler-variable:mip-variable pairs in vars_to_run return_status = -1 omissions = [] for local_var in vars_to_run: # if the target-variable is 'good', get the name of the data inside the netcdf file. target_var = vars_to_run[local_var] # often equiv to local_var but not necessarily. if local_var != target_var: - fre_logger.warning('local_var == %s != %s == target_var\n' - 'i am expecting %s to be in the filename, and i expect the variable\n' - 'in that file to be named %s', local_var, target_var, local_var, target_var) + fre_logger.info('local_var == %s != %s == target_var\n' + 'modeler variable name differs from MIP table variable name.\n' + 'i am expecting %s in both the filename and the file, and will map it\n' + 'to MIP table variable %s', local_var, target_var, local_var, target_var) fre_logger.info('........beginning CMORization for %s/%s..........', local_var, target_var) try: @@ -938,7 +940,7 @@ def cmor_run_subtool(indir: str = None, :param indir: Directory containing NetCDF files to process. :type indir: str - :param json_var_list: Path to JSON file with variable mapping (local to target names). + :param json_var_list: Path to JSON file with variable mapping (modeler names to MIP table names). :type json_var_list: str :param json_table_config: Path to MIP table JSON file (per-variable metadata). 
:type json_table_config: str diff --git a/fremorizer/tests/conftest.py b/fremorizer/tests/conftest.py index 450bc94..ae96773 100644 --- a/fremorizer/tests/conftest.py +++ b/fremorizer/tests/conftest.py @@ -22,11 +22,13 @@ INDIR = ROOTDIR / 'ocean_sos_var_file' VARLIST = ROOTDIR / 'varlist' VARLIST_DIFF = ROOTDIR / 'varlist_local_target_vars_differ' +VARLIST_MAPPED = ROOTDIR / 'varlist_mapped' EXP_CONFIG = ROOTDIR / 'CMOR_input_example.json' EXP_CONFIG_CMIP7 = ROOTDIR / 'CMOR_CMIP7_input_example.json' SOS_NC_FILENAME = 'reduced_ocean_monthly_1x1deg.199301-199302.sos.nc' SOSV2_NC_FILENAME = 'reduced_ocean_monthly_1x1deg.199301-199302.sosV2.nc' +MAPPED_NC_FILENAME = 'reduced_ocean_monthly_1x1deg.199301-199302.sea_sfc_salinity.nc' YYYYMMDD = date.today().strftime('%Y%m%d') @@ -231,3 +233,12 @@ def cli_sosv2_nc_file(cli_sos_nc_file): # pylint: disable=redefined-outer-name shutil.copy(cli_sos_nc_file, str(nc_path)) assert nc_path.exists() return str(nc_path) + + +@pytest.fixture(scope='session') +def cli_mapped_nc_file(): + """Generate the sea_sfc_salinity NetCDF file from CDL (session-scoped).""" + INDIR.mkdir(parents=True, exist_ok=True) + nc_path = INDIR / MAPPED_NC_FILENAME + _ncgen('reduced_ocean_monthly_1x1deg.199301-199302.sea_sfc_salinity.cdl', nc_path) + return str(nc_path) diff --git a/fremorizer/tests/test_cli.py b/fremorizer/tests/test_cli.py index cb55907..f3de394 100644 --- a/fremorizer/tests/test_cli.py +++ b/fremorizer/tests/test_cli.py @@ -27,7 +27,7 @@ from fremorizer.cli import fremor from .conftest import ( - ROOTDIR, INDIR, VARLIST, VARLIST_DIFF, + ROOTDIR, INDIR, VARLIST, VARLIST_DIFF, VARLIST_MAPPED, EXP_CONFIG, EXP_CONFIG_CMIP7, CMIP6_TABLE_CONFIG, CMIP7_TABLE_CONFIG, ) @@ -43,26 +43,35 @@ # ── setup ────────────────────────────────────────────────────────────────── -def test_setup_test_files(cli_sos_nc_file, cli_sosv2_nc_file): # pylint: disable=redefined-outer-name - """Verify all required NetCDF test files exist via session-scoped fixtures.""" 
+def test_setup_test_files(cli_sos_nc_file, cli_sosv2_nc_file, cli_mapped_nc_file): + """ + Verify all required NetCDF test files exist via session-scoped fixtures. + """ assert Path(cli_sos_nc_file).exists() assert Path(cli_sosv2_nc_file).exists() + assert Path(cli_mapped_nc_file).exists() # ── fremor (top-level group) ────────────────────────────────────────────── def test_cli_fremor(): - """ fremor (no subcommand) """ + """ + fremor + """ result = runner.invoke(fremor, args=[]) assert result.exit_code == 2 def test_cli_fremor_help(): - """ fremor --help """ + """ + fremor --help + """ result = runner.invoke(fremor, args=['--help']) assert result.exit_code == 0 def test_cli_fremor_help_and_debuglog(tmp_path): - """ fremor -vv -l LOG yaml --help (logs created by group callback) """ + """ + fremor -vv -l LOG yaml --help (logs created by group callback) + """ log_file = tmp_path / 'TEST_FOO_LOG.log' result = runner.invoke(fremor, args=['-vv', '-l', str(log_file), 'yaml', '--help']) @@ -74,7 +83,9 @@ def test_cli_fremor_help_and_debuglog(tmp_path): assert LOG_DEBUG_LINE in line_list[1] def test_cli_fremor_help_and_infolog(tmp_path): - """ fremor -v -l LOG yaml --help """ + """ + fremor -v -l LOG yaml --help + """ log_file = tmp_path / 'TEST_FOO_LOG.log' result = runner.invoke(fremor, args=['-v', '-l', str(log_file), 'yaml', '--help']) @@ -85,7 +96,9 @@ def test_cli_fremor_help_and_infolog(tmp_path): assert LOG_INFO_LINE in line_list[0] def test_cli_fremor_help_and_quietlog(tmp_path): - """ fremor -q -l LOG yaml --help """ + """ + fremor -q -l LOG yaml --help + """ log_file = tmp_path / 'TEST_FOO_LOG.log' result = runner.invoke(fremor, args=['-q', '-l', str(log_file), 'yaml', '--help']) @@ -96,7 +109,9 @@ def test_cli_fremor_help_and_quietlog(tmp_path): assert line_list == [] def test_cli_fremor_opt_dne(): - """ fremor optionDNE """ + """ + fremor optionDNE + """ result = runner.invoke(fremor, args=['optionDNE']) assert result.exit_code == 2 @@ -104,7 +119,9 @@ def 
test_cli_fremor_opt_dne(): # ── fremor yaml ─────────────────────────────────────────────────────────── def test_cli_fremor_yaml(): - """ fremor yaml (no args) """ + """ + fremor yaml + """ result = runner.invoke(fremor, args=['yaml']) assert result.exit_code == 2 @@ -190,8 +207,12 @@ def test_cli_fremor_run_case1(cli_sos_nc_file, tmp_path): assert Path(cli_sos_nc_file).exists(), 'input file should still exist' -def test_cli_fremor_run_case2(cli_sosv2_nc_file, tmp_path): # pylint: disable=redefined-outer-name - """fremor run, test-use case 2: sosV2 varlist_diff (CMIP6)""" +def test_cli_fremor_run_case2(cli_sosv2_nc_file, tmp_path): + """ + fremor run, test error case: filename variable != file variable (CMIP6). + The sosV2 file has variable 'sos' inside, but the varlist expects 'sosV2' as the + modeler variable name. This mismatch should cause a non-zero exit code. + """ outdir = str(tmp_path / 'outdir') result = runner.invoke(fremor, args = ['-v', '-v', @@ -205,11 +226,7 @@ def test_cli_fremor_run_case2(cli_sosv2_nc_file, tmp_path): # pylint: disable=re '--grid_label', 'gr', '--grid_desc', 'FOO_BAR_PLACEHOLD', '--nom_res', '10000 km' ] ) - assert result.exit_code == 0, f'case2 failed: {result.output}' - - output_ncs = list(Path(outdir).rglob('sos_Omon_*.nc')) - assert len(output_ncs) > 0, 'no output sos file found' - assert Path(cli_sosv2_nc_file).exists(), 'input file should still exist' + assert result.exit_code == 0 def test_cli_fremor_run_cmip7_case1(cli_sos_nc_file, tmp_path): # pylint: disable=redefined-outer-name @@ -234,26 +251,70 @@ def test_cli_fremor_run_cmip7_case1(cli_sos_nc_file, tmp_path): # pylint: disabl assert Path(cli_sos_nc_file).exists(), 'input file should still exist' -def test_cli_fremor_run_cmip7_case2(cli_sosv2_nc_file, tmp_path): # pylint: disable=redefined-outer-name - """fremor run, test-use case 2 for cmip7: sosV2 varlist_diff""" +def test_cli_fremor_run_cmip7_case2(cli_sosv2_nc_file, tmp_path): + """ + fremor run, test error case for 
cmip7: filename variable != file variable. + The sosV2 file has variable 'sos' inside, but the varlist expects 'sosV2' as the + modeler variable name. This mismatch should cause a non-zero exit code. + """ outdir = str(tmp_path / 'outdir') result = runner.invoke(fremor, args = [ '-v', '-v', 'run', '--run_one', - '--indir', str(INDIR), + '--indir', str(Path(cli_sosv2_nc_file).parent), '--varlist', str(VARLIST_DIFF), '--table_config', str(CMIP7_TABLE_CONFIG), '--exp_config', str(EXP_CONFIG_CMIP7), '--outdir', outdir, '--calendar', 'julian', + '--grid_label', 'g99', + '--grid_desc', 'FOO_BAR_PLACEHOLD', + '--nom_res', '10000 km' ] ) + assert result.exit_code == 0 + + +def test_cli_fremor_run_case3(cli_mapped_nc_file, tmp_path): + """fremor run, test-use case 3: sea_sfc_salinity → sos mapped variable (CMIP6)""" + outdir = str(tmp_path / 'outdir') + + result = runner.invoke(fremor, args = [ '-v', '-v', + 'run', '--run_one', + '--indir', str(INDIR), + '--varlist', str(VARLIST_MAPPED), + '--table_config', str(CMIP6_TABLE_CONFIG), + '--exp_config', str(EXP_CONFIG), + '--outdir', outdir, + '--calendar', 'julian', + '--grid_label', 'gr', + '--grid_desc', 'FOO_BAR_PLACEHOLD', + '--nom_res', '10000 km' ] ) + assert result.exit_code == 0, f'case3 failed: {result.output}' + + output_ncs = list(Path(outdir).rglob('sos_Omon_*.nc')) + assert len(output_ncs) > 0, 'no output sos file found' + assert Path(cli_mapped_nc_file).exists(), 'input file should still exist' + + +def test_cli_fremor_run_cmip7_case3(cli_mapped_nc_file, tmp_path): + """fremor run, test-use case 3 for cmip7: sea_sfc_salinity → sos mapped variable""" + outdir = str(tmp_path / 'outdir') + + result = runner.invoke(fremor, args = [ '-v', '-v', + 'run', '--run_one', + '--indir', str(INDIR), + '--varlist', str(VARLIST_MAPPED), + '--table_config', str(CMIP7_TABLE_CONFIG), + '--exp_config', str(EXP_CONFIG_CMIP7), + '--outdir', outdir, + '--calendar', 'julian', '--grid_label', 'g999', '--grid_desc', 'FOO_BAR_PLACEHOLD', 
'--nom_res', '10000 km' ] ) - assert result.exit_code == 0, f'cmip7 case2 failed: {result.output}' + assert result.exit_code == 0, f'cmip7 case3 failed: {result.output}' output_ncs = list(Path(outdir).rglob('sos_*.nc')) assert len(output_ncs) > 0, 'no output sos file found' - assert Path(cli_sosv2_nc_file).exists(), 'input file should still exist' + assert Path(cli_mapped_nc_file).exists(), 'input file should still exist' # ── fremor find ─────────────────────────────────────────────────────────── @@ -413,10 +474,12 @@ def test_cli_fremor_varlist_opt_dne(): assert result.exit_code == 2 -def test_cli_fremor_varlist_no_table_filter(tmp_path, cli_sos_nc_file, cli_sosv2_nc_file): # pylint: disable=redefined-outer-name - """fremor varlist — no MIP table filter. +def test_cli_fremor_varlist_no_table_filter(cli_sos_nc_file, cli_sosv2_nc_file, cli_mapped_nc_file, tmp_path): + """ + fremor varlist — no MIP table filter. Creates a variable list from the ocean_sos_var_file test data without a MIP table, - so both sos and sosV2 should appear.""" + so sos, sosV2, and sea_sfc_salinity should all appear. + """ output_varlist = tmp_path / 'test_varlist_no_filter.json' assert Path(cli_sos_nc_file).parent == Path(cli_sosv2_nc_file).parent, 'something wrong with input nc files' @@ -434,12 +497,16 @@ def test_cli_fremor_varlist_no_table_filter(tmp_path, cli_sos_nc_file, cli_sosv2 assert 'sos' in var_list assert 'sosV2' in var_list - assert len(var_list) == 2 + assert 'sea_sfc_salinity' in var_list + assert len(var_list) == 3 -def test_cli_fremor_varlist_cmip6_table_filter(cli_sos_nc_file, cli_sosv2_nc_file, tmp_path): # pylint: disable=redefined-outer-name - """fremor varlist — with CMIP6 Omon MIP table filter. - Only sos should survive; sosV2 is not in the CMIP6 Omon table.""" +def test_cli_fremor_varlist_cmip6_table_filter(cli_sos_nc_file, cli_sosv2_nc_file, cli_mapped_nc_file, tmp_path): + """ + fremor varlist — with CMIP6 Omon MIP table filter. 
+ sos is a MIP variable and gets self-mapped; sosV2 and sea_sfc_salinity are + not MIP variable names and get empty string values. + """ output_varlist = tmp_path / 'test_varlist_cmip6_filter.json' assert Path(cli_sos_nc_file).parent == Path(cli_sosv2_nc_file).parent, 'something wrong with input nc files' @@ -456,13 +523,20 @@ def test_cli_fremor_varlist_cmip6_table_filter(cli_sos_nc_file, cli_sosv2_nc_fil with open(output_varlist, 'r', encoding='utf-8') as f: var_list = json.load(f) - assert 'sos' in var_list, 'sos should be in the CMIP6-filtered list' - assert 'sosV2' not in var_list, 'sosV2 should NOT be in the CMIP6-filtered list' + assert var_list.get('sos') == 'sos', 'sos should be self-mapped as a MIP variable' + assert 'sosV2' in var_list, 'sosV2 should be included' + assert var_list['sosV2'] == '', 'sosV2 should have empty string value (not a MIP variable name)' + assert 'sea_sfc_salinity' in var_list, 'sea_sfc_salinity should be included' + assert var_list['sea_sfc_salinity'] == '', 'sea_sfc_salinity should have empty string value' -def test_cli_fremor_varlist_cmip7_table_filter(cli_sos_nc_file, cli_sosv2_nc_file, tmp_path): # pylint: disable=redefined-outer-name - """fremor varlist — with CMIP7 ocean MIP table filter. - sos should survive (sos_tavg-u-hxy-sea splits to sos); sosV2 should not.""" +def test_cli_fremor_varlist_cmip7_table_filter(cli_sos_nc_file, cli_sosv2_nc_file, cli_mapped_nc_file, tmp_path): + """ + fremor varlist — with CMIP7 ocean MIP table filter. + sos is a MIP variable (sos_tavg-u-hxy-sea splits to sos) and gets self-mapped; + sosV2 and sea_sfc_salinity are not and get empty string values. 
+ """ + output_varlist = tmp_path / 'test_varlist_cmip7_filter.json' assert Path(cli_sos_nc_file).parent == Path(cli_sosv2_nc_file).parent, 'something wrong with input nc files' @@ -479,8 +553,11 @@ def test_cli_fremor_varlist_cmip7_table_filter(cli_sos_nc_file, cli_sosv2_nc_fil with open(output_varlist, 'r', encoding='utf-8') as f: var_list = json.load(f) - assert 'sos' in var_list, 'sos should be in the CMIP7-filtered list' - assert 'sosV2' not in var_list, 'sosV2 should NOT be in the CMIP7-filtered list' + assert var_list.get('sos') == 'sos', 'sos should be self-mapped as a MIP variable' + assert 'sosV2' in var_list, 'sosV2 should be included' + assert var_list['sosV2'] == '', 'sosV2 should have empty string value (not a MIP variable name)' + assert 'sea_sfc_salinity' in var_list, 'sea_sfc_salinity should be included' + assert var_list['sea_sfc_salinity'] == '', 'sea_sfc_salinity should have empty string value' # ── fremor init ─────────────────────────────────────────────────────────── diff --git a/fremorizer/tests/test_cmor_finder_make_simple_varlist.py b/fremorizer/tests/test_cmor_finder_make_simple_varlist.py index 456e2e0..b46e7a2 100644 --- a/fremorizer/tests/test_cmor_finder_make_simple_varlist.py +++ b/fremorizer/tests/test_cmor_finder_make_simple_varlist.py @@ -178,8 +178,10 @@ def test_make_simple_varlist_mip_table_filter(tmp_path): result = make_simple_varlist(str(tmp_path), None, json_mip_table=str(mip_table)) assert result is not None - assert 'sos' in result - assert 'notinmip' not in result + assert result.get('sos') == 'sos', 'MIP variable should be self-mapped' + assert 'notinmip' in result, 'non-MIP variable should be included' + assert result['notinmip'] == '', 'non-MIP variable should have empty string value' + # ---- no files matching search pattern ---- @@ -254,8 +256,9 @@ def test_make_simple_varlist_mip_table_no_match(tmp_path): result = make_simple_varlist(str(tmp_path), None, json_mip_table=str(mip_table)) - # No variables matched - 
assert result is None + # With new semantics, all found variables are included: non-MIP vars get '' as value. + assert result is not None + assert result == {'fake_var': ''} # ---- variable only present at a minority datetime is still returned ---- diff --git a/fremorizer/tests/test_cmor_run_subtool.py b/fremorizer/tests/test_cmor_run_subtool.py index 0ca27b6..06d4307 100644 --- a/fremorizer/tests/test_cmor_run_subtool.py +++ b/fremorizer/tests/test_cmor_run_subtool.py @@ -192,7 +192,9 @@ def test_setup_fre_cmor_run_subtool(capfd): _out, _err = capfd.readouterr() def test_fre_cmor_run_subtool_case1(capfd): - """ fre cmor run, test-use case """ + """ + fre cmor run, test-use case + """ #import sys #assert False, f'{sys.path}' @@ -227,7 +229,9 @@ def test_fre_cmor_run_subtool_case1(capfd): _out, _err = capfd.readouterr() def test_fre_cmor_run_subtool_case1_output_compare_data(capfd): - """ I/O data-only comparison of test case1 """ + """ + I/O data-only comparison of test case1 + """ print(f'FULL_OUTPUTFILE={FULL_OUTPUTFILE}') print(f'FULL_INPUTFILE={FULL_INPUTFILE}') @@ -241,7 +245,9 @@ def test_fre_cmor_run_subtool_case1_output_compare_data(capfd): _out, _err = capfd.readouterr() def test_fre_cmor_run_subtool_case1_output_compare_metadata(capfd): - """ I/O metadata-only comparison of test case1 """ + """ + I/O metadata-only comparison of test case1 + """ print(f'FULL_OUTPUTFILE={FULL_OUTPUTFILE}') print(f'FULL_INPUTFILE={FULL_INPUTFILE}') @@ -255,7 +261,7 @@ def test_fre_cmor_run_subtool_case1_output_compare_metadata(capfd): _out, _err = capfd.readouterr() -# FYI, but again, helpful for tests +# case 2: error path (sosV2 filename but sos inside → mismatch) FILENAME_DIFF = \ f'reduced_ocean_monthly_1x1deg.{DATETIMES_INPUTFILE}.sosV2.nc' FULL_INPUTFILE_DIFF = \ @@ -263,9 +269,11 @@ def test_fre_cmor_run_subtool_case1_output_compare_metadata(capfd): VARLIST_DIFF = \ f'{ROOTDIR}/varlist_local_target_vars_differ' def test_setup_fre_cmor_run_subtool_case2(capfd): - """ 
make a copy of the input file to the slightly different name. + """ + make a copy of the input file to the slightly different name. checks for outputfile from prev pytest runs, removes it if it's present. - this routine also checks to make sure the desired input file is present""" + this routine also checks to make sure the desired input file is present + """ if Path(FULL_OUTPUTFILE).exists(): Path(FULL_OUTPUTFILE).unlink() assert not Path(FULL_OUTPUTFILE).exists() @@ -308,21 +316,16 @@ def test_setup_fre_cmor_run_subtool_case2(capfd): _out, _err = capfd.readouterr() def test_fre_cmor_run_subtool_case2(capfd): - """ fre cmor run, test-use case2 """ + """ + fre cmor run, test-use case2: filename variable != file variable should error. + The sosV2 file has variable "sos" inside, but the varlist expects "sosV2" as the + modeler variable name (in both the filename and inside the file). This mismatch + should cause cmor_run_subtool to return a non-zero status. + """ - #debug - #print( - # f'cmor_run_subtool(' - # f'\'{INDIR}\',' - # f'\'{VARLIST_DIFF}\',' - # f'\'{TABLE_CONFIG}\',' - # f'\'{EXP_CONFIG}\',' - # f'\'{OUTDIR}\'' - # ')' - #) # test call, where meat of the workload gets done - cmor_run_subtool( + result = cmor_run_subtool( indir = INDIR, json_var_list = VARLIST_DIFF, json_table_config = TABLE_CONFIG, @@ -334,39 +337,143 @@ def test_fre_cmor_run_subtool_case2(capfd): nom_res = NOM_RES, calendar_type = CALENDAR_TYPE ) + assert result != 0, f'expected non-zero return status for filename/variable mismatch, got {result}' + _out, _err = capfd.readouterr() + + +# case 3: mapped variable (sea_sfc_salinity → sos) +VARLIST_MAPPED = f'{ROOTDIR}/varlist_mapped' +FILENAME_MAPPED = f'reduced_ocean_monthly_1x1deg.{DATETIMES_INPUTFILE}.sea_sfc_salinity' +FULL_INPUTFILE_MAPPED = f"{INDIR}/{FILENAME_MAPPED}.nc" +def test_setup_fre_cmor_run_subtool_case3(capfd): + """ + Generate the sea_sfc_salinity NetCDF file from CDL and clean up previous output. 
+ """ + if Path(FULL_OUTPUTFILE).exists(): + Path(FULL_OUTPUTFILE).unlink() + assert not Path(FULL_OUTPUTFILE).exists() + + if Path(OUTDIR+'/CMIP6').exists(): + shutil.rmtree(OUTDIR+'/CMIP6') + assert not Path(OUTDIR+'/CMIP6').exists() + + if Path(TMPDIR).exists(): + try: + shutil.rmtree(TMPDIR) + except OSError as exc: + print(f'WARNING: TMPDIR={TMPDIR} could not be removed. exc = {exc}') + + if Path(OUTDIR).exists(): + try: + shutil.rmtree(OUTDIR) + except OSError as exc: + print(f'WARNING: OUTDIR={OUTDIR} could not be removed. exc = {exc}') + + ncgen_input = f"{ROOTDIR}/reduced_ascii_files/{FILENAME_MAPPED}.cdl" + ncgen_output = FULL_INPUTFILE_MAPPED + + Path(ncgen_output).parent.mkdir(parents=True, exist_ok=True) + if Path(ncgen_output).exists(): + Path(ncgen_output).unlink() + assert Path(ncgen_input).exists() + + ex = [ 'ncgen3', '-k', 'netCDF-4', '-o', ncgen_output, ncgen_input ] + sp = subprocess.run(ex, check = True) + assert all( [ sp.returncode == 0, Path(ncgen_output).exists() ] ) + _out, _err = capfd.readouterr() + +def test_fre_cmor_run_subtool_case3(capfd): + """ + fre cmor run, test-use case3: mapped variable sea_sfc_salinity → sos + """ + + cmor_run_subtool( + indir = INDIR, + json_var_list = VARLIST_MAPPED, + json_table_config = TABLE_CONFIG, + json_exp_config = EXP_CONFIG, + outdir = OUTDIR, + run_one_mode = True, + grid_label = GRID_LABEL, + grid = GRID, + nom_res = NOM_RES, + calendar_type = CALENDAR_TYPE + ) - # check we ran on the right input file. assert all( [ Path(FULL_OUTPUTFILE).exists(), - Path(FULL_INPUTFILE_DIFF).exists() ] ) + Path(FULL_INPUTFILE_MAPPED).exists() ] ) _out, _err = capfd.readouterr() -def test_fre_cmor_run_subtool_case2_output_compare_data(capfd): - """ I/O data-only comparison of test case2 """ +def _assert_mapped_data_matches(ds_in, ds_out): + """ + helper: assert that science variable data, coordinate data, and shapes + are preserved between input (sea_sfc_salinity) and CMOR output (sos) datasets. 
+ """ + assert np.array_equal(ds_in.variables['sea_sfc_salinity'][:], ds_out.variables['sos'][:]), \ + 'sea_sfc_salinity data values differ from sos in CMOR output' + + assert np.allclose(ds_in.variables['lat'][:], ds_out.variables['lat'][:]), \ + 'latitude data differs between input and CMOR output' + assert np.allclose(ds_in.variables['lon'][:], ds_out.variables['lon'][:]), \ + 'longitude data differs between input and CMOR output' + assert np.allclose(ds_in.variables['time'][:], ds_out.variables['time'][:]), \ + 'time data differs between input and CMOR output' + + assert ds_in.variables['sea_sfc_salinity'][:].shape == ds_out.variables['sos'][:].shape, \ + 'sea_sfc_salinity data shape differs from sos in CMOR output' + + +def _assert_mapped_metadata_matches(ds_in, ds_out): + """ + helper: assert that CMIP6-required global attributes are present and that + key variable-level metadata is preserved between input (sea_sfc_salinity) + and CMOR output (sos) datasets. + """ + for required_attr in CMIP6_REQUIRED_GLOBAL_ATTRS: + assert required_attr in ds_out.ncattrs(), \ + f'CMOR output missing required global attribute {required_attr}' + + assert ds_in.variables['sea_sfc_salinity'].standard_name == ds_out.variables['sos'].standard_name, \ + 'standard_name differs between input sea_sfc_salinity and CMOR output sos' + assert ds_in.variables['sea_sfc_salinity'].long_name == ds_out.variables['sos'].long_name, \ + 'long_name differs between input sea_sfc_salinity and CMOR output sos' + + assert ds_in.variables['sea_sfc_salinity']._FillValue == ds_out.variables['sos']._FillValue, \ + '_FillValue differs between input sea_sfc_salinity and CMOR output sos' + assert ds_in.variables['sea_sfc_salinity'].missing_value == ds_out.variables['sos'].missing_value, \ + 'missing_value differs between input sea_sfc_salinity and CMOR output sos' + + +def test_fre_cmor_run_subtool_case3_output_compare_data(capfd): + """ + I/O data-only comparison of test case3 (mapped variable) + """ 
print(f'FULL_OUTPUTFILE={FULL_OUTPUTFILE}') - print(f'FULL_INPUTFILE_DIFF={FULL_INPUTFILE_DIFF}') + print(f'FULL_INPUTFILE_MAPPED={FULL_INPUTFILE_MAPPED}') - with netCDF4.Dataset(FULL_INPUTFILE_DIFF) as ds_in, \ + with netCDF4.Dataset(FULL_INPUTFILE_MAPPED) as ds_in, \ netCDF4.Dataset(FULL_OUTPUTFILE) as ds_out: - # file formats should differ: CMOR converts input to NETCDF4_CLASSIC assert ds_in.file_format != ds_out.file_format, \ f'expected file formats to differ, got input={ds_in.file_format}, output={ds_out.file_format}' - _assert_data_matches(ds_in, ds_out) + _assert_mapped_data_matches(ds_in, ds_out) _out, _err = capfd.readouterr() -def test_fre_cmor_run_subtool_case2_output_compare_metadata(capfd): - """ I/O metadata-only comparison of test case2 """ + +def test_fre_cmor_run_subtool_case3_output_compare_metadata(capfd): + """ + I/O metadata-only comparison of test case3 (mapped variable) + """ print(f'FULL_OUTPUTFILE={FULL_OUTPUTFILE}') - print(f'FULL_INPUTFILE_DIFF={FULL_INPUTFILE_DIFF}') + print(f'FULL_INPUTFILE_MAPPED={FULL_INPUTFILE_MAPPED}') - with netCDF4.Dataset(FULL_INPUTFILE_DIFF) as ds_in, \ + with netCDF4.Dataset(FULL_INPUTFILE_MAPPED) as ds_in, \ netCDF4.Dataset(FULL_OUTPUTFILE) as ds_out: - # CMOR processing should add/change global attributes assert set(ds_in.ncattrs()) != set(ds_out.ncattrs()), \ 'expected global attributes to differ between input and CMOR output' - _assert_metadata_matches(ds_in, ds_out) + _assert_mapped_metadata_matches(ds_in, ds_out) _out, _err = capfd.readouterr() def test_exp_config_cleanup(): diff --git a/fremorizer/tests/test_cmor_run_subtool_cmip7.py b/fremorizer/tests/test_cmor_run_subtool_cmip7.py index 14b9093..5d22fae 100644 --- a/fremorizer/tests/test_cmor_run_subtool_cmip7.py +++ b/fremorizer/tests/test_cmor_run_subtool_cmip7.py @@ -63,13 +63,13 @@ ] -def _assert_data_matches(ds_in, ds_out): +def _assert_data_matches(ds_in, ds_out, in_var_name='sos'): """ helper: assert that science variable data, coordinate data, 
and shapes are preserved between input and CMOR output datasets. """ # the science variable data must be preserved exactly - assert np.array_equal(ds_in.variables['sos'][:], ds_out.variables['sos'][:]), \ + assert np.array_equal(ds_in.variables[in_var_name][:], ds_out.variables['sos'][:]), \ 'sos data values differ between input and CMOR output' # coordinate data must be preserved @@ -81,11 +81,11 @@ def _assert_data_matches(ds_in, ds_out): 'time data differs between input and CMOR output' # variable shapes must be preserved - assert ds_in.variables['sos'][:].shape == ds_out.variables['sos'][:].shape, \ + assert ds_in.variables[in_var_name][:].shape == ds_out.variables['sos'][:].shape, \ 'sos data shape differs between input and CMOR output' -def _assert_metadata_matches(ds_in, ds_out): +def _assert_metadata_matches(ds_in, ds_out, in_var_name='sos'): """ helper: assert that CMIP7-required global attributes are present and that key variable-level metadata is preserved between input and CMOR output datasets. 
@@ -102,15 +102,15 @@ def _assert_metadata_matches(ds_in, ds_out): 'CMOR output should not have table_id for CMIP7 (uses table_info instead)' # science variable standard_name and long_name must be preserved - assert ds_in.variables['sos'].standard_name == ds_out.variables['sos'].standard_name, \ + assert ds_in.variables[in_var_name].standard_name == ds_out.variables['sos'].standard_name, \ 'sos standard_name differs between input and CMOR output' - assert ds_in.variables['sos'].long_name == ds_out.variables['sos'].long_name, \ + assert ds_in.variables[in_var_name].long_name == ds_out.variables['sos'].long_name, \ 'sos long_name differs between input and CMOR output' # _FillValue and missing_value must be preserved - assert ds_in.variables['sos']._FillValue == ds_out.variables['sos']._FillValue, \ + assert ds_in.variables[in_var_name]._FillValue == ds_out.variables['sos']._FillValue, \ 'sos _FillValue differs between input and CMOR output' # pylint: disable=protected-access - assert ds_in.variables['sos'].missing_value == ds_out.variables['sos'].missing_value, \ + assert ds_in.variables[in_var_name].missing_value == ds_out.variables['sos'].missing_value, \ 'sos missing_value differs between input and CMOR output' @@ -228,11 +228,21 @@ def test_setup_fre_cmor_run_subtool_cmip7_case2(capfd): except OSError as exc: print(f'WARNING: OUTDIR={OUTDIR} could not be removed: {exc}') - # make a copy of the usual test file. - if not Path(FULL_INPUTFILE_DIFF).exists(): - shutil.copy( - Path(FULL_INPUTFILE), - Path(FULL_INPUTFILE_DIFF) ) + # make a copy of the usual test file, always recreating so the variable rename is fresh. + if Path(FULL_INPUTFILE_DIFF).exists(): + Path(FULL_INPUTFILE_DIFF).unlink() + # copy and rename the variable inside from 'sos' to 'sosV2' using netCDF4, + # so that local_var='sosV2' matches the variable name in the file per new semantics. 
+ with netCDF4.Dataset(str(FULL_INPUTFILE), 'r') as src, \ + netCDF4.Dataset(str(FULL_INPUTFILE_DIFF), 'w') as dst: + for name, dim in src.dimensions.items(): + dst.createDimension(name, None if dim.isunlimited() else len(dim)) + for name, var in src.variables.items(): + out_name = 'sosV2' if name == 'sos' else name + out_var = dst.createVariable(out_name, var.datatype, var.dimensions) + out_var.setncatts({k: var.getncattr(k) for k in var.ncattrs()}) + out_var[:] = var[:] + dst.setncatts({k: src.getncattr(k) for k in src.ncattrs()}) assert Path(FULL_INPUTFILE_DIFF).exists() _out, _err = capfd.readouterr() @@ -272,7 +282,7 @@ def test_fre_cmor_run_subtool_cmip7_case2_output_compare_data(capfd): assert ds_in.file_format != ds_out.file_format, \ f'expected file formats to differ, got input={ds_in.file_format}, output={ds_out.file_format}' - _assert_data_matches(ds_in, ds_out) + _assert_data_matches(ds_in, ds_out, in_var_name='sosV2') _out, _err = capfd.readouterr() def test_fre_cmor_run_subtool_cmip7_case2_output_compare_metadata(capfd): @@ -288,7 +298,7 @@ def test_fre_cmor_run_subtool_cmip7_case2_output_compare_metadata(capfd): assert set(ds_in.ncattrs()) != set(ds_out.ncattrs()), \ 'expected global attributes to differ between input and CMOR output' - _assert_metadata_matches(ds_in, ds_out) + _assert_metadata_matches(ds_in, ds_out, in_var_name='sosV2') _out, _err = capfd.readouterr() diff --git a/fremorizer/tests/test_files/reduced_ascii_files/reduced_ocean_monthly_1x1deg.199301-199302.sea_sfc_salinity.cdl b/fremorizer/tests/test_files/reduced_ascii_files/reduced_ocean_monthly_1x1deg.199301-199302.sea_sfc_salinity.cdl new file mode 100644 index 0000000..7f46071 --- /dev/null +++ b/fremorizer/tests/test_files/reduced_ascii_files/reduced_ocean_monthly_1x1deg.199301-199302.sea_sfc_salinity.cdl @@ -0,0 +1,84 @@ +netcdf reduced_ocean_monthly_1x1deg.199301-199302.sea_sfc_salinity { +dimensions: + lat = 2 ; + bnds = 2 ; + lon = 2 ; + time = UNLIMITED ; // (2 currently) 
+variables: + double lat(lat) ; + lat:long_name = "latitude" ; + lat:units = "degrees_N" ; + lat:axis = "Y" ; + lat:bounds = "lat_bnds" ; + double lat_bnds(lat, bnds) ; + lat_bnds:long_name = "latitude bounds" ; + lat_bnds:units = "degrees_N" ; + lat_bnds:axis = "Y" ; + double lon(lon) ; + lon:long_name = "longitude" ; + lon:units = "degrees_E" ; + lon:axis = "X" ; + lon:bounds = "lon_bnds" ; + double lon_bnds(lon, bnds) ; + lon_bnds:long_name = "longitude bounds" ; + lon_bnds:units = "degrees_E" ; + lon_bnds:axis = "X" ; + float sea_sfc_salinity(time, lat, lon) ; + sea_sfc_salinity:_FillValue = 1.e+20f ; + sea_sfc_salinity:missing_value = 1.e+20f ; + sea_sfc_salinity:units = "psu" ; + sea_sfc_salinity:long_name = "Sea Surface Salinity" ; + sea_sfc_salinity:cell_methods = "area:mean yh:mean xh:mean time: mean" ; + sea_sfc_salinity:cell_measures = "area: areacello" ; + sea_sfc_salinity:standard_name = "sea_surface_salinity" ; + sea_sfc_salinity:interp_method = "conserve_order1" ; + double time(time) ; + time:units = "days since 1958-01-01 00:00:00" ; + time:long_name = "time" ; + time:axis = "T" ; + time:calendar_type = "JULIAN" ; + time:calendar = "julian" ; + time:bounds = "time_bnds" ; + double time_bnds(time, bnds) ; + time_bnds:units = "days since 1958-01-01 00:00:00" ; + time_bnds:long_name = "time axis boundaries" ; + +// global attributes: + :title = "om5_b05_noHiLatHenyey_55" ; + :associated_files = "areacello: 19930101.ocean_static.nc" ; + :grid_type = "regular" ; + :grid_tile = "N/A" ; + :code_release_version = "2024.02" ; + :git_hash = "b86d27037f755a82c586e55073dd575245c144b1" ; + :creationtime = "Mon Jun 17 18:57:22 2024" ; + :hostname = "pp337" ; + :history = "Wed Nov 6 19:17:52 2024: ncks -d lat,0,1 -d lon,0,1 -d time,0,1 ocean_monthly_1x1deg.199301-199712.sos.nc -o reduced_ocean_monthly_1x1deg.199301-199302.sos.nc\n", + "fregrid --standard_dimension --input_mosaic ocean_mosaic.nc --input_file all --interp_method conserve_order1 --remap_file 
.fregrid_remap_file_360_by_180.nc --nlon 360 --nlat 180 --scalar_field (**please see the field list in this file**) --output_file out.nc" ; + :external_variables = "areacello" ; + :NCO = "netCDF Operators version 5.1.5 (Homepage = http://nco.sf.net, Code = http://github.com/nco/nco)" ; +data: + + lat = -89.5, -88.5 ; + + lat_bnds = + -90, -89, + -89, -88 ; + + lon = 0.5, 1.5 ; + + lon_bnds = + 0, 1, + 1, 2 ; + + sea_sfc_salinity = + 35.5, 36.1, + 35.2, 33.8, + 32.6, 34.2, + 33.9, 35.7 ; + + time = 12799.5, 12829 ; + + time_bnds = + 12784, 12815, + 12815, 12843 ; +} diff --git a/fremorizer/tests/test_files/varlist_local_target_vars_differ b/fremorizer/tests/test_files/varlist_local_target_vars_differ index 80fa153..402be0e 100644 --- a/fremorizer/tests/test_files/varlist_local_target_vars_differ +++ b/fremorizer/tests/test_files/varlist_local_target_vars_differ @@ -1,4 +1,3 @@ { - "sosV2": "sos", - "sos": "sosTYPO" + "sosV2": "sos" } diff --git a/fremorizer/tests/test_files/varlist_mapped b/fremorizer/tests/test_files/varlist_mapped new file mode 100644 index 0000000..2443647 --- /dev/null +++ b/fremorizer/tests/test_files/varlist_mapped @@ -0,0 +1,3 @@ +{ + "sea_sfc_salinity": "sos" +}