-
Notifications
You must be signed in to change notification settings - Fork 22
Expand file tree
/
Copy pathcmor_finder.py
More file actions
207 lines (171 loc) · 9.38 KB
/
cmor_finder.py
File metadata and controls
207 lines (171 loc) · 9.38 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
"""
fre cmor find
=============
This module provides tools to find and print information about variables in CMIP6 JSON configuration files.
It is primarily used for inspecting variable entries and generating variable lists for use in FRE CMORization
workflows.
Functions
---------
- ``print_var_content(table_config_file, var_name)``
- ``cmor_find_subtool(json_var_list, json_table_config_dir, opt_var_name)``
- ``make_simple_varlist(dir_targ, output_variable_list, json_mip_table)``
Notes
-----
These utilities are intended to make it easier to inspect and extract variable information from CMIP6 JSON
tables, avoiding the need for manual shell scripting and ad-hoc file inspection.
"""
import glob
import json
import logging
import os
from pathlib import Path
from typing import Optional, Dict, IO
from .cmor_helpers import get_json_file_data
from .cmor_constants import DO_NOT_PRINT_LIST
# Module-level logger named after this module; the functions below report through it.
fre_logger = logging.getLogger(__name__)
# TODO update for cmip7 if desired
def print_var_content(table_config_file: IO[str],
                      var_name: str) -> None:
    """
    Print information about a specific variable from a given CMIP6 JSON configuration file.

    :param table_config_file: An open file object for a CMIP6 table JSON file, opened in text mode. Its
                              ``name`` attribute is used in log messages.
    :type table_config_file: IO[str]
    :param var_name: The name of the variable to look for in the configuration file.
    :type var_name: str
    :raises Exception: If there is an issue reading the JSON content from the file
                       (``json.load`` raises ``json.JSONDecodeError`` on malformed JSON).
    :return: None
    :rtype: None

    .. note:: Outputs information to the logger at INFO level.
    .. note:: If the variable is not found, logs a debug message and returns.
    .. note:: Only prints selected fields, omitting any in DO_NOT_PRINT_LIST.
    .. note:: A file without a usable ``Header``/``table_id`` is still searched; only a warning is logged.
    """
    # The caller is expected to have checked that the file exists; only parsing happens here.
    proj_table_vars = json.load(table_config_file)

    # The table name is the second word of Header.table_id. It is only used for log messages,
    # so any failure to extract it is reported and then ignored.
    table_name = None
    try:
        table_name = proj_table_vars["Header"].get('table_id').split(' ')[1]
    except KeyError:
        fre_logger.warning("couldn't get header and table_name field")
    except (IndexError, AttributeError, TypeError):
        # IndexError: table_id has no second word.
        # AttributeError: table_id is absent (None) or Header is not a JSON object.
        # TypeError: the top level of the JSON document is not an object.
        fre_logger.warning("couldn't get header and table_name, probably not a variable table")

    if table_name is not None:
        fre_logger.info('looking for %s data in table %s!', var_name, table_name)
    else:
        fre_logger.info('looking for %s data in table %s, but could not find its table_name!',
                        var_name, table_config_file.name)

    # Guard both lookups so that a JSON file with an unexpected layout counts as "not found"
    # instead of aborting the scan of the remaining tables.
    var_entries = proj_table_vars.get("variable_entry") if isinstance(proj_table_vars, dict) else None
    var_content = var_entries.get(var_name) if isinstance(var_entries, dict) else None
    if var_content is None:
        fre_logger.debug('variable %s not found in %s, moving on!', var_name, Path(table_config_file.name).name)
        return

    fre_logger.info(' variable key: %s', var_name)
    for field in var_content:
        if field in DO_NOT_PRINT_LIST:
            continue
        fre_logger.info(' %s: %s', field, var_content[field])
    fre_logger.info('\n')
def cmor_find_subtool( json_var_list: Optional[str] = None,
                       json_table_config_dir: Optional[str] = None,
                       opt_var_name: Optional[str] = None) -> None:
    """
    Find and print information about variables in CMIP6 JSON configuration files in a specified directory.

    :param json_var_list: path to JSON file containing variable names to look up in tables. A JSON object
                          (its values are looked up) or a JSON array (its items are looked up) is accepted.
    :type json_var_list: str or None, optional
    :param json_table_config_dir: Directory containing CMIP6 table JSON files. Required despite the default.
    :type json_table_config_dir: str
    :param opt_var_name: Name of a single variable to look up. If None, json_var_list must be provided.
                         Takes precedence over json_var_list when both are given.
    :type opt_var_name: str or None, optional
    :raises OSError: If the specified directory does not exist or contains no JSON files.
    :raises ValueError: If json_table_config_dir is not given, if neither opt_var_name nor json_var_list is
                        provided, or if the variable list is neither a JSON object nor a JSON array.
    :return: None
    :rtype: None

    .. note:: This function is intended as a helper tool for CLI users to quickly inspect variable definitions in
              CMIP6 tables. Information is printed via the logger.
    .. note:: Table files are visited in sorted path order so the output is reproducible.
    """
    # Fail early with a clear message instead of the TypeError that Path(None) would raise.
    if json_table_config_dir is None:
        raise ValueError('ERROR: no json_table_config_dir given, cannot look for table files! exit.')
    if not Path(json_table_config_dir).exists():
        raise OSError(f'ERROR directory {json_table_config_dir} does not exist! exit.')

    fre_logger.info('attempting to find and open files in dir: \n %s ', json_table_config_dir)
    # Escape the directory so characters such as '[' or '*' in its name are matched literally,
    # and sort because glob returns entries in arbitrary, filesystem-dependent order.
    json_table_configs = sorted(glob.glob(f'{glob.escape(str(json_table_config_dir))}/*.json'))
    if not json_table_configs:
        raise OSError(f'ERROR directory {json_table_config_dir} contains no JSON files, exit.')
    fre_logger.info('found content in json_table_config_dir')

    var_list = None
    if json_var_list is not None:
        with open(json_var_list, "r", encoding="utf-8") as var_list_file:
            var_list = json.load(var_list_file)

    if opt_var_name is None and var_list is None:
        raise ValueError('ERROR: no opt_var_name given but also no content in variable list!!! exit!')

    # Reduce both modes to one list of names so a single pair of loops does the lookups.
    if opt_var_name is not None:
        fre_logger.info('opt_var_name is not None: looking for only ONE variables worth of info!')
        var_names = [opt_var_name]
    else:
        fre_logger.info('opt_var_name is None, and var_list is not None, looking for many variables worth of info!')
        if isinstance(var_list, dict):
            var_names = [str(name) for name in var_list.values()]
        elif isinstance(var_list, list):
            var_names = [str(name) for name in var_list]
        else:
            raise ValueError(f'ERROR: variable list {json_var_list} must hold a JSON object or array! exit!')
        if not var_names:
            fre_logger.warning('variable list %s is empty, nothing to look up!', json_var_list)

    for name in var_names:
        for json_table_config in json_table_configs:
            # print_var_content consumes the stream, so each table is reopened per variable.
            with open(json_table_config, "r", encoding="utf-8") as table_config_file:
                print_var_content(table_config_file, name)
def make_simple_varlist( dir_targ: str,
                         output_variable_list: Optional[str],
                         json_mip_table: Optional[str] = None) -> Optional[Dict[str, str]]:
    """
    Generate a JSON file containing a list of variable names from NetCDF files in a specified directory.

    This function searches the given directory (non-recursively) for NetCDF files, extracts variable names
    from the filenames, and writes these variable names to a JSON file.

    :param dir_targ: The target directory to search for NetCDF files.
    :type dir_targ: str
    :param output_variable_list: The path to the output JSON file where the variable list will be saved.
                                 If None, nothing is written and the dictionary is only returned.
    :type output_variable_list: str or None
    :param json_mip_table: target table for making the var list. found variables are included if they are in the table
    :type json_mip_table: str or None, optional
    :raises Exception: if the mip table cannot be opened or has no ``variable_entry`` data
    :raises OSError: if the outputfile cannot be written
    :return: Dictionary of variable names (keys == values), or None if no files are found or no variable
             names survive the filtering
    :rtype: dict or None

    .. note:: Assumes NetCDF filenames are of the form: <something>.<datetime>.<variable>.nc
    .. note:: Variable name is assumed to be the second-to-last component when split by periods.
    .. note:: Files are processed in sorted path order, so the order of the returned keys is reproducible.
    .. note:: Logs a warning if only one file is found.
    .. warning:: Logs errors if no files are found in the directory or if no files match the expected pattern.
    """
    # If the variable is in the filename, it is delimited by another period, hence "*.*.nc".
    # The directory is escaped so glob metacharacters in its name are taken literally, and the
    # result is sorted because glob returns entries in arbitrary, filesystem-dependent order.
    nc_pattern = os.path.join(glob.escape(os.fspath(dir_targ)), "*.*.nc")
    all_nc_files = sorted(glob.glob(nc_pattern))
    if not all_nc_files:
        fre_logger.error("No files found in the directory.")
        return None

    if len(all_nc_files) == 1:
        fre_logger.warning("Warning: Only one file found matching the pattern.")
    fre_logger.info("Files found matching pattern. Number of files: %d", len(all_nc_files))

    # Optional filter: names derived from the keys of the mip table's "variable_entry" section.
    allowed_vars = None
    if json_mip_table is not None:
        try:
            # read in mip vars to check against later
            fre_logger.debug('attempting to read in variable entries in specified mip table')
            full_mip_vars_list = get_json_file_data(json_mip_table)["variable_entry"].keys()
        except Exception as exc:
            raise Exception('problem opening mip table and getting variable entry data. '
                            f'exc = {exc}') from exc

        fre_logger.debug('attempting to make mip variable list')
        # Only the part of each table key before the first underscore is compared.
        mip_vars = [key.split('_')[0] for key in full_mip_vars_list]
        fre_logger.debug('mip vars extracted for comparison when making var list: %s', mip_vars)
        # A set gives constant-time membership tests in the per-file loop below.
        allowed_vars = set(mip_vars)

    # Build a deduplicated dict of variable names extracted from all filenames across all
    # datetimes. Assigning to a dict deduplicates while preserving first-seen insertion order.
    var_list: Dict[str, str] = {}
    for targetfile in all_nc_files:
        var_name = os.path.basename(targetfile).split('.')[-2]
        if allowed_vars is not None and var_name not in allowed_vars:
            continue
        var_list[var_name] = var_name

    if not var_list:
        fre_logger.warning('WARNING: no variables in target mip table found, or no matching pattern,'
                           ' or not enough info in the filenames (i am expecting FRE-bronx like filenames)')
        return None

    # Write the variable list to the output JSON file
    if output_variable_list is not None:
        try:
            fre_logger.info('writing output variable list, %s', list(var_list.keys()))
            with open(output_variable_list, 'w', encoding='utf-8') as f:
                json.dump(var_list, f, indent=4)
        except Exception as exc:
            raise OSError('output variable list created but cannot be written') from exc

    return var_list