Skip to content

Commit b9b50c6

Browse files
authored
Merge pull request #195 from NOAA-GFDL/194.bug-fix
Various bug fixes for AM5 analysis scripts
2 parents 3dbbbf2 + ffd0cbe commit b9b50c6

17 files changed

+100
-79
lines changed

Jinja2Filters/form_remap_dep.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,27 +1,27 @@
1-
"""
1+
"""
22
form_remap_dep:
33
- parses the remap_pp_components rose-app.conf
44
- uses input from rose-suite.conf in the form of env variables
55
- returns the pp component and source name dependencies for
6-
remap_pp_components task execution.
6+
remap_pp_components task execution.
77
88
For instance, for an atmos PP component that requires the regridded
99
atmos_month and regridded atmos_daily history files, this JinjaFilter
1010
when called within flow.cylc helps identify this dependency to
11-
complete the corresponding task graph.
11+
complete the corresponding task graph.
1212
1313
This JinjaFilter ensures a remap-pp-component only waits for the
1414
dependent make-timeseries tasks such that the succeeded components
15-
output are made available in the final destination.
15+
output are made available in the final destination.
1616
1717
See form_remap_dep invocations from flow.cylc
1818
"""
1919
# Function parameter type hints, PEP 484
2020

2121
import os
2222
from pathlib import Path
23-
import yaml
2423
import logging
24+
import yaml
2525

2626
# set up logging
2727
logging.basicConfig()
@@ -35,22 +35,22 @@ def form_remap_dep(grid_type: str,
3535
output_type: str,
3636
yamlfile: str,
3737
history_segment: str=None) -> str:
38-
"""
38+
"""
3939
Form the task parameter list based on the grid type,
4040
the temporal type, and the desired pp component(s)
4141
4242
Arguments:
4343
@param grid_type (str): One of: native or regrid-xy
4444
@param temporal_type (str): One of: temporal or static
45-
@param chunk (str): e.g P5Y for 5-year time series
45+
@param chunk (str): e.g P5Y for 5-year time series
4646
@param pp_component (str): all, or a space-separated list
4747
@param output_type (str): ts or av
4848
@param yamlfile (str): yaml configuration file passed through workflow
4949
@param history_segment (str): if given, handle special case where history
5050
segment equals pp-chunk-a
5151
5252
@return remap_dep (multiline str) with Jinja formatting listing source-pp
53-
dependencies
53+
dependencies
5454
"""
5555
pp_components = pp_components_str.split(' ')
5656
if grid_type == "regrid-xy":

Jinja2Filters/form_task_parameters.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
"""
2-
Form the task parameter list based on the grid type,
2+
Form the task parameter list based on the grid type,
33
the temporal type,and the desired pp component(s)
44
"""
55

Jinja2Filters/get_analysis_info.py

Lines changed: 49 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
1+
import logging
12
from pathlib import Path
23

34
from metomi.isodatetime.parsers import DurationParser, TimePointParser
45
from yaml import safe_load
56

6-
from legacy_date_conversions import *
7+
from legacy_date_conversions import convert_iso_duration_to_bronx_chunk
78

89
# set up logging
9-
import logging
1010
logging.basicConfig()
1111
logger = logging.getLogger(__name__)
1212
logger.setLevel(logging.INFO)
@@ -17,9 +17,9 @@
1717
time_parser = TimePointParser(assumed_time_zone=(0, 0))
1818

1919

20-
class AnalysisScript(object):
20+
class AnalysisScript:
2121
def __init__(self, name, config, experiment_components, experiment_starting_date,
22-
experiment_stopping_date, pp_chunks):
22+
experiment_stopping_date, pp_chunks, yaml):
2323
"""Initialize the analysis script object.
2424
2525
Args:
@@ -29,13 +29,14 @@ def __init__(self, name, config, experiment_components, experiment_starting_date
2929
experiment_starting_date: Starting date for the experiment.
3030
experiment_stopping_date: Stopping date for the experiment.
3131
pp_chunks: List of ISO8601 durations used by the workflow.
32+
yaml: Resolved postprocessing yaml
3233
"""
3334
self.name = name
3435
logger.debug(f"{name}: initializing AnalysisScript instance")
3536

3637
# Skip if configuration wants to skip it
3738
self.switch = config["workflow"]["analysis_on"]
38-
if self.switch == False:
39+
if self.switch is False:
3940
return
4041

4142
# Skip if the components are not available
@@ -55,9 +56,30 @@ def __init__(self, name, config, experiment_components, experiment_starting_date
5556
self.script_type = config["workflow"]["script_type"]
5657
self.chunk = duration_parser.parse(config["workflow"]["chunk_size"])
5758

58-
if self.chunk not in pp_chunks:
59-
raise ValueError(f"ERROR: Analysis script '{self.name}' requests chunk size '{self.chunk}', but " +
60-
f"this chunk size is not declared in 'pp_chunks'")
59+
# Retrieve other config
60+
self.data_frequency = config["required"]["data_frequency"]
61+
62+
# check for needed pp prerequisites
63+
if self.product not in ['av', 'ts']:
64+
raise ValueError("ERROR: product type must be 'ts' or 'av'")
65+
if self.product == "ts":
66+
if self.chunk not in pp_chunks:
67+
raise ValueError(f"ERROR: Analysis script '{self.name}' requests timeseries chunk size '{self.chunk}', but " +
68+
"this chunk size is not declared in 'pp_chunks'")
69+
else:
70+
# Loop through the components and look for the ones specified by the analysis script
71+
# For each component to check, confirm that its climatology section contains the requested climo chunk
72+
for ana_comp in config["workflow"]["components"]:
73+
found_needed_inputs_for_component = False
74+
for exp_comp in yaml["postprocess"]["components"]:
75+
if exp_comp["type"] == ana_comp:
76+
if 'climatology' in exp_comp:
77+
for climo_request in exp_comp["climatology"]:
78+
if climo_request["frequency"] == self.data_frequency and climo_request["interval_years"] == self.chunk.years:
79+
found_needed_inputs_for_component = True
80+
if not found_needed_inputs_for_component:
81+
raise ValueError(f"ERROR: Analysis script '{self.name}' requests climatology chunk size '{self.chunk}', but " +
82+
f"no suitable climatology sections were found in postprocess component '{ana_comp}'")
6183

6284
# Parse the new analysis config items
6385
if 'legacy' in config:
@@ -73,8 +95,6 @@ def __init__(self, name, config, experiment_components, experiment_starting_date
7395
else:
7496
self.is_legacy = False
7597

76-
self.data_frequency = config["required"]["data_frequency"]
77-
7898
# if dates are years, convert to string or else ISO conversion will fail
7999
if isinstance(config["required"]["date_range"][0], int):
80100
one = "{:04d}".format(config["required"]["date_range"][0])
@@ -98,7 +118,7 @@ def graph(self, analysis_only):
98118
Returns:
99119
String cylc task graph for the analysis.
100120
"""
101-
if self.switch == False:
121+
if self.switch is False:
102122
return ""
103123

104124
graph = ""
@@ -191,7 +211,7 @@ def definition(self, pp_dir):
191211
Returns:
192212
Cylc task definition string for this analysis script
193213
"""
194-
if self.switch == False:
214+
if self.switch is False:
195215
return ""
196216

197217
definitions = ""
@@ -270,12 +290,12 @@ def definition(self, pp_dir):
270290
new_analysis_str = f"""
271291
[[analysis-{self.name}]]
272292
script = '''
273-
fre analysis run \
274-
--name freanalysis_{self.name} \
275-
--catalog $catalog \
276-
--output-directory $out_dir/{self.name} \
277-
--output-yaml $out_dir/{self.name}/output.yaml \
278-
--experiment-yaml $experiment_yaml \
293+
fre analysis run
294+
--name freanalysis_{self.name}
295+
--catalog $catalog
296+
--output-directory $out_dir/{self.name}
297+
--output-yaml $out_dir/{self.name}/output.yaml
298+
--experiment-yaml $experiment_yaml
279299
--library-directory $CYLC_WORKFLOW_SHARE_DIR/analysis-envs/freanalysis_{self.name}
280300
'''
281301
# retry 10 times (due to mysterious intake-esm issue)
@@ -293,25 +313,27 @@ def definition(self, pp_dir):
293313
'''
294314
"""
295315

296-
if self.script_type == "independent" and self.date_range == self.experiment_date_range:
316+
if self.script_type == "independent":
297317
# to make the task run, we will create a corresponding task graph below
298318
# corresponding to the interval (chunk), e.g. ANALYSIS-P1Y.
299319
# Then, the analysis script will inherit from that family, to enable
300320
# both the task triggering and the yr1 and datachunk template vars.
301-
logger.info(f"{self.name}: Will run every chunk {self.chunk}")
321+
logger.debug(f"{self.name}: Will run every chunk {self.chunk}")
302322
if self.is_legacy:
303323
definitions += legacy_analysis_str
304324
else:
305325
definitions += new_analysis_str
306326

307327
# create the task family for all every-interval analysis scripts
328+
interval_years_minus_one = self.chunk - one_year
308329
definitions += f"""
309330
[[data-catalog-{self.chunk}]]
310331
inherit = DATA-CATALOG
311332
[[ANALYSIS-{self.chunk}]]
312333
inherit = ANALYSIS
313334
[[[environment]]]
314335
yr1 = $(cylc cycle-point --template=CCYY)
336+
yr2 = $(cylc cycle-point --template=CCYY --offset-years={interval_years_minus_one.years})
315337
databegyr = $yr1
316338
dataendyr = $yr2
317339
datachunk = {self.chunk.years}
@@ -346,12 +368,12 @@ def definition(self, pp_dir):
346368
logger.debug(f"{self.name}: Finished determining scripting")
347369
return definitions
348370

349-
if self.script_type == "cumulative" and self.date_range == self.experiment_date_range:
371+
if self.script_type == "cumulative":
350372
# Case 2: run the analysis every chunk, but depend on all previous chunks too.
351373
# To make the task run, we will create a task family for
352374
# each chunk/interval, starting from the beginning of pp data
353375
# then we create an analysis script task for each of these task families.
354-
logger.info(f"{self.name}: Will run each chunk {self.chunk} from beginning {self.experiment_date_range[0]}")
376+
logger.debug(f"{self.name}: Will run each chunk {self.chunk} from beginning {self.experiment_date_range[0]}")
355377
date = self.experiment_date_range[0]
356378
while date <= self.experiment_date_range[1]:
357379
date_str = f"{date.year:04}"
@@ -428,7 +450,7 @@ def definition(self, pp_dir):
428450
d2 -= self.chunk
429451
d1_str = f"{d1.year:04}"
430452
d2_str = f"{d2.year:04}"
431-
logger.info(f"{self.name}: Will run once for time period {self.date_range[0]} to {self.date_range[1]} (chunks {d1_str} to {d2_str})")
453+
logger.debug(f"{self.name}: Will run once for time period {self.date_range[0]} to {self.date_range[1]} (chunks {d1_str} to {d2_str})")
432454
date1_str = f"{self.date_range[0].year:04}"
433455
date2_str = f"{self.date_range[1].year:04}"
434456

@@ -494,9 +516,9 @@ def task_generator(yaml_, experiment_components, experiment_start, experiment_st
494516
for script_name, script_params in yaml_["analysis"].items():
495517
# Retrieve information about the script
496518
script_info = AnalysisScript(script_name, script_params, experiment_components,
497-
experiment_start, experiment_stop, pp_chunks)
498-
if script_info.switch == False:
499-
logger.info(f"{script_name}: Skipping, switch set to off")
519+
experiment_start, experiment_stop, pp_chunks, yaml_)
520+
if script_info.switch is False:
521+
logger.debug(f"{script_name}: Skipping, switch set to off")
500522
continue
501523
yield script_info
502524

@@ -575,7 +597,7 @@ def get_analysis_info(experiment_yaml, info_type, experiment_components, pp_dir,
575597
logger.debug("get_analysis_info: about to return graph")
576598
return task_graph(yaml_, experiment_components, experiment_start,
577599
experiment_stop, pp_chunks, analysis_only)
578-
elif info_type == "task-definitions":
600+
if info_type == "task-definitions":
579601
logger.debug("get_analysis_info: about to return definitions")
580602
return task_definitions(yaml_, experiment_components, experiment_start,
581603
experiment_stop, pp_chunks, pp_dir)

Jinja2Filters/get_climatology_info.py

Lines changed: 17 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,14 @@
1-
from pathlib import Path
2-
1+
import logging
32
import metomi.isodatetime.dumpers
43
import metomi.isodatetime.parsers
54
from yaml import safe_load
6-
import pprint
75

86
from legacy_date_conversions import *
97

108
# set up logging
11-
import logging
129
logging.basicConfig()
1310
logger = logging.getLogger(__name__)
14-
logger.setLevel(logging.DEBUG)
11+
logger.setLevel(logging.INFO)
1512

1613
# Global variables just set to reduce typing a little.
1714
duration_parser = metomi.isodatetime.parsers.DurationParser()
@@ -22,9 +19,9 @@
2219
def sort_pp_chunks(unsorted_strings):
2320
"""Create descending list of pp chunk durations"""
2421
durations = []
25-
for s in unsorted_strings:
26-
durations.append(duration_parser.parse(s))
27-
return(sorted(durations, reverse=True))
22+
for string in unsorted_strings:
23+
durations.append(duration_parser.parse(string))
24+
return sorted(durations, reverse=True)
2825

2926
def lookup_source_for_component(yaml_, component):
3027
"""Return list of history files associated with a pp component"""
@@ -33,9 +30,9 @@ def lookup_source_for_component(yaml_, component):
3330
if item["type"] == component:
3431
for source in item["sources"]:
3532
sources.append(source["history_file"])
36-
return(sources)
33+
return sources
3734

38-
class Climatology(object):
35+
class Climatology:
3936
def __init__(self, component, frequency, interval_years, pp_chunk, sources, grid):
4037
"""Initialize the climatology object
4138
@@ -74,10 +71,10 @@ def graph(self, history_segment, clean_work):
7471

7572
chunks_per_interval = self.interval_years / self.pp_chunk.years
7673
assert chunks_per_interval == int(chunks_per_interval)
77-
for index, source in enumerate(self.sources):
74+
for source in self.sources:
7875
count = 0
7976
while count < chunks_per_interval:
80-
if index == 0:
77+
if count == 0:
8178
connector = ""
8279
else:
8380
connector = " & "
@@ -97,7 +94,10 @@ def graph(self, history_segment, clean_work):
9794
graph += f" => climo-{self.frequency}-P{self.interval_years}Y_{self.component}\n"
9895
graph += f" => remap-climo-{self.frequency}-P{self.interval_years}Y_{self.component}\n"
9996
graph += f" => combine-climo-{self.frequency}-P{self.interval_years}Y_{self.component}\n"
100-
graph += f"\"\"\"\n"
97+
if clean_work:
98+
graph += f"remap-climo-{self.frequency}-P{self.interval_years}Y_{self.component} => clean-shards-av-P{self.interval_years}Y\n"
99+
graph += f"combine-climo-{self.frequency}-P{self.interval_years}Y_{self.component} => clean-pp-timeavgs-P{self.interval_years}Y\n"
100+
graph += f"\"\"\"\n"
101101

102102
# then, create the cleaning tasks
103103
if clean_work:
@@ -140,12 +140,14 @@ def definition(self, clean_work):
140140

141141
definitions += f"""
142142
[[combine-climo-{self.frequency}-P{self.interval_years}Y_{self.component}]]
143-
inherit = COMBINE-TIMEAVGS
143+
inherit = COMBINE-TIMEAVGS-P{self.interval_years}Y
144144
[[[environment]]]
145145
component = {self.component}
146146
frequency = {self.frequency}
147147
interval = P{self.interval_years}Y
148148
end = $(cylc cycle-point --print-year --offset={offset})
149+
[[COMBINE-TIMEAVGS-P{self.interval_years}Y]]
150+
inherit = COMBINE-TIMEAVGS
149151
"""
150152

151153
if clean_work:
@@ -163,8 +165,6 @@ def definition(self, clean_work):
163165
return definitions
164166

165167
def task_generator(yaml_):
166-
history_segment = yaml_["postprocess"]["settings"]["history_segment"]
167-
168168
# Retrieve the pp components
169169
components = []
170170
for component in yaml_["postprocess"]["components"]:
@@ -265,7 +265,7 @@ def get_climatology_info(experiment_yaml, info_type):
265265
if info_type == "task-graph":
266266
logger.debug("about to return graph")
267267
return task_graphs(yaml_, history_segment, clean_work)
268-
elif info_type == "task-definitions":
268+
if info_type == "task-definitions":
269269
logger.debug("about to return definitions")
270270
return task_definitions(yaml_, clean_work)
271271

Jinja2Filters/get_components.py

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,7 @@
1-
import re
2-
import os
3-
from pathlib import Path
1+
import logging
42
import yaml
53

64
# set up logging
7-
import logging
85
logging.basicConfig(level=logging.INFO)
96
fre_logger = logging.getLogger(__name__)
107

@@ -26,4 +23,4 @@ def get_components(yamlfile):
2623

2724
# we want to return a list, but some other scripts are expecting a space-separated string
2825
#return(components)
29-
return(" ".join(components))
26+
return " ".join(components)

0 commit comments

Comments
 (0)