5454from collections import Counter
5555from typing import TYPE_CHECKING
5656
57- from cylc .flow import LOG
5857from cylc .flow .exceptions import CylcError
5958from cylc .flow .id_cli import parse_id
6059from cylc .flow .option_parsers import (
@@ -90,25 +89,45 @@ def smart_open(filename=None):
9089 fh .close ()
9190
9291
def format_raw(row_buf, output):
    """Implement --format=raw.

    Copy the contents of the row buffer to the output stream unchanged.

    Args:
        row_buf: Buffer exposing ``getvalue()`` that holds the timing rows.
        output: Writable stream that receives the text.
    """
    output.write(row_buf.getvalue())
95+
96+
def format_summary(row_buf, output):
    """Implement --format=summary.

    Build a ``TextTimingSummary`` from the row buffer and have it write
    its summary to the output stream.

    Args:
        row_buf: Buffer of timing rows handed to ``TextTimingSummary``.
        output: Stream passed to ``write_summary``.
    """
    TextTimingSummary(row_buf).write_summary(output)
101+
102+
def format_html(row_buf, output):
    """Implement --format=html.

    Build an ``HTMLTimingSummary`` from the row buffer and have it write
    its summary to the output stream.

    Args:
        row_buf: Buffer of timing rows handed to ``HTMLTimingSummary``.
        output: Stream passed to ``write_summary``.
    """
    HTMLTimingSummary(row_buf).write_summary(output)
107+
108+
# Supported output formats: maps each --format choice to the function
# that renders it. Insertion order is the order shown in the help text.
FORMATS = {
    'raw': format_raw,
    'summary': format_summary,
    'html': format_html,
}
115+
116+
93117def get_option_parser () -> COP :
94118 parser = COP (
95119 __doc__ ,
96120 argdoc = [WORKFLOW_ID_ARG_DOC ]
97121 )
98122 parser .add_option (
99- "-r" , "--raw" ,
100- help = "Show raw timing output suitable for custom diagnostics." ,
101- action = "store_true" , default = False , dest = "show_raw"
102- )
103- parser .add_option (
104- "-s" , "--summary" ,
105- help = "Show textual summary timing output for tasks." ,
106- action = "store_true" , default = False , dest = "show_summary"
107- )
108- parser .add_option (
109- "-w" , "--web-summary" ,
110- help = "Show HTML summary timing output for tasks." ,
111- action = "store_true" , default = False , dest = "html_summary"
123+ '--format' , '-t' ,
124+ help = (
125+ f"Select output format. Choose from: { ', ' .join (FORMATS )} . "
126+ r"Default: %default."
127+ ),
128+ action = 'store' ,
129+ default = 'summary' ,
130+ choices = list (FORMATS )
112131 )
113132 parser .add_option (
114133 "-O" , "--output-file" ,
@@ -120,39 +139,19 @@ def get_option_parser() -> COP:
120139
@cli_function(get_option_parser)
def main(parser: COP, options: 'Values', workflow_id: str) -> None:
    """Command entry point registered through ``cli_function``.

    The parser argument is accepted but not used here; the work is
    delegated to ``_main`` with the parsed options and workflow ID.
    """
    _main(options, workflow_id)
143+
144+
def _main(options: 'Values', workflow_id: str) -> None:
    """Read task timings for a workflow and write them in the chosen format.

    Args:
        options: Parsed command line options; ``options.format`` selects
            the entry in ``FORMATS`` and ``options.output_filename`` is
            handed to ``smart_open``.
        workflow_id: Workflow identifier as given on the command line.
    """
    workflow_id, *_ = parse_id(
        workflow_id,
        constraint='workflows',
    )
    db_file = get_workflow_run_pub_db_path(workflow_id)
    with CylcWorkflowDAO(db_file, is_public=True) as dao:
        row_buf = format_rows(*dao.select_task_times())
    with smart_open(options.output_filename) as output:
        # Dispatch on the --format choice (restricted to the FORMATS keys
        # by the option parser).
        FORMATS[options.format](row_buf, output)
156155
157156
158157def format_rows (header , rows ):
@@ -172,7 +171,7 @@ def format_rows(header, rows):
172171 ]
173172 formatter = ' ' .join ('%%-%ds' % line for line in max_lengths ) + '\n '
174173 sio .write (formatter % header )
175- for r in rows :
174+ for r in sorted ( rows ) :
176175 sio .write (formatter % r )
177176 sio .seek (0 )
178177 return sio
@@ -181,15 +180,10 @@ def format_rows(header, rows):
181180class TimingSummary :
182181 """Base class for summarizing timing output from cylc.flow run database."""
183182
184- def __init__ (self , filepath_or_buffer = None ):
183+ def __init__ (self , filepath_or_buffer ):
185184 """Set up internal dataframe storage for time durations."""
186-
187185 self ._check_imports ()
188- if filepath_or_buffer is not None :
189- self .read_timings (filepath_or_buffer )
190- else :
191- self .df = None
192- self .by_host_and_job_runner = None
186+ self .read_timings (filepath_or_buffer )
193187
194188 def read_timings (self , filepath_or_buffer ):
195189 """
@@ -203,7 +197,7 @@ def read_timings(self, filepath_or_buffer):
203197 pd .set_option ('display.max_colwidth' , 10000 )
204198
205199 df = pd .read_csv (
206- filepath_or_buffer , delim_whitespace = True , index_col = [0 , 1 , 2 , 3 ],
200+ filepath_or_buffer , sep = r'\s+' , index_col = [0 , 1 , 2 , 3 ],
207201 parse_dates = [4 , 5 , 6 ]
208202 )
209203 self .df = pd .DataFrame ({
@@ -219,18 +213,13 @@ def read_timings(self, filepath_or_buffer):
219213 level = ['host' , 'job_runner' ]
220214 )
221215
222- def write_summary (self , buf = None ):
216+ def write_summary (self , buf = sys . stdout ):
223217 """Using the stored timings dataframe, output the data summary."""
224-
225- if buf is None :
226- buf = sys .stdout
227218 self .write_summary_header (buf )
228219 for group , df in self .by_host_and_job_runner :
229220 self .write_group_header (buf , group )
230221 df_reshape = self ._reshape_timings (df )
231222 df_describe = df .groupby (level = 'name' ).describe ()
232- if df_describe .index .nlevels > 1 :
233- df_describe = df_describe .unstack () # for pandas < 0.20.0
234223 df_describe .index .rename (None , inplace = True )
235224 for timing_category in self .df .columns :
236225 self .write_category (
@@ -286,17 +275,11 @@ def _reshape_timings(timings):
286275 timings = timings .assign (retry = retry )
287276 timings = timings .set_index ('retry' , append = True )
288277
289- return timings .unstack ('name' ).stack (level = 0 )
278+ return timings .unstack ('name' ).stack (level = 0 , future_stack = True )
290279
291280 @staticmethod
292281 def _dt_to_s (dt ):
293- import pandas as pd
294- try :
295- return dt .total_seconds ()
296- except AttributeError :
297- # Older versions of pandas have the timedelta as a numpy
298- # timedelta64 type, which didn't support total_seconds
299- return pd .Timedelta (dt ).total_seconds ()
282+ return dt .total_seconds ()
300283
301284
302285class TextTimingSummary (TimingSummary ):
@@ -374,23 +357,15 @@ def write_category(self, buf, category, df_reshape, df_describe):
374357 ax = (
375358 df_reshape
376359 .xs (category , level = 'timing_category' )
377- .plot (kind = 'box' , vert = False )
360+ .plot (kind = 'box' , orientation = 'vertical' )
378361 )
379362 ax .invert_yaxis ()
380363 ax .set_xlabel ('Seconds' )
381364 plt .tight_layout ()
382365 plt .gcf ().savefig (buf , format = 'svg' )
383- try :
384- table = df_describe [category ].to_html (
385- classes = "summary" , index_names = False , border = 0
386- )
387- except TypeError :
388- # older pandas don't support the "border" argument
389- # so explicitly remove it
390- table = df_describe [category ].to_html (
391- classes = "summary" , index_names = False
392- )
393- table = table .replace ('border="1"' , '' )
366+ table = df_describe [category ].to_html (
367+ classes = "summary" , index_names = False , border = 0
368+ )
394369 buf .write (table )
395370 buf .write ('</div>' )
396371 pass
0 commit comments