Skip to content

Commit 86a323c

Browse files
Merge pull request #7204 from oliver-sanders/report-timings
report timings: port and test
1 parent 752f765 commit 86a323c

File tree

7 files changed

+194
-78
lines changed

7 files changed

+194
-78
lines changed

.codecov.yml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,6 @@ coverage:
2929
ignore:
3030
- "tests/**"
3131
- "ws_messages_pb2.py"
32-
- "cylc/flow/scripts/report_timings.py"
3332
- "cylc/flow/network/graphql_subscribe.py"
3433

3534
flag_management:

.coveragerc

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@ omit =
1616
tests/*
1717
*/cylc/flow/*_pb2.py
1818
cylc/flow/etc/*
19-
cylc/flow/scripts/report_timings.py
2019
parallel = True
2120
source = ./cylc
2221
# https://github.com/coveragepy/coveragepy/issues/2082:

changes.d/7204.feat.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
The `cylc report-timings` command has been ported to the latest version of Pandas, allowing it to be installed more easily.
2+
The deprecation notice has been removed.

conda-environment.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ dependencies:
1919
- urwid >=2.2,<4,!=2.6.2,!=2.6.3
2020

2121
# optional dependencies
22-
#- pandas >=1.0,<2
22+
#- pandas >=2,<3
2323
#- pympler
2424
#- matplotlib-base
2525
#- sqlparse

cylc/flow/scripts/report_timings.py

Lines changed: 49 additions & 74 deletions
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,6 @@
5454
from collections import Counter
5555
from typing import TYPE_CHECKING
5656

57-
from cylc.flow import LOG
5857
from cylc.flow.exceptions import CylcError
5958
from cylc.flow.id_cli import parse_id
6059
from cylc.flow.option_parsers import (
@@ -90,25 +89,45 @@ def smart_open(filename=None):
9089
fh.close()
9190

9291

92+
def format_raw(row_buf, output):
93+
"""Implement --format=raw"""
94+
output.write(row_buf.getvalue())
95+
96+
97+
def format_summary(row_buf, output):
98+
"""Implement --format=summary"""
99+
summary = TextTimingSummary(row_buf)
100+
summary.write_summary(output)
101+
102+
103+
def format_html(row_buf, output):
104+
"""Implement --format=html"""
105+
summary = HTMLTimingSummary(row_buf)
106+
summary.write_summary(output)
107+
108+
109+
# supported output formats
110+
FORMATS = {
111+
'raw': format_raw,
112+
'summary': format_summary,
113+
'html': format_html,
114+
}
115+
116+
93117
def get_option_parser() -> COP:
94118
parser = COP(
95119
__doc__,
96120
argdoc=[WORKFLOW_ID_ARG_DOC]
97121
)
98122
parser.add_option(
99-
"-r", "--raw",
100-
help="Show raw timing output suitable for custom diagnostics.",
101-
action="store_true", default=False, dest="show_raw"
102-
)
103-
parser.add_option(
104-
"-s", "--summary",
105-
help="Show textual summary timing output for tasks.",
106-
action="store_true", default=False, dest="show_summary"
107-
)
108-
parser.add_option(
109-
"-w", "--web-summary",
110-
help="Show HTML summary timing output for tasks.",
111-
action="store_true", default=False, dest="html_summary"
123+
'--format', '-t',
124+
help=(
125+
f"Select output format. Choose from: {', '.join(FORMATS)}. "
126+
r"Default: %default."
127+
),
128+
action='store',
129+
default='summary',
130+
choices=list(FORMATS)
112131
)
113132
parser.add_option(
114133
"-O", "--output-file",
@@ -120,39 +139,19 @@ def get_option_parser() -> COP:
120139

121140
@cli_function(get_option_parser)
122141
def main(parser: COP, options: 'Values', workflow_id: str) -> None:
142+
_main(options, workflow_id)
143+
144+
145+
def _main(options: 'Values', workflow_id: str) -> None:
123146
workflow_id, *_ = parse_id(
124147
workflow_id,
125148
constraint='workflows',
126149
)
127-
128-
LOG.warning(
129-
"cylc report-timings is deprecated."
130-
" The analysis view in the GUI provides"
131-
" similar functionality."
132-
)
133-
134-
output_options = [
135-
options.show_raw, options.show_summary, options.html_summary
136-
]
137-
if output_options.count(True) > 1:
138-
parser.error('Cannot combine output formats (choose one)')
139-
if not any(output_options):
140-
# No output specified - choose summary by default
141-
options.show_summary = True
142-
143150
db_file = get_workflow_run_pub_db_path(workflow_id)
144151
with CylcWorkflowDAO(db_file, is_public=True) as dao:
145152
row_buf = format_rows(*dao.select_task_times())
146153
with smart_open(options.output_filename) as output:
147-
if options.show_raw:
148-
output.write(row_buf.getvalue())
149-
else:
150-
summary: TimingSummary
151-
if options.show_summary:
152-
summary = TextTimingSummary(row_buf)
153-
elif options.html_summary:
154-
summary = HTMLTimingSummary(row_buf)
155-
summary.write_summary(output)
154+
FORMATS[options.format](row_buf, output)
156155

157156

158157
def format_rows(header, rows):
@@ -172,7 +171,7 @@ def format_rows(header, rows):
172171
]
173172
formatter = ' '.join('%%-%ds' % line for line in max_lengths) + '\n'
174173
sio.write(formatter % header)
175-
for r in rows:
174+
for r in sorted(rows):
176175
sio.write(formatter % r)
177176
sio.seek(0)
178177
return sio
@@ -181,15 +180,10 @@ def format_rows(header, rows):
181180
class TimingSummary:
182181
"""Base class for summarizing timing output from cylc.flow run database."""
183182

184-
def __init__(self, filepath_or_buffer=None):
183+
def __init__(self, filepath_or_buffer):
185184
"""Set up internal dataframe storage for time durations."""
186-
187185
self._check_imports()
188-
if filepath_or_buffer is not None:
189-
self.read_timings(filepath_or_buffer)
190-
else:
191-
self.df = None
192-
self.by_host_and_job_runner = None
186+
self.read_timings(filepath_or_buffer)
193187

194188
def read_timings(self, filepath_or_buffer):
195189
"""
@@ -203,7 +197,7 @@ def read_timings(self, filepath_or_buffer):
203197
pd.set_option('display.max_colwidth', 10000)
204198

205199
df = pd.read_csv(
206-
filepath_or_buffer, delim_whitespace=True, index_col=[0, 1, 2, 3],
200+
filepath_or_buffer, sep=r'\s+', index_col=[0, 1, 2, 3],
207201
parse_dates=[4, 5, 6]
208202
)
209203
self.df = pd.DataFrame({
@@ -219,18 +213,13 @@ def read_timings(self, filepath_or_buffer):
219213
level=['host', 'job_runner']
220214
)
221215

222-
def write_summary(self, buf=None):
216+
def write_summary(self, buf=sys.stdout):
223217
"""Using the stored timings dataframe, output the data summary."""
224-
225-
if buf is None:
226-
buf = sys.stdout
227218
self.write_summary_header(buf)
228219
for group, df in self.by_host_and_job_runner:
229220
self.write_group_header(buf, group)
230221
df_reshape = self._reshape_timings(df)
231222
df_describe = df.groupby(level='name').describe()
232-
if df_describe.index.nlevels > 1:
233-
df_describe = df_describe.unstack() # for pandas < 0.20.0
234223
df_describe.index.rename(None, inplace=True)
235224
for timing_category in self.df.columns:
236225
self.write_category(
@@ -286,17 +275,11 @@ def _reshape_timings(timings):
286275
timings = timings.assign(retry=retry)
287276
timings = timings.set_index('retry', append=True)
288277

289-
return timings.unstack('name').stack(level=0)
278+
return timings.unstack('name').stack(level=0, future_stack=True)
290279

291280
@staticmethod
292281
def _dt_to_s(dt):
293-
import pandas as pd
294-
try:
295-
return dt.total_seconds()
296-
except AttributeError:
297-
# Older versions of pandas have the timedelta as a numpy
298-
# timedelta64 type, which didn't support total_seconds
299-
return pd.Timedelta(dt).total_seconds()
282+
return dt.total_seconds()
300283

301284

302285
class TextTimingSummary(TimingSummary):
@@ -374,23 +357,15 @@ def write_category(self, buf, category, df_reshape, df_describe):
374357
ax = (
375358
df_reshape
376359
.xs(category, level='timing_category')
377-
.plot(kind='box', vert=False)
360+
.plot(kind='box', orientation='vertical')
378361
)
379362
ax.invert_yaxis()
380363
ax.set_xlabel('Seconds')
381364
plt.tight_layout()
382365
plt.gcf().savefig(buf, format='svg')
383-
try:
384-
table = df_describe[category].to_html(
385-
classes="summary", index_names=False, border=0
386-
)
387-
except TypeError:
388-
# older pandas don't support the "border" argument
389-
# so explicitly remove it
390-
table = df_describe[category].to_html(
391-
classes="summary", index_names=False
392-
)
393-
table = table.replace('border="1"', '')
366+
table = df_describe[category].to_html(
367+
classes="summary", index_names=False, border=0
368+
)
394369
buf.write(table)
395370
buf.write('</div>')
396371
pass

setup.cfg

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,7 @@ main_loop-log_memory =
9090
main_loop-log_db =
9191
sqlparse
9292
report-timings =
93-
pandas==1.*
93+
pandas==2.*
9494
matplotlib
9595
tests =
9696
aiosmtpd
@@ -131,6 +131,7 @@ all =
131131
%(main_loop-log_memory)s
132132
%(tests)s
133133
%(tutorials)s
134+
%(report-timings)s
134135

135136
[options.entry_points]
136137
# top level shell commands

0 commit comments

Comments
 (0)