1- # mypy: ignore-errors
21"""
32=====================
43Performance Reporting
1110import json
1211import re
1312from pathlib import Path
13+ from typing import Generator
1414
1515import numpy as np
1616import pandas as pd
@@ -45,7 +45,7 @@ def __init__(self, log_dir: Path):
4545 self .log_dir : Path = log_dir
4646 self .errors : int = 0
4747
48- def get_summaries (self ) -> dict :
48+ def get_summaries (self ) -> Generator [ pd . DataFrame , None , None ] :
4949 """Generator to get all performance summary log messages in PerformanceSummary"""
5050 for log in [
5151 f for f in self .log_dir .iterdir () if self .PERF_LOG_PATTERN .fullmatch (f .name )
@@ -85,15 +85,15 @@ def to_df(self) -> pd.DataFrame:
8585 TELEMETRY_PATTERN = re .compile (r"^{\"host\".+\"job_number\".+}$" )
8686 PERF_LOG_PATTERN = re .compile (r"^perf\.([0-9]+)\.([0-9]+)\.log$" )
8787
def clean_perf_logs(self) -> None:
    """Delete every performance log file found directly inside ``self.log_dir``.

    A directory entry counts as a performance log when its file name fully
    matches ``self.PERF_LOG_PATTERN``. Meant to be run after ``to_df`` has
    been called.
    """
    # Collect all matching entries first so nothing is deleted while the
    # directory is still being listed.
    stale_logs = []
    for entry in self.log_dir.iterdir():
        if self.PERF_LOG_PATTERN.fullmatch(entry.name):
            stale_logs.append(entry)

    for stale_log in stale_logs:
        stale_log.unlink()
9494
9595
96- def set_index_scenario_cols (perf_df : pd .DataFrame ) -> tuple [pd .DataFrame , list ]:
96+ def set_index_scenario_cols (perf_df : pd .DataFrame ) -> tuple [pd .DataFrame , list [ str ] ]:
9797 """Get the columns useful to index performance data by."""
9898 index_cols = BASE_PERF_INDEX_COLS
9999 scenario_cols = [col for col in perf_df .columns if col .startswith ("scenario_" )]
@@ -102,7 +102,7 @@ def set_index_scenario_cols(perf_df: pd.DataFrame) -> tuple[pd.DataFrame, list]:
102102 return perf_df , scenario_cols
103103
104104
105- def add_squid_api_data (perf_df : pd .DataFrame ):
105+ def add_squid_api_data (perf_df : pd .DataFrame ) -> pd . DataFrame :
106106 """Add Squid API data to the performance dataframe.
107107
108108 Given a dataframe from PerformanceSummary.to_df, add Squid API data for the job.
@@ -126,7 +126,7 @@ def add_squid_api_data(perf_df: pd.DataFrame):
126126 return perf_df
127127
128128
129- def print_stat_report (perf_df : pd .DataFrame , scenario_cols : list ) :
129+ def print_stat_report (perf_df : pd .DataFrame , scenario_cols : list [ str ]) -> None :
130130 """Print some helpful stats from the performance data.
131131
132132 The stats are grouped by scenario_cols.
@@ -145,7 +145,7 @@ def print_stat_report(perf_df: pd.DataFrame, scenario_cols: list):
145145 )
146146 perf_df ["compound_scenario" ] = (
147147 perf_df [scenario_cols ]
148- .to_csv (header = None , index = False , sep = "/" )
148+ .to_csv (header = False , index = False , sep = "/" )
149149 .strip ("\n " )
150150 .split ("\n " )
151151 )
@@ -189,7 +189,7 @@ def report_performance(
189189 output_directory : Path | str ,
190190 output_hdf : bool ,
191191 verbose : int ,
192- ):
192+ ) -> pd . DataFrame | None :
193193 """Main method for vipin reporting.
194194
195195 Gets job performance data, outputs to a file, and logs a report.
@@ -201,7 +201,7 @@ def report_performance(
201201
202202 if len (perf_df ) < 1 :
203203 logger .warning (f"No performance data found in { input_directory } ." )
204- return # nothing left to do
204+ return None # nothing left to do
205205
206206 # Add jobapi data about the job to dataframe
207207 perf_df = add_squid_api_data (perf_df )
0 commit comments