1111
1212# Mapping of cli-friendly names to (internal_data_key, Display Header, numeric_sort_key)
1313COL_MAP = {
14+ "tot-usec" : ("tot_usec" , "Tot usec" , "_sort_tot_usec" ),
1415 "op" : ("op" , "Op" , "op" ),
1516 "dims" : ("dims" , "Dims" , "dims" ),
1617 "dtypes" : ("dtypes" , "DTypes" , "dtypes" ),
2425}
2526
2627op_pattern = re .compile (
27- r"profile-op\s+(?P<op_name>[A-Z_0-9+]+):\s+.*?\s+:\s+(?P<dims>[\d:x\s\->!]+)\s+:\s+(?P<types>[a-z\d_\s\->x]+)\s+:\s+.*?\s+usec\s+(?P<usec>\d+)\s+cycles\s+(?P<cycles>\d+)(?:\s+pmu\s+\[(?P<pmu>[\d,\s]+)\])?"
28+ r"profile-op\s+(?P<op_name>[A-Z_0-9+]+):\s+.*?\s+:\s+(?P<dims>[\d:x\s\->!]+)\s+:\s+(?P<types>[a-z\d_\s\->x]+)\s+:\s+.*?\s+(?:op-)? usec\s+(?P<usec>\d+)\s+(?:op-)? cycles\s+(?P<cycles>\d+)(?:\s+pmu\s+\[(?P<pmu>[\d,\s]+)\])?"
2829)
2930
3031logger = logging .getLogger ("ggml-hexagon-profile" )
@@ -85,21 +86,27 @@ def generate_report(ops, top_n, width_overrides, sort_col, pmu_name=None):
8586 cycles = [o ['cycles' ] for o in group_ops ]
8687 pmu_vals = [o ['pmu_val' ] for o in group_ops if o ['pmu_val' ] is not None ]
8788
89+ avg_usec_val = statistics .mean (usecs )
90+ count_val = len (group_ops )
91+ tot_usec_val = avg_usec_val * count_val
92+
8893 group_stats .append ({
8994 'op' : name ,
9095 'dims' : dims ,
9196 'dtypes' : types ,
92- 'count' : str (len ( group_ops ) ),
97+ 'count' : str (count_val ),
9398 'max_usec' : str (max (usecs )),
94- 'avg_usec' : f"{ statistics .mean (usecs ):.2f} " ,
99+ 'avg_usec' : f"{ avg_usec_val :.2f} " ,
100+ 'tot_usec' : f"{ tot_usec_val :.2f} " ,
95101 'max_cycles' : str (max (cycles )),
96102 'avg_cycles' : f"{ statistics .mean (cycles ):.2f} " ,
97103 'max_pmu' : str (max (pmu_vals )) if pmu_vals else "0" ,
98104 'avg_pmu' : f"{ statistics .mean (pmu_vals ):.2f} " if pmu_vals else "0.00" ,
99105 # Numeric values for accurate sorting
100- '_sort_count' : len ( group_ops ) ,
106+ '_sort_count' : count_val ,
101107 '_sort_max_usec' : max (usecs ),
102- '_sort_avg_usec' : statistics .mean (usecs ),
108+ '_sort_avg_usec' : avg_usec_val ,
109+ '_sort_tot_usec' : tot_usec_val ,
103110 '_sort_max_cycles' : max (cycles ),
104111 '_sort_avg_cycles' : statistics .mean (cycles ),
105112 '_sort_max_pmu' : max (pmu_vals ) if pmu_vals else 0 ,
@@ -116,7 +123,7 @@ def generate_report(ops, top_n, width_overrides, sort_col, pmu_name=None):
116123 active_cols = ["op" , "dims" , "dtypes" ]
117124 if pmu_name :
118125 active_cols += ["max-pmu" , "avg-pmu" ]
119- active_cols += ["max -usec" , "avg-usec" , "max -cycles" , "avg -cycles" , "count" ]
126+ active_cols += ["tot -usec" , "avg-usec" , "avg -cycles" , "max-usec" , "max -cycles" , "count" ]
120127
121128 final_headers , final_keys , final_widths = [], [], []
122129
@@ -156,7 +163,7 @@ def main():
156163 parser = argparse .ArgumentParser (description = "Post-process Op profile info." )
157164 parser .add_argument ("logfile" )
158165 parser .add_argument ("-n" , "--top" , type = int , default = 100 )
159- parser .add_argument ("--sort" , type = str , default = "max -usec" , choices = list (COL_MAP .keys ()))
166+ parser .add_argument ("--sort" , type = str , default = "tot -usec" , choices = list (COL_MAP .keys ()))
160167 parser .add_argument ("--pmu-index" , type = int )
161168 parser .add_argument ("--pmu-name" , type = str )
162169 parser .add_argument ("--width" , action = 'append' , default = ['dims:40' ], help = "Override column width, e.g. --width dims:50" )
0 commit comments