# mlir/utils/performance/quickTunerPreproc.py
#!/usr/bin/env python3

"""
quickTuner preprocessor script to combine .debug output files from tuningRunner.py or tuna-script.sh

Usage: quickTunerPreproc.py [-h] --input-dir INPUT_DIR --output OUTPUT [--op {gemm,conv}] [-d] [--file-ext FILE_EXT]

Example Usage:

python3 quickTunerPreproc.py --input-dir /path/to/debug/files --no-splitK --output combined_data


Note:
If using MITuna edit MITuna/tuna/rocmlir/rocmlir_worker.py, editing:


cmd = env_str + f" python3 ./bin/tuningRunner.py -q {special_args} \
--config='{config_string}' --mlir-build-dir `pwd` \
--output=- --tflops \
--rocmlir_gen_flags='--device={self.gpu_id}' 2>/dev/null"


to:

import uuid


if not os.path.exists("./run"):
os.makedirs("./run")

unique_file_id = uuid.uuid4().hex

file_id = os.path.join("./run", unique_file_id)

cmd = env_str + f" python3 ./bin/tuningRunner.py -q {special_args} \
--config='{config_string}' --mlir-build-dir `pwd` \
--output={file_id} --tflops --debug \
--rocmlir_gen_flags='--device={self.gpu_id}' 2>/dev/null"

"""

import os
import sys
import argparse
import pandas as pd
import glob
from sklearn.preprocessing import MinMaxScaler

class qtPreprocessor():
    """
    Combine per-run ``*.debug`` TSV output files (from tuningRunner.py or
    tuna-script.sh) into a single pandas DataFrame, optionally normalizing
    TFlops per file and filtering out splitK perf-configs.

    The main entry point is the static :meth:`process`, so callers (e.g.
    quickTunerGen.py) can use ``qtPreprocessor.process(...)`` without
    constructing an instance.
    """

    # Key columns that identify one unique problem per operation type.
    _GEMM_COLS = ['TransA', 'TransB', 'G', 'M', 'N', 'K']
    _CONV_COLS = ['N', 'C', 'K', 'Y', 'X', 'DilationH', 'DilationW',
                  'StrideH', 'StrideW', 'PaddingH', 'PaddingW']

    def __init__(self, pargs):
        # Only the input directory is carried on the instance; all real work
        # happens in the static process() entry point.
        self.input_dir = pargs.input_dir

    @staticmethod
    def __print_stats(df, ct, key_cols, label):
        """
        Shared helper: print summary statistics for the combined DataFrame —
        number of files processed, datatypes found, and the unique problem
        configurations selected by `key_cols`.

        Args:
            df: combined DataFrame (must contain 'DataType' and `key_cols`).
            ct: number of input files that were processed.
            key_cols: columns identifying a unique problem.
            label: human-readable problem name used in the printout.
        """
        print(f"Files processed: {ct}")

        print("Types found:")
        for dtype in df['DataType'].unique():
            print(f"\t{dtype}")

        unique_rows = df[key_cols].drop_duplicates()
        print(f"Number of unique {label}: {len(unique_rows)}")
        for _, row in unique_rows.iterrows():
            print(','.join(map(str, row)))

    @staticmethod
    def __get_stats_gemm(df, ct):
        """Print stats for gemm data (unique problems keyed by transpose flags and G/M/N/K)."""
        qtPreprocessor.__print_stats(df, ct, qtPreprocessor._GEMM_COLS, "Gemms")

    @staticmethod
    def __get_stats_conv(df, ct):
        """Print stats for conv data (was a copy-paste of the gemm helper and mislabeled convs as 'Gemms')."""
        qtPreprocessor.__print_stats(df, ct, qtPreprocessor._CONV_COLS, "Convs")

    @staticmethod
    def process(input_dir, output_name=None, op='gemm', file_ext="debug", debug=False, normalize=True, no_splitK=False):
        """
        Compile all ``*.{file_ext}`` TSV files under `input_dir` into a single
        DataFrame and optionally save it as a TSV file.

        Args:
            input_dir: directory containing the per-run debug files.
            output_name: if given, path to write the combined TSV to.
            op: 'gemm' or 'conv'; selects which stats to print when `debug`.
            file_ext: extension of the input files (default 'debug').
            debug: print the search pattern, save notice, and summary stats.
            normalize: add a per-file min-max scaled 'NormalizedTFlops' column.
            no_splitK: drop rows whose PerfConfig splitK factor is not 1.

        Returns:
            The combined DataFrame, or None if no input files were found.
        """
        pattern = os.path.join(input_dir, f"*.{file_ext}")
        tsv_files = glob.glob(pattern)
        if debug:
            # Was an unconditional leftover print; only emit when debugging.
            print(pattern)

        dfs = []
        for file in tsv_files:
            df = pd.read_csv(file, sep='\t', index_col=None)
            # Drop the first column (per-run index emitted by tuningRunner).
            df = df[df.columns[1:]]
            if normalize:
                # Per-file min-max scaling to [0, 1]. Replaces the former
                # sklearn MinMaxScaler with the equivalent pandas arithmetic;
                # a constant column maps to 0, matching MinMaxScaler's
                # zero-range handling.
                tflops = df['TFlops']
                span = tflops.max() - tflops.min()
                if span:
                    df['NormalizedTFlops'] = (tflops - tflops.min()) / span
                else:
                    df['NormalizedTFlops'] = 0.0
            dfs.append(df)

        if not dfs:
            return None
        new_df = pd.concat(dfs, ignore_index=True)

        # Remove splitK from tuning data: keep only rows whose splitK factor
        # (third-from-last comma-separated field of PerfConfig) is 1.
        if no_splitK:
            splitk = new_df['PerfConfig'].str.extract(r'.*,(\d+),\d+,\d+')[0]
            new_df = new_df[splitk == '1']

        if output_name:
            new_df.to_csv(output_name, sep='\t', index=False)
            if debug:
                print(f"Saved to {output_name}")

        if debug:
            # Print per-op summary stats about the combined data.
            if op == 'gemm':
                qtPreprocessor.__get_stats_gemm(new_df, len(tsv_files))
            elif op == 'conv':
                qtPreprocessor.__get_stats_conv(new_df, len(tsv_files))

        return new_df


def main(args=None):
    """
    CLI entry point: parse arguments and combine *.debug files into one TSV.

    Args:
        args: argument list to parse; defaults to ``sys.argv[1:]``.
              (Bug fix: this parameter used to be accepted but ignored —
              parse_args() was called with no arguments.)
    """
    if args is None:
        args = sys.argv[1:]

    parser = argparse.ArgumentParser(
        prog='quickTunerPreproc.py',  # was misspelled 'quickTunerPreprocess.py'
        description='Collect *.debug files from tuningRunner.py into a single file to be used in quickTunerGen.py')

    parser.add_argument('--input-dir',
                        required=True,
                        type=str,
                        help='Input directory where files are saved')

    parser.add_argument('--output',
                        required=True,
                        type=str,
                        help='File to save data to')

    parser.add_argument('--op',
                        choices=['gemm', 'conv'],
                        default='gemm',
                        help='Formats debug print info')

    parser.add_argument('-d', '--debug',
                        action='store_true',
                        help='Prints debug information')

    parser.add_argument('--file-ext',
                        default='debug',
                        type=str,
                        help='File extension')

    # Bug fix: store_true with default=True meant normalization could never be
    # turned off. --normalize is kept for backward compatibility and
    # --no-normalize is added to disable it.
    parser.add_argument('--normalize',
                        dest='normalize',
                        default=True,
                        action='store_true',
                        help='Normalize on a per-file basis, necessary for quickTunerGen to work')

    parser.add_argument('--no-normalize',
                        dest='normalize',
                        action='store_false',
                        help='Disable per-file normalization')

    parser.add_argument('--no-splitK',
                        default=False,
                        action='store_true',
                        help='Remove the splitK factor from the generated list')  # was typo 'spliK'

    # Bug fix: parse the provided argument list instead of always sys.argv.
    pargs = parser.parse_args(args)

    # Keyword arguments make the mapping to process() explicit.
    qtPreprocessor.process(pargs.input_dir,
                           output_name=pargs.output,
                           op=pargs.op,
                           file_ext=pargs.file_ext,
                           debug=pargs.debug,
                           normalize=pargs.normalize,
                           no_splitK=pargs.no_splitK)

# Script entry point: run main() only when executed directly, so the module
# can also be imported (e.g. by quickTunerGen.py) without side effects.
if __name__ == '__main__':
    main(sys.argv[1:])