diff --git a/darshan-util/pydarshan/darshan/cli/summary.py b/darshan-util/pydarshan/darshan/cli/summary.py
index d70a61045..a7f8ed2f0 100644
--- a/darshan-util/pydarshan/darshan/cli/summary.py
+++ b/darshan-util/pydarshan/darshan/cli/summary.py
@@ -7,7 +7,7 @@
 from collections import OrderedDict
 import importlib.resources as importlib_resources
 
-from typing import Any, Union, Callable
+from typing import Any, Union, Callable, List, Optional
 
 import pandas as pd
 from mako.template import Template
@@ -124,7 +124,7 @@ def generate_fig(self):
         elif isinstance(fig, plot_common_access_table.DarshanReportTable):
             # retrieve html table from `DarshanReportTable`
             self.fig_html = fig.html
-        else:
+        elif fig is not None:
             err_msg = f"Figure of type {type(fig)} not supported."
             raise NotImplementedError(err_msg)
@@ -137,21 +137,24 @@ class ReportData:
     ----------
     log_path: path to a darshan log file.
     enable_dxt_heatmap: flag indicating whether DXT heatmaps should be enabled
+    filter_patterns: regex patterns for names to exclude/include
+    filter_mode: whether to "exclude" or "include" the filter patterns
 
     """
-    def __init__(self, log_path: str, enable_dxt_heatmap: bool = False):
+    def __init__(self, log_path: str, enable_dxt_heatmap: bool = False,
+                 filter_patterns: Optional[List[str]] = None, filter_mode: str = "exclude"):
         # store the log path and use it to generate the report
         self.log_path = log_path
         self.enable_dxt_heatmap = enable_dxt_heatmap
         # store the report
         self.report = darshan.DarshanReport(log_path, read_all=False)
         # read only generic module data and heatmap data by default
-        self.report.read_all_generic_records()
+        self.report.read_all_generic_records(filter_patterns=filter_patterns, filter_mode=filter_mode)
         if "HEATMAP" in self.report.data['modules']:
             self.report.read_all_heatmap_records()
         # if DXT heatmaps requested, additionally read-in DXT data
         if self.enable_dxt_heatmap:
-            self.report.read_all_dxt_records()
+            self.report.read_all_dxt_records(filter_patterns=filter_patterns, filter_mode=filter_mode)
         # create the header/footer
         self.get_header()
         self.get_footer()
@@ -496,7 +499,11 @@ def register_figures(self):
         elif "PNETCDF_FILE" in self.report.modules:
             opcounts_mods.append("PNETCDF_FILE")
 
-        for mod in self.report.modules:
+        for mod in self.report.records:
+            # skip over modules with no records -- this likely means
+            # records in the log were filtered out via name exclusions
+            if len(self.report.records[mod]) == 0:
+                continue
 
             if "H5" in mod:
                 sect_title = "Per-Module Statistics: HDF5"
@@ -633,6 +640,12 @@ def build_sections(self):
         """
         self.sections = {}
         for fig in self.figures:
+            # skip empty figures that can be generated by report sections
+            # "Data Access by Category" and "Cross-Module Comparisons"
+            if (fig.fig_html == None and
+                (fig.section_title == "Data Access by Category" or
+                 fig.section_title == "Cross-Module Comparisons")):
+                continue
             # if a section title is not already in sections, add
             # the section title and a corresponding empty list
             # to store its figures
@@ -669,6 +682,16 @@ def setup_parser(parser: argparse.ArgumentParser):
         action="store_true",
         help="Enable DXT-based versions of I/O activity heatmaps."
     )
+    parser.add_argument(
+        "--exclude_names",
+        action='append',
+        help="regex patterns for file record names to exclude in summary report"
+    )
+    parser.add_argument(
+        "--include_names",
+        action='append',
+        help="regex patterns for file record names to include in summary report"
+    )
 
 
 def main(args: Union[Any, None] = None):
@@ -687,6 +710,17 @@ def main(args: Union[Any, None] = None):
     log_path = args.log_path
     enable_dxt_heatmap = args.enable_dxt_heatmap
+    filter_patterns=None
+    filter_mode="exclude"
+    if args.exclude_names and args.include_names:
+        print('Error: only one of --exclude_names and --include_names may be used.')
+        sys.exit(1)
+    elif args.exclude_names:
+        filter_patterns = args.exclude_names
+        filter_mode = "exclude"
+    elif args.include_names:
+        filter_patterns = args.include_names
+        filter_mode = "include"
 
     if args.output is None:
         # if no output is provided, use the log file
@@ -699,7 +733,9 @@ def main(args: Union[Any, None] = None):
     # collect the report data to feed into the template
     report_data = ReportData(
         log_path=log_path,
-        enable_dxt_heatmap=enable_dxt_heatmap
+        enable_dxt_heatmap=enable_dxt_heatmap,
+        filter_patterns=filter_patterns,
+        filter_mode=filter_mode
     )
 
     with importlib_resources.path(darshan.cli, "base.html") as base_path:
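Note: with the two new flags above, name filtering can be driven from the command line or programmatically. A minimal sketch, assuming the usual `python -m darshan summary` entry point and a hypothetical log file name; since both flags use `action='append'`, they may be repeated to supply several patterns:

```python
# Hypothetical CLI invocation (shell), log file name is illustrative:
#   python -m darshan summary --exclude_names '\.h5$' --exclude_names '^/tmp/' some_log.darshan
#
# Equivalent programmatic use of the ReportData class from this patch:
from darshan.cli.summary import ReportData

report_data = ReportData(
    log_path="some_log.darshan",            # hypothetical log file
    filter_patterns=[r"\.h5$", r"^/tmp/"],  # as collected by --exclude_names
    filter_mode="exclude",                  # "include" for --include_names
)
```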
diff --git a/darshan-util/pydarshan/darshan/experimental/aggregators/agg_ioops.py b/darshan-util/pydarshan/darshan/experimental/aggregators/agg_ioops.py
index 90bf765d4..6496f89d6 100644
--- a/darshan-util/pydarshan/darshan/experimental/aggregators/agg_ioops.py
+++ b/darshan-util/pydarshan/darshan/experimental/aggregators/agg_ioops.py
@@ -28,7 +28,7 @@ def agg_ioops(self, mode='append'):
 
     for mod in mods:
         # check records for module are present
-        if mod not in recs:
+        if mod not in recs or len(recs[mod]) == 0:
             continue
 
         agg = None
diff --git a/darshan-util/pydarshan/darshan/experimental/plots/data_access_by_filesystem.py b/darshan-util/pydarshan/darshan/experimental/plots/data_access_by_filesystem.py
index c60c666b1..0bd61e8ae 100644
--- a/darshan-util/pydarshan/darshan/experimental/plots/data_access_by_filesystem.py
+++ b/darshan-util/pydarshan/darshan/experimental/plots/data_access_by_filesystem.py
@@ -208,11 +208,11 @@ def rec_to_rw_counter_dfs(report: Any,
     rec_counters = pd.DataFrame()
     df_reads = pd.DataFrame()
     df_writes = pd.DataFrame()
-    if "POSIX" in report.modules:
+    if "POSIX" in report.modules and len(report.records["POSIX"]) > 0:
         rec_counters = pd.concat(objs=(rec_counters, report.records["POSIX"].to_df()['counters']))
         df_reads = pd.concat(objs=(df_reads, rec_counters.loc[rec_counters[f'POSIX_BYTES_READ'] >= 1]))
         df_writes = pd.concat(objs=(df_writes, rec_counters.loc[rec_counters[f'POSIX_BYTES_WRITTEN'] >= 1]))
-    if "STDIO" in report.modules:
+    if "STDIO" in report.modules and len(report.records["STDIO"]) > 0:
         rec_counters = pd.concat(objs=(rec_counters, report.records["STDIO"].to_df()['counters']))
         df_reads = pd.concat(objs=(df_reads, rec_counters.loc[rec_counters[f'STDIO_BYTES_READ'] >= 1]))
         df_writes = pd.concat(objs=(df_writes, rec_counters.loc[rec_counters[f'STDIO_BYTES_WRITTEN'] >= 1]))
@@ -632,7 +632,7 @@ def plot_with_report(report: darshan.DarshanReport,
 
     Returns
     -------
-    fig: matplotlib figure object
+    fig: matplotlib figure object or None if no data to plot
 
     """
     fig = plt.figure()
     file_id_dict = report.data["name_records"]
@@ -648,6 +648,10 @@ def plot_with_report(report: darshan.DarshanReport,
     for ident in allowed_ids:
         allowed_file_id_dict[ident] = file_id_dict[ident]
 
+    if len(allowed_file_id_dict) == 0:
+        # no data, likely because all records have been filtered out
+        return None
+
     filesystem_roots = identify_filesystems(file_id_dict=allowed_file_id_dict, verbose=verbose)
 
     # NOTE: this is a bit ugly, STDIO and POSIX are both combined
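Because `plot_with_report()` may now return `None`, callers should guard for the empty case. A short sketch mirroring the test added further below (the log name is illustrative):

```python
import darshan
from darshan.experimental.plots import data_access_by_filesystem

# an "include" pattern that matches nothing leaves zero records behind
with darshan.DarshanReport("some_log.darshan", filter_patterns=["bogus-regex"],
                           filter_mode="include") as report:
    fig = data_access_by_filesystem.plot_with_report(report=report)
    if fig is None:
        print("nothing to plot: all records were filtered out")
```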
diff --git a/darshan-util/pydarshan/darshan/experimental/plots/plot_io_cost.py b/darshan-util/pydarshan/darshan/experimental/plots/plot_io_cost.py
index 27a1ca960..9f8d1961e 100644
--- a/darshan-util/pydarshan/darshan/experimental/plots/plot_io_cost.py
+++ b/darshan-util/pydarshan/darshan/experimental/plots/plot_io_cost.py
@@ -111,7 +111,7 @@ def get_io_cost_df(report: darshan.DarshanReport) -> Any:
     io_cost_dict = {}
     supported_modules = ["POSIX", "MPI-IO", "STDIO", "H5F", "H5D", "PNETCDF_FILE", "PNETCDF_VAR"]
     for mod_key in report.modules:
-        if mod_key in supported_modules:
+        if mod_key in supported_modules and len(report.records[mod_key]) > 0:
             # collect the records in dataframe form
             recs = report.records[mod_key].to_df(attach=None)
             # correct the MPI module key
@@ -150,13 +150,18 @@ def plot_io_cost(report: darshan.DarshanReport) -> Any:
     Returns
     -------
     io_cost_fig: a ``matplotlib.pyplot.figure`` object containing a
-    stacked bar graph of the average read, write, and metadata times.
+    stacked bar graph of the average read, write, and metadata times --
+    or None when there is no data to plot
 
     """
     # get the run time from the report metadata
     runtime = report.metadata["job"]["run_time"]
     # get the I/O cost dataframe
     io_cost_df = get_io_cost_df(report=report)
+    if io_cost_df.empty:
+        # return None if there is no data to plot; this typically
+        # occurs when all module records have been filtered out
+        return None
     # generate a figure with 2 y axes
     io_cost_fig = plt.figure(figsize=(4.5, 4))
     ax_raw = io_cost_fig.add_subplot(111)
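`plot_io_cost()` follows the same contract through the `io_cost_df.empty` check: filtered-out modules contribute no rows to the dataframe, and a fully filtered log yields `None`. Sketch with an illustrative log name and pattern:

```python
import darshan
from darshan.experimental.plots.plot_io_cost import get_io_cost_df, plot_io_cost

with darshan.DarshanReport("some_log.darshan", filter_patterns=[r"\.h5$"],
                           filter_mode="exclude") as report:
    io_cost_df = get_io_cost_df(report=report)  # no rows for filtered-out modules
    fig = plot_io_cost(report=report)           # None when io_cost_df is empty
```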
diff --git a/darshan-util/pydarshan/darshan/report.py b/darshan-util/pydarshan/darshan/report.py
index c0f7c8a9b..0df68efe1 100644
--- a/darshan-util/pydarshan/darshan/report.py
+++ b/darshan-util/pydarshan/darshan/report.py
@@ -309,14 +309,16 @@ def __init__(self,
                  filename=None, dtype='numpy',
                  start_time=None, end_time=None,
                  automatic_summary=False,
-                 read_all=True, lookup_name_records=True):
+                 read_all=True,
+                 filter_patterns=None, filter_mode="exclude"):
         """
         Args:
             filename (str): filename to open (optional)
             dtype (str): default dtype for internal structures
             automatic_summary (bool): automatically generate summary after loading
             read_all (bool): whether to read all records for log
-            lookup_name_records (bool): lookup and update name_records as records are loaded
+            filter_patterns (list of strings): list of Python regex strings to match against
+            filter_mode (str): filter mode to use (either "exclude" or "include")
 
         Return:
             None
@@ -326,14 +328,13 @@ def __init__(self,
         self.log = None
 
         # Behavioral Options
-        self.dtype = dtype  # default dtype to return when viewing records
+        self.dtype = dtype              # default dtype to return when viewing records
+        self.name_records_read = False  # True if name records have been read from the log
         self.automatic_summary = automatic_summary
-        self.lookup_name_records = lookup_name_records
 
         # State dependent book-keeping
         self.converted_records = False  # true if convert_records() was called (unnumpyfy)
-
         # Report Metadata
         #
         # Start/End + Timebase are
@@ -354,7 +355,6 @@ def __init__(self,
         self.summary_revision = 0  # counter to check if summary needs update (see data_revision)
         self.summary = {}
 
-
         # legacy references (deprecate before 1.0?)
         self.data_revision = 0  # counter for consistency checks
         self.data = {'version': 1}
@@ -365,16 +365,14 @@ def __init__(self,
         self.data['counters'] = self.counters
         self.data['name_records'] = self.name_records
 
-
         # when using report algebra this log allows to untangle potentially
         # unfair aggregations (e.g., double accounting)
         self.provenance_enabled = True
         self.provenance_graph = []
         self.provenance_reports = {}
 
-
         if filename:
-            self.open(filename, read_all=read_all)
+            self.open(filename, read_all=read_all, filter_patterns=filter_patterns, filter_mode=filter_mode)
 
 
     @property
@@ -408,13 +406,15 @@ def heatmaps(self):
     #
 
-    def open(self, filename, read_all=False):
+    def open(self, filename, read_all=False, filter_patterns=None, filter_mode="exclude"):
        """
         Open log file via CFFI backend.
 
         Args:
             filename (str): filename to open (optional)
             read_all (bool): whether to read all records for log
+            filter_patterns (list of strings): list of Python regex strings to match against
+            filter_mode (str): filter mode to use (either "exclude" or "include")
 
         Return:
             None
@@ -428,10 +428,10 @@ def open(self, filename, read_all=False):
         if not bool(self.log['handle']):
             raise RuntimeError("Failed to open file.")
 
-        self.read_metadata(read_all=read_all)
+        self.read_metadata()
 
         if read_all:
-            self.read_all()
+            self.read_all(filter_patterns=filter_patterns, filter_mode=filter_mode)
 
 
     def __add__(self, other):
@@ -464,8 +464,7 @@ def __deepcopy__(self, memo):
 
     # TODO: might consider treating self.log as list of open logs to not deactivate load functions?
 
-
-    def read_metadata(self, read_all=False):
+    def read_metadata(self):
         """
         Read metadata such as the job, the executables and available modules.
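The filter arguments now flow from the constructor through `open()` into `read_all()`, so the common case is a one-liner. Sketch with a hypothetical log name:

```python
import darshan

# read_all=True (the default) applies the name filters to every module's records
report = darshan.DarshanReport("some_log.darshan",
                               filter_patterns=[r"\.h5$", r"^/tmp/"],
                               filter_mode="exclude")
```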
@@ -488,60 +487,55 @@ def read_metadata(self):
         self.data['modules'] = backend.log_get_modules(self.log)
         self._modules = self.data['modules']
 
-        if read_all == True:
-            self.data["name_records"] = backend.log_get_name_records(self.log)
-            self.name_records = self.data['name_records']
-
-    def update_name_records(self, mod=None):
+    def read_name_records(self, filter_patterns=None, filter_mode="exclude"):
         """
-        Update (and prune unused) name records from resolve table.
-
-        First reindexes all used name record identifiers and then queries
-        darshan-utils library to compile filtered list of name records.
+        Read all name records (record ID -> record name map) from the
+        Darshan log file using darshan-utils library. If filter patterns
+        are provided, either filter those records out (exclude) or in (include).
 
         Args:
-            None
+            filter_patterns (list of strs): regex patterns for names to exclude/include
+            filter_mode (str): whether to "exclude" or "include" the filter patterns
 
         Return:
             None
 
         """
-        # sanitize inputs
-        mods = mod
-        if mods is None:
-            mods = self.records
-        else:
-            mods = [mod]
-
-
-        # state
-        ids = set()
-
-        for mod in mods:
-            logger.debug(f" Refreshing name_records for mod={mod}")
-            for rec in self.records[mod]:
-                ids.add(rec['id'])
-
-
-        self.name_records.update(backend.log_lookup_name_records(self.log, ids))
-
-
-    def read_all(self, dtype=None):
+        if filter_patterns and filter_mode not in {"exclude", "include"}:
+            raise RuntimeError("Invalid filter mode used for read_name_records().")
+        tmp_name_records = backend.log_get_name_records(self.log)
+        # filter name records according to user-supplied patterns
+        if filter_patterns:
+            compiled_patterns = [re.compile(p) for p in filter_patterns]
+            tmp_name_records = {
+                rec_id : rec_name
+                for (rec_id, rec_name) in tmp_name_records.items()
+                if ((filter_mode == "exclude" and not any(p.search(rec_name) for p in compiled_patterns))
+                    or
+                    (filter_mode == "include" and any(p.search(rec_name) for p in compiled_patterns)))
+            }
+        self.data["name_records"] = tmp_name_records
+        self.name_records = self.data['name_records']
+        self.name_records_read = True
+
+
+    def read_all(self, dtype=None, filter_patterns=None, filter_mode="exclude"):
         """
         Read all available records from darshan log and return as dictionary.
 
         Args:
-            None
+            filter_patterns (list of strings): list of Python regex strings to match against
+            filter_mode (str): filter mode to use (either "exclude" or "include")
 
         Return:
             None
 
         """
-        self.read_all_generic_records(dtype=dtype)
-        self.read_all_dxt_records(dtype=dtype)
+        self.read_all_generic_records(dtype=dtype, filter_patterns=filter_patterns, filter_mode=filter_mode)
+        self.read_all_dxt_records(dtype=dtype, filter_patterns=filter_patterns, filter_mode=filter_mode)
         if "LUSTRE" in self.data['modules']:
-            self.mod_read_all_lustre_records(dtype=dtype)
+            self.mod_read_all_lustre_records(dtype=dtype, filter_patterns=filter_patterns, filter_mode=filter_mode)
         if "APMPI" in self.data['modules']:
             self.mod_read_all_apmpi_records(dtype=dtype)
         if "APXC" in self.data['modules']:
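The dict comprehension in `read_name_records()` boils down to one keep/drop rule per record name. Distilled sketch of the two modes (the name and pattern are illustrative):

```python
import re

# "exclude": keep a name only if no pattern matches it
# "include": keep a name only if at least one pattern matches it
patterns = [re.compile(p) for p in [r"\.h5$"]]
name = "/projects/output/run1.h5"
keep_if_exclude = not any(p.search(name) for p in patterns)  # False for this name
keep_if_include = any(p.search(name) for p in patterns)      # True for this name
```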
@@ -552,12 +546,14 @@ def read_all(self, dtype=None):
         return
 
 
-    def read_all_generic_records(self, counters=True, fcounters=True, dtype=None):
+    def read_all_generic_records(self, counters=True, fcounters=True, dtype=None,
+                                 filter_patterns=None, filter_mode="exclude"):
         """
         Read all generic records from darshan log and return as dictionary.
 
         Args:
-            None
+            filter_patterns (list of strings): list of Python regex strings to match against
+            filter_mode (str): filter mode to use (either "exclude" or "include")
 
         Return:
             None
@@ -566,16 +562,18 @@ def read_all_generic_records(self, counters=True, fcounters=True, dtype=None):
         dtype = dtype if dtype else self.dtype
 
         for mod in self.data['modules']:
-            self.mod_read_all_records(mod, dtype=dtype, warnings=False)
-
+            self.mod_read_all_records(mod, dtype=dtype, warnings=False,
+                                      filter_patterns=filter_patterns, filter_mode=filter_mode)
 
-    def read_all_dxt_records(self, reads=True, writes=True, dtype=None):
+
+    def read_all_dxt_records(self, reads=True, writes=True, dtype=None,
+                             filter_patterns=None, filter_mode="exclude"):
         """
         Read all dxt records from darshan log and return as dictionary.
 
         Args:
-            None
+            filter_patterns (list of strings): list of Python regex strings to match against
+            filter_mode (str): filter mode to use (either "exclude" or "include")
 
         Return:
             None
@@ -584,7 +582,8 @@ def read_all_dxt_records(self, reads=True, writes=True, dtype=None):
         dtype = dtype if dtype else self.dtype
 
         for mod in self.data['modules']:
-            self.mod_read_all_dxt_records(mod, warnings=False, reads=reads, writes=writes, dtype=dtype)
+            self.mod_read_all_dxt_records(mod, dtype=dtype, warnings=False, reads=reads, writes=writes,
+                                          filter_patterns=filter_patterns, filter_mode=filter_mode)
 
 
     def read_all_heatmap_records(self):
@@ -635,13 +634,17 @@ def heatmap_rec_to_module_name(rec, nrecs=None):
 
         self._heatmaps = heatmaps
 
-    def mod_read_all_records(self, mod, dtype=None, warnings=True):
+    def mod_read_all_records(self, mod, dtype=None, warnings=True,
+                             filter_patterns=None, filter_mode="exclude",
+                             refresh_names=False):
         """
         Reads all generic records for module
 
         Args:
             mod (str): Identifier of module to fetch all records
             dtype (str): 'numpy' for ndarray (default), 'dict' for python dictionary, 'pandas'
+            filter_patterns (list of strings): list of Python regex strings to match against
+            filter_mode (str): filter mode to use (either "exclude" or "include")
 
         Return:
             None
@@ -655,11 +658,9 @@ def mod_read_all_records(self, mod, dtype=None, warnings=True):
             # skip mod
             return
 
-
         # handling options
         dtype = dtype if dtype else self.dtype
 
-
         self.records[mod] = DarshanRecordCollection(mod=mod, report=self)
         cn = backend.counter_names(mod)
         fcn = backend.fcounter_names(mod)
@@ -674,20 +675,22 @@ def mod_read_all_records(self, mod, dtype=None, warnings=True):
         self.counters[mod]['counters'] = cn
         self.counters[mod]['fcounters'] = fcn
 
+        # get name records if they have not been read yet
+        if not self.name_records_read or refresh_names:
+            self.read_name_records(filter_patterns=filter_patterns, filter_mode=filter_mode)
 
         # fetch records
         rec = backend.log_get_generic_record(self.log, mod, dtype=dtype)
         while rec != None:
-            self.records[mod].append(rec)
-            self._modules[mod]['num_records'] += 1
+            if rec['id'] in self.name_records:
+                # only keep records we have names for, otherwise the record
+                # likely has a name that was excluded
+                self.records[mod].append(rec)
+                self._modules[mod]['num_records'] += 1
 
             # fetch next
             rec = backend.log_get_generic_record(self.log, mod, dtype=dtype)
 
-
-        if self.lookup_name_records:
-            self.update_name_records(mod=mod)
-
-
         # process/combine records if the format dtype allows for this
         if dtype == 'pandas':
             combined_c = None
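Name records are now read lazily on the first `mod_read_*` call, and any record whose ID is missing from the filtered `name_records` map is dropped at read time. Per-module sketch (hypothetical log name and pattern):

```python
import darshan

report = darshan.DarshanReport("some_log.darshan", read_all=False)
# the first mod_read_* call triggers read_name_records() with these filters
report.mod_read_all_records("POSIX", filter_patterns=[r"^/tmp/"],
                            filter_mode="exclude")
print(len(report.records["POSIX"]))  # only records with surviving names remain
```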
@@ -716,7 +719,8 @@ def mod_read_all_records(self, mod, dtype=None, warnings=True):
             }]
 
 
-    def mod_read_all_apmpi_records(self, mod="APMPI", dtype=None, warnings=True):
+    def mod_read_all_apmpi_records(self, mod="APMPI", dtype=None, warnings=True,
+                                   refresh_names=False):
         """
         Reads all APMPI records for provided module.
@@ -751,7 +755,10 @@ def mod_read_all_apmpi_records(self, mod="APMPI", dtype=None, warnings=True):
         if mod not in self.counters:
             self.counters[mod] = {}
 
-        # fetch records
+        # get name records if they have not been read yet
+        if not self.name_records_read or refresh_names:
+            self.read_name_records()
+        # fetch header record
         rec = backend.log_get_apmpi_record(self.log, mod, "HEADER", dtype=dtype)
         while rec != None:
@@ -762,11 +769,8 @@ def mod_read_all_apmpi_records(self, mod="APMPI", dtype=None, warnings=True):
             rec = backend.log_get_apmpi_record(self.log, mod, "PERF", dtype=dtype)
 
-        if self.lookup_name_records:
-            self.update_name_records(mod=mod)
-
-
-    def mod_read_all_apxc_records(self, mod="APXC", dtype=None, warnings=True):
+
+    def mod_read_all_apxc_records(self, mod="APXC", dtype=None, warnings=True,
+                                  refresh_names=False):
         """
         Reads all APXC records for provided module.
@@ -801,7 +805,10 @@ def mod_read_all_apxc_records(self, mod="APXC", dtype=None, warnings=True):
         if mod not in self.counters:
             self.counters[mod] = {}
 
-        # fetch records
+        # get name records if they have not been read yet
+        if not self.name_records_read or refresh_names:
+            self.read_name_records()
+        # fetch header record
         rec = backend.log_get_apxc_record(self.log, mod, "HEADER", dtype=dtype)
         while rec != None:
@@ -811,17 +818,18 @@ def mod_read_all_apxc_records(self, mod="APXC", dtype=None, warnings=True):
             # fetch next
             rec = backend.log_get_apxc_record(self.log, mod, "PERF", dtype=dtype)
 
-        if self.lookup_name_records:
-            self.update_name_records(mod=mod)
-
-
-    def mod_read_all_dxt_records(self, mod, dtype=None, warnings=True, reads=True, writes=True):
+
+    def mod_read_all_dxt_records(self, mod, dtype=None, warnings=True, reads=True, writes=True,
+                                 filter_patterns=None, filter_mode="exclude",
+                                 refresh_names=False):
         """
         Reads all dxt records for provided module.
 
         Args:
             mod (str): Identifier of module to fetch all records
             dtype (str): 'numpy' for ndarray (default), 'dict' for python dictionary
+            filter_patterns (list of strings): list of Python regex strings to match against
+            filter_mode (str): filter mode to use (either "exclude" or "include")
 
         Return:
             None
@@ -832,7 +840,6 @@ def mod_read_all_dxt_records(self, mod, dtype=None, warnings=True, reads=True, w
             logger.warning(f" Skipping. Log does not contain data for mod: {mod}")
             return
 
-
         supported = ['DXT_POSIX', 'DXT_MPIIO']
 
         if mod not in supported:
@@ -841,11 +848,9 @@ def mod_read_all_dxt_records(self, mod, dtype=None, warnings=True, reads=True, w
             # skip mod
             return
 
-
         # handling options
         dtype = dtype if dtype else self.dtype
 
-
         self.records[mod] = DarshanRecordCollection(mod=mod, report=self)
 
         # update module metadata
@@ -853,54 +858,47 @@ def mod_read_all_dxt_records(self, mod, dtype=None, warnings=True, reads=True, w
         if mod not in self.counters:
             self.counters[mod] = {}
 
+        # get name records if they have not been read yet
+        if not self.name_records_read or refresh_names:
+            self.read_name_records(filter_patterns=filter_patterns, filter_mode=filter_mode)
 
         # fetch records
-        rec = backend.log_get_dxt_record(self.log, mod, dtype=dtype)
+        rec = backend.log_get_dxt_record(self.log, mod, reads=reads, writes=writes, dtype=dtype)
         while rec != None:
-            self.records[mod].append(rec)
-            self.data['modules'][mod]['num_records'] += 1
+            if rec['id'] in self.name_records:
+                # only keep records we have names for, otherwise the record
+                # likely has a name that was excluded
+                self.records[mod].append(rec)
+                self._modules[mod]['num_records'] += 1
 
             # fetch next
             rec = backend.log_get_dxt_record(self.log, mod, reads=reads, writes=writes, dtype=dtype)
 
-        if self.lookup_name_records:
-            self.update_name_records(mod=mod)
-
-
-
-
-    def mod_read_all_lustre_records(self, mod="LUSTRE", dtype=None, warnings=True):
+
+    def mod_read_all_lustre_records(self, dtype=None, warnings=True,
+                                    filter_patterns=None, filter_mode="exclude",
+                                    refresh_names=False):
         """
-        Reads all dxt records for provided module.
+        Reads all lustre records.
 
         Args:
-            mod (str): Identifier of module to fetch all records
             dtype (str): 'numpy' for ndarray (default), 'dict' for python dictionary
+            filter_patterns (list of strings): list of Python regex strings to match against
+            filter_mode (str): filter mode to use (either "exclude" or "include")
 
         Return:
             None
 
         """
+        mod = "LUSTRE"
         if mod not in self.modules:
             if warnings:
                 logger.warning(f" Skipping. Log does not contain data for mod: {mod}")
             return
-
-        supported = ['LUSTRE']
-
-        if mod not in supported:
-            if warnings:
-                logger.warning(f" Skipping. Unsupported module: {mod} in in mod_read_all_dxt_records(). Supported: {supported}")
-            # skip mod
-            return
-
-
         # handling options
         dtype = dtype if dtype else self.dtype
 
-
         self.records[mod] = DarshanRecordCollection(mod=mod, report=self)
         cn = backend.counter_names("LUSTRE_COMP")
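DXT trace records are gated by the same name-record check. Sketch (assumes the log actually contains DXT_POSIX data; the log name and pattern are illustrative):

```python
import darshan

report = darshan.DarshanReport("some_log.darshan", read_all=False)
# only DXT records whose file names match the pattern are kept
report.mod_read_all_dxt_records("DXT_POSIX",
                                filter_patterns=["ior-posix"],
                                filter_mode="include")
```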
@@ -910,20 +908,22 @@ def mod_read_all_lustre_records(self, mod="LUSTRE", dtype=None, warnings=True):
             self.counters[mod] = {}
         self.counters[mod]['counters'] = cn
 
+        # get name records if they have not been read yet
+        if not self.name_records_read or refresh_names:
+            self.read_name_records(filter_patterns=filter_patterns, filter_mode=filter_mode)
 
         # fetch records
         rec = backend.log_get_record(self.log, mod, dtype=dtype)
         while rec != None:
-            self.records[mod].append(rec)
-            self.data['modules'][mod]['num_records'] += 1
+            if rec['id'] in self.name_records:
+                # only keep records we have names for, otherwise the record
+                # likely has a name that was excluded
+                self.records[mod].append(rec)
+                self._modules[mod]['num_records'] += 1
 
             # fetch next
             rec = backend.log_get_record(self.log, mod, dtype=dtype)
 
-
-        if self.lookup_name_records:
-            self.update_name_records(mod=mod)
-
         # process/combine records if the format dtype allows for this
         if dtype == 'pandas':
             combined_c = None
@@ -934,7 +934,6 @@ def mod_read_all_lustre_records(self, mod="LUSTRE", dtype=None, warnings=True):
             else:
                 combined_c = pd.concat([combined_c, rec['components']])
 
-
         self.records[mod] = [{
             'rank': -1,
             'id': -1,
@@ -942,9 +941,6 @@ def mod_read_all_lustre_records(self, mod="LUSTRE", dtype=None, warnings=True):
         }]
 
 
-
-
-
     def mod_records(self, mod, dtype='numpy', warnings=True):
         """
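The new `refresh_names` flag on the `mod_read_*` helpers forces `read_name_records()` to run again, which should allow a different filter set to be applied on a later pass. A speculative sketch (re-reading a module resets its record collection; the log name is illustrative):

```python
import darshan

report = darshan.DarshanReport("some_log.darshan", read_all=False)
report.mod_read_all_records("POSIX")  # first pass, no filters
# second pass: re-read name records with a filter and reload the module
report.mod_read_all_records("POSIX", refresh_names=True,
                            filter_patterns=[r"\.h5$"], filter_mode="include")
```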
report.data["name_records"] + actual_df_reads, actual_df_writes = data_access_by_filesystem.rec_to_rw_counter_dfs_with_cols(report=report, + file_id_dict=file_id_dict) + assert len(actual_df_reads) == 1 + assert len(actual_df_writes) == 2 diff --git a/darshan-util/pydarshan/darshan/tests/test_plot_io_cost.py b/darshan-util/pydarshan/darshan/tests/test_plot_io_cost.py index 280eff298..2a003d987 100644 --- a/darshan-util/pydarshan/darshan/tests/test_plot_io_cost.py +++ b/darshan-util/pydarshan/darshan/tests/test_plot_io_cost.py @@ -315,3 +315,22 @@ def test_plot_io_cost_x_ticks_and_labels(logname, expected_rotations = 90 x_rotations = [tl.get_rotation() for tl in xticklabels] assert_allclose(x_rotations, expected_rotations) + +def test_plot_io_cost_empty_data(): + # generate a report object that filters out all contained records + # to ensure plot_io_cost properly returns None instead of failing + logpath = get_log_path("ior_hdf5_example.darshan") + # use a bogus regex with the "include" filter mode to ensure no records are included + with darshan.DarshanReport(logpath, filter_patterns=["bogus-regex"], filter_mode="include") as report: + fig = plot_io_cost(report=report) + assert fig == None + +def test_plot_io_cost_filtered_data(): + # ensure get_io_cost_df doesn't include data for modules with no records + logpath = get_log_path("sample-badost.darshan") + # generate a report object with all POSIX module records filtered out + # STDIO records should still remain + with darshan.DarshanReport(logpath, filter_patterns=["ior-posix"], filter_mode="exclude") as report: + io_cost_df = get_io_cost_df(report=report) + assert "POSIX" not in io_cost_df.index + assert "STDIO" in io_cost_df.index diff --git a/darshan-util/pydarshan/darshan/tests/test_report.py b/darshan-util/pydarshan/darshan/tests/test_report.py index c32e53421..a5d18df09 100644 --- a/darshan-util/pydarshan/darshan/tests/test_report.py +++ b/darshan-util/pydarshan/darshan/tests/test_report.py @@ -75,6 +75,15 @@ def test_load_records(): report.mod_read_all_records("POSIX") assert 1 == len(report.data['records']['POSIX']) +def test_load_records_filtered(): + """Sample for an expected number of records after filtering.""" + logfile = get_log_path("shane_macsio_id29959_5-22-32552-7035573431850780836_1590156158.darshan") + with darshan.DarshanReport(logfile, filter_patterns=["\.h5$"], filter_mode="exclude") as report: + assert 2 == len(report.data['records']['POSIX']) + assert 0 == len(report.data['records']['MPI-IO']) + with darshan.DarshanReport(logfile, filter_patterns=["\.h5$"], filter_mode="include") as report: + assert 1 == len(report.data['records']['POSIX']) + assert 1 == len(report.data['records']['MPI-IO']) @pytest.mark.parametrize("unsupported_record", ["DXT_POSIX", "DXT_MPIIO", "LUSTRE", "APMPI", "APXC"]