Skip to content

Commit 4147a4a

Browse files
author
shanedsnyder
authored
Merge pull request #1017 from darshan-hpc/snyder/pydarshan-name-filters
ENH: PyDarshan Report changes to enable name filtering
2 parents 65b654f + a9f1986 commit 4147a4a

File tree

8 files changed

+224
-126
lines changed

8 files changed

+224
-126
lines changed

darshan-util/pydarshan/darshan/cli/summary.py

Lines changed: 43 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
from collections import OrderedDict
88
import importlib.resources as importlib_resources
99

10-
from typing import Any, Union, Callable
10+
from typing import Any, Union, Callable, List, Optional
1111

1212
import pandas as pd
1313
from mako.template import Template
@@ -124,7 +124,7 @@ def generate_fig(self):
124124
elif isinstance(fig, plot_common_access_table.DarshanReportTable):
125125
# retrieve html table from `DarshanReportTable`
126126
self.fig_html = fig.html
127-
else:
127+
elif fig is not None:
128128
err_msg = f"Figure of type {type(fig)} not supported."
129129
raise NotImplementedError(err_msg)
130130

@@ -137,21 +137,24 @@ class ReportData:
137137
----------
138138
log_path: path to a darshan log file.
139139
enable_dxt_heatmap: flag indicating whether DXT heatmaps should be enabled
140+
filter_patterns: regex patterns for names to exclude/include
141+
filter_mode: whether to "exclude" or "include" the filter patterns
140142
141143
"""
142-
def __init__(self, log_path: str, enable_dxt_heatmap: bool = False):
144+
def __init__(self, log_path: str, enable_dxt_heatmap: bool = False,
145+
filter_patterns: Optional[List[str]] = None, filter_mode: str = "exclude"):
143146
# store the log path and use it to generate the report
144147
self.log_path = log_path
145148
self.enable_dxt_heatmap = enable_dxt_heatmap
146149
# store the report
147150
self.report = darshan.DarshanReport(log_path, read_all=False)
148151
# read only generic module data and heatmap data by default
149-
self.report.read_all_generic_records()
152+
self.report.read_all_generic_records(filter_patterns=filter_patterns, filter_mode=filter_mode)
150153
if "HEATMAP" in self.report.data['modules']:
151154
self.report.read_all_heatmap_records()
152155
# if DXT heatmaps requested, additionally read-in DXT data
153156
if self.enable_dxt_heatmap:
154-
self.report.read_all_dxt_records()
157+
self.report.read_all_dxt_records(filter_patterns=filter_patterns, filter_mode=filter_mode)
155158
# create the header/footer
156159
self.get_header()
157160
self.get_footer()
@@ -496,7 +499,11 @@ def register_figures(self):
496499
elif "PNETCDF_FILE" in self.report.modules:
497500
opcounts_mods.append("PNETCDF_FILE")
498501

499-
for mod in self.report.modules:
502+
for mod in self.report.records:
503+
# skip over modules with no records -- this likely means
504+
# records in the log were filtered out via name filters (exclude/include)
505+
if len(self.report.records[mod]) == 0:
506+
continue
500507

501508
if "H5" in mod:
502509
sect_title = "Per-Module Statistics: HDF5"
@@ -633,6 +640,12 @@ def build_sections(self):
633640
"""
634641
self.sections = {}
635642
for fig in self.figures:
643+
# skip empty figures that can be generated by report sections
644+
# "Data Access by Category" and "Cross-Module Comparisons"
645+
if (fig.fig_html == None and
646+
(fig.section_title == "Data Access by Category" or
647+
fig.section_title == "Cross-Module Comparisons")):
648+
continue
636649
# if a section title is not already in sections, add
637650
# the section title and a corresponding empty list
638651
# to store its figures
@@ -669,6 +682,16 @@ def setup_parser(parser: argparse.ArgumentParser):
669682
action="store_true",
670683
help="Enable DXT-based versions of I/O activity heatmaps."
671684
)
685+
parser.add_argument(
686+
"--exclude_names",
687+
action='append',
688+
help="regex patterns for file record names to exclude in summary report"
689+
)
690+
parser.add_argument(
691+
"--include_names",
692+
action='append',
693+
help="regex patterns for file record names to include in summary report"
694+
)
672695

673696

674697
def main(args: Union[Any, None] = None):
@@ -687,6 +710,17 @@ def main(args: Union[Any, None] = None):
687710

688711
log_path = args.log_path
689712
enable_dxt_heatmap = args.enable_dxt_heatmap
713+
filter_patterns=None
714+
filter_mode="exclude"
715+
if args.exclude_names and args.include_names:
716+
print('Error: only one of --exclude_names and --include_names may be used.')
717+
sys.exit(1)
718+
elif args.exclude_names:
719+
filter_patterns = args.exclude_names
720+
filter_mode = "exclude"
721+
elif args.include_names:
722+
filter_patterns = args.include_names
723+
filter_mode = "include"
690724

691725
if args.output is None:
692726
# if no output is provided, use the log file
@@ -699,7 +733,9 @@ def main(args: Union[Any, None] = None):
699733
# collect the report data to feed into the template
700734
report_data = ReportData(
701735
log_path=log_path,
702-
enable_dxt_heatmap=enable_dxt_heatmap
736+
enable_dxt_heatmap=enable_dxt_heatmap,
737+
filter_patterns=filter_patterns,
738+
filter_mode=filter_mode
703739
)
704740

705741
with importlib_resources.path(darshan.cli, "base.html") as base_path:

darshan-util/pydarshan/darshan/experimental/aggregators/agg_ioops.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ def agg_ioops(self, mode='append'):
2828
for mod in mods:
2929

3030
# check records for module are present
31-
if mod not in recs:
31+
if mod not in recs or len(recs[mod]) == 0:
3232
continue
3333

3434
agg = None

darshan-util/pydarshan/darshan/experimental/plots/data_access_by_filesystem.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -208,11 +208,11 @@ def rec_to_rw_counter_dfs(report: Any,
208208
rec_counters = pd.DataFrame()
209209
df_reads = pd.DataFrame()
210210
df_writes = pd.DataFrame()
211-
if "POSIX" in report.modules:
211+
if "POSIX" in report.modules and len(report.records["POSIX"]) > 0:
212212
rec_counters = pd.concat(objs=(rec_counters, report.records["POSIX"].to_df()['counters']))
213213
df_reads = pd.concat(objs=(df_reads, rec_counters.loc[rec_counters[f'POSIX_BYTES_READ'] >= 1]))
214214
df_writes = pd.concat(objs=(df_writes, rec_counters.loc[rec_counters[f'POSIX_BYTES_WRITTEN'] >= 1]))
215-
if "STDIO" in report.modules:
215+
if "STDIO" in report.modules and len(report.records["STDIO"]) > 0:
216216
rec_counters = pd.concat(objs=(rec_counters, report.records["STDIO"].to_df()['counters']))
217217
df_reads = pd.concat(objs=(df_reads, rec_counters.loc[rec_counters[f'STDIO_BYTES_READ'] >= 1]))
218218
df_writes = pd.concat(objs=(df_writes, rec_counters.loc[rec_counters[f'STDIO_BYTES_WRITTEN'] >= 1]))
@@ -632,7 +632,7 @@ def plot_with_report(report: darshan.DarshanReport,
632632
Returns
633633
-------
634634
635-
fig: matplotlib figure object
635+
fig: matplotlib figure object or None if no data to plot
636636
"""
637637
fig = plt.figure()
638638
file_id_dict = report.data["name_records"]
@@ -648,6 +648,10 @@ def plot_with_report(report: darshan.DarshanReport,
648648
for ident in allowed_ids:
649649
allowed_file_id_dict[ident] = file_id_dict[ident]
650650

651+
if len(allowed_file_id_dict) == 0:
652+
# no data, likely because all records have been filtered out
653+
return None
654+
651655
filesystem_roots = identify_filesystems(file_id_dict=allowed_file_id_dict,
652656
verbose=verbose)
653657
# NOTE: this is a bit ugly, STDIO and POSIX are both combined

darshan-util/pydarshan/darshan/experimental/plots/plot_io_cost.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,7 @@ def get_io_cost_df(report: darshan.DarshanReport) -> Any:
111111
io_cost_dict = {}
112112
supported_modules = ["POSIX", "MPI-IO", "STDIO", "H5F", "H5D", "PNETCDF_FILE", "PNETCDF_VAR"]
113113
for mod_key in report.modules:
114-
if mod_key in supported_modules:
114+
if mod_key in supported_modules and len(report.records[mod_key]) > 0:
115115
# collect the records in dataframe form
116116
recs = report.records[mod_key].to_df(attach=None)
117117
# correct the MPI module key
@@ -150,13 +150,18 @@ def plot_io_cost(report: darshan.DarshanReport) -> Any:
150150
Returns
151151
-------
152152
io_cost_fig: a ``matplotlib.pyplot.figure`` object containing a
153-
stacked bar graph of the average read, write, and metadata times.
153+
stacked bar graph of the average read, write, and metadata times --
154+
or None when there is no data to plot
154155
155156
"""
156157
# get the run time from the report metadata
157158
runtime = report.metadata["job"]["run_time"]
158159
# get the I/O cost dataframe
159160
io_cost_df = get_io_cost_df(report=report)
161+
if io_cost_df.empty:
162+
# return None (no figure) if there's no data
163+
# this typically occurs when all module records have been filtered out
164+
return None
160165
# generate a figure with 2 y axes
161166
io_cost_fig = plt.figure(figsize=(4.5, 4))
162167
ax_raw = io_cost_fig.add_subplot(111)

0 commit comments

Comments
 (0)