Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 43 additions & 7 deletions darshan-util/pydarshan/darshan/cli/summary.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from collections import OrderedDict
import importlib.resources as importlib_resources

from typing import Any, Union, Callable
from typing import Any, Union, Callable, List, Optional

import pandas as pd
from mako.template import Template
Expand Down Expand Up @@ -124,7 +124,7 @@ def generate_fig(self):
elif isinstance(fig, plot_common_access_table.DarshanReportTable):
# retrieve html table from `DarshanReportTable`
self.fig_html = fig.html
else:
elif fig is not None:
err_msg = f"Figure of type {type(fig)} not supported."
raise NotImplementedError(err_msg)

Expand All @@ -137,21 +137,24 @@ class ReportData:
----------
log_path: path to a darshan log file.
enable_dxt_heatmap: flag indicating whether DXT heatmaps should be enabled
filter_patterns: regex patterns for names to exclude/include
filter_mode: whether to "exclude" or "include" the filter patterns

"""
def __init__(self, log_path: str, enable_dxt_heatmap: bool = False):
def __init__(self, log_path: str, enable_dxt_heatmap: bool = False,
filter_patterns: Optional[List[str]] = None, filter_mode: str = "exclude"):
# store the log path and use it to generate the report
self.log_path = log_path
self.enable_dxt_heatmap = enable_dxt_heatmap
# store the report
self.report = darshan.DarshanReport(log_path, read_all=False)
# read only generic module data and heatmap data by default
self.report.read_all_generic_records()
self.report.read_all_generic_records(filter_patterns=filter_patterns, filter_mode=filter_mode)
if "HEATMAP" in self.report.data['modules']:
self.report.read_all_heatmap_records()
# if DXT heatmaps requested, additionally read-in DXT data
if self.enable_dxt_heatmap:
self.report.read_all_dxt_records()
self.report.read_all_dxt_records(filter_patterns=filter_patterns, filter_mode=filter_mode)
# create the header/footer
self.get_header()
self.get_footer()
Expand Down Expand Up @@ -496,7 +499,11 @@ def register_figures(self):
elif "PNETCDF_FILE" in self.report.modules:
opcounts_mods.append("PNETCDF_FILE")

for mod in self.report.modules:
for mod in self.report.records:
# skip over modules with no records -- this likely means
# records in the log were filtered out via name exclusions
if len(self.report.records[mod]) == 0:
continue

if "H5" in mod:
sect_title = "Per-Module Statistics: HDF5"
Expand Down Expand Up @@ -633,6 +640,12 @@ def build_sections(self):
"""
self.sections = {}
for fig in self.figures:
# skip empty figures that can be generated by report sections
# "Data Access by Category" and "Cross-Module Comparisons"
if (fig.fig_html == None and
(fig.section_title == "Data Access by Category" or
fig.section_title == "Cross-Module Comparisons")):
continue
# if a section title is not already in sections, add
# the section title and a corresponding empty list
# to store its figures
Expand Down Expand Up @@ -669,6 +682,16 @@ def setup_parser(parser: argparse.ArgumentParser):
action="store_true",
help="Enable DXT-based versions of I/O activity heatmaps."
)
parser.add_argument(
"--exclude_names",
action='append',
help="regex patterns for file record names to exclude in summary report"
)
parser.add_argument(
"--include_names",
action='append',
help="regex patterns for file record names to include in summary report"
)


def main(args: Union[Any, None] = None):
Expand All @@ -687,6 +710,17 @@ def main(args: Union[Any, None] = None):

log_path = args.log_path
enable_dxt_heatmap = args.enable_dxt_heatmap
filter_patterns=None
filter_mode="exclude"
if args.exclude_names and args.include_names:
print('Error: only one of --exclude_names and --include_names may be used.')
sys.exit(1)
elif args.exclude_names:
filter_patterns = args.exclude_names
filter_mode = "exclude"
elif args.include_names:
filter_patterns = args.include_names
filter_mode = "include"

if args.output is None:
# if no output is provided, use the log file
Expand All @@ -699,7 +733,9 @@ def main(args: Union[Any, None] = None):
# collect the report data to feed into the template
report_data = ReportData(
log_path=log_path,
enable_dxt_heatmap=enable_dxt_heatmap
enable_dxt_heatmap=enable_dxt_heatmap,
filter_patterns=filter_patterns,
filter_mode=filter_mode
)

with importlib_resources.path(darshan.cli, "base.html") as base_path:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ def agg_ioops(self, mode='append'):
for mod in mods:

# check records for module are present
if mod not in recs:
if mod not in recs or len(recs[mod]) == 0:
continue

agg = None
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -208,11 +208,11 @@ def rec_to_rw_counter_dfs(report: Any,
rec_counters = pd.DataFrame()
df_reads = pd.DataFrame()
df_writes = pd.DataFrame()
if "POSIX" in report.modules:
if "POSIX" in report.modules and len(report.records["POSIX"]) > 0:
rec_counters = pd.concat(objs=(rec_counters, report.records["POSIX"].to_df()['counters']))
df_reads = pd.concat(objs=(df_reads, rec_counters.loc[rec_counters[f'POSIX_BYTES_READ'] >= 1]))
df_writes = pd.concat(objs=(df_writes, rec_counters.loc[rec_counters[f'POSIX_BYTES_WRITTEN'] >= 1]))
if "STDIO" in report.modules:
if "STDIO" in report.modules and len(report.records["STDIO"]) > 0:
rec_counters = pd.concat(objs=(rec_counters, report.records["STDIO"].to_df()['counters']))
df_reads = pd.concat(objs=(df_reads, rec_counters.loc[rec_counters[f'STDIO_BYTES_READ'] >= 1]))
df_writes = pd.concat(objs=(df_writes, rec_counters.loc[rec_counters[f'STDIO_BYTES_WRITTEN'] >= 1]))
Expand Down Expand Up @@ -632,7 +632,7 @@ def plot_with_report(report: darshan.DarshanReport,
Returns
-------

fig: matplotlib figure object
fig: matplotlib figure object or None if no data to plot
"""
fig = plt.figure()
file_id_dict = report.data["name_records"]
Expand All @@ -648,6 +648,10 @@ def plot_with_report(report: darshan.DarshanReport,
for ident in allowed_ids:
allowed_file_id_dict[ident] = file_id_dict[ident]

if len(allowed_file_id_dict) == 0:
# no data, likely because all records have been filtered out
return None

filesystem_roots = identify_filesystems(file_id_dict=allowed_file_id_dict,
verbose=verbose)
# NOTE: this is a bit ugly, STDIO and POSIX are both combined
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ def get_io_cost_df(report: darshan.DarshanReport) -> Any:
io_cost_dict = {}
supported_modules = ["POSIX", "MPI-IO", "STDIO", "H5F", "H5D", "PNETCDF_FILE", "PNETCDF_VAR"]
for mod_key in report.modules:
if mod_key in supported_modules:
if mod_key in supported_modules and len(report.records[mod_key]) > 0:
# collect the records in dataframe form
recs = report.records[mod_key].to_df(attach=None)
# correct the MPI module key
Expand Down Expand Up @@ -150,13 +150,18 @@ def plot_io_cost(report: darshan.DarshanReport) -> Any:
Returns
-------
io_cost_fig: a ``matplotlib.pyplot.figure`` object containing a
stacked bar graph of the average read, write, and metadata times.
stacked bar graph of the average read, write, and metadata times --
or None when there is no data to plot

"""
# get the run time from the report metadata
runtime = report.metadata["job"]["run_time"]
# get the I/O cost dataframe
io_cost_df = get_io_cost_df(report=report)
if io_cost_df.empty:
# return None (rather than a figure) if there's no data to plot;
# this typically occurs when all module records have been filtered out
return None
# generate a figure with 2 y axes
io_cost_fig = plt.figure(figsize=(4.5, 4))
ax_raw = io_cost_fig.add_subplot(111)
Expand Down
Loading
Loading