Skip to content

Analysis: added matplotlib figure generation with several desirable performance metrics. #612

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 31 commits into
base: development
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 20 commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
adc02c0
added figure file
nikwl May 21, 2021
49f2a17
integrated figfile contents into analyzer.py
nikwl May 21, 2021
4914ee2
passed logfile where I should have passed logdir
nikwl May 21, 2021
c0a75b5
default logdir and logfile should now be none
nikwl May 21, 2021
3eda2a1
the figfile condition to run new code was flipped
nikwl May 21, 2021
ba577f8
removed period for consistency
nikwl May 21, 2021
56d5363
forgot to add code that converts directory to list of files
nikwl May 21, 2021
bf7f02c
pyplot imported incorrectly
nikwl May 21, 2021
8d06393
fig file now passable for either log file or log dir
nikwl May 21, 2021
8458013
assertion prevents generating figure with too few datapoints
nikwl May 21, 2021
876724f
directory handling was passed figfile instead of logfilenames
nikwl May 21, 2021
866dbd3
fixed bug with cumulative plot
nikwl May 21, 2021
f6c958f
Revert "fix: avoid more missing process errors"
nikwl Jun 22, 2021
b106fea
baby's first merge
nikwl Jun 22, 2021
375a90e
updating fork
nikwl Jun 22, 2021
e909d1b
migrated graph
nikwl Jun 22, 2021
b89bc40
Merge branch 'ericaltendorf:development' into development
nikwl Aug 7, 2021
95e1c63
several fixes, added some cli arguments, should work now
nikwl Aug 7, 2021
be3871c
Merge branch 'development' into development
altendky Aug 28, 2021
f87c2ab
Merge branch 'development' into development
altendky Aug 28, 2021
46260ad
Fixed several discontinuities that I think were caused by the previou…
nikwl Aug 29, 2021
a6c65ed
logdir is no longer required, instead it pulls from the logdir defined…
nikwl Aug 29, 2021
134d4b7
Added type annotations to functions
nikwl Aug 29, 2021
b478a14
Merge branch 'development' into nikwl/development
altendky Aug 29, 2021
a8039dd
black
altendky Aug 29, 2021
b55fb57
tidy
altendky Aug 29, 2021
2b356f5
Merge branch 'development' into nikwl_development
altendky Aug 29, 2021
37d6dc6
Merge pull request #1 from altendky/nikwl_development
nikwl Aug 29, 2021
1011b0f
Updated graph.py parser to new style. Reformatted graph.py with black.
nikwl Aug 29, 2021
3bc6d90
Update setup.cfg
altendky Aug 30, 2021
d15ec4c
[mypy-matplotlib] ignore_missing_imports = true
altendky Aug 30, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,9 @@ checks =
mypy == 0.902
types-pkg_resources ~= 0.1.2
%(test)s
graph =
matplotlib ~= 3.4
numpy ~= 1.20

[options.data_files]
config = src/plotman/resources/plotman.yaml
Expand Down
4 changes: 1 addition & 3 deletions src/plotman/analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,4 @@ def analyze(logfilenames: typing.List[str], clipterminals: bool, bytmp: bool, by
(rows, columns) = os.popen('stty size', 'r').read().split()
tab.set_max_width(int(columns))
s = tab.draw()
print(s)


print(s)
184 changes: 184 additions & 0 deletions src/plotman/graph.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,184 @@
import os
import time, datetime
import re
import statistics
import sys
import argparse

import numpy as np

import matplotlib
import matplotlib.pyplot as plt

from plotman.log_parser import PlotLogParser


def create_ax_dumbbell(ax, data, max_stacked=50) -> None:
    '''
    Create a dumbbell plot of concurrent plot instances over time.
    Each plot is drawn as a horizontal segment from its start time to its
    end time, stacked vertically so overlapping segments show concurrency.
    Parameters:
        ax: a matplotlib axis.
        data: numpy array with [start times, end times] (in hours).
        max_stacked: maximum number of segments stacked vertically before
            wrapping back to the bottom row, so the stack does not grow
            too tall.
    '''

    def newline(p1, p2, color='r'):
        # Draw a single segment from p1 to p2 on the enclosing axis.
        l = matplotlib.lines.Line2D([p1[0], p2[0]], [p1[1], p2[1]], color=color)
        ax.add_line(l)
        return l

    # Assign each plot a vertical slot, wrapping every max_stacked rows.
    # (Equivalent to the tile-then-trim construction it replaces.)
    num_rows = data.shape[0]
    stacker = np.arange(num_rows) % max_stacked

    # One segment per plot, plus endpoint markers at start and end times.
    for (p1, p2), i in zip(data[:, :2], stacker):
        newline([p1, i], [p2, i])
    ax.scatter(data[:, 0], stacker, color='b')
    ax.scatter(data[:, 1], stacker, color='b')

    ax.set_ylabel('Plots')
    ax.set_xlim(np.min(data[:, 0]) - 2, np.max(data[:, 1]) + 2)


def create_ax_plotrate(ax, data, end=True, window=3) -> None:
    '''
    Create a plot showing the rate of plotting over time. Can be computed
    with respect to the plot start (this is the rate of plot creation) or
    with respect to the plot end (this is the rate of plot completion).
    Parameters:
        ax: a matplotlib axis.
        data: numpy array with [start times, end times] (in hours).
        end: if True, compute the completion rate from end times;
            otherwise compute the creation rate from start times.
        window: number of plots each rate estimate is computed over.
    '''

    def estimate_rate(data, window):
        # Rate at index i is (plots in window - 1) / elapsed time over
        # that window; two parallel lists hold the elapsed times and the
        # effective window sizes.
        rate_list = []
        window_list = []
        # This takes care of when we don't have a full window yet:
        # the first `window` entries use a partial window anchored at
        # the first data point.
        for i in range(window):
            rate_list.append(data[i] - data[0])
            window_list.append(i)
        # This takes care of when we do have a full window.
        for i in range(len(data) - window):
            rate_list.append(data[i+window] - data[i])
            window_list.append(window)
        rate_list, window_list = np.array(rate_list), np.array(window_list)
        rate_list[rate_list == 0] = np.nan # This prevents div by zero error
        # Zero-elapsed (NaN) entries are mapped to a rate of 0.
        return np.where(np.logical_not(np.isnan(rate_list)), (window_list-1) / rate_list, 0)

    # Estimate the rate of ending or the rate of starting
    if end:
        rate = estimate_rate(data[:,1], window)
        ax.plot(data[:,1], rate)
    else:
        rate = estimate_rate(data[:,0], window)
        ax.plot(data[:,0], rate)

    ax.set_ylabel('Avg Plot Rate (plots/hour)')
    ax.set_xlim(np.min(data[:,0])-2, np.max(data[:,1])+2)


def create_ax_plottime(ax, data, window=3) -> None:
    '''
    Create a plot showing the average time to create a single plot. This is
    computed using a moving average. Note that the plot may not be
    very accurate for the beginning and ending windows.
    Parameters:
        ax: a matplotlib axis.
        data: numpy array with [start times, end times] (in hours).
        window: number of plots to average over.
    '''

    # Per-plot durations, padded at the end with window-1 copies of the
    # last duration so the 'valid' convolution yields one value per plot.
    # (1-D concatenate replaces the previous expand_dims/vstack/tile/
    # squeeze construction; the result is identical.)
    durations = data[:, 1] - data[:, 0]
    padded = np.concatenate((durations, np.full(window - 1, durations[-1])))

    # Moving average via convolution with a uniform kernel.
    kernel = np.ones(window) / window
    rolling_avg = np.convolve(padded, kernel, mode='valid')

    ax.plot(data[:, 1], rolling_avg)

    ax.set_ylabel('Avg Plot Time (hours)')
    ax.set_xlim(np.min(data[:, 0]) - 2, np.max(data[:, 1]) + 2)


def create_ax_plotcumulative(ax, data) -> None:
    '''
    Create a plot showing the cumulative number of plots over time.
    Parameters:
        ax: a matplotlib axis.
        data: numpy array with [start times, end times] (in hours).
    '''
    # At the i-th completion time, i + 1 plots are finished (the plot
    # ending at that instant counts), so plot 1..N rather than the
    # previous 0..N-1, which understated the running total by one.
    ax.plot(data[:, 1], np.arange(1, data.shape[0] + 1))

    ax.set_ylabel('Total plots (plots)')
    ax.set_xlim(np.min(data[:, 0]) - 2, np.max(data[:, 1]) + 2)


def graph(logdir: str, figfile: str, latest_k: int, window: int) -> None:
    '''
    Build a plotting-performance summary figure from a directory of plot
    log files and save it to figfile.
    Parameters:
        logdir: directory containing .log files to analyze.
        figfile: output image path (any format matplotlib can save).
        latest_k: if not None, only graph the latest k plots.
        window: window size for the moving-average subplots.
    Raises:
        ValueError: if window < 2, logdir is not a directory, no log
            files are present, or no finished plot is found.
    '''
    # Explicit raises instead of `assert` so validation still runs under
    # `python -O`, which strips assert statements.
    if window < 2:
        raise ValueError("Cannot compute moving average over such a small window")
    if not os.path.isdir(logdir):
        raise ValueError("Not a directory: {}".format(logdir))

    # Build a list of the logfiles
    logdir = os.path.abspath(logdir)
    logfilenames = [os.path.join(logdir, l) for l in os.listdir(logdir)
                    if os.path.splitext(l)[-1] == '.log']

    if not logfilenames:
        raise ValueError("Directory contains no files {}".format(logdir))

    # For each log file, extract the start, end, and duration (seconds)
    time_catter = []
    parser = PlotLogParser()
    for logfilename in logfilenames:
        with open(logfilename, 'r') as f:
            info = parser.parse(f)
            # Skip unfinished plots, which have no recorded total time
            if info.total_time_raw != 0:
                time_catter.append(
                    [
                        info.started_at.timestamp(),
                        info.started_at.timestamp() + info.total_time_raw,
                        info.total_time_raw
                    ]
                )

    if not time_catter:
        raise ValueError("No valid log files found, need a finished plot")

    # This array will hold start and end data (in hours)
    data_started_ended = np.array(time_catter) / (60 * 60)

    # Shift the data so that it starts at zero
    data_started_ended -= np.min(data_started_ended[:, 0])

    # Sort the rows by start time
    data_started_ended = data_started_ended[np.argsort(data_started_ended[:, 0])]

    # Remove older entries
    if latest_k is not None:
        data_started_ended = data_started_ended[-latest_k:, :]

    # Create figure (the subplots call establishes the figure size; the
    # individual axes are laid out below with plt.subplot)
    num_plots = 4
    f, _ = plt.subplots(2, 1, figsize=(8, 10))
    ax = plt.subplot(num_plots, 1, 1)
    ax.set_title('Plot performance summary')

    create_ax_dumbbell(ax, data_started_ended)

    # The moving-average subplots need more data points than the window
    if data_started_ended.shape[0] > window:
        ax = plt.subplot(num_plots, 1, 2)
        create_ax_plotrate(ax, data_started_ended, end=True, window=window)

        ax = plt.subplot(num_plots, 1, 3)
        create_ax_plottime(ax, data_started_ended, window=window)

    ax = plt.subplot(num_plots, 1, 4)
    create_ax_plotcumulative(ax, data_started_ended)

    ax.set_xlabel('Time (hours)')
    f.savefig(figfile)
46 changes: 19 additions & 27 deletions src/plotman/job.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,17 +189,12 @@ def get_running_jobs(
with contextlib.ExitStack() as exit_stack:
processes = []

pids = set()
ppids = set()

for process in psutil.process_iter():
# Ignore processes which most likely have terminated between the time of
# iteration and data access.
with contextlib.suppress(psutil.NoSuchProcess, psutil.AccessDenied):
exit_stack.enter_context(process.oneshot())
if is_plotting_cmdline(process.cmdline()):
ppids.add(process.ppid())
pids.add(process.pid)
processes.append(process)

# https://github.com/ericaltendorf/plotman/pull/418
Expand All @@ -209,6 +204,8 @@ def get_running_jobs(
# both identified as plot processes. Only the child is
# really plotting. Filter out the parent.

pids = {process.pid for process in processes}
ppids = {process.ppid() for process in processes}
wanted_pids = pids - ppids

wanted_processes = [
Expand All @@ -218,28 +215,23 @@ def get_running_jobs(
]

for proc in wanted_processes:
with contextlib.suppress(psutil.NoSuchProcess, psutil.AccessDenied):
if proc.pid in cached_jobs_by_pid.keys():
jobs.append(cached_jobs_by_pid[proc.pid]) # Copy from cache
else:
with proc.oneshot():
command_line = list(proc.cmdline())
if len(command_line) == 0:
# https://github.com/ericaltendorf/plotman/issues/610
continue
parsed_command = parse_chia_plots_create_command_line(
command_line=command_line,
)
if parsed_command.error is not None:
continue
job = cls(
proc=proc,
parsed_command=parsed_command,
logroot=logroot,
)
if job.help:
continue
jobs.append(job)
if proc.pid in cached_jobs_by_pid.keys():
jobs.append(cached_jobs_by_pid[proc.pid]) # Copy from cache
else:
with proc.oneshot():
parsed_command = parse_chia_plots_create_command_line(
command_line=proc.cmdline(),
)
if parsed_command.error is not None:
continue
job = Job(
proc=proc,
parsed_command=parsed_command,
logroot=logroot,
)
if job.help:
continue
jobs.append(job)

return jobs

Expand Down
24 changes: 22 additions & 2 deletions src/plotman/plotman.py
100755 → 100644
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
import pendulum

# Plotman libraries
from plotman import analyzer, archive, configuration, interactive, manager, plot_util, reporting, csv_exporter
from plotman import analyzer, archive, configuration, interactive, manager, plot_util, reporting, csv_exporter, graph
from plotman import resources as plotman_resources
from plotman.job import Job

Expand Down Expand Up @@ -95,8 +95,22 @@ def parse_args(self) -> typing.Any:
p_analyze.add_argument('--bybitfield',
action='store_true',
help='slice by bitfield/non-bitfield sorting')
p_analyze.add_argument('logfile', type=str, nargs='+',
p_analyze.add_argument('--logfile', type=str, nargs='+', default=None,
help='logfile(s) to analyze')
p_analyze.add_argument('--logdir', type=str, default=None,
help='directory containing multiple logfiles to analyze')
p_analyze.add_argument('--figfile', type=str, default=None,
help='figure to be created if logdir is passed')

p_graph = sp.add_parser('graph', help='create graph with plotting statistics')
p_graph.add_argument('logdir', type=str,
help='directory containing multiple logfiles to graph')
p_graph.add_argument('figfile', type=str,
help='graph file produced as output (.png, .jpg, etc.)')
p_graph.add_argument('--latest_k', type=int, default=None,
help='if passed, will only graph statistics for the latest k plots')
p_graph.add_argument('--window', type=int, default=3,
help='window size to compute moving average over')

args = parser.parse_args()
return args
Expand Down Expand Up @@ -204,6 +218,12 @@ def main() -> None:
analyzer.analyze(args.logfile, args.clipterminals,
args.bytmp, args.bybitfield)

#
# Graphing of completed jobs
#
elif args.cmd == 'graph':
graph.graph(args.logdir, args.figfile, args.latest_k, args.window)

#
# Exports log metadata to CSV
#
Expand Down
Empty file modified util/listlogs
100755 → 100644
Empty file.