Skip to content

Commit fd4a679

Browse files
committed
Fix hist overflow shifting/comparison bug, add --linestyles to compare-plotdirs.py,
and handle multiple file glob strs. Overflow shifting wasn't working if the hist was entirely outside the specified range.
1 parent 3144ea1 commit fd4a679

File tree

3 files changed

+33
-24
lines changed

3 files changed

+33
-24
lines changed

bin/compare-plotdirs.py

Lines changed: 19 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -30,13 +30,15 @@
3030
# ----------------------------------------------------------------------------------------
3131
def get_hists_from_dir(dirname, histname, string_to_ignore=None):
3232
hists = {}
33-
for fname in glob.glob('%s/%s' % (dirname, args.file_glob_str)):
34-
varname = os.path.basename(fname)
35-
for rstr in args.file_replace_strs:
36-
varname = varname.replace(rstr, '')
37-
if string_to_ignore is not None:
38-
varname = varname.replace(string_to_ignore, '')
39-
hists[varname] = Hist(fname=fname, title=histname)
33+
file_patterns = args.file_glob_str.split(',')
34+
for pattern in file_patterns:
35+
for fname in glob.glob('%s/%s' % (dirname, pattern)):
36+
varname = os.path.basename(fname)
37+
for rstr in args.file_replace_strs:
38+
varname = varname.replace(rstr, '')
39+
if string_to_ignore is not None:
40+
varname = varname.replace(string_to_ignore, '')
41+
hists[varname] = Hist(fname=fname, title=histname)
4042
if len(hists) == 0:
4143
print(' no csvs found%s in %s' % ('' if args.file_glob_str is None else ' with --file-glob-str \'%s\''%args.file_glob_str, dirname))
4244
return hists
@@ -113,8 +115,6 @@ def plot_single_variable(args, varname, hlist, outdir, pathnameclues):
113115
else:
114116
if bounds is None:
115117
bounds = plotconfig.default_hard_bounds.setdefault(varname, None)
116-
if bounds is None and 'insertion' in varname:
117-
bounds = plotconfig.default_hard_bounds.setdefault('all_insertions', None)
118118
if varname in plotconfig.gene_usage_columns:
119119
# no_labels = True # not sure why i wanted these labels turned off?
120120
if 'j_' not in varname:
@@ -159,6 +159,12 @@ def plot_single_variable(args, varname, hlist, outdir, pathnameclues):
159159
args.log = 'xy'
160160
xticks = [1, 2, 3, 5, 10, 15, 20]
161161
xticklabels = ['1', '2', '3', '5', '10', '15', '20']
162+
if bounds is not None and any(h.xmin > bounds[1] or h.xmax < bounds[0] for h in hlist): # if any hist is entirely outside of <bounds>, widen the <bounds>
163+
hist_bounds = [h.get_filled_bin_xbounds() for h in hlist]
164+
overall_min, overall_max = [f(vals+(b,)) for f, vals, b in zip([min, max], zip(*hist_bounds), bounds)]
165+
if overall_min < bounds[0] or overall_max > bounds[1]:
166+
print(' %s (%s) overriding bounds %s with filled hist bounds to get %s' % (utils.wrnstr(), varname, bounds, (overall_min, overall_max)))
167+
bounds = (overall_min, overall_max)
162168

163169
if xtitle is None:
164170
xtitle = xtitledict.get(varname)
@@ -187,7 +193,7 @@ def plot_single_variable(args, varname, hlist, outdir, pathnameclues):
187193
plotting.draw_no_root(hlist[0], plotname=varname, plotdir=outdir, more_hists=hlist[1:], write_csv=False, stats=stats, bounds=bounds, ybounds=args.ybounds,
188194
shift_overflows=shift_overflows, plottitle=plottitle, colors=args.colors,
189195
xtitle=xtitle if args.xtitle is None else args.xtitle, ytitle=ytitle if args.ytitle is None else args.ytitle, xline=xline, normalize=(args.normalize and '_vs_mute_freq' not in varname),
190-
linewidths=linewidths, markersizes=args.markersizes, alphas=args.alphas, errors=not args.no_errors, remove_empty_bins=True, #='y' in args.log,
196+
linewidths=linewidths, linestyles=args.linestyles, markersizes=args.markersizes, alphas=args.alphas, errors=not args.no_errors, remove_empty_bins=True, #='y' in args.log,
191197
figsize=figsize, no_labels=no_labels, log=args.log, translegend=translegend, xticks=xticks, xticklabels=xticklabels, square_bins=args.square_bins)
192198

193199
if args.swarm_meta_key is not None:
@@ -211,6 +217,7 @@ class MultiplyInheritedFormatter(argparse.RawTextHelpFormatter, argparse.Argumen
211217
parser.add_argument('--alphas')
212218
parser.add_argument('--linewidths', default=':'.join(plotting.default_linewidths), help='colon-separated list of linewidths to cycle through')
213219
parser.add_argument('--markersizes', default=':'.join(plotting.default_markersizes), help='colon-separated list of linewidths to cycle through')
220+
parser.add_argument('--linestyles', help='colon-separated list of linestyles to cycle through')
214221
parser.add_argument('--gldirs', help='On plots showing mutation vs individual gene positions, if you\'d like a dashed veritcal line showing conserved codon positions, set this as a colon-separated list of germline info dirs corresponding to each plotdir') #, default=['data/germlines/human'])
215222
parser.add_argument('--locus', default='igh')
216223
parser.add_argument('--normalize', action='store_true', help='If set, the histograms from each plotdir are normalized (each bin contents divided by the integral) before making the comparison (e.g. for comparing different size samples).')
@@ -219,7 +226,7 @@ class MultiplyInheritedFormatter(argparse.RawTextHelpFormatter, argparse.Argumen
219226
parser.add_argument('--log', default='', help='Display these axes on a log scale, set to either \'x\', \'y\', or \'xy\'')
220227
parser.add_argument('--make-parent-html', action='store_true', help='after doing everything within subdirs, make a single html in the main/parent dir with all plots from subdirs')
221228
parser.add_argument('--add-to-title', help='string to append to existing title (use @ as space)')
222-
parser.add_argument('--file-glob-str', default='*.csv', help='shell glob style regex for matching plot files')
229+
parser.add_argument('--file-glob-str', default='*.csv', help='Shell glob style regex for matching plot files. Separate multiple patterns with \',\' (and no curly braces {})')
223230
parser.add_argument('--file-replace-strs', default='.csv', help='colon-separated list of strings to remove frome file base name to get variable name')
224231
parser.add_argument('--xbounds')
225232
parser.add_argument('--ybounds')
@@ -238,6 +245,7 @@ class MultiplyInheritedFormatter(argparse.RawTextHelpFormatter, argparse.Argumen
238245
args.alphas = utils.get_arg_list(args.alphas, floatify=True)
239246
args.colors = utils.get_arg_list(args.colors)
240247
args.linewidths = utils.get_arg_list(args.linewidths, intify=True)
248+
args.linestyles = utils.get_arg_list(args.linestyles)
241249
args.markersizes = utils.get_arg_list(args.markersizes, intify=True)
242250
args.gldirs = utils.get_arg_list(args.gldirs)
243251
args.translegend = utils.get_arg_list(args.translegend, floatify=True)

partis/plotconfig.py

Lines changed: 10 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -165,24 +165,23 @@
165165

166166
default_hard_bounds = {
167167
# 'hamming_to_true_naive' : (-0.5, 19.5),
168-
'hamming_to_true_naive' : (-0.5, 25),
169-
'cdr3_hamming_to_true_naive' : (-0.5, 25),
168+
'hamming_to_true_naive' : (-0.5, 25.5),
169+
'cdr3_hamming_to_true_naive' : (-0.5, 25.5),
170170
'v_hamming_to_true_naive' : (-0.5, 8.5),
171171
'd_hamming_to_true_naive' : (-0.5, 10.5),
172172
'j_hamming_to_true_naive' : (-0.5, 12.5),
173-
'd_3p_del' : (-1, 18),
174-
'd_5p_del' : (-1, 20),
175-
'dj_insertion' : (-1, 22),
176-
'jf_insertion' : (-1, 13),
177-
'fv_insertion' : (-1, 13),
178-
'j_5p_del' : (-1, 17),
173+
'd_3p_del' : (-0.5, 18.5),
174+
'd_5p_del' : (-0.5, 20.5),
175+
'dj_insertion' : (-0.5, 22.5),
176+
'jf_insertion' : (-0.5, 13.5),
177+
'fv_insertion' : (-0.5, 13.5),
178+
'j_5p_del' : (-0.5, 17.5),
179179
'all-mean-freq' : (0.0, 0.4), # NOTE make sure you know where the decimal place is here!
180180
'v-mean-freq' : (0.0, 0.4), # NOTE make sure you know where the decimal place is here!
181181
'd-mean-freq' : (0.0, 0.5), # NOTE make sure you know where the decimal place is here!
182182
'j-mean-freq' : (0.0, 0.4), # NOTE make sure you know where the decimal place is here!
183-
'v_3p_del' : (-1, 6),
184-
'vd_insertion' : (-1, 18),
185-
'all_insertions' : (-0.5, 20),
183+
'v_3p_del' : (-0.5, 8.5),
184+
'vd_insertion' : (-0.5, 17.5),
186185
'IGHJ6*02' : (-0.5, 39.5),
187186
'IGHJ3*02' : (-0.5, 26),
188187
'IGHJ1*01' : (-0.5, 28),

partis/plotting.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -418,6 +418,8 @@ def shift_hist_overflows(hists, xmin, xmax):
418418
else:
419419
break
420420

421+
if first_shown_bin == -1 or last_shown_bin == -1:
422+
raise RuntimeError(f"Can\'t shift overflows for hist '{htmp.title}' since it has no bins within shift range {xmin}, {xmax} (hist range: {htmp.xmin}, {htmp.xmax})")
421423
htmp.set_ibin(first_shown_bin,
422424
underflows + htmp.bin_contents[first_shown_bin],
423425
error=math.sqrt(under_err2 + htmp.errors[first_shown_bin]**2))
@@ -448,7 +450,7 @@ def draw_no_root(hist, log='', plotdir=None, plotname='', more_hists=None, scale
448450

449451
multiply_by_bin_width = False
450452
if normalize and len(set((h.n_bins, h.xmin, h.xmax) for h in hists)) > 1:
451-
print(' %s normalizing hists with different bins, which will *not* work/look right if there\'s empty bins (turn on square_bins to see)' % utils.wrnstr())
453+
print(' %s (%s) normalizing hists with different bins, which will *not* work/look right if there\'s empty bins (turning on square_bins should better show this)' % (utils.wrnstr(), plotname))
452454
multiply_by_bin_width = True
453455

454456
xmin, xmax, ymin, ymax = None, None, None, None
@@ -505,7 +507,7 @@ def draw_no_root(hist, log='', plotdir=None, plotname='', more_hists=None, scale
505507
while len(tmpcolors) < len(hists):
506508
tmpcolors += tmpcolors
507509

508-
tmplinestyles = [] if linestyles is None or len(linestyles) < len(hists) else copy.deepcopy(linestyles)
510+
tmplinestyles = [] if linestyles is None else copy.deepcopy(linestyles)
509511
itmp = 0
510512
availstyles = ['-', '--', '-.', ':']
511513
while len(tmplinestyles) < len(hists):

0 commit comments

Comments
 (0)