Skip to content

Commit 189017d

Browse files
Dom LaetschDom Laetsch
authored andcommitted
Major fixes
1 parent cb8fc48 commit 189017d

File tree

5 files changed

+430
-88
lines changed

5 files changed

+430
-88
lines changed

blobtools

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,12 @@
55
usage: blobtools <command> [<args>...] [--help]
66
77
commands:
8-
create create a BlobDB
9-
view print BlobDB
10-
plot plot BlobDB as a blobplot
8+
create create a BlobDB
9+
view print BlobDB
10+
plot plot BlobDB as a blobplot
1111
12-
bam2cov generate cov file from bam file
12+
comparecov compare BlobDB cov(s) to additional cov file
13+
bam2cov generate cov file from bam file
1314
1415
-h --help show this
1516
@@ -24,7 +25,7 @@ from docopt import docopt
2425
if __name__ == '__main__':
2526
main_dir = join(dirname(__file__), '')
2627
args = docopt(__doc__,
27-
version='version 0.9.7',
28+
version='version 0.9.8',
2829
options_first=True)
2930
#print(args)
3031

@@ -37,5 +38,7 @@ if __name__ == '__main__':
3738
exit(call(['python', main_dir + 'plot.py'] + argv))
3839
elif args['<command>'] == 'bam2cov':
3940
exit(call(['python', main_dir + 'bam2cov.py'] + argv))
41+
elif args['<command>'] == 'comparecov':
42+
exit(call(['python', main_dir + 'comparecov.py'] + argv))
4043
else:
4144
exit("%r is not a blobtools command. See 'blobtools -h'." % args['<command>'])

comparecov.py

Lines changed: 200 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,200 @@
1+
#!/usr/bin/env python
2+
# -*- coding: utf-8 -*-
3+
4+
"""usage: blobtools comparecov -i BLOBDB -c COV [-p INT] [-l INT] [-n] [-s]
5+
[--xlabel XLABEL] [--ylabel YLABEL]
6+
[--log] [--xmax FLOAT] [--ymax FLOAT]
7+
[-r RANK] [-x TAXRULE] [-o PREFIX] [-m] [--title]
8+
[--sort ORDER] [--hist HIST] [--format FORMAT]
9+
[-h|--help]
10+
11+
Options:
12+
-h --help show this
13+
-i, --infile BLOBDB BlobDB file
14+
-c, --cov COV COV file used for y-axis
15+
16+
--xlabel XLABEL Label for x-axis [default: BlobDB_cov]
17+
--ylabel YLABEL Label for y-axis [default: CovFile_cov]
18+
--log Plot log-scale axes
19+
--xmax FLOAT Maximum values for x-axis [default: 1e10]
20+
--ymax FLOAT Maximum values for y-axis [default: 1e10]
21+
22+
-p, --plotgroups INT Number of (taxonomic) groups to plot, remaining
23+
groups are placed in 'other' [default: 7]
24+
-r, --rank RANK Taxonomic rank used for colouring of blobs [default: phylum]
25+
(Supported: species, genus, family, order, phylum, superkingdom)
26+
-x, --taxrule TAXRULE Taxrule which has been used for computing taxonomy
27+
(Supported: bestsum, bestsumorder) [default: bestsum]
28+
--sort <ORDER> Sort order for plotting [default: span]
29+
span : plot with decreasing span
30+
count : plot with decreasing count
31+
--hist <HIST> Data for histograms [default: span]
32+
span : span-weighted histograms
33+
count : count histograms
34+
35+
--title Add title of BlobDB to plot [default: False]
36+
-l, --length INT Minimum sequence length considered for plotting [default: 100]
37+
-n, --nohit Hide sequences without taxonomic annotation [default: False]
38+
-s, --noscale Do not scale sequences by length [default: False]
39+
-o, --out PREFIX Output prefix
40+
-m, --multiplot Multi-plot. Print plot after addition of each (taxonomic) group
41+
[default: False]
42+
--format FORMAT Figure format for plot (png, pdf, eps, jpeg,
43+
ps, svg, svgz, tiff) [default: png]
44+
"""
45+
46+
from __future__ import division
47+
from docopt import docopt
48+
import lib.BtCore as bt
49+
import lib.BtLog as BtLog
50+
import lib.BtIO as BtIO
51+
import lib.BtPlot as BtPlot
52+
from os.path import dirname, isfile
53+
54+
if __name__ == '__main__':
55+
TAXRULES = ['bestsum', 'bestsumorder']
56+
RANKS = ['species', 'genus', 'family', 'order', 'phylum', 'superkingdom']
57+
main_dir = dirname(__file__)
58+
#print data_dir
59+
args = docopt(__doc__)
60+
blobdb_f = args['--infile']
61+
cov_f = args['--cov']
62+
x_label = args['--xlabel']
63+
y_label = args['--ylabel']
64+
scale = args['--log']
65+
x_max = float(args['--xmax'])
66+
y_max = float(args['--ymax'])
67+
rank = args['--rank']
68+
min_length = int(args['--length'])
69+
multiplot = args['--multiplot']
70+
hide_nohits = args['--nohit']
71+
out_prefix = args['--out']
72+
max_group_plot = int(args['--plotgroups'])
73+
sort_order = args['--sort']
74+
taxrule = args['--taxrule']
75+
hist_type = args['--hist']
76+
plot_title = args['--title']
77+
ignore_contig_length = args['--noscale']
78+
#labels = args['--label']
79+
#colour_f = args['--colours']
80+
#exclude_groups = args['--exclude']
81+
format = args['--format']
82+
#no_plot_blobs = args['--noblobs']
83+
#no_plot_reads = args['--noreads']
84+
#refcov_f = args['--refcov']
85+
#catcolour_f = args['--catcolour']
86+
87+
# Does blobdb_f exist ?
88+
if not isfile(blobdb_f):
89+
BtLog.error('0', blobdb_f)
90+
91+
# Does cov_f exist ?
92+
if not isfile(cov_f):
93+
BtLog.error('0', cov_f)
94+
# parse cov file in dict
95+
cov_dict = BtPlot.parseCovFile(cov_f)
96+
97+
# Are ranks sane ?
98+
if rank not in RANKS:
99+
BtLog.error('9', rank)
100+
101+
# Are sort_order and hist_type sane?
102+
if not sort_order in ['span', 'count']:
103+
BtLog.error('14', sort_order)
104+
if not hist_type in ['span', 'count']:
105+
BtLog.error('15', hist_type)
106+
107+
# is taxrule provided?
108+
if taxrule not in TAXRULES:
109+
BtLog.error('8', taxrule)
110+
111+
# compute labels if supplied
112+
113+
#user_labels = BtPlot.parse_labels(labels)
114+
#
115+
#if (exclude_groups):
116+
# if "," in exclude_groups:
117+
# exclude_groups = exclude_groups.rsplit(",")
118+
# else:
119+
# exclude_groups = exclude_groups
120+
#
121+
#refcov_dict = {}
122+
#if (refcov_f):
123+
# refcov_dict = BtPlot.parseRefCov(refcov_f)
124+
#
125+
#catcolour_dict = {}
126+
#if (catcolour_f) and (c_index):
127+
# BtLog.error('24')
128+
#elif (catcolour_f):
129+
# catcolour_dict = BtPlot.parseCatColour(catcolour_f)
130+
#else:
131+
# pass
132+
133+
# Load BlobDb
134+
print BtLog.status_d['9'] % blobdb_f
135+
blobDB = bt.BlobDb('new')
136+
blobDB.load(blobdb_f)
137+
138+
title = blobDB.title
139+
if plot_title:
140+
plot_title = title
141+
142+
# Is taxrule sane and was it computed?
143+
if taxrule not in blobDB.taxrules:
144+
BtLog.error('11', taxrule, blobDB.taxrules)
145+
146+
data_dict, max_cov, cov_libs, cov_libs_total_reads = blobDB.getPlotData(rank, min_length, hide_nohits, taxrule, False, False)
147+
plotObj = BtPlot.PlotObj(data_dict, cov_libs, cov_libs_total_reads)
148+
#plotObj.exclude_groups = exclude_groups
149+
if max_cov < x_max:
150+
x_max = max_cov
151+
if max_cov < y_max:
152+
y_max = max_cov
153+
154+
if (scale):
155+
scale = 'log'
156+
else:
157+
scale = 'linear'
158+
159+
plotObj.max_cov = max_cov
160+
plotObj.title = title
161+
plotObj.format = format
162+
plotObj.multiplot = multiplot
163+
plotObj.hist_type = hist_type
164+
plotObj.ignore_contig_length = ignore_contig_length
165+
plotObj.max_group_plot = max_group_plot
166+
plotObj.group_order = BtPlot.getSortedGroups(data_dict, sort_order)
167+
plotObj.labels.update(plotObj.group_order)
168+
169+
#if (user_labels):
170+
# for group, label in user_labels.items():
171+
# plotObj.labels.add(label)
172+
plotObj.group_labels = {group : set() for group in plotObj.group_order}
173+
plotObj.relabel_and_colour(None, {})
174+
plotObj.compute_stats()
175+
176+
info_flag = 1
177+
178+
for cov_lib in plotObj.cov_libs:
179+
if (plotObj.title):
180+
plotObj.title = "%s.%s.%s" % (title, taxrule, cov_lib)
181+
182+
out_f = "%s.%s.%s.p%s.%s" % (title, hist_type, rank, max_group_plot, cov_lib)
183+
if out_prefix:
184+
out_f = "%s.%s" % (out_prefix, out_f)
185+
#if catcolour_dict:
186+
# out_f = "%s.%s" % (out_f, "catcolour")
187+
if ignore_contig_length:
188+
out_f = "%s.%s" % (out_f, "noscale")
189+
#if c_index:
190+
# out_f = "%s.%s" % (out_f, "c_index")
191+
#if exclude_groups:
192+
# out_f = "%s.%s" % (out_f, "exclude" + "_".join(exclude_groups))
193+
#if labels:
194+
# out_f = "%s.%s" % (out_f, "label_" + "_".join(set([name for name in user_labels.values()])))
195+
out_f = "%s.%s.%s" % (out_f, min_length, taxrule)
196+
plotObj.out_f = out_f
197+
198+
plotObj.plotScatterCov(cov_lib, cov_dict, info_flag, x_label, y_label, scale, x_max, y_max)
199+
info_flag = 0
200+
plotObj.write_stats()

lib/BtCore.py

Lines changed: 29 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -156,41 +156,42 @@ def getPlotData(self, rank, min_length, hide_nohits, taxrule, c_index, catcolour
156156
data_dict[group]['covs']['cov_sum'] = []
157157
data_dict[group]['reads_mapped']['cov_sum'] = 0
158158

159+
data_dict[group]['count'] = data_dict[group].get('count', 0) + 1
160+
data_dict[group]['span'] = data_dict[group].get('span', 0) + int(length)
159161
if ((hide_nohits) and group == 'no-hit') or length < min_length: # hidden
160162
data_dict[group]['count_hidden'] = data_dict[group].get('count_hidden', 0) + 1
161163
data_dict[group]['span_hidden'] = data_dict[group].get('span_hidden', 0) + int(length)
162164
else: # visible
163165
data_dict[group]['count_visible'] = data_dict[group].get('count_visible', 0) + 1
164166
data_dict[group]['span_visible'] = data_dict[group].get('span_visible', 0) + int(length)
167+
data_dict[group]['name'].append(name)
168+
data_dict[group]['length'].append(length)
169+
data_dict[group]['gc'].append(gc)
165170

166-
data_dict[group]['name'].append(name)
167-
data_dict[group]['length'].append(length)
168-
data_dict[group]['gc'].append(gc)
169-
170-
cov_sum = 0.0
171-
reads_mapped_sum = 0
172-
for cov_lib in sorted(cov_libs):
173-
cov = float(blob['covs'][cov_lib])
174-
cov_sum += cov
175-
cov = cov if cov > 0.02 else 0.02
176-
if cov > max_cov:
177-
max_cov = cov
178-
data_dict[group]['covs'][cov_lib].append(cov)
179-
if cov_lib in blob['read_cov']:
180-
reads_mapped = blob['read_cov'][cov_lib]
181-
reads_mapped_sum += reads_mapped
182-
data_dict[group]['reads_mapped'][cov_lib] += reads_mapped
183-
184-
if len(cov_libs) > 1:
185-
cov_sum = cov_sum if cov_sum > 0.02 else 0.02
186-
data_dict[group]['covs']['cov_sum'].append(cov_sum)
187-
if cov > max_cov:
188-
max_cov = cov
189-
if (reads_mapped_sum):
190-
data_dict[group]['reads_mapped']['cov_sum'] += reads_mapped_sum
191-
192-
data_dict[group]['count'] = data_dict[group].get('count', 0) + 1
193-
data_dict[group]['span'] = data_dict[group].get('span', 0) + int(length)
171+
cov_sum = 0.0
172+
reads_mapped_sum = 0
173+
for cov_lib in sorted(cov_libs):
174+
cov = float(blob['covs'][cov_lib])
175+
cov_sum += cov
176+
cov = cov if cov > 0.02 else 0.02
177+
if cov > max_cov:
178+
max_cov = cov
179+
data_dict[group]['covs'][cov_lib].append(cov)
180+
if cov_lib in blob['read_cov']:
181+
reads_mapped = blob['read_cov'][cov_lib]
182+
reads_mapped_sum += reads_mapped
183+
data_dict[group]['reads_mapped'][cov_lib] += reads_mapped
184+
185+
if len(cov_libs) > 1:
186+
cov_sum = cov_sum if cov_sum > 0.02 else 0.02
187+
data_dict[group]['covs']['cov_sum'].append(cov_sum)
188+
if cov > max_cov:
189+
max_cov = cov
190+
if (reads_mapped_sum):
191+
data_dict[group]['reads_mapped']['cov_sum'] += reads_mapped_sum
192+
193+
#data_dict[group]['count'] = data_dict[group].get('count', 0) + 1
194+
#data_dict[group]['span'] = data_dict[group].get('span', 0) + int(length)
194195

195196
if len(cov_libs) > 1:
196197
cov_libs.append('cov_sum')

lib/BtLog.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,8 @@ def progress(iteration, steps, max_value):
5454
'21' : '[ERROR:21]\t: Refcov file %s does not seem to have the right format.',
5555
'22' : '[ERROR:22]\t: Tax file %s seems to have no taxids.',
5656
'23' : '[ERROR:23]\t: Catcolour file %s does not seem to have the right format.',
57-
'24' : '[ERROR:24]\t: Catcolour file incompatible with c-index colouring.'
57+
'24' : '[ERROR:24]\t: Catcolour file incompatible with c-index colouring.',
58+
'25' : '[ERROR:25]\t: Cov file %s does not seem to have the right format.'
5859
}
5960

6061
warn_d = {

0 commit comments

Comments
 (0)