Major fixes

Dom Laetsch · Dom Laetsch · commit 189017dd3b1c · 2015-12-06T13:57:19.000Z
diff --git a/blobtools b/blobtools
@@ -5,11 +5,12 @@
 usage: blobtools <command> [<args>...] [--help]
 
 commands:
-  create    create a BlobDB
-  view      print BlobDB
-  plot      plot BlobDB as a blobplot
+  create        create a BlobDB
+  view          print BlobDB
+  plot          plot BlobDB as a blobplot
 
-  bam2cov   generate cov file from bam file
+  comparecov    compare BlobDB cov(s) to additional cov file
+  bam2cov       generate cov file from bam file
 
 -h --help    show this
 
@@ -24,7 +25,7 @@ from docopt import docopt
 if __name__ == '__main__':
     main_dir = join(dirname(__file__), '')
     args = docopt(__doc__,
-                  version='version 0.9.7',
+                  version='version 0.9.8',
                   options_first=True)
     #print(args)
 
@@ -37,5 +38,7 @@ if __name__ == '__main__':
         exit(call(['python', main_dir + 'plot.py'] + argv))
     elif args['<command>'] == 'bam2cov':
         exit(call(['python', main_dir + 'bam2cov.py'] + argv))
+    elif args['<command>'] == 'comparecov':
+        exit(call(['python', main_dir + 'comparecov.py'] + argv))  
     else:
         exit("%r is not a blobtools command. See 'blobtools -h'." % args['<command>'])
diff --git a/comparecov.py b/comparecov.py
@@ -0,0 +1,200 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+"""usage: blobtools comparecov  -i BLOBDB -c COV [-p INT] [-l INT] [-n] [-s]
+                                [--xlabel XLABEL] [--ylabel YLABEL]
+                                [--log] [--xmax FLOAT] [--ymax FLOAT]
+                                [-r RANK] [-x TAXRULE] [-o PREFIX] [-m] [--title]
+                                [--sort ORDER] [--hist HIST] [--format FORMAT]
+                                [-h|--help] 
+
+    Options:
+        -h --help                   show this
+        -i, --infile BLOBDB         BlobDB file
+        -c, --cov COV               COV file used for y-axis
+        
+        --xlabel XLABEL             Label for x-axis [default: BlobDB_cov]
+        --ylabel YLABEL             Label for y-axis [default: CovFile_cov]
+        --log                       Plot log-scale axes
+        --xmax FLOAT                Maximum values for x-axis [default: 1e10]
+        --ymax FLOAT                Maximum values for y-axis [default: 1e10]
+
+        -p, --plotgroups INT        Number of (taxonomic) groups to plot, remaining 
+                                     groups are placed in 'other' [default: 7]
+        -r, --rank RANK             Taxonomic rank used for colouring of blobs [default: phylum]
+                                     (Supported: species, genus, family, order, phylum, superkingdom) 
+        -x, --taxrule TAXRULE       Taxrule which has been used for computing taxonomy 
+                                     (Supported: bestsum, bestsumorder) [default: bestsum]
+        --sort <ORDER>              Sort order for plotting [default: span]
+                                     span  : plot with decreasing span
+                                     count : plot with decreasing count 
+        --hist <HIST>               Data for histograms [default: span] 
+                                     span  : span-weighted histograms
+                                     count : count histograms
+
+        --title                     Add title of BlobDB to plot [default: False]
+        -l, --length INT            Minimum sequence length considered for plotting [default: 100]
+        -n, --nohit                 Hide sequences without taxonomic annotation [default: False]
+        -s, --noscale               Do not scale sequences by length [default: False]
+        -o, --out PREFIX            Output prefix
+        -m, --multiplot             Multi-plot. Print plot after addition of each (taxonomic) group 
+                                     [default: False]
+        --format FORMAT             Figure format for plot (png, pdf, eps, jpeg, 
+                                        ps, svg, svgz, tiff) [default: png]
+"""
+
+from __future__ import division
+from docopt import docopt
+import lib.BtCore as bt
+import lib.BtLog as BtLog
+import lib.BtIO as BtIO
+import lib.BtPlot as BtPlot
+from os.path import dirname, isfile
+
+if __name__ == '__main__':
+    TAXRULES = ['bestsum', 'bestsumorder']
+    RANKS = ['species', 'genus', 'family', 'order', 'phylum', 'superkingdom']
+    main_dir = dirname(__file__)
+    #print data_dir
+    args = docopt(__doc__)
+    blobdb_f = args['--infile']
+    cov_f = args['--cov']
+    x_label = args['--xlabel'] 
+    y_label = args['--ylabel']
+    scale = args['--log'] 
+    x_max = float(args['--xmax'])
+    y_max = float(args['--ymax'])
+    rank = args['--rank'] 
+    min_length = int(args['--length'])
+    multiplot = args['--multiplot']
+    hide_nohits = args['--nohit']
+    out_prefix = args['--out']
+    max_group_plot = int(args['--plotgroups'])
+    sort_order = args['--sort']
+    taxrule = args['--taxrule']
+    hist_type = args['--hist']
+    plot_title = args['--title']
+    ignore_contig_length = args['--noscale']
+    #labels = args['--label']
+    #colour_f = args['--colours']
+    #exclude_groups = args['--exclude']
+    format = args['--format'] 
+    #no_plot_blobs = args['--noblobs']
+    #no_plot_reads = args['--noreads']
+    #refcov_f = args['--refcov']
+    #catcolour_f = args['--catcolour']
+
+    # Does blobdb_f exist ?
+    if not isfile(blobdb_f):
+        BtLog.error('0', blobdb_f)
+
+    # Does cov_f exist ?
+    if not isfile(cov_f):
+        BtLog.error('0', cov_f)
+    # Parse the cov file into a dict
+    cov_dict = BtPlot.parseCovFile(cov_f)
+    
+    # Is the requested rank one of the supported RANKS?
+    if rank not in RANKS:
+        BtLog.error('9', rank)
+
+    # Are sort_order and hist_type sane?
+    if not sort_order in ['span', 'count']:
+        BtLog.error('14', sort_order)
+    if not hist_type in ['span', 'count']:            
+        BtLog.error('15', hist_type)
+
+    # Is the requested taxrule one of the supported TAXRULES?
+    if taxrule not in TAXRULES:
+        BtLog.error('8', taxrule)
+
+    # NOTE: label, exclude-group, refcov and catcolour handling below is commented out (disabled)
+    
+    #user_labels = BtPlot.parse_labels(labels)
+    #
+    #if (exclude_groups):
+    #    if "," in exclude_groups:
+    #        exclude_groups = exclude_groups.rsplit(",")
+    #    else:
+    #        exclude_groups = exclude_groups
+    #
+    #refcov_dict = {}
+    #if (refcov_f):
+    #    refcov_dict = BtPlot.parseRefCov(refcov_f)
+#
+    #catcolour_dict = {}
+    #if (catcolour_f) and (c_index):
+    #    BtLog.error('24')
+    #elif (catcolour_f):
+    #    catcolour_dict = BtPlot.parseCatColour(catcolour_f)
+    #else: 
+    #    pass
+
+    # Load BlobDb
+    print BtLog.status_d['9'] % blobdb_f
+    blobDB = bt.BlobDb('new')
+    blobDB.load(blobdb_f)
+
+    title = blobDB.title
+    if plot_title:
+        plot_title = title
+
+    # Is the requested taxrule among those stored in the loaded BlobDB?
+    if taxrule not in blobDB.taxrules:
+        BtLog.error('11', taxrule, blobDB.taxrules)
+    
+    data_dict, max_cov, cov_libs, cov_libs_total_reads = blobDB.getPlotData(rank, min_length, hide_nohits, taxrule, False, False)
+    plotObj = BtPlot.PlotObj(data_dict, cov_libs, cov_libs_total_reads)
+    #plotObj.exclude_groups = exclude_groups
+    if max_cov < x_max:
+        x_max = max_cov
+    if max_cov < y_max:
+        y_max = max_cov
+    
+    if (scale):
+        scale = 'log'
+    else:
+        scale = 'linear'
+
+    plotObj.max_cov = max_cov
+    plotObj.title = title
+    plotObj.format = format
+    plotObj.multiplot = multiplot
+    plotObj.hist_type = hist_type
+    plotObj.ignore_contig_length = ignore_contig_length
+    plotObj.max_group_plot = max_group_plot
+    plotObj.group_order = BtPlot.getSortedGroups(data_dict, sort_order)
+    plotObj.labels.update(plotObj.group_order)
+    
+    #if (user_labels):
+    #    for group, label in user_labels.items():
+    #        plotObj.labels.add(label)
+    plotObj.group_labels = {group : set() for group in plotObj.group_order}
+    plotObj.relabel_and_colour(None, {})
+    plotObj.compute_stats()
+
+    info_flag = 1
+
+    for cov_lib in plotObj.cov_libs:
+        if (plotObj.title):
+            plotObj.title = "%s.%s.%s" % (title, taxrule, cov_lib)
+
+        out_f = "%s.%s.%s.p%s.%s" % (title, hist_type, rank, max_group_plot, cov_lib)
+        if out_prefix:
+            out_f = "%s.%s" % (out_prefix, out_f)
+        #if catcolour_dict:
+        #    out_f = "%s.%s" % (out_f, "catcolour")
+        if ignore_contig_length:
+            out_f = "%s.%s" % (out_f, "noscale")
+        #if c_index:
+        #    out_f = "%s.%s" % (out_f, "c_index")
+        #if exclude_groups:
+        #    out_f = "%s.%s" % (out_f, "exclude" + "_".join(exclude_groups))
+        #if labels:
+        #    out_f = "%s.%s" % (out_f, "label_" + "_".join(set([name for name in user_labels.values()])))
+        out_f = "%s.%s.%s" % (out_f, min_length, taxrule)
+        plotObj.out_f = out_f
+        
+        plotObj.plotScatterCov(cov_lib, cov_dict, info_flag, x_label, y_label, scale, x_max, y_max)
+        info_flag = 0
+    plotObj.write_stats()
diff --git a/lib/BtCore.py b/lib/BtCore.py
@@ -156,41 +156,42 @@ def getPlotData(self, rank, min_length, hide_nohits, taxrule, c_index, catcolour
                     data_dict[group]['covs']['cov_sum'] = []
                     data_dict[group]['reads_mapped']['cov_sum'] = 0
 
+            data_dict[group]['count'] = data_dict[group].get('count', 0) + 1
+            data_dict[group]['span'] = data_dict[group].get('span', 0) + int(length)
             if ((hide_nohits) and group == 'no-hit') or length < min_length: # hidden
                 data_dict[group]['count_hidden'] = data_dict[group].get('count_hidden', 0) + 1
                 data_dict[group]['span_hidden'] = data_dict[group].get('span_hidden', 0) + int(length)
             else: # visible
                 data_dict[group]['count_visible'] = data_dict[group].get('count_visible', 0) + 1
                 data_dict[group]['span_visible'] = data_dict[group].get('span_visible', 0) + int(length)
+                data_dict[group]['name'].append(name)
+                data_dict[group]['length'].append(length)
+                data_dict[group]['gc'].append(gc)
 
-            data_dict[group]['name'].append(name)
-            data_dict[group]['length'].append(length)
-            data_dict[group]['gc'].append(gc)
-
-            cov_sum = 0.0
-            reads_mapped_sum = 0
-            for cov_lib in sorted(cov_libs):
-                cov = float(blob['covs'][cov_lib]) 
-                cov_sum += cov
-                cov = cov if cov > 0.02 else 0.02
-                if cov > max_cov:
-                    max_cov = cov
-                data_dict[group]['covs'][cov_lib].append(cov)
-                if cov_lib in blob['read_cov']:
-                    reads_mapped = blob['read_cov'][cov_lib]
-                    reads_mapped_sum += reads_mapped
-                    data_dict[group]['reads_mapped'][cov_lib] += reads_mapped  
-            
-            if len(cov_libs) > 1:
-                cov_sum = cov_sum if cov_sum > 0.02 else 0.02
-                data_dict[group]['covs']['cov_sum'].append(cov_sum)
-                if cov > max_cov:
-                    max_cov = cov
-                if (reads_mapped_sum):
-                    data_dict[group]['reads_mapped']['cov_sum'] += reads_mapped_sum
-
-            data_dict[group]['count'] = data_dict[group].get('count', 0) + 1
-            data_dict[group]['span'] = data_dict[group].get('span', 0) + int(length)
+                cov_sum = 0.0
+                reads_mapped_sum = 0
+                for cov_lib in sorted(cov_libs):
+                    cov = float(blob['covs'][cov_lib]) 
+                    cov_sum += cov
+                    cov = cov if cov > 0.02 else 0.02
+                    if cov > max_cov:
+                        max_cov = cov
+                    data_dict[group]['covs'][cov_lib].append(cov)
+                    if cov_lib in blob['read_cov']:
+                        reads_mapped = blob['read_cov'][cov_lib]
+                        reads_mapped_sum += reads_mapped
+                        data_dict[group]['reads_mapped'][cov_lib] += reads_mapped  
+                
+                if len(cov_libs) > 1:
+                    cov_sum = cov_sum if cov_sum > 0.02 else 0.02
+                    data_dict[group]['covs']['cov_sum'].append(cov_sum)
+                    if cov > max_cov:
+                        max_cov = cov
+                    if (reads_mapped_sum):
+                        data_dict[group]['reads_mapped']['cov_sum'] += reads_mapped_sum
+    
+                #data_dict[group]['count'] = data_dict[group].get('count', 0) + 1
+                #data_dict[group]['span'] = data_dict[group].get('span', 0) + int(length)
 
         if len(cov_libs) > 1:
             cov_libs.append('cov_sum')
diff --git a/lib/BtLog.py b/lib/BtLog.py
@@ -54,7 +54,8 @@ def progress(iteration, steps, max_value):
     '21' : '[ERROR:21]\t: Refcov file %s does not seem to have the right format.',
     '22' : '[ERROR:22]\t: Tax file %s seems to have no taxids.',
     '23' : '[ERROR:23]\t: Catcolour file %s does not seem to have the right format.',
-    '24' : '[ERROR:24]\t: Catcolour file incompatible with c-index colouring.'
+    '24' : '[ERROR:24]\t: Catcolour file incompatible with c-index colouring.',
+    '25' : '[ERROR:25]\t: Cov file %s does not seem to have the right format.'
 }
 
 warn_d = {
diff --git a/lib/BtPlot.py b/lib/BtPlot.py

Original file line number	Diff line number	Diff line change
`@@ -54,7 +54,8 @@ def progress(iteration, steps, max_value):`
`54`	`54`	`'21' : '[ERROR:21]\t: Refcov file %s does not seem to have the right format.',`
`55`	`55`	`'22' : '[ERROR:22]\t: Tax file %s seems to have no taxids.',`
`56`	`56`	`'23' : '[ERROR:23]\t: Catcolour file %s does not seem to have the right format.',`
`57`		`- '24' : '[ERROR:24]\t: Catcolour file incompatible with c-index colouring.'`
	`57`	`+ '24' : '[ERROR:24]\t: Catcolour file incompatible with c-index colouring.',`
	`58`	`+ '25' : '[ERROR:25]\t: Cov file %s does not seem to have the right format.'`
`58`	`59`	`}`
`59`	`60`
`60`	`61`	`warn_d = {`