Skip to content

Commit 70b3cab

Browse files
Jon PalmerJon Palmer
Jon Palmer
authored and
Jon Palmer
committed
updates to v0.2.2
1 parent 0bfa086 commit 70b3cab

File tree

4 files changed

+101
-8
lines changed

4 files changed

+101
-8
lines changed

bin/funannotate-compare.py

+31-2
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,7 @@ def __init__(self,prog):
107107
busco = []
108108
gbkfilenames = []
109109
scinames = []
110+
signalp = []
110111
num_input = len(args.input)
111112
if num_input == 0:
112113
lib.log.error("Error, you did not specify an input, -i")
@@ -140,6 +141,7 @@ def __init__(self,prog):
140141
pfam.append(lib.getStatsfromDbxref(GBK, 'PFAM'))
141142
cazy.append(lib.getStatsfromNote(GBK, 'CAZy'))
142143
busco.append(lib.getStatsfromNote(GBK, 'BUSCO'))
144+
signalp.append(lib.getStatsfromNote(GBK, 'SECRETED'))
143145
lib.parseGOterms(GBK, go_folder, stats[i][0].replace(' ', '_'))
144146
lib.gb2proteinortho(GBK, protortho, stats[i][0].replace(' ', '_'))
145147
eggnog.append(lib.getEggNogfromNote(GBK))
@@ -401,6 +403,28 @@ def __init__(self,prog):
401403
output.write(lib.FOOTER)
402404
########################################################
403405

406+
####SignalP############################
407+
#flip the dict and just count number for each
408+
signalpDict = lib.busco_dictFlip(signalp)
409+
410+
if not os.path.isdir(os.path.join(args.out, 'signalp')):
411+
os.makedirs(os.path.join(args.out, 'signalp'))
412+
sig = {}
413+
for i in range(0,len(scinames)):
414+
if scinames[i] not in sig:
415+
sig[scinames[i]] = len(signalpDict[i])
416+
sigdf = pd.DataFrame([sig])
417+
sigdf.transpose().to_csv(os.path.join(args.out, 'signalp', 'signalp.csv'))
418+
lib.drawbarplot(sigdf, os.path.join(args.out, 'signalp', 'signalp.pdf'))
419+
420+
#create html output
421+
with open(os.path.join(args.out, 'signalp.html'), 'w') as output:
422+
output.write(lib.HEADER)
423+
output.write(lib.SIGNALP)
424+
output.write(lib.FOOTER)
425+
426+
########################################################
427+
404428
####GO Terms, GO enrichment############################
405429
if not os.path.isdir(os.path.join(args.out, 'go_enrichment')):
406430
os.makedirs(os.path.join(args.out, 'go_enrichment'))
@@ -636,8 +660,9 @@ def __init__(self,prog):
636660
meropsDict = lib.dictFlip(merops)
637661
cazyDict = lib.dictFlip(cazy)
638662

663+
639664
table = []
640-
header = ['GeneID','length','description', 'Ortho Group', 'EggNog', 'BUSCO','Protease family', 'CAZyme family', 'InterPro Domains', 'PFAM Domains', 'GO terms', 'SecMet Cluster', 'SMCOG']
665+
header = ['GeneID','length','description', 'Ortho Group', 'EggNog', 'BUSCO', 'Secreted', 'Protease family', 'CAZyme family', 'InterPro Domains', 'PFAM Domains', 'GO terms', 'SecMet Cluster', 'SMCOG']
641666
for y in range(0,num_input):
642667
outputname = os.path.join(args.out, 'annotations', scinames[y]+'.all.annotations.tsv')
643668
with open(outputname, 'w') as output:
@@ -657,6 +682,10 @@ def __init__(self,prog):
657682
IPRdomains = "; ".join(iprDict.get(ID))
658683
else:
659684
IPRdomains = ''
685+
if ID in signalpDict[y]:
686+
signalphit = signalpDict[y].get(ID)[0]
687+
else:
688+
signalphit = ''
660689
if ID in pfamDict:
661690
pfamdomains = "; ".join(pfamDict.get(ID))
662691
else:
@@ -693,7 +722,7 @@ def __init__(self,prog):
693722
if i.startswith('SMCOG:'):
694723
smcog = i
695724

696-
final_result = [ID, str(length), description, orthogroup, egg, buscogroup, meropsdomains, cazydomains, IPRdomains, pfamdomains, goTerms, cluster, smcog]
725+
final_result = [ID, str(length), description, orthogroup, egg, buscogroup, signalphit, meropsdomains, cazydomains, IPRdomains, pfamdomains, goTerms, cluster, smcog]
697726
output.write("%s\n" % ('\t'.join(final_result)))
698727
############################################
699728

bin/funannotate-functional.py

+13-2
Original file line numberDiff line numberDiff line change
@@ -252,7 +252,16 @@ def runIPRpython(Input):
252252
if not lib.checkannotations(busco_out):
253253
lib.runBUSCO(Proteins, args.cpus, os.path.join(outputdir, 'annotate_misc'), busco_out)
254254
num_annotations = lib.line_count(busco_out)
255-
lib.log.info('{0:,}'.format(num_annotations) + ' annotations added')
255+
lib.log.info('{0:,}'.format(num_annotations) + ' annotations added')
256+
#SignalP has to be installed manually by the user, so first check that the executable is available and only run it if it is found
257+
signalp_out = os.path.join(outputdir, 'annotate_misc', 'annotations.signalp.txt')
258+
if lib.which('signalp'):
259+
if not lib.checkannotations(signalp_out):
260+
lib.signalP(Proteins, os.path.join(outputdir, 'annotate_misc'), signalp_out)
261+
num_annotations = lib.line_count(signalp_out)
262+
lib.log.info('{0:,}'.format(num_annotations) + ' annotations added')
263+
else:
264+
lib.log.info("SignalP not installed, skipping")
256265

257266
if not args.skip_iprscan:
258267
if not args.iprscan:
@@ -345,8 +354,10 @@ def runIPRpython(Input):
345354
for file in os.listdir(os.path.join(outputdir, 'annotate_misc')):
346355
if file.startswith('annotations'):
347356
file = os.path.join(outputdir, 'annotate_misc', file)
348-
with open(file, 'rU') as input:
357+
with open(file) as input:
349358
for line in input:
359+
if 'go.obo' in line: #newer goatools versions write a line mentioning 'go.obo' into the annotation output; skip it here
360+
continue
350361
total_annotations += 1
351362
if line not in lines_seen:
352363
output.write(line)

funannotate.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ def fmtcols(mylist, cols):
3131
for i in range(0,num_lines))
3232
return "\n".join(lines)
3333

34-
version = '0.2.1'
34+
version = '0.2.2'
3535

3636
default_help = """
3737
Usage: funannotate <command> <arguments>

lib/library.py

+56-3
Original file line numberDiff line numberDiff line change
@@ -91,11 +91,14 @@ def multipleReplace(text, wordDict):
9191
def which(name):
9292
try:
9393
with open(os.devnull) as devnull:
94-
diff = ['tbl2asn', 'dustmasker', 'mafft']
94+
diff = ['tbl2asn', 'dustmasker', 'mafft', 'signalp']
9595
if not any(name in x for x in diff):
9696
subprocess.Popen([name], stdout=devnull, stderr=devnull).communicate()
9797
else:
98-
subprocess.Popen([name, '--version'], stdout=devnull, stderr=devnull).communicate()
98+
if name == 'signalp':
99+
subprocess.Popen([name, '-V'], stdout=devnull, stderr=devnull).communicate()
100+
else:
101+
subprocess.Popen([name, '--version'], stdout=devnull, stderr=devnull).communicate()
99102
except OSError as e:
100103
if e.errno == os.errno.ENOENT:
101104
return False
@@ -573,6 +576,29 @@ def dbCANsearch(input, cpus, evalue, tmpdir, output):
573576
query = query + '-T1'
574577
out.write("%s\tnote\tCAZy:%s\n" % (query, hit))
575578

579+
def signalP(input, tmpdir, output):
    """Run SignalP on a protein FASTA file and write an annotation file.

    The external ``signalp`` program is called in eukaryote mode with the
    'short' output format; its raw output is saved to ``signalp.txt`` inside
    ``tmpdir``. Every result row flagged 'Y' in its 10th column is then
    written to ``output`` as a tab-delimited annotation line of the form
    ``<ID>\\tnote\\tSECRETED:SignalP(<start>-<end>)``.

    Args:
        input: path to the protein FASTA file handed to ``signalp``.
        tmpdir: directory in which the raw ``signalp.txt`` result is stored.
        output: path of the annotation file to create.
    """
    log.info("Predicting secreted proteins with SignalP")
    signalp_result = os.path.join(tmpdir, 'signalp.txt')
    #both handles are context-managed so the devnull handle is closed as well
    with open(signalp_result, 'w') as out, open(os.devnull, 'w') as FNULL:
        subprocess.call(['signalp', '-t', 'euk', '-f', 'short', input], stdout=out, stderr=FNULL)
    #parse output and turn into annotation file
    with open(output, 'w') as signalp:
        #plain text mode: the 'U' flag is deprecated and was removed in Python 3.11
        with open(signalp_result) as results:
            for line in results:
                if line.startswith('#'):
                    continue
                #output is space padded, not tab delimited; split() drops the
                #empty fields and the newline and returns a real list on both
                #Python 2 and 3 (filter() is a non-indexable iterator on Python 3)
                col = line.split()
                if len(col) < 10: #blank or truncated line, nothing to parse
                    continue
                if col[9] == 'Y': #then there is signal peptide
                    ID = col[0]
                    if not ID.endswith('-T1'):
                        ID = ID + '-T1'
                    start = 1
                    #NOTE(review): col[2] is presumably the predicted cleavage
                    #position in the SignalP short format - confirm against
                    #the SignalP version in use
                    end = int(col[2]) - 1
                    signalp.write("%s\tnote\tSECRETED:SignalP(%i-%i)\n" % (ID, start, end))
601+
576602
def RepeatModelMask(input, cpus, tmpdir, output, debug):
577603
log.info("Loading sequences and soft-masking genome")
578604
FNULL = open(os.devnull, 'w')
@@ -1263,6 +1289,21 @@ def drawHeatmap(df, color, output, annotate):
12631289
item.set_fontsize(4)
12641290
fig.savefig(output, format='pdf', dpi=1000, bbox_inches='tight')
12651291
plt.close(fig)
1292+
1293+
def drawbarplot(df, output):
    """Draw a seaborn bar plot of ``df`` and save it as a PDF.

    The plot uses the module-level ``pref_colors`` palette, labels the x axis
    'Genomes' and the y axis 'Secreted Proteins', and is written to ``output``.

    Args:
        df: data passed straight to ``seaborn.barplot`` via ``data=``.
        output: path of the PDF file to write.
    """
    #plotting libraries are imported lazily with import-time warnings silenced
    with warnings.catch_warnings():
        warnings.simplefilter('ignore')
        import matplotlib.pyplot as plt
        import seaborn as sns
    sns.set(style="darkgrid")
    figure = plt.figure()
    #barplot draws onto the current (just created) figure
    sns.barplot(data=df, palette=pref_colors)
    plt.xlabel('Genomes')
    plt.ylabel('Secreted Proteins')
    figure.savefig(output, format='pdf', dpi=1000, bbox_inches='tight')
    #close the figure so it is not kept open after saving
    plt.close(figure)
1306+
12661307

12671308
def distance2mds(df, distance, type, output):
12681309
import numpy as np
@@ -1515,18 +1556,20 @@ def ortho2phylogeny(folder, df, num, dict, cpus, bootstrap, tmpdir, outgroup, sp
15151556
<span class="icon-bar"></span>
15161557
<span class="icon-bar"></span>
15171558
<span class="icon-bar"></span>
1559+
<span class="icon-bar"></span>
15181560
</button>
15191561
<a class="navbar-brand" href="index.html">Funannotate</a>
15201562
</div>
15211563
<div id="navbar" class="collapse navbar-collapse">
15221564
<ul class="nav navbar-nav">
1523-
<li class="active"><a href="stats.html">Stats</a></li>
1565+
<li><a href="stats.html">Stats</a></li>
15241566
<li><a href="phylogeny.html">Phylogeny</a></li>
15251567
<li><a href="orthologs.html">Orthologs</a></li>
15261568
<li><a href="interpro.html">InterProScan</a></li>
15271569
<li><a href="pfam.html">PFAM</a></li>
15281570
<li><a href="merops.html">Merops</a></li>
15291571
<li><a href="cazy.html">CAZymes</a></li>
1572+
<li><a href="signalp.html">SignalP</a></li>
15301573
<li><a href="go.html">GO ontology</a></li>
15311574
<li><a href="citation.html">Citation</a></li>
15321575
</ul>
@@ -1549,6 +1592,7 @@ def ortho2phylogeny(folder, df, num, dict, cpus, bootstrap, tmpdir, outgroup, sp
15491592
<p><a href='phylogeny.html'>Maximum likelihood Phylogeny (RAxML)</a></p>
15501593
<p><a href='merops.html'>MEROPS Protease Stats</a></p>
15511594
<p><a href='cazy.html'>CAZyme carbohydrate activating enzyme Stats</a></p>
1595+
<p><a href='signalp.html'>Secreted proteins (SignalP)</a></p>
15521596
<p><a href='interpro.html'>InterProScan Domain Stats</a></p>
15531597
<p><a href='pfam.html'>PFAM Domain Stats</a></p>
15541598
<p><a href='go.html'>Gene Ontology Enrichment Analysis</a></p>
@@ -1593,6 +1637,14 @@ def ortho2phylogeny(folder, df, num, dict, cpus, bootstrap, tmpdir, outgroup, sp
15931637
<a href='pfam/PFAM.nmds.pdf'><img src="pfam/PFAM.nmds.pdf" height="500" /></a></div>
15941638
<div class="table-responsive">
15951639
'''
1640+
SIGNALP = '''
1641+
<div class="container">
1642+
<div class="starter-template">
1643+
<h2 class="sub-header">Secreted Proteins per Genome Results</h2>
1644+
<div class='row'>
1645+
<a href='signalp/signalp.pdf'><img src="signalp/signalp.pdf" height="500" /></a></div>
1646+
<div class="table-responsive">
1647+
'''
15961648
CAZY = '''
15971649
<div class="container">
15981650
<div class="starter-template">
@@ -1666,6 +1718,7 @@ def ortho2phylogeny(folder, df, num, dict, cpus, bootstrap, tmpdir, outgroup, sp
16661718
<li><a href="pfam.html">PFAM</a></li>
16671719
<li><a href="merops.html">Merops</a></li>
16681720
<li><a href="cazy.html">CAZymes</a></li>
1721+
<li><a href="signalp.html">SignalP</a></li>
16691722
<li><a href="go.html">GO ontology</a></li>
16701723
<li><a href="citation.html">Citation</a></li>
16711724
<li class="dropdown">

0 commit comments

Comments
 (0)