Skip to content

Commit 1ba402a

Browse files
Jon Palmer authored and committed
update to v0.3.8
1 parent 7b32564 commit 1ba402a

File tree

3 files changed

+45
-41
lines changed

3 files changed

+45
-41
lines changed

bin/funannotate-predict.py

+37-37
Original file line number | Diff line number | Diff line change
@@ -1,23 +1,23 @@
11
#!/usr/bin/env python
22

3-
import sys, os, subprocess, inspect, multiprocessing, shutil, argparse, time, re, platform
3+
import sys, os, subprocess, inspect, shutil, argparse, re
44
from Bio import SeqIO
55
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
66
parentdir = os.path.dirname(currentdir)
7-
sys.path.insert(0,parentdir)
7+
sys.path.insert(0, parentdir)
88
import lib.library as lib
99

1010
#setup menu with argparse
1111
class MyFormatter(argparse.ArgumentDefaultsHelpFormatter):
12-
def __init__(self,prog):
13-
super(MyFormatter,self).__init__(prog,max_help_position=48)
14-
parser=argparse.ArgumentParser(prog='funannotate-predict.py', usage="%(prog)s [options] -i genome.fasta",
15-
description='''Script that does it all...''',
16-
epilog="""Written by Jon Palmer (2016) [email protected]""",
12+
def __init__(self, prog):
13+
super(MyFormatter, self).__init__(prog, max_help_position=48)
14+
parser = argparse.ArgumentParser(prog='funannotate-predict.py', usage="%(prog)s [options] -i genome.fasta",
15+
description = '''Script that does it all.''',
16+
epilog = """Written by Jon Palmer (2016) [email protected]""",
1717
formatter_class = MyFormatter)
18-
parser.add_argument('-i','--input', required=True, help='Genome in FASTA format')
19-
parser.add_argument('-o','--out', required=True, help='Basename of output files')
20-
parser.add_argument('-s','--species', required=True, help='Species name (e.g. "Aspergillus fumigatus") use quotes if there is a space')
18+
parser.add_argument('-i', '--input', required=True, help='Genome in FASTA format')
19+
parser.add_argument('-o', '--out', required=True, help='Basename of output files')
20+
parser.add_argument('-s', '--species', required=True, help='Species name (e.g. "Aspergillus fumigatus") use quotes if there is a space')
2121
parser.add_argument('--isolate', help='Isolate/strain name (e.g. Af293)')
2222
parser.add_argument('--header_length', default=16, type=int, help='Max length for fasta headers')
2323
parser.add_argument('--name', default="FUN_", help='Shortname for genes, perhaps assigned by NCBI, eg. VC83')
@@ -52,7 +52,7 @@ def __init__(self,prog):
5252
conflict = ['busco', 'busco_proteins', 'RepeatMasker', 'RepeatModeler', 'genemark', 'EVM_tmp', 'braker']
5353
if args.out in conflict:
5454
lib.log.error("%s output folder conflicts with a hard coded tmp folder, please change -o parameter" % args.out)
55-
os._exit(1)
55+
sys.exit(1)
5656

5757
#create folder structure
5858
if not os.path.exists(args.out):
@@ -87,19 +87,18 @@ def __init__(self,prog):
8787
blastdb = os.path.join(parentdir,'DB','REPEATS.psq')
8888
if not os.path.isfile(blastdb):
8989
lib.log.error("funannotate database is not properly configured, please run `./setup.sh` in the %s directory" % parentdir)
90-
os._exit(1)
90+
sys.exit(1)
9191
#check buscos, download if necessary
9292
if not os.path.isdir(os.path.join(parentdir, 'DB', args.busco_db)):
9393
lib.download_buscos(args.busco_db)
9494

95-
9695
#do some checks and balances
9796
try:
9897
EVM = os.environ["EVM_HOME"]
9998
except KeyError:
10099
if not args.EVM_HOME:
101100
lib.log.error("$EVM_HOME environmental variable not found, Evidence Modeler is not properly configured. You can use the --EVM_HOME argument to specifiy a path at runtime")
102-
os._exit(1)
101+
sys.exit(1)
103102
else:
104103
EVM = args.EVM_HOME
105104

@@ -108,7 +107,7 @@ def __init__(self,prog):
108107
except KeyError:
109108
if not args.AUGUSTUS_CONFIG_PATH:
110109
lib.log.error("$AUGUSTUS_CONFIG_PATH environmental variable not found, Augustus is not properly configured. You can use the --AUGUSTUS_CONFIG_PATH argument to specify a path at runtime.")
111-
os._exit(1)
110+
sys.exit(1)
112111
else:
113112
AUGUSTUS = args.AUGUSTUS_CONFIG_PATH
114113

@@ -119,7 +118,7 @@ def __init__(self,prog):
119118
if not lib.which('gmes_petap.pl'):
120119
if not args.GENEMARK_PATH:
121120
lib.log.error("GeneMark not found and $GENEMARK_PATH environmental variable missing, BRAKER1 is not properly configured. You can use the --GENEMARK_PATH argument to specify a path at runtime.")
122-
os._exit(1)
121+
sys.exit(1)
123122
else:
124123
GENEMARK_PATH = args.GENEMARK_PATH
125124

@@ -130,7 +129,7 @@ def __init__(self,prog):
130129
if not lib.which('bamtools'):
131130
if not args.BAMTOOLS_PATH:
132131
lib.log.error("Bamtools not found and $BAMTOOLS_PATH environmental variable missing, BRAKER1 is not properly configured. You can use the --BAMTOOLS_PATH argument to specify a path at runtime.")
133-
os._exit(1)
132+
sys.exit(1)
134133
else:
135134
BAMTOOLS_PATH = args.BAMTOOLS_PATH
136135

@@ -141,7 +140,7 @@ def __init__(self,prog):
141140
AutoAug = os.path.join(AUGUSTUS_BASE, 'scripts', 'autoAug.pl')
142141
GeneMark2GFF = os.path.join(parentdir, 'util', 'genemark_gtf2gff3.pl')
143142

144-
programs = ['tblastn', 'exonerate', 'makeblastdb','dustmasker','gag.py','tbl2asn','gmes_petap.pl', 'BuildDatabase', 'RepeatModeler', 'RepeatMasker', GeneMark2GFF, AutoAug, 'bedtools', 'gmap', 'gmap_build', 'blat', 'pslCDnaFilter', 'augustus', 'etraining', 'rmOutToGFF3.pl']
143+
programs = ['tblastn', 'exonerate', 'makeblastdb', 'dustmasker', 'gag.py', 'tbl2asn', 'gmes_petap.pl', 'BuildDatabase', 'RepeatModeler', 'RepeatMasker', GeneMark2GFF, AutoAug, 'bedtools', 'gmap', 'gmap_build', 'blat', 'pslCDnaFilter', 'augustus', 'etraining', 'rmOutToGFF3.pl']
145144
lib.CheckDependencies(programs)
146145

147146
#check augustus species now, so that you don't get through script and then find out it is already in DB
@@ -155,10 +154,11 @@ def __init__(self,prog):
155154

156155
#check augustus functionality
157156
augustuscheck = lib.checkAugustusFunc(AUGUSTUS_BASE)
157+
system_os = lib.systemOS()
158158
if args.rna_bam:
159159
if augustuscheck[1] == 0:
160160
lib.log.error("ERROR: %s is not installed properly for BRAKER1 (check bam2hints compilation)" % augustuscheck[0])
161-
os._exit(1)
161+
sys.exit(1)
162162
if not augspeciescheck: #means training needs to be done
163163
if augustuscheck[2] == 0:
164164
if 'MacOSX' in system_os:
@@ -170,7 +170,7 @@ def __init__(self,prog):
170170
else:
171171
lib.log.error("ERROR: %s is not installed properly and this version not work with BUSCO, this is a problem with Augustus compliatation, you may need to compile manually on %s." % (augustuscheck[0], system_os))
172172
if not args.pasa_gff: #first training will use pasa, otherwise BUSCO
173-
os._exit(1)
173+
sys.exit(1)
174174
else:
175175
lib.log.info("Will proceed with PASA models to train Augustus")
176176

@@ -204,7 +204,7 @@ def __init__(self,prog):
204204
header_test = lib.checkFastaHeaders(args.input, args.header_length)
205205
if not header_test:
206206
lib.log.error("Fasta headers on your input have more characters than the max (16), reformat headers to continue.")
207-
os._exit(1)
207+
sys.exit(1)
208208

209209
#setup augustus parallel command
210210
AUGUSTUS_PARALELL = os.path.join(parentdir, 'bin', 'augustus_parallel.py')
@@ -248,7 +248,7 @@ def __init__(self,prog):
248248
#check for masked genome here
249249
if not os.path.isfile(MaskGenome) or lib.getSize(MaskGenome) < 10:
250250
lib.log.error("RepeatMasking failed, check log files.")
251-
os._exit(1)
251+
sys.exit(1)
252252

253253
#load contig names and sizes into dictionary.
254254
ContigSizes = {}
@@ -258,7 +258,7 @@ def __init__(self,prog):
258258
ContigSizes[rec.id] = len(rec.seq)
259259
else:
260260
lib.log.error("Error, duplicate contig names, exiting")
261-
os._exit(1)
261+
sys.exit(1)
262262

263263
#check for previous files and setup output files
264264
Predictions = os.path.join(args.out, 'predict_misc', 'gene_predictions.gff3')
@@ -293,7 +293,7 @@ def __init__(self,prog):
293293
genesources.append(source)
294294
if not genesources:
295295
lib.log.error("Maker2 GFF not parsed correctly, no gene models found, exiting.")
296-
os._exit(1)
296+
sys.exit(1)
297297
for i in genesources:
298298
if i == 'maker':
299299
output.write("ABINITIO_PREDICTION\t%s\t1\n" % i)
@@ -373,7 +373,7 @@ def __init__(self,prog):
373373
#check for protein evidence/format as needed
374374
p2g_out = os.path.join(args.out, 'predict_misc', 'exonerate.out')
375375
prot_temp = os.path.join(args.out, 'predict_misc', 'proteins.combined.fa')
376-
P2G = os.path.join(parentdir, 'bin','funannotate-p2g.py')
376+
P2G = os.path.join(parentdir, 'bin', 'funannotate-p2g.py')
377377
if not args.exonerate_proteins:
378378
if args.protein_evidence:
379379
if os.path.isfile(prot_temp):
@@ -412,7 +412,7 @@ def __init__(self,prog):
412412
subprocess.call([ExoConverter, exonerate_out], stdout = output, stderr = FNULL)
413413
except OSError:
414414
lib.log.error("$EVM_HOME variable is incorrect, please double-check: %s" % EVM)
415-
os._exit(1)
415+
sys.exit(1)
416416
Exonerate = os.path.abspath(Exonerate)
417417
#now run exonerate2 hints for Augustus
418418
exonerate2hints = os.path.join(AUGUSTUS_BASE, 'scripts', 'exonerate2hints.pl')
@@ -448,7 +448,7 @@ def __init__(self,prog):
448448
GeneMark = os.path.join(args.out, 'predict_misc', 'genemark.evm.gff3')
449449
with open(GeneMark, 'w') as output:
450450
with open(GeneMarkTemp, 'rU') as input:
451-
lines = input.read().replace("Augustus","GeneMark")
451+
lines = input.read().replace("Augustus", "GeneMark")
452452
output.write(lines)
453453

454454
if args.augustus_gff:
@@ -556,7 +556,7 @@ def __init__(self,prog):
556556
GeneMark = os.path.join(args.out, 'predict_misc', 'genemark.evm.gff3')
557557
with open(GeneMark, 'w') as output:
558558
with open(GeneMarkTemp, 'rU') as input:
559-
lines = input.read().replace("Augustus","GeneMark")
559+
lines = input.read().replace("Augustus", "GeneMark")
560560
output.write(lines)
561561
else: #have training parameters file, so just run genemark with
562562
GeneMarkGFF3 = os.path.join(args.out, 'predict_misc', 'genemark.gff')
@@ -586,7 +586,7 @@ def __init__(self,prog):
586586
subprocess.call(['perl', Converter, GeneMarkTemp], stdout = output, stderr = FNULL)
587587
with open(GeneMark, 'w') as output:
588588
with open(GeneMarkTemp2, 'rU') as input:
589-
lines = input.read().replace("Augustus","GeneMark")
589+
lines = input.read().replace("Augustus", "GeneMark")
590590
output.write(lines)
591591

592592
else:
@@ -602,7 +602,7 @@ def __init__(self,prog):
602602
GeneMark = os.path.join(args.out, 'predict_misc', 'genemark.evm.gff3')
603603
with open(GeneMark, 'w') as output:
604604
with open(GeneMarkTemp, 'rU') as input:
605-
lines = input.read().replace("Augustus","GeneMark")
605+
lines = input.read().replace("Augustus", "GeneMark")
606606
output.write(lines)
607607

608608
if not Augustus:
@@ -644,7 +644,7 @@ def __init__(self,prog):
644644
lib.log.error("BUSCO training of Augusus failed, check busco logs, exiting")
645645
#remove the augustus training config folder
646646
shutil.rmtree(os.path.join(AUGUSTUS, 'species', aug_species))
647-
os._exit(1)
647+
sys.exit(1)
648648
#proper training files exist, now run EVM on busco models to get high quality predictions.
649649
lib.log.info("BUSCO predictions complete, now formatting for EVM")
650650
#move the busco folder now where it should reside
@@ -735,12 +735,12 @@ def __init__(self,prog):
735735
total = lib.countGFFgenes(EVM_busco)
736736
except IOError:
737737
lib.log.error("EVM did not run correctly, output file missing")
738-
os._exit(1)
738+
sys.exit(1)
739739
#check number of gene models, if 0 then failed, delete output file for re-running
740740
if total < 1:
741741
lib.log.error("Evidence modeler has failed, exiting")
742742
os.remove(EVM_busco)
743-
os._exit(1)
743+
sys.exit(1)
744744
else:
745745
lib.log.info('{0:,}'.format(total) + ' total gene models from EVM')
746746
#move EVM folder to predict folder
@@ -787,7 +787,7 @@ def __init__(self,prog):
787787
#just double-check that you've gotten here and both Augustus/GeneMark are finished
788788
if not any([Augustus, GeneMark]):
789789
lib.log.error("Augustus or GeneMark prediction is missing, check log files for errors")
790-
os._exit(1)
790+
sys.exit(1)
791791

792792
#GeneMark can fail if you try to pass a single contig, check file length
793793
GM_check = lib.line_count(GeneMark)
@@ -902,12 +902,12 @@ def __init__(self,prog):
902902
total = lib.countGFFgenes(EVM_out)
903903
except IOError:
904904
lib.log.error("EVM did not run correctly, output file missing")
905-
os._exit(1)
905+
sys.exit(1)
906906
#check number of gene models, if 0 then failed, delete output file for re-running
907907
if total < 1:
908908
lib.log.error("Evidence modeler has failed, exiting")
909909
os.remove(EVM_out)
910-
os._exit(1)
910+
sys.exit(1)
911911
else:
912912
lib.log.info('{0:,}'.format(total) + ' total gene models from EVM')
913913

@@ -1037,4 +1037,4 @@ def __init__(self,prog):
10371037
os.rename('funannotate-EVM.log', os.path.join(args.out, 'logfiles', 'funannotate-EVM.log'))
10381038
if os.path.isfile('funannotate-p2g.log'):
10391039
os.rename('funannotate-p2g.log', os.path.join(args.out, 'logfiles', 'funannotate-p2g.log'))
1040-
os._exit(1)
1040+
sys.exit(1)

funannotate.py

+2-2
Original file line number | Diff line number | Diff line change
@@ -31,7 +31,7 @@ def fmtcols(mylist, cols):
3131
for i in range(0,num_lines))
3232
return "\n".join(lines)
3333

34-
version = '0.3.7'
34+
version = '0.3.8'
3535

3636
default_help = """
3737
Usage: funannotate <command> <arguments>
@@ -95,6 +95,7 @@ def fmtcols(mylist, cols):
9595
Arguments: -i, --input Multi-fasta genome file. (Required)
9696
-o, --output Sorted by size and relabeled output file. (Required)
9797
-b, --base Base name to relabel contigs. Default: scaffold
98+
--minlen Shorter contigs are discarded. Default: 0
9899
99100
Written by Jon Palmer (2016) [email protected]
100101
""" % (sys.argv[1], version)
@@ -316,7 +317,6 @@ def fmtcols(mylist, cols):
316317
if len(arguments) > 0:
317318
cmd = os.path.join(script_path, 'setup.sh')
318319
arguments.insert(0, cmd)
319-
print [cmd, 'dep']
320320
if '--all' in arguments:
321321
subprocess.call(cmd, cwd = script_path)
322322
elif '--dep' in arguments:

lib/library.py

+6-2
Original file line number | Diff line number | Diff line change
@@ -842,15 +842,19 @@ def MemoryCheck():
842842
mem = psutil.virtual_memory()
843843
RAM = int(mem.total)
844844
return round(RAM / 1024000000)
845-
846-
def SystemInfo():
845+
846+
def systemOS():
847847
if sys.platform == 'darwin':
848848
system_os = 'MacOSX '+ platform.mac_ver()[0]
849849
elif sys.platform == 'linux':
850850
linux_version = platform.linux_distribution()
851851
system_os = linux_version[0]+ ' '+linux_version[1]
852852
else:
853853
system_os = sys.platform
854+
return system_os
855+
856+
def SystemInfo():
857+
system_os = systemOS()
854858
python_vers = str(sys.version_info[0])+'.'+str(sys.version_info[1])+'.'+str(sys.version_info[2])
855859
log.info("OS: %s, %i cores, ~ %i GB RAM. Python: %s" % (system_os, multiprocessing.cpu_count(), MemoryCheck(), python_vers))
856860

0 commit comments

Comments
 (0)