1
1
#!/usr/bin/env python
2
2
3
- import sys , os , subprocess , inspect , multiprocessing , shutil , argparse , time , re , platform
3
+ import sys , os , subprocess , inspect , shutil , argparse , re
4
4
from Bio import SeqIO
5
5
currentdir = os .path .dirname (os .path .abspath (inspect .getfile (inspect .currentframe ())))
6
6
parentdir = os .path .dirname (currentdir )
7
- sys .path .insert (0 ,parentdir )
7
+ sys .path .insert (0 , parentdir )
8
8
import lib .library as lib
9
9
10
10
#setup menu with argparse
11
11
class MyFormatter (argparse .ArgumentDefaultsHelpFormatter ):
12
- def __init__ (self ,prog ):
13
- super (MyFormatter ,self ).__init__ (prog ,max_help_position = 48 )
14
- parser = argparse .ArgumentParser (prog = 'funannotate-predict.py' , usage = "%(prog)s [options] -i genome.fasta" ,
15
- description = '''Script that does it all.. .''' ,
16
- epilog = """Written by Jon Palmer (2016) [email protected] """ ,
12
+ def __init__ (self , prog ):
13
+ super (MyFormatter , self ).__init__ (prog , max_help_position = 48 )
14
+ parser = argparse .ArgumentParser (prog = 'funannotate-predict.py' , usage = "%(prog)s [options] -i genome.fasta" ,
15
+ description = '''Script that does it all.''' ,
16
+ epilog = """Written by Jon Palmer (2016) [email protected] """ ,
17
17
formatter_class = MyFormatter )
18
- parser .add_argument ('-i' ,'--input' , required = True , help = 'Genome in FASTA format' )
19
- parser .add_argument ('-o' ,'--out' , required = True , help = 'Basename of output files' )
20
- parser .add_argument ('-s' ,'--species' , required = True , help = 'Species name (e.g. "Aspergillus fumigatus") use quotes if there is a space' )
18
+ parser .add_argument ('-i' , '--input' , required = True , help = 'Genome in FASTA format' )
19
+ parser .add_argument ('-o' , '--out' , required = True , help = 'Basename of output files' )
20
+ parser .add_argument ('-s' , '--species' , required = True , help = 'Species name (e.g. "Aspergillus fumigatus") use quotes if there is a space' )
21
21
parser .add_argument ('--isolate' , help = 'Isolate/strain name (e.g. Af293)' )
22
22
parser .add_argument ('--header_length' , default = 16 , type = int , help = 'Max length for fasta headers' )
23
23
parser .add_argument ('--name' , default = "FUN_" , help = 'Shortname for genes, perhaps assigned by NCBI, eg. VC83' )
@@ -52,7 +52,7 @@ def __init__(self,prog):
52
52
conflict = ['busco' , 'busco_proteins' , 'RepeatMasker' , 'RepeatModeler' , 'genemark' , 'EVM_tmp' , 'braker' ]
53
53
if args .out in conflict :
54
54
lib .log .error ("%s output folder conflicts with a hard coded tmp folder, please change -o parameter" % args .out )
55
- os . _exit (1 )
55
+ sys . exit (1 )
56
56
57
57
#create folder structure
58
58
if not os .path .exists (args .out ):
@@ -87,19 +87,18 @@ def __init__(self,prog):
87
87
blastdb = os .path .join (parentdir ,'DB' ,'REPEATS.psq' )
88
88
if not os .path .isfile (blastdb ):
89
89
lib .log .error ("funannotate database is not properly configured, please run `./setup.sh` in the %s directory" % parentdir )
90
- os . _exit (1 )
90
+ sys . exit (1 )
91
91
#check buscos, download if necessary
92
92
if not os .path .isdir (os .path .join (parentdir , 'DB' , args .busco_db )):
93
93
lib .download_buscos (args .busco_db )
94
94
95
-
96
95
#do some checks and balances
97
96
try :
98
97
EVM = os .environ ["EVM_HOME" ]
99
98
except KeyError :
100
99
if not args .EVM_HOME :
101
100
lib .log .error ("$EVM_HOME environmental variable not found, Evidence Modeler is not properly configured. You can use the --EVM_HOME argument to specifiy a path at runtime" )
102
- os . _exit (1 )
101
+ sys . exit (1 )
103
102
else :
104
103
EVM = args .EVM_HOME
105
104
@@ -108,7 +107,7 @@ def __init__(self,prog):
108
107
except KeyError :
109
108
if not args .AUGUSTUS_CONFIG_PATH :
110
109
lib .log .error ("$AUGUSTUS_CONFIG_PATH environmental variable not found, Augustus is not properly configured. You can use the --AUGUSTUS_CONFIG_PATH argument to specify a path at runtime." )
111
- os . _exit (1 )
110
+ sys . exit (1 )
112
111
else :
113
112
AUGUSTUS = args .AUGUSTUS_CONFIG_PATH
114
113
@@ -119,7 +118,7 @@ def __init__(self,prog):
119
118
if not lib .which ('gmes_petap.pl' ):
120
119
if not args .GENEMARK_PATH :
121
120
lib .log .error ("GeneMark not found and $GENEMARK_PATH environmental variable missing, BRAKER1 is not properly configured. You can use the --GENEMARK_PATH argument to specify a path at runtime." )
122
- os . _exit (1 )
121
+ sys . exit (1 )
123
122
else :
124
123
GENEMARK_PATH = args .GENEMARK_PATH
125
124
@@ -130,7 +129,7 @@ def __init__(self,prog):
130
129
if not lib .which ('bamtools' ):
131
130
if not args .BAMTOOLS_PATH :
132
131
lib .log .error ("Bamtools not found and $BAMTOOLS_PATH environmental variable missing, BRAKER1 is not properly configured. You can use the --BAMTOOLS_PATH argument to specify a path at runtime." )
133
- os . _exit (1 )
132
+ sys . exit (1 )
134
133
else :
135
134
BAMTOOLS_PATH = args .BAMTOOLS_PATH
136
135
@@ -141,7 +140,7 @@ def __init__(self,prog):
141
140
AutoAug = os .path .join (AUGUSTUS_BASE , 'scripts' , 'autoAug.pl' )
142
141
GeneMark2GFF = os .path .join (parentdir , 'util' , 'genemark_gtf2gff3.pl' )
143
142
144
- programs = ['tblastn' , 'exonerate' , 'makeblastdb' ,'dustmasker' ,'gag.py' ,'tbl2asn' ,'gmes_petap.pl' , 'BuildDatabase' , 'RepeatModeler' , 'RepeatMasker' , GeneMark2GFF , AutoAug , 'bedtools' , 'gmap' , 'gmap_build' , 'blat' , 'pslCDnaFilter' , 'augustus' , 'etraining' , 'rmOutToGFF3.pl' ]
143
+ programs = ['tblastn' , 'exonerate' , 'makeblastdb' , 'dustmasker' , 'gag.py' , 'tbl2asn' , 'gmes_petap.pl' , 'BuildDatabase' , 'RepeatModeler' , 'RepeatMasker' , GeneMark2GFF , AutoAug , 'bedtools' , 'gmap' , 'gmap_build' , 'blat' , 'pslCDnaFilter' , 'augustus' , 'etraining' , 'rmOutToGFF3.pl' ]
145
144
lib .CheckDependencies (programs )
146
145
147
146
#check augustus species now, so that you don't get through script and then find out it is already in DB
@@ -155,10 +154,11 @@ def __init__(self,prog):
155
154
156
155
#check augustus functionality
157
156
augustuscheck = lib .checkAugustusFunc (AUGUSTUS_BASE )
157
+ system_os = lib .systemOS ()
158
158
if args .rna_bam :
159
159
if augustuscheck [1 ] == 0 :
160
160
lib .log .error ("ERROR: %s is not installed properly for BRAKER1 (check bam2hints compilation)" % augustuscheck [0 ])
161
- os . _exit (1 )
161
+ sys . exit (1 )
162
162
if not augspeciescheck : #means training needs to be done
163
163
if augustuscheck [2 ] == 0 :
164
164
if 'MacOSX' in system_os :
@@ -170,7 +170,7 @@ def __init__(self,prog):
170
170
else :
171
171
lib .log .error ("ERROR: %s is not installed properly and this version not work with BUSCO, this is a problem with Augustus compliatation, you may need to compile manually on %s." % (augustuscheck [0 ], system_os ))
172
172
if not args .pasa_gff : #first training will use pasa, otherwise BUSCO
173
- os . _exit (1 )
173
+ sys . exit (1 )
174
174
else :
175
175
lib .log .info ("Will proceed with PASA models to train Augustus" )
176
176
@@ -204,7 +204,7 @@ def __init__(self,prog):
204
204
header_test = lib .checkFastaHeaders (args .input , args .header_length )
205
205
if not header_test :
206
206
lib .log .error ("Fasta headers on your input have more characters than the max (16), reformat headers to continue." )
207
- os . _exit (1 )
207
+ sys . exit (1 )
208
208
209
209
#setup augustus parallel command
210
210
AUGUSTUS_PARALELL = os .path .join (parentdir , 'bin' , 'augustus_parallel.py' )
@@ -248,7 +248,7 @@ def __init__(self,prog):
248
248
#check for masked genome here
249
249
if not os .path .isfile (MaskGenome ) or lib .getSize (MaskGenome ) < 10 :
250
250
lib .log .error ("RepeatMasking failed, check log files." )
251
- os . _exit (1 )
251
+ sys . exit (1 )
252
252
253
253
#load contig names and sizes into dictionary.
254
254
ContigSizes = {}
@@ -258,7 +258,7 @@ def __init__(self,prog):
258
258
ContigSizes [rec .id ] = len (rec .seq )
259
259
else :
260
260
lib .log .error ("Error, duplicate contig names, exiting" )
261
- os . _exit (1 )
261
+ sys . exit (1 )
262
262
263
263
#check for previous files and setup output files
264
264
Predictions = os .path .join (args .out , 'predict_misc' , 'gene_predictions.gff3' )
@@ -293,7 +293,7 @@ def __init__(self,prog):
293
293
genesources .append (source )
294
294
if not genesources :
295
295
lib .log .error ("Maker2 GFF not parsed correctly, no gene models found, exiting." )
296
- os . _exit (1 )
296
+ sys . exit (1 )
297
297
for i in genesources :
298
298
if i == 'maker' :
299
299
output .write ("ABINITIO_PREDICTION\t %s\t 1\n " % i )
@@ -373,7 +373,7 @@ def __init__(self,prog):
373
373
#check for protein evidence/format as needed
374
374
p2g_out = os .path .join (args .out , 'predict_misc' , 'exonerate.out' )
375
375
prot_temp = os .path .join (args .out , 'predict_misc' , 'proteins.combined.fa' )
376
- P2G = os .path .join (parentdir , 'bin' ,'funannotate-p2g.py' )
376
+ P2G = os .path .join (parentdir , 'bin' , 'funannotate-p2g.py' )
377
377
if not args .exonerate_proteins :
378
378
if args .protein_evidence :
379
379
if os .path .isfile (prot_temp ):
@@ -412,7 +412,7 @@ def __init__(self,prog):
412
412
subprocess .call ([ExoConverter , exonerate_out ], stdout = output , stderr = FNULL )
413
413
except OSError :
414
414
lib .log .error ("$EVM_HOME variable is incorrect, please double-check: %s" % EVM )
415
- os . _exit (1 )
415
+ sys . exit (1 )
416
416
Exonerate = os .path .abspath (Exonerate )
417
417
#now run exonerate2 hints for Augustus
418
418
exonerate2hints = os .path .join (AUGUSTUS_BASE , 'scripts' , 'exonerate2hints.pl' )
@@ -448,7 +448,7 @@ def __init__(self,prog):
448
448
GeneMark = os .path .join (args .out , 'predict_misc' , 'genemark.evm.gff3' )
449
449
with open (GeneMark , 'w' ) as output :
450
450
with open (GeneMarkTemp , 'rU' ) as input :
451
- lines = input .read ().replace ("Augustus" ,"GeneMark" )
451
+ lines = input .read ().replace ("Augustus" , "GeneMark" )
452
452
output .write (lines )
453
453
454
454
if args .augustus_gff :
@@ -556,7 +556,7 @@ def __init__(self,prog):
556
556
GeneMark = os .path .join (args .out , 'predict_misc' , 'genemark.evm.gff3' )
557
557
with open (GeneMark , 'w' ) as output :
558
558
with open (GeneMarkTemp , 'rU' ) as input :
559
- lines = input .read ().replace ("Augustus" ,"GeneMark" )
559
+ lines = input .read ().replace ("Augustus" , "GeneMark" )
560
560
output .write (lines )
561
561
else : #have training parameters file, so just run genemark with
562
562
GeneMarkGFF3 = os .path .join (args .out , 'predict_misc' , 'genemark.gff' )
@@ -586,7 +586,7 @@ def __init__(self,prog):
586
586
subprocess .call (['perl' , Converter , GeneMarkTemp ], stdout = output , stderr = FNULL )
587
587
with open (GeneMark , 'w' ) as output :
588
588
with open (GeneMarkTemp2 , 'rU' ) as input :
589
- lines = input .read ().replace ("Augustus" ,"GeneMark" )
589
+ lines = input .read ().replace ("Augustus" , "GeneMark" )
590
590
output .write (lines )
591
591
592
592
else :
@@ -602,7 +602,7 @@ def __init__(self,prog):
602
602
GeneMark = os .path .join (args .out , 'predict_misc' , 'genemark.evm.gff3' )
603
603
with open (GeneMark , 'w' ) as output :
604
604
with open (GeneMarkTemp , 'rU' ) as input :
605
- lines = input .read ().replace ("Augustus" ,"GeneMark" )
605
+ lines = input .read ().replace ("Augustus" , "GeneMark" )
606
606
output .write (lines )
607
607
608
608
if not Augustus :
@@ -644,7 +644,7 @@ def __init__(self,prog):
644
644
lib .log .error ("BUSCO training of Augusus failed, check busco logs, exiting" )
645
645
#remove the augustus training config folder
646
646
shutil .rmtree (os .path .join (AUGUSTUS , 'species' , aug_species ))
647
- os . _exit (1 )
647
+ sys . exit (1 )
648
648
#proper training files exist, now run EVM on busco models to get high quality predictions.
649
649
lib .log .info ("BUSCO predictions complete, now formatting for EVM" )
650
650
#move the busco folder now where it should reside
@@ -735,12 +735,12 @@ def __init__(self,prog):
735
735
total = lib .countGFFgenes (EVM_busco )
736
736
except IOError :
737
737
lib .log .error ("EVM did not run correctly, output file missing" )
738
- os . _exit (1 )
738
+ sys . exit (1 )
739
739
#check number of gene models, if 0 then failed, delete output file for re-running
740
740
if total < 1 :
741
741
lib .log .error ("Evidence modeler has failed, exiting" )
742
742
os .remove (EVM_busco )
743
- os . _exit (1 )
743
+ sys . exit (1 )
744
744
else :
745
745
lib .log .info ('{0:,}' .format (total ) + ' total gene models from EVM' )
746
746
#move EVM folder to predict folder
@@ -787,7 +787,7 @@ def __init__(self,prog):
787
787
#just double-check that you've gotten here and both Augustus/GeneMark are finished
788
788
if not any ([Augustus , GeneMark ]):
789
789
lib .log .error ("Augustus or GeneMark prediction is missing, check log files for errors" )
790
- os . _exit (1 )
790
+ sys . exit (1 )
791
791
792
792
#GeneMark can fail if you try to pass a single contig, check file length
793
793
GM_check = lib .line_count (GeneMark )
@@ -902,12 +902,12 @@ def __init__(self,prog):
902
902
total = lib .countGFFgenes (EVM_out )
903
903
except IOError :
904
904
lib .log .error ("EVM did not run correctly, output file missing" )
905
- os . _exit (1 )
905
+ sys . exit (1 )
906
906
#check number of gene models, if 0 then failed, delete output file for re-running
907
907
if total < 1 :
908
908
lib .log .error ("Evidence modeler has failed, exiting" )
909
909
os .remove (EVM_out )
910
- os . _exit (1 )
910
+ sys . exit (1 )
911
911
else :
912
912
lib .log .info ('{0:,}' .format (total ) + ' total gene models from EVM' )
913
913
@@ -1037,4 +1037,4 @@ def __init__(self,prog):
1037
1037
os .rename ('funannotate-EVM.log' , os .path .join (args .out , 'logfiles' , 'funannotate-EVM.log' ))
1038
1038
if os .path .isfile ('funannotate-p2g.log' ):
1039
1039
os .rename ('funannotate-p2g.log' , os .path .join (args .out , 'logfiles' , 'funannotate-p2g.log' ))
1040
- os . _exit (1 )
1040
+ sys . exit (1 )
0 commit comments