@@ -1738,22 +1738,25 @@ def trainAugustus(AUGUSTUS_BASE, train_species, trainingset, genome, outdir, cpu
1738
1738
aug_cpus = '--cpus=' + str (cpus )
1739
1739
species = '--species=' + train_species
1740
1740
aug_log = os .path .join (outdir , 'logfiles' , 'augustus_training.log' )
1741
+ trainingdir = 'tmp_opt_' + train_species
1741
1742
with open (aug_log , 'w' ) as logfile :
1742
- subprocess .call ([RANDOMSPLIT , trainingset , '100 ' ]) #split off 100 models for testing purposes
1743
+ subprocess .call ([RANDOMSPLIT , trainingset , '200 ' ]) #split off 200 models for testing purposes
1743
1744
if not CheckAugustusSpecies (train_species ): #check if training set exists, if not run etraining
1744
- subprocess .call (['etraining' , species , trainingset + '.train' ], stderr = logfile , stdout = logfile )
1745
+ subprocess .call (['etraining' , species , trainingset ], stderr = logfile , stdout = logfile )
1745
1746
with open (os .path .join (outdir , 'predict_misc' , 'augustus.initial.training.txt' ), 'w' ) as initialtraining :
1746
1747
subprocess .call (['augustus' , species , trainingset + '.test' ], stdout = initialtraining )
1747
1748
train_results = getTrainResults (os .path .join (outdir , 'predict_misc' , 'augustus.initial.training.txt' ))
1748
1749
log .info ('Initial training: ' + '{0:.2%}' .format (float (train_results [4 ]))+ ' genes predicted exactly and ' + '{0:.2%}' .format (float (train_results [2 ]))+ ' of exons predicted exactly' )
1749
1750
#now run optimization
1750
- subprocess .call ([OPTIMIZE , species , aug_cpus , trainingset + '.train' ], stderr = logfile , stdout = logfile )
1751
+ subprocess .call ([OPTIMIZE , species , aug_cpus , trainingset ], stderr = logfile , stdout = logfile )
1751
1752
#run etraining again
1752
- subprocess .call (['etraining' , species , trainingset + '.train' ], stderr = logfile , stdout = logfile )
1753
+ subprocess .call (['etraining' , species , trainingset ], stderr = logfile , stdout = logfile )
1753
1754
with open (os .path .join (outdir , 'predict_misc' , 'augustus.final.training.txt' ), 'w' ) as finaltraining :
1754
- subprocess .call (['augustus' , species , trainingset + '.test ' ], stdout = finaltraining )
1755
+ subprocess .call (['augustus' , species , os . path . join ( trainingdir , 'bucket1.gb ' ], stdout = finaltraining )
1755
1756
train_results = getTrainResults (os .path .join (outdir , 'predict_misc' , 'augustus.final.training.txt' ))
1756
1757
log .info ('Optimized training: ' + '{0:.2%}' .format (float (train_results [4 ]))+ ' genes predicted exactly and ' + '{0:.2%}' .format (float (train_results [2 ]))+ ' of exons predicted exactly' )
1758
+ #clean up tmp folder
1759
+ shutil .rmtree (trainingdir )
1757
1760
1758
1761
HEADER = '''
1759
1762
<!DOCTYPE html>
0 commit comments