@@ -170,12 +170,14 @@ def __init__(self,prog):
170
170
RepeatMasker = os .path .abspath (RepeatMasker )
171
171
MaskGenome = os .path .abspath (MaskGenome )
172
172
173
+ #final output for augustus hints
174
+ hints_all = os .path .join (args .out , 'predict_misc' , 'hints.PE.gff' )
175
+
173
176
#check for masked genome here
174
177
if not os .path .isfile (MaskGenome ) or lib .getSize (MaskGenome ) < 10 :
175
178
lib .log .error ("RepeatMasking failed, check log files." )
176
179
os ._exit (1 )
177
180
178
-
179
181
#if maker_gff passed, use that info and move directly to EVM
180
182
if args .maker_gff :
181
183
lib .log .info ("Maker2 GFF passed, parsing results and proceeding directly to EVidence Modeler" )
@@ -228,6 +230,26 @@ def __init__(self,prog):
228
230
if not os .path .isfile (trans_out ):
229
231
lib .runGMAP (trans_temp , MaskGenome , args .cpus , args .max_intronlen , os .path .join (args .out , 'predict_misc' ), trans_out )
230
232
Transcripts = os .path .abspath (trans_out )
233
+ #now run BLAT for Augustus hints
234
+ blat_out = os .path .join (args .out , 'predict_misc' , 'blat.psl' )
235
+ blat_filt = os .path .join (args .out , 'predict_misc' , 'blat.filt.psl' )
236
+ blat_sort1 = os .path .join (args .out , 'predict_misc' , 'blat.sort.tmp.psl' )
237
+ blat_sort2 = os .path .join (args .out , 'predict_misc' , 'blat.sort.psl' )
238
+ hintsE = os .path .join (args .out , 'predict_misc' , 'hints.E.gff' )
239
+ maxINT = '-maxIntron=' + str (args .max_intronlen )
240
+ lib .log .info ("Aligning transcript evidence to genome with BLAT" )
241
+ if not os .path .isfile (hints_all ):
242
+ subprocess .call (['blat' , '-noHead' , '-minIdentity=80' , maxINT , MaskGenome , trans_temp , blat_out ], stdout = FNULL , stderr = FNULL )
243
+ subprocess .call (['pslCDnaFilter' , '-minId=0.9' , '-localNearBest=0.005' , '-ignoreNs' , '-bestOverlap' , blat_out , blat_filt ], stdout = FNULL , stderr = FNULL )
244
+ with open (blat_sort1 , 'w' ) as output :
245
+ subprocess .call (['sort' , '-n' , '-k' , '16,16' , blat_filt ], stdout = output , stderr = FNULL )
246
+ with open (blat_sort2 , 'w' ) as output :
247
+ subprocess .call (['sort' , '-s' , '-k' , '14,14' , blat_sort1 ], stdout = output , stderr = FNULL )
248
+ #run blat2hints
249
+ blat2hints = os .path .join (AUGUSTUS_BASE , 'scripts' , 'blat2hints.pl' )
250
+ b2h_input = '--in=' + blat_sort2
251
+ b2h_output = '--out=' + hintsE
252
+ subprocess .call ([blat2hints , b2h_input , b2h_output , '--minintronlen=20' , '--trunkSS' ], stdout = FNULL , stderr = FNULL )
231
253
else :
232
254
Transcripts = False
233
255
else :
@@ -254,6 +276,14 @@ def __init__(self,prog):
254
276
if not os .path .isfile (p2g_out ):
255
277
subprocess .call (p2g_cmd )
256
278
exonerate_out = os .path .abspath (p2g_out )
279
+ #now run exonerate2hints to generate protein hints for Augustus
280
+ exonerate2hints = os .path .join (AUGUSTUS_BASE , 'scripts' , 'exonerate2hints.pl' )
281
+ hintsP = os .path .join (args .out , 'predict_misc' , 'hints.P.gff' )
282
+ e2h_in = '--in=' + p2g_out
283
+ e2h_out = '--out=' + hintsP
284
+ e2h_minINT = '--minintronlen=' + str (args .min_intronlen )
285
+ e2h_maxINT = '--maxintronlen=' + str (args .max_intronlen )
286
+ subprocess .call ([exonerate2hints , e2h_in , e2h_out , e2h_minINT , e2h_maxINT ], stdout = FNULL , stderr = FNULL )
257
287
else :
258
288
exonerate_out = False
259
289
else :
@@ -265,7 +295,20 @@ def __init__(self,prog):
265
295
with open (Exonerate , 'w' ) as output :
266
296
subprocess .call ([ExoConverter , exonerate_out ], stdout = output , stderr = FNULL )
267
297
Exonerate = os .path .abspath (Exonerate )
268
-
298
+
299
+ #combine hints for Augustus
300
+ if os .path .isfile (hintsP ) or os .path .isfile (hintsE ):
301
+ with open (hints_all , 'a' ) as out :
302
+ if os .path .isfile (hintsP ):
303
+ with open (hintsP ) as input :
304
+ out .write (input .read ())
305
+ if os .path .isfile (hintsE ):
306
+ with open (hintsE ) as input2 :
307
+ out .write (input2 .read ())
308
+ #setup hints and extrinsic input
309
+ hints_input = '--hintsfile=' + hints_all
310
+ extrinsic = '--extrinsicCfgFile=' + os .path .join (AUGUSTUS_BASE , 'config' , 'extrinsic' , 'extrinsic.E.XNT.cfg' )
311
+
269
312
Augustus = ''
270
313
GeneMark = ''
271
314
@@ -345,7 +388,10 @@ def __init__(self,prog):
345
388
lib .log .info ("Running Augustus gene prediction" )
346
389
if not os .path .isfile (aug_out ):
347
390
with open (aug_out , 'w' ) as output :
348
- subprocess .call (['augustus' , species , '--gff3=on' , MaskGenome ], stdout = output , stderr = FNULL )
391
+ if os .path .isfile (hints_all ):
392
+ subprocess .call (['augustus' , species , hints_input , extrinsic , '--gff3=on' , MaskGenome ], stdout = output , stderr = FNULL )
393
+ else :
394
+ subprocess .call (['augustus' , species , '--gff3=on' , MaskGenome ], stdout = output , stderr = FNULL )
349
395
Augustus = os .path .join (args .out , 'predict_misc' , 'augustus.evm.gff3' )
350
396
with open (Augustus , 'w' ) as output :
351
397
subprocess .call (['perl' , Converter , aug_out ], stdout = output , stderr = FNULL )
@@ -363,7 +409,10 @@ def __init__(self,prog):
363
409
else :
364
410
subprocess .call ([AutoAug , '--noutr' , '--singleCPU' , cDNA , species , genome , training ], stdout = logfile , stderr = logfile )
365
411
with open (aug_out , 'w' ) as output :
366
- subprocess .call (['augustus' , species , '--gff3=on' , MaskGenome ], stdout = output , stderr = FNULL )
412
+ if os .path .isfile (hints_all ):
413
+ subprocess .call (['augustus' , species , hints_input , extrinsic , '--gff3=on' , MaskGenome ], stdout = output , stderr = FNULL )
414
+ else :
415
+ subprocess .call (['augustus' , species , '--gff3=on' , MaskGenome ], stdout = output , stderr = FNULL )
367
416
Augustus = os .path .join (args .out , 'predict_misc' , 'augustus.evm.gff3' )
368
417
with open (Augustus , 'w' ) as output :
369
418
subprocess .call (['perl' , Converter , aug_out ], stdout = output , stderr = FNULL )
@@ -406,7 +455,10 @@ def __init__(self,prog):
406
455
lib .log .info ("Running Augustus gene prediction" )
407
456
if not os .path .isfile (aug_out ):
408
457
with open (aug_out , 'w' ) as output :
409
- subprocess .call (['augustus' , species , '--gff3=on' , MaskGenome ], stdout = output , stderr = FNULL )
458
+ if os .path .isfile (hints_all ):
459
+ subprocess .call (['augustus' , species , hints_input , extrinsic , '--gff3=on' , MaskGenome ], stdout = output , stderr = FNULL )
460
+ else :
461
+ subprocess .call (['augustus' , species , '--gff3=on' , MaskGenome ], stdout = output , stderr = FNULL )
410
462
Augustus = os .path .join (args .out , 'predict_misc' , 'augustus.evm.gff3' )
411
463
with open (Augustus , 'w' ) as output :
412
464
subprocess .call (['perl' , Converter , aug_out ], stdout = output , stderr = FNULL )
@@ -428,7 +480,10 @@ def __init__(self,prog):
428
480
lib .log .info ("BUSCO mediated Augustus training is complete, now running Augustus on whole genome." )
429
481
if not os .path .isfile (aug_out ):
430
482
with open (aug_out , 'w' ) as output :
431
- subprocess .call (['augustus' , species , '--gff3=on' , MaskGenome ], stdout = output , stderr = FNULL )
483
+ if os .path .isfile (hints_all ):
484
+ subprocess .call (['augustus' , species , hints_input , extrinsic , '--gff3=on' , MaskGenome ], stdout = output , stderr = FNULL )
485
+ else :
486
+ subprocess .call (['augustus' , species , '--gff3=on' , MaskGenome ], stdout = output , stderr = FNULL )
432
487
Augustus = os .path .join (args .out , 'predict_misc' , 'augustus.evm.gff3' )
433
488
with open (Augustus , 'w' ) as output :
434
489
subprocess .call (['perl' , Converter , aug_out ], stdout = output , stderr = FNULL )
@@ -444,7 +499,7 @@ def __init__(self,prog):
444
499
gmc = 1
445
500
if GM_check < 3 :
446
501
gmc = 0
447
- lib .log .error ("GeneMark predictions failed, proceeding with just Augustus" )
502
+ lib .log .error ("GeneMark predictions failed, proceeding with only Augustus" )
448
503
449
504
#EVM related input tasks, find all predictions and concatenate together
450
505
if args .pasa_gff :
@@ -457,7 +512,7 @@ def __init__(self,prog):
457
512
with open (f ) as input :
458
513
output .write (input .read ())
459
514
460
- #set Weights file dependent on which data is present. I have yet to find an example of where Augustus outperforms GeneMark for fungi, hence the weightings are tilted towards genemark
515
+ #set Weights file dependent on which data is present. I have yet to find an example where Augustus outperforms GeneMark for fungi, but I don't have much evidence that GeneMark is perfect either.
461
516
Weights = os .path .join (args .out , 'predict_misc' , 'weights.evm.txt' )
462
517
with open (Weights , 'w' ) as output :
463
518
if args .pasa_gff :
@@ -466,7 +521,7 @@ def __init__(self,prog):
466
521
output .write ("ABINITIO_PREDICTION\t GeneMark\t 1\n " )
467
522
else :
468
523
output .write ("ABINITIO_PREDICTION\t Augustus\t 1\n " )
469
- output .write ("ABINITIO_PREDICTION\t GeneMark\t 2 \n " )
524
+ output .write ("ABINITIO_PREDICTION\t GeneMark\t 1 \n " )
470
525
if exonerate_out :
471
526
output .write ("PROTEIN\t exonerate\t 1\n " )
472
527
if Transcripts :
0 commit comments