Skip to content

Commit f0bc925

Browse files
author
Jon Palmer
committed
add --trnascan option to predict #523
1 parent 05b1810 commit f0bc925

File tree

3 files changed

+42
-13
lines changed

3 files changed

+42
-13
lines changed

funannotate/library.py

+21-5
Original file line numberDiff line numberDiff line change
@@ -6394,13 +6394,28 @@ def SystemInfo():
63946394
(system_os, multiprocessing.cpu_count(), MemoryCheck(), python_vers))
63956395

63966396

6397-
def runtRNAscan(input, tmpdir, output, cpus=1):
6397+
def runtRNAscan(input, tmpdir, output, cpus=1, precalc=False):
63986398
tRNAout = os.path.join(tmpdir, 'tRNAscan.out')
63996399
tRNAlenOut = os.path.join(tmpdir, 'tRNAscan.len-filtered.out')
6400-
if os.path.isfile(tRNAout): # tRNAscan can't overwrite file, so check
6401-
os.remove(tRNAout)
6402-
cmd = ['tRNAscan-SE', '-o', tRNAout, '--thread', str(cpus), input]
6403-
runSubprocess(cmd, '.', log)
6400+
if not precalc:
6401+
if os.path.isfile(tRNAout): # tRNAscan can't overwrite file, so check
6402+
os.remove(tRNAout)
6403+
cmd = ['tRNAscan-SE', '-o', tRNAout, '--thread', str(cpus), input]
6404+
log.debug(' '.join(cmd))
6405+
proc = subprocess.Popen(cmd, stdout=subprocess.PIPE,
6406+
stderr=subprocess.PIPE)
6407+
stdout, stderr = proc.communicate()
6408+
if proc.returncode != 0:
6409+
log.error('CMD ERROR: {}'.format(' '.join(cmd)))
6410+
if stdout:
6411+
log.debug(stdout.decode("utf-8"))
6412+
if stderr:
6413+
log.debug(stderr.decode("utf-8"))
6414+
else:
6415+
shutil.copyfile(precalc, tRNAout)
6416+
if not checkannotations(tRNAout):
6417+
log.info('tRNAscan-SE seems to have failed, check logfile for error. You can pass precalculated results to --trnascan')
6418+
return False
64046419
# enforce NCBI length rules
64056420
with open(tRNAlenOut, 'w') as lenOut:
64066421
with open(tRNAout, 'r') as infile:
@@ -6424,6 +6439,7 @@ def runtRNAscan(input, tmpdir, output, cpus=1):
64246439
trna2gff = os.path.join(parentdir, 'aux_scripts', 'trnascan2gff3.pl')
64256440
with open(output, 'w') as out:
64266441
subprocess.call(['perl', trna2gff, '--input', tRNAlenOut], stdout=out)
6442+
return True
64276443

64286444

64296445
def runtbl2asn(folder, template, discrepency, organism, isolate, strain, parameters, version):

funannotate/predict.py

+20-8
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,8 @@ def __init__(self, prog):
131131
help='Option for p2g on which prefilter')
132132
parser.add_argument('--no-progress', dest='progress', action='store_false',
133133
help='no progress on multiprocessing')
134+
parser.add_argument('--trnascan',
135+
help='Pre-computed tRNAScan results')
134136
args = parser.parse_args(args)
135137

136138
parentdir = os.path.join(os.path.dirname(__file__))
@@ -1763,17 +1765,27 @@ def __init__(self, prog):
17631765
lib.log.info('{:,} gene models remaining'.format(total))
17641766

17651767
# run tRNAscan
1766-
lib.log.info("Predicting tRNAs")
17671768
tRNAscan = os.path.join(args.out, 'predict_misc', 'trnascan.gff3')
1768-
if not os.path.isfile(tRNAscan):
1769-
lib.runtRNAscan(MaskGenome, os.path.join(
1770-
args.out, 'predict_misc'), tRNAscan, cpus=args.cpus)
1769+
if args.trnascan:
1770+
lib.log.info("Existing tRNAscan results passed: {}".format(args.trnascan))
1771+
trna_result = lib.runtRNAscan(MaskGenome, os.path.join(args.out, 'predict_misc'),
1772+
tRNAscan, cpus=args.cpus, precalc=args.trnascan)
1773+
else:
1774+
lib.log.info("Predicting tRNAs")
1775+
if not os.path.isfile(tRNAscan):
1776+
trna_result = lib.runtRNAscan(MaskGenome, os.path.join(args.out, 'predict_misc'),
1777+
tRNAscan, cpus=args.cpus)
1778+
else:
1779+
trna_result = True
17711780

17721781
# combine tRNAscan with EVM gff, dropping tRNA models if they overlap with EVM models
1773-
cleanTRNA = os.path.join(args.out, 'predict_misc',
1774-
'trnascan.no-overlaps.gff3')
1775-
lib.validate_tRNA(tRNAscan, EVMCleanGFF, AssemblyGaps, cleanTRNA)
1776-
lib.log.info("{:,} tRNAscan models are valid (non-overlapping)".format(lib.countGFFgenes(cleanTRNA)))
1782+
cleanTRNA = os.path.join(args.out, 'predict_misc', 'trnascan.no-overlaps.gff3')
1783+
if trna_result:
1784+
lib.validate_tRNA(tRNAscan, EVMCleanGFF, AssemblyGaps, cleanTRNA)
1785+
lib.log.info("{:,} tRNAscan models are valid (non-overlapping)".format(lib.countGFFgenes(cleanTRNA)))
1786+
else:
1787+
with open(cleanTRNA, 'w') as outfile:
1788+
outfile.write('##gff-version 3\n')
17771789

17781790
# load EVM models and tRNAscan models, output tbl annotation file
17791791
lib.log.info("Generating GenBank tbl annotation file")

scripts/funannotate

+1
Original file line numberDiff line numberDiff line change
@@ -193,6 +193,7 @@ Optional:
193193
--transcript_alignments Pre-computed transcript alignments in GFF3 format
194194
--augustus_gff Pre-computed AUGUSTUS GFF3 results (must use --stopCodonExcludedFromCDS=False)
195195
--genemark_gtf Pre-computed GeneMark GTF results
196+
--trnascan Pre-computed tRNAscanSE results
196197
197198
--min_intronlen Minimum intron length. Default: 10
198199
--max_intronlen Maximum intron length. Default: 3000

0 commit comments

Comments
 (0)