Skip to content

Commit ee1456a

Browse files
author
Jon Palmer
committed
update to v0.3.6
1 parent 4298bc0 commit ee1456a

File tree

1 file changed

+14
-2
lines changed

1 file changed

+14
-2
lines changed

bin/funannotate-predict.py

Lines changed: 14 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -249,7 +249,17 @@ def __init__(self,prog):
249249
if not os.path.isfile(MaskGenome) or lib.getSize(MaskGenome) < 10:
250250
lib.log.error("RepeatMasking failed, check log files.")
251251
os._exit(1)
252-
252+
253+
#load contig names and sizes into dictionary.
254+
ContigSizes = {}
255+
with open(MaskGenome, 'rU') as input:
256+
for rec in SeqIO.parse(input, 'fasta'):
257+
if not rec.id in ContigSizes:
258+
ContigSizes[rec.id] = len(rec.seq)
259+
else:
260+
lib.log.error("Error, duplicate contig names, exiting")
261+
os._exit(1)
262+
253263
#check for previous files and setup output files
254264
Predictions = os.path.join(args.out, 'predict_misc', 'gene_predictions.gff3')
255265
Exonerate = os.path.join(args.out, 'predict_misc', 'protein_alignments.gff3')
@@ -658,7 +668,9 @@ def __init__(self,prog):
658668
start = int(cols[3]) - 100
659669
if start < 1: #negative values no good for intersection
660670
start = 1
661-
end = int(cols[4]) + 100 #not sure if value is larger than contig if this will be problem for bedtools as well? If so will need to read in contig names and lengths into dictionary to check here.
671+
end = int(cols[4]) + 100
672+
if end > ContigSizes.get(cols[2]): #check that it doesn't go over length
673+
end = ContigSizes.get(cols[2])
662674
bedfile.write('%s\t%i\t%i\t%s\n' % (cols[2],start,end,cols[0]))
663675
#now get BUSCO GFF models
664676
busco_augustus_tmp = os.path.join(args.out, 'predict_misc', 'busco_augustus.tmp')

0 commit comments

Comments
 (0)