Skip to content

Commit 14b1b4d

Browse files
Jon Palmer authored and committed
updates to v0.2.11
1 parent 7ed692f commit 14b1b4d

5 files changed

+35
-5
lines changed

bin/funannotate-functional.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -123,8 +123,8 @@ def runIPRpython(Input):
123123
else:
124124
outputdir = args.out
125125
if not args.genbank:
126-
if not args.fasta or not args.proteins or not args.gff or not args.transcripts:
127-
lib.log.error("You did not specifiy the apropriate input files, either: \n1) GenBank \n2) Genome FASTA + Protein FASTA + Transcript FASTA + GFF3")
126+
if not args.fasta or not args.proteins or not args.gff:
127+
lib.log.error("You did not specifiy the apropriate input files, either: \n1) GenBank \n2) Genome FASTA + Protein FASTA + GFF3")
128128
os._exit(1)
129129
else:
130130
Scaffolds = args.fasta

bin/funannotate-predict.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ def __init__(self,prog):
1919
parser.add_argument('-o','--out', required=True, help='Basename of output files')
2020
parser.add_argument('-s','--species', required=True, help='Species name (e.g. "Aspergillus fumigatus") use quotes if there is a space')
2121
parser.add_argument('--isolate', help='Isolate/strain name (e.g. Af293)')
22+
parser.add_argument('--header_length', default=16, type=int, help='Max length for fasta headers')
2223
parser.add_argument('--name', default="FUN_", help='Shortname for genes, perhaps assigned by NCBI, eg. VC83')
2324
parser.add_argument('--augustus_species', help='Specify species for Augustus')
2425
parser.add_argument('--genemark_mod', help='Use pre-existing Genemark training file (e.g. gmhmm.mod)')
@@ -160,6 +161,12 @@ def __init__(self,prog):
160161
if i:
161162
lib.checkinputs(i)
162163

164+
#check fasta header length
165+
header_test = lib.checkFastaHeaders(args.input, args.header_length)
166+
if not header_test:
167+
lib.log.error("Fasta headers on your input have more characters than the max (16), reformat headers to continue.")
168+
os._exit(1)
169+
163170
#EVM command line scripts
164171
Converter = os.path.join(EVM, 'EvmUtils', 'misc', 'augustus_GFF3_to_EVM_GFF3.pl')
165172
ExoConverter = os.path.join(EVM, 'EvmUtils', 'misc', 'exonerate_gff_to_alignment_gff3.pl')

bin/funannotate-sort_rename.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,9 @@ def SortRenameHeaders(input, basename, output):
3131
rec.name = ''
3232
rec.description = ''
3333
rec.id = basename + '_' + str(counter)
34+
if len(rec.id) > 16:
35+
print "Error. Fasta header too long %s. Choose a different --base name. Max is 16 characters" % rec.id
36+
os._exit(1)
3437
counter +=1
3538
SeqIO.write(records, output, 'fasta')
3639

funannotate.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ def fmtcols(mylist, cols):
3131
for i in range(0,num_lines))
3232
return "\n".join(lines)
3333

34-
version = '0.2.10'
34+
version = '0.2.11'
3535

3636
default_help = """
3737
Usage: funannotate <command> <arguments>

lib/library.py

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -267,6 +267,20 @@ def checkGenBank(input):
267267
return False
268268
else:
269269
return True
270+
271+
def checkFastaHeaders(input, limit):
    """Check that no FASTA header in a file exceeds a maximum length.

    The length of a header is the number of characters on the header line
    after the leading '>' and before the line terminator; anything else on
    that line (e.g. a description after the ID) is counted too.

    Args:
        input: path to the FASTA file to scan.
        limit: maximum allowed header length; anything accepted by int().

    Returns:
        True if every header is at most `limit` characters long (also when
        the file contains no headers at all), False otherwise.
    """
    max_length = int(limit)
    # Plain 'r' instead of 'rU': the 'U' flag was removed in Python 3.11 and
    # makes open() raise ValueError there. Line endings are stripped
    # explicitly so Windows-style '\r\n' files are still measured correctly
    # on interpreters that do not translate newlines in 'r' mode.
    with open(input, 'r') as fasta:
        for line in fasta:
            if not line.startswith('>'):
                continue
            # subtract one character for the fasta caret ('>')
            headlen = len(line.rstrip('\r\n')) - 1
            if headlen > max_length:
                # One over-long header is enough; no need to read the rest.
                return False
    return True
270284

271285
def gb2allout(input, GFF, Proteins, Transcripts, DNA):
272286
#this will not output any UTRs for gene models, don't think this is a problem right now....
@@ -286,7 +300,10 @@ def gb2allout(input, GFF, Proteins, Transcripts, DNA):
286300
proteins.write(">%s\n%s\n" % (f.qualifiers['locus_tag'][0], f.qualifiers['translation'][0]))
287301
chr = record.id
288302
ID = f.qualifiers['locus_tag'][0]
289-
product = f.qualifiers['product'][0]
303+
try:
304+
product = f.qualifiers['product'][0]
305+
except KeyError:
306+
product = "hypothetical protein"
290307
start = f.location.nofuzzy_start + 1
291308
end = f.location.nofuzzy_end
292309
strand = f.location.strand
@@ -323,7 +340,10 @@ def gb2allout(input, GFF, Proteins, Transcripts, DNA):
323340
strand = '+'
324341
elif strand == -1:
325342
strand = '-'
326-
product = f.qualifiers['product'][0]
343+
try:
344+
product = f.qualifiers['product'][0]
345+
except KeyError:
346+
product = "tRNA-XXX"
327347
chr = record.id
328348
gff.write("%s\tGenBank\tgene\t%s\t%s\t.\t%s\t.\tID=%s\n" % (chr, start, end, strand, ID))
329349
gff.write("%s\tGenBank\ttRNA\t%s\t%s\t.\t%s\t.\tID=%s-T1;Parent=%s;product=%s\n" % (chr, start, end, strand, ID, ID, product))

0 commit comments

Comments
 (0)