updates to v0.2.11

Jon Palmer · Jon Palmer · commit 14b1b4d6082a · 2016-05-23T10:44:21.000-05:00
diff --git a/bin/funannotate-functional.py b/bin/funannotate-functional.py
@@ -123,8 +123,8 @@ def runIPRpython(Input):
     else:
         outputdir = args.out
     if not args.genbank:
-        if not args.fasta or not args.proteins or not args.gff or not args.transcripts:
-            lib.log.error("You did not specifiy the apropriate input files, either: \n1) GenBank \n2) Genome FASTA + Protein FASTA + Transcript FASTA + GFF3")
+        if not args.fasta or not args.proteins or not args.gff:
+            lib.log.error("You did not specify the appropriate input files, either: \n1) GenBank \n2) Genome FASTA + Protein FASTA + GFF3")
             os._exit(1)
         else:
             Scaffolds = args.fasta
diff --git a/bin/funannotate-predict.py b/bin/funannotate-predict.py
@@ -19,6 +19,7 @@ def __init__(self,prog):
 parser.add_argument('-o','--out', required=True, help='Basename of output files')
 parser.add_argument('-s','--species', required=True, help='Species name (e.g. "Aspergillus fumigatus") use quotes if there is a space')
 parser.add_argument('--isolate', help='Isolate/strain name (e.g. Af293)')
+parser.add_argument('--header_length', default=16, type=int, help='Max length for fasta headers')
 parser.add_argument('--name', default="FUN_", help='Shortname for genes, perhaps assigned by NCBI, eg. VC83')
 parser.add_argument('--augustus_species', help='Specify species for Augustus')
 parser.add_argument('--genemark_mod', help='Use pre-existing Genemark training file (e.g. gmhmm.mod)')
@@ -160,6 +161,12 @@ def __init__(self,prog):
     if i:
         lib.checkinputs(i)
 
+#check fasta header length
+header_test = lib.checkFastaHeaders(args.input, args.header_length)
+if not header_test:
+    lib.log.error("Fasta headers on your input have more characters than the max allowed by --header_length (default 16), reformat headers to continue.")
+    os._exit(1)
+
 #EVM command line scripts
 Converter = os.path.join(EVM, 'EvmUtils', 'misc', 'augustus_GFF3_to_EVM_GFF3.pl')
 ExoConverter = os.path.join(EVM, 'EvmUtils', 'misc', 'exonerate_gff_to_alignment_gff3.pl')
diff --git a/bin/funannotate-sort_rename.py b/bin/funannotate-sort_rename.py
@@ -31,6 +31,9 @@ def SortRenameHeaders(input, basename, output):
                 rec.name = ''
                 rec.description = ''
                 rec.id = basename + '_' + str(counter)
+                if len(rec.id) > 16:
+                    print "Error. Fasta header too long %s.  Choose a different --base name. Max is 16 characters" % rec.id
+                    os._exit(1)
                 counter +=1
             SeqIO.write(records, output, 'fasta')
 
diff --git a/funannotate.py b/funannotate.py
@@ -31,7 +31,7 @@ def fmtcols(mylist, cols):
              for i in range(0,num_lines))
     return "\n".join(lines)
 
-version = '0.2.10'
+version = '0.2.11'
 
 default_help = """
 Usage:       funannotate <command> <arguments>
diff --git a/lib/library.py b/lib/library.py
@@ -267,6 +267,20 @@ def checkGenBank(input):
         return False
     else:
         return True
+        
+def checkFastaHeaders(input, limit):
+    length = 0
+    with open(input, 'rU') as fasta:
+        for line in fasta:
+            if line.startswith('>'):
+                line = line.replace('\n', '')
+                headlen = len(line) - 1 #subtract one character for the leading '>' of the fasta header
+                if headlen > length:
+                    length = headlen
+    if length > int(limit):
+        return False
+    else:
+        return True
 
 def gb2allout(input, GFF, Proteins, Transcripts, DNA):
     #this will not output any UTRs for gene models, don't think this is a problem right now....
@@ -286,7 +300,10 @@ def gb2allout(input, GFF, Proteins, Transcripts, DNA):
                                     proteins.write(">%s\n%s\n" % (f.qualifiers['locus_tag'][0], f.qualifiers['translation'][0]))
                                     chr = record.id
                                     ID = f.qualifiers['locus_tag'][0]
-                                    product = f.qualifiers['product'][0]
+                                    try:
+                                        product = f.qualifiers['product'][0]
+                                    except KeyError:
+                                        product = "hypothetical protein"
                                     start = f.location.nofuzzy_start + 1
                                     end = f.location.nofuzzy_end
                                     strand = f.location.strand
@@ -323,7 +340,10 @@ def gb2allout(input, GFF, Proteins, Transcripts, DNA):
                                         strand = '+'
                                     elif strand == -1:
                                         strand = '-'
-                                    product = f.qualifiers['product'][0]
+                                    try:
+                                        product = f.qualifiers['product'][0]
+                                    except KeyError:
+                                        product = "tRNA-XXX"
                                     chr = record.id
                                     gff.write("%s\tGenBank\tgene\t%s\t%s\t.\t%s\t.\tID=%s\n" % (chr, start, end, strand, ID))
                                     gff.write("%s\tGenBank\ttRNA\t%s\t%s\t.\t%s\t.\tID=%s-T1;Parent=%s;product=%s\n" % (chr, start, end, strand, ID, ID, product))