update docs and bump version to 1.4.1

Jon Palmer · Jon Palmer · commit 6a3c1156ef82 · 2018-07-12T13:00:42.000-04:00
diff --git a/docs/annotate.rst b/docs/annotate.rst
@@ -51,16 +51,16 @@ Similarily to :code:`funannotate predict`, the output from :code:`funannotate an
 	$ funannotate annotate
 
 	Usage:       funannotate annotate <arguments>
-	version:     1.4.0-10f90df
+	version:     1.4.1
 
 	Description: Script functionally annotates the results from funannotate predict.  It pulls
 				 annotation from PFAM, InterPro, EggNog, UniProtKB, MEROPS, CAZyme, and GO ontology.
 	
 	Required:    -i, --input        Folder from funannotate predict
-			  or
+
 				 --genbank          Genome in GenBank format
 				 -o, --out          Output folder for results
-			  or   
+
 				 --gff              Genome GFF3 annotation file
 				 --fasta            Genome in multi-fasta format
 				 -s, --species      Species name, use quotes for binomial, e.g. "Aspergillus fumigatus"
diff --git a/docs/commands.rst b/docs/commands.rst
@@ -223,58 +223,59 @@ all of the data present. Finally, the GFF3 file is converted to NCBI GenBank for
 .. code-block:: none
 
     Usage:       funannotate predict <arguments>
-    version:     1.4.0
-
-    Description: Script takes genome multi-fasta file and a variety of inputs to do a comprehensive whole
-                 genome gene prediction.  Uses AUGUSTUS, GeneMark, BUSCO, BRAKER, EVidence Modeler,
-                 tbl2asn, tRNAScan-SE, Exonerate, minimap2.
-    
-    Required:  -i, --input              Genome multi-FASTA file (softmasked repeats).
-               -o, --out                Output folder name.
-               -s, --species            Species name, use quotes for binomial, e.g. "Aspergillus fumigatus"
-
-    Optional:  --isolate                Isolate name, e.g. Af293
-               --strain                 Strain name, e.g. FGSCA4           
-               --name                   Locus tag name (assigned by NCBI?). Default: FUN_
-               --numbering              Specify where gene numbering starts. Default: 1
-               --maker_gff              MAKER2 GFF file. Parse results directly to EVM.
-               --pasa_gff               PASA generated gene models. filename:weight
-               --other_gff              Annotation pass-through to EVM. filename:weight
-               --rna_bam                RNA-seq mapped to genome to train Augustus/GeneMark-ET 
-               --augustus_species       Augustus species config. Default: uses species name
-               --genemark_mode          GeneMark mode. Default: ES [ES,ET]
-               --genemark_mod           GeneMark ini mod file.
-               --busco_seed_species     Augustus pre-trained species to start BUSCO. Default: anidulans
-               --optimize_augustus      Run 'optimze_augustus.pl' to refine training (long runtime)
-               --busco_db               BUSCO models. Default: dikarya. `funannotate outgroups --show_buscos`
-               --organism               Fungal-specific options. Default: fungus. [fungus,other]
-               --ploidy                 Ploidy of assembly. Default: 1
-               -t, --tbl2asn            Assembly parameters for tbl2asn. Example: "-l paired-ends"
-               -d, --database           Path to funannotate database. Default: $FUNANNOTATE_DB
-           
-               --protein_evidence       Proteins to map to genome (prot1.fa prot2.fa uniprot.fa). Default: uniprot.fa
-               --protein_alignments     Pre-computed exonerate protein alignments (see docs for format)
-               --transcript_evidence    mRNA/ESTs to align to genome (trans1.fa ests.fa trinity.fa). Default: none
-               --transcript_alignments  Pre-computed transcript alignments in GFF3 format
-               --augustus_gff           Pre-computed AUGUSTUS GFF3 results (must use --stopCodonExcludedFromCDS=False)
-               --genemark_gtf           Pre-computed GeneMark GTF results
-           
-               --min_intronlen          Minimum intron length. Default: 10
-               --max_intronlen          Maximum intron length. Default: 3000
-               --soft_mask              Softmasked length threshold for GeneMark. Default: 5000
-               --min_protlen            Minimum protein length. Default: 50
-               --repeat_filter          Repetitive gene model filtering. Default: overlap blast [overlap,blast,none]
-               --keep_no_stops          Keep gene models without valid stops.
-               --SeqCenter              Sequencing facilty for NCBI tbl file. Default: CFMR
-               --SeqAccession           Sequence accession number for NCBI tbl file. Default: 12345
-               --force                  Annotated unmasked genome.
-               --cpus                   Number of CPUs to use. Default: 2
-             
-    ENV Vars:  If not specified at runtime, will be loaded from your $PATH 
-               --EVM_HOME
-               --AUGUSTUS_CONFIG_PATH
-               --GENEMARK_PATH
-               --BAMTOOLS_PATH
+	version:     1.4.1
+
+	Description: Script takes genome multi-fasta file and a variety of inputs to do a comprehensive whole
+				 genome gene prediction.  Uses AUGUSTUS, GeneMark, BUSCO, BRAKER, EVidence Modeler,
+				 tbl2asn, tRNAScan-SE, Exonerate, minimap2.
+	
+	Required:  -i, --input              Genome multi-FASTA file (softmasked repeats).
+			   -o, --out                Output folder name.
+			   -s, --species            Species name, use quotes for binomial, e.g. "Aspergillus fumigatus"
+
+	Optional:  --isolate                Isolate name, e.g. Af293
+			   --strain                 Strain name, e.g. FGSCA4           
+			   --name                   Locus tag name (assigned by NCBI?). Default: FUN_
+			   --numbering              Specify where gene numbering starts. Default: 1
+			   --maker_gff              MAKER2 GFF file. Parse results directly to EVM.
+			   --pasa_gff               PASA generated gene models. filename:weight
+			   --other_gff              Annotation pass-through to EVM. filename:weight
+			   --rna_bam                RNA-seq mapped to genome to train Augustus/GeneMark-ET 
+			   --augustus_species       Augustus species config. Default: uses species name
+			   --genemark_mode          GeneMark mode. Default: ES [ES,ET]
+			   --genemark_mod           GeneMark ini mod file.
+			   --busco_seed_species     Augustus pre-trained species to start BUSCO. Default: anidulans
+			   --optimize_augustus      Run 'optimize_augustus.pl' to refine training (long runtime)
+			   --busco_db               BUSCO models. Default: dikarya. `funannotate outgroups --show_buscos`
+			   --organism               Fungal-specific options. Default: fungus. [fungus,other]
+			   --ploidy                 Ploidy of assembly. Default: 1
+			   -t, --tbl2asn            Assembly parameters for tbl2asn. Example: "-l paired-ends"
+			   -d, --database           Path to funannotate database. Default: $FUNANNOTATE_DB
+		   
+			   --protein_evidence       Proteins to map to genome (prot1.fa prot2.fa uniprot.fa). Default: uniprot.fa
+			   --protein_alignments     Pre-computed exonerate protein alignments (see docs for format)
+			   --transcript_evidence    mRNA/ESTs to align to genome (trans1.fa ests.fa trinity.fa). Default: none
+			   --transcript_alignments  Pre-computed transcript alignments in GFF3 format
+			   --augustus_gff           Pre-computed AUGUSTUS GFF3 results (must use --stopCodonExcludedFromCDS=False)
+			   --genemark_gtf           Pre-computed GeneMark GTF results
+		   
+			   --min_intronlen          Minimum intron length. Default: 10
+			   --max_intronlen          Maximum intron length. Default: 3000
+			   --soft_mask              Softmasked length threshold for GeneMark. Default: 2000
+			   --min_protlen            Minimum protein length. Default: 50
+			   --repeats2evm            Use repeats in EVM consensus model building.
+			   --repeat_filter          Repetitive gene model filtering. Default: overlap blast [overlap,blast,none]
+			   --keep_no_stops          Keep gene models without valid stops.
+			   --SeqCenter              Sequencing facility for NCBI tbl file. Default: CFMR
+			   --SeqAccession           Sequence accession number for NCBI tbl file. Default: 12345
+			   --force                  Annotate unmasked genome.
+			   --cpus                   Number of CPUs to use. Default: 2
+			 
+	ENV Vars:  If not specified at runtime, will be loaded from your $PATH 
+			   --EVM_HOME
+			   --AUGUSTUS_CONFIG_PATH
+			   --GENEMARK_PATH
+			   --BAMTOOLS_PATH
 
 funannotate fix
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
diff --git a/funannotate.py b/funannotate.py
@@ -37,7 +37,7 @@ def fmtcols(mylist, cols):
     pass
 
 git_version = lib.git_version()
-base_version = '1.4.0'
+base_version = '1.4.1'
 if git_version:
     version = base_version+'-'+git_version
 else:
@@ -255,8 +255,9 @@ def fmtcols(mylist, cols):
            
            --min_intronlen          Minimum intron length. Default: 10
            --max_intronlen          Maximum intron length. Default: 3000
-           --soft_mask              Softmasked length threshold for GeneMark. Default: 5000
+           --soft_mask              Softmasked length threshold for GeneMark. Default: 2000
            --min_protlen            Minimum protein length. Default: 50
+           --repeats2evm            Use repeats in EVM consensus model building.
            --repeat_filter          Repetitive gene model filtering. Default: overlap blast [overlap,blast,none]
            --keep_no_stops          Keep gene models without valid stops.
            --SeqCenter              Sequencing facilty for NCBI tbl file. Default: CFMR