Skip to content

Commit 061a808

Browse files
Jon Palmer authored and committed
updates to v0.1.5
1 parent 552c67c commit 061a808

18 files changed

+12874
-2756
lines changed

bin/funannotate-compare.py

+5-3
Original file line numberDiff line numberDiff line change
@@ -87,8 +87,8 @@ def __init__(self,prog):
8787

8888

8989
#check dependencies and set path to proteinortho
90-
PROTORTHO = os.path.join(parentdir, 'util', 'proteinortho_v5.11', 'proteinortho5.pl')
91-
programs = ['find_enrichment.py', 'mafft', 'raxmlHPC-PTHREADS', 'trimal']
90+
#PROTORTHO = os.path.join(parentdir, 'util', 'proteinortho_v5.11', 'proteinortho5.pl')
91+
programs = ['find_enrichment.py', 'mafft', 'raxmlHPC-PTHREADS', 'trimal', 'proteinortho5.pl']
9292
lib.CheckDependencies(programs)
9393

9494
#copy over html files
@@ -461,7 +461,7 @@ def __init__(self,prog):
461461
if file.endswith('.faa'):
462462
filelist.append(file)
463463
fileinput = ' '.join(filelist)
464-
cmd = [PROTORTHO, '-project=funannotate', '-synteny', '-cpus='+str(args.cpus), '-singles', '-selfblast']
464+
cmd = ['proteinortho5.pl', '-project=funannotate', '-synteny', '-cpus='+str(args.cpus), '-singles', '-selfblast']
465465
cmd2 = cmd + filelist
466466
if not os.path.isfile(os.path.join(args.out, 'protortho', 'funannotate.poff')):
467467
with open(log, 'w') as logfile:
@@ -706,6 +706,8 @@ def addlink(x):
706706
output.write('<p>Report generated on: '+ d[1]+'/'+d[2]+'/'+d[0]+ ' at '+str(hour)+':'+d[4]+ ' '+m+'</p>')
707707
output.write(lib.FOOTER)
708708

709+
lib.log.info("Compressing results to output file: %s.tar.gz" % args.out)
710+
lib.make_tarfile(args.out+'.tar.gz', args.out)
709711
lib.log.info("Finished!")
710712
os._exit(1)
711713

bin/funannotate-predict.py

+270-232
Large diffs are not rendered by default.

bin/funannotate-runEVM.py

+5
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,9 @@
5252
del arguments[index]
5353
del arguments[index]
5454
cmd1 = base_cmd1 + arguments
55+
#remove intron from partitions command
56+
del cmd1[-1]
57+
del cmd1[-1]
5558

5659

5760
def grouper(n, iterable, fillvalue=None):
@@ -74,6 +77,8 @@ def safe_run(*args, **kwargs):
7477
#split partitions
7578
lib.log.info("Setting up EVM partitions")
7679
subprocess.call(cmd1, cwd = tmpdir, stdout = FNULL, stderr = FNULL)
80+
#check output
81+
lib.checkinputs(os.path.join(tmpdir, 'partitions_list.out'))
7782

7883
#generate commands
7984
lib.log.info("Generating EVM command list")

funannotate.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ def fmtcols(mylist, cols):
3131
for i in range(0,num_lines))
3232
return "\n".join(lines)
3333

34-
version = '0.1.4'
34+
version = '0.1.5'
3535

3636
default_help = """
3737
Usage: funannotate <command> <arguments>
@@ -121,6 +121,7 @@ def fmtcols(mylist, cols):
121121
122122
Optional: --isolate Strain isolate, e.g. Af293
123123
--name Locus tag name (assigned by NCBI?). Default: FUN_
124+
--maker_gff MAKER2 GFF file. Parse results directly to EVM.
124125
--pasa_gff PASA generated gene models
125126
--rna_bam RNA-seq mapped to genome to train Augustus/GeneMark-ET
126127
--augustus_species Augustus species config. Default: uses species name

lib/library.py

+46-42
Large diffs are not rendered by default.

sample_data/maker_genome1.all.gff

+12,472
Large diffs are not rendered by default.

setup.sh

+23-9
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,13 @@ command -v makeblastdb >/dev/null 2>&1 || { echo "Funannotate setup requires BLA
1111

1212
dir=$PWD
1313
outputdir='/usr/local/share/funannotate'
14+
echo -n "DB directory set to ($outputdir), continue [y/n]: "
15+
read question1
16+
if [ $question1 == 'n' ]; then
17+
echo -n "Enter path to DB directory: "
18+
read dbname
19+
outputdir=$dbname
20+
fi
1421

1522
if [ -z "$1" ]; then
1623
db='pass'
@@ -194,20 +201,12 @@ if [ "$dep" = 'pass' ]; then
194201
echo "-----------------------------------------------"
195202

196203
#setup some programs and look for dependencies
197-
#do some OS check and rename proteinortho clustering binary accordingly (this was difficult to compile on mac, hopefully this saves everybody the trouble)
198204

199205
#make sure in funannotate directory
200206
cd $dir
201-
if ! [ -f util/proteinortho_v5.11/proteinortho5_clustering ]; then
202-
if [[ $OSTYPE == darwin* ]]; then
203-
cp util/proteinortho_v5.11/proteinortho5_clustering_osx util/proteinortho_v5.11/proteinortho5_clustering
204-
else
205-
sudo cp util/proteinortho_v5.11/proteinortho5_clustering_linux util/proteinortho_v5.11/proteinortho5_clustering
206-
fi
207-
fi
208207

209208
check='pass'
210-
for i in {blastp,hmmsearch,hmmscan,augustus,'gmes_petap.pl',mummer,nucmer,show-coords,exonerate,gmap,blat,RepeatModeler,RepeatMasker,pslCDnaFilter,bedtools,bamtools,'gag.py',tbl2asn,'braker.pl',funannotate,mafft,trimal,raxmlHPC-PTHREADS,tRNAscan-SE,'rmOutToGFF3.pl'}; do
209+
for i in {blastp,hmmsearch,hmmscan,augustus,'gmes_petap.pl',mummer,nucmer,show-coords,exonerate,gmap,blat,RepeatModeler,RepeatMasker,pslCDnaFilter,bedtools,bamtools,'gag.py',tbl2asn,'braker.pl',funannotate,mafft,trimal,raxmlHPC-PTHREADS,tRNAscan-SE,'rmOutToGFF3.pl','proteinortho5.pl'}; do
211210
var=$(command -v $i)
212211
if [ "$var" ]; then
213212
echo "$i installed.........$var"
@@ -259,3 +258,18 @@ if [ "$dep" = 'pass' ]; then
259258
echo -e "Script complete, funannotate is ready to roll!\n"
260259
fi
261260
fi
261+
262+
#vertebrate EggNog
263+
#http://eggnogdb.embl.de/download/eggnog_4.5/data/veNOG/veNOG.hmm.tar.gz
264+
#http://eggnogdb.embl.de/download/eggnog_4.5/data/veNOG/veNOG.annotations.tsv.gz
265+
#http://busco.ezlab.org/files/vertebrata_buscos.tar.gz
266+
267+
#arthropods
268+
#http://eggnogdb.embl.de/download/eggnog_4.5/data/artNOG/artNOG.hmm.tar.gz
269+
#http://eggnogdb.embl.de/download/eggnog_4.5/data/artNOG/artNOG.annotations.tsv.gz
270+
#http://busco.ezlab.org/files/arthropoda_buscos.tar.gz
271+
272+
#metazoans
273+
#http://eggnogdb.embl.de/download/eggnog_4.5/data/meNOG/meNOG.hmm.tar.gz
274+
#http://eggnogdb.embl.de/download/eggnog_4.5/data/meNOG/meNOG.annotations.tsv.gz
275+
#http://busco.ezlab.org/files/metazoa_buscos.tar.gz

util/maker2evm.pl

+51
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
#!/usr/bin/env perl
# No switches on the shebang line: Linux passes everything after the
# interpreter path as ONE argument, so "env perl -w" looks for a program
# literally named "perl -w" and fails. `use warnings` below replaces -w.
use strict;
use warnings;

# maker2evm.pl - split a MAKER2 GFF into three GFF3 files, one per
# evidence class (gene predictions, transcript alignments, protein
# alignments).
#
# written by Jason Stajich
# https://github.com/hyphaltip/genome-scripts/blob/master/gene_prediction/maker2evm.pl
#
# Input : GFF lines read from the files named on the command line, or
#         from STDIN when no file is given.
# Output: three files created (truncated) in the current directory:
#           gene_predictions.gff3       rows whose source matches
#                                       genemark / snap_masked / augustus_masked
#           transcript_alignments.gff3  rows whose source is est2genome
#           protein_alignments.gff3     rows whose source is protein2genome
# Dies  : if an output file cannot be opened or closed.

my $transcript_file = 'transcript_alignments.gff3';
my $gene_file       = 'gene_predictions.gff3';
my $protein_file    = 'protein_alignments.gff3';

open(my $tr,   '>', $transcript_file) or die "cannot write $transcript_file: $!";
open(my $gene, '>', $gene_file)       or die "cannot write $gene_file: $!";
open(my $pep,  '>', $protein_file)    or die "cannot write $protein_file: $!";

LINE:
while (my $line = <>) {
    # Comment/pragma lines carry no features.
    next LINE if $line =~ /^\#/;

    # A line starting with '>' is a FASTA header; nothing after it is a
    # feature row, so stop reading.
    last LINE if $line =~ /^>/;

    chomp $line;

    # Blank lines would otherwise leave $row[1]/$row[2] undefined below.
    next LINE if $line =~ /^\s*$/;

    my @row = split /\t/, $line;
    if (@row < 3) {
        warn("skipping malformed line (fewer than 3 columns): $line\n");
        next LINE;
    }

    # Column 2 (index 1) is the source, column 3 (index 2) the feature type.
    if ($row[1] =~ /genemark|snap_masked|augustus_masked/) {
        $row[1] =~ s/_masked//;
        if ($row[2] eq 'match') {
            # Each 'match' row is written twice: once as a gene, and once
            # more (by the shared print below) as its mRNA.
            $row[2] = 'gene';
            print $gene join("\t", @row), "\n";
            $row[2] = 'mRNA';
        }
        elsif ($row[2] eq 'match_part') {
            $row[2] = 'CDS';
        }
        else {
            # Unrecognised types are reported but still written unchanged.
            warn("unknown field type for $row[2]\n");
        }
        print $gene join("\t", @row), "\n";
    }
    elsif ($row[1] eq 'est2genome') {
        # Drop the parent row; keep only the aligned segments.
        next LINE if $row[2] eq 'expressed_sequence_match';
        if ($row[2] eq 'match_part') {
            $row[2] = 'EST_match';
        }
        print $tr join("\t", @row), "\n";
    }
    elsif ($row[1] eq 'protein2genome') {
        # Drop the parent row; keep only the aligned segments.
        next LINE if $row[2] eq 'protein_match';
        if ($row[2] eq 'match_part') {
            $row[2] = 'nucleotide_to_protein_match';
        }
        print $pep join("\t", @row), "\n";
    }
    elsif ($row[1] =~ /maker|repeatmasker|blast[xn]|repeatrunner|\./) {
        next LINE;    # skipping these
    }
    else {
        warn("unknown type for $row[1] $row[2]\n");
        next LINE;
    }
}

# Buffered write errors (e.g. a full disk) only surface at close time.
close($tr)   or die "error closing $transcript_file: $!";
close($gene) or die "error closing $gene_file: $!";
close($pep)  or die "error closing $protein_file: $!";

util/proteinortho_v5.11/chk_test.pl

-13
This file was deleted.

util/proteinortho_v5.11/comp_bla.pl

-57
This file was deleted.

0 commit comments

Comments
 (0)