Skip to content

Commit 5899e90

Browse files
Jon PalmerJon Palmer
Jon Palmer
authored and
Jon Palmer
committed
fixes and updates to v0.5.2
1 parent f3e03fd commit 5899e90

File tree

6 files changed

+71
-41
lines changed

6 files changed

+71
-41
lines changed

bin/funannotate-compare.py

+10-10
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ def __init__(self,prog):
3131
parser.add_argument('--num_orthos', default=500, type=int, help='Number of Single-copy orthologs to run with RAxML')
3232
parser.add_argument('--outgroup', help='Name of species for RAxML outgroup')
3333
parser.add_argument('--eggnog_db', default='fuNOG', help='EggNog database')
34-
parser.add_argument('--run_dnds', action='store_true', help='Run dN/dS analysis with codeML for each ortholog (long runtime)')
34+
parser.add_argument('--run_dnds', choices=['estimate', 'full'], help='Run dN/dS analysis with codeML for each ortholog (long runtime)')
3535
parser.add_argument('--proteinortho', help='Pre-computed ProteinOrtho POFF')
3636
args=parser.parse_args()
3737

@@ -753,13 +753,6 @@ def __init__(self,prog):
753753
with open(KaKstranscript, 'w') as tranout:
754754
for i in proteins:
755755
SeqIO.write(SeqTranscripts[i], tranout, 'fasta')
756-
#calculate dN/dS
757-
#DnDs,M1M2p,M7M8p = lib.rundNdS(KaKstranscript, ID, ortho_folder)
758-
#else:
759-
#DnDs = 'NC'
760-
#M1M2p = 'NC'
761-
#M7M8p = 'NC'
762-
763756
#write to output
764757
if len(eggs) > 0:
765758
eggs = ', '.join(str(v) for v in eggs)
@@ -776,7 +769,10 @@ def __init__(self,prog):
776769
if args.run_dnds:
777770
#multiprocessing dN/dS on list of folders
778771
dNdSList = lib.get_subdirs(ortho_folder)
779-
lib.runMultiProgress(lib.rundNdS, dNdSList, args.cpus)
772+
if args.run_dnds == 'estimate':
773+
lib.runMultiProgress(lib.rundNdSestimate, dNdSList, args.cpus)
774+
else:
775+
lib.runMultiProgress(lib.rundNdSexhaustive, dNdSList, args.cpus)
780776

781777
#after all data is run, then parse result log files, return dictionary
782778
dNdSresults = lib.parsedNdS(ortho_folder)
@@ -791,7 +787,11 @@ def __init__(self,prog):
791787
dNdS = dNdSresults.get(cols[0])
792788
else:
793789
dNdS = ('NC', 'NC', 'NC')
794-
output.write("%s\t%s (%.4f,%.4f)\t%s\t%s\t%s\n" % (cols[0], dNdS[0], round(float(dNdS[1]),4), round(float(dNdS[2]),4), cols[1], cols[2], cols[3]))
790+
if args.run_dnds == 'estimate':
791+
output.write("%s\t%s (NC,NC)\t%s\t%s\t%s\n" % (cols[0], dNdS[0], cols[1], cols[2], cols[3]))
792+
else:
793+
output.write("%s\t%s (%.4f,%.4f)\t%s\t%s\t%s\n" % (cols[0], dNdS[0], round(float(dNdS[1]),4), round(float(dNdS[2]),4), cols[1], cols[2], cols[3]))
794+
795795
#cleanup
796796
os.remove(orthologstmp)
797797

bin/funannotate-contig_cleaner.py

+9-1
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,14 @@ def Sortbysize(input):
4848
contigs.append(rec.id)
4949
return contigs
5050

51+
def countfasta(input):
52+
count = 0
53+
with open(input, 'rU') as f:
54+
for line in f:
55+
if line.startswith (">"):
56+
count += 1
57+
return count
58+
5159
def getFasta(sequences, header):
5260
with open('query.fa', 'w') as fasta:
5361
with open(sequences, 'rU') as input:
@@ -109,7 +117,7 @@ def runNucmer(query, reference, output):
109117
os.remove('reference.fa')
110118

111119
print"------------------------------------"
112-
print"%i input contigs, %i duplicated, %i written to file" % (len(scaffolds), (len(scaffolds) - len(keepers)), len(keepers))
120+
print"%i input contigs, %i duplicated, %i written to file" % (countfasta(args.input), (len(scaffolds) - len(keepers)), len(keepers))
113121

114122
#finally write a new reference based on list of keepers
115123
with open(args.out, 'w') as output:

funannotate.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ def flatten(l):
1919
flatList.append(elem)
2020
return flatList
2121

22-
version = '0.5.0'
22+
version = '0.5.2'
2323

2424
default_help = """
2525
Usage: funannotate <command> <arguments>
@@ -217,7 +217,7 @@ def flatten(l):
217217
218218
Optional: -o, --out Output folder name. Default: funannotate_compare
219219
--cpus Number of CPUs to use. Default: 2
220-
--run_dnds Calculate dN/dS ratio on all orthologs. Very long runtime.
220+
--run_dnds Calculate dN/dS ratio on all orthologs. [estimate,full]
221221
--go_fdr P-value for FDR GO-enrichment. Default: 0.05
222222
--heatmap_stdev Cut-off for heatmap. Default: 1.0
223223
--num_orthos Number of Single-copy orthologs to use for RAxML. Default: 500

lib/library.py

+48-26
Original file line numberDiff line numberDiff line change
@@ -326,27 +326,21 @@ def countGFFgenes(input):
326326
return count
327327

328328
def runMultiProgress(function, inputList, cpus):
329-
from progressbar import ProgressBar, Percentage
330-
try:
331-
from progressbar import AdaptiveETA
332-
eta = AdaptiveETA()
333-
except ImportError:
334-
from progressbar import ETA
335-
eta = ETA()
336-
from time import sleep
337329
#setup pool
338330
p = multiprocessing.Pool(cpus)
339-
#setup progress bar
340-
widgets = [' Progress: ', Percentage(),' || ', eta]
341-
pbar = ProgressBar(widgets=widgets, term_width=30, maxval=len(inputList)).start()
342331
#setup results and split over cpus
332+
tasks = len(inputList)
343333
results = []
344-
r = [p.apply_async(function, (x,), callback=results.append) for x in inputList]
334+
for i in inputList:
335+
results.append(p.apply_async(function, [i]))
345336
#refresh pbar every 5 seconds
346-
while len(results) != len(inputList):
347-
pbar.update(len(results))
348-
sleep(5)
349-
pbar.finish()
337+
while True:
338+
incomplete_count = sum(1 for x in results if not x.ready())
339+
if incomplete_count == 0:
340+
break
341+
sys.stdout.write(" Progress: %.2f%% \r" % (float(tasks - incomplete_count) / tasks * 100))
342+
sys.stdout.flush()
343+
time.sleep(1)
350344
p.close()
351345
p.join()
352346

@@ -1988,9 +1982,8 @@ def simplestTreeEver(fasta, tree):
19881982
for rec in SeqIO.parse(input, 'fasta'):
19891983
ids.append(rec.id)
19901984
outfile.write('(%s,%s);' % (ids[0], ids[1]))
1991-
19921985

1993-
def rundNdS(folder):
1986+
def rundNdSexhaustive(folder):
19941987
FNULL = open(os.devnull, 'w')
19951988
#setup intermediate files
19961989
tmpdir = os.path.dirname(folder)
@@ -2025,6 +2018,43 @@ def rundNdS(folder):
20252018
for file in os.listdir(tmpdir):
20262019
if file.startswith(name+'.'):
20272020
os.rename(os.path.join(tmpdir, file), os.path.join(tmpdir, name, file))
2021+
2022+
2023+
def rundNdSestimate(folder):
2024+
FNULL = open(os.devnull, 'w')
2025+
#setup intermediate files
2026+
tmpdir = os.path.dirname(folder)
2027+
name = os.path.basename(folder)
2028+
transcripts = os.path.join(tmpdir, name+'.transcripts.fa')
2029+
prots = os.path.join(tmpdir, name+'.proteins.fa')
2030+
aln = os.path.join(tmpdir, name+'.aln')
2031+
codon = os.path.join(tmpdir, name+'.codon.aln')
2032+
tree = os.path.join(tmpdir, name+'.tree')
2033+
log = os.path.join(tmpdir, name+'.log')
2034+
finallog = os.path.join(tmpdir, name, name+'.log')
2035+
if not checkannotations(finallog):
2036+
num_seqs = countfasta(transcripts)
2037+
#Translate to protein space
2038+
translatemRNA(transcripts, prots)
2039+
#align protein sequences
2040+
alignMAFFT(prots, aln)
2041+
#convert to codon alignment
2042+
align2Codon(aln, transcripts, codon)
2043+
if checkannotations(codon):
2044+
if num_seqs > 2:
2045+
#now generate a tree using phyml
2046+
drawPhyMLtree(codon, tree)
2047+
else:
2048+
simplestTreeEver(transcripts, tree)
2049+
#now run codeml through ete3
2050+
etecmd = ['ete3', 'evol', '--alg', os.path.abspath(codon), '-t', os.path.abspath(tree), '--models', 'M0', '-o', name, '--clear_all', '--codeml_param', 'cleandata,1']
2051+
with open(log, 'w') as logfile:
2052+
logfile.write('\n%s\n' % ' '.join(etecmd))
2053+
subprocess.call(etecmd, cwd = tmpdir, stdout = logfile, stderr = logfile)
2054+
#clean up
2055+
for file in os.listdir(tmpdir):
2056+
if file.startswith(name+'.'):
2057+
os.rename(os.path.join(tmpdir, file), os.path.join(tmpdir, name, file))
20282058

20292059
def get_subdirs(a_dir):
20302060
return [os.path.join(a_dir, name) for name in os.listdir(a_dir)
@@ -2069,14 +2099,6 @@ def chunkIt(seq, num):
20692099
last += avg
20702100
return out
20712101

2072-
def countKaks(folder, num):
2073-
allfiles = []
2074-
for file in os.listdir(folder):
2075-
if file.endswith('.fasta.axt'):
2076-
f = os.path.join(folder, file)
2077-
allfiles.append(f)
2078-
#split files by x chunks
2079-
return chunkIt(allfiles, num)
20802102

20812103
HEADER = '''
20822104
<!DOCTYPE html>

sample_data/run_unit_tests.sh

+1-1
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ cmd='funannotate annotate -i genome3 -e [email protected] --cpus 6 --iprscan iprs
3131
echo $cmd; eval $cmd
3232

3333
#now run compare
34-
cmd='funannotate compare -i genome1 genome2 genome3 --cpus 6 --outgroup botrytis_cinerea.dikarya'
34+
cmd='funannotate compare -i genome1 genome2 genome3 --cpus 6 --outgroup botrytis_cinerea.dikarya --run_dnds estimate'
3535
echo $cmd; eval $cmd
3636

3737
#clean up augustus training

setup.sh

+1-1
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ else
5353
fi
5454
else
5555
echo "HomeBrew installation not detected, specify DB installation directory"
56-
outputdir='/usr/local/share/funannotate'
56+
outputdir="$HOME/funannotate/"
5757
echo -n "Default DB directory set to ($outputdir), continue [y/n]: "
5858
read question1
5959
if [ $question1 == 'n' ]; then

0 commit comments

Comments
 (0)