Skip to content

Commit b99af2c

Browse files
author
Jon Palmer
committed
add funannotate test, bump version to 1.5
1 parent 9ef01dc commit b99af2c

File tree

6 files changed

+394
-12
lines changed

6 files changed

+394
-12
lines changed

bin/funannotate-functional.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -358,7 +358,7 @@ def parseEggNoggMapper(input, output, GeneDict):
358358
FNULL = open(os.devnull, 'w')
359359
cmd_args = " ".join(sys.argv)+'\n'
360360
lib.log.debug(cmd_args)
361-
print "-------------------------------------------------------"
361+
print("-------------------------------------------------------")
362362
lib.SystemInfo()
363363

364364
#get version of funannotate

bin/funannotate-predict.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1182,7 +1182,7 @@ def which_path(file_name):
11821182

11831183
#total up Predictions, get source counts
11841184
EVMCounts = lib.countEVMpredictions(Predictions)
1185-
lib.log.debug('Sumary of gene models: {:}'.format(EVMCounts))
1185+
lib.log.debug('Summary of gene models: {:}'.format(EVMCounts))
11861186
lib.log.debug('EVM Weights: {:}'.format(EVMWeights))
11871187
lib.log.info('Summary of gene models passed to EVM (weights):\n-------------------------------------------------------')
11881188
lib.log.debug('Launching EVM via funannotate-runEVM.py')

bin/funannotate-test.py

+342
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,342 @@
1+
#!/usr/bin/env python
2+
3+
import sys
4+
import os
5+
import subprocess
6+
import urllib2
7+
import socket
8+
import argparse
9+
import shutil
10+
11+
#setup menu with argparse
12+
class MyFormatter(argparse.ArgumentDefaultsHelpFormatter):
    """Help formatter that shows argument defaults and requests a help column of 48."""

    def __init__(self, prog):
        # Delegate to the argparse base class, overriding only the help position.
        super(MyFormatter, self).__init__(prog=prog, max_help_position=48)
15+
# Command-line interface: one or more test names (or 'all') plus a CPU count.
_TEST_CHOICES = ['all', 'clean', 'mask', 'predict', 'annotate', 'busco', 'rna-seq', 'compare']

parser = argparse.ArgumentParser(
    prog='funannotate-test.py',
    description='Script to download and then test funannotate installation',
    epilog='Written by Jon Palmer (2016-2018) [email protected]',
    formatter_class=MyFormatter,
)
parser.add_argument(
    '-t', '--tests',
    required=True,
    nargs='+',
    choices=_TEST_CHOICES,
    help='select which tests to run',
)
parser.add_argument(
    '--cpus',
    default=2,
    type=int,
    help='Number of CPUs to use',
)
args = parser.parse_args()
24+
25+
# Download URL of the test-data archive for each test, keyed by test name.
# Every URL has the same shape and differs only in the OSF identifier.
download_links = dict(
    (test_name, 'https://osf.io/{0}/download?version=1'.format(osf_id))
    for test_name, osf_id in [
        ('mask', 'hbryz'),
        ('clean', '8pjbe'),
        ('predict', 'te2pf'),
        ('busco', 'kyrd9'),
        ('rna-seq', 't7j83'),
        ('annotate', '97pyn'),
        ('compare', '7s9xh'),
    ]
)
32+
33+
def checkFile(input):
    """Return True when `input` is a usable file path, False otherwise.

    A path that resolves to a regular file counts only if it holds at least
    one byte. Any other path counts only if it is itself a symbolic link.
    """
    if os.path.isfile(input):
        # Regular file (possibly reached through a link): reject empty files.
        return os.path.getsize(input) >= 1
    # Not a regular file: accept it only when the path is a symlink.
    return os.path.islink(input)
47+
48+
def countfasta(input):
    """Return the number of lines in the file `input` that start with '>'.

    The file is read with universal-newline handling. The original 'rU' open
    mode is rejected by Python 3.11+, so io.open is used instead; its default
    newline handling is universal on both Python 2 and Python 3.
    """
    import io  # local import: io is not among this script's file-level imports

    # errors='replace' keeps undecodable bytes from raising while counting.
    with io.open(input, 'r', errors='replace') as handle:
        return sum(1 for line in handle if line.startswith('>'))
55+
56+
def countGFFgenes(input):
    """Return the number of lines in the file `input` containing '<TAB>gene<TAB>'.

    The file is read with universal-newline handling. The original 'rU' open
    mode is rejected by Python 3.11+, so io.open is used instead; its default
    newline handling is universal on both Python 2 and Python 3.
    """
    import io  # local import: io is not among this script's file-level imports

    # errors='replace' keeps undecodable bytes from raising while counting.
    with io.open(input, 'r', errors='replace') as handle:
        return sum(1 for line in handle if '\tgene\t' in line)
63+
64+
def runCMD(cmd, dir):
    """Echo a command line, then execute it with `dir` as working directory.

    `cmd` is a list of argument strings. The exit status of the command is
    not inspected or returned.
    """
    command_line = ' '.join(cmd)
    print('CMD: {:}'.format(command_line))
    print("#########################################################")
    subprocess.call(cmd, cwd=dir)
68+
69+
def download(url, name):
    """Fetch `url` and write the response body to the file `name`.

    A running byte count is written to stdout while the transfer proceeds,
    with a percentage when the server reports a non-zero Content-Length.

    A connection reset by the peer is tolerated: a warning is printed and the
    function returns, leaving whatever was written so far. Any other
    socket.error is re-raised.
    """
    # Local import: the handler below needs errno, which the file-level
    # import block does not provide (the original raised NameError here).
    import errno

    block_sz = 8192
    try:
        u = urllib2.urlopen(url)
        try:
            # Content-Length may be absent; treat that as "size unknown"
            # rather than failing on an empty header list.
            lengths = u.info().getheaders("Content-Length")
            file_size = int(lengths[0]) if lengths else 0
            print("Downloading: {0} Bytes: {1}".format(
                url, file_size if file_size else 'unknown'))
            file_size_dl = 0
            # The context manager closes the output file even if the read fails.
            with open(name, 'wb') as f:
                while True:
                    chunk = u.read(block_sz)
                    if not chunk:
                        break
                    file_size_dl += len(chunk)
                    f.write(chunk)
                    if file_size:
                        p = float(file_size_dl) / file_size
                        status = r"{0} [{1:.2%}]".format(file_size_dl, p)
                    else:
                        # No total available, so no percentage can be shown.
                        status = r"{0}".format(file_size_dl)
                    # Trailing backspaces move the cursor back so the next
                    # status overwrites this one.
                    status = status + chr(8)*(len(status)+1)
                    sys.stdout.write(status)
                    sys.stdout.flush()
        finally:
            u.close()
    except socket.error as e:
        if e.errno != errno.ECONNRESET:
            raise
        print('WARNING: connection reset while downloading {0}; {1} may be incomplete'.format(url, name))
95+
96+
def runMaskTest():
    """Run `funannotate mask` on the test genome and print SUCCESS or ERROR.

    The test data archive is downloaded and unpacked only when 'test.fa' is
    not already present in the current directory. The run happens in a
    per-process scratch directory, which is removed only on success.

    The test passes when 'test.masked.fa' exists and is non-empty and at
    least one 'repeatmodeler-library*' file exists with all such files
    non-empty. Failures are reported through the ERROR message instead of an
    uncaught AssertionError.
    """
    print("#########################################################")
    print('Running `funannotate mask` unit testing: RepeatModeler --> RepeatMasker')
    tmpdir = 'test-mask_'+pid
    os.makedirs(tmpdir)
    inputFasta = 'test.fa'
    if not os.path.isfile(inputFasta):
        if not os.path.isfile('test-mask.tar.gz'):
            download(download_links.get('mask'), 'test-mask.tar.gz')
        subprocess.call(['tar', '-zxf', 'test-mask.tar.gz'])
    shutil.copyfile(inputFasta, os.path.join(tmpdir, inputFasta))
    runCMD(['funannotate', 'mask', '-i', inputFasta, '-o', 'test.masked.fa', '--cpus', str(args.cpus)], tmpdir)
    #check that everything worked
    masked_ok = checkFile(os.path.join(tmpdir, 'test.masked.fa'))
    libraries = [entry for entry in os.listdir(tmpdir)
                 if entry.startswith('repeatmodeler-library')]
    library_ok = bool(libraries) and all(
        checkFile(os.path.join(tmpdir, entry)) for entry in libraries)
    print("#########################################################")
    if masked_ok and library_ok:
        print('SUCCESS: `funannotate mask` test complete.')
        shutil.rmtree(tmpdir)
    else:
        print('ERROR: `funannotate mask` test failed, RepeatModeler or RepeatMasker not properly installed.')
    print("#########################################################\n")
122+
123+
def runCleanTest():
    """Run `funannotate clean --exhaustive` on the test assembly and print SUCCESS or ERROR.

    The test data archive is downloaded and unpacked only when
    'test.clean.fa' is not already present. The run happens in a per-process
    scratch directory, which is removed only on success.

    The test passes when the cleaned output holds exactly 3 FASTA records.
    A missing or unreadable output file is reported as a failure.

    Raises:
        AssertionError: if the staged input does not hold exactly 6 records.
    """
    print("#########################################################")
    print('Running `funannotate clean` unit testing: minimap2 mediated assembly duplications')
    tmpdir = 'test-clean_'+pid
    os.makedirs(tmpdir)
    inputFasta = 'test.clean.fa'
    if not os.path.isfile(inputFasta):
        if not os.path.isfile('test-clean.tar.gz'):
            download(download_links.get('clean'), 'test-clean.tar.gz')
        subprocess.call(['tar', '-zxf', 'test-clean.tar.gz'])
    staged = os.path.join(tmpdir, inputFasta)
    shutil.copyfile(inputFasta, staged)
    # Explicit raise instead of `assert`, so the input sanity check still
    # runs when Python is started with -O.
    if countfasta(staged) != 6:
        raise AssertionError('expected 6 sequences in {0}'.format(staged))
    #run exhaustive
    runCMD(['funannotate', 'clean', '-i', inputFasta, '-o', 'test.exhaustive.fa', '--exhaustive'], tmpdir)
    print("#########################################################")
    try:
        passed = countfasta(os.path.join(tmpdir, 'test.exhaustive.fa')) == 3
    except (IOError, OSError):
        # No readable output means the command failed: report, don't crash.
        passed = False
    if passed:
        print('SUCCESS: `funannotate clean` test complete.')
        shutil.rmtree(tmpdir)
    else:
        print('ERROR: `funannotate clean` test failed.')
    print("#########################################################\n")
145+
146+
147+
def runPredictTest():
    """Run `funannotate predict` on the test genome and print SUCCESS or ERROR.

    The test data archive is downloaded and unpacked only when the genome or
    protein evidence file is missing or empty. The run happens in a
    per-process scratch directory, which is removed only on success.

    The test passes when the resulting GFF3 holds between 1500 and 1700 gene
    lines (inclusive). A missing or unreadable GFF3 is reported as a failure.
    """
    print("#########################################################")
    print('Running `funannotate predict` unit testing')
    tmpdir = 'test-predict_'+pid
    os.makedirs(tmpdir)
    inputFasta = 'test.softmasked.fa'
    protEvidence = 'protein.evidence.fasta'
    if not checkFile(inputFasta) or not checkFile(protEvidence):
        if not os.path.isfile('test-predict.tar.gz'):
            download(download_links.get('predict'), 'test-predict.tar.gz')
        subprocess.call(['tar', '-zxf', 'test-predict.tar.gz'])
    shutil.copyfile(inputFasta, os.path.join(tmpdir, inputFasta))
    shutil.copyfile(protEvidence, os.path.join(tmpdir, protEvidence))
    #run predict
    runCMD(['funannotate', 'predict', '-i', inputFasta,
            '--protein_evidence', protEvidence,
            '-o', 'annotate', '--augustus_species', 'yeast',
            '--cpus', str(args.cpus), '--species', "Awesome testicus"], tmpdir)
    print("#########################################################")
    #check results
    gff = os.path.join(tmpdir, 'annotate', 'predict_results', 'Awesome_testicus.gff3')
    try:
        passed = 1500 <= countGFFgenes(gff) <= 1700
    except (IOError, OSError):
        # No readable GFF3 means the run failed: report, don't crash.
        passed = False
    if passed:
        print('SUCCESS: `funannotate predict` test complete.')
        shutil.rmtree(tmpdir)
    else:
        print('ERROR: `funannotate predict` test failed - check logfiles')
    print("#########################################################\n")
174+
175+
def runBuscoTest():
    """Run `funannotate predict` without a pre-trained species and print SUCCESS or ERROR.

    Requires the AUGUSTUS_CONFIG_PATH environment variable; when it is not
    set an error is printed and the test is skipped. Any existing
    'species/awesome_busco' directory under that path is deleted first.

    The test data archive is downloaded and unpacked only when the genome or
    protein evidence file is missing or empty. The run happens in a
    per-process scratch directory, which is removed only on success.

    The test passes when the resulting GFF3 holds between 1500 and 1700 gene
    lines (inclusive). A missing or unreadable GFF3 is reported as a failure.
    """
    print("#########################################################")
    print('Running `funannotate predict` BUSCO-mediated training unit testing')
    #need to delete any pre-existing Augustus training data
    AUGUSTUS = os.environ.get("AUGUSTUS_CONFIG_PATH")
    if AUGUSTUS is None:
        # Reported with print: no `lib` logger is imported in this script.
        print("ERROR: $AUGUSTUS_CONFIG_PATH environmental variable not found, set to continue.")
        return
    trained = os.path.join(AUGUSTUS, 'species', 'awesome_busco')
    if os.path.isdir(trained):
        shutil.rmtree(trained)
    tmpdir = 'test-busco_'+pid
    os.makedirs(tmpdir)
    inputFasta = 'test.softmasked.fa'
    protEvidence = 'protein.evidence.fasta'
    if not checkFile(inputFasta) or not checkFile(protEvidence):
        if not os.path.isfile('test-busco.tar.gz'):
            # NOTE(review): this fetches the 'predict' archive although a
            # 'busco' link is also defined -- confirm which one is intended.
            download(download_links.get('predict'), 'test-busco.tar.gz')
        subprocess.call(['tar', '-zxf', 'test-busco.tar.gz'])
    shutil.copyfile(inputFasta, os.path.join(tmpdir, inputFasta))
    shutil.copyfile(protEvidence, os.path.join(tmpdir, protEvidence))
    #run predict
    runCMD(['funannotate', 'predict', '-i', inputFasta,
            '--protein_evidence', protEvidence,
            '-o', 'annotate', '--cpus', str(args.cpus),
            '--species', "Awesome busco"], tmpdir)
    print("#########################################################")
    #check results
    gff = os.path.join(tmpdir, 'annotate', 'predict_results', 'Awesome_busco.gff3')
    try:
        passed = 1500 <= countGFFgenes(gff) <= 1700
    except (IOError, OSError):
        # No readable GFF3 means the run failed: report, don't crash.
        passed = False
    if passed:
        print('SUCCESS: `funannotate predict` BUSCO-mediated training test complete.')
        shutil.rmtree(tmpdir)
    else:
        print('ERROR: `funannotate predict` BUSCO-mediated training test failed - check logfiles')
    print("#########################################################\n")
210+
211+
def runAnnotateTest():
    """Run `funannotate annotate` on the test genome and print SUCCESS or ERROR.

    The test data archive is downloaded and unpacked only when one of the
    three input files is missing or empty. The run happens in a per-process
    scratch directory, which is removed only on success.

    The test passes when each expected result file exists and is non-empty.
    The checks are explicit conditions rather than `assert` statements, so
    they still run when Python is started with -O.
    """
    print("#########################################################")
    print('Running `funannotate annotate` unit testing')
    tmpdir = 'test-annotate_'+pid
    os.makedirs(tmpdir)
    genbank = 'Genome_one.gbk'
    iprscan = 'genome_one.iprscan.xml'
    emapper = 'genome_one.emapper.annotations'
    if not checkFile(genbank) or not checkFile(iprscan) or not checkFile(emapper):
        if not os.path.isfile('test-annotate.tar.gz'):
            download(download_links.get('annotate'), 'test-annotate.tar.gz')
        subprocess.call(['tar', '-zxf', 'test-annotate.tar.gz'])
    for name in (genbank, iprscan, emapper):
        shutil.copyfile(name, os.path.join(tmpdir, name))
    #run annotate
    runCMD(['funannotate', 'annotate', '--genbank', genbank,
            '-o', 'annotate', '--cpus', str(args.cpus),
            '--iprscan', iprscan,
            '--eggnog', emapper], tmpdir)
    print("#########################################################")
    #check results
    results = os.path.join(tmpdir, 'annotate', 'annotate_results')
    expected = ['Genome_one.gbk', 'Genome_one.sqn', 'Genome_one.agp',
                'Genome_one.tbl', 'Genome_one.annotations.txt']
    if all(checkFile(os.path.join(results, name)) for name in expected):
        print('SUCCESS: `funannotate annotate` test complete.')
        shutil.rmtree(tmpdir)
    else:
        print('ERROR: `funannotate annotate` test failed - check logfiles')
    print("#########################################################\n")
244+
245+
def runCompareTest():
    """Run `funannotate compare` on three test genomes and print SUCCESS or ERROR.

    The test data archive is downloaded and unpacked only when one of the
    three GenBank inputs is missing or empty. The run happens in a
    per-process scratch directory, which is removed only on success.

    The test passes when 'compare/index.html', 'compare/phylogeny.html' and
    'compare.tar.gz' all exist and are non-empty. The checks are explicit
    conditions rather than `assert` statements, so they still run when
    Python is started with -O.
    """
    print("#########################################################")
    print('Running `funannotate compare` unit testing')
    tmpdir = 'test-compare_'+pid
    os.makedirs(tmpdir)
    input1 = 'Genome_one.gbk'
    input2 = 'Genome_two.gbk'
    input3 = 'Genome_three.gbk'
    if not checkFile(input1) or not checkFile(input2) or not checkFile(input3):
        if not os.path.isfile('test-compare.tar.gz'):
            download(download_links.get('compare'), 'test-compare.tar.gz')
        subprocess.call(['tar', '-zxf', 'test-compare.tar.gz'])
    for name in (input1, input2, input3):
        shutil.copyfile(name, os.path.join(tmpdir, name))
    #run compare
    runCMD(['funannotate', 'compare',
            '-i', input1, input2, input3,
            '-o', 'compare', '--cpus', str(args.cpus),
            '--run_dnds', 'estimate', '--outgroup', 'botrytis_cinerea.dikarya'], tmpdir)
    print("#########################################################")
    #check results
    expected = [os.path.join(tmpdir, 'compare', 'index.html'),
                os.path.join(tmpdir, 'compare', 'phylogeny.html'),
                os.path.join(tmpdir, 'compare.tar.gz')]
    if all(checkFile(path) for path in expected):
        print('SUCCESS: `funannotate compare` test complete.')
        shutil.rmtree(tmpdir)
    else:
        print('ERROR: `funannotate compare` test failed - check logfiles')
    print("#########################################################\n")
276+
277+
def runRNAseqTest():
    """Run `funannotate train`, `predict` and `update` in sequence and print SUCCESS or ERROR.

    Requires the AUGUSTUS_CONFIG_PATH environment variable; when it is not
    set an error is printed and the test is skipped. Any existing
    'species/awesome_rna' directory under that path is deleted first.

    The test data archive is downloaded and unpacked only when one of the
    four input files is missing or empty. The run happens in a per-process
    scratch directory, which is removed only on success.

    The test passes when the updated GFF3 holds between 1630 and 1830 gene
    lines (inclusive). A missing or unreadable GFF3 is reported as a failure.
    """
    print("#########################################################")
    print('Running funannotate RNA-seq training/prediction unit testing')
    #need to delete any pre-existing Augustus training data
    AUGUSTUS = os.environ.get("AUGUSTUS_CONFIG_PATH")
    if AUGUSTUS is None:
        # Reported with print: no `lib` logger is imported in this script.
        print("ERROR: $AUGUSTUS_CONFIG_PATH environmental variable not found, set to continue.")
        return
    trained = os.path.join(AUGUSTUS, 'species', 'awesome_rna')
    if os.path.isdir(trained):
        shutil.rmtree(trained)
    tmpdir = 'test-rna_seq_'+pid
    os.makedirs(tmpdir)
    inputFasta = 'test.softmasked.fa'
    protEvidence = 'protein.evidence.fasta'
    illumina = 'rna-seq.illumina.fastq.gz'
    nanopore = 'rna-seq.nanopore.fastq.gz'
    if not checkFile(inputFasta) or not checkFile(protEvidence) or not checkFile(illumina) or not checkFile(nanopore):
        if not os.path.isfile('test-rna_seq.tar.gz'):
            download(download_links.get('rna-seq'), 'test-rna_seq.tar.gz')
        subprocess.call(['tar', '-zxf', 'test-rna_seq.tar.gz'])
    for f in [inputFasta, protEvidence, illumina, nanopore]:
        shutil.copyfile(f, os.path.join(tmpdir, f))
    #run train
    runCMD(['funannotate', 'train', '-i', inputFasta,
            '--single', illumina, '--nanopore_mrna', nanopore,
            '-o', 'rna-seq', '--cpus', str(args.cpus), '--jaccard_clip',
            '--species', "Awesome rna"], tmpdir)
    #run predict
    print("#########################################################")
    print('Now running `funannotate predict` using RNA-seq training data')
    runCMD(['funannotate', 'predict', '-i', inputFasta,
            '--protein_evidence', protEvidence,
            '-o', 'rna-seq', '--cpus', str(args.cpus),
            '--species', "Awesome rna"], tmpdir)
    #run update
    print("#########################################################")
    print('Now running `funannotate update` to run PASA-mediated UTR addition and multiple transcripts')
    runCMD(['funannotate', 'update', '-i', 'rna-seq',
            '--cpus', str(args.cpus)], tmpdir)
    print("#########################################################")
    #check results
    gff = os.path.join(tmpdir, 'rna-seq', 'update_results', 'Awesome_rna.gff3')
    try:
        passed = 1630 <= countGFFgenes(gff) <= 1830
    except (IOError, OSError):
        # No readable GFF3 means the pipeline failed: report, don't crash.
        passed = False
    if passed:
        print('SUCCESS: funannotate RNA-seq training/prediction test complete.')
        shutil.rmtree(tmpdir)
    else:
        print('ERROR: funannotate RNA-seq training/prediction test failed - check logfiles')
    print("#########################################################\n")
326+
327+
328+
# Process id as a string; the test functions append it to their scratch
# directory names.
pid = str(os.getpid())

# Selected tests always run in this fixed order, independent of the order in
# which they were named on the command line. 'all' selects every test.
for test_name, test_func in (
        ('clean', runCleanTest),
        ('mask', runMaskTest),
        ('predict', runPredictTest),
        ('busco', runBuscoTest),
        ('rna-seq', runRNAseqTest),
        ('annotate', runAnnotateTest),
        ('compare', runCompareTest)):
    if test_name in args.tests or 'all' in args.tests:
        test_func()

0 commit comments

Comments
 (0)