1
+ #!/usr/bin/env python
2
+
3
+ import sys
4
+ import os
5
+ import subprocess
6
+ import urllib2
7
+ import socket
8
+ import argparse
9
+ import shutil
10
+
11
+ #setup menu with argparse
12
class MyFormatter(argparse.ArgumentDefaultsHelpFormatter):
    """Help formatter that appends defaults to option help text.

    Behaves like ``argparse.ArgumentDefaultsHelpFormatter`` but is always
    constructed with ``max_help_position=48``.
    """

    def __init__(self, prog):
        # argparse instantiates the formatter with the program name only, so
        # the wider help column has to be injected here.
        super(MyFormatter, self).__init__(prog, max_help_position=48)
15
# Command-line interface: the tests to run and the CPU count passed on to
# the funannotate sub-commands. Parsing happens at import time.
parser = argparse.ArgumentParser(
    prog='funannotate-test.py',
    description='''Script to download and then test funannotate installation''',
    epilog="""Written by Jon Palmer (2016-2018) [email protected] """,
    formatter_class=MyFormatter
)
parser.add_argument(
    '-t', '--tests',
    required=True,
    nargs='+',
    choices=['all', 'clean', 'mask', 'predict', 'annotate', 'busco', 'rna-seq', 'compare'],
    help='select which tests to run'
)
parser.add_argument(
    '--cpus',
    default=2,
    type=int,
    help='Number of CPUs to use'
)
args = parser.parse_args()
24
+
25
# Test-data archive URL for each test name accepted by --tests
# (except 'all', which has no archive of its own).
download_links = {
    'mask': 'https://osf.io/hbryz/download?version=1',
    'clean': 'https://osf.io/8pjbe/download?version=1',
    'predict': 'https://osf.io/te2pf/download?version=1',
    'busco': 'https://osf.io/kyrd9/download?version=1',
    'rna-seq': 'https://osf.io/t7j83/download?version=1',
    'annotate': 'https://osf.io/97pyn/download?version=1',
    'compare': 'https://osf.io/7s9xh/download?version=1',
}
32
+
33
def checkFile(input):
    """Return True when ``input`` is a usable file path.

    A regular file (as reported by ``os.path.isfile``) counts only if it is
    at least one byte long. Anything else counts only if it is a symbolic
    link; every other path, including a missing one, yields False.
    """
    if os.path.isfile(input):
        # Zero-length files are treated as absent.
        return os.path.getsize(input) >= 1
    return os.path.islink(input)
47
+
48
def countfasta(input):
    """Return the number of lines in ``input`` that begin with ``>``.

    Each such line is a FASTA header, so the result is the record count.
    The file is read with universal-newline handling, so ``\\n``, ``\\r\\n``
    and lone ``\\r`` line endings are all recognised.
    """
    import io  # local import keeps this fix self-contained

    # The old 'rU' mode is deprecated and rejected outright by Python 3.11+.
    # io.open in text mode gives the same universal-newline behaviour on
    # Python 2 and 3. latin-1 maps every byte to one character, so decoding
    # can never fail and a leading '>' byte is still seen as '>'.
    with io.open(input, 'r', encoding='latin-1') as f:
        return sum(1 for line in f if line.startswith('>'))
55
+
56
def countGFFgenes(input):
    """Return the number of lines in ``input`` containing a tab-delimited ``gene`` field.

    A line is counted when it contains ``<TAB>gene<TAB>``, i.e. some column
    is exactly ``gene``. Lines whose column is e.g. ``mRNA`` or
    ``pseudogene`` are not counted.
    """
    import io  # local import keeps this fix self-contained

    # The pattern must be exactly TAB + "gene" + TAB: with stray spaces
    # around the word it can never match a tab-separated column and the
    # count is always 0.
    # io.open replaces the 'rU' mode, which Python 3.11+ rejects; latin-1
    # decoding cannot fail and leaves tabs and ASCII text untouched.
    with io.open(input, 'r', encoding='latin-1') as f:
        return sum(1 for line in f if "\tgene\t" in line)
63
+
64
def runCMD(cmd, dir):
    """Echo a command, then execute it with ``dir`` as the working directory.

    ``cmd`` is an argument list (it is joined with spaces for display and
    handed unchanged to ``subprocess.call``). The command's exit status is
    neither checked nor returned.
    """
    command_line = ' '.join(cmd)
    print('CMD: {:}'.format(command_line))
    print("#########################################################")
    subprocess.call(cmd, cwd=dir)
68
+
69
def download(url, name):
    """Download ``url`` into the local file ``name`` with a progress counter.

    Progress is written to stdout as a running byte count, plus a percentage
    when the server reports a Content-Length.

    A connection reset by the peer (ECONNRESET) does not raise. If the
    expected size is known and fewer bytes arrived, a warning is printed and
    the truncated file is removed so it is not mistaken for a complete
    download; otherwise the file is kept. Any other ``socket.error`` is
    re-raised.
    """
    import errno  # the handler below needs it; the file never imported it

    block_sz = 8192
    file_size = 0      # 0 means "unknown" (no usable Content-Length header)
    file_size_dl = 0
    try:
        u = urllib2.urlopen(url)
        try:
            lengths = u.info().getheaders("Content-Length")
            if lengths:
                file_size = int(lengths[0])
            print("Downloading: {0} Bytes: {1}".format(
                url, file_size if file_size else 'unknown'))
            # The with-block guarantees the handle is closed even when the
            # transfer is interrupted.
            with open(name, 'wb') as f:
                while True:
                    chunk = u.read(block_sz)
                    if not chunk:
                        break
                    file_size_dl += len(chunk)
                    f.write(chunk)
                    if file_size:
                        p = float(file_size_dl) / file_size
                        status = r"{0} [{1:.2%}]".format(file_size_dl, p)
                    else:
                        # No total available: show the byte count only
                        # rather than dividing by zero.
                        status = "{0}".format(file_size_dl)
                    # Trailing backspaces move the cursor back so the next
                    # update overwrites this one in place.
                    status = status + chr(8) * (len(status) + 1)
                    sys.stdout.write(status)
                    sys.stdout.flush()
        finally:
            u.close()
    except socket.error as e:
        if e.errno != errno.ECONNRESET:
            raise
        if file_size and file_size_dl < file_size:
            print("WARNING: connection reset after {0} of {1} bytes while "
                  "downloading {2}; removing incomplete file {3}".format(
                      file_size_dl, file_size, url, name))
            if os.path.isfile(name):
                os.remove(name)
95
+
96
def runMaskTest():
    """Run `funannotate mask` on the test assembly and report the outcome.

    Fetches and unpacks test-mask.tar.gz when test.fa is not already in the
    working directory, runs the command inside a per-process scratch
    directory and checks that a non-empty test.masked.fa and at least one
    non-empty file named ``repeatmodeler-library*`` were produced. On
    success the scratch directory is removed; on failure it is left in place
    for inspection.
    """
    print("#########################################################")
    print('Running `funannotate mask` unit testing: RepeatModeler --> RepeatMasker')
    tmpdir = 'test-mask_' + pid
    os.makedirs(tmpdir)
    inputFasta = 'test.fa'
    if not os.path.isfile(inputFasta):
        if not os.path.isfile('test-mask.tar.gz'):
            download(download_links.get('mask'), 'test-mask.tar.gz')
        subprocess.call(['tar', '-zxf', 'test-mask.tar.gz'])
    shutil.copyfile(inputFasta, os.path.join(tmpdir, inputFasta))
    runCMD(['funannotate', 'mask', '-i', inputFasta, '-o', 'test.masked.fa', '--cpus', str(args.cpus)], tmpdir)
    #check that everything worked
    # Explicit checks instead of bare asserts: a failed run must end in the
    # ERROR report below rather than an uncaught AssertionError, and asserts
    # disappear entirely under `python -O`.
    masked = checkFile(os.path.join(tmpdir, 'test.masked.fa'))
    libraries = [name for name in os.listdir(tmpdir)
                 if name.startswith('repeatmodeler-library')]
    library = bool(libraries) and all(
        checkFile(os.path.join(tmpdir, name)) for name in libraries)
    print("#########################################################")
    if masked and library:
        print('SUCCESS: `funannotate mask` test complete.')
        shutil.rmtree(tmpdir)
    else:
        print('ERROR: `funannotate mask` test failed, RepeatModeler or RepeatMasker not properly installed.')
    print("#########################################################\n ")
122
+
123
def runCleanTest():
    """Run `funannotate clean --exhaustive` and check the contig count.

    Fetches and unpacks test-clean.tar.gz when test.clean.fa is not already
    in the working directory. The input must hold 6 FASTA records and the
    cleaned output 3. On success the per-process scratch directory is
    removed; on failure it is left in place.
    """
    print("#########################################################")
    print('Running `funannotate clean` unit testing: minimap2 mediated assembly duplications')
    tmpdir = 'test-clean_' + pid
    os.makedirs(tmpdir)
    inputFasta = 'test.clean.fa'
    if not os.path.isfile(inputFasta):
        if not os.path.isfile('test-clean.tar.gz'):
            download(download_links.get('clean'), 'test-clean.tar.gz')
        subprocess.call(['tar', '-zxf', 'test-clean.tar.gz'])
    shutil.copyfile(inputFasta, os.path.join(tmpdir, inputFasta))
    # Sanity-check the input before running: an unexpected record count is
    # reported as a failure instead of raising an uncaught AssertionError.
    if countfasta(os.path.join(tmpdir, inputFasta)) != 6:
        print("#########################################################")
        print('ERROR: `funannotate clean` test failed.')
        print("#########################################################\n ")
        return
    #run exhaustive
    runCMD(['funannotate', 'clean', '-i', inputFasta, '-o', 'test.exhaustive.fa', '--exhaustive'], tmpdir)
    print("#########################################################")
    cleaned = os.path.join(tmpdir, 'test.exhaustive.fa')
    # Test for the file first: when the command fails no output exists, and
    # counting records in a missing file would raise rather than report.
    if os.path.isfile(cleaned) and countfasta(cleaned) == 3:
        print('SUCCESS: `funannotate clean` test complete.')
        shutil.rmtree(tmpdir)
    else:
        print('ERROR: `funannotate clean` test failed.')
    print("#########################################################\n ")
145
+
146
+
147
def runPredictTest():
    """Run `funannotate predict` on the test genome and check the gene count.

    Fetches and unpacks test-predict.tar.gz when the soft-masked genome or
    the protein evidence file is missing or empty. The run passes when the
    resulting Awesome_testicus.gff3 holds between 1500 and 1700 genes
    (inclusive). On success the per-process scratch directory is removed; on
    failure it is left in place so the log files can be inspected.
    """
    print("#########################################################")
    print('Running `funannotate predict` unit testing')
    tmpdir = 'test-predict_' + pid
    os.makedirs(tmpdir)
    inputFasta = 'test.softmasked.fa'
    protEvidence = 'protein.evidence.fasta'
    if not checkFile(inputFasta) or not checkFile(protEvidence):
        if not os.path.isfile('test-predict.tar.gz'):
            download(download_links.get('predict'), 'test-predict.tar.gz')
        subprocess.call(['tar', '-zxf', 'test-predict.tar.gz'])
    shutil.copyfile(inputFasta, os.path.join(tmpdir, inputFasta))
    shutil.copyfile(protEvidence, os.path.join(tmpdir, protEvidence))
    #run predict
    runCMD(['funannotate', 'predict', '-i', inputFasta,
            '--protein_evidence', protEvidence,
            '-o', 'annotate', '--augustus_species', 'yeast',
            '--cpus', str(args.cpus), '--species', "Awesome testicus"], tmpdir)
    print("#########################################################")
    #check results
    gff = os.path.join(tmpdir, 'annotate', 'predict_results', 'Awesome_testicus.gff3')
    # Test for the file first: a failed run produces no GFF3, and counting
    # genes in a missing file would raise IOError instead of reporting.
    if os.path.isfile(gff) and 1500 <= countGFFgenes(gff) <= 1700:
        print('SUCCESS: `funannotate predict` test complete.')
        shutil.rmtree(tmpdir)
    else:
        print('ERROR: `funannotate predict` test failed - check logfiles')
    print("#########################################################\n ")
174
+
175
def runBuscoTest():
    """Run `funannotate predict` without a pre-trained Augustus species.

    Requires $AUGUSTUS_CONFIG_PATH; when it is unset an error is printed and
    the test is skipped. Any existing ``species/awesome_busco`` directory
    under that path is deleted before the run. The run passes when the
    resulting Awesome_busco.gff3 holds between 1500 and 1700 genes
    (inclusive). On success the per-process scratch directory is removed; on
    failure it is left in place.
    """
    print("#########################################################")
    print('Running `funannotate predict` BUSCO-mediated training unit testing')
    #need to delete any pre-existing Augustus training data
    AUGUSTUS = os.environ.get("AUGUSTUS_CONFIG_PATH")
    if AUGUSTUS is None:
        # Reported with print: no `lib` logging module is imported in this
        # script, so calling lib.log.error here would raise NameError.
        print("ERROR: $AUGUSTUS_CONFIG_PATH environmental variable not found, set to continue.")
        return
    trained = os.path.join(AUGUSTUS, 'species', 'awesome_busco')
    if os.path.isdir(trained):
        shutil.rmtree(trained)
    tmpdir = 'test-busco_' + pid
    os.makedirs(tmpdir)
    inputFasta = 'test.softmasked.fa'
    protEvidence = 'protein.evidence.fasta'
    if not checkFile(inputFasta) or not checkFile(protEvidence):
        if not os.path.isfile('test-busco.tar.gz'):
            # NOTE(review): this fetches the 'predict' archive although
            # download_links also has a 'busco' entry - confirm which one is
            # intended before changing it.
            download(download_links.get('predict'), 'test-busco.tar.gz')
        subprocess.call(['tar', '-zxf', 'test-busco.tar.gz'])
    shutil.copyfile(inputFasta, os.path.join(tmpdir, inputFasta))
    shutil.copyfile(protEvidence, os.path.join(tmpdir, protEvidence))
    #run predict
    runCMD(['funannotate', 'predict', '-i', inputFasta,
            '--protein_evidence', protEvidence,
            '-o', 'annotate', '--cpus', str(args.cpus),
            '--species', "Awesome busco"], tmpdir)
    print("#########################################################")
    #check results
    gff = os.path.join(tmpdir, 'annotate', 'predict_results', 'Awesome_busco.gff3')
    # Test for the file first: a failed run produces no GFF3, and counting
    # genes in a missing file would raise IOError instead of reporting.
    if os.path.isfile(gff) and 1500 <= countGFFgenes(gff) <= 1700:
        print('SUCCESS: `funannotate predict` BUSCO-mediated training test complete.')
        shutil.rmtree(tmpdir)
    else:
        print('ERROR: `funannotate predict` BUSCO-mediated training test failed - check logfiles')
    print("#########################################################\n ")
210
+
211
def runAnnotateTest():
    """Run `funannotate annotate` on the test GenBank file and check its outputs.

    Fetches and unpacks test-annotate.tar.gz when any of the three inputs is
    missing or empty. The run passes when the .gbk, .sqn, .agp, .tbl and
    .annotations.txt results for Genome_one all exist and are non-empty. On
    success the per-process scratch directory is removed; on failure it is
    left in place.
    """
    print("#########################################################")
    print('Running `funannotate annotate` unit testing')
    tmpdir = 'test-annotate_' + pid
    os.makedirs(tmpdir)
    genbank = 'Genome_one.gbk'
    iprscan = 'genome_one.iprscan.xml'
    emapper = 'genome_one.emapper.annotations'
    if not checkFile(genbank) or not checkFile(iprscan) or not checkFile(emapper):
        if not os.path.isfile('test-annotate.tar.gz'):
            download(download_links.get('annotate'), 'test-annotate.tar.gz')
        subprocess.call(['tar', '-zxf', 'test-annotate.tar.gz'])
    for required in (genbank, iprscan, emapper):
        shutil.copyfile(required, os.path.join(tmpdir, required))
    #run annotate
    runCMD(['funannotate', 'annotate', '--genbank', genbank,
            '-o', 'annotate', '--cpus', str(args.cpus),
            '--iprscan', iprscan,
            '--eggnog', emapper], tmpdir)
    print("#########################################################")
    #check results
    results = os.path.join(tmpdir, 'annotate', 'annotate_results')
    expected = ['Genome_one.gbk', 'Genome_one.sqn', 'Genome_one.agp',
                'Genome_one.tbl', 'Genome_one.annotations.txt']
    # Explicit check instead of asserts: asserts are stripped under
    # `python -O`, which would make this test report SUCCESS unconditionally.
    if all(checkFile(os.path.join(results, name)) for name in expected):
        print('SUCCESS: `funannotate annotate` test complete.')
        shutil.rmtree(tmpdir)
    else:
        print('ERROR: `funannotate annotate` test failed - check logfiles')
    print("#########################################################\n ")
244
+
245
def runCompareTest():
    """Run `funannotate compare` on three test genomes and check its outputs.

    Fetches and unpacks test-compare.tar.gz when any of the three GenBank
    inputs is missing or empty. The run passes when compare/index.html,
    compare/phylogeny.html and compare.tar.gz all exist and are non-empty.
    On success the per-process scratch directory is removed; on failure it
    is left in place.
    """
    print("#########################################################")
    print('Running `funannotate compare` unit testing')
    tmpdir = 'test-compare_' + pid
    os.makedirs(tmpdir)
    input1 = 'Genome_one.gbk'
    input2 = 'Genome_two.gbk'
    input3 = 'Genome_three.gbk'
    if not checkFile(input1) or not checkFile(input2) or not checkFile(input3):
        if not os.path.isfile('test-compare.tar.gz'):
            download(download_links.get('compare'), 'test-compare.tar.gz')
        subprocess.call(['tar', '-zxf', 'test-compare.tar.gz'])
    for genome in (input1, input2, input3):
        shutil.copyfile(genome, os.path.join(tmpdir, genome))
    #run compare
    runCMD(['funannotate', 'compare',
            '-i', input1, input2, input3,
            '-o', 'compare', '--cpus', str(args.cpus),
            '--run_dnds', 'estimate', '--outgroup', 'botrytis_cinerea.dikarya'], tmpdir)
    print("#########################################################")
    #check results
    expected = [os.path.join(tmpdir, 'compare', 'index.html'),
                os.path.join(tmpdir, 'compare', 'phylogeny.html'),
                os.path.join(tmpdir, 'compare.tar.gz')]
    # Explicit check instead of asserts: asserts are stripped under
    # `python -O`, which would make this test report SUCCESS unconditionally.
    if all(checkFile(path) for path in expected):
        print('SUCCESS: `funannotate compare` test complete.')
        shutil.rmtree(tmpdir)
    else:
        print('ERROR: `funannotate compare` test failed - check logfiles')
    print("#########################################################\n ")
276
+
277
def runRNAseqTest():
    """Run the train -> predict -> update pipeline on the RNA-seq test data.

    Requires $AUGUSTUS_CONFIG_PATH; when it is unset an error is printed and
    the test is skipped. Any existing ``species/awesome_rna`` directory under
    that path is deleted before the run. The run passes when the resulting
    Awesome_rna.gff3 in update_results holds between 1630 and 1830 genes
    (inclusive). On success the per-process scratch directory is removed; on
    failure it is left in place.
    """
    print("#########################################################")
    print('Running funannotate RNA-seq training/prediction unit testing')
    #need to delete any pre-existing Augustus training data
    AUGUSTUS = os.environ.get("AUGUSTUS_CONFIG_PATH")
    if AUGUSTUS is None:
        # Reported with print: no `lib` logging module is imported in this
        # script, so calling lib.log.error here would raise NameError.
        print("ERROR: $AUGUSTUS_CONFIG_PATH environmental variable not found, set to continue.")
        return
    trained = os.path.join(AUGUSTUS, 'species', 'awesome_rna')
    if os.path.isdir(trained):
        shutil.rmtree(trained)
    tmpdir = 'test-rna_seq_' + pid
    os.makedirs(tmpdir)
    inputFasta = 'test.softmasked.fa'
    protEvidence = 'protein.evidence.fasta'
    illumina = 'rna-seq.illumina.fastq.gz'
    nanopore = 'rna-seq.nanopore.fastq.gz'
    if not checkFile(inputFasta) or not checkFile(protEvidence) or not checkFile(illumina) or not checkFile(nanopore):
        if not os.path.isfile('test-rna_seq.tar.gz'):
            download(download_links.get('rna-seq'), 'test-rna_seq.tar.gz')
        subprocess.call(['tar', '-zxf', 'test-rna_seq.tar.gz'])
    for f in [inputFasta, protEvidence, illumina, nanopore]:
        shutil.copyfile(f, os.path.join(tmpdir, f))
    #run train
    runCMD(['funannotate', 'train', '-i', inputFasta,
            '--single', illumina, '--nanopore_mrna', nanopore,
            '-o', 'rna-seq', '--cpus', str(args.cpus), '--jaccard_clip',
            '--species', "Awesome rna"], tmpdir)
    #run predict
    print("#########################################################")
    print('Now running `funannotate predict` using RNA-seq training data')
    runCMD(['funannotate', 'predict', '-i', inputFasta,
            '--protein_evidence', protEvidence,
            '-o', 'rna-seq', '--cpus', str(args.cpus),
            '--species', "Awesome rna"], tmpdir)
    #run update
    print("#########################################################")
    print('Now running `funannotate update` to run PASA-mediated UTR addition and multiple transcripts')
    runCMD(['funannotate', 'update', '-i', 'rna-seq',
            '--cpus', str(args.cpus)], tmpdir)
    print("#########################################################")
    #check results
    gff = os.path.join(tmpdir, 'rna-seq', 'update_results', 'Awesome_rna.gff3')
    # Test for the file first: a failed run produces no GFF3, and counting
    # genes in a missing file would raise IOError instead of reporting.
    if os.path.isfile(gff) and 1630 <= countGFFgenes(gff) <= 1830:
        print('SUCCESS: funannotate RNA-seq training/prediction test complete.')
        shutil.rmtree(tmpdir)
    else:
        print('ERROR: funannotate RNA-seq training/prediction test failed - check logfiles')
    print("#########################################################\n ")
326
+
327
+
328
# The process id makes each run's scratch directory names unique; the test
# functions read it as a module-level global.
pid = str(os.getpid())

# Requested tests run in this fixed order; 'all' selects every one of them.
run_everything = 'all' in args.tests
for test_name, run_test in [
        ('clean', runCleanTest),
        ('mask', runMaskTest),
        ('predict', runPredictTest),
        ('busco', runBuscoTest),
        ('rna-seq', runRNAseqTest),
        ('annotate', runAnnotateTest),
        ('compare', runCompareTest)]:
    if test_name in args.tests or run_everything:
        run_test()
0 commit comments