@@ -326,27 +326,21 @@ def countGFFgenes(input):
326
326
return count
327
327
328
328
def runMultiProgress(function, inputList, cpus):
    """Run ``function`` on every item of ``inputList`` in a process pool.

    Each item is submitted on its own through ``apply_async`` as the single
    positional argument. While any task is still unfinished, the completed
    percentage is rewritten on one stdout line roughly once per second.

    Args:
        function: Callable invoked with one item of ``inputList``.
        inputList: Sized iterable of items to process.
        cpus: Number of worker processes handed to ``multiprocessing.Pool``.

    Returns:
        None. Task return values are not collected, and because ``get()`` is
        never called on the async results, exceptions raised inside
        ``function`` are not re-raised here.
    """
    tasks = len(inputList)
    # setup pool
    p = multiprocessing.Pool(cpus)
    try:
        # submit one task per input item
        results = [p.apply_async(function, [i]) for i in inputList]
        # refresh the progress line every second until every task is ready
        while True:
            incomplete_count = sum(1 for x in results if not x.ready())
            if incomplete_count == 0:
                break
            sys.stdout.write(" Progress: %.2f%% \r " % (float(tasks - incomplete_count) / tasks * 100))
            sys.stdout.flush()
            time.sleep(1)
        p.close()
    except BaseException:
        # Interrupted (e.g. KeyboardInterrupt) or submission failed: stop the
        # workers now instead of leaving them running after we unwind.
        p.terminate()
        raise
    finally:
        # Reached after close() on success or terminate() on failure, so the
        # worker processes are always reaped.
        p.join()
352
346
@@ -1988,9 +1982,8 @@ def simplestTreeEver(fasta, tree):
1988
1982
for rec in SeqIO .parse (input , 'fasta' ):
1989
1983
ids .append (rec .id )
1990
1984
outfile .write ('(%s,%s);' % (ids [0 ], ids [1 ]))
1991
-
1992
1985
1993
- def rundNdS (folder ):
1986
+ def rundNdSexhaustive (folder ):
1994
1987
FNULL = open (os .devnull , 'w' )
1995
1988
#setup intermediate files
1996
1989
tmpdir = os .path .dirname (folder )
@@ -2025,6 +2018,43 @@ def rundNdS(folder):
2025
2018
for file in os .listdir (tmpdir ):
2026
2019
if file .startswith (name + '.' ):
2027
2020
os .rename (os .path .join (tmpdir , file ), os .path .join (tmpdir , name , file ))
2021
+
2022
+
2023
def rundNdSestimate(folder):
    """Run the ete3 ``evol`` M0 model for the sequences belonging to ``folder``.

    Intermediate files are named ``<name>.*`` and live in the parent
    directory of ``folder``, where ``<name>`` is the basename of ``folder``.
    The analysis is skipped when ``checkannotations`` accepts the final log
    ``<parent>/<name>/<name>.log``. Afterwards every ``<name>.*`` entry in
    the parent directory is moved into ``<parent>/<name>``.

    Args:
        folder: Path whose basename names the run; the matching
            ``<name>.transcripts.fa`` in the parent directory is passed to
            the helper functions as input.
    """
    # setup intermediate files
    tmpdir = os.path.dirname(folder)
    name = os.path.basename(folder)
    transcripts = os.path.join(tmpdir, name + '.transcripts.fa')
    prots = os.path.join(tmpdir, name + '.proteins.fa')
    aln = os.path.join(tmpdir, name + '.aln')
    codon = os.path.join(tmpdir, name + '.codon.aln')
    tree = os.path.join(tmpdir, name + '.tree')
    log = os.path.join(tmpdir, name + '.log')
    finallog = os.path.join(tmpdir, name, name + '.log')
    if not checkannotations(finallog):
        num_seqs = countfasta(transcripts)
        # Translate to protein space
        translatemRNA(transcripts, prots)
        # align protein sequences
        alignMAFFT(prots, aln)
        # convert to codon alignment
        align2Codon(aln, transcripts, codon)
        if checkannotations(codon):
            if num_seqs > 2:
                # now generate a tree using phyml
                drawPhyMLtree(codon, tree)
            else:
                simplestTreeEver(transcripts, tree)
            # now run codeml through ete3
            etecmd = ['ete3', 'evol', '--alg', os.path.abspath(codon), '-t', os.path.abspath(tree), '--models', 'M0', '-o', name, '--clear_all', '--codeml_param', 'cleandata,1']
            with open(log, 'w') as logfile:
                logfile.write('\n %s\n ' % ' '.join(etecmd))
                # Push the command line to disk before the child process
                # writes to the same descriptor, so it stays at the top.
                logfile.flush()
                subprocess.call(etecmd, cwd=tmpdir, stdout=logfile, stderr=logfile)
    # clean up: move this run's intermediate files into its own directory
    # NOTE(review): nesting was reconstructed from a diff without indentation;
    # confirm the clean-up loop belongs at function level.
    for entry in os.listdir(tmpdir):
        if entry.startswith(name + '.'):
            os.rename(os.path.join(tmpdir, entry), os.path.join(tmpdir, name, entry))
2028
2058
2029
2059
def get_subdirs (a_dir ):
2030
2060
return [os .path .join (a_dir , name ) for name in os .listdir (a_dir )
@@ -2069,14 +2099,6 @@ def chunkIt(seq, num):
2069
2099
last += avg
2070
2100
return out
2071
2101
2072
def countKaks(folder, num):
    """Gather the ``.fasta.axt`` files in ``folder`` and split them with ``chunkIt``.

    Args:
        folder: Directory to list.
        num: Passed through to ``chunkIt`` as its second argument.

    Returns:
        Whatever ``chunkIt`` returns for the list of joined file paths.
    """
    axt_paths = [
        os.path.join(folder, entry)
        for entry in os.listdir(folder)
        if entry.endswith('.fasta.axt')
    ]
    # split files by x chunks
    return chunkIt(axt_paths, num)
2080
2102
2081
2103
HEADER = '''
2082
2104
<!DOCTYPE html>
0 commit comments