1+ #!/usr/bin/env python 
2+ 
3+ from  numpy  import  random  as  npr 
4+ from  .lib  import  adrsmlib  as  ad 
5+ from  . import  __version__ 
6+ import  click 
7+ 
# Command-line entry point for ADRSM.
#
# click lower-cases every option/argument name before passing it on, so the
# callback receives: conffile, readlength, nbinom, fwdadapt, revadapt, geom_p,
# mind, maxd, seed, threads, output and stats. They are forwarded untouched to
# main().
@click.command()
@click.version_option(__version__)
@click.argument('confFile', type=click.Path(exists=True,
                                            readable=True,
                                            resolve_path=True))
@click.option('-r',
              '--readLength',
              # Integer default, consistent with type=int (was the string '76').
              default=76,
              type=int,
              show_default=True,
              help='Average read length')
@click.option('-n',
              '--nbinom',
              default=8,
              type=int,
              show_default=True,
              help='n parameter for Negative Binomial insert length distribution')
@click.option('-fwd',
              '--fwdAdapt',
              default='AGATCGGAAGAGCACACGTCTGAACTCCAGTCACNNNNNNATCTCGTATGCCGTCTTCTGCTTG',
              type=str,
              show_default=True,
              help='Forward adaptor sequence')
@click.option('-rev',
              '--revAdapt',
              default='AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCGGTGGTCGCCGTATCATT',
              type=str,
              show_default=True,
              help='Reverse adaptor sequence')
@click.option('-p',
              '--geom_p',
              default=0.5,
              type=click.FloatRange(min=0.0, max=1.0),
              show_default=True,
              help='Geometric distribution parameter for deamination')
@click.option('-m',
              '--minD',
              default=0.01,
              type=click.FloatRange(min=0.0, max=1.0),
              show_default=True,
              help='Deamination substitution base frequency')
@click.option('-M',
              '--maxD',
              default=0.3,
              type=click.FloatRange(min=0.0, max=1.0),
              show_default=True,
              help='Deamination substitution max frequency')
# '-S' (upper case): '-s' was declared for both --seed and --stats, and one
# short flag cannot address two options. '-s' stays with --stats; the seed
# gets its own flag, mirroring the existing '-m' / '-M' pair.
@click.option('-S',
              '--seed',
              default=42,
              type=int,
              show_default=True,
              help='Seed for random number generator')
@click.option('-t',
              '--threads',
              default=2,
              type=click.IntRange(min=1, max=1024),
              show_default=True,
              help='Number of threads for parallel processing')
@click.option('-o',
              '--output',
              default='./metagenome',
              type=click.Path(file_okay=True, writable=True, resolve_path=True),
              show_default=True,
              help='Fastq output file basename')
@click.option('-s',
              '--stats',
              default='./stats.csv',
              type=click.Path(file_okay=True, writable=True, resolve_path=True),
              show_default=True,
              help='Summary statistics file')
# NOTE(review): `no_args_is_help` is kept only so the callback signature stays
# unchanged. click never passes it, so it has no effect here; if "show help
# when called without arguments" is wanted, it presumably belongs on
# @click.command(...) -- confirm against the click version in use.
#
# The docstring below is the text click prints for --help (the leading \b
# paragraph marker asks click not to re-wrap the banner), so it is user-facing
# output rather than developer documentation.
def cli(no_args_is_help=True, **kwargs):
    """\b
    ==================================================
    ADRSM: Ancient DNA Read Simulator for Metagenomics
    Author: Maxime Borry
    Contact: <borry[at]shh.mpg.de>
    Homepage & Documentation: github.com/maxibor/adrsm

    CONFFILE: path to ADRSM configuration file
    """
    main(**kwargs)
92+ 
def read_config(infile):
    """
    Read the ADRSM configuration file and return the per-genome settings.

    The first line of the file is treated as a header and skipped. Every
    other non-blank line is split on commas, and all spaces are removed from
    each field. Fields are read by position:

        0  genome file path (used as the key of the returned dict)
        1  insert size, parsed as int
        2  coverage, parsed as float
        3  deamination flag, interpreted by ad.parse_yes_no
        4  mutation rate, parsed as float (optional)
        5  age, parsed as float (required when the mutation rate is non-zero)

    Args:
        infile: path to the configuration file.

    Returns:
        dict mapping each genome path to a dict with the keys 'size', 'cov',
        'deam', 'mutate', 'mutrate' and 'age'. When the mutation rate is
        absent or zero, 'mutate' is False and 'mutrate' / 'age' are 0.
        An empty or header-only file yields an empty dict.

    Raises:
        ValueError: if a line has fewer than four fields, if a numeric field
            cannot be parsed, or if a non-zero mutation rate is given
            without an age.
    """
    genomes = {}
    with open(infile, "r") as f:
        # Skip the header; the default avoids StopIteration on an empty file.
        next(f, None)
        # Data starts on line 2 of the file (line 1 is the header).
        for lineno, line in enumerate(f, start=2):
            line = line.rstrip()
            if not line:
                # Blank lines (e.g. a trailing newline) carry no genome.
                continue
            fields = [field.replace(" ", "") for field in line.split(",")]
            if len(fields) < 4:
                raise ValueError(
                    "{}: line {}: expected at least 4 comma-separated fields, "
                    "got {}".format(infile, lineno, len(fields)))

            agenome = fields[0]
            ainsert = int(fields[1])
            acov = float(fields[2])
            deamination = ad.parse_yes_no(fields[3])

            rate = float(fields[4]) if len(fields) > 4 else 0.0
            if rate != 0.0:
                # The age column is mandatory once mutation is requested;
                # report it explicitly instead of failing on fields[5].
                if len(fields) < 6:
                    raise ValueError(
                        "{}: line {}: a non-zero mutation rate requires an "
                        "age in the sixth field".format(infile, lineno))
                mutate = True
                mutrate = rate
                age = float(fields[5])
            else:
                mutate = False
                mutrate = 0
                age = 0

            genomes[agenome] = {'size': ainsert,
                                'cov': acov,
                                'deam': deamination,
                                'mutate': mutate,
                                'mutrate': mutrate,
                                'age': age}
    return genomes
120+ 
121+ 
def main(conffile, readlength, nbinom, fwdadapt, revadapt, geom_p, mind, maxd, seed, threads, output, stats):
    """
    Run the read simulation for every genome listed in the config file.

    Seeds numpy's global random state with `seed`, reads the configuration
    with read_config(), calls ad.run_read_simulation_multi once per genome
    and finally hands the collected results to ad.write_stat before printing
    a short summary.

    Args:
        conffile: path to the ADRSM configuration file.
        readlength: passed to the simulation as READLEN.
        nbinom: passed to the simulation as NBINOM.
        fwdadapt: forward adaptor sequence, passed as A1.
        revadapt: reverse adaptor sequence, passed as A2.
        geom_p: passed to the simulation as GEOM_P.
        mind: passed to the simulation as THEMIN.
        maxd: passed to the simulation as THEMAX.
        seed: seed given to numpy.random.seed.
        threads: passed to the simulation as PROCESS.
        output: FASTQ output basename, passed as FASTQ_OUT.
        stats: path handed to ad.write_stat as stat_out.
    """
    # Passed to every simulation run as MINLENGTH.
    min_length = 20
    npr.seed(seed)

    # Results keyed by ad.get_basename(genome path).
    # NOTE(review): two config entries sharing a basename would overwrite
    # each other here -- confirm whether that can happen in practice.
    stat_dict = {}
    for genome_path, settings in read_config(conffile).items():
        stat_and_run = ad.run_read_simulation_multi(INFILE=genome_path,
                                                    COV=settings['cov'],
                                                    READLEN=readlength,
                                                    INSERLEN=settings['size'],
                                                    NBINOM=nbinom,
                                                    A1=fwdadapt,
                                                    A2=revadapt,
                                                    MINLENGTH=min_length,
                                                    MUTATE=settings['mutate'],
                                                    MUTRATE=settings['mutrate'],
                                                    AGE=settings['age'],
                                                    DAMAGE=settings['deam'],
                                                    GEOM_P=geom_p,
                                                    THEMIN=mind,
                                                    THEMAX=maxd,
                                                    PROCESS=threads,
                                                    FASTQ_OUT=output)
        stat_dict[ad.get_basename(genome_path)] = stat_and_run

    ad.write_stat(stat_dict=stat_dict, stat_out=stats)
    print("\n -- ADRSM v" + __version__ +
          " finished generating this mock metagenome --")
    print("-- FASTQ files written to " + output +
          ".1.fastq and " + output + ".2.fastq --")
    print("-- Statistic file written to " + stats + " --")
# Run the click command only when this module is executed as a script,
# not when it is imported.
if __name__ == "__main__":
    cli()
0 commit comments