@@ -5,72 +5,78 @@ import lib.adrsmlib as ad
55
66import  argparse 
77
8+ 
89def  _get_args ():
910    '''This function parses and return arguments passed in''' 
1011    parser  =  argparse .ArgumentParser (
11-     prog = 'ADRSM' ,
12-     description = 'Ancient DNA Read Simulator for Metagenomics' )
12+          prog = 'ADRSM' ,
13+          description = 'Ancient DNA Read Simulator for Metagenomics' )
1314    parser .add_argument ('confFile' , help = "path to configuration file" )
1415    parser .add_argument (
15-     '-d' ,
16-     dest = "directory" ,
17-     default  =  "." ,
18-     help = "path to genome directory. Default = ." )
16+         '-d' ,
17+         dest = "directory" ,
18+         default = "." ,
19+         help = "path to genome directory. Default = ." )
20+     parser .add_argument (
21+         '-r' ,
22+         dest = 'readLength' ,
23+         default = 76 ,
24+         help = "Average read length. Default = 76" )
1925    parser .add_argument (
20-     '-r '
21-     dest = 'readLength' ,
22-     default = 76 ,
23-     help = "Average read  length. Default = 76 " )
26+          '-l '
27+          dest = "lenStdev" ,
28+          default = 10 ,
29+          help = "Insert  length standard deviation . Default = 10 " )
2430    parser .add_argument (
25-     '-l '
26-     dest = "lenStdev " ,
27-     default = 10 ,
28-     help = "Insert length standard deviation . Default = 10 " )
31+          '-fwd '
32+          dest = "fwdAdapt " ,
33+          default = "AGATCGGAAGAGCACACGTCTGAACTCCAGTCACNNNNNNATCTCGTATGCCGTCTTCTGCTTG" ,
34+          help = "Forward adaptor . Default = AGATCGGAAGAGCACACGTCTGAACTCCAGTCACNNNNNNATCTCGTATGCCGTCTTCTGCTTG " )
2935    parser .add_argument (
30-     '-fwd '
31-     dest = "fwdAdapt " ,
32-     default = "AGATCGGAAGAGCACACGTCTGAACTCCAGTCACNNNNNNATCTCGTATGCCGTCTTCTGCTTG " ,
33-     help = "Forward  adaptor. Default = AGATCGGAAGAGCACACGTCTGAACTCCAGTCACNNNNNNATCTCGTATGCCGTCTTCTGCTTG " )
36+          '-rev '
37+          dest = "revAdapt " ,
38+          default = "AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCGGTGGTCGCCGTATCATT " ,
39+          help = "Reverse  adaptor. Default = AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCGGTGGTCGCCGTATCATT " )
3440    parser .add_argument (
35-     '-rev '
36-     dest = "revAdapt " ,
37-     default = "AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCGGTGGTCGCCGTATCATT" ,
38-     help = "Reverse adaptor . Default = AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCGGTGGTCGCCGTATCATT " )
41+          '-e '
42+          dest = "error " ,
43+          default = 0.01 ,
44+          help = "Illumina sequecing error . Default = 0.01 " )
3945    parser .add_argument (
40-     '-e '
41-     dest = "error " ,
42-     default = 0.01  ,
43-     help = "Illumina sequecing error . Default = 0.01 " )
46+          '-p '
47+          dest = "geom_p " ,
48+          default = 0.5  ,
49+          help = "Geometric distribution parameter for deamination . Default = 0.5 " )
4450    parser .add_argument (
45-     '-p '
46-     dest = "geom_p " ,
47-     default = 0.5  ,
48-     help = "Geometric distribution parameter for deamination . Default = 0.5 " )
51+          '-m '
52+          dest = "min " ,
53+          default = 0.001  ,
54+          help = "Deamination substitution base frequency . Default = 0.001 " )
4955    parser .add_argument (
50-     '-m '
51-     dest = "min " ,
52-     default = 0.001  ,
53-     help = "Deamination substitution base  frequency. Default = 0.001 " )
56+          '-M '
57+          dest = "max " ,
58+          default = 0.3  ,
59+          help = "Deamination substitution max  frequency. Default = 0.3 " )
5460    parser .add_argument (
55-     '-M '
56-     dest = "max " ,
57-     default = 0.3 ,
58-     help = "Deamination substitution max frequency . Default = 0.3 " )
61+          '-o '
62+          dest = "output " ,
63+          default = "metagenome" ,
64+          help = "Output file basename . Default = ./metagenome.* " )
5965    parser .add_argument (
60-     '-o '
61-     dest = "output " ,
62-     default = "metagenome " ,
63-     help = "Output file basename . Default = ./metagenome.* " )
66+          '-q '
67+          dest = "quality " ,
68+          default = "d " ,
69+          help = "Base quality encoding . Default = d (PHRED+64) " )
6470    parser .add_argument (
65-     '-q '
66-     dest = "quality " ,
67-     default = "d " ,
68-     help = "Base quality encoding . Default = d (PHRED+64) " )
71+          '-s '
72+          dest = "stats " ,
73+          default = "stats.csv " ,
74+          help = "Statistic file . Default = stats.csv " )
6975    parser .add_argument (
70-     '-s '
71-     dest = "stats " ,
72-     default = "stats.csv" ,
73-     help = "Statistic file . Default = stats.csv " )
76+          '-se '
77+          dest = "seed " ,
78+          default = 7357 ,
79+          help = "Seed for random generator . Default = 7357 " )
7480
7581    args  =  parser .parse_args ()
7682
@@ -84,11 +90,12 @@ def _get_args():
8490    geom_p  =  args .geom_p 
8591    themin  =  args .min 
8692    themax  =  args .max 
87-     outfile =  args .output 
93+     outfile   =  args .output 
8894    quality  =  args .quality 
8995    stats  =  args .stats 
96+     seed  =  int (args .seed )
9097
91-     return (infile , gendir , readlen , lendev , a1 , a2 , err , geom_p , themin , themax , outfile , quality , stats )
98+     return (infile , gendir , readlen , lendev , a1 , a2 , err , geom_p , themin , themax , outfile , quality , stats ,  seed )
9299
93100
94101def  read_config (infile , gendir ):
@@ -101,43 +108,45 @@ def read_config(infile, gendir):
101108        for  line  in  f :
102109            line  =  line .rstrip ()
103110            splitline  =  line .split ("," )
104-             agenome  =  splitline [0 ].replace (" " ,"" )
105-             ainsert  =  int (splitline [1 ].replace (" " ,"" ))
106-             acov  =  float (splitline [2 ].replace (" " ,"" ))
107-             deambool  =  str (splitline [3 ].replace (" " ,"" ))
111+             agenome  =  splitline [0 ].replace (" " ,  "" )
112+             ainsert  =  int (splitline [1 ].replace (" " ,  "" ))
113+             acov  =  float (splitline [2 ].replace (" " ,  "" ))
114+             deambool  =  str (splitline [3 ].replace (" " ,  "" ))
108115            deamination  =  ad .parse_yes_no (deambool )
109-             genomes [gendir + "/" + agenome ] =  [ainsert , acov , deamination ]
116+             genomes [gendir   +   "/"   +   agenome ] =  [ainsert , acov , deamination ]
110117    return (genomes )
111118
119+ 
112120if  __name__  ==  "__main__" :
113-     INFILE ,GENDIR ,READLEN ,LENDEV ,A1 ,A2 ,ERR ,GEOM_P ,THEMIN ,THEMAX ,OUTFILE ,QUALITY ,STATS  =  _get_args ()
121+     INFILE ,  GENDIR ,  READLEN ,  LENDEV ,  A1 ,  A2 ,  ERR ,  GEOM_P ,  THEMIN ,  THEMAX ,  OUTFILE ,  QUALITY ,  STATS ,  SEED  =  _get_args ()
114122
115123    MINLENGTH  =  20 
116- 
124+      npr . seed ( SEED ) 
117125    genome_dict  =  {}
118126    stat_dict  =  {}
119127    all_genomes  =  read_config (INFILE , GENDIR )
120128    for  agenome  in  all_genomes .keys ():
121-         stat_and_run  =  ad .run_read_simulation_multi (INFILE   =   agenome ,
122-                                   NREAD   =   None ,
123-                                   COV   =   all_genomes [agenome ][1 ],
124-                                   READLEN   =   READLEN ,
125-                                   INSERLEN   =   all_genomes [agenome ][0 ]  ,
126-                                   LENDEV   =   LENDEV ,
127-                                   A1   =   A1 ,
128-                                   A2   =   A2 ,
129-                                   MINLENGTH   =   MINLENGTH ,
130-                                   ERR   =   ERR ,
131-                                   DAMAGE   =   all_genomes [agenome ][2 ],
132-                                   GEOM_P   =   GEOM_P ,
133-                                   THEMIN   =   THEMIN ,
134-                                   THEMAX   =   THEMAX ,
135-                                   fastq_dict   =   genome_dict ,
136-                                   QUALITY = QUALITY )
129+         stat_and_run  =  ad .run_read_simulation_multi (INFILE = agenome ,
130+                                                      NREAD = None ,
131+                                                      COV = all_genomes [agenome ][1 ],
132+                                                      READLEN = READLEN ,
133+                                                      INSERLEN = all_genomes [agenome ][0 ],
134+                                                      LENDEV = LENDEV ,
135+                                                      A1 = A1 ,
136+                                                      A2 = A2 ,
137+                                                      MINLENGTH = MINLENGTH ,
138+                                                      ERR = ERR ,
139+                                                      DAMAGE = all_genomes [agenome ][2 ],
140+                                                      GEOM_P = GEOM_P ,
141+                                                      THEMIN = THEMIN ,
142+                                                      THEMAX = THEMAX ,
143+                                                      fastq_dict = genome_dict ,
144+                                                      QUALITY = QUALITY )
137145        stat_dict [ad .get_basename (agenome )] =  stat_and_run 
138146
139147    ad .write_fastq_multi (fastq_dict = genome_dict , outputfile = OUTFILE )
140148    ad .write_stat (stat_dict = stat_dict , stat_out = STATS )
141149    print ("-- ADRSM Finished --" )
142-     print ("-- FASTQ files written to " + OUTFILE + ".1.fastq and " + OUTFILE + ".2.fastq --" )
143-     print ("-- Statistic file written to " + STATS +  " --" )
150+     print ("-- FASTQ files written to "  +  OUTFILE  + 
151+           ".1.fastq and "  +  OUTFILE  +  ".2.fastq --" )
152+     print ("-- Statistic file written to "  +  STATS  +  " --" )
0 commit comments