Skip to content

Commit 2874481

Browse files
committed
adding random seed choice and pep8
1 parent a193323 commit 2874481

File tree

2 files changed

+138
-117
lines changed

2 files changed

+138
-117
lines changed

adrsm

Lines changed: 86 additions & 77 deletions
Original file line numberDiff line numberDiff line change
@@ -5,72 +5,78 @@ import lib.adrsmlib as ad
55

66
import argparse
77

8+
89
def _get_args():
910
'''This function parses and return arguments passed in'''
1011
parser = argparse.ArgumentParser(
11-
prog='ADRSM',
12-
description='Ancient DNA Read Simulator for Metagenomics')
12+
prog='ADRSM',
13+
description='Ancient DNA Read Simulator for Metagenomics')
1314
parser.add_argument('confFile', help="path to configuration file")
1415
parser.add_argument(
15-
'-d',
16-
dest="directory",
17-
default = ".",
18-
help="path to genome directory. Default = .")
16+
'-d',
17+
dest="directory",
18+
default=".",
19+
help="path to genome directory. Default = .")
20+
parser.add_argument(
21+
'-r',
22+
dest='readLength',
23+
default=76,
24+
help="Average read length. Default = 76")
1925
parser.add_argument(
20-
'-r',
21-
dest='readLength',
22-
default=76,
23-
help="Average read length. Default = 76")
26+
'-l',
27+
dest="lenStdev",
28+
default=10,
29+
help="Insert length standard deviation. Default = 10")
2430
parser.add_argument(
25-
'-l',
26-
dest="lenStdev",
27-
default=10,
28-
help="Insert length standard deviation. Default = 10")
31+
'-fwd',
32+
dest="fwdAdapt",
33+
default="AGATCGGAAGAGCACACGTCTGAACTCCAGTCACNNNNNNATCTCGTATGCCGTCTTCTGCTTG",
34+
help="Forward adaptor. Default = AGATCGGAAGAGCACACGTCTGAACTCCAGTCACNNNNNNATCTCGTATGCCGTCTTCTGCTTG")
2935
parser.add_argument(
30-
'-fwd',
31-
dest="fwdAdapt",
32-
default="AGATCGGAAGAGCACACGTCTGAACTCCAGTCACNNNNNNATCTCGTATGCCGTCTTCTGCTTG",
33-
help="Forward adaptor. Default = AGATCGGAAGAGCACACGTCTGAACTCCAGTCACNNNNNNATCTCGTATGCCGTCTTCTGCTTG")
36+
'-rev',
37+
dest="revAdapt",
38+
default="AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCGGTGGTCGCCGTATCATT",
39+
help="Reverse adaptor. Default = AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCGGTGGTCGCCGTATCATT")
3440
parser.add_argument(
35-
'-rev',
36-
dest="revAdapt",
37-
default="AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCGGTGGTCGCCGTATCATT",
38-
help="Reverse adaptor. Default = AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCGGTGGTCGCCGTATCATT")
41+
'-e',
42+
dest="error",
43+
default=0.01,
44+
help="Illumina sequecing error. Default = 0.01")
3945
parser.add_argument(
40-
'-e',
41-
dest="error",
42-
default=0.01,
43-
help="Illumina sequecing error. Default = 0.01")
46+
'-p',
47+
dest="geom_p",
48+
default=0.5,
49+
help="Geometric distribution parameter for deamination. Default = 0.5")
4450
parser.add_argument(
45-
'-p',
46-
dest="geom_p",
47-
default=0.5,
48-
help="Geometric distribution parameter for deamination. Default = 0.5")
51+
'-m',
52+
dest="min",
53+
default=0.001,
54+
help="Deamination substitution base frequency. Default = 0.001")
4955
parser.add_argument(
50-
'-m',
51-
dest="min",
52-
default=0.001,
53-
help="Deamination substitution base frequency. Default = 0.001")
56+
'-M',
57+
dest="max",
58+
default=0.3,
59+
help="Deamination substitution max frequency. Default = 0.3")
5460
parser.add_argument(
55-
'-M',
56-
dest="max",
57-
default=0.3,
58-
help="Deamination substitution max frequency. Default = 0.3")
61+
'-o',
62+
dest="output",
63+
default="metagenome",
64+
help="Output file basename. Default = ./metagenome.*")
5965
parser.add_argument(
60-
'-o',
61-
dest="output",
62-
default="metagenome",
63-
help="Output file basename. Default = ./metagenome.*")
66+
'-q',
67+
dest="quality",
68+
default="d",
69+
help="Base quality encoding. Default = d (PHRED+64)")
6470
parser.add_argument(
65-
'-q',
66-
dest="quality",
67-
default="d",
68-
help="Base quality encoding. Default = d (PHRED+64)")
71+
'-s',
72+
dest="stats",
73+
default="stats.csv",
74+
help="Statistic file. Default = stats.csv")
6975
parser.add_argument(
70-
'-s',
71-
dest="stats",
72-
default="stats.csv",
73-
help="Statistic file. Default = stats.csv")
76+
'-se',
77+
dest="seed",
78+
default=7357,
79+
help="Seed for random generator. Default = 7357")
7480

7581
args = parser.parse_args()
7682

@@ -84,11 +90,12 @@ def _get_args():
8490
geom_p = args.geom_p
8591
themin = args.min
8692
themax = args.max
87-
outfile= args.output
93+
outfile = args.output
8894
quality = args.quality
8995
stats = args.stats
96+
seed = int(args.seed)
9097

91-
return(infile, gendir, readlen, lendev, a1, a2, err, geom_p, themin, themax, outfile, quality, stats)
98+
return(infile, gendir, readlen, lendev, a1, a2, err, geom_p, themin, themax, outfile, quality, stats, seed)
9299

93100

94101
def read_config(infile, gendir):
@@ -101,43 +108,45 @@ def read_config(infile, gendir):
101108
for line in f:
102109
line = line.rstrip()
103110
splitline = line.split(",")
104-
agenome = splitline[0].replace(" ","")
105-
ainsert = int(splitline[1].replace(" ",""))
106-
acov = float(splitline[2].replace(" ",""))
107-
deambool = str(splitline[3].replace(" ",""))
111+
agenome = splitline[0].replace(" ", "")
112+
ainsert = int(splitline[1].replace(" ", ""))
113+
acov = float(splitline[2].replace(" ", ""))
114+
deambool = str(splitline[3].replace(" ", ""))
108115
deamination = ad.parse_yes_no(deambool)
109-
genomes[gendir+"/"+agenome] = [ainsert, acov, deamination]
116+
genomes[gendir + "/" + agenome] = [ainsert, acov, deamination]
110117
return(genomes)
111118

119+
112120
if __name__ == "__main__":
113-
INFILE,GENDIR,READLEN,LENDEV,A1,A2,ERR,GEOM_P,THEMIN,THEMAX,OUTFILE,QUALITY,STATS = _get_args()
121+
INFILE, GENDIR, READLEN, LENDEV, A1, A2, ERR, GEOM_P, THEMIN, THEMAX, OUTFILE, QUALITY, STATS, SEED = _get_args()
114122

115123
MINLENGTH = 20
116-
124+
npr.seed(SEED)
117125
genome_dict = {}
118126
stat_dict = {}
119127
all_genomes = read_config(INFILE, GENDIR)
120128
for agenome in all_genomes.keys():
121-
stat_and_run = ad.run_read_simulation_multi(INFILE = agenome,
122-
NREAD = None,
123-
COV = all_genomes[agenome][1],
124-
READLEN = READLEN,
125-
INSERLEN = all_genomes[agenome][0] ,
126-
LENDEV = LENDEV,
127-
A1 = A1,
128-
A2 = A2,
129-
MINLENGTH = MINLENGTH,
130-
ERR = ERR,
131-
DAMAGE = all_genomes[agenome][2],
132-
GEOM_P = GEOM_P,
133-
THEMIN = THEMIN,
134-
THEMAX = THEMAX,
135-
fastq_dict = genome_dict,
136-
QUALITY=QUALITY)
129+
stat_and_run = ad.run_read_simulation_multi(INFILE=agenome,
130+
NREAD=None,
131+
COV=all_genomes[agenome][1],
132+
READLEN=READLEN,
133+
INSERLEN=all_genomes[agenome][0],
134+
LENDEV=LENDEV,
135+
A1=A1,
136+
A2=A2,
137+
MINLENGTH=MINLENGTH,
138+
ERR=ERR,
139+
DAMAGE=all_genomes[agenome][2],
140+
GEOM_P=GEOM_P,
141+
THEMIN=THEMIN,
142+
THEMAX=THEMAX,
143+
fastq_dict=genome_dict,
144+
QUALITY=QUALITY)
137145
stat_dict[ad.get_basename(agenome)] = stat_and_run
138146

139147
ad.write_fastq_multi(fastq_dict=genome_dict, outputfile=OUTFILE)
140148
ad.write_stat(stat_dict=stat_dict, stat_out=STATS)
141149
print("-- ADRSM Finished --")
142-
print("-- FASTQ files written to "+OUTFILE+".1.fastq and "+OUTFILE+".2.fastq --")
143-
print("-- Statistic file written to "+STATS+ " --")
150+
print("-- FASTQ files written to " + OUTFILE +
151+
".1.fastq and " + OUTFILE + ".2.fastq --")
152+
print("-- Statistic file written to " + STATS + " --")

0 commit comments

Comments
 (0)