Skip to content

Commit 6ab1749

Browse files
committed
change genome specifications
1 parent e9e9b9d commit 6ab1749

File tree

6 files changed

+49213
-49213
lines changed

6 files changed

+49213
-49213
lines changed

.travis.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,4 +7,4 @@ install:
77
- pip install scipy
88
- pip install requests
99
# command to run tests
10-
script: python adrsm -d ./data/genomes -t 1 ./data/short_genome_list.csv
10+
script: python adrsm -t 1 ./data/short_genome_list.csv

README.md

Lines changed: 25 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -35,37 +35,35 @@ You can cite ADRSM like this:
3535
# Help
3636

3737
$ adrsm --help
38-
usage: ADRSM v0.7 [-h] [-d DIRECTORY] [-r READLENGTH] [-n NBINOM]
39-
[-fwd FWDADAPT] [-rev REVADAPT] [-e ERROR] [-p GEOM_P]
40-
[-m MIN] [-M MAX] [-o OUTPUT] [-q QUALITY] [-s STATS]
41-
[-se SEED] [-t THREADS]
42-
confFile
38+
usage: ADRSM v0.8 [-h] [-r READLENGTH] [-n NBINOM] [-fwd FWDADAPT]
39+
[-rev REVADAPT] [-e ERROR] [-p GEOM_P] [-m MIN] [-M MAX]
40+
[-o OUTPUT] [-q QUALITY] [-s STATS] [-se SEED] [-t THREADS]
41+
confFile
4342

4443
Ancient DNA Read Simulator for Metagenomics
4544

4645
positional arguments:
47-
confFile path to configuration file
46+
confFile path to configuration file
4847

4948
optional arguments:
50-
-h, --help show this help message and exit
51-
-d DIRECTORY path to genome directory. Default = .
52-
-r READLENGTH Average read length. Default = 76
53-
-n NBINOM n parameter for Negative Binomial insert length distribution.
54-
Default = 8
55-
-fwd FWDADAPT Forward adaptor. Default = AGATCGGAAGAGCACACGTCTGAACTCCAGTCAC
56-
NNNNNNATCTCGTATGCCGTCTTCTGCTTG
57-
-rev REVADAPT Reverse adaptor. Default =
58-
AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCGGTGGTCGCCGTATCATT
59-
-e ERROR Illumina sequecing error. Default = 0.01
60-
-p GEOM_P Geometric distribution parameter for deamination. Default =
61-
0.5
62-
-m MIN Deamination substitution base frequency. Default = 0.001
63-
-M MAX Deamination substitution max frequency. Default = 0.3
64-
-o OUTPUT Output file basename. Default = ./metagenome.*
65-
-q QUALITY Base quality encoding. Default = d (PHRED+64)
66-
-s STATS Statistic file. Default = stats.csv
67-
-se SEED Seed for random generator. Default = 7357
68-
-t THREADS Number of threads for parallel processing. Default = 2
49+
-h, --help show this help message and exit
50+
-r READLENGTH Average read length. Default = 76
51+
-n NBINOM n parameter for Negative Binomial insert length distribution.
52+
Default = 8
53+
-fwd FWDADAPT Forward adaptor. Default = AGATCGGAAGAGCACACGTCTGAACTCCAGTCAC
54+
NNNNNNATCTCGTATGCCGTCTTCTGCTTG
55+
-rev REVADAPT Reverse adaptor. Default =
56+
AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCGGTGGTCGCCGTATCATT
57+
-e ERROR Illumina sequencing error. Default = 0.01
58+
-p GEOM_P Geometric distribution parameter for deamination. Default =
59+
0.5
60+
-m MIN Deamination substitution base frequency. Default = 0.001
61+
-M MAX Deamination substitution max frequency. Default = 0.3
62+
-o OUTPUT Output file basename. Default = ./metagenome.*
63+
-q QUALITY Base quality encoding. Default = d (PHRED+64)
64+
-s STATS Statistic file. Default = stats.csv
65+
-se SEED Seed for random generator. Default = 7357
66+
-t THREADS Number of threads for parallel processing. Default = 2
6967

7068
## Genome directory
7169

@@ -78,8 +76,8 @@ Example [short_genome_list.csv](./data/short_genome_list.csv):
7876

7977
| genome(mandatory) | insert_size(mandatory) | coverage(mandatory) | deamination(mandatory) | mutation_rate(optional) | age(optional) |
8078
| ---------------------------- | ---------------------- | ------------------- | ---------------------- | ----------------------- | ------------- |
81-
| Agrobacterium_tumefaciens.fa | 47 | 0.1 | yes | 10e-8 | 10000 |
82-
| Bacillus_anthracis.fa | 48 | 0.2 | no | | |
79+
| ./data/genomes/Agrobacterium_tumefaciens.fa | 47 | 0.1 | yes | 10e-7 | 10000 |
80+
| ./data/genomes/Bacillus_anthracis.fa | 48 | 0.2 | no | | |
8381

8482
## Note on Coverage
8583

adrsm

Lines changed: 16 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -10,13 +10,16 @@ def _get_args():
1010
'''This function parses and return arguments passed in'''
1111
parser = argparse.ArgumentParser(
1212
prog='ADRSM v' + str(version),
13-
description='Ancient DNA Read Simulator for Metagenomics')
13+
formatter_class=argparse.RawDescriptionHelpFormatter,
14+
description='''
15+
==================================================\n
16+
ADRSM: Ancient DNA Read Simulator for Metagenomics\n
17+
Author: Maxime Borry\n
18+
Contact: <borry[at]shh.mpg.de>\n
19+
Homepage & Documentation: github.com/maxibor/adrsm
20+
==================================================
21+
''')
1422
parser.add_argument('confFile', help="path to configuration file")
15-
parser.add_argument(
16-
'-d',
17-
dest="directory",
18-
default=".",
19-
help="path to genome directory. Default = .")
2023
parser.add_argument(
2124
'-r',
2225
dest='readLength',
@@ -86,7 +89,6 @@ def _get_args():
8689
args = parser.parse_args()
8790

8891
infile = args.confFile
89-
gendir = args.directory
9092
readlen = int(args.readLength)
9193
nbinom = int(args.nbinom)
9294
a1 = args.fwdAdapt
@@ -101,10 +103,10 @@ def _get_args():
101103
seed = int(args.seed)
102104
threads = int(args.threads)
103105

104-
return(infile, gendir, readlen, nbinom, a1, a2, err, geom_p, themin, themax, outfile, quality, stats, seed, threads)
106+
return(infile, readlen, nbinom, a1, a2, err, geom_p, themin, themax, outfile, quality, stats, seed, threads)
105107

106108

107-
def read_config(infile, gendir):
109+
def read_config(infile):
108110
"""
109111
READS CONFIG FILE AND RETURNS CONFIG DICT
110112
"""
@@ -128,20 +130,20 @@ def read_config(infile, gendir):
128130
mutrate = 0
129131
age = 0
130132

131-
genomes[gendir + "/" + agenome] = {'size': ainsert,
132-
'cov': acov, 'deam': deamination, 'mutate': mutate, 'mutrate': mutrate, 'age': age}
133+
genomes[agenome] = {'size': ainsert,
134+
'cov': acov, 'deam': deamination, 'mutate': mutate, 'mutrate': mutrate, 'age': age}
133135
return(genomes)
134136

135137

136138
if __name__ == "__main__":
137-
version = 0.7
138-
INFILE, GENDIR, READLEN, NBINOM, A1, A2, ERR, GEOM_P, THEMIN, THEMAX, OUTFILE, QUALITY, STATS, SEED, PROCESS = _get_args()
139+
version = 0.8
140+
INFILE, READLEN, NBINOM, A1, A2, ERR, GEOM_P, THEMIN, THEMAX, OUTFILE, QUALITY, STATS, SEED, PROCESS = _get_args()
139141

140142
MINLENGTH = 20
141143
npr.seed(SEED)
142144
genome_dict = {}
143145
stat_dict = {}
144-
all_genomes = read_config(INFILE, GENDIR)
146+
all_genomes = read_config(INFILE)
145147
for agenome in all_genomes.keys():
146148
stat_and_run = ad.run_read_simulation_multi(INFILE=agenome,
147149
COV=all_genomes[agenome]['cov'],

data/short_genome_list.csv

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
genome(mandatory), insert_size(mandatory), coverage(mandatory), deamination(mandatory), mutation_rate(optional), age(optional)
2-
Agrobacterium_tumefaciens.fa, 47 , 0.1, yes, 10e-7, 10000
3-
Bacillus_anthracis.fa, 48, 0.2, no
2+
./data/genomes/Agrobacterium_tumefaciens.fa, 47 , 0.1, yes, 10e-7, 10000
3+
./data/genomes/Bacillus_anthracis.fa, 48, 0.2, no

0 commit comments

Comments
 (0)