Skip to content

Commit d6cdca9

Browse files
authored
Merge pull request #1 from maxibor/dev
Reorganizing to use setupTools
2 parents 826bcd7 + 523ef1b commit d6cdca9

23 files changed

+360
-198
lines changed
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
name: publish_conda
2+
3+
on: [release]
4+
5+
jobs:
6+
publish:
7+
runs-on: ubuntu-latest
8+
steps:
9+
- uses: actions/checkout@v1
10+
- name: publish-to-conda
11+
uses: maxibor/conda-package-publish-action@v1.1
12+
with:
13+
subDir: 'conda'
14+
AnacondaToken: ${{ secrets.ANACONDA_TOKEN }}

.gitignore

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,3 +5,7 @@ r1.fq
55
r2.fq
66
metagenome.*
77
stats.csv
8+
adrsm.egg-info
9+
__pycache__
10+
.ipynb_checkpoints
11+
dist

.travis.yml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,5 +6,8 @@ install:
66
- pip install numpy
77
- pip install scipy
88
- pip install requests
9+
- pip install click
910
# command to run tests
10-
script: python adrsm -p 0.5 -m 0.001 -M 0.3 -t 2 ./data/short_genome_list.csv
11+
script:
12+
- python setup.py install
13+
- adrsm -p 0.5 -m 0.001 -M 0.3 -t 2 ./test/data/short_genome_list.csv

adrsm

Lines changed: 0 additions & 159 deletions
This file was deleted.

adrsm/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
__version__ = "0.9.4"

adrsm/adrsm.py

Lines changed: 156 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,156 @@
1+
#!/usr/bin/env python
2+
3+
from numpy import random as npr
4+
from .lib import adrsmlib as ad
5+
from . import __version__
6+
import click
7+
8+
@click.command()
@click.version_option(__version__)
@click.argument('confFile', type=click.Path(exists=True,
                                            readable=True,
                                            resolve_path=True))
# The default is an int literal so it matches the declared option type.
@click.option('-r',
              '--readLength',
              default=76,
              type=int,
              show_default=True,
              help='Average read length')
@click.option('-n',
              '--nbinom',
              default=8,
              type=int,
              show_default=True,
              help='n parameter for Negative Binomial insert length distribution')
@click.option('-fwd',
              '--fwdAdapt',
              default='AGATCGGAAGAGCACACGTCTGAACTCCAGTCACNNNNNNATCTCGTATGCCGTCTTCTGCTTG',
              type=str,
              show_default=True,
              help='Forward adaptor sequence')
@click.option('-rev',
              '--revAdapt',
              default='AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCGGTGGTCGCCGTATCATT',
              type=str,
              show_default=True,
              help='Reverse adaptor sequence')
@click.option('-p',
              '--geom_p',
              default=0.5,
              type=click.FloatRange(min=0.0, max=1.0),
              show_default=True,
              help='Geometric distribution parameter for deamination')
@click.option('-m',
              '--minD',
              default=0.01,
              type=click.FloatRange(min=0.0, max=1.0),
              show_default=True,
              help='Deamination substitution base frequency')
@click.option('-M',
              '--maxD',
              default=0.3,
              type=click.FloatRange(min=0.0, max=1.0),
              show_default=True,
              help='Deamination substitution max frequency')
# '-s' used to be declared for both --seed and --stats. Two options cannot
# share one short flag, so the seed gets its own '-se' (same single-dash,
# multi-letter style as '-fwd'/'-rev') and '-s' now unambiguously means
# --stats. NOTE(review): click appears to resolve a duplicated short flag to
# the option declared last (--stats), which is why '-s' stays there -- confirm.
@click.option('-se',
              '--seed',
              default=42,
              type=int,
              show_default=True,
              help='Seed for random generator')
@click.option('-t',
              '--threads',
              default=2,
              type=click.IntRange(min=1, max=1024),
              show_default=True,
              help='Number of threads for parallel processing')
@click.option('-o',
              '--output',
              default='./metagenome',
              type=click.Path(file_okay=True, writable=True, resolve_path=True),
              show_default=True,
              help='Fastq output file basename')
@click.option('-s',
              '--stats',
              default='./stats.csv',
              type=click.Path(file_okay=True, writable=True, resolve_path=True),
              show_default=True,
              help='Summary statistics file')
# The docstring below is the text click prints for --help, so it is kept
# verbatim; developer notes live in comments instead.
# NOTE(review): `no_args_is_help` is never read in the body, so as a plain
# function default it has no effect here. It is kept only so the signature
# stays unchanged; it was presumably meant as an argument to click.command().
def cli(no_args_is_help=True, **kwargs):
    """\b
    ==================================================
    ADRSM: Ancient DNA Read Simulator for Metagenomics
    Author: Maxime Borry
    Contact: <borry[at]shh.mpg.de>
    Homepage & Documentation: github.com/maxibor/adrsm

    CONFFILE: path to ADRSM configuration file
    """
    # Forward every parsed option/argument to main() as keyword arguments.
    main(**kwargs)
92+
93+
def read_config(infile):
    """
    Read the ADRSM configuration file and return a dict of per-genome settings.

    The first line is treated as a header and skipped. Every other non-blank
    line is split on commas, with all spaces removed from each field:

        column 0: genome entry, used as the key of the returned dict
        column 1: integer, stored under 'size'
        column 2: float, stored under 'cov'
        column 3: yes/no flag passed to ad.parse_yes_no, stored under 'deam'
        column 4: optional mutation rate (float)
        column 5: age (float), required when column 4 is present and non-zero

    Mutation is enabled ('mutate' is True) only when column 4 exists and is
    not 0.0; otherwise 'mutate' is False and 'mutrate' and 'age' are 0.

    Args:
        infile: path to the configuration file.

    Returns:
        dict mapping each genome entry to a dict with the keys
        'size', 'cov', 'deam', 'mutate', 'mutrate' and 'age'.

    Raises:
        ValueError: if a field cannot be converted to a number, or if a row
            has a non-zero mutation rate but no age column.
        IndexError: if a row has fewer than four columns.
    """
    genomes = {}
    with open(infile, "r") as f:
        # Skip the header; the default keeps an empty file from raising
        # StopIteration.
        next(f, None)
        for line in f:
            # Tolerate blank lines such as a trailing newline at end of file.
            if not line.strip():
                continue
            fields = [field.replace(" ", "")
                      for field in line.rstrip().split(",")]
            agenome = fields[0]
            ainsert = int(fields[1])
            acov = float(fields[2])
            deamination = ad.parse_yes_no(fields[3])

            # Parse the optional mutation rate once; 0.0 means "no mutation".
            mutrate = float(fields[4]) if len(fields) > 4 else 0.0
            if mutrate != 0.0:
                if len(fields) < 6:
                    # Previously surfaced as a bare IndexError on fields[5].
                    raise ValueError(
                        "Genome '{}' has a mutation rate but no age "
                        "column in {}".format(agenome, infile))
                mutate = True
                age = float(fields[5])
            else:
                mutate = False
                mutrate = 0
                age = 0

            genomes[agenome] = {'size': ainsert,
                                'cov': acov,
                                'deam': deamination,
                                'mutate': mutate,
                                'mutrate': mutrate,
                                'age': age}
    return genomes
120+
121+
122+
def main(conffile, readlength, nbinom, fwdadapt, revadapt, geom_p, mind, maxd, seed, threads, output, stats):
    """
    Run the read simulation for every genome listed in the configuration file.

    Seeds NumPy's global random generator, reads the configuration with
    read_config(), calls ad.run_read_simulation_multi() once per genome,
    writes the collected per-genome results with ad.write_stat(), and prints
    a short summary.

    Args:
        conffile: path to the configuration file (passed to read_config).
        readlength: forwarded as READLEN.
        nbinom: forwarded as NBINOM.
        fwdadapt: forwarded as A1.
        revadapt: forwarded as A2.
        geom_p: forwarded as GEOM_P.
        mind: forwarded as THEMIN.
        maxd: forwarded as THEMAX.
        seed: seed given to numpy.random.seed.
        threads: forwarded as PROCESS.
        output: forwarded as FASTQ_OUT and used in the summary message.
        stats: path handed to ad.write_stat and shown in the summary message.
    """
    # Forwarded as MINLENGTH to the simulator; presumably a minimum fragment
    # length -- confirm in adrsmlib.
    MINLENGTH = 20
    npr.seed(seed)
    stat_dict = {}
    for agenome, settings in read_config(conffile).items():
        stat_and_run = ad.run_read_simulation_multi(INFILE=agenome,
                                                    COV=settings['cov'],
                                                    READLEN=readlength,
                                                    INSERLEN=settings['size'],
                                                    NBINOM=nbinom,
                                                    A1=fwdadapt,
                                                    A2=revadapt,
                                                    MINLENGTH=MINLENGTH,
                                                    MUTATE=settings['mutate'],
                                                    MUTRATE=settings['mutrate'],
                                                    AGE=settings['age'],
                                                    DAMAGE=settings['deam'],
                                                    GEOM_P=geom_p,
                                                    THEMIN=mind,
                                                    THEMAX=maxd,
                                                    PROCESS=threads,
                                                    FASTQ_OUT=output)
        # Results are keyed by ad.get_basename(agenome), not the raw entry.
        stat_dict[ad.get_basename(agenome)] = stat_and_run

    ad.write_stat(stat_dict=stat_dict, stat_out=stats)
    print("\n-- ADRSM v" + __version__ +
          " finished generating this mock metagenome --")
    print("-- FASTQ files written to " + output +
          ".1.fastq and " + output + ".2.fastq --")
    print("-- Statistic file written to " + stats + " --")
154+
155+
if __name__ == "__main__":
156+
cli()

adrsm/data/quality/fwd_qual.p

517 KB
Binary file not shown.

adrsm/data/quality/rev_qual.p

517 KB
Binary file not shown.
File renamed without changes.

lib/adrsmlib.py renamed to adrsm/lib/adrsmlib.py

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
import multiprocessing
99
import pickle
1010
from functools import partial
11+
from pkg_resources import resource_filename
1112
from . import sequencefunctions as sf
1213
from . import markov as mk
1314

@@ -95,10 +96,7 @@ def get_fwd_qual():
9596
ret = pickle.load(open("data/quality/fwd_qual.p", 'rb'))
9697
return(ret)
9798
except FileNotFoundError:
98-
cmd = "which adrsm"
99-
res = subprocess.check_output(cmd, shell=True)
100-
res = res.decode('utf-8').rstrip()
101-
path = "/".join(res.split("/")[:-2])+"/data/quality/fwd_qual.p"
99+
path = resource_filename('adrsm', '/data/quality/fwd_qual.p')
102100
ret = pickle.load(open(path, 'rb'))
103101
return(ret)
104102

@@ -108,10 +106,7 @@ def get_rev_qual():
108106
ret = pickle.load(open("data/quality/fwd_qual.p", 'rb'))
109107
return(ret)
110108
except FileNotFoundError:
111-
cmd = "which adrsm"
112-
res = subprocess.check_output(cmd, shell=True)
113-
res = res.decode('utf-8').rstrip()
114-
path = "/".join(res.split("/")[:-2])+"/data/quality/rev_qual.p"
109+
path = resource_filename('adrsm', '/data/quality/rev_qual.p')
115110
ret = pickle.load(open(path, 'rb'))
116111
return(ret)
117112

0 commit comments

Comments
 (0)