Skip to content

Commit 08f1d83

Browse files
committed
fix .gitignore
1 parent 477db30 commit 08f1d83

File tree

3 files changed

+202
-101
lines changed

3 files changed

+202
-101
lines changed

.gitignore

Lines changed: 1 addition & 101 deletions
Original file line numberDiff line numberDiff line change
@@ -1,101 +1 @@
1-
# Byte-compiled / optimized / DLL files
2-
__pycache__/
3-
*.py[cod]
4-
*$py.class
5-
6-
# C extensions
7-
*.so
8-
9-
# Distribution / packaging
10-
.Python
11-
env/
12-
build/
13-
develop-eggs/
14-
dist/
15-
downloads/
16-
eggs/
17-
.eggs/
18-
lib/
19-
lib64/
20-
parts/
21-
sdist/
22-
var/
23-
wheels/
24-
*.egg-info/
25-
.installed.cfg
26-
*.egg
27-
28-
# PyInstaller
29-
# Usually these files are written by a python script from a template
30-
# before PyInstaller builds the exe, so as to inject date/other infos into it.
31-
*.manifest
32-
*.spec
33-
34-
# Installer logs
35-
pip-log.txt
36-
pip-delete-this-directory.txt
37-
38-
# Unit test / coverage reports
39-
htmlcov/
40-
.tox/
41-
.coverage
42-
.coverage.*
43-
.cache
44-
nosetests.xml
45-
coverage.xml
46-
*.cover
47-
.hypothesis/
48-
49-
# Translations
50-
*.mo
51-
*.pot
52-
53-
# Django stuff:
54-
*.log
55-
local_settings.py
56-
57-
# Flask stuff:
58-
instance/
59-
.webassets-cache
60-
61-
# Scrapy stuff:
62-
.scrapy
63-
64-
# Sphinx documentation
65-
docs/_build/
66-
67-
# PyBuilder
68-
target/
69-
70-
# Jupyter Notebook
71-
.ipynb_checkpoints
72-
73-
# pyenv
74-
.python-version
75-
76-
# celery beat schedule file
77-
celerybeat-schedule
78-
79-
# SageMath parsed files
80-
*.sage.py
81-
82-
# dotenv
83-
.env
84-
85-
# virtualenv
86-
.venv
87-
venv/
88-
ENV/
89-
90-
# Spyder project settings
91-
.spyderproject
92-
.spyproject
93-
94-
# Rope project settings
95-
.ropeproject
96-
97-
# mkdocs documentation
98-
/site
99-
100-
# mypy
101-
.mypy_cache/
1+
*.pyc

lib/__init__.py

Whitespace-only changes.

lib/adrsmlib.py

Lines changed: 201 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,201 @@
1+
#!/usr/bin/env python
2+
3+
from numpy import random as npr
4+
5+
def get_basename(file_name):
6+
if ("/") in file_name:
7+
basename = file_name.split("/")[-1].split(".")[0]
8+
else:
9+
basename = file_name.split(".")[0]
10+
return(basename)
11+
12+
def reverse_complement(dna) :
13+
dna = dna.upper()
14+
'''
15+
Reverse complement a DNA string
16+
'''
17+
dna = dna[::-1]
18+
revcom = []
19+
complement = {"A" : "T", "T" : "A" , "G" : "C" , "C" : "G"}
20+
for letter in dna :
21+
for key in complement.keys() :
22+
if letter == key :
23+
revcom.append(complement[key])
24+
25+
return "".join(revcom)
26+
27+
def read_fasta (file_name):
28+
"""
29+
READS FASTA FILE, RETURNS SEQUENCE AS STRING
30+
INPUT:
31+
file_name(string): path to fasta file
32+
OUPUT:
33+
result(string): all of the sequences in fasta file, concatenated
34+
"""
35+
result = ""
36+
with open(file_name, "r") as f:
37+
for line in f:
38+
if not line.startswith(">"):
39+
line = line.rstrip()
40+
result = result+line
41+
return([result, len(result)])
42+
43+
def random_insert(read_fasta_out, insert_lengths, read_length, minlen):
44+
genome = read_fasta_out[0]
45+
genome_length = read_fasta_out[1]
46+
result = []
47+
for i in insert_lengths:
48+
if i >= minlen:
49+
insert_start = npr.randint(0, genome_length-read_length)
50+
insert_end = insert_start + i + 1
51+
insert = genome[insert_start:insert_end]
52+
result.append(insert)
53+
return(result)
54+
55+
def complement_read(all_inserts, adaptor, read_length):
56+
result = []
57+
for insert in all_inserts:
58+
inlen = len(insert)
59+
if inlen < read_length:
60+
diff = read_length - inlen
61+
to_add = adaptor[0:diff]
62+
read = insert+to_add
63+
elif inlen == read_length:
64+
read = insert
65+
elif inlen > read_length:
66+
read = insert[0:read_length]
67+
result.append(read)
68+
return(result)
69+
70+
def add_error(all_reads, error_rate):
71+
for i in range(0, len(all_reads)):
72+
read = list(all_reads[i])
73+
for j in range(0, len(read)):
74+
if npr.random() < error_rate:
75+
read[j] = npr.choice(["A","T","G","C"])
76+
all_reads[i] = "".join(read)
77+
return(all_reads)
78+
79+
def prepare_fastq(fastq_dict, fwd_reads, rev_reads, basename, read_length):
80+
fastq_dict[basename] = [[] for i in range(2)]
81+
cnt = 1
82+
for read1, read2 in zip(fwd_reads, rev_reads):
83+
towrite_fwd = "@"+basename+"_"+str(cnt)+"/1"+"\n"+read1+"\n+\n"+"d"*read_length+"\n"
84+
fastq_dict[basename][0].append(towrite_fwd)
85+
towrite_rev = "@"+basename+"_"+str(cnt)+"/2"+"\n"+read2+"\n+\n"+"d"*read_length+"\n"
86+
fastq_dict[basename][1].append(towrite_rev)
87+
cnt += 1
88+
return(fastq_dict)
89+
90+
def write_fastq_multi(fastq_dict, outputfile):
91+
with open(outputfile+".1.fastq","w") as f1:
92+
with open(outputfile+".2.fastq","w") as f2:
93+
for akey in fastq_dict.keys():
94+
for reads1 in fastq_dict[akey][0]:
95+
f1.write(reads1)
96+
for reads2 in fastq_dict[akey][1]:
97+
f2.write(reads2)
98+
99+
def write_fastq(all_reads, basename, orientation, read_length, outfile):
100+
if not outfile:
101+
with open(basename+"."+str(orientation)+".fastq", "w") as fw:
102+
for read in all_reads:
103+
fw.write("@"+basename+"\n")
104+
fw.write(read+"\n")
105+
fw.write("+\n")
106+
fw.write("d"*read_length+"\n")
107+
else:
108+
with open(outfile+"."+str(orientation)+".fastq", "w") as fw:
109+
for read in all_reads:
110+
fw.write("@"+basename+"\n")
111+
fw.write(read+"\n")
112+
fw.write("+\n")
113+
fw.write("d"*read_length+"\n")
114+
115+
116+
117+
def run_read_simulation(INFILE, NREAD, COV, READLEN, INSERLEN, LENDEV, A1, A2, OUTFILE, MINLENGTH, ERR):
118+
print("INFILE: ", INFILE)
119+
if COV:
120+
print("COV: ", COV)
121+
else:
122+
print("NREAD: ", NREAD)
123+
print("READLEN: ", READLEN)
124+
print("INSERLEN: ", INSERLEN)
125+
print("LENDEV: ", LENDEV)
126+
print("A1: ", A1)
127+
print("A2: ", A2)
128+
print("OUTFILE: ", OUTFILE)
129+
130+
nread = None
131+
132+
133+
basename = get_basename(INFILE)
134+
fasta = read_fasta(INFILE)
135+
136+
if COV:
137+
nread = int(fasta[1]/INSERLEN)
138+
print("nread: ", nread)
139+
140+
insert_lengths = [int(n) for n in npr.normal(INSERLEN, LENDEV, nread)]
141+
142+
143+
144+
145+
all_inserts = random_insert(fasta, insert_lengths, READLEN, MINLENGTH)
146+
fwd_inserts = all_inserts
147+
rev_inserts = [reverse_complement(i) for i in all_inserts]
148+
fwd_reads = complement_read(fwd_inserts, A1, READLEN)
149+
fwd_reads = add_error(fwd_reads, ERR)
150+
rev_reads = complement_read(rev_inserts, A2, READLEN)
151+
rev_reads = add_error(rev_reads, ERR)
152+
153+
write_fastq(fwd_reads, basename, 1, READLEN, OUTFILE)
154+
write_fastq(rev_reads, basename, 2, READLEN, OUTFILE)
155+
156+
def run_read_simulation_multi(INFILE, NREAD, COV, READLEN, INSERLEN, LENDEV, A1, A2, MINLENGTH, ERR, fastq_dict):
157+
print("INFILE: ", INFILE)
158+
if COV:
159+
print("COV: ", COV)
160+
else:
161+
print("NREAD: ", NREAD)
162+
print("READLEN: ", READLEN)
163+
print("INSERLEN: ", INSERLEN)
164+
print("LENDEV: ", LENDEV)
165+
print("A1: ", A1)
166+
print("A2: ", A2)
167+
nread = None
168+
169+
170+
basename = get_basename(INFILE)
171+
fasta = read_fasta(INFILE)
172+
173+
if COV:
174+
nread = int(fasta[1]/INSERLEN)
175+
print("nread: ", nread)
176+
177+
insert_lengths = [int(n) for n in npr.normal(INSERLEN, LENDEV, nread)]
178+
179+
180+
181+
182+
all_inserts = random_insert(fasta, insert_lengths, READLEN, MINLENGTH)
183+
fwd_inserts = all_inserts
184+
rev_inserts = [reverse_complement(i) for i in all_inserts]
185+
fwd_reads = complement_read(fwd_inserts, A1, READLEN)
186+
fwd_reads = add_error(fwd_reads, ERR)
187+
rev_reads = complement_read(rev_inserts, A2, READLEN)
188+
rev_reads = add_error(rev_reads, ERR)
189+
190+
prepare_fastq(fastq_dict = fastq_dict, fwd_reads = fwd_reads, rev_reads = rev_reads, basename = basename, read_length = READLEN)
191+
return(nread * INSERLEN)
192+
193+
def write_stat(stat_dict, stat_out):
194+
nbases = []
195+
for akey in stat_dict:
196+
nbases.append(stat_dict[akey])
197+
totbases = sum(nbases)
198+
with open(stat_out,"w") as fs:
199+
fs.write("Organism, percentage of metagenome\n")
200+
for akey in stat_dict:
201+
fs.write(akey+","+str(stat_dict[akey]/totbases)+"\n")

0 commit comments

Comments
 (0)