Skip to content

Commit 949de8e

Browse files
committed
Mutations executed on inserts instead of genome
1 parent b99a24e commit 949de8e

File tree

3 files changed

+24
-14
lines changed

3 files changed

+24
-14
lines changed

adrsm

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,7 @@ def read_config(infile):
121121
acov = float(splitline[2].replace(" ", ""))
122122
deambool = str(splitline[3].replace(" ", ""))
123123
deamination = ad.parse_yes_no(deambool)
124-
if len(splitline) > 4:
124+
if len(splitline) > 4 and float(splitline[4].replace(" ", "")) != 0.0:
125125
mutate = True
126126
mutrate = float(splitline[4].replace(" ", ""))
127127
age = float(splitline[5].replace(" ", ""))

lib/adrsmlib.py

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -25,12 +25,12 @@ def get_basename(file_name):
2525
return(basename)
2626

2727

28-
def add_mutation_multi(sequence, mutrate, process):
28+
def add_mutation_multi(sequences, mutrate, process):
2929
mutate_partial = partial(sf.mutate, mutrate=mutrate)
3030
print("Mutating...")
3131
with multiprocessing.Pool(process) as p:
32-
mutseq = p.map(mutate_partial, list(sequence))
33-
return("".join(mutseq))
32+
mutseq = p.map(mutate_partial, sequences)
33+
return(list(mutseq))
3434

3535

3636
def reverse_complement_multi(all_inserts, process):
@@ -49,10 +49,16 @@ def read_fasta(file_name):
4949
result(string): all of the sequences in fasta file, concatenated
5050
"""
5151
result = ""
52+
# fastadict = {}
5253
with open(file_name, "r") as f:
5354
for line in f:
54-
if not line.startswith(">"):
55+
if line[0] == ">":
56+
# seqname = line[1:]
57+
# fastadict[seqname] = []
58+
continue
59+
else:
5560
line = line.rstrip()
61+
# fastadict[seqname].append(line)
5662
result = result + line
5763
return([result, len(result)])
5864

@@ -159,12 +165,12 @@ def run_read_simulation_multi(INFILE, COV, READLEN, INSERLEN, NBINOM, A1, A2, MI
159165
prob = NBINOM / (NBINOM + INSERLEN)
160166
insert_lengths = npr.negative_binomial(NBINOM, prob, nread)
161167

168+
all_inserts = sf.random_insert(fasta, insert_lengths, READLEN, MINLENGTH)
169+
162170
if MUTATE:
163171
correct_mutrate = (MUTRATE * AGE) / fasta[1]
164-
fasta[0] = add_mutation_multi(
165-
sequence=fasta[0], mutrate=correct_mutrate, process=PROCESS)
166-
167-
all_inserts = sf.random_insert(fasta, insert_lengths, READLEN, MINLENGTH)
172+
all_inserts = add_mutation_multi(
173+
sequences=all_inserts, mutrate=correct_mutrate, process=PROCESS)
168174

169175
if DAMAGE:
170176
all_inserts = add_damage_multi(

lib/sequencefunctions.py

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -86,17 +86,21 @@ def add_error(read, error_rate):
8686
return("".join(read))
8787

8888

89-
def mutate(nucleotide, mutrate, alpha=0.4, beta=0.2):
89+
def mutate(sequence, mutrate, alpha=0.4, beta=0.2):
9090
"""
9191
alpha: Transitions
9292
beta: Transversions
9393
https://en.wikipedia.org/wiki/Mutation_rate
9494
"""
9595
a = int(10 * alpha)
9696
b = int(10 * beta)
97+
newseq = ""
9798
dmut = {'A': b * ['C'] + b * ['T'] + a * ['G'], 'C': b * ['A'] + b * ['G'] + a * [
9899
'T'], 'G': b * ['C'] + b * ['T'] + a * ['A'], 'T': b * ['A'] + b * ['G'] + a * ['C']}
99-
if npr.random() <= mutrate:
100-
new_nucl = random.choice(dmut[nucleotide])
101-
return(new_nucl)
102-
return(nucleotide)
100+
for nuc in sequence:
101+
if npr.random() <= mutrate:
102+
new_nucl = random.choice(dmut[nuc])
103+
newseq += new_nucl
104+
else:
105+
newseq += nuc
106+
return(newseq)

0 commit comments

Comments
 (0)