Skip to content

Commit bafdaca

Browse files
committed
adding sequencing effort limitation by random subsampling
1 parent f570b0a commit bafdaca

File tree

2 files changed

+31
-7
lines changed

2 files changed

+31
-7
lines changed

adrsm/adrsm.py

Lines changed: 28 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
from adrsm import __version__
66
import click
77
import csv
8+
import random
89

910

1011
@click.command()
@@ -68,6 +69,14 @@
6869
show_default=True,
6970
help="Deamination substitution max frequency",
7071
)
72+
@click.option(
73+
"-e",
74+
"--effort",
75+
default=100,
76+
type=int,
77+
show_default=True,
78+
help="Sequencing effort, maximum number of reads to be generated",
79+
)
7180
@click.option(
7281
"-s",
7382
"--seed",
@@ -157,6 +166,7 @@ def main(
157166
geom_p,
158167
mind,
159168
maxd,
169+
effort,
160170
seed,
161171
threads,
162172
output,
@@ -167,8 +177,9 @@ def main(
167177
fastq_list = []
168178
stat_dict = {}
169179
all_genomes = read_config(conffile)
180+
all_reads = []
170181
for agenome in all_genomes.keys():
171-
stat_and_run = ad.run_read_simulation_multi(
182+
reads, stat_and_run = ad.run_read_simulation_multi(
172183
INFILE=agenome,
173184
COV=all_genomes[agenome]["cov"],
174185
READLEN=readlength,
@@ -185,14 +196,27 @@ def main(
185196
THEMIN=mind,
186197
THEMAX=maxd,
187198
PROCESS=threads,
188-
FASTQ_OUT=output,
189199
)
190200
stat_dict[ad.get_basename(agenome)] = stat_and_run
201+
all_reads.extend(reads)
191202

203+
if len(all_reads) > effort:
204+
all_reads = random.sample(all_reads, effort)
205+
ad.write_fastq_multi(all_reads, output)
192206
ad.write_stat(stat_dict=stat_dict, stat_out=stats)
193-
print("\n-- ADRSM v" + __version__ + " finished generating this mock metagenome --")
194207
print(
195-
"-- FASTQ files written to " + output + ".1.fastq and " + output + ".2.fastq --"
208+
"\n-- ADRSM v"
209+
+ __version__
210+
+ " finished generating "
211+
+ str(len(all_reads))
212+
+ " reads for this mock metagenome --"
213+
)
214+
print(
215+
"-- FASTQ files written to "
216+
+ output
217+
+ ".1.fastq.gz and "
218+
+ output
219+
+ ".2.fastq.gz --"
196220
)
197221
print("-- Statistic file written to " + stats + " --")
198222

adrsm/lib/adrsmlib.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -188,7 +188,6 @@ def run_read_simulation_multi(
188188
THEMIN,
189189
THEMAX,
190190
PROCESS,
191-
FASTQ_OUT,
192191
):
193192
print("===================\n===================")
194193
print("Genome: ", INFILE)
@@ -269,8 +268,9 @@ def run_read_simulation_multi(
269268
read_length=READLEN,
270269
process=PROCESS,
271270
)
272-
write_fastq_multi(fastq_list=result, outputfile=FASTQ_OUT)
273-
return [nread * INSERLEN, INSERLEN, COV, DAMAGE]
271+
272+
# FASTQ output is no longer written here: the reads are returned so the caller can pool, subsample and write them with write_fastq_multi.
273+
return result, [nread * INSERLEN, INSERLEN, COV, DAMAGE]
274274

275275

276276
def specie_to_taxid(specie):

0 commit comments

Comments
 (0)