Skip to content

Commit 7dd1172

Browse files
Merge pull request #219 from ncsa/fix_stitch
fixing stitch and no fastq
2 parents 56aadef + 036a58e commit 7dd1172

File tree

4 files changed

+30
-9
lines changed

4 files changed

+30
-9
lines changed

neat/read_simulator/utils/generate_reads.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -268,10 +268,14 @@ def generate_reads(
268268
)
269269

270270
read_1.mutations = find_applicable_mutations(read_1, contig_variants)
271+
if options.produce_fastq:
272+
fastq_handle = ofw.files_to_write[ofw.fq1]
273+
else:
274+
fastq_handle = None
271275
read_1.finalize_read_and_write(
272276
error_model,
273277
qual_model,
274-
ofw.files_to_write[ofw.fq1],
278+
fastq_handle,
275279
options.quality_offset,
276280
options.produce_fastq,
277281
options.rng
@@ -303,10 +307,14 @@ def generate_reads(
303307

304308
read_2.mutations = find_applicable_mutations(read_2, contig_variants)
305309

310+
if options.produce_fastq:
311+
fastq_handle = ofw.files_to_write[ofw.fq2]
312+
else:
313+
fastq_handle = None
306314
read_2.finalize_read_and_write(
307315
error_model,
308316
qual_model,
309-
ofw.files_to_write[ofw.fq2],
317+
fastq_handle,
310318
options.quality_offset,
311319
options.produce_fastq,
312320
options.rng

neat/read_simulator/utils/output_file_writer.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,7 @@ def __init__(self,
8080
self.vcf_header = vcf_header
8181
self.bam_header = bam_header
8282
self.vcf_format = vcf_format
83+
self.tmp_dir = options.temp_dir_path
8384

8485
file_handles: dict[Path, Any] = {}
8586

neat/read_simulator/utils/split_inputs.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
import shutil
66
import sys
7-
import gzip
7+
import resource
88
from pathlib import Path
99
from textwrap import wrap
1010
from typing import Iterator
@@ -83,6 +83,12 @@ def main(options: Options, reference_index: dict) -> tuple[dict, int]:
8383
idx += 1
8484
written += 1
8585

86+
# soft_limit, hard_limit = resource.getrlimit(resource.RLIMIT_NOFILE)
87+
# # plus one for the final file to write
88+
# if hard_limit > written + 1 >= soft_limit:
89+
# resource.setrlimit(resource.RLIMIT_NOFILE, (written + 1, hard_limit))
90+
# elif written + 1 >= hard_limit:
91+
# raise ValueError("Too many files for pysam merge to work successfully, increase Size parameter and try again.")
8692
# Report success via the logger instead of printing to stderr
8793
_LOG.info(f"Generated {written} FASTAs in {options.splits_dir}")
8894
return split_fasta_dict, written

neat/read_simulator/utils/stitch_outputs.py

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
"""
22
Stitch NEAT split‑run outputs into one dataset.
33
"""
4-
4+
import resource
55
import shutil
66
import pysam
77
from pathlib import Path
@@ -30,11 +30,17 @@ def merge_vcfs(vcfs: List[Path], ofw: OutputFileWriter) -> None:
3030
if not line.startswith("#"):
3131
dest.write(line)
3232

33-
def merge_bam(bam_files: List[Path], ofw: OutputFileWriter, threads: int) -> None:
34-
unsorted = ofw.bam.with_suffix(".unsorted.bam")
35-
pysam.merge("--no-PG", "-@", str(threads), "-f", str(unsorted), *map(str, bam_files))
36-
pysam.sort("-@", str(threads), "-o", str(ofw.bam), str(unsorted))
37-
unsorted.unlink(missing_ok=True)
33+
def merge_bam(
    bam_files: List[Path],
    ofw: OutputFileWriter,
    threads: int,
    batch_size: int = 500,
) -> None:
    """Merge per-split BAM files into one sorted BAM at ``ofw.bam``.

    The inputs are merged in batches of ``batch_size`` files into
    intermediate BAMs under ``ofw.tmp_dir``; the intermediates are then
    merged into a single temporary BAM, which is sorted into ``ofw.bam``.
    Every temporary file created here is removed before returning, whether
    or not merging or sorting succeeded.

    Args:
        bam_files: Paths of the BAM files to combine.
        ofw: Output writer; supplies ``tmp_dir`` (scratch directory) and
            ``bam`` (final output path).
        threads: Thread count passed to ``samtools sort`` via ``-@``.
        batch_size: Maximum number of input files handed to a single
            ``samtools merge`` call. The default of 500 is an arbitrary
            choice carried over from the previous hard-coded value.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    merged_file = ofw.tmp_dir / "temp_merged.bam"
    intermediate_files: List[Path] = []
    try:
        for i in range(0, len(bam_files), batch_size):
            temp_file = ofw.tmp_dir / f"temp_merged_{i}.bam"
            # Register the path before merging so a partially written file
            # is still cleaned up if the merge fails.
            intermediate_files.append(temp_file)
            pysam.merge(
                "--no-PG",
                "-f",
                str(temp_file),
                *map(str, bam_files[i:i + batch_size]),
            )
        pysam.merge("--no-PG", "-f", str(merged_file), *map(str, intermediate_files))
        pysam.sort("-@", str(threads), "-m", "1G", "-o", str(ofw.bam), str(merged_file))
    finally:
        # Previously only the final merged file was deleted; the per-batch
        # intermediates were left behind in the temp directory.
        for temp_file in intermediate_files:
            temp_file.unlink(missing_ok=True)
        merged_file.unlink(missing_ok=True)
3844

3945
def main(
4046
ofw: OutputFileWriter,

0 commit comments

Comments
 (0)