Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 10 additions & 2 deletions neat/read_simulator/utils/generate_reads.py
Original file line number Diff line number Diff line change
Expand Up @@ -268,10 +268,14 @@ def generate_reads(
)

read_1.mutations = find_applicable_mutations(read_1, contig_variants)
if options.produce_fastq:
fastq_handle = ofw.files_to_write[ofw.fq1]
else:
fastq_handle = None
read_1.finalize_read_and_write(
error_model,
qual_model,
ofw.files_to_write[ofw.fq1],
fastq_handle,
options.quality_offset,
options.produce_fastq,
options.rng
Expand Down Expand Up @@ -303,10 +307,14 @@ def generate_reads(

read_2.mutations = find_applicable_mutations(read_2, contig_variants)

if options.produce_fastq:
fastq_handle = ofw.files_to_write[ofw.fq2]
else:
fastq_handle = None
read_2.finalize_read_and_write(
error_model,
qual_model,
ofw.files_to_write[ofw.fq2],
fastq_handle,
options.quality_offset,
options.produce_fastq,
options.rng
Expand Down
1 change: 1 addition & 0 deletions neat/read_simulator/utils/output_file_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ def __init__(self,
self.vcf_header = vcf_header
self.bam_header = bam_header
self.vcf_format = vcf_format
self.tmp_dir = options.temp_dir_path

file_handles: dict[Path, Any] = {}

Expand Down
8 changes: 7 additions & 1 deletion neat/read_simulator/utils/split_inputs.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

import shutil
import sys
import gzip
import resource
from pathlib import Path
from textwrap import wrap
from typing import Iterator
Expand Down Expand Up @@ -83,6 +83,12 @@ def main(options: Options, reference_index: dict) -> tuple[dict, int]:
idx += 1
written += 1

# soft_limit, hard_limit = resource.getrlimit(resource.RLIMIT_NOFILE)
# # plus one for the final file to write
# if hard_limit > written + 1 >= soft_limit:
# resource.setrlimit(resource.RLIMIT_NOFILE, (written + 1, hard_limit))
# elif written + 1 >= hard_limit:
# raise ValueError("Too many files for pysam merge to work successfully, increase Size parameter and try again.")
# Report success via the logger instead of printing to stderr
_LOG.info(f"Generated {written} FASTAs in {options.splits_dir}")
return split_fasta_dict, written
18 changes: 12 additions & 6 deletions neat/read_simulator/utils/stitch_outputs.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""
Stitch NEAT split‑run outputs into one dataset.
"""

import resource
import shutil
import pysam
from pathlib import Path
Expand Down Expand Up @@ -30,11 +30,17 @@ def merge_vcfs(vcfs: List[Path], ofw: OutputFileWriter) -> None:
if not line.startswith("#"):
dest.write(line)

def merge_bam(bam_files: List[Path], ofw: OutputFileWriter, threads: int) -> None:
    """
    Merge the per-split BAM files and write the sorted result to ``ofw.bam``.

    All inputs are combined in a single ``pysam.merge`` call into a scratch
    file next to the final output (same name, ``.unsorted.bam`` suffix), which
    is then sorted into ``ofw.bam`` and removed.

    :param bam_files: paths of the BAM files to merge
    :param ofw: output file writer; only its ``bam`` path is read here
    :param threads: value passed to the ``-@`` option of both merge and sort
    """
    scratch_bam = ofw.bam.with_suffix(".unsorted.bam")
    thread_count = str(threads)
    input_paths = [str(bam_path) for bam_path in bam_files]

    # Step 1: combine every input into one unsorted BAM (-f overwrites a stale scratch file).
    merge_args = ["--no-PG", "-@", thread_count, "-f", str(scratch_bam), *input_paths]
    pysam.merge(*merge_args)

    # Step 2: sort the combined file into its final location.
    sort_args = ["-@", str(threads), "-o", str(ofw.bam), str(scratch_bam)]
    pysam.sort(*sort_args)

    # Step 3: the scratch file is no longer needed.
    scratch_bam.unlink(missing_ok=True)
def merge_bam(
    bam_files: List[Path],
    ofw: OutputFileWriter,
    threads: int,
    batch_size: int = 500,
) -> None:
    """
    Merge the per-split BAM files in bounded batches and sort into ``ofw.bam``.

    No single ``pysam.merge`` call receives more than ``batch_size`` inputs.
    While more files than that remain, they are merged batch by batch into
    intermediate files under ``ofw.tmp_dir``; the survivors are then merged
    into ``temp_merged.bam`` and sorted into ``ofw.bam``. Every temporary file
    created here is removed afterwards, including when a merge or sort fails.
    The caller's input files are never deleted.

    :param bam_files: paths of the BAM files to merge
    :param ofw: output file writer; its ``tmp_dir`` and ``bam`` paths are read
    :param threads: value passed to the ``-@`` option of the final sort
    :param batch_size: maximum number of inputs handed to one merge call.
        The default of 500 is arbitrary and may need tuning; presumably it
        keeps each merge under the process open-file limit.
    :raises ValueError: if ``batch_size`` is smaller than 2, since batches of
        one file would never reduce the number of files left to merge
    """
    if batch_size < 2:
        raise ValueError("batch_size must be at least 2")

    merged_file = ofw.tmp_dir / "temp_merged.bam"
    intermediate_files: List[Path] = []
    try:
        pending = [str(bam_path) for bam_path in bam_files]
        level = 0
        # Collapse the inputs level by level until one merge call can take them all.
        while len(pending) > batch_size:
            next_level = []
            for start in range(0, len(pending), batch_size):
                temp_file = ofw.tmp_dir / f"temp_merged_{level}_{start}.bam"
                # Register before merging so a partially written file is cleaned up too.
                intermediate_files.append(temp_file)
                pysam.merge("--no-PG", "-f", str(temp_file), *pending[start:start + batch_size])
                next_level.append(str(temp_file))
            pending = next_level
            level += 1

        pysam.merge("--no-PG", "-f", str(merged_file), *pending)
        pysam.sort("-@", str(threads), "-m", "1G", "-o", str(ofw.bam), str(merged_file))
    finally:
        # Only files created by this function are removed.
        for temp_file in intermediate_files:
            temp_file.unlink(missing_ok=True)
        merged_file.unlink(missing_ok=True)

def main(
ofw: OutputFileWriter,
Expand Down
Loading