Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 36 additions & 17 deletions bin/create_metabinner_bins.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,23 @@
#!/usr/bin/env python

## Originally written by Hesham Almessady (@HeshamAlmessady) and Adrian Fritz (@AlphaSquad) in https://github.com/hzi-bifo/mag and released under the MIT license.
## Originally written by Hesham Almessady (@HeshamAlmessady) and Adrian Fritz (@AlphaSquad)
## in https://github.com/hzi-bifo/mag and released under the MIT license.
## See git repository (https://github.com/nf-core/mag) for full license text.

import sys
import gzip
import io
import os
import sys

from Bio import SeqIO


def main():
# Argument parsing
if len(sys.argv) != 6:
print("Usage: python create_metabinner_bins.py <binning_file> <fasta_file> <output_path> <prefix> <length_threshold>")
print(
"Usage: python create_metabinner_bins.py <binning_file> <fasta_file> <output_path> <prefix> <length_threshold>"
)
sys.exit(1)

binning = sys.argv[1]
Expand All @@ -19,27 +26,39 @@ def main():
prefix = sys.argv[4]
length = int(sys.argv[5])

# Create output directory if it doesn't exist
root = os.path.dirname(os.path.normpath(path)) or "."
os.makedirs(path, exist_ok=True)

# Load binning data into a dictionary
Metabinner_bins = {}
with open(binning, 'r') as b:
metabinner_bins = {}
with open(binning, "r") as b:
for line in b:
contig, bin = line.strip().split('\t')
Metabinner_bins[contig] = bin
contig, bin = line.strip().split("\t")
metabinner_bins[contig] = bin

handles = {}

def get_handle(dest_dir, fname):
    """Return the text handle for ``dest_dir/fname``, opening it on first use.

    Output is written gzip-compressed to ``<dest_dir>/<fname>.gz``. Each
    distinct path is opened once and memoised in the enclosing ``handles``
    dict as a ``(text_wrapper, raw_file)`` pair; later calls with the same
    arguments return the same text wrapper.
    """
    target = os.path.join(dest_dir, fname)
    cached = handles.get(target)
    if cached is not None:
        return cached[0]

    raw_file = open(target + ".gz", "wb")
    # mtime=0 pins the timestamp stored in the gzip header.
    compressed = gzip.GzipFile(fileobj=raw_file, mode="wb", mtime=0)
    text_wrapper = io.TextIOWrapper(compressed, encoding="utf-8")
    # The raw file is stored next to the wrapper because closing a GzipFile
    # does not close the fileobj it was given.
    handles[target] = (text_wrapper, raw_file)
    return text_wrapper

# Process the input fasta file
with open(fasta) as handle:
for record in SeqIO.parse(handle, "fasta"):
if len(record) < length:
f = prefix + ".tooShort.fa"
elif record.id not in Metabinner_bins:
f = prefix + ".unbinned.fa"
if len(record) <= length:
out = get_handle(root, prefix + ".tooShort.fa")
elif record.id not in metabinner_bins:
out = get_handle(root, prefix + ".unbinned.fa")
else:
f = prefix + "." + Metabinner_bins[record.id] + ".fa"
with open(os.path.join(path, f), 'a') as out:
SeqIO.write(record, out, "fasta")
out = get_handle(path, prefix + "." + metabinner_bins[record.id] + ".fa")
SeqIO.write(record, out, "fasta")

for text, raw in handles.values():
text.close()
raw.close()


if __name__ == "__main__":
main()
5 changes: 0 additions & 5 deletions modules/local/metabinner_bins/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -25,17 +25,12 @@ process METABINNER_BINS {
# unzip membership file
zcat ${membership} > membership.tsv

# collect bins & un-binned fractions
create_metabinner_bins.py \\
membership.tsv \\
${fasta} \\
./bins \\
${prefix} \\
${min_contig_size}
find ./bins/ -name "*.fa" -type f | xargs -t -n 1 bgzip -@ ${task.cpus}

# zip contig fractions
find ./bins/ -name "*[tooShort,unbinned].fa.gz" -type f -exec mv {} . \\;

cat <<-END_VERSIONS > versions.yml
"${task.process}":
Expand Down
15 changes: 5 additions & 10 deletions subworkflows/local/binning_metabinner/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -11,22 +11,18 @@ workflow BINNING_METABINNER {
main:
ch_versions = channel.empty()

ch_assembly = ch_input.map { meta, assembly, _depths -> [meta, assembly] }

// produce k-mer composition table
METABINNER_KMER(
ch_input
.map { meta, assembly, _depths ->
[meta, assembly]
},
ch_assembly,
params.min_contig_size
)
ch_versions = ch_versions.mix(METABINNER_KMER.out.versions)

// extract contigs over length threshold
METABINNER_TOOSHORT(
ch_input
.map { meta, assembly, _depths ->
[meta, assembly]
},
ch_assembly,
params.min_contig_size
)
ch_versions = ch_versions.mix(METABINNER_TOOSHORT.out.versions)
Expand All @@ -41,8 +37,7 @@ workflow BINNING_METABINNER {

// extract bin sequences
METABINNER_BINS(
ch_input.map { meta, assembly, _depths -> [meta, assembly] }
.join(METABINNER_METABINNER.out.membership),
ch_assembly.join(METABINNER_METABINNER.out.membership),
params.min_contig_size
)
ch_versions = ch_versions.mix(METABINNER_BINS.out.versions)
Expand Down
Loading