Skip to content

Commit 27da84e

Browse files
committed
generate_config works with precomputed BUSCOs
1 parent bc183c4 commit 27da84e

File tree

3 files changed

+20
-7
lines changed

3 files changed

+20
-7
lines changed

bin/generate_config.py

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ def parse_args(args=None):
4747
parser.add_argument("--blastx", help="Path to the blastx database", required=True)
4848
parser.add_argument("--blastn", help="Path to the blastn database", required=True)
4949
parser.add_argument("--taxdump", help="Path to the taxonomy database", required=True)
50+
parser.add_argument("--busco_output", action="append", help="Path to BUSCO output directory", required=True)
5051
parser.add_argument("--version", action="version", version="%(prog)s 2.0")
5152
return parser.parse_args(args)
5253

@@ -121,20 +122,28 @@ def get_classification(taxon_info: TaxonInfo) -> typing.Dict[str, str]:
121122
return {r: ancestors[r] for r in RANKS if r in ancestors}
122123

123124

124-
def get_odb(taxon_info: TaxonInfo, lineage_tax_ids: str, requested_buscos: typing.Optional[str]) -> typing.List[str]:
125+
def get_odb(taxon_info: TaxonInfo, lineage_tax_ids: str, requested_buscos: typing.Optional[str], pre_computed_buscos: typing.List[str]) -> typing.List[str]:
125126
# Read the mapping between the BUSCO lineages and their taxon_id
126127
with open(lineage_tax_ids) as file_in:
127128
lineage_tax_ids_dict: typing.Dict[int, str] = {}
128129
for line in file_in:
129130
arr = line.split()
130131
lineage_tax_ids_dict[int(arr[0])] = arr[1] + "_odb10"
131132

132-
if requested_buscos:
133+
valid_odbs = set(lineage_tax_ids_dict.values())
134+
135+
if pre_computed_buscos:
136+
# Use pre-computed BUSCO lineages if available
137+
odb_arr = pre_computed_buscos
138+
for odb in odb_arr:
139+
if odb not in valid_odbs:
140+
print(f"Invalid pre-computed BUSCO lineage: {odb}", file=sys.stderr)
141+
sys.exit(1)
142+
elif requested_buscos:
133143
odb_arr = requested_buscos.split(",")
134-
valid_odbs = set(lineage_tax_ids_dict.values())
135144
for odb in odb_arr:
136145
if odb not in valid_odbs:
137-
print(f"Invalid BUSCO lineage: {odb}", file=sys.stderr)
146+
print(f"Invalid requested BUSCO lineage: {odb}", file=sys.stderr)
138147
sys.exit(1)
139148
else:
140149
# Do the intersection to find the ancestors that have a BUSCO lineage
@@ -327,7 +336,9 @@ def main(args=None):
327336

328337
taxon_info = fetch_taxon_info(args.taxon_query)
329338
classification = get_classification(taxon_info)
330-
odb_arr = get_odb(taxon_info, args.lineage_tax_ids, args.busco)
339+
340+
precomputed_busco = [os.path.basename(path).replace("run_", "") for path in args.busco_output]
341+
odb_arr = get_odb(taxon_info, args.lineage_tax_ids, args.busco, precomputed_busco)
331342
taxon_id = adjust_taxon_id(args.nt, taxon_info)
332343

333344
if sequence_report:

modules/local/generate_config.nf

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ process GENERATE_CONFIG {
1515
tuple val(meta3), path(blastx, stageAs: 'blastx/*')
1616
tuple val(meta4), path(blastn, stageAs: 'blastn/*')
1717
tuple val(meta5), path(taxdump)
18+
tuple val(busco_meta), path(busco_output)
1819

1920
output:
2021
tuple val(meta), path("*.yaml") , emit: yaml
@@ -32,6 +33,7 @@ process GENERATE_CONFIG {
3233
def busco_param = busco_lin ? "--busco '${busco_lin}'" : ""
3334
def accession_params = params.accession ? "--accession ${params.accession}" : ""
3435
def input_reads = reads.collect{"--read_id ${it[0].id} --read_type ${it[0].datatype} --read_layout ${it[0].layout} --read_path ${it[1]}"}.join(' ')
36+
def busco_output_param = "--busco_output ${busco_output}"
3537
"""
3638
generate_config.py \\
3739
--fasta $fasta \\
@@ -45,6 +47,7 @@ process GENERATE_CONFIG {
4547
--blastx ${blastx} \\
4648
--blastn ${blastn} \\
4749
--taxdump ${taxdump} \\
50+
$busco_output_param \\
4851
--output_prefix ${prefix}
4952
5053
cat <<-END_VERSIONS > versions.yml

subworkflows/local/input_check.nf

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -132,8 +132,6 @@ workflow INPUT_CHECK {
132132
ch_parsed_busco = Channel.empty()
133133
}
134134

135-
// View the parsed channel
136-
ch_parsed_busco.view()
137135

138136
GENERATE_CONFIG (
139137
fasta,
@@ -145,6 +143,7 @@ workflow INPUT_CHECK {
145143
ch_databases.blastx,
146144
ch_databases.blastn,
147145
ch_databases.taxdump,
146+
ch_parsed_busco.map { meta, path -> [meta, path] }.groupTuple()
148147
)
149148
ch_versions = ch_versions.mix(GENERATE_CONFIG.out.versions.first())
150149

0 commit comments

Comments
 (0)