Skip to content

Commit 8d0ba36

Browse files
committed
Fixed the handling of the taxdump
JSON files are no longer generated in the input directory. Importantly, the pipeline now also accepts a JSON file as input.
1 parent f7ce3b9 commit 8d0ba36

File tree

6 files changed

+95
-5
lines changed

6 files changed

+95
-5
lines changed

bin/jsonify_taxdump.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
#!/usr/bin/env python3
2+
3+
import argparse
4+
import sys
5+
6+
from blobtools.lib.file_io import write_file
7+
from blobtools.lib.taxdump import Taxdump
8+
9+
10+
def parse_args(args=None) -> argparse.Namespace:
    """Parse the command-line arguments of the taxdump-to-JSON converter.

    Args:
        args: Optional list of argument strings. When ``None``, argparse
            falls back to ``sys.argv[1:]``.

    Returns:
        The parsed namespace, holding the positional ``taxdump`` path.
    """
    description = "Parse and digest the taxdump files into a JSON structure, printed on stdout."

    parser = argparse.ArgumentParser(description=description)
    parser.add_argument("taxdump", help="Path to the taxonomy database")
    # With --version, argparse prints "<prog> 1.0" and exits immediately.
    parser.add_argument("--version", action="version", version="%(prog)s 1.0")
    return parser.parse_args(args)
17+
18+
19+
def main(args=None):
    """Load the taxdump named on the command line and hand its dict form to write_file.

    Args:
        args: Optional list of command-line argument strings, forwarded
            unchanged to ``parse_args``.
    """
    options = parse_args(args)
    digested = Taxdump(options.taxdump).values_to_dict()
    # NOTE(review): "STDOUT" is presumably a sentinel that makes write_file
    # print to standard output rather than create a file -- confirm in
    # blobtools.lib.file_io.
    write_file("STDOUT", digested)


if __name__ == "__main__":
    sys.exit(main())

modules/local/blobtoolkit/createblobdir.nf

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ process BLOBTOOLKIT_CREATEBLOBDIR {
1212
tuple val(meta1), path(busco, stageAs: 'lineage??/*')
1313
tuple val(meta2), path(blastp)
1414
tuple val(meta3), path(yaml)
15-
path(taxdump)
15+
path(taxdump, stageAs: 'taxdump/taxdump.json')
1616

1717
output:
1818
tuple val(meta), path(prefix), emit: blobdir
@@ -30,7 +30,7 @@ process BLOBTOOLKIT_CREATEBLOBDIR {
3030
blobtools replace \\
3131
--bedtsvdir windowstats \\
3232
--meta ${yaml} \\
33-
--taxdump ${taxdump} \\
33+
--taxdump \$(dirname ${taxdump}) \\
3434
--taxrule buscogenes \\
3535
${busco_args} \\
3636
${hits_blastp} \\

modules/local/blobtoolkit/updateblobdir.nf

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ process BLOBTOOLKIT_UPDATEBLOBDIR {
1313
tuple val(meta3), path(categories_tsv)
1414
tuple val(meta4), path(blastx, stageAs: "blastx.txt")
1515
tuple val(meta5), path(blastn, stageAs: "blastn.txt")
16-
path(taxdump)
16+
path(taxdump, stageAs: 'taxdump/taxdump.json')
1717

1818
output:
1919
tuple val(meta), path(prefix), emit: blobdir
@@ -35,7 +35,7 @@ process BLOBTOOLKIT_UPDATEBLOBDIR {
3535
mkdir ${prefix}
3636
cp --preserve=timestamp ${input}/* ${prefix}/
3737
blobtools replace \\
38-
--taxdump ${taxdump} \\
38+
--taxdump \$(dirname ${taxdump}) \\
3939
--taxrule bestdistorder=buscoregions \\
4040
${hits_blastx} \\
4141
${hits_blastn} \\

modules/local/jsonify_taxdump.nf

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
// Runs jsonify_taxdump.py on the staged taxdump path and redirects its
// standard output into "<prefix>.json". Also records the script version
// in versions.yml.
process JSONIFY_TAXDUMP {
2+
tag "$meta.id"
3+
label 'process_single'
4+
5+
// NOTE(review): the conda spec lists only requests and pyyaml, while
// bin/jsonify_taxdump.py imports from the blobtools package. Confirm that
// this process is actually usable with conda, or only via the container.
conda "conda-forge::requests=2.28.1 conda-forge::pyyaml=6.0"
6+
container "docker.io/genomehubs/blobtoolkit:4.4.4"
7+
8+
input:
9+
// meta: map with at least an "id" key (used for the tag and default prefix)
// taxdump: path passed as the positional argument of jsonify_taxdump.py
tuple val(meta), path(taxdump)
10+
11+
output:
12+
// Any *.json file in the work directory; the script block writes "<prefix>.json"
tuple val(meta), path("*.json") , emit: json
13+
path "versions.yml" , emit: versions
14+
15+
when:
16+
// Run unless task.ext.when is set to a false value
task.ext.when == null || task.ext.when
17+
18+
script:
19+
// Extra command-line arguments, empty unless task.ext.args is set
def args = task.ext.args ?: ''
20+
// Output file stem, defaulting to the meta id
def prefix = task.ext.prefix ?: "${meta.id}"
21+
"""
22+
jsonify_taxdump.py \\
23+
$taxdump \\
24+
$args \\
25+
> ${prefix}.json
26+
27+
cat <<-END_VERSIONS > versions.yml
28+
"${task.process}":
29+
jsonify_taxdump.py: \$(jsonify_taxdump.py --version | cut -d' ' -f2)
30+
END_VERSIONS
31+
"""
32+
}

subworkflows/local/input_check.nf

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ include { UNTAR } from '../../modules/nf-core/untar/main'
99
include { CAT_CAT } from '../../modules/nf-core/cat/cat/main'
1010
include { SAMTOOLS_FLAGSTAT } from '../../modules/nf-core/samtools/flagstat/main'
1111
include { GENERATE_CONFIG } from '../../modules/local/generate_config'
12+
include { JSONIFY_TAXDUMP } from '../../modules/local/jsonify_taxdump'
1213

1314
workflow INPUT_CHECK {
1415
take:
@@ -159,6 +160,32 @@ workflow INPUT_CHECK {
159160
| first
160161
| set { ch_busco_db }
161162

163+
164+
//
165+
// Convert the taxdump to a JSON file if there isn't one yet
166+
//
167+
ch_databases.taxdump
168+
| filter { meta, db_path -> ! db_path.isFile() }
169+
| map { meta, db_path -> [meta, db_path, db_path.listFiles().find { it.getName().endsWith('.json') }] }
170+
| branch { meta, db_path, json_path ->
171+
json: json_path
172+
return [meta, json_path]
173+
dir: true
174+
return [meta, db_path]
175+
}
176+
| set { taxdump_dirs }
177+
178+
JSONIFY_TAXDUMP( taxdump_dirs.dir )
179+
ch_versions = ch_versions.mix(JSONIFY_TAXDUMP.out.versions.first())
180+
181+
ch_databases.taxdump
182+
| filter { meta, db_path -> db_path.isFile() }
183+
| mix ( taxdump_dirs.json )
184+
| mix( JSONIFY_TAXDUMP.out.json )
185+
| map { _, db_path -> db_path }
186+
| set { ch_taxdump }
187+
188+
162189
emit:
163190
reads // channel: [ val(meta), path(datafile) ]
164191
config = GENERATE_CONFIG.out.yaml // channel: [ val(meta), path(yaml) ]
@@ -170,7 +197,7 @@ workflow INPUT_CHECK {
170197
blastp = ch_databases.blastp.first() // channel: [ val(meta), path(blastp_db) ]
171198
blastx = ch_databases.blastx.first() // channel: [ val(meta), path(blastx_db) ]
172199
busco_db = ch_busco_db // channel: [ path(busco_db) ]
173-
taxdump = ch_databases.taxdump.map { _, db_path -> db_path } // channel: [ path(taxdump) ]
200+
taxdump = ch_taxdump.first() // channel: [ path(taxdump) ]
174201
versions = ch_versions // channel: [ versions.yml ]
175202
}
176203

subworkflows/local/utils_nfcore_blobtoolkit_pipeline/main.nf

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,10 @@ workflow PIPELINE_INITIALISATION {
6565
error('--align not specified, even though the input samplesheet is a nf-core/fetchngs one - i.e has fastq files!')
6666
}
6767

68+
if (file(params.taxdump).isFile() && !params.taxdump.endsWith('.json') && !params.taxdump.endsWith('.tar.gz')) {
69+
error('--taxdump can take either a JSON file, a tar.gz archive, or a directory')
70+
}
71+
6872
ch_fasta = Channel.value([ [ 'id': params.accession ?: file(params.fasta.replace(".gz", "")).baseName ], file(params.fasta) ])
6973

7074
Channel.empty()

0 commit comments

Comments
 (0)