Skip to content

Commit 2d04b42

Browse files
committed
merging template changes with precomp_busco
1 parent 0c4640a commit 2d04b42

File tree

8 files changed

+49
-27
lines changed

8 files changed

+49
-27
lines changed

bin/generate_config.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ def parse_args(args=None):
5151
parser.add_argument("--blastx", help="Path to the blastx database", required=True)
5252
parser.add_argument("--blastn", help="Path to the blastn database", required=True)
5353
parser.add_argument("--taxdump", help="Path to the taxonomy database", required=True)
54-
parser.add_argument("--busco_output", action="append", help="Path to BUSCO output directory", required=False)
54+
parser.add_argument("--precomputed_busco", action="append", help="Path to precomputed BUSCO outputs", required=False)
5555
parser.add_argument("--version", action="version", version="%(prog)s 2.0")
5656
return parser.parse_args(args)
5757

@@ -353,7 +353,7 @@ def main(args=None):
353353
taxon_info = fetch_taxon_info(args.taxon_query)
354354
classification = get_classification(taxon_info)
355355

356-
precomputed_busco = [os.path.basename(path).replace("run_", "") for path in (args.busco_output or [])]
356+
precomputed_busco = [os.path.basename(path).replace("run_", "") for path in (args.precomputed_busco or [])]
357357
odb_arr = get_odb(taxon_info, args.lineage_tax_ids, args.busco, precomputed_busco)
358358
taxon_id = adjust_taxon_id(args.nt, taxon_info)
359359

conf/test_nobusco.config

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -10,15 +10,18 @@
1010
----------------------------------------------------------------------------------------
1111
*/
1212

13+
process {
14+
resourceLimits = [
15+
cpus: 2,
16+
memory: '6.GB',
17+
time: '6.h'
18+
]
19+
}
20+
1321
params {
1422
config_profile_name = 'Test profile'
1523
config_profile_description = 'Minimal aligned test dataset to check pipeline function'
1624

17-
// Limit resources so that this can run on GitHub Actions
18-
max_cpus = 2
19-
max_memory = '6.GB'
20-
max_time = '6.h'
21-
2225
// Input test data
2326
// Specify the paths to your test data
2427
// Give any required params for the test so that command line flags are not needed
@@ -39,8 +42,8 @@ params {
3942
// Precomputed BUSCO outputs
4043
// busco_output_noArchaea.tar.gz deliberately leaves out archaea_odb10 to test the pipeline's detection and filling of missing lineages
4144
// Switch to *_busco_output.tar.gz for fully precomputed BUSCOs
42-
busco_output = "https://tolit.cog.sanger.ac.uk/test-data/Meles_meles/resources/GCA_922984935.2_busco_output_noArchaea.tar.gz"
43-
//busco_output = "https://tolit.cog.sanger.ac.uk/test-data/Meles_meles/resources/GCA_922984935.2_busco_output.tar.gz"
45+
precomputed_busco = "https://tolit.cog.sanger.ac.uk/test-data/Meles_meles/resources/GCA_922984935.2_busco_output_noArchaea.tar.gz"
46+
//precomputed_busco = "https://tolit.cog.sanger.ac.uk/test-data/Meles_meles/resources/GCA_922984935.2_busco_output.tar.gz"
4447

4548
// Need to be set to avoid overfilling /tmp
4649
use_work_dir_as_temp = true

nextflow.config

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ params {
1616
mask = false
1717
fetchngs_samplesheet = false
1818
busco_lineages = null
19-
busco_output = null
19+
precomputed_busco = null
2020

2121
// Reference options
2222
fasta = null

nextflow_schema.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -334,7 +334,7 @@
334334
}
335335
],
336336
"properties": {
337-
"busco_output": {
337+
"precomputed_busco": {
338338
"type": "string"
339339
}
340340
}

subworkflows/local/busco_diamond_blastp.nf

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -15,13 +15,14 @@ workflow BUSCO_DIAMOND {
1515
busco_db // channel: path(busco_db)
1616
blastp // channel: path(blastp_db)
1717
taxon_id // channel: val(taxon_id)
18-
busco_output // channel: [ val(meta), path(fasta) ] optional precomputed busco outputs
19-
18+
precomputed_busco // channel: [ val(meta), path(busco_run_dir) ] optional precomputed busco outputs
2019

2120
main:
2221
ch_versions = Channel.empty()
2322

2423

24+
precomputed_busco.view()
25+
2526
//
2627
// Prepare the BUSCO lineages
2728
//
@@ -44,16 +45,16 @@ workflow BUSCO_DIAMOND {
4445
//
4546
// Format pre-computed outputs
4647
//
47-
if (params.busco_output){
48-
ch_busco_output = busco_output
48+
if (params.precomputed_busco){
49+
ch_precomputed_busco = precomputed_busco
4950
.map { meta, dir -> [meta.lineage, [meta, dir]] }
5051
}else{
51-
ch_busco_output = Channel.empty()
52+
ch_precomputed_busco = Channel.empty()
5253
}
5354

5455
ch_combined = ch_fasta_with_lineage
5556
.map { meta, fasta -> [meta.lineage_name, [meta, fasta]] }
56-
.join(ch_busco_output, by: 0, remainder: true)
57+
.join(ch_precomputed_busco, by: 0, remainder: true)
5758
.map { lineage, fasta_data, busco_data ->
5859
def (meta, fasta) = fasta_data
5960
def (busco_meta, busco_dir) = busco_data ?: [null, null]
@@ -96,12 +97,12 @@ workflow BUSCO_DIAMOND {
9697
//
9798
// Run BUSCO search
9899
//
99-
BUSCO (
100-
ch_fasta_with_lineage,
101-
"genome",
102-
ch_fasta_with_lineage.map { it[0].lineage_name },
100+
BUSCO_BUSCO(
101+
ch_busco_to_run.to_compute,
102+
'genome',
103+
ch_busco_to_run.to_compute.map { it[0].lineage_name },
103104
busco_db,
104-
[],
105+
[]
105106
)
106107
ch_versions = ch_versions.mix ( BUSCO_BUSCO.out.versions.first() )
107108

subworkflows/local/input_check.nf

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -56,10 +56,11 @@ workflow INPUT_CHECK {
5656
blastn: db_meta.type == "blastn"
5757
blastp: db_meta.type == "blastp"
5858
blastx: db_meta.type == "blastx"
59-
busco_output: db_meta.type == "busco_output"
59+
precomputed_busco: db_meta.type == "precomputed_busco"
6060
busco: db_meta.type == "busco"
6161
taxdump: db_meta.type == "taxdump"
6262
}
63+
ch_databases.precomputed_busco.view()
6364

6465
//
6566
// SUBWORKFLOW: Process samplesheet
@@ -105,7 +106,7 @@ workflow INPUT_CHECK {
105106

106107
// Get the source paths of all the databases, except the BUSCO database and precomputed BUSCO outputs, which are not recorded in the blobDir meta.json
107108
databases
108-
| filter { meta, file -> meta.type != "busco" }
109+
| filter { meta, file -> meta.type != "busco" && meta.type != "precomputed_busco" }
109110
| map {meta, file -> [meta, file.toUriString()]}
110111
| set { db_paths }
111112

@@ -153,7 +154,22 @@ workflow INPUT_CHECK {
153154
| collect
154155
| set { ch_busco_lineages }
155156

156-
// Remove any invalid lineages from busco_outputs
157+
// Format pre-computed BUSCOs (if provided)
158+
// Parse the BUSCO output directories
159+
if (ch_databases.precomputed_busco) {
160+
ch_parsed_busco = ch_databases.precomputed_busco
161+
.flatMap { meta, dir ->
162+
def subdirs = file(dir).listFiles().findAll { it.isDirectory() }
163+
subdirs.collect { subdir ->
164+
def lineage = subdir.name.split('_')[1..-1].join('_')
165+
[[type: 'precomputed_busco', id: subdir.name, lineage: lineage], subdir]
166+
}
167+
}
168+
} else {
169+
ch_parsed_busco = Channel.empty()
170+
}
171+
172+
// Remove any invalid lineages from precomputed_busco
157173
ch_busco_lineages_list = ch_busco_lineages.flatten()
158174
ch_parsed_busco_filtered = ch_parsed_busco
159175
.filter { meta, path ->
@@ -180,6 +196,7 @@ workflow INPUT_CHECK {
180196
blastn = ch_databases.blastn.first() // channel: [ val(meta), path(blastn_db) ]
181197
blastp = ch_databases.blastp.first() // channel: [ val(meta), path(blastp_db) ]
182198
blastx = ch_databases.blastx.first() // channel: [ val(meta), path(blastx_db) ]
199+
precomputed_busco = ch_parsed_busco // channel: [ val(meta), path(busco_run_dir) ]
183200
busco_db = ch_busco_db // channel: [ path(busco_db) ]
184201
taxdump = ch_databases.taxdump.map { _, db_path -> db_path } // channel: [ path(taxdump) ]
185202
versions = ch_versions // channel: [ versions.yml ]

subworkflows/local/utils_nfcore_blobtoolkit_pipeline/main.nf

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@ workflow PIPELINE_INITIALISATION {
7171
.concat( Channel.fromPath(params.blastn).map { tuple(["type": "blastn"], it) } )
7272
.concat( Channel.fromPath(params.blastx).map { tuple(["type": "blastx"], it) } )
7373
.concat( Channel.fromPath(params.blastp).map { tuple(["type": "blastp"], it) } )
74+
.concat( params.precomputed_busco ? Channel.fromPath(params.precomputed_busco).map { tuple([ "type": "precomputed_busco"], it ) } : Channel.empty() )
7475
.concat( params.busco ? Channel.fromPath(params.busco).map { tuple([ "type": "busco"], it ) } : Channel.empty() )
7576
.concat( Channel.fromPath(params.taxdump).map { tuple(["type": "taxdump"], it) } )
7677
.set { ch_databases }

workflows/blobtoolkit.nf

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,7 @@ workflow BLOBTOOLKIT {
9090
INPUT_CHECK.out.busco_db.first(),
9191
INPUT_CHECK.out.blastp.first(),
9292
INPUT_CHECK.out.taxon_id,
93-
INPUT_CHECK.out.busco_output,
93+
INPUT_CHECK.out.precomputed_busco,
9494
)
9595
ch_versions = ch_versions.mix ( BUSCO_DIAMOND.out.versions )
9696

@@ -204,7 +204,7 @@ workflow BLOBTOOLKIT {
204204
)
205205
)
206206

207-
ch_multiqc_files = ch_multiqc_files.mix(BUSCO_DIAMOND.out.multiqc.collect{it[1]}.ifEmpty([]))
207+
// ch_multiqc_files = ch_multiqc_files.mix(BUSCO_DIAMOND.out.multiqc.collect{it[1]}.ifEmpty([]))
208208

209209
MULTIQC (
210210
ch_multiqc_files.collect(),

0 commit comments

Comments
 (0)