Skip to content

Commit 2e5b487

Browse files
committed
working for precomputed, not without...
1 parent 42cb2c1 commit 2e5b487

File tree

4 files changed

+177
-169
lines changed

4 files changed

+177
-169
lines changed

conf/test_nobusco.config

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,9 @@ params {
3535
blastp = "https://tolit.cog.sanger.ac.uk/test-data/Meles_meles/resources/mMelMel3.1.buscogenes.dmnd.tar.gz"
3636
blastx = "https://tolit.cog.sanger.ac.uk/test-data/Meles_meles/resources/mMelMel3.1.buscoregions.dmnd.tar.gz"
3737
blastn = "https://tolit.cog.sanger.ac.uk/test-data/Meles_meles/resources/nt_mMelMel3.1.tar.gz"
38+
39+
// Precomputed BUSCO outputs
40+
// Note: archaea_odb10 is deliberately left out to test the addition of missing lineages
3841
busco_output = "busco_output.tar.gz"
3942

4043
// Needs to be set to avoid overfilling /tmp

modules/local/restructurebuscodir.nf

Lines changed: 12 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ process RESTRUCTUREBUSCODIR {
88
'nf-core/ubuntu:20.04' }"
99

1010
input:
11-
tuple val(meta), val(lineage), path(batch_summary), path(short_summaries_txt), path(short_summaries_json), path(busco_dir)
11+
tuple val(meta), val(lineage), path(batch_summary), path(short_summary_txt), path(short_summary_json), path(full_table), path(missing_busco_list), path(single_copy_busco_sequences), path(multi_copy_busco_sequences), path(fragmented_busco_sequences), path(hmmer_output)
1212

1313
output:
1414
tuple val(meta), path("${lineage}"), emit: clean_busco_dir
@@ -21,24 +21,23 @@ process RESTRUCTUREBUSCODIR {
2121
def args = task.ext.args ?: ''
2222
prefix = task.ext.prefix ?: "${meta.id}"
2323
"""
24-
mkdir ${lineage}
24+
mkdir -p ${lineage}
2525
26-
cp --dereference ${batch_summary} ${lineage}/short_summary.tsv
27-
[ -n "${short_summaries_txt}" ] && cp --dereference ${short_summaries_txt} ${lineage}/short_summary.txt
28-
[ -n "${short_summaries_json}" ] && cp --dereference ${short_summaries_json} ${lineage}/short_summary.json
26+
cp --dereference ${batch_summary} ${lineage}/short_summary.tsv
27+
[ -n "${short_summary_txt}" ] && cp --dereference ${short_summary_txt} ${lineage}/short_summary.txt
28+
[ -n "${short_summary_json}" ] && cp --dereference ${short_summary_json} ${lineage}/short_summary.json
2929
30-
# Should we compress these ?
31-
[ -e ${busco_dir}/*/run_*/full_table.tsv ] && cp ${busco_dir}/*/run_*/full_table.tsv ${lineage}/
32-
[ -e ${busco_dir}/*/run_*/missing_busco_list.tsv ] && cp ${busco_dir}/*/run_*/missing_busco_list.tsv ${lineage}/
30+
[ -e ${full_table} ] && cp ${full_table} ${lineage}/
31+
[ -e ${missing_busco_list} ] && cp ${missing_busco_list} ${lineage}/
3332
34-
tar czf ${lineage}/single_copy_busco_sequences.tar.gz -C ${busco_dir}/*/run_*/busco_sequences single_copy_busco_sequences
35-
tar czf ${lineage}/multi_copy_busco_sequences.tar.gz -C ${busco_dir}/*/run_*/busco_sequences multi_copy_busco_sequences
36-
tar czf ${lineage}/fragmented_busco_sequences.tar.gz -C ${busco_dir}/*/run_*/busco_sequences fragmented_busco_sequences
37-
tar czf ${lineage}/hmmer_output.tar.gz --exclude=.checkpoint -C ${busco_dir}/*/run_* hmmer_output
33+
tar czf ${lineage}/single_copy_busco_sequences.tar.gz -C \$(dirname ${single_copy_busco_sequences}) \$(basename ${single_copy_busco_sequences})
34+
tar czf ${lineage}/multi_copy_busco_sequences.tar.gz -C \$(dirname ${multi_copy_busco_sequences}) \$(basename ${multi_copy_busco_sequences})
35+
tar czf ${lineage}/fragmented_busco_sequences.tar.gz -C \$(dirname ${fragmented_busco_sequences}) \$(basename ${fragmented_busco_sequences})
36+
tar czf ${lineage}/hmmer_output.tar.gz --exclude=.checkpoint -C \$(dirname ${hmmer_output}) \$(basename ${hmmer_output})
3837
3938
cat <<-END_VERSIONS > versions.yml
4039
"${task.process}":
41-
tar: \$(tar --version| awk 'NR==1 {print \$3}' )
40+
tar: \$(tar --version | awk 'NR==1 {print \$3}')
4241
END_VERSIONS
4342
"""
4443
}

subworkflows/local/busco_diamond_blastp.nf

Lines changed: 64 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ workflow BUSCO_DIAMOND {
7070
meta,
7171
[
7272
batch_summary: file("${busco_dir}/short_summary.txt"),
73-
short_summaries_txt: file("${busco_dir}/short_summary.txt"),
73+
short_summaries_txt: [],
7474
short_summaries_json: file("${busco_dir}/short_summary.json"),
7575
full_table: file("${busco_dir}/full_table.tsv"),
7676
missing_busco_list: file("${busco_dir}/missing_busco_list.tsv"),
@@ -81,6 +81,7 @@ workflow BUSCO_DIAMOND {
8181
]
8282
]
8383
}
84+
8485
ch_formatted_precomputed.view()
8586

8687
//
@@ -127,77 +128,82 @@ workflow BUSCO_DIAMOND {
127128
// Tidy up the BUSCO output directories before publication
128129
//
129130
RESTRUCTUREBUSCODIR(
130-
ch_formatted_precomputed
131+
ch_all_busco_outputs
131132
.map { meta, outputs ->
132133
[
133134
meta,
134135
meta.lineage_name,
135136
outputs.batch_summary,
136-
outputs.short_summaries_txt,
137-
outputs.short_summaries_json,
138-
outputs.busco_dir
137+
outputs.short_summaries_txt ?: [],
138+
outputs.short_summaries_json ?: [],
139+
outputs.full_table ?: [],
140+
outputs.missing_busco_list ?: [],
141+
outputs.seq_dir ? "${outputs.seq_dir}/single_copy_busco_sequences" : [],
142+
outputs.seq_dir ? "${outputs.seq_dir}/multi_copy_busco_sequences" : [],
143+
outputs.seq_dir ? "${outputs.seq_dir}/fragmented_busco_sequences" : [],
144+
outputs.busco_dir ? "${outputs.busco_dir}/hmmer_output" : []
139145
]
140146
}
141147
)
142-
ch_versions = ch_versions.mix(RESTRUCTUREBUSCODIR.out.versions.first())
143-
144-
145-
// //
146-
// // Select input for BLOBTOOLKIT_EXTRACTBUSCOS
147-
// //
148-
// ch_formatted_precomputed.seq_dir
149-
// | filter { meta, seq -> basal_lineages.contains(meta.lineage_name) }
150-
// | map { meta, seq -> seq }
151-
// | collect
152-
// | set { ch_basal_buscos }
153-
154-
155-
// // Extract BUSCO genes from the basal lineages
156-
// BLOBTOOLKIT_EXTRACTBUSCOS ( fasta, ch_basal_buscos )
157-
// ch_versions = ch_versions.mix ( BLOBTOOLKIT_EXTRACTBUSCOS.out.versions.first() )
158-
159-
160-
// //
161-
// // Align BUSCO genes against the BLASTp database
162-
// //
163-
// BLOBTOOLKIT_EXTRACTBUSCOS.out.genes
164-
// | filter { it[1].size() > 140 }
165-
// | set { ch_busco_genes }
166-
167-
// // Hardcoded to match the format expected by blobtools
168-
// def outext = 'txt'
169-
// def cols = 'qseqid staxids bitscore qseqid sseqid pident length mismatch gapopen qstart qend sstart send evalue bitscore'
170-
// DIAMOND_BLASTP ( ch_busco_genes, blastp, outext, cols, taxon_id )
171-
// ch_versions = ch_versions.mix ( DIAMOND_BLASTP.out.versions.first() )
172-
173-
174-
// // Order BUSCO results according to the lineage index
175-
// ch_formatted_precomputed.full_table
176-
// // 1. Restore the original meta map, and pull the index as an extra tuple element
177-
// | map { meta, table -> [meta.findAll { it.key != "lineage_name" && it.key != "lineage_index" }, [table, meta.lineage_index]] }
178-
// // 2. Turn to a single-element channel that has the (one and only) meta map, and all the pairs (table, lineage index) concatenated as a list
179-
// | groupTuple()
180-
// // 3. Sort the pairs and discard the index
181-
// | map { meta, table_positions -> [ meta, table_positions.sort { a, b -> a[1] <=> b[1] } . collect { table, lineage_index -> table } ] }
182-
// | set { ch_indexed_buscos }
183148

149+
//
150+
// Select input for BLOBTOOLKIT_EXTRACTBUSCOS
151+
//
152+
ch_all_busco_outputs
153+
.filter { meta, outputs -> basal_lineages.contains(meta.lineage_name) }
154+
.map { meta, outputs -> [meta, outputs.seq_dir] }
155+
.collect { it[1] }
156+
.set { ch_basal_buscos }
184157

185-
// // Select BUSCO results for taxonomically closest database
186-
// ch_indexed_buscos
187-
// | map { meta, tables -> [meta, tables[0]] }
188-
// | set { ch_first_table }
158+
// Extract BUSCO genes from the basal lineages
159+
BLOBTOOLKIT_EXTRACTBUSCOS ( fasta, ch_basal_buscos )
160+
ch_versions = ch_versions.mix ( BLOBTOOLKIT_EXTRACTBUSCOS.out.versions.first() )
189161

162+
//
163+
// Align BUSCO genes against the BLASTp database
164+
//
165+
BLOBTOOLKIT_EXTRACTBUSCOS.out.genes
166+
.filter { it[1].size() > 140 }
167+
.set { ch_busco_genes }
168+
169+
// Hardcoded to match the format expected by blobtools
170+
def outext = 'txt'
171+
def cols = 'qseqid staxids bitscore qseqid sseqid pident length mismatch gapopen qstart qend sstart send evalue bitscore'
172+
DIAMOND_BLASTP ( ch_busco_genes, blastp, outext, cols, taxon_id )
173+
ch_versions = ch_versions.mix ( DIAMOND_BLASTP.out.versions.first() )
174+
175+
// Order BUSCO results according to the lineage index
176+
ch_all_busco_outputs
177+
.map { meta, outputs ->
178+
[
179+
meta.findAll { it.key != "lineage_name" && it.key != "lineage_index" },
180+
[outputs.full_table, meta.lineage_index]
181+
]
182+
}
183+
.groupTuple()
184+
.map { meta, table_positions ->
185+
[
186+
meta,
187+
table_positions.sort { a, b -> a[1] <=> b[1] }.collect { table, lineage_index -> table }
188+
]
189+
}
190+
.set { ch_indexed_buscos }
190191

191-
// // BUSCO results for MULTIQC
192-
// ch_formatted_precomputed.short_summaries_txt
193-
// | ifEmpty ( [ [], [] ] )
194-
// | set { multiqc }
192+
// Select BUSCO results for taxonomically closest database
193+
ch_indexed_buscos
194+
.map { meta, tables -> [meta, tables[0]] }
195+
.set { ch_first_table }
195196

197+
// BUSCO results for MULTIQC
198+
ch_all_busco_outputs
199+
.map { meta, outputs -> outputs.batch_summary }
200+
.ifEmpty ( [ [], [] ] )
201+
.set { multiqc }
196202

197203
emit:
198-
// first_table = ch_first_table // channel: [ val(meta), path(full_table) ]
199-
// all_tables = ch_indexed_buscos // channel: [ val(meta), path(full_tables) ]
200-
// blastp_txt = DIAMOND_BLASTP.out.txt // channel: [ val(meta), path(txt) ]
201-
// multiqc // channel: [ meta, summary ]
204+
first_table = ch_first_table // channel: [ val(meta), path(full_table) ]
205+
all_tables = ch_indexed_buscos // channel: [ val(meta), path(full_tables) ]
206+
blastp_txt = DIAMOND_BLASTP.out.txt // channel: [ val(meta), path(txt) ]
207+
multiqc // channel: [ meta, summary ]
202208
versions = ch_versions // channel: [ versions.yml ]
203209
}

0 commit comments

Comments
 (0)