Skip to content

Commit c315435

Browse files
committed
Merge branch 'CW-4947-maintenance' into 'dev'
Bump version [CW-4947] See merge request epi2melabs/workflows/wf-metagenomics!237
2 parents b40efcf + 8fd00de commit c315435

File tree

5 files changed

+29
-31
lines changed

5 files changed

+29
-31
lines changed

CHANGELOG.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
55
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
66

77

8-
## [Unreleased]
8+
## [v2.13.0]
99
### Changed
1010
- Update Seqkit version(>2.6.0) so Abricate can handle bgzip files without decompressing.
1111
- Split taxonomy classification from minimap2 alignment process to make it more modular.

modules/local/common.nf

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -127,23 +127,20 @@ process createAbundanceTables {
127127
/* Extract reads in FASTQ from a list of IDs.
128128
Use for example to output the unclassified reads.
129129
*/
130-
process extractReads {
130+
process publishReads {
131131
label "wfmetagenomics"
132132
publishDir "${params.out_dir}/${output_name}", mode: 'copy', pattern: "*.${output_name}.fq.gz", enabled: params.output_unclassified
133133
tag "${meta.alias}"
134134
cpus 1
135135
memory 4.GB
136136
input:
137-
tuple val(meta), path(concat_seqs), path("ids.txt")
137+
tuple val(meta), path("reads.fq.gz"), path("ids.txt")
138138
val output_name
139139
output:
140140
path "${meta.alias}.${output_name}.fq.gz"
141-
// No output, can publish results in the process?
142-
// At this moment, input sequences are o FASTQ
143-
// or BAM with the return_fastq option enable
144141
script:
145142
"""
146-
seqkit grep --pattern-file ids.txt "${concat_seqs}" -o "${meta.alias}.${output_name}.fq.gz"
143+
seqkit grep --pattern-file ids.txt reads.fq.gz -o "${meta.alias}.${output_name}.fq.gz"
147144
"""
148145
}
149146

nextflow.config

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -135,7 +135,7 @@ manifest {
135135
description = 'Identification of the origin of single reads from both amplicon-targeted and shotgun metagenomics sequencing.'
136136
mainScript = 'main.nf'
137137
nextflowVersion = '>=23.04.2'
138-
version = 'v2.12.1'
138+
version = 'v2.13.0'
139139
}
140140

141141

subworkflows/kraken_pipeline.nf

Lines changed: 22 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@ include {
55
run_common;
66
createAbundanceTables;
77
publish;
8-
extractReads;
98
} from "../modules/local/common"
109

1110

@@ -16,6 +15,10 @@ process run_kraken2 {
1615
label 'wfmetagenomics'
1716
tag "${meta.alias}"
1817
publishDir "${params.out_dir}/kraken2", mode: 'copy', pattern: "*kraken2.report.txt*"
18+
publishDir (
19+
"${params.out_dir}/unclassified", mode: 'copy',
20+
pattern: "${meta.alias}.unclassified.fq.gz", enabled: params.output_unclassified
21+
)
1922
cpus params.threads
2023
// Set the memory required to the size of the database + 4GB overhead.
2124
memory {
@@ -26,13 +29,18 @@ process run_kraken2 {
2629
}
2730
}
2831
errorStrategy {
29-
task.exitStatus == 137 ? log.error("Error 137 may indicate the process ran out of memory.\nIf you are using Docker you should check the amount of RAM allocated to your Docker server.") : ''
32+
task.exitStatus == 137 ? log.error(
33+
'''
34+
Error 137 may indicate the process ran out of memory.
35+
If you are using Docker you should check the amount of
36+
RAM allocated to your Docker server.
37+
'''.stripIndent()) : ''
3038
log.error("Consider to use --kraken2_memory_mapping to reduce the use of RAM memory.")
3139
}
3240
input:
3341
tuple(
3442
val(meta),
35-
path(concat_seqs),
43+
path("reads.fq.gz"),
3644
path(fastq_stats)
3745
)
3846
path kraken_db
@@ -44,22 +52,25 @@ process run_kraken2 {
4452
path("${meta.alias}.kraken2.assignments.tsv"),
4553
emit: kraken2_reports
4654
)
47-
tuple (
55+
tuple(
4856
val(meta),
49-
path("${meta.alias}.unclassified.txt"),
50-
emit: unclassified_ids
57+
path("${meta.alias}.unclassified.fq.gz"),
58+
emit: kraken2_unclassified, optional:true
5159
)
5260
script:
5361
def sample_id = "${meta.alias}"
5462
def memory_mapping = params.kraken2_memory_mapping ? '--memory-mapping' : ''
63+
def unclassified_tmp = "${meta.alias}.unclassified.fq"
64+
def output_unclassified = params.output_unclassified ? '--unclassified-out ' + unclassified_tmp: ''
5565
"""
56-
kraken2 --db ${kraken_db} ${concat_seqs} \
66+
kraken2 --db ${kraken_db} reads.fq.gz \
5767
--threads $task.cpus \
5868
--report "${sample_id}.kraken2.report.txt" \
59-
--confidence ${params.kraken2_confidence} ${memory_mapping} > "${sample_id}.kraken2.assignments.tsv"
60-
# Recover unclassified IDs
61-
csvtk filter2 --no-header-row --tabs -f '\$1=="U"' "${sample_id}.kraken2.assignments.tsv" \
62-
| cut -f2 > "${meta.alias}.unclassified.txt"
69+
--confidence ${params.kraken2_confidence} ${memory_mapping} \
70+
$output_unclassified > "${sample_id}.kraken2.assignments.tsv"
71+
if [ -f $unclassified_tmp ]; then
72+
bgzip "${meta.alias}.unclassified.fq"
73+
fi
6374
"""
6475
}
6576

@@ -214,16 +225,6 @@ workflow kraken_pipeline {
214225
// Find out size of the db. Cannot be done within the process
215226
database_main_file_size = database.resolve('hash.k2d').size()
216227
kraken2_reports = run_kraken2(samples, database, database_main_file_size)
217-
// Output unclassified
218-
if (params.output_unclassified) {
219-
unclassified_to_extract = samples.join(kraken2_reports.unclassified_ids
220-
)
221-
| map { meta, seqs, stats, unclassified_ids ->
222-
[meta, seqs, unclassified_ids]
223-
}
224-
extractReads(unclassified_to_extract, "unclassified")
225-
}
226-
227228
// Run bracken
228229
bracken_reports = run_bracken(kraken2_reports.kraken2_reports, database, taxonomy, bracken_length, taxonomic_rank)
229230
lineages = bracken_reports.bracken_json

subworkflows/minimap_pipeline.nf

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ include {
77
run_common;
88
createAbundanceTables;
99
publish;
10-
extractReads;
10+
publishReads;
1111
} from "../modules/local/common"
1212

1313
OPTIONAL_FILE = file("$projectDir/data/OPTIONAL_FILE")
@@ -312,7 +312,7 @@ workflow minimap_pipeline {
312312
).map { meta, seqs, stats, unclassified_ids ->
313313
[meta, seqs, unclassified_ids]
314314
}
315-
extractReads(unclassified_to_extract, "unclassified")
315+
publishReads(unclassified_to_extract, "unclassified")
316316
}
317317
// Use initial reads stats (after fastcat) QC, but update meta
318318
for_report = samples

0 commit comments

Comments (0)