Skip to content

Commit 7d0c162

Browse files
authored
Merge pull request #866 from nf-core/caddfix
Add option to use prescored CADD annotations
2 parents 2f59757 + ea69807 commit 7d0c162

11 files changed

Lines changed: 97 additions & 67 deletions

File tree

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
55

66
## 3.0.1 - Mario-patch [2026-05-25]
77

8+
### `Added`
9+
10+
- Parameter `cadd_prescored` to pass a directory of pre-scored CADD indel annotations to the CADD process in the genome and mitochondrial SNV annotation subworkflows [#866](https://github.com/nf-core/raredisease/pull/866)
11+
812
### `Fixed`
913

1014
- Add a bcftools norm split-multiallelics step after merging standard and shifted MT calls to handle new multiallelic sites introduced by bcftools merge [#855](https://github.com/nf-core/raredisease/pull/855)

main.nf

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ workflow NFCORE_RAREDISEASE {
4848
val_bwa
4949
val_bwamem2
5050
val_bwameme
51+
val_cadd_prescored
5152
val_cadd_resources
5253
val_call_interval
5354
val_concatenate_snv_calls
@@ -233,6 +234,7 @@ workflow NFCORE_RAREDISEASE {
233234
ch_svd_ud = channelFromPath(val_verifybamid_svd_ud)
234235

235236
// Using channelFromPathWithMeta helper (with simpleName). If filepath is null, returns [[:],[]]
237+
ch_cadd_prescored = channelFromPathWithMeta(val_cadd_prescored, true)
236238
ch_cadd_resources = channelFromPathWithMeta(val_cadd_resources, true)
237239
ch_call_interval = channelFromPathWithMeta(val_call_interval, true)
238240
ch_ml_model = channelFromPathWithMeta(val_ml_model, true)
@@ -378,6 +380,7 @@ workflow NFCORE_RAREDISEASE {
378380
ch_alignments,
379381
ch_bait_intervals,
380382
ch_cadd_header,
383+
ch_cadd_prescored,
381384
ch_cadd_resources,
382385
ch_call_interval,
383386
ch_case_info,
@@ -566,6 +569,7 @@ workflow {
566569
params.bwa,
567570
params.bwamem2,
568571
params.bwameme,
572+
params.cadd_prescored,
569573
params.cadd_resources,
570574
params.call_interval,
571575
params.concatenate_snv_calls,

nextflow.config

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ params {
5353
bwamem2 = null
5454
bwameme = null
5555
call_interval = null
56+
cadd_prescored = null
5657
cadd_resources = null
5758
gcnvcaller_model = null
5859
gens_interval_list = null

nextflow_schema.json

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,14 @@
8181
"help_text": "If none provided, will be generated automatically from the FASTA reference.",
8282
"fa_icon": "fas fa-folder-open"
8383
},
84+
"cadd_prescored": {
85+
"type": "string",
86+
"exists": true,
87+
"format": "directory-path",
88+
"fa_icon": "fas fa-folder-open",
89+
"description": "Path to the directory containing pre-scored CADD indel annotations.",
90+
"help_text": "This folder contains the pre-scored indel files that would otherwise be in the data/prescored folder, as described in https://github.com/kircherlab/CADD-scripts/#manual-installation."
91+
},
8492
"cadd_resources": {
8593
"type": "string",
8694
"exists": true,

subworkflows/local/annotate_cadd/main.nf

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -13,11 +13,12 @@ include { TABIX_TABIX as TABIX_CADD } from '../../../modules/nf-core/
1313
workflow ANNOTATE_CADD {
1414

1515
take:
16-
ch_cadd_resources // channel: [mandatory] [ path(dir) ]
17-
ch_fai // channel: [optional] [ path(fai) ]
18-
ch_header // channel: [mandatory] [ path(txt) ]
19-
ch_vcf // channel: [mandatory] [ val(meta), path(vcfs), path(idx) ]
20-
val_genome // string: GRCh37 or GRCh38
16+
ch_cadd_resources // channel: [mandatory] [ val(meta), path(dir) ]
17+
ch_cadd_prescored // channel: [optional] [ val(meta), path(prescored) ]
18+
ch_fai // channel: [optional] [ val(meta), path(fai) ]
19+
ch_header // channel: [mandatory] [ path(txt) ]
20+
ch_vcf // channel: [mandatory] [ val(meta), path(vcfs), path(idx) ]
21+
val_genome // string: GRCh37 or GRCh38
2122

2223
main:
2324
ch_rename_chrs = channel.value([[]])
@@ -45,7 +46,7 @@ workflow ANNOTATE_CADD {
4546

4647
BCFTOOLS_VIEW(ch_vcf, [], [], [])
4748

48-
CADD(BCFTOOLS_VIEW.out.vcf, ch_cadd_resources, [[:], []])
49+
CADD(BCFTOOLS_VIEW.out.vcf, ch_cadd_resources, ch_cadd_prescored)
4950

5051
TABIX_CADD(CADD.out.tsv)
5152

subworkflows/local/annotate_cadd/tests/main.nf.test

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -26,14 +26,15 @@ nextflow_workflow {
2626
workflow {
2727
"""
2828
input[0] = Channel.from("\$PWD").map { dir -> [ [ id: 'cadd_resources' ], dir ] }
29-
input[1] = channel.of([[id:'genome'], file(params.pipelines_testdata_base_path + 'reference/reference.fasta.fai', checkIfExists: true)]).collect()
30-
input[2] = channel.of(file('https://raw.githubusercontent.com/nf-core/raredisease/refs/heads/master/assets/cadd_to_vcf_header_-1.0-.txt', checkIfExists: true)).collect()
31-
input[3] = channel.of([
29+
input[1] = channel.value([[:], []])
30+
input[2] = channel.of([[id:'genome'], file(params.pipelines_testdata_base_path + 'reference/reference.fasta.fai', checkIfExists: true)]).collect()
31+
input[3] = channel.of(file('https://raw.githubusercontent.com/nf-core/raredisease/refs/heads/master/assets/cadd_to_vcf_header_-1.0-.txt', checkIfExists: true)).collect()
32+
input[4] = channel.of([
3233
[id:'test', single_end: false],
3334
file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true),
3435
file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true)
3536
])
36-
input[4] = 'GRCh37'
37+
input[5] = 'GRCh37'
3738
"""
3839
}
3940
}
@@ -57,14 +58,15 @@ nextflow_workflow {
5758
workflow {
5859
"""
5960
input[0] = Channel.from("\$PWD").map { dir -> [ [ id: 'cadd_resources' ], dir ] }
60-
input[1] = channel.of([[id:'genome'], file(params.pipelines_testdata_base_path + 'reference/reference.fasta.fai', checkIfExists: true)]).collect()
61-
input[2] = channel.of(file('https://raw.githubusercontent.com/nf-core/raredisease/refs/heads/master/assets/cadd_to_vcf_header_-1.0-.txt', checkIfExists: true)).collect()
62-
input[3] = channel.of([
61+
input[1] = channel.value([[:], []])
62+
input[2] = channel.of([[id:'genome'], file(params.pipelines_testdata_base_path + 'reference/reference.fasta.fai', checkIfExists: true)]).collect()
63+
input[3] = channel.of(file('https://raw.githubusercontent.com/nf-core/raredisease/refs/heads/master/assets/cadd_to_vcf_header_-1.0-.txt', checkIfExists: true)).collect()
64+
input[4] = channel.of([
6365
[id:'test', single_end: false],
6466
file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true),
6567
file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true)
6668
])
67-
input[4] = 'GRCh38'
69+
input[5] = 'GRCh38'
6870
"""
6971
}
7072
}

subworkflows/local/annotate_genome_snvs/main.nf

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,8 @@ workflow ANNOTATE_GENOME_SNVS {
2121

2222
take:
2323
ch_cadd_header // channel: [mandatory] [ path(txt) ]
24-
ch_cadd_resources // channel: [mandatory] [ path(annotation) ]
24+
ch_cadd_prescored // channel: [optional] [ val(meta), path(prescored) ]
25+
ch_cadd_resources // channel: [mandatory] [ val(meta), path(annotation) ]
2526
ch_genome_chrsizes // channel: [mandatory] [ path(sizes) ]
2627
ch_genome_fai // channel: [mandatory] [ val(meta), path(fai) ]
2728
ch_genome_fasta // channel: [mandatory] [ val(meta), path(fasta) ]
@@ -121,6 +122,7 @@ workflow ANNOTATE_GENOME_SNVS {
121122

122123
ANNOTATE_CADD (
123124
ch_cadd_resources,
125+
ch_cadd_prescored,
124126
ch_genome_fai,
125127
ch_cadd_header,
126128
ch_cadd_in,

subworkflows/local/annotate_genome_snvs/tests/main.nf.test

Lines changed: 38 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -30,28 +30,29 @@ nextflow_workflow {
3030
workflow {
3131
"""
3232
input[0] = channel.of([[id:'resources'], file('https://raw.githubusercontent.com/nf-core/raredisease/refs/heads/master/assets/cadd_to_vcf_header_-1.0-.txt', checkIfExists: true)]).collect()
33-
input[1] = Channel.from(env('PWD')).map { dir -> [ [ id: 'cadd_resources' ], dir ] }
34-
input[2] = channel.of(file(params.pipelines_testdata_base_path + 'reference/reference_chr.sizes', checkIfExists: true)).collect()
35-
input[3] = channel.of([[id:'genome'], file(params.pipelines_testdata_base_path + 'reference/reference.fasta.fai', checkIfExists: true)]).collect()
36-
input[4] = channel.of([[id:'genome'], file(params.pipelines_testdata_base_path + 'reference/reference.fasta', checkIfExists: true)]).collect()
37-
input[5] = channel.value([[],[]])
38-
input[6] = channel.of([id:'earlycasualcaiman', sample:'earlycasualcaiman', sex:1, phenotype:2, paternal:0, maternal:0, case_id:'justhusky'])
39-
input[7] = channel.of(file(params.pipelines_testdata_base_path + 'reference/target_wgs.interval_list', checkIfExists: true))
40-
input[8] = channel.of([
33+
input[1] = channel.value([[:], []])
34+
input[2] = Channel.from(env('PWD')).map { dir -> [ [ id: 'cadd_resources' ], dir ] }
35+
input[3] = channel.of(file(params.pipelines_testdata_base_path + 'reference/reference_chr.sizes', checkIfExists: true)).collect()
36+
input[4] = channel.of([[id:'genome'], file(params.pipelines_testdata_base_path + 'reference/reference.fasta.fai', checkIfExists: true)]).collect()
37+
input[5] = channel.of([[id:'genome'], file(params.pipelines_testdata_base_path + 'reference/reference.fasta', checkIfExists: true)]).collect()
38+
input[6] = channel.value([[],[]])
39+
input[7] = channel.of([id:'earlycasualcaiman', sample:'earlycasualcaiman', sex:1, phenotype:2, paternal:0, maternal:0, case_id:'justhusky'])
40+
input[8] = channel.of(file(params.pipelines_testdata_base_path + 'reference/target_wgs.interval_list', checkIfExists: true))
41+
input[9] = channel.of([
4142
[id:'justhusky', probands:['earlycasualcaiman'], upd_children:['earlycasualcaiman'], mother:'', father:''],
4243
file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true),
4344
file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true)
4445
])
45-
input[9] = channel.value([[]])
46-
input[10] = channel.of(file(params.pipelines_testdata_base_path + 'reference/vcfanno_functions.lua', checkIfExists: true)).collect()
47-
input[11] = channel.value(file(params.pipelines_testdata_base_path + 'reference/grch38_gnomad_reformated_-r3.1.1-.vcf.gz', checkIfExists: true))
48-
input[12] = channel.of(file(params.pipelines_testdata_base_path + 'reference/vcfanno_config.toml', checkIfExists: true)).collect()
49-
input[13] = channel.of(file(params.pipelines_testdata_base_path + 'reference/vep_cache_and_plugins.tar.gz', checkIfExists: true)).collect()
50-
input[14] = channel.value([])
51-
input[15] = 'wgs'
52-
input[16] = null
53-
input[17] = 'GRCh37'
54-
input[18] = 107
46+
input[10] = channel.value([[]])
47+
input[11] = channel.of(file(params.pipelines_testdata_base_path + 'reference/vcfanno_functions.lua', checkIfExists: true)).collect()
48+
input[12] = channel.value(file(params.pipelines_testdata_base_path + 'reference/grch38_gnomad_reformated_-r3.1.1-.vcf.gz', checkIfExists: true))
49+
input[13] = channel.of(file(params.pipelines_testdata_base_path + 'reference/vcfanno_config.toml', checkIfExists: true)).collect()
50+
input[14] = channel.of(file(params.pipelines_testdata_base_path + 'reference/vep_cache_and_plugins.tar.gz', checkIfExists: true)).collect()
51+
input[15] = channel.value([])
52+
input[16] = 'wgs'
53+
input[17] = null
54+
input[18] = 'GRCh37'
55+
input[19] = 107
5556
"""
5657
}
5758
}
@@ -86,28 +87,29 @@ nextflow_workflow {
8687
workflow {
8788
"""
8889
input[0] = channel.of([[id:'resources'], file('https://raw.githubusercontent.com/nf-core/raredisease/refs/heads/master/assets/cadd_to_vcf_header_-1.0-.txt', checkIfExists: true)]).collect()
89-
input[1] = Channel.from(env('PWD')).map { dir -> [ [ id: 'cadd_resources' ], dir ] }
90-
input[2] = channel.of(file(params.pipelines_testdata_base_path + 'reference/reference_chr.sizes', checkIfExists: true)).collect()
91-
input[3] = channel.of([[id:'genome'], file(params.pipelines_testdata_base_path + 'reference/reference.fasta.fai', checkIfExists: true)]).collect()
92-
input[4] = channel.of([[id:'genome'], file(params.pipelines_testdata_base_path + 'reference/reference.fasta', checkIfExists: true)]).collect()
93-
input[5] = channel.value([[],[]])
94-
input[6] = channel.of([id:'earlycasualcaiman', sample:'earlycasualcaiman', sex:1, phenotype:2, paternal:0, maternal:0, case_id:'justhusky'])
95-
input[7] = channel.of(file(params.pipelines_testdata_base_path + 'reference/target_wgs.interval_list', checkIfExists: true))
96-
input[8] = channel.of([
90+
input[1] = channel.value([[:], []])
91+
input[2] = Channel.from(env('PWD')).map { dir -> [ [ id: 'cadd_resources' ], dir ] }
92+
input[3] = channel.of(file(params.pipelines_testdata_base_path + 'reference/reference_chr.sizes', checkIfExists: true)).collect()
93+
input[4] = channel.of([[id:'genome'], file(params.pipelines_testdata_base_path + 'reference/reference.fasta.fai', checkIfExists: true)]).collect()
94+
input[5] = channel.of([[id:'genome'], file(params.pipelines_testdata_base_path + 'reference/reference.fasta', checkIfExists: true)]).collect()
95+
input[6] = channel.value([[],[]])
96+
input[7] = channel.of([id:'earlycasualcaiman', sample:'earlycasualcaiman', sex:1, phenotype:2, paternal:0, maternal:0, case_id:'justhusky'])
97+
input[8] = channel.of(file(params.pipelines_testdata_base_path + 'reference/target_wgs.interval_list', checkIfExists: true))
98+
input[9] = channel.of([
9799
[id:'justhusky', probands:['earlycasualcaiman'], upd_children:['earlycasualcaiman'], mother:'', father:''],
98100
file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true),
99101
file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true)
100102
])
101-
input[9] = channel.value([[]])
102-
input[10] = channel.of(file(params.pipelines_testdata_base_path + 'reference/vcfanno_functions.lua', checkIfExists: true)).collect()
103-
input[11] = channel.value(file(params.pipelines_testdata_base_path + 'reference/grch38_gnomad_reformated_-r3.1.1-.vcf.gz', checkIfExists: true))
104-
input[12] = channel.of(file(params.pipelines_testdata_base_path + 'reference/vcfanno_config.toml', checkIfExists: true)).collect()
105-
input[13] = channel.of(file(params.pipelines_testdata_base_path + 'reference/vep_cache_and_plugins.tar.gz', checkIfExists: true)).collect()
106-
input[14] = channel.value([])
107-
input[15] = 'wes'
108-
input[16] = null
109-
input[17] = 'GRCh37'
110-
input[18] = 107
103+
input[10] = channel.value([[]])
104+
input[11] = channel.of(file(params.pipelines_testdata_base_path + 'reference/vcfanno_functions.lua', checkIfExists: true)).collect()
105+
input[12] = channel.value(file(params.pipelines_testdata_base_path + 'reference/grch38_gnomad_reformated_-r3.1.1-.vcf.gz', checkIfExists: true))
106+
input[13] = channel.of(file(params.pipelines_testdata_base_path + 'reference/vcfanno_config.toml', checkIfExists: true)).collect()
107+
input[14] = channel.of(file(params.pipelines_testdata_base_path + 'reference/vep_cache_and_plugins.tar.gz', checkIfExists: true)).collect()
108+
input[15] = channel.value([])
109+
input[16] = 'wes'
110+
input[17] = null
111+
input[18] = 'GRCh37'
112+
input[19] = 107
111113
"""
112114
}
113115
}

subworkflows/local/annotate_mt_snvs/main.nf

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,10 @@ include { VCFANNO as VCFANNO_MT } from '../../../modules/nf-core/vcf
1212
workflow ANNOTATE_MT_SNVS {
1313
take:
1414
ch_cadd_header // channel: [mandatory] [ path(txt) ]
15-
ch_cadd_resources // channel: [mandatory] [ path(annotation) ]
15+
ch_cadd_prescored // channel: [optional] [ val(meta), path(prescored) ]
16+
ch_cadd_resources // channel: [mandatory] [ val(meta), path(annotation) ]
1617
ch_genome_fasta // channel: [mandatory] [ val(meta), path(fasta) ]
17-
ch_fai // channel: [mandatory] [ path(fai) ]
18+
ch_fai // channel: [mandatory] [ val(meta), path(fai) ]
1819
ch_mt_vcf_tbi // channel: [mandatory] [ val(meta), path(vcf), path(tbi) ]
1920
ch_vcfanno_extra // channel: [mandatory] [ [path(vcf),path(index)] ]
2021
ch_vcfanno_lua // channel: [mandatory] [ path(lua) ]
@@ -40,6 +41,7 @@ workflow ANNOTATE_MT_SNVS {
4041
if (!val_cadd_resources.equals(null)) {
4142
ANNOTATE_CADD (
4243
ch_cadd_resources,
44+
ch_cadd_prescored,
4345
ch_fai,
4446
ch_cadd_header,
4547
VCFANNO_MT.out.vcf.join(VCFANNO_MT.out.tbi, failOnMismatch:true, failOnDuplicate:true),

0 commit comments

Comments
 (0)