Clinical-Genomics · kristinebilgrav · Apr 23, 2026 · Feb 11, 2026 · Mar 9, 2026 · Mar 11, 2026
@@ -19,6 +19,7 @@ Initial release of Clinical-Genomics/oncorefiner, created with the [nf-core](htt
 - Added parameters documentation [#25](https://github.com/Clinical-Genomics/oncorefiner/pull/25)
 - Added pre-commit hook for automatic generation of parameters documentation [#25](https://github.com/Clinical-Genomics/oncorefiner/pull/25)
 - Added Nextflow strict syntax compatibility [#30](https://github.com/Clinical-Genomics/oncorefiner/pull/30)
+- Added CADD scoring for InDels, and a test for the subworkflow [#59](https://github.com/Clinical-Genomics/oncorefiner/pull/59)
 
 ### Changed
 

@@ -0,0 +1 @@
+##INFO=<ID=CADD,Number=1,Type=Float,Description="PHRED-like scaled CADD score.">
@@ -0,0 +1,49 @@
+/*
+Annotate with CADD
+*/
+
+
+process {
+
+    withName: '.*:ANNOTATE_CADD:.*' { // applies to every process matched under ANNOTATE_CADD: publishing is switched off
+        publishDir = [
+            enabled: false
+        ]
+    }
+
+    withName: '.*:ANNOTATE_CADD:RENAME_CHR_CADD' { // output named after the input file's simpleName plus "_indels"
+        ext.args   = { "--output-type z" }
+        ext.prefix = { "${input.simpleName}_indels" }
+    }
+
+    withName: '.*:ANNOTATE_CADD:BCFTOOLS_VIEW' { // keeps only records of type indels/other
+        ext.args   = { "--output-type z --types indels,other" }
+        ext.prefix = { "${vcf.simpleName}_indels" }
+    }
+
+    withName: '.*:ANNOTATE_CADD:CADD' { // passes the configured genome build (params.genome) via -g
+        ext.args   = { "-g ${params.genome}" }
+        ext.prefix = { "${vcf.simpleName}_cadd" }
+    }
+
+    withName: '.*:ANNOTATE_CADD:TABIX_CADD' { // index on column 1 (sequence) and column 2 (used as both begin and end)
+        ext.args = { "--force --sequence 1 --begin 2 --end 2" }
+    }
+
+    withName: '.*:ANNOTATE_CADD:CADD_TO_REFERENCE_CHRNAMES' { // awk: print "<name without leading chr> <original name>"
+        ext.args2 = '\'{original=$1; sub("^chr","",$1); print $1, original}\'' // anchored so only a leading "chr" is stripped
+        ext.prefix = "cadd_to_reference"
+        ext.suffix = "txt"
+    }
+
+    withName: '.*:ANNOTATE_CADD:REFERENCE_TO_CADD_CHRNAMES' { // awk: print "<original name> <name without leading chr>"
+        ext.args2 = '\'{original=$1; sub("^chr","",$1); print original, $1}\'' // anchored so only a leading "chr" is stripped
+        ext.prefix = "reference_to_cadd"
+        ext.suffix = "txt"
+    }
+
+    withName: '.*:ANNOTATE_CADD:ANNOTATE_INDELS' { // fills the CADD field from the 6th column; 5th column ("-") is ignored
+        ext.args   = { "--columns Chrom,Pos,Ref,Alt,-,CADD --output-type z --write-index=tbi" }
+        ext.prefix = { "${input.simpleName}_ann" }
+    }
+}
@@ -46,4 +46,5 @@ params {
 
     svdb_query_dbs      = params.pipelines_testdata_base_path + 'reference/svdb_querydb_files.csv'
 
+    // TODO make/insert mock input for CADD
 }
@@ -24,6 +24,8 @@ Reference genome related files and options required for the workflow.
 | `genome` | Name of the genome reference. (accepted: `GRCh38`\|`GRCh37`) <details><summary>Help</summary><small>Use this parameter to specify the ID for the reference genome used. This is then used to annotate the SV and SNV files e.g. `--genome GRCh38`.</small></details>| `string` | GRCh38 |  |  |
 | `fasta` | Path to FASTA genome file. <details><summary>Help</summary><small>If you don't have a BWA index available this will be generated for you automatically. Combine with `--save_reference` to save BWA index for future runs.</small></details>| `string` |  |  |  |
 | `fai` | Path to FASTA genome index file. <details><summary>Help</summary><small>If none provided, will be generated automatically from the FASTA reference</small></details>| `string` |  |  |  |
+| `cadd_prescored_indels` | Path to a directory containing prescored indels for CADD. <details><summary>Help</summary><small>This folder contains the compressed files and indexes that would otherwise be in the `data/prescored` folder, as described in https://github.com/kircherlab/CADD-scripts/#manual-installation.</small></details>| `string` |  |  |  |
+| `cadd_resources` | Path to the directory containing CADD annotations. <details><summary>Help</summary><small>This folder contains the uncompressed files that would otherwise be in the `data/annotation` folder, as described in https://github.com/kircherlab/CADD-scripts/#manual-installation.</small></details>| `string` |  |  |  |
 | `species` | Species of the reference genome. E.g. `--species homo_sapiens`. (accepted: `homo_sapiens`) | `string` | homo_sapiens |  |  |
 
 ## Annotation options

@@ -30,8 +30,11 @@ workflow CLINICALGENOMICS_ONCOREFINER {
 
     take:
     samplesheet                 // channel: [mandatory] samplesheet read in from --input
+    val_cadd_resources          // string:  [optional]  path to CADD resources directory
+    val_cadd_prescored_indels   // string:  [optional]  path to CADD prescored indels directory
     val_genome                  // string:  [optional]  genome assembly (e.g. "GRCh38")
     val_genome_fasta            // string:  [optional]  path to genome fasta file
+    val_genome_fai              // string:  [optional]  path to genome fasta index file
     val_snv_vcf                 // string:  [optional]  path to input SNV vcf file
     val_species                 // string:  [optional]  species (e.g. "homo_sapiens")
     val_sv_vcf                  // string:  [optional]  path to input SV vcf file
@@ -53,22 +56,31 @@ workflow CLINICALGENOMICS_ONCOREFINER {
                                              : channel.value([[],[]])
 
     PREPARE_REFERENCES (
-        params.vep_cache
+        val_vep_cache
         )
 
     //
     // WORKFLOW: Run pipeline
     //
     // Input channels
-    ch_snv_vcf              = channel.fromPath(val_snv_vcf).map { vcf -> [[id:vcf.simpleName], vcf] }.collect()
-    ch_snv_vcf_tbi          = channel.fromPath(val_snv_vcf + '.tbi', checkIfExists: true).map { vcf -> [[id:vcf.simpleName], vcf] }.collect()
-    ch_sv_vcf               = channel.fromPath(val_sv_vcf).map { vcf -> [[id:vcf.simpleName], vcf] }.collect()
-    ch_sv_vcf_tbi           = channel.fromPath(val_sv_vcf + '.tbi', checkIfExists: true).map { vcf -> [[id:vcf.simpleName], vcf] }.collect()
-    ch_vep_extra_files      = channel.empty()
-    ch_svdb_dbs             = channel.empty()
+    ch_snv_vcf               = channel.fromPath(val_snv_vcf).map { vcf -> [[id:vcf.simpleName], vcf] }.collect()
+    ch_snv_vcf_tbi           = channel.fromPath(val_snv_vcf + '.tbi', checkIfExists: true).map { vcf -> [[id:vcf.simpleName], vcf] }.collect()
+    ch_sv_vcf                = channel.fromPath(val_sv_vcf).map { vcf -> [[id:vcf.simpleName], vcf] }.collect()
+    ch_sv_vcf_tbi            = channel.fromPath(val_sv_vcf + '.tbi', checkIfExists: true).map { vcf -> [[id:vcf.simpleName], vcf] }.collect()
+    ch_vep_extra_files       = channel.empty()
+    ch_svdb_dbs              = channel.empty()
 
     // Reference files
-    ch_genome_fasta         = channel.fromPath(val_genome_fasta).map { it -> [[id:it.simpleName], it] }.collect()
+    ch_genome_fasta          = channel.fromPath(val_genome_fasta).map { it -> [[id:it.simpleName], it] }.collect()
+    ch_genome_fai            = channel.fromPath(val_genome_fai).map {it -> [[id:it.simpleName], it]  }.collect()
+
+    // CADD input files
+    ch_cadd_header           = channel.fromPath("$projectDir/assets/cadd_to_vcf_header.txt", checkIfExists: true).collect()
+    ch_cadd_resources        = val_cadd_resources        ? channel.fromPath(val_cadd_resources).map { it -> [[id:'cadd_resources'], it] }.collect()
+                                                         : channel.value([])
+
+    ch_cadd_prescored_indels = val_cadd_prescored_indels ? channel.fromPath(val_cadd_prescored_indels).map { it -> [[id:'cadd_prescored_indels'], it] }.collect()
+                                                         : channel.value([])
 
     // Input for VEP
     ch_vep_extra_files_unsplit  = val_vep_plugin_files ? channel.fromPath(val_vep_plugin_files).collect() : channel.value([])
@@ -104,7 +116,11 @@ workflow CLINICALGENOMICS_ONCOREFINER {
 
     ONCOREFINER (
         samplesheet,
+        ch_cadd_header,
+        ch_cadd_prescored_indels,
+        ch_cadd_resources,
         ch_genome_fasta,
+        ch_genome_fai,
         ch_snv_vcf,
         ch_snv_vcf_tbi,
         ch_sv_dbs,
@@ -116,6 +132,7 @@ workflow CLINICALGENOMICS_ONCOREFINER {
         ch_vcfanno_toml,
         PREPARE_REFERENCES.out.vep_resources,
         ch_vep_extra_files,
+        val_cadd_resources,
         val_genome,
         val_species,
         val_vep_cache_version
@@ -152,8 +169,11 @@ workflow {
     //
     CLINICALGENOMICS_ONCOREFINER (
         PIPELINE_INITIALISATION.out.samplesheet,
+        params.cadd_resources,
+        params.cadd_prescored_indels,
         params.genome,
         params.fasta,
+        params.fai,
         params.snv_vcf,
         params.species,
         params.sv_vcf,

@@ -25,11 +25,21 @@
                         "git_sha": "6383d8fe58f9498eecd5aa303e71a4a932d1e9f6",
                         "installed_by": ["modules"]
                     },
+                    "cadd": {
+                        "branch": "master",
+                        "git_sha": "64ab14a6905e5c9d649f61e2757a1e600dbdb8e0",
+                        "installed_by": ["modules"]
+                    },
                     "ensemblvep/vep": {
                         "branch": "master",
                         "git_sha": "34505e1fc5e9f4fd641210ca440acff6bd33b842",
                         "installed_by": ["modules"]
                     },
+                    "gawk": {
+                        "branch": "master",
+                        "git_sha": "c0da8f3a26835d663873001382a708f75766fec6",
+                        "installed_by": ["modules"]
+                    },
                     "multiqc": {
                         "branch": "master",
                         "git_sha": "2c73cc8fa92cf48de3da0b643fdf357a8a290b36",
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		##INFO=<ID=CADD,Number=1,Type=Float,Description="PHRED-like scaled CADD score.">
Original file line number	Diff line number	Diff line change
Expand Up		@@ -46,4 +46,5 @@ params {

		svdb_query_dbs = params.pipelines_testdata_base_path + 'reference/svdb_querydb_files.csv'

		// TODO make/insert mock input for CADD
Comment thread kristinebilgrav marked this conversation as resolved.
		}