From f7d2a3d08475c6ab781a1682fda57bd5dd9e0478 Mon Sep 17 00:00:00 2001
From: kristinebilgrav <kristinebilgrav1@gmail.com>
Date: Wed, 11 Feb 2026 15:35:14 +0100
Subject: [PATCH 01/23] add cadd -raw

---
 assets/cadd_to_vcf_header_-1.0-.txt          |  1 +
 conf/test.config                             |  4 ++
 modules/nf-core/cadd/environment.yml         |  9 +++
 modules/nf-core/cadd/main.nf                 | 63 +++++++++++++++++
 modules/nf-core/cadd/meta.yml                | 72 ++++++++++++++++++++
 modules/nf-core/cadd/tests/main.nf.test      | 37 ++++++++++
 modules/nf-core/cadd/tests/main.nf.test.snap | 44 ++++++++++++
 nextflow.config                              |  5 ++
 nextflow_schema.json                         | 17 +++++
 subworkflows/local/annotate_cadd/main.nf     | 44 ++++++++++++
 workflows/oncorefiner.nf                     | 35 +++++++++-
 11 files changed, 329 insertions(+), 2 deletions(-)
 create mode 100644 assets/cadd_to_vcf_header_-1.0-.txt
 create mode 100644 modules/nf-core/cadd/environment.yml
 create mode 100644 modules/nf-core/cadd/main.nf
 create mode 100644 modules/nf-core/cadd/meta.yml
 create mode 100644 modules/nf-core/cadd/tests/main.nf.test
 create mode 100644 modules/nf-core/cadd/tests/main.nf.test.snap
 create mode 100644 subworkflows/local/annotate_cadd/main.nf

diff --git a/assets/cadd_to_vcf_header_-1.0-.txt b/assets/cadd_to_vcf_header_-1.0-.txt
new file mode 100644
index 0000000..8deee48
--- /dev/null
+++ b/assets/cadd_to_vcf_header_-1.0-.txt
@@ -0,0 +1 @@
+##INFO=<ID=CADD,Number=1,Type=Float,Description="PHRED-like scaled CADD score.">
diff --git a/conf/test.config b/conf/test.config
index e6cf193..e21a556 100644
--- a/conf/test.config
+++ b/conf/test.config
@@ -46,4 +46,8 @@ params {
 
     svdb_query_dbs      = params.pipelines_testdata_base_path + 'reference/svdb_querydb_files.csv'
 
+    // Mock input for CADD
+    cadd_resources               = params.pipelines_testdata_base_path + "/assets" //TODO add
+    cadd_prescored_indels        = params.pipelines_testdata_base_path + "docs"   //TODO add
+
 }
diff --git a/modules/nf-core/cadd/environment.yml b/modules/nf-core/cadd/environment.yml
new file mode 100644
index 0000000..d98de65
--- /dev/null
+++ b/modules/nf-core/cadd/environment.yml
@@ -0,0 +1,9 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+channels:
+  - conda-forge
+  - bioconda
+dependencies:
+  - bioconda::cadd-scripts=1.6.post1
+  - conda-forge::conda=4.14.0
+  - conda-forge::mamba=1.4.0
diff --git a/modules/nf-core/cadd/main.nf b/modules/nf-core/cadd/main.nf
new file mode 100644
index 0000000..0e3c79b
--- /dev/null
+++ b/modules/nf-core/cadd/main.nf
@@ -0,0 +1,63 @@
+process CADD {
+    tag "${meta.id}"
+    label 'process_medium'
+
+    conda "${moduleDir}/environment.yml"
+    container 'docker.io/biocontainers/cadd-scripts-with-envs:1.6.post1_cv1'
+
+    containerOptions {
+        if (prescored_dir) {
+            ['singularity', 'apptainer'].contains(workflow.containerEngine) ?
+                "-B ${annotation_dir}:/opt/CADD-scripts-1.6.post1/data/annotations -B ${prescored_dir}:/opt/CADD-scripts-1.6.post1/data/prescored" :
+                "-v ${annotation_dir}:/opt/CADD-scripts-1.6.post1/data/annotations -v ${prescored_dir}:/opt/CADD-scripts-1.6.post1/data/prescored"
+        } else {
+            ['singularity', 'apptainer'].contains(workflow.containerEngine) ?
+                "-B ${annotation_dir}:/opt/CADD-scripts-1.6.post1/data/annotations" :
+                "-v ${annotation_dir}:/opt/CADD-scripts-1.6.post1/data/annotations"
+        }
+    }
+
+    input:
+    tuple val(meta), path(vcf)
+    tuple val(meta2), path(annotation_dir)
+
+    output:
+    tuple val(meta), path("*.tsv.gz"), emit: tsv
+    path "versions.yml"              , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    def VERSION = "1.6.post1"
+    // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions.
+    """
+    export XDG_CACHE_HOME=\$PWD/snakemake_cache
+    mkdir -p \$XDG_CACHE_HOME
+
+    cadd.sh \\
+        -o ${prefix}.tsv.gz \\
+        ${args} \\
+        ${vcf}
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        cadd: ${VERSION}
+    END_VERSIONS
+    """
+
+    stub:
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    def VERSION = "1.6.post1"
+    // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions.
+    """
+    echo "" | gzip > ${prefix}.tsv.gz
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        cadd: ${VERSION}
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/cadd/meta.yml b/modules/nf-core/cadd/meta.yml
new file mode 100644
index 0000000..60c863c
--- /dev/null
+++ b/modules/nf-core/cadd/meta.yml
@@ -0,0 +1,72 @@
+name: "cadd"
+description: CADD is a tool for scoring the deleteriousness of single nucleotide variants
+  as well as insertion/deletions variants in the human genome.
+keywords:
+  - cadd
+  - annotate
+  - variants
+tools:
+  - "cadd":
+      description: "CADD scripts release for offline scoring"
+      homepage: "https://cadd.gs.washington.edu/"
+      documentation: "https://github.com/kircherlab/CADD-scripts/blob/master/README.md"
+      tool_dev_url: "https://github.com/kircherlab/CADD-scripts/"
+      doi: "10.1093/nar/gky1016"
+      licence:
+        - Restricted. Free for non-commercial users.
+      identifier: biotools:cadd_phred
+input:
+  - - meta:
+        type: map
+        description: |
+          Groovy Map containing sample information
+          e.g. [ id:'test', single_end:false ]
+    - vcf:
+        type: file
+        description: Input file for annotation in vcf or vcf.gz format
+        pattern: "*.{vcf,vcf.gz}"
+        ontologies: []
+  - - meta2:
+        type: map
+        description: |
+          Groovy Map containing reference information
+          e.g. [ id:'test' ]
+    - annotation_dir:
+        type: directory
+        description: |
+          Path to folder containing the vcf files with precomputed CADD scores.
+          This folder contains the uncompressed files that would otherwise be in data/annotation folder as described in https://github.com/kircherlab/CADD-scripts/#manual-installation.
+  - - meta3:
+        type: map
+        description: |
+          Groovy Map containing reference information
+          e.g. [ id:'test' ]
+    - prescored_dir:
+        type: directory
+        description: |
+          Path to folder containing prescored files.
+          This folder contains the uncompressed files that would otherwise be in data/prescored/${GENOME_BUILD}_${VERSION}/ folder as described in https://github.com/kircherlab/CADD-scripts/#manual-installation.
+output:
+  tsv:
+    - - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. [ id:'test', single_end:false ]
+      - "*.tsv.gz":
+          type: file
+          description: Annotated tsv file
+          pattern: "*.{tsv,tsv.gz}"
+          ontologies:
+            - edam: http://edamontology.org/format_3475 # TSV
+  versions:
+    - versions.yml:
+        type: file
+        description: File containing software versions
+        pattern: "versions.yml"
+        ontologies:
+          - edam: http://edamontology.org/format_3750 # YAML
+authors:
+  - "@ramprasadn"
+maintainers:
+  - "@ramprasadn"
diff --git a/modules/nf-core/cadd/tests/main.nf.test b/modules/nf-core/cadd/tests/main.nf.test
new file mode 100644
index 0000000..cc36d0c
--- /dev/null
+++ b/modules/nf-core/cadd/tests/main.nf.test
@@ -0,0 +1,37 @@
+nextflow_process {
+
+    name "Test Process CADD"
+
+    script "../main.nf"
+    process "CADD"
+
+    tag "modules"
+    tag "modules_nfcore"
+    tag "cadd"
+
+    test("test_cadd - stub") {
+        options '-stub'
+        when {
+
+            process {
+                """
+                input[0] = [
+                    [id:'test',single_end:false],// meta map
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf',checkIfExists:true)
+                ]
+                input[1] = Channel.from("\$PWD").map { dir -> [ [ id: dir ], dir ] }
+                input[2] = Channel.from("/").map { dir -> [ [ id: dir ], dir ] }
+                """
+            }
+        }
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(
+                    process.out,
+                    process.out.versions.collect{ path(it).yaml }
+                ).match() }
+            )
+        }
+    }
+}
diff --git a/modules/nf-core/cadd/tests/main.nf.test.snap b/modules/nf-core/cadd/tests/main.nf.test.snap
new file mode 100644
index 0000000..15a0fa1
--- /dev/null
+++ b/modules/nf-core/cadd/tests/main.nf.test.snap
@@ -0,0 +1,44 @@
+{
+    "test_cadd - stub": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test.tsv.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+                    ]
+                ],
+                "1": [
+                    "versions.yml:md5,ef02d93c7627a5a20a25326b5d7ebffc"
+                ],
+                "tsv": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test.tsv.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+                    ]
+                ],
+                "versions": [
+                    "versions.yml:md5,ef02d93c7627a5a20a25326b5d7ebffc"
+                ]
+            },
+            [
+                {
+                    "CADD": {
+                        "cadd": "1.6.post1"
+                    }
+                }
+            ]
+        ],
+        "meta": {
+            "nf-test": "0.9.2",
+            "nextflow": "24.10.5"
+        },
+        "timestamp": "2025-04-16T09:56:33.347204138"
+    }
+}
\ No newline at end of file
diff --git a/nextflow.config b/nextflow.config
index 43091a6..9e791da 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -18,6 +18,11 @@ params {
     snv_vcf                    = null
     sv_vcf                     = null
 
+    // CADD
+    cadd_resources = null
+    cadd_prescored_indels = null
+
+
     // Vep
     vep_cache_version = 112
     vep_plugin_files = null
diff --git a/nextflow_schema.json b/nextflow_schema.json
index b9dcf56..76265a4 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -88,7 +88,24 @@
                     "fa_icon": "fas fa-ban",
                     "hidden": true,
                     "default": "s3://ngi-igenomes/igenomes/"
+                },
+                "cadd_prescored_indels": {
+                    "type": "string",
+                    "exists": true,
+                    "format": "directory-path",
+                    "fa_icon": "fas fa-file",
+                    "description": "Path to a directory containing prescored indels for CADD.",
+                    "help_text": "This folder contains the compressed files and indexes that would otherwise be in data/prescored folder as described in https://github.com/kircherlab/CADD-scripts/#manual-installation."
+                },
+                "cadd_resources": {
+                    "type": "string",
+                    "exists": true,
+                    "format": "directory-path",
+                    "fa_icon": "fas fa-file",
+                    "description": "Path to the directory containing cadd annotations.",
+                    "help_text": "This folder contains the uncompressed files that would otherwise be in data/annotation folder as described in https://github.com/kircherlab/CADD-scripts/#manual-installation."
                 }
+
             }
         },
         "annotation_options": {
diff --git a/subworkflows/local/annotate_cadd/main.nf b/subworkflows/local/annotate_cadd/main.nf
new file mode 100644
index 0000000..a3097b9
--- /dev/null
+++ b/subworkflows/local/annotate_cadd/main.nf
@@ -0,0 +1,44 @@
+//
+// A subworkflow to annotate cadd
+//
+
+include { BCFTOOLS_ANNOTATE                        } from '../../../modules/nf-core/bcftools/annotate/main'
+include { CADD                                     } from '../../../modules/nf-core/cadd/main'
+include { TABIX_TABIX as TABIX_CADD                } from '../../../modules/nf-core/tabix/tabix/main'
+include { TABIX_TABIX as TABIX_ANNOTATE            } from '../../../modules/nf-core/tabix/tabix/main'
+
+
+workflow ANNOTATE_CADD {
+
+    take:
+        ch_snv_vcf         // channel: [mandatory] [ val(meta), path(vcfs), path(idx) ]
+        ch_cadd_header     // channel: [mandatory] [ path(txt) ]
+        ch_cadd_resources  // channel: [mandatory] [ path(dir) ]
+        ch_cadd_prescored_indels // channel: [mandatory] [ val(meta), path(dir) ]
+
+    main:
+        ch_versions = channel.empty()
+
+        CADD(ch_snv_vcf, ch_cadd_resources, ch_cadd_prescored_indels)
+
+        TABIX_CADD(CADD.out.tsv)
+
+        ch_snv_vcf
+            .join(CADD.out.tsv)
+            .join(TABIX_CADD.out.tbi)
+            .set { ch_annotate_in }
+
+        BCFTOOLS_ANNOTATE(ch_annotate_in, ch_cadd_header )
+
+        TABIX_ANNOTATE (BCFTOOLS_ANNOTATE.out.vcf)
+
+        ch_versions = ch_versions.mix(CADD.out.versions.first())
+        ch_versions = ch_versions.mix(TABIX_CADD.out.versions.first())
+        ch_versions = ch_versions.mix(BCFTOOLS_ANNOTATE.out.versions.first())
+        ch_versions = ch_versions.mix(TABIX_ANNOTATE.out.versions.first())
+
+    emit:
+        vcf = BCFTOOLS_ANNOTATE.out.vcf // channel: [ val(meta), path(vcf) ]
+        tbi = TABIX_ANNOTATE.out.tbi
+        versions = ch_versions
+}
diff --git a/workflows/oncorefiner.nf b/workflows/oncorefiner.nf
index c9df030..2d57e5a 100644
--- a/workflows/oncorefiner.nf
+++ b/workflows/oncorefiner.nf
@@ -19,6 +19,7 @@ include { SVDB_QUERY as SVDB_QUERY_DB              } from '../modules/nf-core/sv
 include { ENSEMBLVEP_VEP as ENSEMBLVEP_SV          } from '../modules/nf-core/ensemblvep/vep/main'
 include { BCFTOOLS_VIEW as RESEARCH_FILTERING_SV   } from '../modules/nf-core/bcftools/view/main'
 include { BCFTOOLS_VIEW as CLINICAL_FILTERING_SV   } from '../modules/nf-core/bcftools/view/main'
+include { TABIX_TABIX as TABIX_RESEARCH_FILTERING  } from '../modules/nf-core/tabix/tabix/main'
 
 //
 // MODULE: Local modules
@@ -33,6 +34,7 @@ include { paramsSummaryMultiqc   } from '../subworkflows/nf-core/utils_nfcore_pi
 include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline'
 include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_oncorefiner_pipeline'
 include { PREPARE_REFERENCES     } from '../subworkflows/local/prepare_references'
+include { ANNOTATE_CADD          } from '../subworkflows/local/annotate_cadd'
 
 /*
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -71,7 +73,13 @@ workflow ONCOREFINER {
 
         // Gather or get from params
         ch_vep_cache                = ( params.vep_cache && params.vep_cache.endsWith("tar.gz") )  ? ch_references.vep_resources
-                                                                            : ( params.vep_cache    ? channel.fromPath(params.vep_cache).collect() : channel.value([]) )
+                                                                                : ( params.vep_cache    ? channel.fromPath(params.vep_cache).collect() : channel.value([]) )
+
+        ch_cadd_header              = Channel.fromPath("$projectDir/assets/cadd_to_vcf_header_-1.0-.txt", checkIfExists: true).collect()
+        ch_cadd_resources           = params.cadd_resources                     ? Channel.fromPath(params.cadd_resources).collect()
+                                                                                : Channel.value([])
+        ch_cadd_prescored_indels     = createReferenceChannelFromPath(params.cadd_prescored_indels) // align with above
+
 
         //
         // Read and store paths in the vep_plugin_files file
@@ -133,16 +141,39 @@ workflow ONCOREFINER {
                     tuple(meta, vcf, tbi)
                     }
                 .set { ch_research_filtering_in }
+
             RESEARCH_FILTERING(ch_research_filtering_in, [], [], [])
 
 
+            /*
             // VEP
             RESEARCH_FILTERING.out.vcf
                     .map { meta, vcf ->
                         def custom_extra_files = params.custom_extra_files ? file(params.custom_extra_files) : []
                         tuple(meta, vcf, custom_extra_files)
                     }
-                    .set { ch_vep_snv }
+                    .set { ch_cadd_snv }
+            */
+
+
+            // ANNOTATE WITH CADD
+            if (params.cadd_resources != null) {
+                TABIX_RESEARCH_FILTERING (RESEARCH_FILTERING.out.vcf)
+
+                RESEARCH_FILTERING.out.vcf
+                    .join(TABIX_RESEARCH_FILTERING.out.tbi, failOnMismatch:true, failOnDuplicate:true)
+                    .set {ch_cadd_snv}
+
+                ANNOTATE_CADD (
+                    ch_cadd_snv,
+                    ch_cadd_header,
+                    ch_cadd_resources,
+                    ch_cadd_prescored_indels
+                )
+                ch_vep_snv = ANNOTATE_CADD.out.vcf
+                ch_versions = ch_versions.mix(ANNOTATE_CADD.out.versions)
+
+            }
 
             ENSEMBLVEP_SNV (
                 ch_vep_snv,

From 45cb012cf9887f6aa3bbdae82a8aae1b946b0682 Mon Sep 17 00:00:00 2001
From: kristinebilgrav <kristinebilgrav1@gmail.com>
Date: Mon, 23 Mar 2026 15:15:20 +0100
Subject: [PATCH 02/23] update CADD module to 1.7.3 and emit versions via topic

---
 conf/subworkflows/annotate_cadd.config       |  9 ++++
 conf/test.config                             |  5 ++-
 modules.json                                 |  5 +++
 modules/nf-core/cadd/environment.yml         |  4 +-
 modules/nf-core/cadd/main.nf                 | 40 +++++++----------
 modules/nf-core/cadd/meta.yml                | 46 +++++++++++++++-----
 modules/nf-core/cadd/tests/main.nf.test      |  2 +
 modules/nf-core/cadd/tests/main.nf.test.snap | 28 +++++++-----
 modules/nf-core/cadd/tests/nextflow.config   |  5 +++
 nextflow.config                              |  1 +
 workflows/oncorefiner.nf                     | 15 +++----
 11 files changed, 97 insertions(+), 63 deletions(-)
 create mode 100644 conf/subworkflows/annotate_cadd.config
 create mode 100644 modules/nf-core/cadd/tests/nextflow.config

diff --git a/conf/subworkflows/annotate_cadd.config b/conf/subworkflows/annotate_cadd.config
new file mode 100644
index 0000000..635799a
--- /dev/null
+++ b/conf/subworkflows/annotate_cadd.config
@@ -0,0 +1,9 @@
+/*
+Annotate with CADD
+*/
+
+
+process {
+
+
+}
diff --git a/conf/test.config b/conf/test.config
index 0836cf8..4a750f4 100644
--- a/conf/test.config
+++ b/conf/test.config
@@ -47,7 +47,8 @@ params {
     svdb_query_dbs      = params.pipelines_testdata_base_path + 'reference/svdb_querydb_files.csv'
 
     // Mock input for CADD
-    cadd_resources               = params.pipelines_testdata_base_path + "/assets" //TODO add
-    cadd_prescored_indels        = params.pipelines_testdata_base_path + "docs"   //TODO add
+    cadd_resources               = '../test-datasets'
+    //cadd_resources               = params.pipelines_testdata_base_path + "assets" //TODO add
+    //cadd_prescored_indels        = params.pipelines_testdata_base_path + "docs"   //TODO add
 
 }
diff --git a/modules.json b/modules.json
index 8a4a313..ab7e9cb 100644
--- a/modules.json
+++ b/modules.json
@@ -25,6 +25,11 @@
                         "git_sha": "6383d8fe58f9498eecd5aa303e71a4a932d1e9f6",
                         "installed_by": ["modules"]
                     },
+                    "cadd": {
+                        "branch": "master",
+                        "git_sha": "64ab14a6905e5c9d649f61e2757a1e600dbdb8e0",
+                        "installed_by": ["modules"]
+                    },
                     "ensemblvep/vep": {
                         "branch": "master",
                         "git_sha": "34505e1fc5e9f4fd641210ca440acff6bd33b842",
diff --git a/modules/nf-core/cadd/environment.yml b/modules/nf-core/cadd/environment.yml
index d98de65..39701b4 100644
--- a/modules/nf-core/cadd/environment.yml
+++ b/modules/nf-core/cadd/environment.yml
@@ -4,6 +4,4 @@ channels:
   - conda-forge
   - bioconda
 dependencies:
-  - bioconda::cadd-scripts=1.6.post1
-  - conda-forge::conda=4.14.0
-  - conda-forge::mamba=1.4.0
+  - bioconda::cadd-scripts=1.7.3
diff --git a/modules/nf-core/cadd/main.nf b/modules/nf-core/cadd/main.nf
index 0e3c79b..771d144 100644
--- a/modules/nf-core/cadd/main.nf
+++ b/modules/nf-core/cadd/main.nf
@@ -3,61 +3,51 @@ process CADD {
     label 'process_medium'
 
     conda "${moduleDir}/environment.yml"
-    container 'docker.io/biocontainers/cadd-scripts-with-envs:1.6.post1_cv1'
+    container 'docker.io/clinicalgenomics/cadd-with-scripts:1.7.3'
 
     containerOptions {
         if (prescored_dir) {
             ['singularity', 'apptainer'].contains(workflow.containerEngine) ?
-                "-B ${annotation_dir}:/opt/CADD-scripts-1.6.post1/data/annotations -B ${prescored_dir}:/opt/CADD-scripts-1.6.post1/data/prescored" :
-                "-v ${annotation_dir}:/opt/CADD-scripts-1.6.post1/data/annotations -v ${prescored_dir}:/opt/CADD-scripts-1.6.post1/data/prescored"
+                "-B ${annotation_dir}:/cadd-scripts/data/annotations -B ${prescored_dir}:/cadd-scripts/data/prescored" :
+                "-v ${annotation_dir}:/cadd-scripts/data/annotations -v ${prescored_dir}:/cadd-scripts/data/prescored"
         } else {
             ['singularity', 'apptainer'].contains(workflow.containerEngine) ?
-                "-B ${annotation_dir}:/opt/CADD-scripts-1.6.post1/data/annotations" :
-                "-v ${annotation_dir}:/opt/CADD-scripts-1.6.post1/data/annotations"
+                "-B ${annotation_dir}:/cadd-scripts/data/annotations" :
+                "-v ${annotation_dir}:/cadd-scripts/data/annotations"
         }
     }
 
     input:
     tuple val(meta), path(vcf)
-    tuple val(meta2), path(annotation_dir)
+    tuple val(meta2), val(annotation_dir)
+    tuple val(meta3), val(prescored_dir)
 
     output:
-    tuple val(meta), path("*.tsv.gz"), emit: tsv
-    path "versions.yml"              , emit: versions
+    tuple val(meta), path("${prefix}.tsv.gz"), emit: tsv
+    tuple val("${task.process}"), val("cadd"), val("1.7.3"), emit: versions_cadd, topic: versions
+    // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions.
 
     when:
     task.ext.when == null || task.ext.when
 
     script:
     def args = task.ext.args ?: ''
-    def prefix = task.ext.prefix ?: "${meta.id}"
-    def VERSION = "1.6.post1"
-    // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions.
+    prefix = task.ext.prefix ?: "${meta.id}"
     """
     export XDG_CACHE_HOME=\$PWD/snakemake_cache
+    export MPLCONFIGDIR=.
     mkdir -p \$XDG_CACHE_HOME
 
-    cadd.sh \\
+    CADD.sh \\
+        -m \\
         -o ${prefix}.tsv.gz \\
         ${args} \\
         ${vcf}
-
-    cat <<-END_VERSIONS > versions.yml
-    "${task.process}":
-        cadd: ${VERSION}
-    END_VERSIONS
     """
 
     stub:
-    def prefix = task.ext.prefix ?: "${meta.id}"
-    def VERSION = "1.6.post1"
-    // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions.
+    prefix = task.ext.prefix ?: "${meta.id}"
     """
     echo "" | gzip > ${prefix}.tsv.gz
-
-    cat <<-END_VERSIONS > versions.yml
-    "${task.process}":
-        cadd: ${VERSION}
-    END_VERSIONS
     """
 }
diff --git a/modules/nf-core/cadd/meta.yml b/modules/nf-core/cadd/meta.yml
index 60c863c..1efaa94 100644
--- a/modules/nf-core/cadd/meta.yml
+++ b/modules/nf-core/cadd/meta.yml
@@ -1,6 +1,6 @@
 name: "cadd"
-description: CADD is a tool for scoring the deleteriousness of single nucleotide variants
-  as well as insertion/deletions variants in the human genome.
+description: CADD is a tool for scoring the deleteriousness of single nucleotide
+  variants as well as insertion/deletions variants in the human genome.
 keywords:
   - cadd
   - annotate
@@ -44,8 +44,16 @@ input:
     - prescored_dir:
         type: directory
         description: |
-          Path to folder containing prescored files.
-          This folder contains the uncompressed files that would otherwise be in data/prescored/${GENOME_BUILD}_${VERSION}/ folder as described in https://github.com/kircherlab/CADD-scripts/#manual-installation.
+          Path to folder containing prescored CADD score files.
+          Expected structure mirrors data/prescored/ from the CADD-scripts installation:
+            <prescored_dir>/
+              GRCh38_v1.7/
+                incl_anno/   # *.tsv.gz + *.tsv.gz.tbi (scores with annotations)
+                no_anno/     # *.tsv.gz + *.tsv.gz.tbi (scores only)
+              GRCh37_v1.7/
+                incl_anno/
+                no_anno/
+          See https://github.com/kircherlab/CADD-scripts/#manual-installation for details.
 output:
   tsv:
     - - meta:
@@ -53,19 +61,33 @@ output:
           description: |
             Groovy Map containing sample information
             e.g. [ id:'test', single_end:false ]
-      - "*.tsv.gz":
+      - ${prefix}.tsv.gz:
           type: file
           description: Annotated tsv file
           pattern: "*.{tsv,tsv.gz}"
           ontologies:
-            - edam: http://edamontology.org/format_3475 # TSV
+            - edam: http://edamontology.org/format_3475
+  versions_cadd:
+    - - ${task.process}:
+          type: string
+          description: The name of the process
+      - cadd:
+          type: string
+          description: The name of the tool
+      - 1.7.3:
+          type: string
+          description: The expression to obtain the version of the tool
+topics:
   versions:
-    - versions.yml:
-        type: file
-        description: File containing software versions
-        pattern: "versions.yml"
-        ontologies:
-          - edam: http://edamontology.org/format_3750 # YAML
+    - - ${task.process}:
+          type: string
+          description: The name of the process
+      - cadd:
+          type: string
+          description: The name of the tool
+      - 1.7.3:
+          type: string
+          description: The expression to obtain the version of the tool
 authors:
   - "@ramprasadn"
 maintainers:
diff --git a/modules/nf-core/cadd/tests/main.nf.test b/modules/nf-core/cadd/tests/main.nf.test
index cc36d0c..c328790 100644
--- a/modules/nf-core/cadd/tests/main.nf.test
+++ b/modules/nf-core/cadd/tests/main.nf.test
@@ -9,6 +9,8 @@ nextflow_process {
     tag "modules_nfcore"
     tag "cadd"
 
+    config "./nextflow.config"
+
     test("test_cadd - stub") {
         options '-stub'
         when {
diff --git a/modules/nf-core/cadd/tests/main.nf.test.snap b/modules/nf-core/cadd/tests/main.nf.test.snap
index 15a0fa1..5e38eea 100644
--- a/modules/nf-core/cadd/tests/main.nf.test.snap
+++ b/modules/nf-core/cadd/tests/main.nf.test.snap
@@ -12,7 +12,11 @@
                     ]
                 ],
                 "1": [
-                    "versions.yml:md5,ef02d93c7627a5a20a25326b5d7ebffc"
+                    [
+                        "CADD",
+                        "cadd",
+                        "1.7.3"
+                    ]
                 ],
                 "tsv": [
                     [
@@ -23,22 +27,22 @@
                         "test.tsv.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
                     ]
                 ],
-                "versions": [
-                    "versions.yml:md5,ef02d93c7627a5a20a25326b5d7ebffc"
+                "versions_cadd": [
+                    [
+                        "CADD",
+                        "cadd",
+                        "1.7.3"
+                    ]
                 ]
             },
             [
-                {
-                    "CADD": {
-                        "cadd": "1.6.post1"
-                    }
-                }
+                
             ]
         ],
+        "timestamp": "2026-03-01T12:08:37.372500636",
         "meta": {
-            "nf-test": "0.9.2",
-            "nextflow": "24.10.5"
-        },
-        "timestamp": "2025-04-16T09:56:33.347204138"
+            "nf-test": "0.9.4",
+            "nextflow": "25.10.4"
+        }
     }
 }
\ No newline at end of file
diff --git a/modules/nf-core/cadd/tests/nextflow.config b/modules/nf-core/cadd/tests/nextflow.config
new file mode 100644
index 0000000..bd24d9f
--- /dev/null
+++ b/modules/nf-core/cadd/tests/nextflow.config
@@ -0,0 +1,5 @@
+process {
+    withName: 'CADD' {
+        container = "nf-core/ubuntu:22.04" // Use a basic container because the CADD v1.7.3 image is too big for CI.
+    }
+}
diff --git a/nextflow.config b/nextflow.config
index d96ef24..4df69fa 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -293,3 +293,4 @@ validation {
 // Load modules.config for DSL2 module specific options
 includeConfig 'conf/modules.config'
 includeConfig 'conf/modules/prepare_references.config'
+includeConfig 'conf/subworkflows/annotate_cadd.config'
diff --git a/workflows/oncorefiner.nf b/workflows/oncorefiner.nf
index 16f1c74..4308373 100644
--- a/workflows/oncorefiner.nf
+++ b/workflows/oncorefiner.nf
@@ -144,27 +144,24 @@ workflow ONCOREFINER {
             RESEARCH_FILTERING(ch_research_filtering_in, [], [], [])
 
 
-            /*
+
             // VEP
             RESEARCH_FILTERING.out.vcf
                     .map { meta, vcf ->
                         def custom_extra_files = params.custom_extra_files ? file(params.custom_extra_files) : []
                         tuple(meta, vcf, custom_extra_files)
                     }
-                    .set { ch_cadd_snv }
-            */
+                    //.set { ch_cadd_snv }
+                    .set {ch_vep_snv}
+
 
 
             // ANNOTATE WITH CADD
             if (params.cadd_resources != null) {
-                TABIX_RESEARCH_FILTERING (RESEARCH_FILTERING.out.vcf)
-
-                RESEARCH_FILTERING.out.vcf
-                    .join(TABIX_RESEARCH_FILTERING.out.tbi, failOnMismatch:true, failOnDuplicate:true)
-                    .set {ch_cadd_snv}
 
                 ANNOTATE_CADD (
-                    ch_cadd_snv,
+                    ch_vep_snv,
+                    //ch_cadd_snv,
                     ch_cadd_header,
                     ch_cadd_resources,
                     ch_cadd_prescored_indels

From 8b6b8a04b4cb73188d381d664573055732fe81a6 Mon Sep 17 00:00:00 2001
From: kristinebilgrav <kristinebilgrav1@gmail.com>
Date: Thu, 26 Mar 2026 13:12:13 +0100
Subject: [PATCH 03/23] add gawk module and cadd_to_vcf_header asset

---
 assets/cadd_to_vcf_header.txt                |   1 +
 modules/nf-core/gawk/environment.yml         |   7 +
 modules/nf-core/gawk/main.nf                 |  60 ++++++
 modules/nf-core/gawk/meta.yml                |  84 ++++++++
 modules/nf-core/gawk/tests/main.nf.test      | 211 +++++++++++++++++++
 modules/nf-core/gawk/tests/main.nf.test.snap | 199 +++++++++++++++++
 modules/nf-core/gawk/tests/nextflow.config   |   6 +
 7 files changed, 568 insertions(+)
 create mode 100644 assets/cadd_to_vcf_header.txt
 create mode 100644 modules/nf-core/gawk/environment.yml
 create mode 100644 modules/nf-core/gawk/main.nf
 create mode 100644 modules/nf-core/gawk/meta.yml
 create mode 100644 modules/nf-core/gawk/tests/main.nf.test
 create mode 100644 modules/nf-core/gawk/tests/main.nf.test.snap
 create mode 100644 modules/nf-core/gawk/tests/nextflow.config

diff --git a/assets/cadd_to_vcf_header.txt b/assets/cadd_to_vcf_header.txt
new file mode 100644
index 0000000..8deee48
--- /dev/null
+++ b/assets/cadd_to_vcf_header.txt
@@ -0,0 +1 @@
+##INFO=<ID=CADD,Number=1,Type=Float,Description="PHRED-like scaled CADD score.">
diff --git a/modules/nf-core/gawk/environment.yml b/modules/nf-core/gawk/environment.yml
new file mode 100644
index 0000000..185a0f5
--- /dev/null
+++ b/modules/nf-core/gawk/environment.yml
@@ -0,0 +1,7 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+channels:
+  - conda-forge
+  - bioconda
+dependencies:
+  - conda-forge::gawk=5.3.1
diff --git a/modules/nf-core/gawk/main.nf b/modules/nf-core/gawk/main.nf
new file mode 100644
index 0000000..33dd24c
--- /dev/null
+++ b/modules/nf-core/gawk/main.nf
@@ -0,0 +1,60 @@
+process GAWK {
+    tag "$meta.id"
+    label 'process_single'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/a1/a125c778baf3865331101a104b60d249ee15fe1dca13bdafd888926cc5490a34/data' :
+        'community.wave.seqera.io/library/gawk:5.3.1--e09efb5dfc4b8156' }"
+
+    input:
+    tuple val(meta), path(input, arity: '0..*')
+    path(program_file)
+    val(disable_redirect_output)
+
+    output:
+    tuple val(meta), path("*.${suffix}"), emit: output
+    tuple val("${task.process}"), val('gawk'), eval("awk -Wversion | sed '1!d; s/.*Awk //; s/,.*//'"), topic: versions, emit: versions_gawk
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args  = task.ext.args  ?: '' // args is used for the main arguments of the tool
+    def args2 = task.ext.args2 ?: '' // args2 is used to specify a program when no program file has been given
+    prefix    = task.ext.prefix ?: "${meta.id}"
+    suffix    = task.ext.suffix ?: "${input.collect{ file -> file.getExtension()}.get(0)}" // use the first extension of the input files
+
+    program    = program_file ? "-f ${program_file}" : "${args2}"
+    lst_gz     = input.findResults{ file -> file.getExtension().endsWith("gz") ? file.toString() : null }
+    unzip      = lst_gz ? "gunzip -q -f ${lst_gz.join(" ")}" : ""
+    input_cmd  = input.collect { file -> file.toString() - ~/\.gz$/ }.join(" ")
+    output_cmd = suffix.endsWith("gz") ? "| gzip > ${prefix}.${suffix}" : "> ${prefix}.${suffix}"
+    output     = disable_redirect_output ? "" : output_cmd
+    cleanup    = lst_gz ? "rm ${lst_gz.collect{ file -> file - ~/\.gz$/ }.join(" ")}" : ""
+
+    input.collect{ file ->
+        assert file.name != "${prefix}.${suffix}" : "Input and output names are the same, set prefix in module configuration to disambiguate!"
+    }
+
+    """
+    ${unzip}
+
+    awk \\
+        ${args} \\
+        ${program} \\
+        ${input_cmd} \\
+        ${output}
+
+    ${cleanup}
+    """
+
+    stub:
+    prefix = task.ext.prefix ?: "${meta.id}"
+    suffix = task.ext.suffix ?: "${input.collect{ file -> file.getExtension()}.get(0)}"
+    def create_cmd = suffix.endsWith("gz") ? "echo '' | gzip >" : "touch"
+
+    """
+    ${create_cmd} ${prefix}.${suffix}
+    """
+}
diff --git a/modules/nf-core/gawk/meta.yml b/modules/nf-core/gawk/meta.yml
new file mode 100644
index 0000000..96cd0c7
--- /dev/null
+++ b/modules/nf-core/gawk/meta.yml
@@ -0,0 +1,84 @@
+name: "gawk"
+description: |
+  If you are like many computer users, you would frequently like to make changes in various text files
+  wherever certain patterns appear, or extract data from parts of certain lines while discarding the rest.
+  The job is easy with awk, especially the GNU implementation gawk.
+keywords:
+  - gawk
+  - awk
+  - txt
+  - text
+  - file parsing
+tools:
+  - "gawk":
+      description: "GNU awk"
+      homepage: "https://www.gnu.org/software/gawk/"
+      documentation: "https://www.gnu.org/software/gawk/manual/"
+      tool_dev_url: "https://www.gnu.org/prep/ftp.html"
+      licence:
+        - "GPL v3"
+      identifier: ""
+input:
+  - - meta:
+        type: map
+        description: |
+          Groovy Map containing sample information
+          e.g. [ id:'test', single_end:false ]
+    - input:
+        type: file
+        description: The input file - Specify the logic that needs to be executed
+          on this file on the `ext.args2` or in the program file. If the files
+          have a `.gz` extension, they will be unzipped using `gunzip`.
+        pattern: "*"
+        ontologies: []
+  - program_file:
+      type: file
+      description: Optional file containing logic for awk to execute. If you don't
+        wish to use a file, you can use `ext.args2` to specify the logic.
+      pattern: "*"
+      ontologies: []
+  - disable_redirect_output:
+      type: boolean
+      description: Disable the redirection of awk output to a given file. This is
+        useful if you want to use awk's built-in redirect to write files instead
+        of the shell's redirect.
+output:
+  output:
+    - - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. [ id:'test', single_end:false ]
+      - "*.${suffix}":
+          type: file
+          description: The output file - if using shell redirection, specify the
+            name of this file using `ext.prefix` and the extension using
+            `ext.suffix`. Otherwise, ensure the awk program produces files with
+            the extension in `ext.suffix`.
+          pattern: "*"
+          ontologies: []
+  versions_gawk:
+    - - ${task.process}:
+          type: string
+          description: The name of the process
+      - gawk:
+          type: string
+          description: The name of the tool
+      - awk -Wversion | sed '1!d; s/.*Awk //; s/,.*//':
+          type: eval
+          description: The expression to obtain the version of the tool
+topics:
+  versions:
+    - - ${task.process}:
+          type: string
+          description: The name of the process
+      - gawk:
+          type: string
+          description: The name of the tool
+      - awk -Wversion | sed '1!d; s/.*Awk //; s/,.*//':
+          type: eval
+          description: The expression to obtain the version of the tool
+authors:
+  - "@nvnieuwk"
+maintainers:
+  - "@nvnieuwk"
diff --git a/modules/nf-core/gawk/tests/main.nf.test b/modules/nf-core/gawk/tests/main.nf.test
new file mode 100644
index 0000000..3bd0a43
--- /dev/null
+++ b/modules/nf-core/gawk/tests/main.nf.test
@@ -0,0 +1,211 @@
+nextflow_process {
+
+    name "Test Process GAWK"
+    script "../main.nf"
+    process "GAWK"
+
+    tag "modules"
+    tag "modules_nfcore"
+    tag "gawk"
+
+    config "./nextflow.config"
+
+    test("Convert fasta to bed") {
+        when {
+            params {
+                gawk_suffix = "bed"
+                gawk_args2  = '\'BEGIN { FS = OFS = "\t"}; { print \$1, "0", \$2 }\''
+            }
+            process {
+                """
+                input[0] = [
+                    [ id:'test' ],
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true)
+                ]
+                input[1] = []
+                input[2] = false
+                """
+            }
+        }
+
+        then {
+            assert process.success
+            assert snapshot(sanitizeOutput(process.out)).match()
+        }
+    }
+
+    test("Convert fasta to bed - stub") {
+
+        options "-stub"
+
+        when {
+            params {
+                gawk_suffix = "bed"
+                gawk_args2  = '\'BEGIN { FS = OFS = "\t"}; { print \$1, "0", \$2 }\''
+            }
+            process {
+                """
+                input[0] = [
+                    [ id:'test' ],
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true)
+                ]
+                input[1] = []
+                input[2] = false
+                """
+            }
+        }
+
+        then {
+            assert process.success
+            assert snapshot(process.out).match()
+        }
+    }
+
+    test("Convert fasta to bed with program file") {
+        when {
+            params {
+                gawk_suffix = "bed"
+                gawk_args2  = ""
+            }
+            process {
+                """
+                input[0] = [
+                    [ id:'test' ],
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true)
+                ]
+                input[1] = Channel.of('BEGIN { FS = OFS = "\t"}; { print \$1, "0", \$2 }').collectFile(name:"program.awk")
+                input[2] = false
+                """
+            }
+        }
+
+        then {
+            assert process.success
+            assert snapshot(sanitizeOutput(process.out)).match()
+        }
+    }
+
+    test("Convert fasta to bed using awk redirect instead of shell redirect") {
+        when {
+            params {
+                gawk_suffix = "bed"
+                gawk_args2  = '\'BEGIN { FS = OFS = "\t"}; { print \$1, "0", \$2 > "test.bed" }\''
+            }
+            process {
+                """
+                input[0] = [
+                    [ id:'test' ],
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true)
+                ]
+                input[1] = []
+                input[2] = true
+                """
+            }
+        }
+
+        then {
+            assert process.success
+            assert snapshot(sanitizeOutput(process.out)).match()
+        }
+    }
+
+    test("Extract first column from multiple files") {
+        when {
+            params {
+                gawk_suffix = "bed"
+                gawk_args2  = ""
+            }
+            process {
+                """
+                input[0] = [
+                    [ id:'test' ],
+                    [file(params.modules_testdata_base_path + 'generic/txt/hello.txt', checkIfExists: true),
+                    file(params.modules_testdata_base_path + 'generic/txt/species_names.txt', checkIfExists: true)]
+                ]
+                input[1] = Channel.of('BEGIN {FS=" "}; {print \$1}').collectFile(name:"program.awk")
+                input[2] = false
+                """
+            }
+        }
+
+        then {
+            assert process.success
+            assert snapshot(sanitizeOutput(process.out)).match()
+        }
+    }
+
+    test("Unzip files before processing") {
+        when {
+            params {
+                gawk_suffix = "bed"
+                gawk_args2  = ""
+            }
+            process {
+                """
+                input[0] = [
+                    [ id:'test' ],
+                    [file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/NA12878_chrM.vcf.gz', checkIfExists: true),
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/NA24385_sv.vcf.gz', checkIfExists: true)]
+                ]
+                input[1] = Channel.of('/^#CHROM/ { print \$1, \$10 }').collectFile(name:"column_header.awk")
+                input[2] = false
+                """
+            }
+        }
+
+        then {
+            assert process.success
+            assert snapshot(sanitizeOutput(process.out)).match()
+        }
+    }
+
+    test("Compress after processing") {
+        when {
+            params {
+                gawk_suffix = "txt.gz"
+                gawk_args2  = '\'BEGIN { FS = OFS = "\t"}; { print \$1, "0", \$2 }\''
+            }
+            process {
+                """
+                input[0] = [
+                    [ id:'test' ],
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true)
+                ]
+                input[1] = []
+                input[2] = false
+                """
+            }
+        }
+
+        then {
+            assert process.success
+            assert snapshot(sanitizeOutput(process.out)).match()
+        }
+    }
+
+    test("Input and output files are similar") {
+        when {
+            params {
+                gawk_suffix = "txt"
+                gawk_args   = ""
+                gawk_args2  = ""
+            }
+            process {
+                """
+                input[0] = [
+                    [ id:'hello' ],
+                    [file(params.modules_testdata_base_path + 'generic/txt/hello.txt', checkIfExists: true),
+                    file(params.modules_testdata_base_path + 'generic/txt/species_names.txt', checkIfExists: true)]
+                ]
+                input[1] = Channel.of('BEGIN {FS=" "}; {print \$1}').collectFile(name:"program.awk")
+                input[2] = false
+                """
+            }
+        }
+
+        then {
+            assert process.failed
+            assert process.errorReport.contains("Input and output names are the same, set prefix in module configuration to disambiguate!")
+        }
+    }
+}
diff --git a/modules/nf-core/gawk/tests/main.nf.test.snap b/modules/nf-core/gawk/tests/main.nf.test.snap
new file mode 100644
index 0000000..9d6a369
--- /dev/null
+++ b/modules/nf-core/gawk/tests/main.nf.test.snap
@@ -0,0 +1,199 @@
+{
+    "Compress after processing": {
+        "content": [
+            {
+                "output": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.txt.gz:md5,87a15eb9c2ff20ccd5cd8735a28708f7"
+                    ]
+                ],
+                "versions_gawk": [
+                    [
+                        "GAWK",
+                        "gawk",
+                        "5.3.1"
+                    ]
+                ]
+            }
+        ],
+        "timestamp": "2026-03-04T11:31:50.761549948",
+        "meta": {
+            "nf-test": "0.9.4",
+            "nextflow": "25.10.4"
+        }
+    },
+    "Convert fasta to bed": {
+        "content": [
+            {
+                "output": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.bed:md5,87a15eb9c2ff20ccd5cd8735a28708f7"
+                    ]
+                ],
+                "versions_gawk": [
+                    [
+                        "GAWK",
+                        "gawk",
+                        "5.3.1"
+                    ]
+                ]
+            }
+        ],
+        "timestamp": "2026-03-04T11:30:50.804933797",
+        "meta": {
+            "nf-test": "0.9.4",
+            "nextflow": "25.10.4"
+        }
+    },
+    "Convert fasta to bed with program file": {
+        "content": [
+            {
+                "output": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.bed:md5,87a15eb9c2ff20ccd5cd8735a28708f7"
+                    ]
+                ],
+                "versions_gawk": [
+                    [
+                        "GAWK",
+                        "gawk",
+                        "5.3.1"
+                    ]
+                ]
+            }
+        ],
+        "timestamp": "2026-03-04T11:31:10.838989113",
+        "meta": {
+            "nf-test": "0.9.4",
+            "nextflow": "25.10.4"
+        }
+    },
+    "Convert fasta to bed - stub": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.bed:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "1": [
+                    [
+                        "GAWK",
+                        "gawk",
+                        "5.3.1"
+                    ]
+                ],
+                "output": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.bed:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "versions_gawk": [
+                    [
+                        "GAWK",
+                        "gawk",
+                        "5.3.1"
+                    ]
+                ]
+            }
+        ],
+        "timestamp": "2026-03-04T11:31:00.182649403",
+        "meta": {
+            "nf-test": "0.9.4",
+            "nextflow": "25.10.4"
+        }
+    },
+    "Extract first column from multiple files": {
+        "content": [
+            {
+                "output": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.bed:md5,566c51674bd643227bb2d83e0963376d"
+                    ]
+                ],
+                "versions_gawk": [
+                    [
+                        "GAWK",
+                        "gawk",
+                        "5.3.1"
+                    ]
+                ]
+            }
+        ],
+        "timestamp": "2026-03-04T11:31:30.796772884",
+        "meta": {
+            "nf-test": "0.9.4",
+            "nextflow": "25.10.4"
+        }
+    },
+    "Unzip files before processing": {
+        "content": [
+            {
+                "output": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.bed:md5,1e31ebd4a060aab5433bbbd9ab24e403"
+                    ]
+                ],
+                "versions_gawk": [
+                    [
+                        "GAWK",
+                        "gawk",
+                        "5.3.1"
+                    ]
+                ]
+            }
+        ],
+        "timestamp": "2026-03-04T11:31:40.72259289",
+        "meta": {
+            "nf-test": "0.9.4",
+            "nextflow": "25.10.4"
+        }
+    },
+    "Convert fasta to bed using awk redirect instead of shell redirect": {
+        "content": [
+            {
+                "output": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.bed:md5,87a15eb9c2ff20ccd5cd8735a28708f7"
+                    ]
+                ],
+                "versions_gawk": [
+                    [
+                        "GAWK",
+                        "gawk",
+                        "5.3.1"
+                    ]
+                ]
+            }
+        ],
+        "timestamp": "2026-03-04T11:31:20.33222004",
+        "meta": {
+            "nf-test": "0.9.4",
+            "nextflow": "25.10.4"
+        }
+    }
+}
\ No newline at end of file
diff --git a/modules/nf-core/gawk/tests/nextflow.config b/modules/nf-core/gawk/tests/nextflow.config
new file mode 100644
index 0000000..895709a
--- /dev/null
+++ b/modules/nf-core/gawk/tests/nextflow.config
@@ -0,0 +1,6 @@
+process {
+    withName: GAWK {
+        ext.suffix = params.gawk_suffix
+        ext.args2  = params.gawk_args2
+    }
+}

From 73774a1227806c313cf28d35a0dee6e66d4240ba Mon Sep 17 00:00:00 2001
From: kristinebilgrav <kristinebilgrav1@gmail.com>
Date: Thu, 26 Mar 2026 18:03:55 +0100
Subject: [PATCH 04/23] fixing subwf

---
 assets/cadd_to_vcf_header_-1.0-.txt      |  1 -
 conf/subworkflows/annotate_cadd.config   | 35 +++++++++++
 conf/test.config                         |  3 +-
 modules.json                             |  5 ++
 subworkflows/local/annotate_cadd/main.nf | 78 ++++++++++++++++++------
 workflows/oncorefiner.nf                 | 34 ++++++-----
 6 files changed, 119 insertions(+), 37 deletions(-)
 delete mode 100644 assets/cadd_to_vcf_header_-1.0-.txt

diff --git a/assets/cadd_to_vcf_header_-1.0-.txt b/assets/cadd_to_vcf_header_-1.0-.txt
deleted file mode 100644
index 8deee48..0000000
--- a/assets/cadd_to_vcf_header_-1.0-.txt
+++ /dev/null
@@ -1 +0,0 @@
-##INFO=<ID=CADD,Number=1,Type=Float,Description="PHRED-like scaled CADD score.">
diff --git a/conf/subworkflows/annotate_cadd.config b/conf/subworkflows/annotate_cadd.config
index 635799a..1214262 100644
--- a/conf/subworkflows/annotate_cadd.config
+++ b/conf/subworkflows/annotate_cadd.config
@@ -5,5 +5,40 @@ Annotate with CADD
 
 process {
 
+    withName: '.*:ANNOTATE_CADD:.*' {
+        publishDir = [
+            enabled: false
+        ]
+    }
 
+    withName: '.*:ANNOTATE_CADD:BCFTOOLS_VIEW' {
+        ext.args   = { "--output-type z --types indels,other" }
+        ext.prefix = { "${vcf.simpleName}_indels" }
+    }
+
+    withName: '.*:ANNOTATE_CADD:CADD' {
+        ext.args   = { "-g ${params.genome}" }
+        ext.prefix = { "${vcf.simpleName}_cadd" }
+    }
+
+    withName: '.*:ANNOTATE_CADD:TABIX_CADD' {
+        ext.args = { "--force --sequence 1 --begin 2 --end 2" }
+    }
+
+    withName: '.*:ANNOTATE_CADD:CADD_TO_REFERENCE_CHRNAMES' {
+        ext.args2 = '\'{original=$1; sub("chr","",$1); print $1, original}\''
+        ext.prefix = "cadd_to_reference"
+        ext.suffix = "txt"
+    }
+
+    withName: '.*:ANNOTATE_CADD:REFERENCE_TO_CADD_CHRNAMES' {
+        ext.args2 = '\'{original=$1; sub("chr","",$1); print original, $1}\''
+        ext.prefix = "reference_to_cadd"
+        ext.suffix = "txt"
+    }
+
+    withName: '.*:ANNOTATE_CADD:ANNOTATE_INDELS' {
+        ext.args   = { "--columns Chrom,Pos,Ref,Alt,-,CADD --output-type z --write-index=tbi" }
+        ext.prefix = { "${input.simpleName}_ann" }
+    }
 }
diff --git a/conf/test.config b/conf/test.config
index 4a750f4..d7bf962 100644
--- a/conf/test.config
+++ b/conf/test.config
@@ -47,7 +47,8 @@ params {
     svdb_query_dbs      = params.pipelines_testdata_base_path + 'reference/svdb_querydb_files.csv'
 
     // Mock input for CADD
-    cadd_resources               = '../test-datasets'
+    cadd_resources               = '../test-datasets/reference'
+    cadd_prescored_indels        = '../test-datasets/'
     //cadd_resources               = params.pipelines_testdata_base_path + "assets" //TODO add
     //cadd_prescored_indels        = params.pipelines_testdata_base_path + "docs"   //TODO add
 
diff --git a/modules.json b/modules.json
index ab7e9cb..9632cda 100644
--- a/modules.json
+++ b/modules.json
@@ -35,6 +35,11 @@
                         "git_sha": "34505e1fc5e9f4fd641210ca440acff6bd33b842",
                         "installed_by": ["modules"]
                     },
+                    "gawk": {
+                        "branch": "master",
+                        "git_sha": "c0da8f3a26835d663873001382a708f75766fec6",
+                        "installed_by": ["modules"]
+                    },
                     "multiqc": {
                         "branch": "master",
                         "git_sha": "2c73cc8fa92cf48de3da0b643fdf357a8a290b36",
diff --git a/subworkflows/local/annotate_cadd/main.nf b/subworkflows/local/annotate_cadd/main.nf
index a3097b9..98da106 100644
--- a/subworkflows/local/annotate_cadd/main.nf
+++ b/subworkflows/local/annotate_cadd/main.nf
@@ -2,8 +2,12 @@
 // A subworkflow to annotate cadd
 //
 
-include { BCFTOOLS_ANNOTATE                        } from '../../../modules/nf-core/bcftools/annotate/main'
+include { BCFTOOLS_ANNOTATE as RENAME_CHR_CADD     } from '../../../modules/nf-core/bcftools/annotate/main'
+include { BCFTOOLS_ANNOTATE as ANNOTATE_INDELS      } from '../../../modules/nf-core/bcftools/annotate/main'
+include { BCFTOOLS_VIEW                            } from '../../../modules/nf-core/bcftools/view/main'
 include { CADD                                     } from '../../../modules/nf-core/cadd/main'
+include { GAWK as REFERENCE_TO_CADD_CHRNAMES       } from '../../../modules/nf-core/gawk/main'
+include { GAWK as CADD_TO_REFERENCE_CHRNAMES       } from '../../../modules/nf-core/gawk/main'
 include { TABIX_TABIX as TABIX_CADD                } from '../../../modules/nf-core/tabix/tabix/main'
 include { TABIX_TABIX as TABIX_ANNOTATE            } from '../../../modules/nf-core/tabix/tabix/main'
 
@@ -11,34 +15,68 @@ include { TABIX_TABIX as TABIX_ANNOTATE            } from '../../../modules/nf-c
 workflow ANNOTATE_CADD {
 
     take:
-        ch_snv_vcf         // channel: [mandatory] [ val(meta), path(vcfs), path(idx) ]
-        ch_cadd_header     // channel: [mandatory] [ path(txt) ]
-        ch_cadd_resources  // channel: [mandatory] [ path(dir) ]
-        ch_cadd_prescored_indels // channel: [mandatory] [ val(meta), path(dir) ]
+        ch_vcf                     // channel: [mandatory] [ val(meta), path(vcf), path(idx) ]
+        val_genome                 // string:  [mandatory] GRCh37 or GRCh38
+        ch_fai                     // channel: [mandatory] [ val(meta), path(fai) ]
+        ch_header                  // channel: [mandatory] [ path(txt) ]
+        ch_cadd_resources          // channel: [mandatory] [ val(meta), path(dir) ]
+        ch_cadd_prescored_indels   // channel: [mandatory] [ val(meta), path(dir) ]
 
     main:
-        ch_versions = channel.empty()
 
-        CADD(ch_snv_vcf, ch_cadd_resources, ch_cadd_prescored_indels)
+        ch_rename_chrs_ref    = []
 
+        // Create files and rename chromosomes if reference is GRCh38
+        if (val_genome.equals('GRCh38')) { // TODO change to 38
+
+            // Create txt files for changing of chromosomes
+            REFERENCE_TO_CADD_CHRNAMES ( ch_fai , [], false )
+
+            REFERENCE_TO_CADD_CHRNAMES.out.output.map { _meta, txt -> txt }
+                .set {ch_chrnames_cadd}
+
+            CADD_TO_REFERENCE_CHRNAMES ( ch_fai , [], false )
+
+            CADD_TO_REFERENCE_CHRNAMES.out.output.map { _meta, txt -> txt }
+                .set { ch_rename_chrs_ref }
+
+            ch_vcf
+                .combine(ch_chrnames_cadd)
+                .map { meta, vcf, tbi, txt -> tuple( meta, vcf, tbi, [], [], [], [], txt ) }
+                .set {rename_chrnames_in}
+
+            // Change chr names to CADD compatible names
+            RENAME_CHR_CADD( rename_chrnames_in )
+
+            RENAME_CHR_CADD.out.vcf
+                .map {meta, vcf -> tuple( meta , vcf, [] )}
+                .set { ch_vcf }
+        }
+
+        // Filter to extract indels
+        BCFTOOLS_VIEW(ch_vcf, [], [], [])
+
+        // CADD
+        CADD(BCFTOOLS_VIEW.out.vcf, ch_cadd_resources, ch_cadd_prescored_indels)
+
+        // Index CADD
         TABIX_CADD(CADD.out.tsv)
 
-        ch_snv_vcf
-            .join(CADD.out.tsv)
-            .join(TABIX_CADD.out.tbi)
-            .set { ch_annotate_in }
+        // Change chr names back to desired naming and annotate original vcf with cadd results
+        ch_vcf
+            .join(CADD.out.tsv, failOnMismatch: true, failOnDuplicate: true)
+            .join(TABIX_CADD.out.index, failOnMismatch: true, failOnDuplicate: true)
+            .combine( ch_header )
+            .combine( ch_rename_chrs_ref )
+            .map { meta, vcf, tbi, annotations, annotations_index, header, txt -> tuple( meta, vcf, [], annotations, annotations_index, [], header, txt )  }
+            .set { ch_annotate }
 
-        BCFTOOLS_ANNOTATE(ch_annotate_in, ch_cadd_header )
 
-        TABIX_ANNOTATE (BCFTOOLS_ANNOTATE.out.vcf)
+        ANNOTATE_INDELS( ch_annotate )
 
-        ch_versions = ch_versions.mix(CADD.out.versions.first())
-        ch_versions = ch_versions.mix(TABIX_CADD.out.versions.first())
-        ch_versions = ch_versions.mix(BCFTOOLS_ANNOTATE.out.versions.first())
-        ch_versions = ch_versions.mix(TABIX_ANNOTATE.out.versions.first())
+        ANNOTATE_INDELS.out.vcf.view() //TODO fix
 
     emit:
-        vcf = BCFTOOLS_ANNOTATE.out.vcf // channel: [ val(meta), path(vcf) ]
-        tbi = TABIX_ANNOTATE.out.tbi
-        versions = ch_versions
+        vcf = ANNOTATE_INDELS.out.vcf // channel: [ val(meta), path(vcf) ]
+        tbi = ANNOTATE_INDELS.out.tbi // channel: [ val(meta), path(tbi) ]
 }
diff --git a/workflows/oncorefiner.nf b/workflows/oncorefiner.nf
index 691554f..aa14054 100644
--- a/workflows/oncorefiner.nf
+++ b/workflows/oncorefiner.nf
@@ -60,6 +60,7 @@ workflow ONCOREFINER {
 
         // Reference files
         ch_genome_fasta         = channel.fromPath(params.fasta).map { it -> [[id:it.simpleName], it] }.collect()
+        ch_genome_fai           = channel.fromPath(params.fai).map {it -> [[id:it.simpleName], it]  }.collect()
 
         // File channels for PREPARE_REFERENCES
         ch_vep_cache_unprocessed     = params.vep_cache           ? channel.fromPath(params.vep_cache).map { it -> [[id:'vep_cache'], it] }.collect()
@@ -71,14 +72,14 @@ workflow ONCOREFINER {
         .set { ch_references }
 
         // Gather or get from params
-        ch_vep_cache                = ( params.vep_cache && params.vep_cache.endsWith("tar.gz") )  ? ch_references.vep_resources
+        ch_vep_cache                 = ( params.vep_cache && params.vep_cache.endsWith("tar.gz") )  ? ch_references.vep_resources
                                                                                 : ( params.vep_cache    ? channel.fromPath(params.vep_cache).collect() : channel.value([]) )
 
-        ch_cadd_header              = Channel.fromPath("$projectDir/assets/cadd_to_vcf_header_-1.0-.txt", checkIfExists: true).collect()
-        ch_cadd_resources           = params.cadd_resources                     ? Channel.fromPath(params.cadd_resources).collect()
-                                                                                : Channel.value([])
-        ch_cadd_prescored_indels     = createReferenceChannelFromPath(params.cadd_prescored_indels) // align with above
-
+        ch_cadd_header               = channel.fromPath("$projectDir/assets/cadd_to_vcf_header.txt", checkIfExists: true).collect()
+        ch_cadd_resources            = params.cadd_resources                     ? channel.fromPath(params.cadd_resources).map { it -> [[id:'cadd_resources'], it] }.collect()
+                                                                                 : channel.value([])
+        ch_cadd_prescored_indels     = params.cadd_prescored_indels              ? channel.fromPath(params.cadd_prescored_indels).map { it -> [[id:'cadd_prescored_indels'], it] }.collect()
+                                                                                 : channel.value([])
 
         //
         // Read and store paths in the vep_plugin_files file
@@ -143,9 +144,7 @@ workflow ONCOREFINER {
 
             RESEARCH_FILTERING(ch_research_filtering_in, [], [], [])
 
-
-
-            // VEP
+            // TODO remove or move down - not used if cadd output is input to vep
             RESEARCH_FILTERING.out.vcf
                     .map { meta, vcf ->
                         tuple(meta, vcf, [])
@@ -153,23 +152,28 @@ workflow ONCOREFINER {
                     //.set { ch_cadd_snv }
                     .set {ch_vep_snv}
 
+            // ANNOTATE WITH CADD - currently depends on resources - could be variable instead (ref optional wf refinement)?
+            if (params.cadd_resources != null) {
 
+                TABIX_RESEARCH_FILTERING(RESEARCH_FILTERING.out.vcf)
 
-            // ANNOTATE WITH CADD
-            if (params.cadd_resources != null) {
+                RESEARCH_FILTERING.out.vcf
+                    .join(TABIX_RESEARCH_FILTERING.out.index, failOnMismatch:true, failOnDuplicate:true)
+                    .set{ ch_cadd_in }
 
                 ANNOTATE_CADD (
-                    ch_vep_snv,
-                    //ch_cadd_snv,
+                    ch_cadd_in,
+                    params.genome,
+                    ch_genome_fai,
                     ch_cadd_header,
                     ch_cadd_resources,
                     ch_cadd_prescored_indels
                 )
-                ch_vep_snv = ANNOTATE_CADD.out.vcf
-                ch_versions = ch_versions.mix(ANNOTATE_CADD.out.versions)
+                //ch_vep_snv = ANNOTATE_CADD.out.vcf
 
             }
 
+            // VEP
             ENSEMBLVEP_SNV (
                 ch_vep_snv,
                 params.genome,

From eafa726bafbc62d9a915b913499f219eee36cc10 Mon Sep 17 00:00:00 2001
From: kristinebilgrav <kristinebilgrav1@gmail.com>
Date: Fri, 27 Mar 2026 16:17:10 +0100
Subject: [PATCH 05/23] update subwf and test

---
 conf/subworkflows/annotate_cadd.config        |  5 ++
 subworkflows/local/annotate_cadd/main.nf      |  8 +--
 .../local/annotate_cadd/tests/main.nf.test    | 67 ++++++++++++++++++
 .../annotate_cadd/tests/main.nf.test.snap     | 68 +++++++++++++++++++
 .../local/annotate_cadd/tests/nextflow.config | 40 +++++++++++
 tests/nextflow.config                         |  2 +-
 workflows/oncorefiner.nf                      | 11 +--
 7 files changed, 191 insertions(+), 10 deletions(-)
 create mode 100644 subworkflows/local/annotate_cadd/tests/main.nf.test
 create mode 100644 subworkflows/local/annotate_cadd/tests/main.nf.test.snap
 create mode 100644 subworkflows/local/annotate_cadd/tests/nextflow.config

diff --git a/conf/subworkflows/annotate_cadd.config b/conf/subworkflows/annotate_cadd.config
index 1214262..55bcf0d 100644
--- a/conf/subworkflows/annotate_cadd.config
+++ b/conf/subworkflows/annotate_cadd.config
@@ -11,6 +11,11 @@ process {
         ]
     }
 
+    withName: 'RENAME_CHR_CADD' {
+        ext.args   = { "--output-type z" }
+        ext.prefix = { "${vcf.simpleName}_indels" }
+    }
+
     withName: '.*:ANNOTATE_CADD:BCFTOOLS_VIEW' {
         ext.args   = { "--output-type z --types indels,other" }
         ext.prefix = { "${vcf.simpleName}_indels" }
diff --git a/subworkflows/local/annotate_cadd/main.nf b/subworkflows/local/annotate_cadd/main.nf
index 98da106..286dc89 100644
--- a/subworkflows/local/annotate_cadd/main.nf
+++ b/subworkflows/local/annotate_cadd/main.nf
@@ -3,7 +3,7 @@
 //
 
 include { BCFTOOLS_ANNOTATE as RENAME_CHR_CADD     } from '../../../modules/nf-core/bcftools/annotate/main'
-include { BCFTOOLS_ANNOTATE as ANNOTATE_INDELS      } from '../../../modules/nf-core/bcftools/annotate/main'
+include { BCFTOOLS_ANNOTATE as ANNOTATE_INDELS     } from '../../../modules/nf-core/bcftools/annotate/main'
 include { BCFTOOLS_VIEW                            } from '../../../modules/nf-core/bcftools/view/main'
 include { CADD                                     } from '../../../modules/nf-core/cadd/main'
 include { GAWK as REFERENCE_TO_CADD_CHRNAMES       } from '../../../modules/nf-core/gawk/main'
@@ -24,10 +24,10 @@ workflow ANNOTATE_CADD {
 
     main:
 
-        ch_rename_chrs_ref    = []
+        ch_rename_chrs_ref    = channel.value([[]])
 
         // Create files and rename chromosomes if reference is GRCh38
-        if (val_genome.equals('GRCh38')) { // TODO change to 38
+        if (val_genome.equals('GRCh38')) {
 
             // Create txt files for changing of chromosomes
             REFERENCE_TO_CADD_CHRNAMES ( ch_fai , [], false )
@@ -74,8 +74,6 @@ workflow ANNOTATE_CADD {
 
         ANNOTATE_INDELS( ch_annotate )
 
-        ANNOTATE_INDELS.out.vcf.view() //TODO fix
-
     emit:
         vcf = ANNOTATE_INDELS.out.vcf // channel: [ val(meta), path(vcf) ]
         tbi = ANNOTATE_INDELS.out.tbi // channel: [ val(meta), path(tbi) ]
diff --git a/subworkflows/local/annotate_cadd/tests/main.nf.test b/subworkflows/local/annotate_cadd/tests/main.nf.test
new file mode 100644
index 0000000..e01f85c
--- /dev/null
+++ b/subworkflows/local/annotate_cadd/tests/main.nf.test
@@ -0,0 +1,67 @@
+nextflow_workflow {
+
+    name "Test Workflow ANNOTATE_CADD"
+    script "subworkflows/local/annotate_cadd/main.nf"
+    workflow "ANNOTATE_CADD"
+    tag "subworkflows"
+    tag "annotate_cadd"
+    config "./nextflow.config"
+
+    test("ANNOTATE_CADD - GRCh37, stub") {
+
+        options "-stub"
+
+        when {
+            params {
+                genome = "GRCh37"
+            }
+            workflow {
+                """
+                input[0] = channel.of([ [id:'test'], file(params.pipelines_testdata_base_path + 'testdata/SNV.tumor.pave.somatic.37.vcf.gz'), file(params.pipelines_testdata_base_path + 'testdata/SNV.tumor.pave.somatic.37.vcf.gz.tbi') ]).collect()
+                input[1] = 'GRCh37'
+                input[2] = channel.of([ [id:'genome'], file(params.pipelines_testdata_base_path + 'reference/reference.fasta.fai', checkIfExists: true)]).collect()
+                input[3] = channel.fromPath("$projectDir/assets/cadd_to_vcf_header.txt", checkIfExists: true).collect()
+                input[4] = channel.from("\$PWD").map { dir -> [ [ id: 'cadd_resources' ], dir ] }
+                input[5] = channel.from("\$PWD").map { dir -> [ [ id: 'cadd_prescored_indels' ], dir ] }
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert workflow.success },
+                { assert snapshot(workflow.out).match() }
+            )
+        }
+    }
+
+    test("ANNOTATE_CADD - GRCh38, stub") {
+
+        options "-stub"
+
+        when {
+            params {
+                genome = "GRCh38"
+            }
+            workflow {
+                """
+                input[0] = channel.of([ [id:'test'], file(params.pipelines_testdata_base_path + 'testdata/SNV.tumor.pave.somatic.37.vcf.gz'), file(params.pipelines_testdata_base_path + 'testdata/SNV.tumor.pave.somatic.37.vcf.gz.tbi') ]).collect()
+                input[1] = 'GRCh38'
+                input[2] = channel.fromPath( params.pipelines_testdata_base_path + 'reference/reference.fasta.fai' ).map { it -> [[id:it.simpleName], it] }.collect()
+                input[3] = channel.fromPath("$projectDir/assets/cadd_to_vcf_header.txt", checkIfExists: true).collect()
+                input[4] = channel.from("\$PWD").map { dir -> [ [ id: 'cadd_resources' ], dir ] }
+                input[5] = channel.from("\$PWD").map { dir -> [ [ id: 'cadd_prescored_indels' ], dir ] }
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert workflow.success },
+                { assert snapshot(
+                    workflow.out).match() }
+            )
+        }
+    }
+
+}
diff --git a/subworkflows/local/annotate_cadd/tests/main.nf.test.snap b/subworkflows/local/annotate_cadd/tests/main.nf.test.snap
new file mode 100644
index 0000000..301b1a4
--- /dev/null
+++ b/subworkflows/local/annotate_cadd/tests/main.nf.test.snap
@@ -0,0 +1,68 @@
+{
+    "ANNOTATE_CADD - GRCh37, stub": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "SNV_ann.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+                    ]
+                ],
+                "1": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "SNV_ann.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "tbi": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "SNV_ann.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "vcf": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "SNV_ann.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+                    ]
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.9.3",
+            "nextflow": "25.10.4"
+        },
+        "timestamp": "2026-03-27T16:04:52.225642"
+    },
+    "ANNOTATE_CADD - GRCh38, stub": {
+        "content": [
+            {
+                "0": [
+                    
+                ],
+                "1": [
+                    
+                ],
+                "tbi": [
+                    
+                ],
+                "vcf": [
+                    
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.9.3",
+            "nextflow": "25.10.4"
+        },
+        "timestamp": "2026-03-27T15:48:16.48239"
+    }
+}
\ No newline at end of file
diff --git a/subworkflows/local/annotate_cadd/tests/nextflow.config b/subworkflows/local/annotate_cadd/tests/nextflow.config
new file mode 100644
index 0000000..3bdabb5
--- /dev/null
+++ b/subworkflows/local/annotate_cadd/tests/nextflow.config
@@ -0,0 +1,40 @@
+process {
+
+    withName: 'BCFTOOLS_VIEW' {
+        ext.args   = { "--output-type z --types indels,other" }
+        ext.prefix = { "${vcf.simpleName}_indels" }
+    }
+
+    withName: 'CADD' {
+        container = "nf-core/ubuntu:22.04" //Using a basic container because v1.7.3 is too big for CI.
+        ext.args   = { "-g ${params.genome}" }
+        ext.prefix = { "${vcf.simpleName}_cadd" }
+    }
+
+    withName: 'TABIX_CADD' {
+        ext.args = { "--force --sequence 1 --begin 2 --end 2" }
+    }
+
+    withName: 'CADD_TO_REFERENCE_CHRNAMES' {
+        ext.args2 = '\'{original=$1; sub("chr","",$1); print $1, original}\''
+        ext.prefix = "cadd_to_reference"
+        ext.suffix = "txt"
+    }
+
+    withName: 'REFERENCE_TO_CADD_CHRNAMES' {
+        ext.args2 = '\'{original=$1; sub("chr","",$1); print original, $1}\''
+        ext.prefix = "reference_to_cadd"
+        ext.suffix = "txt"
+    }
+
+    withName: 'ANNOTATE_INDELS' {
+        ext.args   = { "--columns Chrom,Pos,Ref,Alt,-,CADD --output-type z --write-index=tbi" }
+        ext.prefix = { "${input.simpleName}_ann" }
+    }
+
+    withName: 'RENAME_CHR_CADD' {
+        ext.args   = { "--output-type z" }
+        ext.prefix = { "${vcf.simpleName}_indels" }
+    }
+
+}
diff --git a/tests/nextflow.config b/tests/nextflow.config
index b705059..03c6297 100644
--- a/tests/nextflow.config
+++ b/tests/nextflow.config
@@ -12,7 +12,7 @@ params {
     config_profile_name          = 'Test profile'
     config_profile_description   = 'Minimal test dataset to check pipeline function'
     modules_testdata_base_path   = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/'
-    pipelines_testdata_base_path = 'https://raw.githubusercontent.com/Clinical-Genomics/test-datasets/tree/1184e1c31b5e47055e3580c7e0f65240a1c005d0/'
+    pipelines_testdata_base_path = 'https://raw.githubusercontent.com/Clinical-Genomics/test-datasets/1184e1c31b5e47055e3580c7e0f65240a1c005d0/'
 
 
 }
diff --git a/workflows/oncorefiner.nf b/workflows/oncorefiner.nf
index aa14054..210a294 100644
--- a/workflows/oncorefiner.nf
+++ b/workflows/oncorefiner.nf
@@ -144,12 +144,10 @@ workflow ONCOREFINER {
 
             RESEARCH_FILTERING(ch_research_filtering_in, [], [], [])
 
-            // TODO remove or move down - not used if cadd output is input to vep
             RESEARCH_FILTERING.out.vcf
                     .map { meta, vcf ->
                         tuple(meta, vcf, [])
                     }
-                    //.set { ch_cadd_snv }
                     .set {ch_vep_snv}
 
             // ANNOTATE WITH CADD - currently depends on resources - could be variable instead (ref optional wf refinement)?
@@ -163,16 +161,21 @@ workflow ONCOREFINER {
 
                 ANNOTATE_CADD (
                     ch_cadd_in,
-                    params.genome,
+                    params.genome, //TODO pull dev and change to val_genome
                     ch_genome_fai,
                     ch_cadd_header,
                     ch_cadd_resources,
                     ch_cadd_prescored_indels
                 )
-                //ch_vep_snv = ANNOTATE_CADD.out.vcf
+                ch_cadd_snv = ANNOTATE_CADD.out.vcf
 
             }
 
+            ch_cadd_snv // Q: is it better to make this channel in the annotate cadd subwf?
+                .join(ANNOTATE_CADD.out.tbi)
+                .map { meta, vcf, tbi -> tuple(meta, vcf, tbi) }
+                .set { ch_vep_snv }
+
             // VEP
             ENSEMBLVEP_SNV (
                 ch_vep_snv,

From 1448d888b1063e0cd5f399c09c5ac1e2d7733d51 Mon Sep 17 00:00:00 2001
From: kristinebilgrav <kristinebilgrav1@gmail.com>
Date: Fri, 27 Mar 2026 16:32:19 +0100
Subject: [PATCH 06/23] fixing inputs

---
 workflows/oncorefiner.nf | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/workflows/oncorefiner.nf b/workflows/oncorefiner.nf
index fa2aa29..06bbed1 100644
--- a/workflows/oncorefiner.nf
+++ b/workflows/oncorefiner.nf
@@ -63,6 +63,13 @@ workflow ONCOREFINER {
         ch_genome_fasta         = channel.fromPath(params.fasta).map { it -> [[id:it.simpleName], it] }.collect()
         ch_genome_fai           = channel.fromPath(params.fai).map {it -> [[id:it.simpleName], it]  }.collect()
 
+        ch_cadd_header               = channel.fromPath("$projectDir/assets/cadd_to_vcf_header.txt", checkIfExists: true).collect()
+        ch_cadd_resources            = params.cadd_resources                     ? channel.fromPath(params.cadd_resources).map { it -> [[id:'cadd_resources'], it] }.collect()
+                                                                                 : channel.value([])
+        ch_cadd_prescored_indels     = params.cadd_prescored_indels              ? channel.fromPath(params.cadd_prescored_indels).map { it -> [[id:'cadd_prescored_indels'], it] }.collect()
+                                                                                 : channel.value([])
+
+
         //
         // Read and store paths in the vep_plugin_files file
         //

From 175639a1e47e81e8f2a8f4d7b335fac79118bd04 Mon Sep 17 00:00:00 2001
From: kristinebilgrav <kristinebilgrav1@gmail.com>
Date: Mon, 30 Mar 2026 14:00:13 +0200
Subject: [PATCH 07/23] update test

---
 conf/subworkflows/annotate_cadd.config        |  4 +--
 subworkflows/local/annotate_cadd/main.nf      |  1 -
 .../local/annotate_cadd/tests/main.nf.test    | 18 +++++++----
 .../annotate_cadd/tests/main.nf.test.snap     | 30 +++++++++++++++----
 .../local/annotate_cadd/tests/nextflow.config | 10 +++----
 5 files changed, 45 insertions(+), 18 deletions(-)

diff --git a/conf/subworkflows/annotate_cadd.config b/conf/subworkflows/annotate_cadd.config
index 55bcf0d..0ce8b02 100644
--- a/conf/subworkflows/annotate_cadd.config
+++ b/conf/subworkflows/annotate_cadd.config
@@ -11,9 +11,9 @@ process {
         ]
     }
 
-    withName: 'RENAME_CHR_CADD' {
+    withName: '.*:ANNOTATE_CADD:RENAME_CHR_CADD' {
         ext.args   = { "--output-type z" }
-        ext.prefix = { "${vcf.simpleName}_indels" }
+        ext.prefix = { "${input.simpleName}_indels" }
     }
 
     withName: '.*:ANNOTATE_CADD:BCFTOOLS_VIEW' {
diff --git a/subworkflows/local/annotate_cadd/main.nf b/subworkflows/local/annotate_cadd/main.nf
index 286dc89..287046e 100644
--- a/subworkflows/local/annotate_cadd/main.nf
+++ b/subworkflows/local/annotate_cadd/main.nf
@@ -9,7 +9,6 @@ include { CADD                                     } from '../../../modules/nf-c
 include { GAWK as REFERENCE_TO_CADD_CHRNAMES       } from '../../../modules/nf-core/gawk/main'
 include { GAWK as CADD_TO_REFERENCE_CHRNAMES       } from '../../../modules/nf-core/gawk/main'
 include { TABIX_TABIX as TABIX_CADD                } from '../../../modules/nf-core/tabix/tabix/main'
-include { TABIX_TABIX as TABIX_ANNOTATE            } from '../../../modules/nf-core/tabix/tabix/main'
 
 
 workflow ANNOTATE_CADD {
diff --git a/subworkflows/local/annotate_cadd/tests/main.nf.test b/subworkflows/local/annotate_cadd/tests/main.nf.test
index e01f85c..9023868 100644
--- a/subworkflows/local/annotate_cadd/tests/main.nf.test
+++ b/subworkflows/local/annotate_cadd/tests/main.nf.test
@@ -1,7 +1,7 @@
 nextflow_workflow {
 
-    name "Test Workflow ANNOTATE_CADD"
-    script "subworkflows/local/annotate_cadd/main.nf"
+    name "Test Subworkflow ANNOTATE_CADD"
+    script "../main.nf"
     workflow "ANNOTATE_CADD"
     tag "subworkflows"
     tag "annotate_cadd"
@@ -17,7 +17,11 @@ nextflow_workflow {
             }
             workflow {
                 """
-                input[0] = channel.of([ [id:'test'], file(params.pipelines_testdata_base_path + 'testdata/SNV.tumor.pave.somatic.37.vcf.gz'), file(params.pipelines_testdata_base_path + 'testdata/SNV.tumor.pave.somatic.37.vcf.gz.tbi') ]).collect()
+                input[0] = channel.of([
+                    [id:'test'],
+                    file(params.pipelines_testdata_base_path + 'testdata/SNV.tumor.pave.somatic.37.vcf.gz', checkIfExists: true),
+                    file(params.pipelines_testdata_base_path + 'testdata/SNV.tumor.pave.somatic.37.vcf.gz.tbi', checkIfExists: true)
+                    ])
                 input[1] = 'GRCh37'
                 input[2] = channel.of([ [id:'genome'], file(params.pipelines_testdata_base_path + 'reference/reference.fasta.fai', checkIfExists: true)]).collect()
                 input[3] = channel.fromPath("$projectDir/assets/cadd_to_vcf_header.txt", checkIfExists: true).collect()
@@ -45,9 +49,13 @@ nextflow_workflow {
             }
             workflow {
                 """
-                input[0] = channel.of([ [id:'test'], file(params.pipelines_testdata_base_path + 'testdata/SNV.tumor.pave.somatic.37.vcf.gz'), file(params.pipelines_testdata_base_path + 'testdata/SNV.tumor.pave.somatic.37.vcf.gz.tbi') ]).collect()
+                input[0] = channel.of([
+                    [id:'test'],
+                    file(params.pipelines_testdata_base_path + 'testdata/SNV.tumor.pave.somatic.37.vcf.gz', checkIfExists: true),
+                    file(params.pipelines_testdata_base_path + 'testdata/SNV.tumor.pave.somatic.37.vcf.gz.tbi', checkIfExists: true)
+                    ])
                 input[1] = 'GRCh38'
-                input[2] = channel.fromPath( params.pipelines_testdata_base_path + 'reference/reference.fasta.fai' ).map { it -> [[id:it.simpleName], it] }.collect()
+                input[2] = channel.fromPath(params.pipelines_testdata_base_path + 'reference/reference.fasta.fai', checkIfExists: true).map {it -> [[id:it.simpleName], it]  }.collect()
                 input[3] = channel.fromPath("$projectDir/assets/cadd_to_vcf_header.txt", checkIfExists: true).collect()
                 input[4] = channel.from("\$PWD").map { dir -> [ [ id: 'cadd_resources' ], dir ] }
                 input[5] = channel.from("\$PWD").map { dir -> [ [ id: 'cadd_prescored_indels' ], dir ] }
diff --git a/subworkflows/local/annotate_cadd/tests/main.nf.test.snap b/subworkflows/local/annotate_cadd/tests/main.nf.test.snap
index 301b1a4..f05c43f 100644
--- a/subworkflows/local/annotate_cadd/tests/main.nf.test.snap
+++ b/subworkflows/local/annotate_cadd/tests/main.nf.test.snap
@@ -46,16 +46,36 @@
         "content": [
             {
                 "0": [
-                    
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "SNV_indels_ann.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+                    ]
                 ],
                 "1": [
-                    
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "SNV_indels_ann.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
                 ],
                 "tbi": [
-                    
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "SNV_indels_ann.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
                 ],
                 "vcf": [
-                    
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "SNV_indels_ann.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+                    ]
                 ]
             }
         ],
@@ -63,6 +83,6 @@
             "nf-test": "0.9.3",
             "nextflow": "25.10.4"
         },
-        "timestamp": "2026-03-27T15:48:16.48239"
+        "timestamp": "2026-03-30T13:58:31.285282"
     }
 }
\ No newline at end of file
diff --git a/subworkflows/local/annotate_cadd/tests/nextflow.config b/subworkflows/local/annotate_cadd/tests/nextflow.config
index 3bdabb5..8dfa90f 100644
--- a/subworkflows/local/annotate_cadd/tests/nextflow.config
+++ b/subworkflows/local/annotate_cadd/tests/nextflow.config
@@ -1,5 +1,10 @@
 process {
 
+    withName: 'RENAME_CHR_CADD' {
+        ext.args   = { "--output-type z" }
+        ext.prefix = { "${input.simpleName}_indels" }
+    }
+
     withName: 'BCFTOOLS_VIEW' {
         ext.args   = { "--output-type z --types indels,other" }
         ext.prefix = { "${vcf.simpleName}_indels" }
@@ -32,9 +37,4 @@ process {
         ext.prefix = { "${input.simpleName}_ann" }
     }
 
-    withName: 'RENAME_CHR_CADD' {
-        ext.args   = { "--output-type z" }
-        ext.prefix = { "${vcf.simpleName}_indels" }
-    }
-
 }

From 728f2b916c9fb4cfab5b89108556958e57466b1b Mon Sep 17 00:00:00 2001
From: kristinebilgrav <kristinebilgrav1@gmail.com>
Date: Mon, 30 Mar 2026 16:48:29 +0200
Subject: [PATCH 08/23] fix so default test does not run cadd

---
 conf/subworkflows/annotate_cadd.config |  1 +
 conf/test.config                       |  6 +-----
 main.nf                                |  3 ++-
 nextflow.config                        |  4 ++--
 tests/default.nf.test                  |  1 +
 workflows/oncorefiner.nf               | 18 ++++++++----------
 6 files changed, 15 insertions(+), 18 deletions(-)

diff --git a/conf/subworkflows/annotate_cadd.config b/conf/subworkflows/annotate_cadd.config
index 0ce8b02..3e2e16a 100644
--- a/conf/subworkflows/annotate_cadd.config
+++ b/conf/subworkflows/annotate_cadd.config
@@ -9,6 +9,7 @@ process {
         publishDir = [
             enabled: false
         ]
+        //ext.when = { ( !(workflow.profile.tokenize(',').intersect(['test', 'test_full']).size() >= 1) || workflow.stubRun) }
     }
 
     withName: '.*:ANNOTATE_CADD:RENAME_CHR_CADD' {
diff --git a/conf/test.config b/conf/test.config
index d7bf962..27bbb59 100644
--- a/conf/test.config
+++ b/conf/test.config
@@ -46,10 +46,6 @@ params {
 
     svdb_query_dbs      = params.pipelines_testdata_base_path + 'reference/svdb_querydb_files.csv'
 
-    // Mock input for CADD
-    cadd_resources               = '../test-datasets/reference'
-    cadd_prescored_indels        = '../test-datasets/'
-    //cadd_resources               = params.pipelines_testdata_base_path + "assets" //TODO add
-    //cadd_prescored_indels        = params.pipelines_testdata_base_path + "docs"   //TODO add
+    // TODO make mock input for CADD
 
 }
diff --git a/main.nf b/main.nf
index 178461a..d47a5ba 100644
--- a/main.nf
+++ b/main.nf
@@ -78,10 +78,10 @@ workflow CLINICALGENOMICS_ONCOREFINER {
     ch_cadd_header           = channel.fromPath("$projectDir/assets/cadd_to_vcf_header.txt", checkIfExists: true).collect()
     ch_cadd_resources        = val_cadd_resources        ? channel.fromPath(val_cadd_resources).map { it -> [[id:'cadd_resources'], it] }.collect()
                                                          : channel.value([])
+
     ch_cadd_prescored_indels = val_cadd_prescored_indels ? channel.fromPath(val_cadd_prescored_indels).map { it -> [[id:'cadd_prescored_indels'], it] }.collect()
                                                          : channel.value([])
 
-
     // Input for VEP
     ch_vep_extra_files_unsplit  = val_vep_plugin_files ? channel.fromPath(val_vep_plugin_files).collect() : channel.value([])
     if (val_vep_plugin_files) {
@@ -132,6 +132,7 @@ workflow CLINICALGENOMICS_ONCOREFINER {
         ch_vcfanno_toml,
         PREPARE_REFERENCES.out.vep_resources,
         ch_vep_extra_files,
+        val_cadd_resources,
         val_genome,
         val_species,
         val_vep_cache_version
diff --git a/nextflow.config b/nextflow.config
index 27454f0..3876482 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -19,8 +19,8 @@ params {
     sv_vcf                      = null
 
     // CADD
-    cadd_resources = null
-    cadd_prescored_indels = null
+    cadd_resources              = null
+    cadd_prescored_indels       = null
 
 
     // Vep
diff --git a/tests/default.nf.test b/tests/default.nf.test
index dc67666..8af67e6 100644
--- a/tests/default.nf.test
+++ b/tests/default.nf.test
@@ -9,6 +9,7 @@ nextflow_pipeline {
         when {
             params {
                 outdir = "$outputDir"
+                //skip_tools = ".*:ANNOTATE_CADD:.*"
             }
         }
 
diff --git a/workflows/oncorefiner.nf b/workflows/oncorefiner.nf
index 0e9cd60..c854f55 100644
--- a/workflows/oncorefiner.nf
+++ b/workflows/oncorefiner.nf
@@ -62,6 +62,7 @@ workflow ONCOREFINER {
         ch_vcfanno_toml          // channel: [optional]  [path(toml_file)]
         ch_vep_cache             // channel: [optional]  [vep_cache_files]
         ch_vep_extra_files       // channel: [optional]  [path(plugin_file1), path(plugin_file2), ...]
+        val_cadd_resources        // string:  [optional]  path to CADD resources directory
         val_genome               // string:  [optional]  genome assembly (e.g. "GRCh38")
         val_species              // string:  [optional]  species (e.g. "homo_sapiens")
         val_vep_cache_version    // string:  [optional]  version of vep cache to use (e.g. "107")
@@ -107,13 +108,14 @@ workflow ONCOREFINER {
                     }
                     .set {ch_vep_snv}
 
-            // ANNOTATE WITH CADD - currently depends on resources - could be variable instead (ref optional wf refinement)?
-            if (params.cadd_resources != null) {
+            // ANNOTATE WITH CADD - currently depends on val_cadd_resources - could be improved?
+            if (val_cadd_resources) {
 
-                TABIX_RESEARCH_FILTERING(RESEARCH_FILTERING.out.vcf)
+                TABIX_RESEARCH_FILTERING(RESEARCH_FILTERING.out.vcf) //CADD needs tabix index
 
                 RESEARCH_FILTERING.out.vcf
                     .join(TABIX_RESEARCH_FILTERING.out.index, failOnMismatch:true, failOnDuplicate:true)
+                    .view()
                     .set{ ch_cadd_in }
 
                 ANNOTATE_CADD (
@@ -124,15 +126,11 @@ workflow ONCOREFINER {
                     ch_cadd_resources,
                     ch_cadd_prescored_indels
                 )
-                ch_cadd_snv = ANNOTATE_CADD.out.vcf
-
+                ANNOTATE_CADD.out.vcf
+                    .join(ANNOTATE_CADD.out.tbi)
+                    .set { ch_vep_snv }
             }
 
-            ch_cadd_snv // Q: is it better to make this channel in the annotate cadd subwf?
-                .join(ANNOTATE_CADD.out.tbi)
-                .map { meta, vcf, tbi -> tuple(meta, vcf, tbi) }
-                .set { ch_vep_snv }
-
             // VEP
             ENSEMBLVEP_SNV (
                 ch_vep_snv,

From 8e0d78574e5ab9ac2b1193d8163b76646fb7dfb0 Mon Sep 17 00:00:00 2001
From: kristinebilgrav <kristinebilgrav1@gmail.com>
Date: Mon, 30 Mar 2026 17:05:57 +0200
Subject: [PATCH 09/23] changelog and small fixes

---
 CHANGELOG.md                                        | 1 +
 conf/subworkflows/annotate_cadd.config              | 1 -
 conf/test.config                                    | 3 +--
 subworkflows/local/annotate_cadd/tests/main.nf.test | 2 +-
 tests/default.nf.test                               | 1 -
 workflows/oncorefiner.nf                            | 6 +++---
 6 files changed, 6 insertions(+), 8 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 2defb47..4e4d6b9 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -19,6 +19,7 @@ Initial release of Clinical-Genomics/oncorefiner, created with the [nf-core](htt
 - Added parameters documentation [#25](https://github.com/Clinical-Genomics/oncorefiner/pull/25)
 - Added pre-commit hook for automatic generation of parameters documentation [#25](https://github.com/Clinical-Genomics/oncorefiner/pull/25)
 - Added Nextflow strict syntax compatibility [#30](https://github.com/Clinical-Genomics/oncorefiner/pull/30)
+- Added CADD scoring for InDels, and a test for the subworkflow [#59](https://github.com/Clinical-Genomics/oncorefiner/pull/59)
 
 ### Changed
 
diff --git a/conf/subworkflows/annotate_cadd.config b/conf/subworkflows/annotate_cadd.config
index 3e2e16a..0ce8b02 100644
--- a/conf/subworkflows/annotate_cadd.config
+++ b/conf/subworkflows/annotate_cadd.config
@@ -9,7 +9,6 @@ process {
         publishDir = [
             enabled: false
         ]
-        //ext.when = { ( !(workflow.profile.tokenize(',').intersect(['test', 'test_full']).size() >= 1) || workflow.stubRun) }
     }
 
     withName: '.*:ANNOTATE_CADD:RENAME_CHR_CADD' {
diff --git a/conf/test.config b/conf/test.config
index 27bbb59..d89964c 100644
--- a/conf/test.config
+++ b/conf/test.config
@@ -46,6 +46,5 @@ params {
 
     svdb_query_dbs      = params.pipelines_testdata_base_path + 'reference/svdb_querydb_files.csv'
 
-    // TODO make mock input for CADD
-
+    // TODO make/insert mock input for CADD
 }
diff --git a/subworkflows/local/annotate_cadd/tests/main.nf.test b/subworkflows/local/annotate_cadd/tests/main.nf.test
index 9023868..6d57147 100644
--- a/subworkflows/local/annotate_cadd/tests/main.nf.test
+++ b/subworkflows/local/annotate_cadd/tests/main.nf.test
@@ -23,7 +23,7 @@ nextflow_workflow {
                     file(params.pipelines_testdata_base_path + 'testdata/SNV.tumor.pave.somatic.37.vcf.gz.tbi', checkIfExists: true)
                     ])
                 input[1] = 'GRCh37'
-                input[2] = channel.of([ [id:'genome'], file(params.pipelines_testdata_base_path + 'reference/reference.fasta.fai', checkIfExists: true)]).collect()
+                input[2] = channel.fromPath(params.pipelines_testdata_base_path + 'reference/reference.fasta.fai', checkIfExists: true).map {it -> [[id:it.simpleName], it]  }.collect()
                 input[3] = channel.fromPath("$projectDir/assets/cadd_to_vcf_header.txt", checkIfExists: true).collect()
                 input[4] = channel.from("\$PWD").map { dir -> [ [ id: 'cadd_resources' ], dir ] }
                 input[5] = channel.from("\$PWD").map { dir -> [ [ id: 'cadd_prescored_indels' ], dir ] }
diff --git a/tests/default.nf.test b/tests/default.nf.test
index 8af67e6..dc67666 100644
--- a/tests/default.nf.test
+++ b/tests/default.nf.test
@@ -9,7 +9,6 @@ nextflow_pipeline {
         when {
             params {
                 outdir = "$outputDir"
-                //skip_tools = ".*:ANNOTATE_CADD:.*"
             }
         }
 
diff --git a/workflows/oncorefiner.nf b/workflows/oncorefiner.nf
index c854f55..5503288 100644
--- a/workflows/oncorefiner.nf
+++ b/workflows/oncorefiner.nf
@@ -62,7 +62,7 @@ workflow ONCOREFINER {
         ch_vcfanno_toml          // channel: [optional]  [path(toml_file)]
         ch_vep_cache             // channel: [optional]  [vep_cache_files]
         ch_vep_extra_files       // channel: [optional]  [path(plugin_file1), path(plugin_file2), ...]
-        val_cadd_resources        // string:  [optional]  path to CADD resources directory
+        val_cadd_resources       // string:  [optional]  path to CADD resources directory
         val_genome               // string:  [optional]  genome assembly (e.g. "GRCh38")
         val_species              // string:  [optional]  species (e.g. "homo_sapiens")
         val_vep_cache_version    // string:  [optional]  version of vep cache to use (e.g. "107")
@@ -99,14 +99,13 @@ workflow ONCOREFINER {
                     tuple(meta, vcf, tbi)
                     }
                 .set { ch_research_filtering_in }
-
             RESEARCH_FILTERING(ch_research_filtering_in, [], [], [])
 
             RESEARCH_FILTERING.out.vcf
                     .map { meta, vcf ->
                         tuple(meta, vcf, [])
                     }
-                    .set {ch_vep_snv}
+                    .set { ch_vep_snv }
 
             // ANNOTATE WITH CADD - currently depends on val_cadd_resources - could be improved?
             if (val_cadd_resources) {
@@ -129,6 +128,7 @@ workflow ONCOREFINER {
                 ANNOTATE_CADD.out.vcf
                     .join(ANNOTATE_CADD.out.tbi)
                     .set { ch_vep_snv }
+
             }
 
             // VEP

From 217fd8d7ea93f5dfc6bc381627b32ca761302011 Mon Sep 17 00:00:00 2001
From: kristinebilgrav <kristinebilgrav1@gmail.com>
Date: Tue, 7 Apr 2026 09:37:36 +0200
Subject: [PATCH 10/23] remove debug .view() from CADD input channel

---
 workflows/oncorefiner.nf | 1 -
 1 file changed, 1 deletion(-)

diff --git a/workflows/oncorefiner.nf b/workflows/oncorefiner.nf
index 5503288..80534dc 100644
--- a/workflows/oncorefiner.nf
+++ b/workflows/oncorefiner.nf
@@ -114,7 +114,6 @@ workflow ONCOREFINER {
 
                 RESEARCH_FILTERING.out.vcf
                     .join(TABIX_RESEARCH_FILTERING.out.index, failOnMismatch:true, failOnDuplicate:true)
-                    .view()
                     .set{ ch_cadd_in }
 
                 ANNOTATE_CADD (

From a2c21741a72aef3de9d39ef6c6bc0130bf75087d Mon Sep 17 00:00:00 2001
From: kristinebilgrav <kristinebilgrav1@gmail.com>
Date: Fri, 10 Apr 2026 16:35:26 +0200
Subject: [PATCH 11/23] add publishdir

---
 conf/subworkflows/annotate_cadd.config | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/conf/subworkflows/annotate_cadd.config b/conf/subworkflows/annotate_cadd.config
index 0ce8b02..c97f424 100644
--- a/conf/subworkflows/annotate_cadd.config
+++ b/conf/subworkflows/annotate_cadd.config
@@ -45,5 +45,8 @@ process {
     withName: '.*:ANNOTATE_CADD:ANNOTATE_INDELS' {
         ext.args   = { "--columns Chrom,Pos,Ref,Alt,-,CADD --output-type z --write-index=tbi" }
         ext.prefix = { "${input.simpleName}_ann" }
+        publishDir = [
+            path: { "${params.outdir}/annotations" },
+            mode: params.publish_dir_mode ]
     }
 }

From fd0522c9e7feb210939eb7f0c3d4309ef3368032 Mon Sep 17 00:00:00 2001
From: kristinebilgrav <kristinebilgrav1@gmail.com>
Date: Mon, 13 Apr 2026 10:23:27 +0200
Subject: [PATCH 12/23] pass params.vep_cache to PREPARE_REFERENCES

---
 main.nf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/main.nf b/main.nf
index d47a5ba..1c14b11 100644
--- a/main.nf
+++ b/main.nf
@@ -56,7 +56,7 @@ workflow CLINICALGENOMICS_ONCOREFINER {
                                              : channel.value([[],[]])
 
     PREPARE_REFERENCES (
-        val_vep_cache
+        params.vep_cache
         )
 
     //

From 342ea0c8be34d3a6c08a8d218453105ce9fb1faa Mon Sep 17 00:00:00 2001
From: kristinebilgrav <kristinebilgrav1@gmail.com>
Date: Mon, 13 Apr 2026 11:31:47 +0200
Subject: [PATCH 13/23] fix merge bug

---
 main.nf | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/main.nf b/main.nf
index c2da2cd..7e4835a 100644
--- a/main.nf
+++ b/main.nf
@@ -33,10 +33,10 @@ workflow CLINICALGENOMICS_ONCOREFINER {
     samplesheet                 // channel: [mandatory] samplesheet read in from --input
     val_bam_normal              // string:  [optional]  path to BAM file for the normal sample
     val_bai_normal              // string:  [optional]  path to BAI file for the normal sample
-    val_cadd_resources          // string:  [optional]  path to CADD resources directory
-    val_cadd_prescored_indels   // string:  [optional]  path to CADD prescored indels file
     val_bam_tumor               // string:  [optional]  path to BAM file for the tumor sample
     val_bai_tumor               // string:  [optional]  path to BAI file for the tumor sample
+    val_cadd_prescored_indels   // string:  [optional]  path to CADD prescored indels file
+    val_cadd_resources          // string:  [optional]  path to CADD resources directory
     val_genome                  // string:  [optional]  genome assembly (e.g. "GRCh38")
     val_genome_fasta            // string:  [optional]  path to genome fasta file
     val_genome_fai              // string:  [optional]  path to genome fasta index file

From 1a8e7f9bab0bc0a96e72460d00b7f3eba67de454 Mon Sep 17 00:00:00 2001
From: kristinebilgrav <kristinebilgrav1@gmail.com>
Date: Mon, 13 Apr 2026 14:03:45 +0200
Subject: [PATCH 14/23] improving CHANGELOG comment

---
 CHANGELOG.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index dad423d..83d271f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -23,7 +23,7 @@ Initial release of Clinical-Genomics/oncorefiner, created with the [nf-core](htt
 - Added `sex` parameter [#62](https://github.com/Clinical-Genomics/oncorefiner/pull/62)
 - Added `SAMTOOLS/VIEW` for bam to cram conversion in the main.nf [#70](https://github.com/Clinical-Genomics/oncorefiner/pull/70)
 - Added `GENERATE_CYTOSURE_FILES` subworkflow and necessary nf-core modules `TIDDIT_COV` and `VCF2CYTOSURE` [#60](https://github.com/Clinical-Genomics/oncorefiner/pull/60)
-- Added CADD scoring for InDels, and a test for the subworkflow [#59](https://github.com/Clinical-Genomics/oncorefiner/pull/59)
+- Added CADD scoring for InDels in the subworkflow `ANNOTATE_CADD`, with a subworkflow test (stub only) [#59](https://github.com/Clinical-Genomics/oncorefiner/pull/59)
 
 ### Changed
 

From 99819f017f4d7bd8baedd0f00b92e133412c7bc1 Mon Sep 17 00:00:00 2001
From: kristinebilgrav <kristinebilgrav1@gmail.com>
Date: Mon, 13 Apr 2026 14:11:34 +0200
Subject: [PATCH 15/23] update citations

---
 CITATIONS.md | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/CITATIONS.md b/CITATIONS.md
index 2b87728..07c4e21 100644
--- a/CITATIONS.md
+++ b/CITATIONS.md
@@ -10,6 +10,12 @@
 
 ## Pipeline tools
 
+- [CADD<sup>1</sup>](https://genomemedicine.biomedcentral.com/articles/10.1186/s13073-021-00835-9)<sup>,</sup> [<sup>2</sup>](https://academic.oup.com/nar/article/47/D1/D886/5146191)
+
+  > Rentzsch P, Schubach M, Shendure J, Kircher M. CADD-Splice—improving genome-wide variant effect prediction using deep learning-derived splice scores. Genome Med. 2021;13(1):31. doi:10.1186/s13073-021-00835-9
+
+  > Rentzsch P, Witten D, Cooper GM, Shendure J, Kircher M. CADD: predicting the deleteriousness of variants throughout the human genome. Nucleic Acids Research. 2019;47(D1):D886-D894. doi:10.1093/nar/gky1016
+
 - [MultiQC](https://pubmed.ncbi.nlm.nih.gov/27312411/)
 
 > Ewels P, Magnusson M, Lundin S, Käller M. MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics. 2016 Oct 1;32(19):3047-8. doi: 10.1093/bioinformatics/btw354. Epub 2016 Jun 16. PubMed PMID: 27312411; PubMed Central PMCID: PMC5039924.

From 1a6e8573f8a68c09743fe900de1da814b1578f92 Mon Sep 17 00:00:00 2001
From: kristinebilgrav <kristinebilgrav1@gmail.com>
Date: Fri, 17 Apr 2026 11:35:52 +0200
Subject: [PATCH 16/23] moving tabix to subworkflow

---
 subworkflows/local/annotate_cadd/main.nf | 31 +++++++++++++++---------
 subworkflows/local/process_snvs/main.nf  |  5 +---
 2 files changed, 20 insertions(+), 16 deletions(-)

diff --git a/subworkflows/local/annotate_cadd/main.nf b/subworkflows/local/annotate_cadd/main.nf
index 5194330..9930318 100644
--- a/subworkflows/local/annotate_cadd/main.nf
+++ b/subworkflows/local/annotate_cadd/main.nf
@@ -1,5 +1,5 @@
 //
-// A subworkflow to annotate cadd
+// A subworkflow to annotate indels with CADD scores
 //
 
 include { BCFTOOLS_ANNOTATE as BCFTOOLS_RENAME_CHR_CADD } from '../../../modules/nf-core/bcftools/annotate/main'
@@ -9,22 +9,29 @@ include { CADD                                          } from '../../../modules
 include { GAWK as GAWK_REF_TO_CADD_CHRNAMES             } from '../../../modules/nf-core/gawk/main'
 include { GAWK as GAWK_CADD_TO_REF_CHRNAMES             } from '../../../modules/nf-core/gawk/main'
 include { TABIX_TABIX as TABIX_CADD                     } from '../../../modules/nf-core/tabix/tabix/main'
+include { TABIX_TABIX as TABIX_INPUT                    } from '../../../modules/nf-core/tabix/tabix/main'
 
 
 workflow ANNOTATE_CADD {
 
     take:
-        ch_vcf                     // channel: [mandatory] [ val(meta), path(vcf), path(idx) ]
-        val_genome                 // string:  [mandatory] GRCh37 or GRCh38
-        ch_fai                     // channel: [mandatory] [ val(meta), path(fai) ]
-        ch_header                  // channel: [mandatory] [ path(txt) ]
-        ch_cadd_resources          // channel: [mandatory] [ val(meta), path(dir) ]
-        ch_cadd_prescored_indels   // channel: [mandatory] [ val(meta), path(dir) ]
+        ch_vcf                   // channel: [mandatory] [val(meta), path(vcf)]
+        val_genome               // string:  [mandatory] GRCh37 or GRCh38
+        ch_fai                   // channel: [mandatory] [val(meta), path(fai)]
+        ch_header                // channel: [mandatory] [path(txt)]
+        ch_cadd_resources        // channel: [mandatory] [val(meta), path(dir)]
+        ch_cadd_prescored_indels // channel: [mandatory] [val(meta), path(dir)]
 
     main:
 
         ch_rename_chrs_ref    = channel.value([[]])
 
+        TABIX_INPUT(ch_vcf) //Subworkflow needs tabix index
+
+        ch_vcf
+            .join(TABIX_INPUT.out.index, failOnMismatch:true, failOnDuplicate:true)
+            .set { ch_vcf_tbi }
+
         // Create files and rename chromosomes if reference is GRCh38
         if (val_genome.equals('GRCh38')) {
 
@@ -39,7 +46,7 @@ workflow ANNOTATE_CADD {
             GAWK_CADD_TO_REF_CHRNAMES.out.output.map { _meta, txt -> txt }
                 .set { ch_rename_chrs_ref }
 
-            ch_vcf
+            ch_vcf_tbi
                 .combine(ch_chrnames_cadd)
                 .map { meta, vcf, tbi, txt -> tuple( meta, vcf, tbi, [], [], [], [], txt ) }
                 .set {rename_chrnames_in}
@@ -49,11 +56,11 @@ workflow ANNOTATE_CADD {
 
             BCFTOOLS_RENAME_CHR_CADD.out.vcf
                 .map {meta, vcf -> tuple( meta , vcf, [] )}
-                .set { ch_vcf }
+                .set { ch_vcf_tbi }
         }
 
         // Filter to extract indels
-        BCFTOOLS_VIEW(ch_vcf, [], [], [])
+        BCFTOOLS_VIEW(ch_vcf_tbi, [], [], [])
 
         // CADD
         CADD(BCFTOOLS_VIEW.out.vcf, ch_cadd_resources, ch_cadd_prescored_indels)
@@ -62,12 +69,12 @@ workflow ANNOTATE_CADD {
         TABIX_CADD(CADD.out.tsv)
 
         // Change chr names back to desired naming and annotate original vcf with cadd results
-        ch_vcf
+        ch_vcf_tbi
             .join(CADD.out.tsv, failOnMismatch: true, failOnDuplicate: true)
             .join(TABIX_CADD.out.index, failOnMismatch: true, failOnDuplicate: true)
             .combine( ch_header )
             .combine( ch_rename_chrs_ref )
-            .map { meta, vcf, tbi, annotations, annotations_index, header, txt -> tuple( meta, vcf, [], annotations, annotations_index, [], header, txt )  }
+            .map { meta, vcf, tbi, annotations, annotations_index, header, txt -> tuple( meta, vcf, [], annotations, annotations_index, [], header, txt )  } //THERE IS A TBI?
             .set { ch_annotate }
 
 
diff --git a/subworkflows/local/process_snvs/main.nf b/subworkflows/local/process_snvs/main.nf
index a8054dc..061bbd0 100644
--- a/subworkflows/local/process_snvs/main.nf
+++ b/subworkflows/local/process_snvs/main.nf
@@ -12,7 +12,6 @@ include { ENSEMBLVEP_VEP                          } from '../../../modules/nf-co
 include { VCFANNO                                 } from '../../../modules/nf-core/vcfanno/main'
 include { BCFTOOLS_VIEW as BCFTOOLS_VIEW_RESEARCH } from '../../../modules/nf-core/bcftools/view/main'
 include { BCFTOOLS_VIEW as BCFTOOLS_VIEW_CLINICAL } from '../../../modules/nf-core/bcftools/view/main'
-include { TABIX_TABIX as TABIX_RESEARCH_FILTERING } from '../../../modules/nf-core/tabix/tabix/main'
 include { ANNOTATE_CADD                           } from '../../../subworkflows/local/annotate_cadd'
 
 /*
@@ -74,10 +73,7 @@ workflow PROCESS_SNVS {
         // ANNOTATE WITH CADD - currently depends on val_cadd_resources - could be improved?
         if (val_cadd_resources) {
 
-            TABIX_RESEARCH_FILTERING(BCFTOOLS_VIEW_RESEARCH.out.vcf) //Subworkflow needs tabix index
-
             BCFTOOLS_VIEW_RESEARCH.out.vcf
-                .join(TABIX_RESEARCH_FILTERING.out.index, failOnMismatch:true, failOnDuplicate:true)
                 .set{ ch_cadd_in }
 
             ANNOTATE_CADD (
@@ -88,6 +84,7 @@ workflow PROCESS_SNVS {
                 ch_cadd_resources,
                 ch_cadd_prescored_indels
             )
+
             ANNOTATE_CADD.out.vcf
                 .join(ANNOTATE_CADD.out.tbi)
                 .set { ch_vep_snv }

From dcb9352a60a8c99e4bc06976dad687afd558cce5 Mon Sep 17 00:00:00 2001
From: kristinebilgrav <kristinebilgrav1@gmail.com>
Date: Fri, 17 Apr 2026 13:13:09 +0200
Subject: [PATCH 17/23] review changes and update of snapshot

---
 conf/subworkflows/annotate_cadd.config        |   8 +-
 main.nf                                       |   4 +-
 subworkflows/local/annotate_cadd/main.nf      |   2 +-
 .../local/annotate_cadd/tests/main.nf.test    |  13 +--
 .../annotate_cadd/tests/main.nf.test.snap     | 100 ++++++------------
 .../local/annotate_cadd/tests/nextflow.config |   8 +-
 6 files changed, 48 insertions(+), 87 deletions(-)

diff --git a/conf/subworkflows/annotate_cadd.config b/conf/subworkflows/annotate_cadd.config
index c97f424..3feeece 100644
--- a/conf/subworkflows/annotate_cadd.config
+++ b/conf/subworkflows/annotate_cadd.config
@@ -11,7 +11,7 @@ process {
         ]
     }
 
-    withName: '.*:ANNOTATE_CADD:RENAME_CHR_CADD' {
+    withName: '.*:ANNOTATE_CADD:BCFTOOLS_RENAME_CHR_CADD' {
         ext.args   = { "--output-type z" }
         ext.prefix = { "${input.simpleName}_indels" }
     }
@@ -30,19 +30,19 @@ process {
         ext.args = { "--force --sequence 1 --begin 2 --end 2" }
     }
 
-    withName: '.*:ANNOTATE_CADD:CADD_TO_REFERENCE_CHRNAMES' {
+    withName: '.*:ANNOTATE_CADD:GAWK_CADD_TO_REF_CHRNAMES' {
         ext.args2 = '\'{original=$1; sub("chr","",$1); print $1, original}\''
         ext.prefix = "cadd_to_reference"
         ext.suffix = "txt"
     }
 
-    withName: '.*:ANNOTATE_CADD:REFERENCE_TO_CADD_CHRNAMES' {
+    withName: '.*:ANNOTATE_CADD:GAWK_REF_TO_CADD_CHRNAMES' {
         ext.args2 = '\'{original=$1; sub("chr","",$1); print original, $1}\''
         ext.prefix = "reference_to_cadd"
         ext.suffix = "txt"
     }
 
-    withName: '.*:ANNOTATE_CADD:ANNOTATE_INDELS' {
+    withName: '.*:ANNOTATE_CADD:BCFTOOLS_ANNOTATE_INDELS' {
         ext.args   = { "--columns Chrom,Pos,Ref,Alt,-,CADD --output-type z --write-index=tbi" }
         ext.prefix = { "${input.simpleName}_ann" }
         publishDir = [
diff --git a/main.nf b/main.nf
index e4a4acc..dc7d133 100644
--- a/main.nf
+++ b/main.nf
@@ -92,8 +92,8 @@ workflow CLINICALGENOMICS_ONCOREFINER {
 
     // Reference files
     ch_genome_fasta          = channel.fromPath(val_genome_fasta).map { it -> [[id:it.simpleName], it] }.collect()
-    ch_genome_fai            = channel.fromPath(val_genome_fai).map {it -> [[id:it.simpleName], it]  }.collect()
-    ch_genome_fasta_fai     = ch_genome_fasta.join(ch_genome_fai, failOnMismatch: true, failOnDuplicate: true)
+    ch_genome_fai            = channel.fromPath(val_genome_fai).map { it -> [[id:it.simpleName], it] }.collect()
+    ch_genome_fasta_fai      = ch_genome_fasta.join(ch_genome_fai, failOnMismatch: true, failOnDuplicate: true)
 
     // CADD input files
     ch_cadd_header           = channel.fromPath("$projectDir/assets/cadd_to_vcf_header.txt", checkIfExists: true).collect()
diff --git a/subworkflows/local/annotate_cadd/main.nf b/subworkflows/local/annotate_cadd/main.nf
index 9930318..38da64d 100644
--- a/subworkflows/local/annotate_cadd/main.nf
+++ b/subworkflows/local/annotate_cadd/main.nf
@@ -74,7 +74,7 @@ workflow ANNOTATE_CADD {
             .join(TABIX_CADD.out.index, failOnMismatch: true, failOnDuplicate: true)
             .combine( ch_header )
             .combine( ch_rename_chrs_ref )
-            .map { meta, vcf, tbi, annotations, annotations_index, header, txt -> tuple( meta, vcf, [], annotations, annotations_index, [], header, txt )  } //THERE IS A TBI?
+            .map { meta, vcf, tbi, annotations, annotations_index, header, txt -> tuple( meta, vcf, tbi, annotations, annotations_index, [], header, txt )  } //THERE IS A TBI?
             .set { ch_annotate }
 
 
diff --git a/subworkflows/local/annotate_cadd/tests/main.nf.test b/subworkflows/local/annotate_cadd/tests/main.nf.test
index 6d57147..8b8df8f 100644
--- a/subworkflows/local/annotate_cadd/tests/main.nf.test
+++ b/subworkflows/local/annotate_cadd/tests/main.nf.test
@@ -19,8 +19,7 @@ nextflow_workflow {
                 """
                 input[0] = channel.of([
                     [id:'test'],
-                    file(params.pipelines_testdata_base_path + 'testdata/SNV.tumor.pave.somatic.37.vcf.gz', checkIfExists: true),
-                    file(params.pipelines_testdata_base_path + 'testdata/SNV.tumor.pave.somatic.37.vcf.gz.tbi', checkIfExists: true)
+                    file(params.pipelines_testdata_base_path + 'testdata/tumor_normal/subject_a.tumor.purple.somatic.vcf.gz', checkIfExists: true)
                     ])
                 input[1] = 'GRCh37'
                 input[2] = channel.fromPath(params.pipelines_testdata_base_path + 'reference/reference.fasta.fai', checkIfExists: true).map {it -> [[id:it.simpleName], it]  }.collect()
@@ -34,13 +33,13 @@ nextflow_workflow {
         then {
             assertAll(
                 { assert workflow.success },
-                { assert snapshot(workflow.out).match() }
+                { assert snapshot(workflow.out.vcf, workflow.out.tbi).match() }
             )
         }
     }
 
     test("ANNOTATE_CADD - GRCh38, stub") {
-
+    // TODO update test data to GRCh38
         options "-stub"
 
         when {
@@ -51,8 +50,7 @@ nextflow_workflow {
                 """
                 input[0] = channel.of([
                     [id:'test'],
-                    file(params.pipelines_testdata_base_path + 'testdata/SNV.tumor.pave.somatic.37.vcf.gz', checkIfExists: true),
-                    file(params.pipelines_testdata_base_path + 'testdata/SNV.tumor.pave.somatic.37.vcf.gz.tbi', checkIfExists: true)
+                    file(params.pipelines_testdata_base_path + 'testdata/tumor_normal/subject_a.tumor.purple.somatic.vcf.gz', checkIfExists: true)
                     ])
                 input[1] = 'GRCh38'
                 input[2] = channel.fromPath(params.pipelines_testdata_base_path + 'reference/reference.fasta.fai', checkIfExists: true).map {it -> [[id:it.simpleName], it]  }.collect()
@@ -66,8 +64,7 @@ nextflow_workflow {
         then {
             assertAll(
                 { assert workflow.success },
-                { assert snapshot(
-                    workflow.out).match() }
+                { assert snapshot(workflow.out.vcf, workflow.out.tbi).match() }
             )
         }
     }
diff --git a/subworkflows/local/annotate_cadd/tests/main.nf.test.snap b/subworkflows/local/annotate_cadd/tests/main.nf.test.snap
index f05c43f..0a3c051 100644
--- a/subworkflows/local/annotate_cadd/tests/main.nf.test.snap
+++ b/subworkflows/local/annotate_cadd/tests/main.nf.test.snap
@@ -1,88 +1,52 @@
 {
     "ANNOTATE_CADD - GRCh37, stub": {
         "content": [
-            {
-                "0": [
-                    [
-                        {
-                            "id": "test"
-                        },
-                        "SNV_ann.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
-                    ]
-                ],
-                "1": [
-                    [
-                        {
-                            "id": "test"
-                        },
-                        "SNV_ann.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e"
-                    ]
-                ],
-                "tbi": [
-                    [
-                        {
-                            "id": "test"
-                        },
-                        "SNV_ann.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e"
-                    ]
-                ],
-                "vcf": [
-                    [
-                        {
-                            "id": "test"
-                        },
-                        "SNV_ann.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
-                    ]
+            [
+                [
+                    {
+                        "id": "test"
+                    },
+                    "subject_a_ann.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
                 ]
-            }
+            ],
+            [
+                [
+                    {
+                        "id": "test"
+                    },
+                    "subject_a_ann.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e"
+                ]
+            ]
         ],
         "meta": {
             "nf-test": "0.9.3",
             "nextflow": "25.10.4"
         },
-        "timestamp": "2026-03-27T16:04:52.225642"
+        "timestamp": "2026-04-17T13:12:12.741306"
     },
     "ANNOTATE_CADD - GRCh38, stub": {
         "content": [
-            {
-                "0": [
-                    [
-                        {
-                            "id": "test"
-                        },
-                        "SNV_indels_ann.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
-                    ]
-                ],
-                "1": [
-                    [
-                        {
-                            "id": "test"
-                        },
-                        "SNV_indels_ann.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e"
-                    ]
-                ],
-                "tbi": [
-                    [
-                        {
-                            "id": "test"
-                        },
-                        "SNV_indels_ann.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e"
-                    ]
-                ],
-                "vcf": [
-                    [
-                        {
-                            "id": "test"
-                        },
-                        "SNV_indels_ann.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
-                    ]
+            [
+                [
+                    {
+                        "id": "test"
+                    },
+                    "subject_a_indels_ann.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+                ]
+            ],
+            [
+                [
+                    {
+                        "id": "test"
+                    },
+                    "subject_a_indels_ann.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e"
                 ]
-            }
+            ]
         ],
         "meta": {
             "nf-test": "0.9.3",
             "nextflow": "25.10.4"
         },
-        "timestamp": "2026-03-30T13:58:31.285282"
+        "timestamp": "2026-04-17T13:12:21.913504"
     }
 }
\ No newline at end of file
diff --git a/subworkflows/local/annotate_cadd/tests/nextflow.config b/subworkflows/local/annotate_cadd/tests/nextflow.config
index 8dfa90f..9c408d6 100644
--- a/subworkflows/local/annotate_cadd/tests/nextflow.config
+++ b/subworkflows/local/annotate_cadd/tests/nextflow.config
@@ -1,6 +1,6 @@
 process {
 
-    withName: 'RENAME_CHR_CADD' {
+    withName: 'BCFTOOLS_RENAME_CHR_CADD' {
         ext.args   = { "--output-type z" }
         ext.prefix = { "${input.simpleName}_indels" }
     }
@@ -20,19 +20,19 @@ process {
         ext.args = { "--force --sequence 1 --begin 2 --end 2" }
     }
 
-    withName: 'CADD_TO_REFERENCE_CHRNAMES' {
+    withName: 'GAWK_CADD_TO_REF_CHRNAMES' {
         ext.args2 = '\'{original=$1; sub("chr","",$1); print $1, original}\''
         ext.prefix = "cadd_to_reference"
         ext.suffix = "txt"
     }
 
-    withName: 'REFERENCE_TO_CADD_CHRNAMES' {
+    withName: 'GAWK_REF_TO_CADD_CHRNAMES' {
         ext.args2 = '\'{original=$1; sub("chr","",$1); print original, $1}\''
         ext.prefix = "reference_to_cadd"
         ext.suffix = "txt"
     }
 
-    withName: 'ANNOTATE_INDELS' {
+    withName: 'BCFTOOLS_ANNOTATE_INDELS' {
         ext.args   = { "--columns Chrom,Pos,Ref,Alt,-,CADD --output-type z --write-index=tbi" }
         ext.prefix = { "${input.simpleName}_ann" }
     }

From a83477fa900ba0b1b8663785508aecd5afc1ed06 Mon Sep 17 00:00:00 2001
From: kristinebilgrav <kristinebilgrav1@gmail.com>
Date: Fri, 17 Apr 2026 13:19:31 +0200
Subject: [PATCH 18/23] add citations

---
 subworkflows/local/utils_nfcore_oncorefiner_pipeline/main.nf | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/subworkflows/local/utils_nfcore_oncorefiner_pipeline/main.nf b/subworkflows/local/utils_nfcore_oncorefiner_pipeline/main.nf
index a492c2e..a1d2893 100644
--- a/subworkflows/local/utils_nfcore_oncorefiner_pipeline/main.nf
+++ b/subworkflows/local/utils_nfcore_oncorefiner_pipeline/main.nf
@@ -180,6 +180,7 @@ def toolCitationText() {
     def citations_list = []
     def vcfanno        = "vcfanno (Pedersen et al. 2016)"
     def bcftools_view  = "bcftools (Danecek et al. 2021)"
+    def cadd           = "CADD (Rentzsch et al. 2019)"
     def ensemblvep_vep = "Ensembl VEP (McLaren et al. 2016)"
     def svdb           = "svdb"
     def multiqc        = "MultiQC (Ewels et al. 2016)"
@@ -189,6 +190,7 @@ def toolCitationText() {
             citations_list +
             vcfanno        +
             bcftools_view  +
+            cadd           +
             ensemblvep_vep
     }
 
@@ -218,6 +220,7 @@ def toolBibliographyText() {
     def bibliography_list   = []
     def vcfanno             = "<li>Pedersen BS, Layer RM, Quinlan AR. Vcfanno: fast, flexible annotation of genetic variants. Genome Biol. 2016 Jun 1;17(1):118. doi: 10.1186/s13059-016-0973-5. PMID: 27250555; PMCID: PMC4888505.</li>"
     def bcftools_view       = "<li>Danecek P, Bonfield JK, Liddle J, Marshall J, Ohan V, Pollard MO, Whitwham A, Keane T, McCarthy SA, Davies RM, Li H. Twelve years of SAMtools and BCFtools. Gigascience. 2021 Feb 16;10(2):giab008. doi: 10.1093/gigascience/giab008. PMID: 33590845; PMCID: PMC7898596.</li>"
+    def cadd                = "<li>Rentzsch P, Witten D, Cooper GM, Shendure J, Kircher M. CADD: predicting the deleteriousness of variants throughout the human genome. Nucleic Acids Res. 2019 Jan 8;47(D1):D886-D894. doi: 10.1093/nar/gky1016. PMID: 30371827; PMCID: PMC6323892.</li>"
     def ensemblvep_vep      = "<li>McLaren W, Gil L, Hunt SE, Riat HS, Ritchie GR, Thormann A, Flicek P, Cunningham F. The Ensembl Variant Effect Predictor. Genome Biol. 2016 Jun 6;17(1):122. doi: 10.1186/s13059-016-0974-4. PMID: 27268795; PMCID: PMC4893825.</li>"
     def svdb                = "<li>svdb. https://github.com/J35P312/svdb.</li>"
     def multiqc             = "<li>Ewels, P., Magnusson, M., Lundin, S., & Käller, M. (2016). MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics , 32(19), 3047–3048. doi: /10.1093/bioinformatics/btw354</li>"
@@ -227,6 +230,7 @@ def toolBibliographyText() {
             bibliography_list +
             vcfanno        +
             bcftools_view  +
+            cadd           +
             ensemblvep_vep
     }
 

From 02d46a0f9d2ce30d320f8810ba93e6b78f387c36 Mon Sep 17 00:00:00 2001
From: kristinebilgrav <kristinebilgrav1@gmail.com>
Date: Fri, 17 Apr 2026 13:44:32 +0200
Subject: [PATCH 19/23] update process_snvs test

---
 .../local/process_snvs/tests/main.nf.test     | 27 +++++++++++--------
 1 file changed, 16 insertions(+), 11 deletions(-)

diff --git a/subworkflows/local/process_snvs/tests/main.nf.test b/subworkflows/local/process_snvs/tests/main.nf.test
index 9f2ead2..403e0ee 100644
--- a/subworkflows/local/process_snvs/tests/main.nf.test
+++ b/subworkflows/local/process_snvs/tests/main.nf.test
@@ -33,27 +33,31 @@ nextflow_workflow {
                     [id:'reference'],
                     file(params.pipelines_testdata_base_path + 'reference/reference.fasta', checkIfExists: true)
                 ])
-                input[1] = channel.of([
+                input[1] = channel.fromPath(params.pipelines_testdata_base_path + 'reference/reference.fasta.fai', checkIfExists: true).map {it -> [[id:it.simpleName], it]  }.collect()
+                input[2] = channel.fromPath("$projectDir/assets/cadd_to_vcf_header.txt", checkIfExists: true).collect()
+                input[3] = null
+                input[4] = null
+                input[5] = channel.of([
                     [id:'SNV'],
                     file(params.pipelines_testdata_base_path + 'testdata/tumor_normal/subject_a.tumor.purple.somatic.vcf.gz', checkIfExists: true)
                 ])
-                input[2] = channel.of([
+                input[6] = channel.of([
                     [id:'SNV'],
                     file(params.pipelines_testdata_base_path + 'testdata/tumor_normal/subject_a.tumor.purple.sv.vcf.gz', checkIfExists: true)
                 ])
-                input[3] = []
-                input[4] = channel.of([
+                input[7] = []
+                input[8] = channel.of([
                     file(params.pipelines_testdata_base_path + 'reference/vcfanno_functions.lua', checkIfExists: true)
                 ])
-                input[5] = channel.of([
+                input[9] = channel.of([
                     file(params.pipelines_testdata_base_path + 'reference/grch37_gnomad_-r2.1.1-.vcf.gz', checkIfExists: true),
                     file(params.pipelines_testdata_base_path + 'reference/grch37_gnomad_-r2.1.1-.vcf.gz.tbi', checkIfExists: true)
                 ])
-                input[6] = channel.of([
+                input[10] = channel.of([
                     file(params.pipelines_testdata_base_path + 'reference/vcfanno.toml', checkIfExists: true)
                 ])
-                input[7] = UNTAR_VEP_CACHE.out.untar.map{ _meta, files -> [files]}.collect()
-                input[8] = channel.of([
+                input[11] = UNTAR_VEP_CACHE.out.untar.map{ _meta, files -> [files]}.collect()
+                input[12] = channel.of([
                     file(params.pipelines_testdata_base_path + 'reference/LoFtool_scores.txt', checkIfExists: true),
                     file(params.pipelines_testdata_base_path + 'reference/spliceai_21_scores_raw_indel_-v1.3-.vcf.gz', checkIfExists: true),
                     file(params.pipelines_testdata_base_path + 'reference/spliceai_21_scores_raw_snv_-v1.3-.vcf.gz', checkIfExists: true),
@@ -61,9 +65,10 @@ nextflow_workflow {
                     file(params.pipelines_testdata_base_path + 'reference/spliceai_21_scores_raw_indel_-v1.3-.vcf.gz.tbi', checkIfExists: true),
                     file(params.pipelines_testdata_base_path + 'reference/spliceai_21_scores_raw_snv_-v1.3-.vcf.gz.tbi', checkIfExists: true)
                 ])
-                input[9] = 'GRCh37'
-                input[10] = 'homo_sapiens'
-                input[11] = '107'
+                input[13] = null
+                input[14] = 'GRCh37'
+                input[15] = 'homo_sapiens'
+                input[16] = '107'
                 """
             }
         }

From ca5aae7559e72b33ace2fd2212fc9503a5cffde9 Mon Sep 17 00:00:00 2001
From: kristinebilgrav <kristinebilgrav1@gmail.com>
Date: Mon, 20 Apr 2026 13:37:26 +0200
Subject: [PATCH 20/23] updating snapshot

---
 main.nf                                       | 14 ++---
 .../local/annotate_cadd/tests/main.nf.test    | 18 ++++++-
 .../annotate_cadd/tests/main.nf.test.snap     | 54 +++++++++----------
 .../local/annotate_cadd/tests/nextflow.config |  2 +-
 4 files changed, 48 insertions(+), 40 deletions(-)

diff --git a/main.nf b/main.nf
index dc7d133..343c29a 100644
--- a/main.nf
+++ b/main.nf
@@ -66,15 +66,15 @@ workflow CLINICALGENOMICS_ONCOREFINER {
     //
 
     // Input channels
-    ch_snv_vcf               = channel.fromPath(val_snv_vcf).map { vcf -> [[id:vcf.simpleName], vcf] }.collect()
-    ch_snv_vcf_tbi           = channel.fromPath(val_snv_vcf + '.tbi', checkIfExists: true).map { vcf -> [[id:vcf.simpleName], vcf] }.collect()
-    ch_sv_vcf                = channel.fromPath(val_sv_vcf).map { vcf -> [[id:vcf.simpleName], vcf] }.collect()
-    ch_sv_vcf_tbi            = channel.fromPath(val_sv_vcf + '.tbi', checkIfExists: true).map { vcf -> [[id:vcf.simpleName], vcf] }.collect()
-    ch_vep_extra_files       = channel.empty()
-    ch_svdb_dbs              = channel.empty()
+    ch_snv_vcf         = channel.fromPath(val_snv_vcf).map { vcf -> [[id:vcf.simpleName], vcf] }.collect()
+    ch_snv_vcf_tbi     = channel.fromPath(val_snv_vcf + '.tbi', checkIfExists: true).map { vcf -> [[id:vcf.simpleName], vcf] }.collect()
+    ch_sv_vcf          = channel.fromPath(val_sv_vcf).map { vcf -> [[id:vcf.simpleName], vcf] }.collect()
+    ch_sv_vcf_tbi      = channel.fromPath(val_sv_vcf + '.tbi', checkIfExists: true).map { vcf -> [[id:vcf.simpleName], vcf] }.collect()
+    ch_vep_extra_files = channel.empty()
+    ch_svdb_dbs        = channel.empty()
 
     // Alignment files
-    ch_bam_bai_normal = channel.empty()
+    ch_bam_bai_normal  = channel.empty()
 
     if (val_bam_normal && val_bai_normal) {
         ch_bam_bai_normal = channel.fromPath(val_bam_normal)
diff --git a/subworkflows/local/annotate_cadd/tests/main.nf.test b/subworkflows/local/annotate_cadd/tests/main.nf.test
index 8b8df8f..2a59695 100644
--- a/subworkflows/local/annotate_cadd/tests/main.nf.test
+++ b/subworkflows/local/annotate_cadd/tests/main.nf.test
@@ -14,6 +14,7 @@ nextflow_workflow {
         when {
             params {
                 genome = "GRCh37"
+                outdir = "$outputDir"
             }
             workflow {
                 """
@@ -31,9 +32,15 @@ nextflow_workflow {
         }
 
         then {
+            // All directories and files
+            def output_directories_and_files = getAllFilesFromDir(params.outdir, relative: true, includeDir: true)
+
             assertAll(
                 { assert workflow.success },
-                { assert snapshot(workflow.out.vcf, workflow.out.tbi).match() }
+                { assert snapshot(
+                    // All directories and files
+                    output_directories_and_files
+                    ).match() }
             )
         }
     }
@@ -45,6 +52,7 @@ nextflow_workflow {
         when {
             params {
                 genome = "GRCh38"
+                outdir = "$outputDir"
             }
             workflow {
                 """
@@ -62,9 +70,15 @@ nextflow_workflow {
         }
 
         then {
+            // All directories and files
+            def output_directories_and_files = getAllFilesFromDir(params.outdir, relative: true, includeDir: true)
+
             assertAll(
                 { assert workflow.success },
-                { assert snapshot(workflow.out.vcf, workflow.out.tbi).match() }
+                { assert snapshot(
+                    // All directories and files
+                    output_directories_and_files
+                    ).match() }
             )
         }
     }
diff --git a/subworkflows/local/annotate_cadd/tests/main.nf.test.snap b/subworkflows/local/annotate_cadd/tests/main.nf.test.snap
index 0a3c051..0f5186b 100644
--- a/subworkflows/local/annotate_cadd/tests/main.nf.test.snap
+++ b/subworkflows/local/annotate_cadd/tests/main.nf.test.snap
@@ -2,51 +2,45 @@
     "ANNOTATE_CADD - GRCh37, stub": {
         "content": [
             [
-                [
-                    {
-                        "id": "test"
-                    },
-                    "subject_a_ann.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
-                ]
-            ],
-            [
-                [
-                    {
-                        "id": "test"
-                    },
-                    "subject_a_ann.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e"
-                ]
+                "bcftools",
+                "bcftools/subject_a_ann.vcf.gz",
+                "bcftools/subject_a_ann.vcf.gz.tbi",
+                "bcftools/subject_a_indels.vcf.gz",
+                "cadd",
+                "cadd/subject_a_indels_cadd.tsv.gz",
+                "tabix",
+                "tabix/subject_a.tumor.purple.somatic.vcf.gz.tbi",
+                "tabix/subject_a_indels_cadd.tsv.gz.tbi"
             ]
         ],
         "meta": {
             "nf-test": "0.9.3",
             "nextflow": "25.10.4"
         },
-        "timestamp": "2026-04-17T13:12:12.741306"
+        "timestamp": "2026-04-20T13:35:58.691949"
     },
     "ANNOTATE_CADD - GRCh38, stub": {
         "content": [
             [
-                [
-                    {
-                        "id": "test"
-                    },
-                    "subject_a_indels_ann.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
-                ]
-            ],
-            [
-                [
-                    {
-                        "id": "test"
-                    },
-                    "subject_a_indels_ann.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e"
-                ]
+                "bcftools",
+                "bcftools/subject_a_renamed.vcf.gz",
+                "bcftools/subject_a_renamed_ann.vcf.gz",
+                "bcftools/subject_a_renamed_ann.vcf.gz.tbi",
+                "bcftools/subject_a_renamed_indels.vcf.gz",
+                "cadd",
+                "cadd/subject_a_renamed_indels_cadd.tsv.gz",
+                "gawk",
+                "gawk/cadd_to_reference.txt",
+                "gawk/reference_to_cadd.txt",
+                "tabix",
+                "tabix/subject_a.tumor.purple.somatic.vcf.gz.tbi",
+                "tabix/subject_a_renamed_indels_cadd.tsv.gz.tbi"
             ]
         ],
         "meta": {
             "nf-test": "0.9.3",
             "nextflow": "25.10.4"
         },
-        "timestamp": "2026-04-17T13:12:21.913504"
+        "timestamp": "2026-04-20T13:36:09.683491"
     }
 }
\ No newline at end of file
diff --git a/subworkflows/local/annotate_cadd/tests/nextflow.config b/subworkflows/local/annotate_cadd/tests/nextflow.config
index 9c408d6..dd99056 100644
--- a/subworkflows/local/annotate_cadd/tests/nextflow.config
+++ b/subworkflows/local/annotate_cadd/tests/nextflow.config
@@ -2,7 +2,7 @@ process {
 
     withName: 'BCFTOOLS_RENAME_CHR_CADD' {
         ext.args   = { "--output-type z" }
-        ext.prefix = { "${input.simpleName}_indels" }
+        ext.prefix = { "${input.simpleName}_renamed" }
     }
 
     withName: 'BCFTOOLS_VIEW' {

From 82bcd85b20d31b6534ee22e3b8f69c81b80ea684 Mon Sep 17 00:00:00 2001
From: kristinebilgrav <kristinebilgrav1@gmail.com>
Date: Mon, 20 Apr 2026 16:45:16 +0200
Subject: [PATCH 21/23] Review implementations

---
 CHANGELOG.md                             |  2 +-
 conf/subworkflows/annotate_cadd.config   | 15 ++++++++++++++-
 nextflow.config                          |  1 -
 subworkflows/local/annotate_cadd/main.nf | 16 ++++++----------
 subworkflows/local/process_snvs/main.nf  |  4 +---
 5 files changed, 22 insertions(+), 16 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index c435dfb..24fb675 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -24,7 +24,7 @@ Initial release of Clinical-Genomics/oncorefiner, created with the [nf-core](htt
 - [#60](https://github.com/Clinical-Genomics/oncorefiner/pull/60) Added `GENERATE_CYTOSURE_FILES` subworkflow and necessary nf-core modules `TIDDIT_COV` and `VCF2CYTOSURE`.
 - [#70](https://github.com/Clinical-Genomics/oncorefiner/pull/70) Added `SAMTOOLS/VIEW` for bam to cram conversion in the `main.nf`.
 - [#66](https://github.com/Clinical-Genomics/oncorefiner/pull/66) Added `PROCESS_SNVS` subworkflow.
-- [#59](https://github.com/Clinical-Genomics/oncorefiner/pull/59) Added CADD scoring for InDels in the subworkflow `ANNOTATE_CADD`, with a subworkflow test (stub only)
+- [#59](https://github.com/Clinical-Genomics/oncorefiner/pull/59) Added the `ANNOTATE_CADD` subworkflow for CADD scoring of InDels, used in `PROCESS_SNVS`, together with an accompanying subworkflow test (stub only).
 
 ### `Changed`
 
diff --git a/conf/subworkflows/annotate_cadd.config b/conf/subworkflows/annotate_cadd.config
index 3feeece..301f91c 100644
--- a/conf/subworkflows/annotate_cadd.config
+++ b/conf/subworkflows/annotate_cadd.config
@@ -1,7 +1,20 @@
 /*
-Annotate with CADD
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    Config file for defining DSL2 per module options and publishing paths
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    Available keys to override module options:
+        ext.args            = Additional arguments appended to command in module.
+        ext.args2           = Second set of arguments appended to command in module (multi-tool modules).
+        ext.args3           = Third set of arguments appended to command in module (multi-tool modules).
+        ext.prefix          = File name prefix for output files.
+        ext.when            = Conditional clause
+----------------------------------------------------------------------------------------
 */
 
+//
+// Annotate with CADD
+//
+
 
 process {
 
diff --git a/nextflow.config b/nextflow.config
index 5ed7f32..3a31a88 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -27,7 +27,6 @@ params {
     cadd_resources              = null
     cadd_prescored_indels       = null
 
-
     // Vep
     vep_cache_version           = 112
     vep_plugin_files            = null
diff --git a/subworkflows/local/annotate_cadd/main.nf b/subworkflows/local/annotate_cadd/main.nf
index 38da64d..e6d851e 100644
--- a/subworkflows/local/annotate_cadd/main.nf
+++ b/subworkflows/local/annotate_cadd/main.nf
@@ -28,9 +28,8 @@ workflow ANNOTATE_CADD {
 
         TABIX_INPUT(ch_vcf) //Subworkflow needs tabix index
 
-        ch_vcf
+        ch_vcf_tbi = ch_vcf
             .join(TABIX_INPUT.out.index, failOnMismatch:true, failOnDuplicate:true)
-            .set { ch_vcf_tbi }
 
         // Create files and rename chromosomes if reference is GRCh38
         if (val_genome.equals('GRCh38')) {
@@ -46,17 +45,15 @@ workflow ANNOTATE_CADD {
             GAWK_CADD_TO_REF_CHRNAMES.out.output.map { _meta, txt -> txt }
                 .set { ch_rename_chrs_ref }
 
-            ch_vcf_tbi
+            rename_chrnames_in = ch_vcf_tbi
                 .combine(ch_chrnames_cadd)
                 .map { meta, vcf, tbi, txt -> tuple( meta, vcf, tbi, [], [], [], [], txt ) }
-                .set {rename_chrnames_in}
 
             // Change chr names to CADD compatible names
             BCFTOOLS_RENAME_CHR_CADD( rename_chrnames_in )
 
-            BCFTOOLS_RENAME_CHR_CADD.out.vcf
+            ch_vcf_tbi = BCFTOOLS_RENAME_CHR_CADD.out.vcf
                 .map {meta, vcf -> tuple( meta , vcf, [] )}
-                .set { ch_vcf_tbi }
         }
 
         // Filter to extract indels
@@ -69,18 +66,17 @@ workflow ANNOTATE_CADD {
         TABIX_CADD(CADD.out.tsv)
 
         // Change chr names back to desired naming and annotate original vcf with cadd results
-        ch_vcf_tbi
+        ch_annotate = ch_vcf_tbi
             .join(CADD.out.tsv, failOnMismatch: true, failOnDuplicate: true)
             .join(TABIX_CADD.out.index, failOnMismatch: true, failOnDuplicate: true)
             .combine( ch_header )
             .combine( ch_rename_chrs_ref )
             .map { meta, vcf, tbi, annotations, annotations_index, header, txt -> tuple( meta, vcf, tbi, annotations, annotations_index, [], header, txt )  } //THERE IS A TBI?
-            .set { ch_annotate }
 
 
         BCFTOOLS_ANNOTATE_INDELS( ch_annotate )
 
     emit:
-        vcf = BCFTOOLS_ANNOTATE_INDELS.out.vcf // channel: [ val(meta), path(vcf) ]
-        tbi = BCFTOOLS_ANNOTATE_INDELS.out.tbi // channel: [ val(meta), path(tbi) ]
+        vcf = BCFTOOLS_ANNOTATE_INDELS.out.vcf // channel: [val(meta), path(vcf)]
+        tbi = BCFTOOLS_ANNOTATE_INDELS.out.tbi // channel: [val(meta), path(tbi)]
 }
diff --git a/subworkflows/local/process_snvs/main.nf b/subworkflows/local/process_snvs/main.nf
index 061bbd0..442fef5 100644
--- a/subworkflows/local/process_snvs/main.nf
+++ b/subworkflows/local/process_snvs/main.nf
@@ -73,8 +73,7 @@ workflow PROCESS_SNVS {
         // ANNOTATE WITH CADD - currently depends on val_cadd_resources - could be improved?
         if (val_cadd_resources) {
 
-            BCFTOOLS_VIEW_RESEARCH.out.vcf
-                .set{ ch_cadd_in }
+            ch_cadd_in = BCFTOOLS_VIEW_RESEARCH.out.vcf
 
             ANNOTATE_CADD (
                 ch_cadd_in,
@@ -88,7 +87,6 @@ workflow PROCESS_SNVS {
             ANNOTATE_CADD.out.vcf
                 .join(ANNOTATE_CADD.out.tbi)
                 .set { ch_vep_snv }
-
         }
 
         ENSEMBLVEP_VEP (

From 3c5e89aee1b1c2d9aa6acfb2aa8d4a5e30fb698a Mon Sep 17 00:00:00 2001
From: kristinebilgrav <kristinebilgrav1@gmail.com>
Date: Mon, 20 Apr 2026 17:12:15 +0200
Subject: [PATCH 22/23] update citations

---
 .../local/utils_nfcore_oncorefiner_pipeline/main.nf       | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/subworkflows/local/utils_nfcore_oncorefiner_pipeline/main.nf b/subworkflows/local/utils_nfcore_oncorefiner_pipeline/main.nf
index a1d2893..b276648 100644
--- a/subworkflows/local/utils_nfcore_oncorefiner_pipeline/main.nf
+++ b/subworkflows/local/utils_nfcore_oncorefiner_pipeline/main.nf
@@ -190,8 +190,10 @@ def toolCitationText() {
             citations_list +
             vcfanno        +
             bcftools_view  +
-            cadd           +
             ensemblvep_vep
+            if (params.cadd_resources) {
+                citations_list = citations_list + cadd
+            }
     }
 
     if (params.sv_vcf) {
@@ -230,8 +232,10 @@ def toolBibliographyText() {
             bibliography_list +
             vcfanno        +
             bcftools_view  +
-            cadd           +
             ensemblvep_vep
+            if (params.cadd_resources) {
+                bibliography_list = bibliography_list + cadd
+            }
     }
 
     if (params.sv_vcf) {

From 4bafb21765dcfd476623c2c68c71d6a5cfda3309 Mon Sep 17 00:00:00 2001
From: kristinebilgrav <kristinebilgrav1@gmail.com>
Date: Tue, 21 Apr 2026 14:51:24 +0200
Subject: [PATCH 23/23] update snapshot

---
 .../local/annotate_cadd/tests/main.nf.test    | 14 +--
 .../annotate_cadd/tests/main.nf.test.snap     | 98 +++++++++++++------
 2 files changed, 72 insertions(+), 40 deletions(-)

diff --git a/subworkflows/local/annotate_cadd/tests/main.nf.test b/subworkflows/local/annotate_cadd/tests/main.nf.test
index 2a59695..2691e46 100644
--- a/subworkflows/local/annotate_cadd/tests/main.nf.test
+++ b/subworkflows/local/annotate_cadd/tests/main.nf.test
@@ -32,15 +32,10 @@ nextflow_workflow {
         }
 
         then {
-            // All directories and files
-            def output_directories_and_files = getAllFilesFromDir(params.outdir, relative: true, includeDir: true)
 
             assertAll(
                 { assert workflow.success },
-                { assert snapshot(
-                    // All directories and files
-                    output_directories_and_files
-                    ).match() }
+                { assert snapshot(workflow.out).match() }
             )
         }
     }
@@ -70,15 +65,10 @@ nextflow_workflow {
         }
 
         then {
-            // All directories and files
-            def output_directories_and_files = getAllFilesFromDir(params.outdir, relative: true, includeDir: true)
 
             assertAll(
                 { assert workflow.success },
-                { assert snapshot(
-                    // All directories and files
-                    output_directories_and_files
-                    ).match() }
+                { assert snapshot(workflow.out).match() }
             )
         }
     }
diff --git a/subworkflows/local/annotate_cadd/tests/main.nf.test.snap b/subworkflows/local/annotate_cadd/tests/main.nf.test.snap
index 0f5186b..c9dcc48 100644
--- a/subworkflows/local/annotate_cadd/tests/main.nf.test.snap
+++ b/subworkflows/local/annotate_cadd/tests/main.nf.test.snap
@@ -1,46 +1,88 @@
 {
     "ANNOTATE_CADD - GRCh37, stub": {
         "content": [
-            [
-                "bcftools",
-                "bcftools/subject_a_ann.vcf.gz",
-                "bcftools/subject_a_ann.vcf.gz.tbi",
-                "bcftools/subject_a_indels.vcf.gz",
-                "cadd",
-                "cadd/subject_a_indels_cadd.tsv.gz",
-                "tabix",
-                "tabix/subject_a.tumor.purple.somatic.vcf.gz.tbi",
-                "tabix/subject_a_indels_cadd.tsv.gz.tbi"
-            ]
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "subject_a_ann.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+                    ]
+                ],
+                "1": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "subject_a_ann.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "tbi": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "subject_a_ann.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "vcf": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "subject_a_ann.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+                    ]
+                ]
+            }
         ],
         "meta": {
             "nf-test": "0.9.3",
             "nextflow": "25.10.4"
         },
-        "timestamp": "2026-04-20T13:35:58.691949"
+        "timestamp": "2026-04-21T14:50:16.980447"
     },
     "ANNOTATE_CADD - GRCh38, stub": {
         "content": [
-            [
-                "bcftools",
-                "bcftools/subject_a_renamed.vcf.gz",
-                "bcftools/subject_a_renamed_ann.vcf.gz",
-                "bcftools/subject_a_renamed_ann.vcf.gz.tbi",
-                "bcftools/subject_a_renamed_indels.vcf.gz",
-                "cadd",
-                "cadd/subject_a_renamed_indels_cadd.tsv.gz",
-                "gawk",
-                "gawk/cadd_to_reference.txt",
-                "gawk/reference_to_cadd.txt",
-                "tabix",
-                "tabix/subject_a.tumor.purple.somatic.vcf.gz.tbi",
-                "tabix/subject_a_renamed_indels_cadd.tsv.gz.tbi"
-            ]
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "subject_a_renamed_ann.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+                    ]
+                ],
+                "1": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "subject_a_renamed_ann.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "tbi": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "subject_a_renamed_ann.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "vcf": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "subject_a_renamed_ann.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+                    ]
+                ]
+            }
         ],
         "meta": {
             "nf-test": "0.9.3",
             "nextflow": "25.10.4"
         },
-        "timestamp": "2026-04-20T13:36:09.683491"
+        "timestamp": "2026-04-21T14:50:26.527943"
     }
 }
\ No newline at end of file