From ea8e9c4a814a80341f07b55ab9d0d75e40cdbe13 Mon Sep 17 00:00:00 2001
From: Sevda Nuralieva <sevda.nuralieva@bostongene.com>
Date: Fri, 13 Mar 2026 02:31:17 +0400
Subject: [PATCH] Add rtgtools/cnveval module for CNV evaluation

---
 .../nf-core/rtgtools/cnveval/environment.yml  |   7 +
 modules/nf-core/rtgtools/cnveval/main.nf      |  52 +++++
 modules/nf-core/rtgtools/cnveval/meta.yml     | 141 ++++++++++++++
 .../rtgtools/cnveval/tests/main.nf.test       |  66 +++++++
 .../rtgtools/cnveval/tests/main.nf.test.snap  | 180 ++++++++++++++++++
 5 files changed, 446 insertions(+)
 create mode 100644 modules/nf-core/rtgtools/cnveval/environment.yml
 create mode 100644 modules/nf-core/rtgtools/cnveval/main.nf
 create mode 100644 modules/nf-core/rtgtools/cnveval/meta.yml
 create mode 100644 modules/nf-core/rtgtools/cnveval/tests/main.nf.test
 create mode 100644 modules/nf-core/rtgtools/cnveval/tests/main.nf.test.snap

diff --git a/modules/nf-core/rtgtools/cnveval/environment.yml b/modules/nf-core/rtgtools/cnveval/environment.yml
new file mode 100644
index 000000000000..5a1612109c7d
--- /dev/null
+++ b/modules/nf-core/rtgtools/cnveval/environment.yml
@@ -0,0 +1,7 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+channels:  # conda channels this environment draws packages from
+  - conda-forge
+  - bioconda
+dependencies:
+  - "bioconda::rtg-tools=3.13"  # pinned; same version as the rtg-tools:3.13 container tag in main.nf
diff --git a/modules/nf-core/rtgtools/cnveval/main.nf b/modules/nf-core/rtgtools/cnveval/main.nf
new file mode 100644
index 000000000000..8c6b56a9b2bf
--- /dev/null
+++ b/modules/nf-core/rtgtools/cnveval/main.nf
@@ -0,0 +1,63 @@
+// Evaluates called CNVs (query VCF) against a baseline CNV set (truth VCF) with `rtg cnveval`,
+// restricted to the supplied evaluation regions, and emits the result files per sample.
+process RTGTOOLS_CNVEVAL {
+    tag "$meta.id"
+    label 'process_medium'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/dc/dca5ba13b7ec38bf7cacf00a33517b9080067bea638745c05d50a4957c75fc2e/data':
+        'community.wave.seqera.io/library/rtg-tools:3.13--3465421f1b0be0ce' }"
+
+    input:
+    // One tuple per sample: calls VCF + index, baseline VCF + index, and the evaluation-regions BED.
+    tuple val(meta), path(query_vcf), path(query_vcf_tbi), path(truth_vcf), path(truth_vcf_tbi), path(evaluation_regions_bed)
+
+    output:
+    tuple val(meta), path("*.baseline.bed.gz")          , emit: baseline_bed
+    tuple val(meta), path("*.calls.bed.gz")             , emit: calls_bed
+    tuple val(meta), path("*.weighted_roc.tsv.gz")      , emit: weighted_roc
+    tuple val(meta), path("*.summary.txt")              , emit: summary
+    // The tool version is obtained by running `rtg version` and is published on the `versions` topic.
+    tuple val("${task.process}"), val('rtgtools'), eval("rtg version | sed 's/Product: RTG Tools //; q'"), topic: versions, emit: versions_rtgtools
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ""
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    // Pass RTG_MEM only when a memory directive is set (task.memory is null otherwise, and calling
+    // toGiga() on it would abort the task), and clamp to at least 1 because toGiga() truncates
+    // allocations below 1 GB to 0, which would yield an unusable "RTG_MEM=0G".
+    def rtg_mem = task.memory ? "RTG_MEM=${Math.max(1, task.memory.toGiga())}G" : ""
+
+    // cnveval writes into the directory given by --output. Afterwards the `done` and `progress`
+    // files are moved up under their own names and every other file is moved up as
+    // "<prefix>.<name>" so that it matches the output globs declared above.
+    """
+    rtg ${rtg_mem} cnveval \\
+        ${args} \\
+        --baseline=${truth_vcf} \\
+        --calls=${query_vcf} \\
+        --evaluation-regions=${evaluation_regions_bed} \\
+        --output=${prefix} \\
+        --threads=${task.cpus}
+
+    cd ${prefix}/
+    mv done progress ..
+    for f in * ; do mv "\$f" "../${prefix}.\$f" ; done
+    cd ..
+    """
+
+    stub:
+    // Creates placeholder files carrying the same names as the real outputs.
+    def prefix = task.ext.prefix ?: "${meta.id}"
+
+    """
+    echo | gzip > ${prefix}.baseline.bed.gz
+    echo | gzip > ${prefix}.calls.bed.gz
+    echo | gzip > ${prefix}.weighted_roc.tsv.gz
+    touch ${prefix}.summary.txt
+    """
+}
diff --git a/modules/nf-core/rtgtools/cnveval/meta.yml b/modules/nf-core/rtgtools/cnveval/meta.yml
new file mode 100644
index 000000000000..50072e13e0a7
--- /dev/null
+++ b/modules/nf-core/rtgtools/cnveval/meta.yml
@@ -0,0 +1,141 @@
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
+name: "rtgtools_cnveval"  # lower-case form of the process name RTGTOOLS_CNVEVAL in main.nf
+description: The cnveval tool of RTG tools. It is used to evaluate called CNV regions
+  for agreement with a baseline CNV set.
+keywords:
+  - benchmarking
+  - vcf
+  - rtg-tools
+  - cnv-eval
+  - copy-number-variants
+
+tools:
+  - "rtgtools":
+      description: "RealTimeGenomics Tools -- Utilities for accurate VCF comparison
+        and manipulation."
+      homepage: "https://www.realtimegenomics.com/products/rtg-tools"
+      documentation: "https://cdn.jsdelivr.net/gh/RealTimeGenomics/rtg-tools@master/installer/resources/tools/RTGOperationsManual.pdf"
+      tool_dev_url: "https://github.com/RealTimeGenomics/rtg-tools"
+      licence: ["BSD-2-clause"]
+      identifier: ""  # NOTE(review): left empty; fill in a registry identifier if one exists
+
+input:  # a single tuple; entries are listed in the same order as the input tuple in main.nf
+  - - meta:
+        type: map
+        description: |
+          Groovy Map containing sample information
+          e.g. [ id:'test', single_end:false ]
+    - query_vcf:  # passed to cnveval as --calls
+        type: file
+        description: A VCF with called CNV variants to benchmark against the baseline.
+          Records must contain INFO END and INFO SVTYPE (DUP or DEL).
+        pattern: "*.{vcf,vcf.gz}"
+        ontologies:
+          - edam: "http://edamontology.org/format_3016"
+    - query_vcf_tbi:
+        type: file
+        description: The index of the VCF file with called variants
+        pattern: "*.{vcf.gz.tbi,vcf.tbi}"
+        ontologies:
+          - edam: "http://edamontology.org/format_3616"
+    - truth_vcf:  # passed to cnveval as --baseline
+        type: file
+        description: A baseline VCF containing expected CNV calls.
+          Records must contain INFO END and INFO SVTYPE (DUP or DEL).
+        pattern: "*.{vcf,vcf.gz}"
+        ontologies:
+          - edam: "http://edamontology.org/format_3016"
+    - truth_vcf_tbi:
+        type: file
+        description: The index of the baseline VCF to compare against
+        pattern: "*.{vcf.gz.tbi,vcf.tbi}"
+        ontologies:
+          - edam: "http://edamontology.org/format_3616"
+    - evaluation_regions_bed:  # passed to cnveval as --evaluation-regions
+        type: file
+        description: A BED file containing the regions of interest for evaluation (required).
+          Regions are intersected with truth and calls VCFs to obtain CNV regions.
+        pattern: "*.bed"
+        ontologies:
+          - edam: "http://edamontology.org/format_3003"
+
+output:  # one entry per `emit:` name declared in main.nf
+  baseline_bed:
+    - - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. [ id:'test', single_end:false ]
+      - "*.baseline.bed.gz":
+          type: file
+          description: A BED file containing truth CNV regions with TP/FN status, SVTYPE,
+            and the span of the original truth VCF record
+          pattern: "*.baseline.bed.gz"
+          ontologies:
+            - edam: "http://edamontology.org/format_3003"
+            - edam: "http://edamontology.org/format_3989"
+  calls_bed:
+    - - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. [ id:'test', single_end:false ]
+      - "*.calls.bed.gz":
+          type: file
+          description: A BED file containing called CNV regions with TP/FP status, SVTYPE,
+            the span of the original calls VCF record, and the score value
+          pattern: "*.calls.bed.gz"
+          ontologies:
+            - edam: "http://edamontology.org/format_3003"
+            - edam: "http://edamontology.org/format_3989"
+  weighted_roc:
+    - - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. [ id:'test', single_end:false ]
+      - "*.weighted_roc.tsv.gz":
+          type: file
+          description: TSV file containing weighted ROC data that can be plotted with rocplot
+          pattern: "*.weighted_roc.tsv.gz"
+          ontologies:
+            - edam: "http://edamontology.org/format_3475"
+            - edam: "http://edamontology.org/format_3989"
+  summary:
+    - - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. [ id:'test', single_end:false ]
+      - "*.summary.txt":
+          type: file
+          description: A TXT file containing the summary statistics of the evaluation
+          pattern: "*.summary.txt"
+          ontologies:
+            - edam: "http://edamontology.org/format_1964"
+  versions_rtgtools:  # the three elements of the version tuple emitted by main.nf
+    - - "${task.process}":
+          type: string
+          description: The name of the process
+      - "rtgtools":
+          type: string
+          description: The name of the tool
+      - "rtg version | sed 's/Product: RTG Tools //; q'":
+          type: eval
+          description: The expression to obtain the version of the tool
+
+topics:
+  versions:  # describes the tuple that main.nf sends to `topic: versions`
+    - - ${task.process}:
+          type: string
+          description: The process the versions were collected from
+      - rtgtools:
+          type: string
+          description: The name of the tool
+      - "rtg version | sed 's/Product: RTG Tools //; q'":
+          type: eval
+          description: The expression to obtain the version of the tool
+authors:
+  - "@SevaNur"
+maintainers:
+  - "@SevaNur"
diff --git a/modules/nf-core/rtgtools/cnveval/tests/main.nf.test b/modules/nf-core/rtgtools/cnveval/tests/main.nf.test
new file mode 100644
index 000000000000..01bbd76e4479
--- /dev/null
+++ b/modules/nf-core/rtgtools/cnveval/tests/main.nf.test
@@ -0,0 +1,66 @@
+nextflow_process {
+
+    name "Test Process RTGTOOLS_CNVEVAL"
+    script "../main.nf" // the module under test, one directory above this tests/ folder
+    process "RTGTOOLS_CNVEVAL"
+
+    tag "modules"
+    tag "modules_nfcore"
+    tag "rtgtools"
+    tag "rtgtools/cnveval"
+
+    test("homo_sapiens - [vcf, tbi, truth, truth_tbi, evaluation_regions_bed]") { // real run of the script block
+
+        when {
+            process {
+                """
+                input[0] = Channel.of([
+                    [ id:'test' ], // meta map
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/chr21/simulated_sv.vcf.gz', checkIfExists: true),
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/chr21/simulated_sv.vcf.gz.tbi', checkIfExists: true),
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/chr21/simulated_sv2.vcf.gz', checkIfExists: true),
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/chr21/simulated_sv2.vcf.gz.tbi', checkIfExists: true),
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.bed', checkIfExists: true)
+                ])
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() } // all output channels are checked against main.nf.test.snap
+            )
+        }
+
+    }
+
+    test("homo_sapiens - [vcf, tbi, truth, truth_tbi, evaluation_regions_bed] - stub") { // same inputs, stub run
+
+        options "-stub" // passes Nextflow's -stub option to this run
+
+        when {
+            process {
+                """
+                input[0] = Channel.of([
+                    [ id:'test' ], // meta map
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/chr21/simulated_sv.vcf.gz', checkIfExists: true),
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/chr21/simulated_sv.vcf.gz.tbi', checkIfExists: true),
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/chr21/simulated_sv2.vcf.gz', checkIfExists: true),
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/chr21/simulated_sv2.vcf.gz.tbi', checkIfExists: true),
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.bed', checkIfExists: true)
+                ])
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() } // all output channels are checked against main.nf.test.snap
+            )
+        }
+
+    }
+
+}
diff --git a/modules/nf-core/rtgtools/cnveval/tests/main.nf.test.snap b/modules/nf-core/rtgtools/cnveval/tests/main.nf.test.snap
new file mode 100644
index 000000000000..e50616946b9a
--- /dev/null
+++ b/modules/nf-core/rtgtools/cnveval/tests/main.nf.test.snap
@@ -0,0 +1,180 @@
+{
+    "homo_sapiens - [vcf, tbi, truth, truth_tbi, evaluation_regions_bed]": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.baseline.bed.gz:md5,d93ffeef4c9bd38370f526980904dac6"
+                    ]
+                ],
+                "1": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.calls.bed.gz:md5,d93ffeef4c9bd38370f526980904dac6"
+                    ]
+                ],
+                "2": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.weighted_roc.tsv.gz:md5,b9f4c2716ef1c8fb3341d1541982cfe9"
+                    ]
+                ],
+                "3": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.summary.txt:md5,f4c8df93c8bdab603036bbc27b4a28c3"
+                    ]
+                ],
+                "4": [
+                    [
+                        "RTGTOOLS_CNVEVAL",
+                        "rtgtools",
+                        "3.13"
+                    ]
+                ],
+                "baseline_bed": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.baseline.bed.gz:md5,d93ffeef4c9bd38370f526980904dac6"
+                    ]
+                ],
+                "calls_bed": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.calls.bed.gz:md5,d93ffeef4c9bd38370f526980904dac6"
+                    ]
+                ],
+                "summary": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.summary.txt:md5,f4c8df93c8bdab603036bbc27b4a28c3"
+                    ]
+                ],
+                "versions_rtgtools": [
+                    [
+                        "RTGTOOLS_CNVEVAL",
+                        "rtgtools",
+                        "3.13"
+                    ]
+                ],
+                "weighted_roc": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.weighted_roc.tsv.gz:md5,b9f4c2716ef1c8fb3341d1541982cfe9"
+                    ]
+                ]
+            }
+        ],
+        "timestamp": "2026-03-13T02:20:33.987241",
+        "meta": {
+            "nf-test": "0.9.4",
+            "nextflow": "25.10.4"
+        }
+    },
+    "homo_sapiens - [vcf, tbi, truth, truth_tbi, evaluation_regions_bed] - stub": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.baseline.bed.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+                    ]
+                ],
+                "1": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.calls.bed.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+                    ]
+                ],
+                "2": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.weighted_roc.tsv.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+                    ]
+                ],
+                "3": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.summary.txt:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "4": [
+                    [
+                        "RTGTOOLS_CNVEVAL",
+                        "rtgtools",
+                        "3.13"
+                    ]
+                ],
+                "baseline_bed": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.baseline.bed.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+                    ]
+                ],
+                "calls_bed": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.calls.bed.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+                    ]
+                ],
+                "summary": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.summary.txt:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "versions_rtgtools": [
+                    [
+                        "RTGTOOLS_CNVEVAL",
+                        "rtgtools",
+                        "3.13"
+                    ]
+                ],
+                "weighted_roc": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.weighted_roc.tsv.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+                    ]
+                ]
+            }
+        ],
+        "timestamp": "2026-03-13T02:20:43.791597",
+        "meta": {
+            "nf-test": "0.9.4",
+            "nextflow": "25.10.4"
+        }
+    }
+}
\ No newline at end of file