From ea8e9c4a814a80341f07b55ab9d0d75e40cdbe13 Mon Sep 17 00:00:00 2001
From: Sevda Nuralieva
Date: Fri, 13 Mar 2026 02:31:17 +0400
Subject: [PATCH] Add rtgtools/cnveval module for CNV evaluation

---
Review notes (free text in this area is not part of any hunk):
  * main.nf defines process RTGTOOLS_CNVEVAL, which runs `rtg cnveval` with
    the truth VCF as --baseline, the query VCF as --calls and the BED file as
    --evaluation-regions, writing into a directory named after the prefix.
  * After the run, `done` and `progress` are moved up unchanged; every other
    file in that directory is moved up and renamed to "<prefix>.<name>".
  * The stub creates the three gzip outputs and an empty summary file.
  * NOTE(review): in the non-stub snapshot, test.baseline.bed.gz and
    test.calls.bed.gz share one md5; possibly neither output carries CNV
    records for this test data -- TODO confirm.

 .../nf-core/rtgtools/cnveval/environment.yml  |   7 +
 modules/nf-core/rtgtools/cnveval/main.nf      |  52 +++++
 modules/nf-core/rtgtools/cnveval/meta.yml     | 141 ++++++++++++++
 .../rtgtools/cnveval/tests/main.nf.test       |  66 +++++++
 .../rtgtools/cnveval/tests/main.nf.test.snap  | 180 ++++++++++++++++++
 5 files changed, 446 insertions(+)
 create mode 100644 modules/nf-core/rtgtools/cnveval/environment.yml
 create mode 100644 modules/nf-core/rtgtools/cnveval/main.nf
 create mode 100644 modules/nf-core/rtgtools/cnveval/meta.yml
 create mode 100644 modules/nf-core/rtgtools/cnveval/tests/main.nf.test
 create mode 100644 modules/nf-core/rtgtools/cnveval/tests/main.nf.test.snap

diff --git a/modules/nf-core/rtgtools/cnveval/environment.yml b/modules/nf-core/rtgtools/cnveval/environment.yml
new file mode 100644
index 000000000000..5a1612109c7d
--- /dev/null
+++ b/modules/nf-core/rtgtools/cnveval/environment.yml
@@ -0,0 +1,7 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+channels:
+  - conda-forge
+  - bioconda
+dependencies:
+  - "bioconda::rtg-tools=3.13"
diff --git a/modules/nf-core/rtgtools/cnveval/main.nf b/modules/nf-core/rtgtools/cnveval/main.nf
new file mode 100644
index 000000000000..8c6b56a9b2bf
--- /dev/null
+++ b/modules/nf-core/rtgtools/cnveval/main.nf
@@ -0,0 +1,52 @@
+process RTGTOOLS_CNVEVAL {
+    tag "$meta.id"
+    label 'process_medium'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/dc/dca5ba13b7ec38bf7cacf00a33517b9080067bea638745c05d50a4957c75fc2e/data':
+        'community.wave.seqera.io/library/rtg-tools:3.13--3465421f1b0be0ce' }"
+
+    input:
+    tuple val(meta), path(query_vcf), path(query_vcf_tbi), path(truth_vcf), path(truth_vcf_tbi), path(evaluation_regions_bed)
+
+    output:
+    tuple val(meta), path("*.baseline.bed.gz")     , emit: baseline_bed
+    tuple val(meta), path("*.calls.bed.gz")        , emit: calls_bed
+    tuple val(meta), path("*.weighted_roc.tsv.gz") , emit: weighted_roc
+    tuple val(meta), path("*.summary.txt")         , emit: summary
+    tuple val("${task.process}"), val('rtgtools'), eval("rtg version | sed 's/Product: RTG Tools //; q'"), topic: versions, emit: versions_rtgtools
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ""
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    def avail_mem = task.memory.toGiga() + "G"
+
+    """
+    rtg RTG_MEM=$avail_mem cnveval \\
+        ${args} \\
+        --baseline=${truth_vcf} \\
+        --calls=${query_vcf} \\
+        --evaluation-regions=${evaluation_regions_bed} \\
+        --output=${prefix} \\
+        --threads=${task.cpus}
+
+    cd ${prefix}/
+    mv done progress ..
+    for f in * ; do mv "\$f" "../${prefix}.\$f" ; done
+    cd ..
+    """
+
+    stub:
+    def prefix = task.ext.prefix ?: "${meta.id}"
+
+    """
+    echo | gzip > ${prefix}.baseline.bed.gz
+    echo | gzip > ${prefix}.calls.bed.gz
+    echo | gzip > ${prefix}.weighted_roc.tsv.gz
+    touch ${prefix}.summary.txt
+    """
+}
diff --git a/modules/nf-core/rtgtools/cnveval/meta.yml b/modules/nf-core/rtgtools/cnveval/meta.yml
new file mode 100644
index 000000000000..50072e13e0a7
--- /dev/null
+++ b/modules/nf-core/rtgtools/cnveval/meta.yml
@@ -0,0 +1,141 @@
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
+name: "rtgtools_cnveval"
+description: The cnveval tool of RTG tools. It is used to evaluate called CNV regions
+  for agreement with a baseline CNV set.
+keywords:
+  - benchmarking
+  - vcf
+  - rtg-tools
+  - cnv-eval
+  - copy-number-variants
+
+tools:
+  - "rtgtools":
+      description: "RealTimeGenomics Tools -- Utilities for accurate VCF comparison
+        and manipulation."
+      homepage: "https://www.realtimegenomics.com/products/rtg-tools"
+      documentation: "https://cdn.jsdelivr.net/gh/RealTimeGenomics/rtg-tools@master/installer/resources/tools/RTGOperationsManual.pdf"
+      tool_dev_url: "https://github.com/RealTimeGenomics/rtg-tools"
+      licence: ["BSD-2-clause"]
+      identifier: ""
+
+input:
+  - - meta:
+        type: map
+        description: |
+          Groovy Map containing sample information
+          e.g. [ id:'test', single_end:false ]
+    - query_vcf:
+        type: file
+        description: A VCF with called CNV variants to benchmark against the baseline.
+          Records must contain INFO END and INFO SVTYPE (DUP or DEL).
+        pattern: "*.{vcf,vcf.gz}"
+        ontologies:
+          - edam: "http://edamontology.org/format_3016"
+    - query_vcf_tbi:
+        type: file
+        description: The index of the VCF file with called variants
+        pattern: "*.{vcf.gz.tbi,vcf.tbi}"
+        ontologies:
+          - edam: "http://edamontology.org/format_3616"
+    - truth_vcf:
+        type: file
+        description: A baseline VCF containing expected CNV calls.
+          Records must contain INFO END and INFO SVTYPE (DUP or DEL).
+        pattern: "*.{vcf,vcf.gz}"
+        ontologies:
+          - edam: "http://edamontology.org/format_3016"
+    - truth_vcf_tbi:
+        type: file
+        description: The index of the baseline VCF to compare against
+        pattern: "*.{vcf.gz.tbi,vcf.tbi}"
+        ontologies:
+          - edam: "http://edamontology.org/format_3616"
+    - evaluation_regions_bed:
+        type: file
+        description: A BED file containing the regions of interest for evaluation (required).
+          Regions are intersected with truth and calls VCFs to obtain CNV regions.
+        pattern: "*.bed"
+        ontologies:
+          - edam: "http://edamontology.org/format_3003"
+
+output:
+  baseline_bed:
+    - - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. [ id:'test', single_end:false ]
+      - "*.baseline.bed.gz":
+          type: file
+          description: A BED file containing truth CNV regions with TP/FN status, SVTYPE,
+            and the span of the original truth VCF record
+          pattern: "*.baseline.bed.gz"
+          ontologies:
+            - edam: "http://edamontology.org/format_3003"
+            - edam: "http://edamontology.org/format_3989"
+  calls_bed:
+    - - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. [ id:'test', single_end:false ]
+      - "*.calls.bed.gz":
+          type: file
+          description: A BED file containing called CNV regions with TP/FP status, SVTYPE,
+            the span of the original calls VCF record, and the score value
+          pattern: "*.calls.bed.gz"
+          ontologies:
+            - edam: "http://edamontology.org/format_3003"
+            - edam: "http://edamontology.org/format_3989"
+  weighted_roc:
+    - - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. [ id:'test', single_end:false ]
+      - "*.weighted_roc.tsv.gz":
+          type: file
+          description: TSV file containing weighted ROC data that can be plotted with rocplot
+          pattern: "*.weighted_roc.tsv.gz"
+          ontologies:
+            - edam: "http://edamontology.org/format_3475"
+            - edam: "http://edamontology.org/format_3989"
+  summary:
+    - - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. [ id:'test', single_end:false ]
+      - "*.summary.txt":
+          type: file
+          description: A TXT file containing the summary statistics of the evaluation
+          pattern: "*.summary.txt"
+          ontologies:
+            - edam: "http://edamontology.org/format_1964"
+  versions_rtgtools:
+    - - "${task.process}":
+          type: string
+          description: The name of the process
+      - "rtgtools":
+          type: string
+          description: The name of the tool
+      - "rtg version | sed 's/Product: RTG Tools //; q'":
+          type: eval
+          description: The expression to obtain the version of the tool
+
+topics:
+  versions:
+    - - ${task.process}:
+          type: string
+          description: The process the versions were collected from
+      - rtgtools:
+          type: string
+          description: The name of the tool
+      - "rtg version | sed 's/Product: RTG Tools //; q'":
+          type: eval
+          description: The expression to obtain the version of the tool
+authors:
+  - "@SevaNur"
+maintainers:
+  - "@SevaNur"
diff --git a/modules/nf-core/rtgtools/cnveval/tests/main.nf.test b/modules/nf-core/rtgtools/cnveval/tests/main.nf.test
new file mode 100644
index 000000000000..01bbd76e4479
--- /dev/null
+++ b/modules/nf-core/rtgtools/cnveval/tests/main.nf.test
@@ -0,0 +1,66 @@
+nextflow_process {
+
+    name "Test Process RTGTOOLS_CNVEVAL"
+    script "../main.nf"
+    process "RTGTOOLS_CNVEVAL"
+
+    tag "modules"
+    tag "modules_nfcore"
+    tag "rtgtools"
+    tag "rtgtools/cnveval"
+
+    test("homo_sapiens - [vcf, tbi, truth, truth_tbi, evaluation_regions_bed]") {
+
+        when {
+            process {
+                """
+                input[0] = Channel.of([
+                    [ id:'test' ], // meta map
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/chr21/simulated_sv.vcf.gz', checkIfExists: true),
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/chr21/simulated_sv.vcf.gz.tbi', checkIfExists: true),
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/chr21/simulated_sv2.vcf.gz', checkIfExists: true),
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/chr21/simulated_sv2.vcf.gz.tbi', checkIfExists: true),
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.bed', checkIfExists: true)
+                ])
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+            )
+        }
+
+    }
+
+    test("homo_sapiens - [vcf, tbi, truth, truth_tbi, evaluation_regions_bed] - stub") {
+
+        options "-stub"
+
+        when {
+            process {
+                """
+                input[0] = Channel.of([
+                    [ id:'test' ], // meta map
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/chr21/simulated_sv.vcf.gz', checkIfExists: true),
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/chr21/simulated_sv.vcf.gz.tbi', checkIfExists: true),
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/chr21/simulated_sv2.vcf.gz', checkIfExists: true),
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/chr21/simulated_sv2.vcf.gz.tbi', checkIfExists: true),
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.bed', checkIfExists: true)
+                ])
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+            )
+        }
+
+    }
+
+}
diff --git a/modules/nf-core/rtgtools/cnveval/tests/main.nf.test.snap b/modules/nf-core/rtgtools/cnveval/tests/main.nf.test.snap
new file mode 100644
index 000000000000..e50616946b9a
--- /dev/null
+++ b/modules/nf-core/rtgtools/cnveval/tests/main.nf.test.snap
@@ -0,0 +1,180 @@
+{
+    "homo_sapiens - [vcf, tbi, truth, truth_tbi, evaluation_regions_bed]": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.baseline.bed.gz:md5,d93ffeef4c9bd38370f526980904dac6"
+                    ]
+                ],
+                "1": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.calls.bed.gz:md5,d93ffeef4c9bd38370f526980904dac6"
+                    ]
+                ],
+                "2": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.weighted_roc.tsv.gz:md5,b9f4c2716ef1c8fb3341d1541982cfe9"
+                    ]
+                ],
+                "3": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.summary.txt:md5,f4c8df93c8bdab603036bbc27b4a28c3"
+                    ]
+                ],
+                "4": [
+                    [
+                        "RTGTOOLS_CNVEVAL",
+                        "rtgtools",
+                        "3.13"
+                    ]
+                ],
+                "baseline_bed": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.baseline.bed.gz:md5,d93ffeef4c9bd38370f526980904dac6"
+                    ]
+                ],
+                "calls_bed": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.calls.bed.gz:md5,d93ffeef4c9bd38370f526980904dac6"
+                    ]
+                ],
+                "summary": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.summary.txt:md5,f4c8df93c8bdab603036bbc27b4a28c3"
+                    ]
+                ],
+                "versions_rtgtools": [
+                    [
+                        "RTGTOOLS_CNVEVAL",
+                        "rtgtools",
+                        "3.13"
+                    ]
+                ],
+                "weighted_roc": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.weighted_roc.tsv.gz:md5,b9f4c2716ef1c8fb3341d1541982cfe9"
+                    ]
+                ]
+            }
+        ],
+        "timestamp": "2026-03-13T02:20:33.987241",
+        "meta": {
+            "nf-test": "0.9.4",
+            "nextflow": "25.10.4"
+        }
+    },
+    "homo_sapiens - [vcf, tbi, truth, truth_tbi, evaluation_regions_bed] - stub": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.baseline.bed.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+                    ]
+                ],
+                "1": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.calls.bed.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+                    ]
+                ],
+                "2": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.weighted_roc.tsv.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+                    ]
+                ],
+                "3": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.summary.txt:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "4": [
+                    [
+                        "RTGTOOLS_CNVEVAL",
+                        "rtgtools",
+                        "3.13"
+                    ]
+                ],
+                "baseline_bed": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.baseline.bed.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+                    ]
+                ],
+                "calls_bed": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.calls.bed.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+                    ]
+                ],
+                "summary": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.summary.txt:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "versions_rtgtools": [
+                    [
+                        "RTGTOOLS_CNVEVAL",
+                        "rtgtools",
+                        "3.13"
+                    ]
+                ],
+                "weighted_roc": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.weighted_roc.tsv.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+                    ]
+                ]
+            }
+        ],
+        "timestamp": "2026-03-13T02:20:43.791597",
+        "meta": {
+            "nf-test": "0.9.4",
+            "nextflow": "25.10.4"
+        }
+    }
+}
\ No newline at end of file