-
Notifications
You must be signed in to change notification settings - Fork 1k
Add rtgtools/cnveval module for CNV evaluation #10795
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,7 @@ | ||
| --- | ||
| # yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json | ||
| channels: | ||
| - conda-forge | ||
| - bioconda | ||
| dependencies: | ||
| - "bioconda::rtg-tools=3.13" |
| Original file line number | Diff line number | Diff line change | ||
|---|---|---|---|---|
| @@ -0,0 +1,52 @@ | ||||
| process RTGTOOLS_CNVEVAL { | ||||
| tag "$meta.id" | ||||
| label 'process_medium' | ||||
|
|
||||
| conda "${moduleDir}/environment.yml" | ||||
| container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? | ||||
| 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/dc/dca5ba13b7ec38bf7cacf00a33517b9080067bea638745c05d50a4957c75fc2e/data': | ||||
| 'community.wave.seqera.io/library/rtg-tools:3.13--3465421f1b0be0ce' }" | ||||
|
|
||||
| input: | ||||
| tuple val(meta), path(query_vcf), path(query_vcf_tbi), path(truth_vcf), path(truth_vcf_tbi), path(evaluation_regions_bed) | ||||
|
|
||||
| output: | ||||
| tuple val(meta), path("*.baseline.bed.gz") , emit: baseline_bed | ||||
| tuple val(meta), path("*.calls.bed.gz") , emit: calls_bed | ||||
| tuple val(meta), path("*.weighted_roc.tsv.gz") , emit: weighted_roc | ||||
| tuple val(meta), path("*.summary.txt") , emit: summary | ||||
| tuple val("${task.process}"), val('rtgtools'), eval("rtg version | sed 's/Product: RTG Tools //; q'"), topic: versions, emit: versions_rtgtools | ||||
|
|
||||
| when: | ||||
| task.ext.when == null || task.ext.when | ||||
|
|
||||
| script: | ||||
| def args = task.ext.args ?: "" | ||||
| def prefix = task.ext.prefix ?: "${meta.id}" | ||||
| def avail_mem = task.memory.toGiga() + "G" | ||||
|
|
||||
| """ | ||||
| rtg RTG_MEM=$avail_mem cnveval \\ | ||||
| ${args} \\ | ||||
| --baseline=${truth_vcf} \\ | ||||
| --calls=${query_vcf} \\ | ||||
| --evaluation-regions=${evaluation_regions_bed} \\ | ||||
SevaNur marked this conversation as resolved.
Show resolved
Hide resolved
|
||||
| --output=${prefix} \\ | ||||
| --threads=${task.cpus} | ||||
|
|
||||
| cd ${prefix}/ | ||||
| mv done progress .. | ||||
| for f in * ; do mv "\$f" "../${prefix}.\$f" ; done | ||||
| cd .. | ||||
|
Comment on lines
+37
to
+40
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I don't know what the […]
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Good catch. This block was copied directly from another rtgtools module. That specific move command just pulls the tracking files out of the folder; there is no need to do a double cd. […] may be better? |
||||
| """ | ||||
|
|
||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||
| stub: | ||||
| def prefix = task.ext.prefix ?: "${meta.id}" | ||||
|
|
||||
| """ | ||||
| echo | gzip > ${prefix}.baseline.bed.gz | ||||
| echo | gzip > ${prefix}.calls.bed.gz | ||||
| echo | gzip > ${prefix}.weighted_roc.tsv.gz | ||||
| touch ${prefix}.summary.txt | ||||
| """ | ||||
| } | ||||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,141 @@ | ||
| # yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json | ||
| name: "rtgtools_cnveval" | ||
| description: The cnveval tool of RTG tools. It is used to evaluate called CNV regions | ||
| for agreement with a baseline CNV set. | ||
| keywords: | ||
| - benchmarking | ||
| - vcf | ||
| - rtg-tools | ||
| - cnv-eval | ||
| - copy-number-variants | ||
|
|
||
| tools: | ||
| - "rtgtools": | ||
| description: "RealTimeGenomics Tools -- Utilities for accurate VCF comparison | ||
| and manipulation." | ||
| homepage: "https://www.realtimegenomics.com/products/rtg-tools" | ||
| documentation: "https://cdn.jsdelivr.net/gh/RealTimeGenomics/rtg-tools@master/installer/resources/tools/RTGOperationsManual.pdf" | ||
| tool_dev_url: "https://github.com/RealTimeGenomics/rtg-tools" | ||
| licence: ["BSD-2-clause"] | ||
| identifier: "" | ||
|
|
||
| input: | ||
| - - meta: | ||
| type: map | ||
| description: | | ||
| Groovy Map containing sample information | ||
| e.g. [ id:'test', single_end:false ] | ||
| - query_vcf: | ||
| type: file | ||
| description: A VCF with called CNV variants to benchmark against the baseline. | ||
| Records must contain INFO END and INFO SVTYPE (DUP or DEL). | ||
| pattern: "*.{vcf,vcf.gz}" | ||
| ontologies: | ||
| - edam: "http://edamontology.org/format_3016" | ||
| - query_vcf_tbi: | ||
| type: file | ||
| description: The index of the VCF file with called variants | ||
| pattern: "*.{vcf.gz.tbi,vcf.tbi}" | ||
| ontologies: | ||
| - edam: "http://edamontology.org/format_3616" | ||
| - truth_vcf: | ||
| type: file | ||
| description: A baseline VCF containing expected CNV calls. | ||
| Records must contain INFO END and INFO SVTYPE (DUP or DEL). | ||
| pattern: "*.{vcf,vcf.gz}" | ||
| ontologies: | ||
| - edam: "http://edamontology.org/format_3016" | ||
| - truth_vcf_tbi: | ||
| type: file | ||
| description: The index of the baseline VCF to compare against | ||
| pattern: "*.{vcf.gz.tbi,vcf.tbi}" | ||
| ontologies: | ||
| - edam: "http://edamontology.org/format_3616" | ||
| - evaluation_regions_bed: | ||
| type: file | ||
| description: A BED file containing the regions of interest for evaluation (required). | ||
| Regions are intersected with truth and calls VCFs to obtain CNV regions. | ||
| pattern: "*.bed" | ||
| ontologies: | ||
| - edam: "http://edamontology.org/format_3003" | ||
|
|
||
| output: | ||
| baseline_bed: | ||
| - - meta: | ||
| type: map | ||
| description: | | ||
| Groovy Map containing sample information | ||
| e.g. [ id:'test', single_end:false ] | ||
| - "*.baseline.bed.gz": | ||
| type: file | ||
| description: A BED file containing truth CNV regions with TP/FN status, SVTYPE, | ||
| and the span of the original truth VCF record | ||
| pattern: "*.baseline.bed.gz" | ||
| ontologies: | ||
| - edam: "http://edamontology.org/format_3003" | ||
| - edam: "http://edamontology.org/format_3989" | ||
| calls_bed: | ||
| - - meta: | ||
| type: map | ||
| description: | | ||
| Groovy Map containing sample information | ||
| e.g. [ id:'test', single_end:false ] | ||
| - "*.calls.bed.gz": | ||
| type: file | ||
| description: A BED file containing called CNV regions with TP/FP status, SVTYPE, | ||
| the span of the original calls VCF record, and the score value | ||
| pattern: "*.calls.bed.gz" | ||
| ontologies: | ||
| - edam: "http://edamontology.org/format_3003" | ||
| - edam: "http://edamontology.org/format_3989" | ||
| weighted_roc: | ||
| - - meta: | ||
| type: map | ||
| description: | | ||
| Groovy Map containing sample information | ||
| e.g. [ id:'test', single_end:false ] | ||
| - "*.weighted_roc.tsv.gz": | ||
| type: file | ||
| description: TSV file containing weighted ROC data that can be plotted with rocplot | ||
| pattern: "*.weighted_roc.tsv.gz" | ||
| ontologies: | ||
| - edam: "http://edamontology.org/format_3475" | ||
| - edam: "http://edamontology.org/format_3989" | ||
| summary: | ||
| - - meta: | ||
| type: map | ||
| description: | | ||
| Groovy Map containing sample information | ||
| e.g. [ id:'test', single_end:false ] | ||
| - "*.summary.txt": | ||
| type: file | ||
| description: A TXT file containing the summary statistics of the evaluation | ||
| pattern: "*.summary.txt" | ||
| ontologies: | ||
| - edam: "http://edamontology.org/format_1964" | ||
| versions_rtgtools: | ||
| - - "${task.process}": | ||
| type: string | ||
| description: The name of the process | ||
| - "rtgtools": | ||
| type: string | ||
| description: The name of the tool | ||
| - "rtg version | sed 's/Product: RTG Tools //; q'": | ||
| type: eval | ||
| description: The expression to obtain the version of the tool | ||
|
|
||
| topics: | ||
| versions: | ||
| - - ${task.process}: | ||
| type: string | ||
| description: The process the versions were collected from | ||
| - rtgtools: | ||
| type: string | ||
| description: The name of the tool | ||
| - "rtg version | sed 's/Product: RTG Tools //; q'": | ||
| type: eval | ||
| description: The expression to obtain the version of the tool | ||
| authors: | ||
| - "@SevaNur" | ||
| maintainers: | ||
| - "@SevaNur" |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,66 @@ | ||
| nextflow_process { | ||
|
|
||
| name "Test Process RTGTOOLS_CNVEVAL" | ||
| script "../main.nf" | ||
| process "RTGTOOLS_CNVEVAL" | ||
|
|
||
| tag "modules" | ||
| tag "modules_nfcore" | ||
| tag "rtgtools" | ||
| tag "rtgtools/cnveval" | ||
|
|
||
| test("homo_sapiens - [vcf, tbi, truth, truth_tbi, evaluation_regions_bed]") { | ||
|
|
||
| when { | ||
| process { | ||
| """ | ||
| input[0] = Channel.of([ | ||
| [ id:'test' ], // meta map | ||
| file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/chr21/simulated_sv.vcf.gz', checkIfExists: true), | ||
| file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/chr21/simulated_sv.vcf.gz.tbi', checkIfExists: true), | ||
| file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/chr21/simulated_sv2.vcf.gz', checkIfExists: true), | ||
| file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/chr21/simulated_sv2.vcf.gz.tbi', checkIfExists: true), | ||
| file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.bed', checkIfExists: true) | ||
| ]) | ||
| """ | ||
| } | ||
| } | ||
|
|
||
| then { | ||
| assertAll( | ||
| { assert process.success }, | ||
| { assert snapshot(process.out).match() } | ||
| ) | ||
| } | ||
|
|
||
| } | ||
|
|
||
| test("homo_sapiens - [vcf, tbi, truth, truth_tbi, evaluation_regions_bed] - stub") { | ||
|
|
||
| options "-stub" | ||
|
|
||
| when { | ||
| process { | ||
| """ | ||
| input[0] = Channel.of([ | ||
| [ id:'test' ], // meta map | ||
| file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/chr21/simulated_sv.vcf.gz', checkIfExists: true), | ||
| file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/chr21/simulated_sv.vcf.gz.tbi', checkIfExists: true), | ||
| file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/chr21/simulated_sv2.vcf.gz', checkIfExists: true), | ||
| file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/chr21/simulated_sv2.vcf.gz.tbi', checkIfExists: true), | ||
| file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.bed', checkIfExists: true) | ||
| ]) | ||
| """ | ||
| } | ||
| } | ||
|
|
||
| then { | ||
| assertAll( | ||
| { assert process.success }, | ||
| { assert snapshot(process.out).match() } | ||
| ) | ||
| } | ||
|
|
||
| } | ||
|
|
||
| } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.