Skip to content

Commit 5ce6910

Browse files
New module for mergestr
1 parent d07a945 commit 5ce6910

6 files changed

Lines changed: 324 additions & 0 deletions

File tree

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
---
2+
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
3+
channels:
4+
- conda-forge
5+
- bioconda
6+
dependencies:
7+
- "bioconda::trtools=6.1.0"
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
process TRTOOLS_MERGESTR {
2+
tag "$meta.id"
3+
label 'process_single'
4+
5+
conda "${moduleDir}/environment.yml"
6+
container "${ workflow.containerEngine in ['singularity', 'apptainer'] && !task.ext.singularity_pull_docker_container ?
7+
'https://depot.galaxyproject.org/singularity/trtools:6.1.0--pyhdfd78af_0':
8+
'quay.io/biocontainers/trtools:6.1.0--pyhdfd78af_0' }"
9+
10+
input:
11+
tuple val(meta), path(vcfs), path(tbis)
12+
13+
output:
14+
tuple val(meta), path("*.vcf.gz"), emit: vcf
15+
tuple val(meta), path("*.vcf.gz.tbi"), emit: tbi
16+
tuple val("${task.process}"), val('trtools'), eval("mergeSTR --version | sed 's/mergeSTR //'"), topic: versions, emit: versions_trtools
17+
18+
when:
19+
task.ext.when == null || task.ext.when
20+
21+
script:
22+
def args = task.ext.args ?: ''
23+
def prefix = task.ext.prefix ?: "${meta.id}_mergestr"
24+
def input = vcfs.sort { vcf -> vcf.toString() }.join(",")
25+
26+
"""
27+
mergeSTR \\
28+
--vcfs ${input} \\
29+
--out ${prefix} \\
30+
${args}
31+
32+
bgzip -f ${prefix}.vcf
33+
tabix -f -p vcf ${prefix}.vcf.gz
34+
"""
35+
36+
stub:
37+
def prefix = task.ext.prefix ?: "${meta.id}"
38+
39+
"""
40+
echo "" | gzip > ${prefix}.vcf.gz
41+
touch ${prefix}.vcf.gz.tbi
42+
"""
43+
}
Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
name: "trtools_mergestr"
2+
description: MergeSTR merges multiple VCF files produced by the same TR
3+
genotyper into a single VCF file.
4+
keywords:
5+
- tandem repeats
6+
- str
7+
- vcf
8+
- merge
9+
- trtools
10+
tools:
11+
- "trtools":
12+
description: "Toolkit for genome-wide analysis of tandem repeats"
13+
homepage: "https://trtools.readthedocs.io/"
14+
documentation: "https://trtools.readthedocs.io/"
15+
tool_dev_url: "https://github.com/gymrek-lab/TRTools"
16+
doi: "10.1093/bioinformatics/btaa736"
17+
licence:
18+
- "MIT"
19+
identifier: biotools:trtools
20+
input:
21+
- - meta:
22+
type: map
23+
description: |
24+
Groovy Map containing sample information
25+
e.g. `[ id:'sample1' ]`
26+
- vcfs:
27+
type: file
28+
description: List containing 2 or more bgzipped tandem repeat VCF files
29+
e.g. [ 'file1.vcf.gz', 'file2.vcf.gz' ]
30+
pattern: "*.{vcf.gz}"
31+
ontologies:
32+
- edam: "http://edamontology.org/format_3016"
33+
- tbis:
34+
type: file
35+
description: List containing the tbi index files corresponding to the vcfs
36+
input files e.g. [ 'file1.vcf.gz.tbi', 'file2.vcf.gz.tbi' ]
37+
pattern: "*.{vcf.gz.tbi}"
38+
ontologies: []
39+
output:
40+
vcf:
41+
- - meta:
42+
type: map
43+
description: |
44+
Groovy Map containing sample information
45+
e.g. `[ id:'sample1' ]`
46+
- "*.vcf.gz":
47+
type: file
48+
description: Merged VCF file with the merged genotypes
49+
pattern: "*.vcf.gz"
50+
ontologies:
51+
- edam: "http://edamontology.org/format_3016"
52+
- edam: "http://edamontology.org/format_3989"
53+
tbi:
54+
- - meta:
55+
type: map
56+
description: |
57+
Groovy Map containing sample information
58+
e.g. `[ id:'sample1' ]`
59+
- "*.vcf.gz.tbi":
60+
type: file
61+
description: Tabix index for the merged VCF file
62+
pattern: "*.vcf.gz.tbi"
63+
ontologies:
64+
- edam: "http://edamontology.org/format_3700"
65+
versions_trtools:
66+
- - ${task.process}:
67+
type: string
68+
description: The name of the process
69+
- trtools:
70+
type: string
71+
description: The name of the tool
72+
- mergeSTR --version | sed 's/mergeSTR //':
73+
type: eval
74+
description: The expression to obtain the version of the tool
75+
topics:
76+
versions:
77+
- - ${task.process}:
78+
type: string
79+
description: The name of the process
80+
- trtools:
81+
type: string
82+
description: The name of the tool
83+
- mergeSTR --version | sed 's/mergeSTR //':
84+
type: eval
85+
description: The expression to obtain the version of the tool
86+
authors:
87+
- "@KondratievaOlesya"
88+
maintainers:
89+
- "@KondratievaOlesya"
Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,117 @@
1+
nextflow_process {
2+
3+
name "Test Process TRTOOLS_MERGESTR"
4+
script "../main.nf"
5+
config "./nextflow.config"
6+
process "TRTOOLS_MERGESTR"
7+
8+
tag "modules"
9+
tag "modules_nfcore"
10+
tag "gangstr"
11+
tag "trtools"
12+
tag "trtools/mergestr"
13+
14+
setup {
15+
run("GANGSTR", alias: "GANGSTR1") {
16+
script "modules/nf-core/gangstr/main.nf"
17+
18+
process {
19+
"""
20+
bed1 = Channel.of('chr22\t3000\t3020\t5\tCGCGC')
21+
.collectFile(name: 'genome1.bed', newLine: true)
22+
23+
input[0] = Channel.of([
24+
[id:'test1'],
25+
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists:true),
26+
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram.crai', checkIfExists:true)
27+
]).combine(bed1)
28+
29+
input[1] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists:true)
30+
input[2] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists:true)
31+
"""
32+
}
33+
}
34+
35+
run("GANGSTR", alias: "GANGSTR2") {
36+
script "modules/nf-core/gangstr/main.nf"
37+
38+
process {
39+
"""
40+
bed2 = Channel.of('chr22\t3000\t3020\t5\tCGCGC')
41+
.collectFile(name: 'genome2.bed', newLine: true)
42+
43+
input[0] = Channel.of([
44+
[id:'test2'],
45+
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists:true),
46+
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram.crai', checkIfExists:true)
47+
]).combine(bed2)
48+
49+
input[1] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists:true)
50+
input[2] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists:true)
51+
"""
52+
}
53+
}
54+
}
55+
56+
test("homo_sapiens - gangstr vcfs") {
57+
58+
when {
59+
process {
60+
"""
61+
input[0] = GANGSTR1.out.vcf
62+
.combine(GANGSTR2.out.vcf)
63+
.combine(GANGSTR1.out.index)
64+
.combine(GANGSTR2.out.index)
65+
.map { meta1, vcf1, meta2, vcf2, meta3, tbi1, meta4, tbi2 ->
66+
[
67+
[ id: 'test' ],
68+
[ vcf1, vcf2 ],
69+
[ tbi1, tbi2 ]
70+
]
71+
}
72+
"""
73+
}
74+
}
75+
76+
then {
77+
assertAll(
78+
{ assert process.success },
79+
{ assert snapshot(
80+
path(process.out.vcf.get(0).get(1)).vcf.summary,
81+
file(process.out.tbi[0][1]).name,
82+
process.out.findAll { key, val -> key.startsWith('versions') }
83+
).match() }
84+
)
85+
}
86+
}
87+
88+
test("homo_sapiens - gangstr vcfs - stub") {
89+
90+
options "-stub"
91+
92+
when {
93+
process {
94+
"""
95+
input[0] = GANGSTR1.out.vcf
96+
.combine(GANGSTR2.out.vcf)
97+
.combine(GANGSTR1.out.index)
98+
.combine(GANGSTR2.out.index)
99+
.map { meta1, vcf1, meta2, vcf2, meta3, tbi1, meta4, tbi2 ->
100+
[
101+
[ id: 'test' ],
102+
[ vcf1, vcf2 ],
103+
[ tbi1, tbi2 ]
104+
]
105+
}
106+
"""
107+
}
108+
}
109+
110+
then {
111+
assertAll(
112+
{ assert process.success },
113+
{ assert snapshot(sanitizeOutput(process.out)).match() }
114+
)
115+
}
116+
}
117+
}
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
{
2+
"homo_sapiens - gangstr vcfs - stub": {
3+
"content": [
4+
{
5+
"tbi": [
6+
[
7+
{
8+
"id": "test"
9+
},
10+
"test.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e"
11+
]
12+
],
13+
"vcf": [
14+
[
15+
{
16+
"id": "test"
17+
},
18+
"test.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
19+
]
20+
],
21+
"versions_trtools": [
22+
[
23+
"TRTOOLS_MERGESTR",
24+
"trtools",
25+
"6.1.0"
26+
]
27+
]
28+
}
29+
],
30+
"timestamp": "2026-06-16T14:19:01.247840767",
31+
"meta": {
32+
"nf-test": "0.9.5",
33+
"nextflow": "26.04.3"
34+
}
35+
},
36+
"homo_sapiens - gangstr vcfs": {
37+
"content": [
38+
"VcfFile [chromosomes=[chr22], sampleCount=2, variantCount=1, phased=false, phasedAutodetect=false]",
39+
"test_mergestr.vcf.gz.tbi",
40+
{
41+
"versions_trtools": [
42+
[
43+
"TRTOOLS_MERGESTR",
44+
"trtools",
45+
"6.1.0"
46+
]
47+
]
48+
}
49+
],
50+
"timestamp": "2026-06-16T14:11:27.91327297",
51+
"meta": {
52+
"nf-test": "0.9.5",
53+
"nextflow": "26.04.3"
54+
}
55+
}
56+
}
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
process {
2+
withName: TRTOOLS_MERGESTR {
3+
ext.args = '--vcftype gangstr --update-sample-from-file'
4+
}
5+
withName: GANGSTR1 {
6+
ext.args = '--insertmean 300 --insertsdev 50 --min-sample-reads 1 --max-proc-read 100000'
7+
}
8+
9+
withName: GANGSTR2 {
10+
ext.args = '--insertmean 300 --insertsdev 50 --min-sample-reads 1 --max-proc-read 100000'
11+
}
12+
}

0 commit comments

Comments
 (0)