New module for mergestr

KondratievaOlesya · KondratievaOlesya · commit 5ce6910c0dac · 2026-06-16T14:22:27.000+02:00
diff --git a/modules/nf-core/trtools/mergestr/environment.yml b/modules/nf-core/trtools/mergestr/environment.yml
@@ -0,0 +1,7 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+channels:
+  - conda-forge
+  - bioconda
+dependencies:
+  - "bioconda::trtools=6.1.0"  # same trtools version (6.1.0) as the container tags in main.nf; bump them together
diff --git a/modules/nf-core/trtools/mergestr/main.nf b/modules/nf-core/trtools/mergestr/main.nf
@@ -0,0 +1,43 @@
+process TRTOOLS_MERGESTR { // Merges several tandem-repeat VCFs with mergeSTR, then bgzips and tabix-indexes the result
+    tag "$meta.id"
+    label 'process_single'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine in ['singularity', 'apptainer'] && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/trtools:6.1.0--pyhdfd78af_0':
+        'quay.io/biocontainers/trtools:6.1.0--pyhdfd78af_0' }"
+
+    input:
+    tuple val(meta), path(vcfs), path(tbis) // tbis are staged but never named in the script; presumably mergeSTR looks for them next to the VCFs - confirm
+
+    output:
+    tuple val(meta), path("*.vcf.gz"),     emit: vcf
+    tuple val(meta), path("*.vcf.gz.tbi"), emit: tbi
+    tuple val("${task.process}"), val('trtools'), eval("mergeSTR --version | sed 's/mergeSTR //'"), topic: versions, emit: versions_trtools
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: '' // extra mergeSTR command-line options taken from ext.args
+    def prefix = task.ext.prefix ?: "${meta.id}_mergestr" // output basename; defaults to <meta.id>_mergestr
+    def input = vcfs.sort { vcf -> vcf.toString() }.join(",") // VCFs sorted by path string and comma-joined for --vcfs
+
+    """
+    mergeSTR \\
+        --vcfs ${input} \\
+        --out ${prefix} \\
+        ${args}
+
+    bgzip -f ${prefix}.vcf
+    tabix -f -p vcf ${prefix}.vcf.gz
+    """
+
+    stub:
+    def prefix = task.ext.prefix ?: "${meta.id}" // NOTE(review): the script block defaults to <meta.id>_mergestr, so stub outputs are named differently - confirm this is intended
+
+    """
+    echo "" | gzip > ${prefix}.vcf.gz
+    touch ${prefix}.vcf.gz.tbi
+    """
+}
diff --git a/modules/nf-core/trtools/mergestr/meta.yml b/modules/nf-core/trtools/mergestr/meta.yml
@@ -0,0 +1,89 @@
+name: "trtools_mergestr"
+description: MergeSTR merges multiple VCF files produced by the same TR
+  genotyper into a single VCF file.
+keywords:
+  - tandem repeats
+  - str
+  - vcf
+  - merge
+  - trtools
+tools:
+  - "trtools":
+      description: "Toolkit for genome-wide analysis of tandem repeats"
+      homepage: "https://trtools.readthedocs.io/"
+      documentation: "https://trtools.readthedocs.io/"
+      tool_dev_url: "https://github.com/gymrek-lab/TRTools"
+      doi: "10.1093/bioinformatics/btaa736"
+      licence:
+        - "MIT"
+      identifier: biotools:trtools
+input:
+  - - meta:
+        type: map
+        description: |
+          Groovy Map containing sample information
+          e.g. `[ id:'sample1' ]`
+    - vcfs:
+        type: file
+        description: List containing 2 or more bgzipped tandem repeat VCF files
+          e.g. [ 'file1.vcf.gz', 'file2.vcf.gz' ]
+        pattern: "*.{vcf.gz}"
+        ontologies:
+          - edam: "http://edamontology.org/format_3016"
+    - tbis:
+        type: file
+        description: List containing the tbi index files corresponding to the vcfs
+          input files e.g. [ 'file1.vcf.gz.tbi', 'file2.vcf.gz.tbi' ]
+        pattern: "*.{vcf.gz.tbi}"
+        ontologies: []
+output:
+  vcf:
+    - - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. `[ id:'sample1' ]`
+      - "*.vcf.gz":
+          type: file
+          description: Merged VCF file with the merged genotypes
+          pattern: "*.vcf.gz"
+          ontologies:
+            - edam: "http://edamontology.org/format_3016"
+            - edam: "http://edamontology.org/format_3989"
+  tbi:
+    - - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. `[ id:'sample1' ]`
+      - "*.vcf.gz.tbi":
+          type: file
+          description: Tabix index for the merged VCF file
+          pattern: "*.vcf.gz.tbi"
+          ontologies:
+            - edam: "http://edamontology.org/format_3700"
+  versions_trtools:
+    - - ${task.process}:
+          type: string
+          description: The name of the process
+      - trtools:
+          type: string
+          description: The name of the tool
+      - mergeSTR --version | sed 's/mergeSTR //':
+          type: eval
+          description: The expression to obtain the version of the tool
+topics:
+  versions:
+    - - ${task.process}:
+          type: string
+          description: The name of the process
+      - trtools:
+          type: string
+          description: The name of the tool
+      - mergeSTR --version | sed 's/mergeSTR //':
+          type: eval
+          description: The expression to obtain the version of the tool
+authors:
+  - "@KondratievaOlesya"
+maintainers:
+  - "@KondratievaOlesya"
diff --git a/modules/nf-core/trtools/mergestr/tests/main.nf.test b/modules/nf-core/trtools/mergestr/tests/main.nf.test
@@ -0,0 +1,117 @@
+nextflow_process { // nf-test suite for TRTOOLS_MERGESTR: one real run and one stub run, both fed by two GANGSTR setup runs
+
+    name "Test Process TRTOOLS_MERGESTR"
+    script "../main.nf"
+    config "./nextflow.config"
+    process "TRTOOLS_MERGESTR"
+
+    tag "modules"
+    tag "modules_nfcore"
+    tag "gangstr"
+    tag "trtools"
+    tag "trtools/mergestr"
+
+    setup { // runs GANGSTR twice (aliases GANGSTR1 and GANGSTR2) to produce the two VCFs that get merged
+        run("GANGSTR", alias: "GANGSTR1") { // first input VCF, meta id 'test1'
+            script "modules/nf-core/gangstr/main.nf"
+
+            process {
+                """
+                bed1 = Channel.of('chr22\t3000\t3020\t5\tCGCGC')
+                    .collectFile(name: 'genome1.bed', newLine: true)
+
+                input[0] = Channel.of([
+                    [id:'test1'],
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists:true),
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram.crai', checkIfExists:true)
+                ]).combine(bed1)
+
+                input[1] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists:true)
+                input[2] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists:true)
+                """
+            }
+        }
+
+        run("GANGSTR", alias: "GANGSTR2") { // second input VCF: same CRAM, reference and BED line as GANGSTR1, meta id 'test2'
+            script "modules/nf-core/gangstr/main.nf"
+
+            process {
+                """
+                bed2 = Channel.of('chr22\t3000\t3020\t5\tCGCGC')
+                    .collectFile(name: 'genome2.bed', newLine: true)
+
+                input[0] = Channel.of([
+                    [id:'test2'],
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists:true),
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram.crai', checkIfExists:true)
+                ]).combine(bed2)
+
+                input[1] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists:true)
+                input[2] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists:true)
+                """
+            }
+        }
+    }
+
+    test("homo_sapiens - gangstr vcfs") { // real execution: merges the two GANGSTR VCFs under meta id 'test'
+
+        when {
+            process {
+                """
+                input[0] = GANGSTR1.out.vcf
+                    .combine(GANGSTR2.out.vcf)
+                    .combine(GANGSTR1.out.index)
+                    .combine(GANGSTR2.out.index)
+                    .map { meta1, vcf1, meta2, vcf2, meta3, tbi1, meta4, tbi2 ->
+                        [
+                            [ id: 'test' ],
+                            [ vcf1, vcf2 ],
+                            [ tbi1, tbi2 ]
+                        ]
+                    }
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot( // snapshots the VCF summary, the index file name and the versions_* outputs, not file checksums
+                    path(process.out.vcf.get(0).get(1)).vcf.summary,
+                    file(process.out.tbi[0][1]).name,
+                    process.out.findAll { key, val -> key.startsWith('versions') }
+                ).match() }
+            )
+        }
+    }
+
+    test("homo_sapiens - gangstr vcfs - stub") { // same inputs as the real test, executed with -stub
+
+        options "-stub"
+
+        when {
+            process {
+                """
+                input[0] = GANGSTR1.out.vcf
+                    .combine(GANGSTR2.out.vcf)
+                    .combine(GANGSTR1.out.index)
+                    .combine(GANGSTR2.out.index)
+                    .map { meta1, vcf1, meta2, vcf2, meta3, tbi1, meta4, tbi2 ->
+                        [
+                            [ id: 'test' ],
+                            [ vcf1, vcf2 ],
+                            [ tbi1, tbi2 ]
+                        ]
+                    }
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(sanitizeOutput(process.out)).match() }
+            )
+        }
+    }
+}
diff --git a/modules/nf-core/trtools/mergestr/tests/main.nf.test.snap b/modules/nf-core/trtools/mergestr/tests/main.nf.test.snap
@@ -0,0 +1,56 @@
+{
+    "homo_sapiens - gangstr vcfs - stub": {
+        "content": [
+            {
+                "tbi": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "vcf": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+                    ]
+                ],
+                "versions_trtools": [
+                    [
+                        "TRTOOLS_MERGESTR",
+                        "trtools",
+                        "6.1.0"
+                    ]
+                ]
+            }
+        ],
+        "timestamp": "2026-06-16T14:19:01.247840767",
+        "meta": {
+            "nf-test": "0.9.5",
+            "nextflow": "26.04.3"
+        }
+    },
+    "homo_sapiens - gangstr vcfs": {
+        "content": [
+            "VcfFile [chromosomes=[chr22], sampleCount=2, variantCount=1, phased=false, phasedAutodetect=false]",
+            "test_mergestr.vcf.gz.tbi",
+            {
+                "versions_trtools": [
+                    [
+                        "TRTOOLS_MERGESTR",
+                        "trtools",
+                        "6.1.0"
+                    ]
+                ]
+            }
+        ],
+        "timestamp": "2026-06-16T14:11:27.91327297",
+        "meta": {
+            "nf-test": "0.9.5",
+            "nextflow": "26.04.3"
+        }
+    }
+}
diff --git a/modules/nf-core/trtools/mergestr/tests/nextflow.config b/modules/nf-core/trtools/mergestr/tests/nextflow.config
@@ -0,0 +1,12 @@
+process { // per-process ext.args used only by the nf-test suite in this directory
+    withName: TRTOOLS_MERGESTR {
+        ext.args = '--vcftype gangstr --update-sample-from-file' // NOTE(review): both test VCFs are called from the same CRAM; --update-sample-from-file presumably keeps their sample names distinct - confirm
+    }
+    withName: GANGSTR1 { // GANGSTR1 and GANGSTR2 are given identical arguments
+        ext.args = '--insertmean 300 --insertsdev 50 --min-sample-reads 1 --max-proc-read 100000'
+    }
+
+    withName: GANGSTR2 {
+        ext.args = '--insertmean 300 --insertsdev 50 --min-sample-reads 1 --max-proc-read 100000'
+    }
+}