From 75efd153dda376148e45ab751cdc6d37af2c1e0f Mon Sep 17 00:00:00 2001
From: DLBPointon <damonlbp@hotmail.co.uk>
Date: Mon, 4 Aug 2025 15:56:45 +0100
Subject: [PATCH 01/58] Updates to modules

---
 modules/local/extract/telo/main.nf  | 43 ++++++++++++++++
 modules/local/gawk/environment.yml  |  7 +++
 modules/local/gawk/main.nf          | 68 +++++++++++++++++++++++++
 modules/local/gawk/meta.yml         | 63 +++++++++++++++++++++++
 modules/local/pretext/graph/main.nf | 77 +++++++++++++++++++++++++----
 5 files changed, 248 insertions(+), 10 deletions(-)
 create mode 100755 modules/local/extract/telo/main.nf
 create mode 100644 modules/local/gawk/environment.yml
 create mode 100644 modules/local/gawk/main.nf
 create mode 100644 modules/local/gawk/meta.yml

diff --git a/modules/local/extract/telo/main.nf b/modules/local/extract/telo/main.nf
new file mode 100755
index 00000000..380c1acf
--- /dev/null
+++ b/modules/local/extract/telo/main.nf
@@ -0,0 +1,43 @@
+process EXTRACT_TELO { // Builds a 3-column BED and a 4-column bedGraph from columns 2, 4 and 5 of the input table
+    tag "${meta.id}"
+    label 'process_low'
+
+    conda "conda-forge::coreutils=9.1"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+    'https://depot.galaxyproject.org/singularity/ubuntu:20.04' :
+    'docker.io/ubuntu:20.04' }"
+
+    input:
+    tuple val( meta ), path( file )
+
+    output:
+    tuple val( meta ), path( "*.bed" )     , emit: bed       // `path` replaces the legacy `file` output qualifier
+    tuple val( meta ), path( "*.bedgraph" ), emit: bedgraph
+    path "versions.yml"                    , emit: versions
+
+    script:
+    def prefix  = task.ext.prefix ?: "${meta.id}"
+    def VERSION = "9.1" // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions.
+    """
+    awk '{print \$2"\\t"\$4"\\t"\$5}' "${file}" | sed 's/>//g' > ${prefix}_telomere.bed
+    awk '{print \$2"\\t"\$4"\\t"\$5"\\t"(((\$5-\$4)<0)?-(\$5-\$4):(\$5-\$4))}' "${file}" | sed 's/>//g' > ${prefix}_telomere.bedgraph
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        coreutils: $VERSION
+    END_VERSIONS
+    """
+
+    stub:
+    def prefix  = task.ext.prefix ?: "${meta.id}"
+    def VERSION = "9.1" // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions.
+    """
+    touch ${prefix}_telomere.bed
+    touch ${prefix}_telomere.bedgraph
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        coreutils: $VERSION
+    END_VERSIONS
+    """
+}
diff --git a/modules/local/gawk/environment.yml b/modules/local/gawk/environment.yml
new file mode 100644
index 00000000..f52109e8
--- /dev/null
+++ b/modules/local/gawk/environment.yml
@@ -0,0 +1,7 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+channels:
+  - conda-forge
+  - bioconda
+dependencies:
+  - conda-forge::gawk=5.3.0
diff --git a/modules/local/gawk/main.nf b/modules/local/gawk/main.nf
new file mode 100644
index 00000000..f7f34b2e
--- /dev/null
+++ b/modules/local/gawk/main.nf
@@ -0,0 +1,68 @@
+process GAWK { // Runs awk over the staged inputs; the awk program itself must write direction.0/1.<suffix>
+    tag "$meta.id"
+    label 'process_single'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/gawk:5.3.0' :
+        'biocontainers/gawk:5.3.0' }"
+
+    input:
+    tuple val(meta), path(input, arity: '0..*')
+    path(program_file)
+    val(disable_redirect_output) // NOTE(review): not referenced by script or stub; awk stdout is never redirected here
+
+    output:
+    tuple val(meta), path("direction.0.${suffix}"), emit: prime3
+    tuple val(meta), path("direction.1.${suffix}"), emit: prime5
+    path "versions.yml"                           , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args  = task.ext.args  ?: '' // args is used for the main arguments of the tool
+    def args2 = task.ext.args2 ?: '' // args2 is used to specify a program when no program file has been given
+    prefix    = task.ext.prefix ?: "${meta.id}"
+    suffix    = task.ext.suffix ?: "${input.collect{ it.getExtension()}.get(0)}" // use the first extension of the input files
+
+    program    = program_file ? "-f ${program_file}" : "${args2}"
+    lst_gz     = input.findResults{ it.getExtension().endsWith("gz") ? it.toString() : null }
+    unzip      = lst_gz ? "gunzip -q -f ${lst_gz.join(" ")}" : ""
+    input_cmd  = input.collect { it.toString() - ~/\.gz$/ }.join(" ")
+    cleanup    = lst_gz ? "rm ${lst_gz.collect{ it - ~/\.gz$/ }.join(" ")}" : ""
+
+    input.collect{
+        assert it.name != "${prefix}.${suffix}" : "Input and output names are the same, set prefix in module configuration to disambiguate!"
+    }
+
+    """
+    ${unzip}
+
+    awk \\
+        ${args} \\
+        ${program} \\
+        ${input_cmd}
+
+    ${cleanup}
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        gawk: \$(awk -Wversion | sed '1!d; s/.*Awk //; s/,.*//')
+    END_VERSIONS
+    """
+
+    stub:
+    prefix = task.ext.prefix ?: "${meta.id}"
+    suffix = task.ext.suffix ?: "${input.collect{ it.getExtension()}.get(0)}" // same rule as the script block; `input` is a list
+    def create_cmd = suffix.endsWith("gz") ? "echo '' | gzip >" : "touch"
+
+    """
+    for direction in 0 1; do ${create_cmd} direction.\${direction}.${suffix}; done
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        gawk: \$(awk -Wversion | sed '1!d; s/.*Awk //; s/,.*//')
+    END_VERSIONS
+    """
+}
diff --git a/modules/local/gawk/meta.yml b/modules/local/gawk/meta.yml
new file mode 100644
index 00000000..34c50b12
--- /dev/null
+++ b/modules/local/gawk/meta.yml
@@ -0,0 +1,63 @@
+name: "gawk"
+description: |
+  If you are like many computer users, you would frequently like to make changes in various text files
+  wherever certain patterns appear, or extract data from parts of certain lines while discarding the rest.
+  The job is easy with awk, especially the GNU implementation gawk.
+keywords:
+  - gawk
+  - awk
+  - txt
+  - text
+  - file parsing
+tools:
+  - "gawk":
+      description: "GNU awk"
+      homepage: "https://www.gnu.org/software/gawk/"
+      documentation: "https://www.gnu.org/software/gawk/manual/"
+      tool_dev_url: "https://www.gnu.org/prep/ftp.html"
+      licence: ["GPL v3"]
+      identifier: ""
+input:
+  - - meta:
+        type: map
+        description: |
+          Groovy Map containing sample information
+          e.g. [ id:'test', single_end:false ]
+    - input:
+        type: file
+        description: The input file - Specify the logic that needs to be executed on
+          this file on the `ext.args2` or in the program file.
+          If the files have a `.gz` extension, they will be unzipped with `gunzip` before awk runs.
+        pattern: "*"
+  - - program_file:
+        type: file
+        description: Optional file containing logic for awk to execute. If you don't
+          wish to use a file, you can use `ext.args2` to specify the logic.
+        pattern: "*"
+  - - disable_redirect_output:
+        type: boolean
+        description: Disable the redirection of awk output to a given file. This is
+          useful if you want to use awk's built-in redirect to write files instead
+          of the shell's redirect.
+output:
+  - output:
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. [ id:'test', single_end:false ]
+      - "*.${suffix}":
+          type: file
+          description: The output file - if using shell redirection, specify the name of this
+            file using `ext.prefix` and the extension using `ext.suffix`. Otherwise, ensure
+            the awk program produces files with the extension in `ext.suffix`.
+          pattern: "*"
+  - versions:
+      - versions.yml:
+          type: file
+          description: File containing software versions
+          pattern: "versions.yml"
+authors:
+  - "@nvnieuwk"
+maintainers:
+  - "@nvnieuwk"
diff --git a/modules/local/pretext/graph/main.nf b/modules/local/pretext/graph/main.nf
index 4e9c92ad..ac966417 100644
--- a/modules/local/pretext/graph/main.nf
+++ b/modules/local/pretext/graph/main.nf
@@ -5,11 +5,12 @@ process PRETEXT_GRAPH {
     container "quay.io/sanger-tol/pretext:0.0.9-yy5-c2"
 
     input:
-    tuple val(meta), path(pretext_file)
+    tuple val(meta),        path(pretext_file)
     path(gap_file,          stageAs: 'gap_file.bed')
     path(coverage,          stageAs: 'coverage.bw')
-    path(telomere_file,     stageAs: 'telomere.bed')
+    path(telomere_file,     stageAs: 'telomere/*')
     path(repeat_density,    stageAs: 'repeat_density.bw')
+    val(split_telo_bool)
 
     output:
     tuple val(meta), path("*.pretext")  , emit: pretext
@@ -30,7 +31,6 @@ process PRETEXT_GRAPH {
 
     // Using single [ ] as nextflow will use sh where possible not bash
     """
-
     echo "PROCESSING ESSENTIAL FILES"
 
     if [ -s "${coverage}" ]; then
@@ -50,20 +50,78 @@ process PRETEXT_GRAPH {
     fi
 
     echo "NOW PROCESSING NON-ESSENTIAL files"
-
     input_file="repeat.pretext.part"
-
     if [ -s "${gap_file}" ]; then
         echo "Processing GAP file..."
         cat "${gap_file}" | PretextGraph ${args} -i repeat.pretext.part -n "gap" -o gap.pretext.part
         input_file="gap.pretext.part"
     fi
 
-    if [ -s "${telomere_file}" ]; then
-        echo "Processing TELO file..."
-        cat "${telomere_file}" | PretextGraph ${args} -i "\$input_file" -n "telomere" -o "${prefix}.pretext"
+    # Check if telomere directory has any files
+    if [ "\$(ls -A telomere 2>/dev/null)" ]; then
+        file_telox=""
+        file_5p=""
+        file_3p=""
+        file_og=""
+
+        for file in telomere/*.bedgraph; do
+            [ -e "\$file" ] || continue  # skip if no match
+            fname=\$(basename "\$file")
+
+            case "\$fname" in
+                *telox*)
+                    echo "Found TELOX file: \$file"
+                    file_telox="\$file"
+                    ;;
+                *5P*)
+                    file_5p="\$file"
+                    ;;
+                *3P*)
+                    file_3p="\$file"
+                    ;;
+                *)
+                    file_og="\$file"
+                    ;;
+            esac
+        done
+
+        ls telomere/*
+        echo "OG telomere file: \${file_og:-none}"
+        # Intermediates keep the .part suffix so the "*.pretext" output glob only captures the final file
+        if [ -s "\$file_og" ]; then
+            echo "Processing OG_TELOMERE file..."
+            PretextGraph $args -i "\$input_file" -n "og_telomere" -o telo_0.pretext.part < "\$file_og"
+        else
+            echo "No OG TELOMERE file"
+            cp "\$input_file" telo_0.pretext.part
+        fi
+
+        if [ -s "\$file_telox" ]; then
+            echo "Processing TELOX_TELOMERE file..."
+            PretextGraph $args -i telo_0.pretext.part -n "telox_telomere" -o telo_1.pretext.part < "\$file_telox"
+        else
+            echo "No TELOX file"
+            cp telo_0.pretext.part telo_1.pretext.part
+        fi
+
+        if [ -s "\$file_5p" ]; then
+            echo "Processing 5 Prime TELOMERE file..."
+            PretextGraph $args -i telo_1.pretext.part -n "5p_telomere" -o telo_2.pretext.part < "\$file_5p"
+        else
+            echo "No 5Prime TELOMERE file"
+            cp telo_1.pretext.part telo_2.pretext.part
+        fi
+
+        if [ -s "\$file_3p" ]; then
+            echo "Processing 3 Prime TELOMERE file..."
+            PretextGraph $args -i telo_2.pretext.part -n "3p_telomere" -o "${prefix}.pretext" < "\$file_3p"
+        else
+            echo "No 3Prime TELOMERE file"
+            cp telo_2.pretext.part "${prefix}.pretext"
+        fi
+
     else
-        mv "\$input_file" "${prefix}.pretext"
+        cp "\$input_file" "${prefix}.pretext"
     fi
 
     cat <<-END_VERSIONS > versions.yml
@@ -84,7 +142,6 @@ process PRETEXT_GRAPH {
     def UCSC_VERSION = '448' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions.
     """
     touch ${prefix}.pretext
-
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":
         PretextGraph: \$(PretextGraph | grep "Version" | sed 's/Pretext* Version //;')

From 4c07238e253e4754aa45be512a8aa2aaeedf4c01 Mon Sep 17 00:00:00 2001
From: DLBPointon <damonlbp@hotmail.co.uk>
Date: Mon, 4 Aug 2025 15:57:54 +0100
Subject: [PATCH 02/58] Update modules

---
 modules/nf-core/cat/cat/environment.yml       |   7 +
 modules/nf-core/cat/cat/main.nf               |  78 +++++++
 modules/nf-core/cat/cat/meta.yml              |  46 ++++
 modules/nf-core/cat/cat/tests/main.nf.test    | 191 ++++++++++++++++
 .../nf-core/cat/cat/tests/main.nf.test.snap   | 147 +++++++++++++
 .../cat/tests/nextflow_unzipped_zipped.config |   6 +
 .../cat/tests/nextflow_zipped_unzipped.config |   8 +
 .../nf-core/tabix/bgziptabix/environment.yml  |   8 +
 modules/nf-core/tabix/bgziptabix/main.nf      |  48 ++++
 modules/nf-core/tabix/bgziptabix/meta.yml     |  74 +++++++
 .../tabix/bgziptabix/tests/main.nf.test       | 123 +++++++++++
 .../tabix/bgziptabix/tests/main.nf.test.snap  | 206 ++++++++++++++++++
 .../tabix/bgziptabix/tests/tabix_csi.config   |   5 +
 .../tabix/bgziptabix/tests/tabix_tbi.config   |   5 +
 14 files changed, 952 insertions(+)
 create mode 100644 modules/nf-core/cat/cat/environment.yml
 create mode 100644 modules/nf-core/cat/cat/main.nf
 create mode 100644 modules/nf-core/cat/cat/meta.yml
 create mode 100644 modules/nf-core/cat/cat/tests/main.nf.test
 create mode 100644 modules/nf-core/cat/cat/tests/main.nf.test.snap
 create mode 100644 modules/nf-core/cat/cat/tests/nextflow_unzipped_zipped.config
 create mode 100644 modules/nf-core/cat/cat/tests/nextflow_zipped_unzipped.config
 create mode 100644 modules/nf-core/tabix/bgziptabix/environment.yml
 create mode 100644 modules/nf-core/tabix/bgziptabix/main.nf
 create mode 100644 modules/nf-core/tabix/bgziptabix/meta.yml
 create mode 100644 modules/nf-core/tabix/bgziptabix/tests/main.nf.test
 create mode 100644 modules/nf-core/tabix/bgziptabix/tests/main.nf.test.snap
 create mode 100644 modules/nf-core/tabix/bgziptabix/tests/tabix_csi.config
 create mode 100644 modules/nf-core/tabix/bgziptabix/tests/tabix_tbi.config

diff --git a/modules/nf-core/cat/cat/environment.yml b/modules/nf-core/cat/cat/environment.yml
new file mode 100644
index 00000000..50c2059a
--- /dev/null
+++ b/modules/nf-core/cat/cat/environment.yml
@@ -0,0 +1,7 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+channels:
+  - conda-forge
+  - bioconda
+dependencies:
+  - conda-forge::pigz=2.3.4
diff --git a/modules/nf-core/cat/cat/main.nf b/modules/nf-core/cat/cat/main.nf
new file mode 100644
index 00000000..2862c64c
--- /dev/null
+++ b/modules/nf-core/cat/cat/main.nf
@@ -0,0 +1,78 @@
+process CAT_CAT { // Concatenates the staged files into one output named by `prefix`, (de)compressing when needed
+    tag "$meta.id"
+    label 'process_low'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/pigz:2.3.4' :
+        'biocontainers/pigz:2.3.4' }"
+
+    input:
+    tuple val(meta), path(files_in)
+
+    output:
+    tuple val(meta), path("${prefix}"), emit: file_out
+    path "versions.yml"               , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def args2 = task.ext.args2 ?: ''
+    def file_list = files_in.collect { it.toString() }
+
+    // choose appropriate concatenation tool depending on input and output format
+
+    // | input     | output     | command1 | command2 |
+    // |-----------|------------|----------|----------|
+    // | gzipped   | gzipped    | cat      |          |
+    // | ungzipped | ungzipped  | cat      |          |
+    // | gzipped   | ungzipped  | zcat     |          |
+    // | ungzipped | gzipped    | cat      | pigz     |
+
+    // Use input file ending as default
+    prefix   = task.ext.prefix ?: "${meta.id}${getFileSuffix(file_list[0])}"
+    out_zip  = prefix.endsWith('.gz')
+    in_zip   = file_list[0].endsWith('.gz') // only the first input decides whether inputs count as gzipped
+    command1 = (in_zip && !out_zip) ? 'zcat' : 'cat'
+    command2 = (!in_zip && out_zip) ? "| pigz -c -p $task.cpus $args2" : ''
+    if(file_list.contains(prefix.trim())) {
+        error "The name of the input file can't be the same as for the output prefix in the " +
+        "module CAT_CAT (currently `$prefix`). Please choose a different one."
+    }
+    """
+    $command1 \\
+        $args \\
+        ${file_list.join(' ')} \\
+        $command2 \\
+        > ${prefix}
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' )
+    END_VERSIONS
+    """
+
+    stub:
+    def file_list   = files_in.collect { it.toString() }
+    prefix          = task.ext.prefix ?: "${meta.id}${getFileSuffix(file_list[0])}" // same default name as the script block
+    if(file_list.contains(prefix.trim())) {
+        error "The name of the input file can't be the same as for the output prefix in the " +
+        "module CAT_CAT (currently `$prefix`). Please choose a different one."
+    }
+    """
+    touch $prefix
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' )
+    END_VERSIONS
+    """
+}
+
+// Returns the last extension (with dot); for .gz files also the one before it, e.g. .fasta.gz. No dot in the name -> ''.
+def getFileSuffix(filename) {
+    def match = filename =~ /^.*?((\.\w{1,5})?(\.\w{1,5}\.gz$))/
+    return match ? match[0][1] : (filename.contains('.') ? filename.substring(filename.lastIndexOf('.')) : '')
+}
diff --git a/modules/nf-core/cat/cat/meta.yml b/modules/nf-core/cat/cat/meta.yml
new file mode 100644
index 00000000..2a9284d7
--- /dev/null
+++ b/modules/nf-core/cat/cat/meta.yml
@@ -0,0 +1,46 @@
+name: cat_cat
+description: A module for concatenation of gzipped or uncompressed files
+keywords:
+  - concatenate
+  - gzip
+  - cat
+tools:
+  - cat:
+      description: Just concatenation
+      documentation: https://man7.org/linux/man-pages/man1/cat.1.html
+      licence: ["GPL-3.0-or-later"]
+      identifier: ""
+input:
+  - - meta:
+        type: map
+        description: |
+          Groovy Map containing sample information
+          e.g. [ id:'test', single_end:false ]
+    - files_in:
+        type: file
+        description: List of compressed / uncompressed files
+        pattern: "*"
+        ontologies: []
+output:
+  file_out:
+    - - meta:
+          type: map
+          description: Groovy Map containing sample information
+      - ${prefix}:
+          type: file
+          description: Concatenated file. Will be gzipped if file_out ends with ".gz"
+          pattern: "${file_out}"
+          ontologies: []
+  versions:
+    - versions.yml:
+        type: file
+        description: File containing software versions
+        pattern: "versions.yml"
+        ontologies:
+          - edam: http://edamontology.org/format_3750 # YAML
+authors:
+  - "@erikrikarddaniel"
+  - "@FriederikeHanssen"
+maintainers:
+  - "@erikrikarddaniel"
+  - "@FriederikeHanssen"
diff --git a/modules/nf-core/cat/cat/tests/main.nf.test b/modules/nf-core/cat/cat/tests/main.nf.test
new file mode 100644
index 00000000..9cb16178
--- /dev/null
+++ b/modules/nf-core/cat/cat/tests/main.nf.test
@@ -0,0 +1,191 @@
+nextflow_process {
+
+    name "Test Process CAT_CAT"
+    script "../main.nf"
+    process "CAT_CAT"
+    tag "modules"
+    tag "modules_nfcore"
+    tag "cat"
+    tag "cat/cat"
+
+    test("test_cat_name_conflict") {
+        when {
+            params {
+                outdir   = "${outputDir}"
+            }
+            process {
+                """
+                input[0] =
+                    [
+                        [ id:'genome', single_end:true ],
+                        [
+                            file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true),
+                            file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.sizes', checkIfExists: true)
+                        ]
+                    ]
+                """
+            }
+        }
+        then {
+            assertAll(
+                { assert !process.success },
+                { assert process.stdout.toString().contains("The name of the input file can't be the same as for the output prefix") },
+                { assert snapshot(process.out.versions).match() }
+            )
+        }
+    }
+
+    test("test_cat_unzipped_unzipped") {
+        when {
+            params {
+                outdir   = "${outputDir}"
+            }
+            process {
+                """
+                input[0] =
+                    [
+                        [ id:'test', single_end:true ],
+                        [
+                            file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true),
+                            file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.sizes', checkIfExists: true)
+                        ]
+                    ]
+                """
+            }
+        }
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+            )
+        }
+    }
+
+
+    test("test_cat_zipped_zipped") {
+        when {
+            params {
+                outdir   = "${outputDir}"
+            }
+            process {
+                """
+                input[0] =
+                    [
+                        [ id:'test', single_end:true ],
+                        [
+                            file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.gff3.gz', checkIfExists: true),
+                            file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/alignment/last/contigs.genome.maf.gz', checkIfExists: true)
+                        ]
+                    ]
+                """
+            }
+        }
+        then {
+            def lines = path(process.out.file_out.get(0).get(1)).linesGzip
+            assertAll(
+                { assert process.success },
+                { assert snapshot(
+                    lines[0..5],
+                    lines.size(),
+                    process.out.versions
+                    ).match()
+                }
+            )
+        }
+    }
+
+    test("test_cat_zipped_unzipped") {
+        config './nextflow_zipped_unzipped.config'
+
+        when {
+            params {
+                outdir   = "${outputDir}"
+            }
+            process {
+                """
+                input[0] =
+                    [
+                        [ id:'test', single_end:true ],
+                        [
+                            file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.gff3.gz', checkIfExists: true),
+                            file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/alignment/last/contigs.genome.maf.gz', checkIfExists: true)
+                        ]
+                    ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+            )
+        }
+
+    }
+
+    test("test_cat_unzipped_zipped") {
+        config './nextflow_unzipped_zipped.config'
+        when {
+            params {
+                outdir   = "${outputDir}"
+            }
+            process {
+                """
+                input[0] =
+                    [
+                        [ id:'test', single_end:true ],
+                        [
+                            file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true),
+                            file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.sizes', checkIfExists: true)
+                        ]
+                    ]
+                """
+            }
+        }
+        then {
+            def lines = path(process.out.file_out.get(0).get(1)).linesGzip
+            assertAll(
+                { assert process.success },
+                { assert snapshot(
+                    lines[0..5],
+                    lines.size(),
+                    process.out.versions
+                    ).match()
+                }
+            )
+        }
+    }
+
+    test("test_cat_one_file_unzipped_zipped") {
+        config './nextflow_unzipped_zipped.config'
+        when {
+            params {
+                outdir   = "${outputDir}"
+            }
+            process {
+                """
+                input[0] =
+                    [
+                        [ id:'test', single_end:true ],
+                        [
+                            file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)
+                        ]
+                    ]
+                """
+            }
+        }
+        then {
+            def lines = path(process.out.file_out.get(0).get(1)).linesGzip
+            assertAll(
+                { assert process.success },
+                { assert snapshot(
+                    lines[0..5],
+                    lines.size(),
+                    process.out.versions
+                    ).match()
+                }
+            )
+        }
+    }
+}
diff --git a/modules/nf-core/cat/cat/tests/main.nf.test.snap b/modules/nf-core/cat/cat/tests/main.nf.test.snap
new file mode 100644
index 00000000..b7623ee6
--- /dev/null
+++ b/modules/nf-core/cat/cat/tests/main.nf.test.snap
@@ -0,0 +1,147 @@
+{
+    "test_cat_unzipped_unzipped": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": true
+                        },
+                        "test.fasta:md5,f44b33a0e441ad58b2d3700270e2dbe2"
+                    ]
+                ],
+                "1": [
+                    "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894"
+                ],
+                "file_out": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": true
+                        },
+                        "test.fasta:md5,f44b33a0e441ad58b2d3700270e2dbe2"
+                    ]
+                ],
+                "versions": [
+                    "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894"
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.8.4",
+            "nextflow": "24.04.3"
+        },
+        "timestamp": "2023-10-16T14:32:18.500464399"
+    },
+    "test_cat_zipped_unzipped": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": true
+                        },
+                        "cat.txt:md5,c439d3b60e7bc03e8802a451a0d9a5d9"
+                    ]
+                ],
+                "1": [
+                    "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894"
+                ],
+                "file_out": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": true
+                        },
+                        "cat.txt:md5,c439d3b60e7bc03e8802a451a0d9a5d9"
+                    ]
+                ],
+                "versions": [
+                    "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894"
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.8.4",
+            "nextflow": "24.04.3"
+        },
+        "timestamp": "2023-10-16T14:32:49.642741302"
+    },
+    "test_cat_zipped_zipped": {
+        "content": [
+            [
+                "MT192765.1\tGenbank\ttranscript\t259\t29667\t.\t+\t.\tID=unknown_transcript_1;geneID=orf1ab;gene_name=orf1ab",
+                "MT192765.1\tGenbank\tgene\t259\t21548\t.\t+\t.\tParent=unknown_transcript_1",
+                "MT192765.1\tGenbank\tCDS\t259\t13461\t.\t+\t0\tParent=unknown_transcript_1;exception=\"ribosomal slippage\";gbkey=CDS;gene=orf1ab;note=\"pp1ab;translated=by -1 ribosomal frameshift\";product=\"orf1ab polyprotein\";protein_id=QIK50426.1",
+                "MT192765.1\tGenbank\tCDS\t13461\t21548\t.\t+\t0\tParent=unknown_transcript_1;exception=\"ribosomal slippage\";gbkey=CDS;gene=orf1ab;note=\"pp1ab;translated=by -1 ribosomal frameshift\";product=\"orf1ab polyprotein\";protein_id=QIK50426.1",
+                "MT192765.1\tGenbank\tCDS\t21556\t25377\t.\t+\t0\tParent=unknown_transcript_1;gbkey=CDS;gene=S;note=\"structural protein\";product=\"surface glycoprotein\";protein_id=QIK50427.1",
+                "MT192765.1\tGenbank\tgene\t21556\t25377\t.\t+\t.\tParent=unknown_transcript_1"
+            ],
+            78,
+            [
+                "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894"
+            ]
+        ],
+        "meta": {
+            "nf-test": "0.8.4",
+            "nextflow": "24.04.3"
+        },
+        "timestamp": "2024-07-22T11:51:46.802978"
+    },
+    "test_cat_name_conflict": {
+        "content": [
+            [
+                
+            ]
+        ],
+        "meta": {
+            "nf-test": "0.8.4",
+            "nextflow": "24.04.3"
+        },
+        "timestamp": "2024-07-22T11:51:29.45394"
+    },
+    "test_cat_one_file_unzipped_zipped": {
+        "content": [
+            [
+                ">MT192765.1 Severe acute respiratory syndrome coronavirus 2 isolate SARS-CoV-2/human/USA/PC00101P/2020, complete genome",
+                "GTTTATACCTTCCCAGGTAACAAACCAACCAACTTTCGATCTCTTGTAGATCTGTTCTCTAAACGAACTTTAAAATCTGT",
+                "GTGGCTGTCACTCGGCTGCATGCTTAGTGCACTCACGCAGTATAATTAATAACTAATTACTGTCGTTGACAGGACACGAG",
+                "TAACTCGTCTATCTTCTGCAGGCTGCTTACGGTTTCGTCCGTGTTGCAGCCGATCATCAGCACATCTAGGTTTTGTCCGG",
+                "GTGTGACCGAAAGGTAAGATGGAGAGCCTTGTCCCTGGTTTCAACGAGAAAACACACGTCCAACTCAGTTTGCCTGTTTT",
+                "ACAGGTTCGCGACGTGCTCGTACGTGGCTTTGGAGACTCCGTGGAGGAGGTCTTATCAGAGGCACGTCAACATCTTAAAG"
+            ],
+            374,
+            [
+                "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894"
+            ]
+        ],
+        "meta": {
+            "nf-test": "0.8.4",
+            "nextflow": "24.04.3"
+        },
+        "timestamp": "2024-07-22T11:52:02.774016"
+    },
+    "test_cat_unzipped_zipped": {
+        "content": [
+            [
+                ">MT192765.1 Severe acute respiratory syndrome coronavirus 2 isolate SARS-CoV-2/human/USA/PC00101P/2020, complete genome",
+                "GTTTATACCTTCCCAGGTAACAAACCAACCAACTTTCGATCTCTTGTAGATCTGTTCTCTAAACGAACTTTAAAATCTGT",
+                "GTGGCTGTCACTCGGCTGCATGCTTAGTGCACTCACGCAGTATAATTAATAACTAATTACTGTCGTTGACAGGACACGAG",
+                "TAACTCGTCTATCTTCTGCAGGCTGCTTACGGTTTCGTCCGTGTTGCAGCCGATCATCAGCACATCTAGGTTTTGTCCGG",
+                "GTGTGACCGAAAGGTAAGATGGAGAGCCTTGTCCCTGGTTTCAACGAGAAAACACACGTCCAACTCAGTTTGCCTGTTTT",
+                "ACAGGTTCGCGACGTGCTCGTACGTGGCTTTGGAGACTCCGTGGAGGAGGTCTTATCAGAGGCACGTCAACATCTTAAAG"
+            ],
+            375,
+            [
+                "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894"
+            ]
+        ],
+        "meta": {
+            "nf-test": "0.8.4",
+            "nextflow": "24.04.3"
+        },
+        "timestamp": "2024-07-22T11:51:57.581523"
+    }
+}
\ No newline at end of file
diff --git a/modules/nf-core/cat/cat/tests/nextflow_unzipped_zipped.config b/modules/nf-core/cat/cat/tests/nextflow_unzipped_zipped.config
new file mode 100644
index 00000000..ec26b0fd
--- /dev/null
+++ b/modules/nf-core/cat/cat/tests/nextflow_unzipped_zipped.config
@@ -0,0 +1,6 @@
+
+process {
+    withName: CAT_CAT {
+        ext.prefix = 'cat.txt.gz'
+    }
+}
diff --git a/modules/nf-core/cat/cat/tests/nextflow_zipped_unzipped.config b/modules/nf-core/cat/cat/tests/nextflow_zipped_unzipped.config
new file mode 100644
index 00000000..fbc79783
--- /dev/null
+++ b/modules/nf-core/cat/cat/tests/nextflow_zipped_unzipped.config
@@ -0,0 +1,8 @@
+
+process {
+
+    withName: CAT_CAT {
+        ext.prefix = 'cat.txt'
+    }
+
+}
diff --git a/modules/nf-core/tabix/bgziptabix/environment.yml b/modules/nf-core/tabix/bgziptabix/environment.yml
new file mode 100644
index 00000000..771b1387
--- /dev/null
+++ b/modules/nf-core/tabix/bgziptabix/environment.yml
@@ -0,0 +1,8 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+channels:
+  - conda-forge
+  - bioconda
+
+dependencies:
+  - bioconda::htslib=1.21
diff --git a/modules/nf-core/tabix/bgziptabix/main.nf b/modules/nf-core/tabix/bgziptabix/main.nf
new file mode 100644
index 00000000..f295c7f2
--- /dev/null
+++ b/modules/nf-core/tabix/bgziptabix/main.nf
@@ -0,0 +1,48 @@
+process TABIX_BGZIPTABIX {
+    tag "$meta.id"
+    label 'process_single'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/92/92859404d861ae01afb87e2b789aebc71c0ab546397af890c7df74e4ee22c8dd/data' :
+        'community.wave.seqera.io/library/htslib:1.21--ff8e28a189fbecaa' }"
+
+    input:
+    tuple val(meta), path(input)
+
+    output:
+    tuple val(meta), path("*.gz"), path("*.tbi"), optional: true, emit: gz_tbi
+    tuple val(meta), path("*.gz"), path("*.csi"), optional: true, emit: gz_csi
+    path  "versions.yml" ,                        emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def args2 = task.ext.args2 ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    bgzip  --threads ${task.cpus} -c $args $input > ${prefix}.${input.getExtension()}.gz
+    tabix --threads ${task.cpus} $args2 ${prefix}.${input.getExtension()}.gz
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        tabix: \$(echo \$(tabix -h 2>&1) | sed 's/^.*Version: //; s/ .*\$//')
+    END_VERSIONS
+    """
+
+    stub:
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    def args2 = task.ext.args2 ?: ''
+    def index = args2.contains("-C ") || args2.contains("--csi") ? "csi" : "tbi"
+    """
+    echo "" | gzip > ${prefix}.${input.getExtension()}.gz
+    touch ${prefix}.${input.getExtension()}.gz.${index}
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        tabix: \$(echo \$(tabix -h 2>&1) | sed 's/^.*Version: //; s/ .*\$//')
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/tabix/bgziptabix/meta.yml b/modules/nf-core/tabix/bgziptabix/meta.yml
new file mode 100644
index 00000000..9c2c46d1
--- /dev/null
+++ b/modules/nf-core/tabix/bgziptabix/meta.yml
@@ -0,0 +1,74 @@
+name: tabix_bgziptabix
+description: bgzip a sorted tab-delimited genome file and then create tabix index
+keywords:
+  - bgzip
+  - compress
+  - index
+  - tabix
+  - vcf
+tools:
+  - tabix:
+      description: Generic indexer for TAB-delimited genome position files.
+      homepage: https://www.htslib.org/doc/tabix.html
+      documentation: https://www.htslib.org/doc/tabix.1.html
+      doi: 10.1093/bioinformatics/btq671
+      licence: ["MIT"]
+      identifier: biotools:tabix
+input:
+  - - meta:
+        type: map
+        description: |
+          Groovy Map containing sample information
+          e.g. [ id:'test', single_end:false ]
+    - input:
+        type: file
+        description: Sorted tab-delimited genome file
+        ontologies: []
+output:
+  gz_tbi:
+    - - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. [ id:'test', single_end:false ]
+      - "*.gz":
+          type: file
+          description: bgzipped tab-delimited genome file
+          pattern: "*.gz"
+          ontologies:
+            - edam: http://edamontology.org/format_3989 # GZIP format
+      - "*.tbi":
+          type: file
+          description: tabix index file
+          pattern: "*.tbi"
+          ontologies: []
+  gz_csi:
+    - - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. [ id:'test', single_end:false ]
+      - "*.gz":
+          type: file
+          description: bgzipped tab-delimited genome file
+          pattern: "*.gz"
+          ontologies:
+            - edam: http://edamontology.org/format_3989 # GZIP format
+      - "*.csi":
+          type: file
+          description: csi index file
+          pattern: "*.csi"
+          ontologies: []
+  versions:
+    - versions.yml:
+        type: file
+        description: File containing software versions
+        pattern: "versions.yml"
+        ontologies:
+          - edam: http://edamontology.org/format_3750 # YAML
+authors:
+  - "@maxulysse"
+  - "@DLBPointon"
+maintainers:
+  - "@maxulysse"
+  - "@DLBPointon"
diff --git a/modules/nf-core/tabix/bgziptabix/tests/main.nf.test b/modules/nf-core/tabix/bgziptabix/tests/main.nf.test
new file mode 100644
index 00000000..cdb016e5
--- /dev/null
+++ b/modules/nf-core/tabix/bgziptabix/tests/main.nf.test
@@ -0,0 +1,123 @@
+nextflow_process {
+
+    name "Test Process TABIX_BGZIPTABIX"
+    script "../main.nf"
+    process "TABIX_BGZIPTABIX"
+
+    tag "modules"
+    tag "modules_nfcore"
+    tag "tabix"
+    tag "tabix/bgziptabix"
+
+    test("sarscov2_bed_tbi") {
+        config "./tabix_tbi.config"
+
+        when {
+            process {
+                """
+                input[0] = [
+                                [ id:'tbi_test' ],
+                                [ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test.bed', checkIfExists: true) ]
+                ]
+                """
+            }
+        }
+
+        then {
+            assertAll (
+                { assert process.success },
+                { assert snapshot(process.out).match() },
+                { assert snapshot(
+                            file(process.out.gz_tbi[0][1]).name
+                                ).match("tbi_test")
+                }
+            )
+        }
+    }
+
+    test("sarscov2_bed_csi") {
+        config "./tabix_csi.config"
+
+        when {
+            process {
+                """
+                input[0] = [
+                                [ id:'csi_test' ],
+                                [ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test.bed', checkIfExists: true) ]
+                ]
+                """
+            }
+        }
+
+        then {
+            assertAll (
+                { assert process.success },
+                { assert snapshot(process.out).match() },
+                { assert snapshot(
+                            file(process.out.gz_csi[0][1]).name
+                                ).match("csi_test")
+                }
+            )
+        }
+
+    }
+
+    test("sarscov2_bed_csi_stub") {
+        config "./tabix_csi.config"
+
+        options "-stub"
+
+        when {
+            process {
+                """
+                input[0] = [
+                                [ id:'test' ],
+                                [ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test.bed', checkIfExists: true) ]
+                ]
+                """
+            }
+        }
+
+        then {
+            assertAll (
+                { assert process.success },
+                { assert snapshot(process.out).match() },
+                { assert snapshot(
+                            file(process.out.gz_csi[0][1]).name
+                                ).match("csi_stub")
+                }
+            )
+        }
+
+    }
+
+    test("sarscov2_bed_tbi_stub") {
+        config "./tabix_tbi.config"
+
+        options "-stub"
+
+        when {
+            process {
+                """
+                input[0] = [
+                                [ id:'test' ],
+                                [ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test.bed', checkIfExists: true) ]
+                ]
+                """
+            }
+        }
+
+        then {
+            assertAll (
+                { assert process.success },
+                { assert snapshot(process.out).match() },
+                { assert snapshot(
+                            file(process.out.gz_tbi[0][1]).name
+                                ).match("tbi_stub")
+                }
+            )
+        }
+
+    }
+
+}
diff --git a/modules/nf-core/tabix/bgziptabix/tests/main.nf.test.snap b/modules/nf-core/tabix/bgziptabix/tests/main.nf.test.snap
new file mode 100644
index 00000000..5f818045
--- /dev/null
+++ b/modules/nf-core/tabix/bgziptabix/tests/main.nf.test.snap
@@ -0,0 +1,206 @@
+{
+    "sarscov2_bed_tbi": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "tbi_test"
+                        },
+                        "tbi_test.bed.gz:md5,fe4053cf4de3aebbdfc3be2efb125a74",
+                        "tbi_test.bed.gz.tbi:md5,ca06caf88b1e3c67d5fcba0a1460b52c"
+                    ]
+                ],
+                "1": [
+                    
+                ],
+                "2": [
+                    "versions.yml:md5,9a7904908d7400fc67ef0412a925e9fc"
+                ],
+                "gz_csi": [
+                    
+                ],
+                "gz_tbi": [
+                    [
+                        {
+                            "id": "tbi_test"
+                        },
+                        "tbi_test.bed.gz:md5,fe4053cf4de3aebbdfc3be2efb125a74",
+                        "tbi_test.bed.gz.tbi:md5,ca06caf88b1e3c67d5fcba0a1460b52c"
+                    ]
+                ],
+                "versions": [
+                    "versions.yml:md5,9a7904908d7400fc67ef0412a925e9fc"
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.9.2",
+            "nextflow": "24.10.5"
+        },
+        "timestamp": "2025-03-26T13:52:30.53305451"
+    },
+    "sarscov2_bed_csi": {
+        "content": [
+            {
+                "0": [
+                    
+                ],
+                "1": [
+                    [
+                        {
+                            "id": "csi_test"
+                        },
+                        "csi_test.bed.gz:md5,fe4053cf4de3aebbdfc3be2efb125a74",
+                        "csi_test.bed.gz.csi:md5,c9c0377de58fdc89672bb3005a0d69f5"
+                    ]
+                ],
+                "2": [
+                    "versions.yml:md5,9a7904908d7400fc67ef0412a925e9fc"
+                ],
+                "gz_csi": [
+                    [
+                        {
+                            "id": "csi_test"
+                        },
+                        "csi_test.bed.gz:md5,fe4053cf4de3aebbdfc3be2efb125a74",
+                        "csi_test.bed.gz.csi:md5,c9c0377de58fdc89672bb3005a0d69f5"
+                    ]
+                ],
+                "gz_tbi": [
+                    
+                ],
+                "versions": [
+                    "versions.yml:md5,9a7904908d7400fc67ef0412a925e9fc"
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.9.2",
+            "nextflow": "24.10.5"
+        },
+        "timestamp": "2025-03-26T13:52:34.152301569"
+    },
+    "csi_test": {
+        "content": [
+            "csi_test.bed.gz"
+        ],
+        "meta": {
+            "nf-test": "0.8.4",
+            "nextflow": "24.04.2"
+        },
+        "timestamp": "2024-02-19T14:51:00.548801"
+    },
+    "sarscov2_bed_tbi_stub": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.bed.gz:md5,68b329da9893e34099c7d8ad5cb9c940",
+                        "test.bed.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "1": [
+                    
+                ],
+                "2": [
+                    "versions.yml:md5,9a7904908d7400fc67ef0412a925e9fc"
+                ],
+                "gz_csi": [
+                    
+                ],
+                "gz_tbi": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.bed.gz:md5,68b329da9893e34099c7d8ad5cb9c940",
+                        "test.bed.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "versions": [
+                    "versions.yml:md5,9a7904908d7400fc67ef0412a925e9fc"
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.9.2",
+            "nextflow": "24.10.5"
+        },
+        "timestamp": "2025-03-26T13:52:41.271812789"
+    },
+    "csi_stub": {
+        "content": [
+            "test.bed.gz"
+        ],
+        "meta": {
+            "nf-test": "0.8.4",
+            "nextflow": "24.04.2"
+        },
+        "timestamp": "2024-02-19T14:51:09.218454"
+    },
+    "tbi_stub": {
+        "content": [
+            "test.bed.gz"
+        ],
+        "meta": {
+            "nf-test": "0.9.0",
+            "nextflow": "24.04.4"
+        },
+        "timestamp": "2024-09-25T14:45:18.550930179"
+    },
+    "tbi_test": {
+        "content": [
+            "tbi_test.bed.gz"
+        ],
+        "meta": {
+            "nf-test": "0.8.4",
+            "nextflow": "24.04.2"
+        },
+        "timestamp": "2024-02-19T14:50:51.579654"
+    },
+    "sarscov2_bed_csi_stub": {
+        "content": [
+            {
+                "0": [
+                    
+                ],
+                "1": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.bed.gz:md5,68b329da9893e34099c7d8ad5cb9c940",
+                        "test.bed.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "2": [
+                    "versions.yml:md5,9a7904908d7400fc67ef0412a925e9fc"
+                ],
+                "gz_csi": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.bed.gz:md5,68b329da9893e34099c7d8ad5cb9c940",
+                        "test.bed.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "gz_tbi": [
+                    
+                ],
+                "versions": [
+                    "versions.yml:md5,9a7904908d7400fc67ef0412a925e9fc"
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.9.2",
+            "nextflow": "24.10.5"
+        },
+        "timestamp": "2025-03-26T13:52:37.709221651"
+    }
+}
\ No newline at end of file
diff --git a/modules/nf-core/tabix/bgziptabix/tests/tabix_csi.config b/modules/nf-core/tabix/bgziptabix/tests/tabix_csi.config
new file mode 100644
index 00000000..fb41a314
--- /dev/null
+++ b/modules/nf-core/tabix/bgziptabix/tests/tabix_csi.config
@@ -0,0 +1,5 @@
+process {
+    withName: TABIX_BGZIPTABIX {
+        ext.args2 = '-p vcf --csi'
+    }
+}
diff --git a/modules/nf-core/tabix/bgziptabix/tests/tabix_tbi.config b/modules/nf-core/tabix/bgziptabix/tests/tabix_tbi.config
new file mode 100644
index 00000000..c1915dc4
--- /dev/null
+++ b/modules/nf-core/tabix/bgziptabix/tests/tabix_tbi.config
@@ -0,0 +1,5 @@
+process {
+    withName: TABIX_BGZIPTABIX {
+        ext.args2 = '-p vcf'
+    }
+}
\ No newline at end of file

From b48b05ae0235a1dd6e412d67b521a80019d1716e Mon Sep 17 00:00:00 2001
From: DLBPointon <damonlbp@hotmail.co.uk>
Date: Mon, 4 Aug 2025 15:58:41 +0100
Subject: [PATCH 03/58] Update and addition of new subworkflows

---
 subworkflows/local/accessory_files/main.nf |  4 +-
 subworkflows/local/telo_extraction/main.nf | 74 +++++++++++++++++++++
 subworkflows/local/telo_finder/main.nf     | 75 ++++++++++++++--------
 3 files changed, 126 insertions(+), 27 deletions(-)
 create mode 100644 subworkflows/local/telo_extraction/main.nf

diff --git a/subworkflows/local/accessory_files/main.nf b/subworkflows/local/accessory_files/main.nf
index 07121a29..83c0af65 100644
--- a/subworkflows/local/accessory_files/main.nf
+++ b/subworkflows/local/accessory_files/main.nf
@@ -80,7 +80,7 @@ workflow ACCESSORY_FILES {
             val_teloseq
         )
         ch_versions     = ch_versions.mix(TELO_FINDER.out.versions)
-        telo_file       = TELO_FINDER.out.bedgraph_file.map{ it -> it[1] }
+        telo_file       = TELO_FINDER.out.bedgraph_file
     }
 
 
@@ -118,7 +118,7 @@ workflow ACCESSORY_FILES {
     emit:
     gap_file
     repeat_file
-    telo_file
+    telo_file           // This is the possible collection of telomere files
     longread_output
     versions            = ch_versions
 }
diff --git a/subworkflows/local/telo_extraction/main.nf b/subworkflows/local/telo_extraction/main.nf
new file mode 100644
index 00000000..73a955e6
--- /dev/null
+++ b/subworkflows/local/telo_extraction/main.nf
@@ -0,0 +1,74 @@
+include { GAWK as GAWK_CLEAN_TELOMERE   } from '../../../modules/nf-core/gawk/main'
+include { GAWK as GAWK_MAP_TELO         } from '../../../modules/nf-core/gawk/main'
+include { FIND_TELOMERE_WINDOWS         } from '../../../modules/local/find/telomere_windows/main'
+include { EXTRACT_TELO                  } from '../../../modules/local/extract/telo/main'
+include { TABIX_BGZIPTABIX              } from '../../../modules/nf-core/tabix/bgziptabix'
+
+workflow TELO_EXTRACTION {
+    take:
+    telomere_file // channel: [ val(meta), path(telomere_file) ]
+
+    main:
+    ch_versions         = Channel.empty()
+
+    //
+    // MODULE: CLEAN THE .TELOMERE FILE IF IT CONTAINS THE "you screwed up" ERROR MESSAGE
+    //          (LIKELY WHEN USING LOWERCASE LETTERS OR A BAD MOTIF)
+    //          WORKS BY RETURNING ONLY LINES THAT START WITH '>'
+    //
+    GAWK_CLEAN_TELOMERE (
+        telomere_file,
+        [],
+        false
+    )
+    ch_versions         = ch_versions.mix( GAWK_CLEAN_TELOMERE.out.versions )
+
+    //
+    // MODULE: GENERATES A WINDOWS FILE FROM THE CLEANED TELOMERE FILE ABOVE
+    //
+    FIND_TELOMERE_WINDOWS (
+        GAWK_CLEAN_TELOMERE.out.output
+    )
+    ch_versions         = ch_versions.mix( FIND_TELOMERE_WINDOWS.out.versions )
+
+    //
+    // MODULE: EXTRACTS THE LOCATION OF TELOMERIC SEQUENCE BASED ON THE WINDOWS
+    //          AN EMPTY INPUT CHANNEL SIMPLY MEANS THE PROCESS DOES NOT RUN, SO NO
+    //          `ifEmpty` GUARD IS USED (IT WOULD EMIT ITS DEFAULT AS A DATA ITEM)
+    //
+    EXTRACT_TELO (
+        FIND_TELOMERE_WINDOWS.out.windows
+    )
+    ch_versions         = ch_versions.mix( EXTRACT_TELO.out.versions )
+
+    //
+    // MODULE: FALLBACK BED GENERATED DIRECTLY FROM THE CLEANED TELOMERE FILE
+    //
+    GAWK_MAP_TELO (
+        GAWK_CLEAN_TELOMERE.out.output,
+        [],
+        false
+    )
+    ch_versions         = ch_versions.mix( GAWK_MAP_TELO.out.versions )
+
+    //
+    // LOGIC: PICK ONE BED PER META FOR TABIX_BGZIPTABIX
+    //          EXTRACT_TELO IS PREFERRED, GAWK_MAP_TELO IS THE FALLBACK.
+    //          join(remainder: true) PADS THE MISSING SIDE WITH null.
+    //
+    merged_bed          = EXTRACT_TELO.out.bed
+        .join( GAWK_MAP_TELO.out.output, remainder: true )
+        .map { meta, bed, fallback -> tuple( meta, bed ?: fallback ) }
+
+    TABIX_BGZIPTABIX (
+        merged_bed
+    )
+    ch_versions         = ch_versions.mix( TABIX_BGZIPTABIX.out.versions )
+
+    emit:
+    bed_file        = merged_bed                    // channel: [ val(meta), path(bed) ]
+    bed_gz_tbi      = TABIX_BGZIPTABIX.out.gz_tbi   // channel: [ val(meta), path(gz), path(tbi) ]
+    bedgraph_file   = EXTRACT_TELO.out.bedgraph     // channel: [ val(meta), path(bedgraph) ]
+    versions        = ch_versions
+
+}
diff --git a/subworkflows/local/telo_finder/main.nf b/subworkflows/local/telo_finder/main.nf
index d0d52123..7b9893a8 100644
--- a/subworkflows/local/telo_finder/main.nf
+++ b/subworkflows/local/telo_finder/main.nf
@@ -5,9 +5,9 @@
 //
 include { GAWK as GAWK_UPPER_SEQUENCE   } from '../../../modules/nf-core/gawk/main'
 include { FIND_TELOMERE_REGIONS         } from '../../../modules/local/find/telomere_regions/main'
-include { GAWK as GAWK_CLEAN_TELOMERE   } from '../../../modules/nf-core/gawk/main'
-include { FIND_TELOMERE_WINDOWS         } from '../../../modules/local/find/telomere_windows/main'
-include { EXTRACT_TELOMERE              } from '../../../modules/local/extract/telomere/main'
+include { GAWK as GAWK_SPLIT_DIRECTIONS } from '../../../modules/local/gawk/main'
+
+include { TELO_EXTRACTION               } from '../../../subworkflows/local/telo_extraction/main'
 
 workflow TELO_FINDER {
 
@@ -41,36 +41,61 @@ workflow TELO_FINDER {
 
 
     //
-    // MODULE: CLEAN THE .TELOMERE FILE IF CONTAINS "you screwed up" ERROR MESSAGE
-    //          (LIKELY WHEN USING LOWERCASE LETTERS OR BAD MOTIF)
-    //          WORKS BE RETURNING LINES THAT START WITH '>'
+    // MODULE: SPLIT THE TELOMERE FILE INTO 5' and 3' FILES
+    //              THIS IS RUNNING ON A LOCAL VERSION OF THE GAWK MODULE
     //
-    GAWK_CLEAN_TELOMERE (
-        FIND_TELOMERE_REGIONS.out.telomere,
-        [],
-        false
-    )
-    ch_versions     = ch_versions.mix( GAWK_CLEAN_TELOMERE.out.versions )
+    if (params.split_telomere) {
+        GAWK_SPLIT_DIRECTIONS (
+            FIND_TELOMERE_REGIONS.out.telomere,
+            file("${projectDir}/bin/gawk_split_directions.awk"),
+            false
+        )
+        ch_versions     = ch_versions.mix( GAWK_SPLIT_DIRECTIONS.out.versions )
 
+        GAWK_SPLIT_DIRECTIONS.out.prime5
+            .map { meta, file ->
+                tuple( [id: meta.id + "_5P"], file)
+            }
+            .set { prime5_telo }
+
+        GAWK_SPLIT_DIRECTIONS.out.prime3
+            .map { meta, file ->
+                tuple( [id: meta.id + "_3P"], file)
+            }
+            .set { prime3_telo }
+
+        prime5_telo
+            .mix(prime3_telo)
+            .mix(FIND_TELOMERE_REGIONS.out.telomere)
+            .set { telo_for_extraction }
+
+    } else {
+        telo_for_extraction = FIND_TELOMERE_REGIONS.out.telomere
+    }
 
-    //
-    // MODULE: GENERATES A WINDOWS FILE FROM THE ABOVE
-    //
-    FIND_TELOMERE_WINDOWS (
-        GAWK_CLEAN_TELOMERE.out.output
-    )
-    ch_versions     = ch_versions.mix( FIND_TELOMERE_WINDOWS.out.versions )
 
     //
-    // MODULE: EXTRACTS THE LOCATION OF TELOMERIC SEQUENCE BASED ON THE WINDOWS
+    // SUBWORKFLOW: TELO_EXTRACTION
+    //              - The prime5.mix(prime3) creates a queue channel to execute
+    //                  TELO_EXTRACTION per item in channel
     //
-    EXTRACT_TELOMERE (
-        FIND_TELOMERE_WINDOWS.out.windows
+    TELO_EXTRACTION (
+        telo_for_extraction
     )
-    ch_versions     = ch_versions.mix( EXTRACT_TELOMERE.out.versions )
+    ch_versions     = ch_versions.mix( TELO_EXTRACTION.out.versions )
+
+
+    TELO_EXTRACTION.out.bedgraph_file
+        .map{ _meta, bedgraph ->
+            bedgraph
+        }
+        .collect()
+        .set { telo_bedgraphs }
+
 
     emit:
-    bed_file        = EXTRACT_TELOMERE.out.bed
-    bedgraph_file   = EXTRACT_TELOMERE.out.bedgraph
+    bed_file        = TELO_EXTRACTION.out.bed_file.collect()    // Not used anymore
+    bed_gz_tbi      = TELO_EXTRACTION.out.bed_gz_tbi.collect()  // Not used anymore
+    bedgraph_file   = telo_bedgraphs                            // Used in pretext_graph
     versions        = ch_versions
 }

From c33946f2eb2e3e457e2676579ea6a6db31ac7e34 Mon Sep 17 00:00:00 2001
From: DLBPointon <damonlbp@hotmail.co.uk>
Date: Mon, 4 Aug 2025 15:59:11 +0100
Subject: [PATCH 04/58] Addition of split_telomere var

---
 workflows/curationpretext.nf | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/workflows/curationpretext.nf b/workflows/curationpretext.nf
index 874da2cb..324dc90c 100644
--- a/workflows/curationpretext.nf
+++ b/workflows/curationpretext.nf
@@ -115,6 +115,7 @@ workflow CURATIONPRETEXT {
             cove_file,
             telo_file,
             rept_file,
+            params.split_telomere
         )
         ch_versions         = ch_versions.mix( PRETEXT_INGEST_SNDRD.out.versions )
 
@@ -130,6 +131,7 @@ workflow CURATIONPRETEXT {
                 cove_file,
                 telo_file,
                 rept_file,
+                params.split_telomere
             )
             ch_versions         = ch_versions.mix( PRETEXT_INGEST_SNDRD.out.versions )
         }

From 3976812375513a10c992c965e0db093f4872d77d Mon Sep 17 00:00:00 2001
From: DLBPointon <damonlbp@hotmail.co.uk>
Date: Mon, 4 Aug 2025 16:00:58 +0100
Subject: [PATCH 05/58] Update to files

---
 conf/modules.config  | 11 +++++++++++
 modules.json         | 10 ++++++++++
 nextflow.config      |  1 +
 nextflow_schema.json |  7 +++++++
 4 files changed, 29 insertions(+)

diff --git a/conf/modules.config b/conf/modules.config
index 65623cdb..ccdb5010 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -91,6 +91,17 @@ process {
         ext.suffix = 'telomere'
     }
 
+    withName: 'GAWK_MAP_TELO' { // awk main rule, not BEGIN: BEGIN runs before any record is read
+        ext.args2 = { "-v OFS=\"\t\" '{ sub(/^>/, \"\"); print \$1, \$4, \$5, \$6 }'" }
+        ext.prefix   = { "${meta.id}_map_telo" }
+        ext.suffix   = 'bed'
+    }
+
+    withName: 'GAWK_SPLIT_DIRECTIONS' {
+        ext.prefix  = { "${input}_telo" }
+        ext.suffix  = 'telomere'
+    }
+
     //
     // NOTE: GNU_SORT module derivatives
     //
diff --git a/modules.json b/modules.json
index 30d74ced..cf41394e 100644
--- a/modules.json
+++ b/modules.json
@@ -35,6 +35,11 @@
                         "git_sha": "a29f18660f5e3748d44d6f716241e70c942c065d",
                         "installed_by": ["modules"]
                     },
+                    "cat/cat": {
+                        "branch": "master",
+                        "git_sha": "41dfa3f7c0ffabb96a6a813fe321c6d1cc5b6e46",
+                        "installed_by": ["modules"]
+                    },
                     "gawk": {
                         "branch": "master",
                         "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d",
@@ -92,6 +97,11 @@
                         "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d",
                         "installed_by": ["modules"]
                     },
+                    "tabix/bgziptabix": {
+                        "branch": "master",
+                        "git_sha": "f2cfcf9d3f6a2d123e6c44aefa788aa232204a7a",
+                        "installed_by": ["modules"]
+                    },
                     "ucsc/bedgraphtobigwig": {
                         "branch": "master",
                         "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d",
diff --git a/nextflow.config b/nextflow.config
index f76a56fc..eb54c2ee 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -11,6 +11,7 @@ params {
 
     // Input options
     input                      = null
+    split_telomere             = false
     skip_tracks                = "NONE"
     sample                     = "pretext_rerun"
     teloseq                    = "TTAGGG"
diff --git a/nextflow_schema.json b/nextflow_schema.json
index 307a510b..b3d71f02 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -20,6 +20,13 @@
                     "help_text": "You need the input fasta file",
                     "fa_icon": "fas fa-file-fasta"
                 },
+                "split_telomere": {
+                    "type": "boolean",
+                    "format": "boolean",
+                    "description": "Split the telomere file into 5' and 3' files for separate ingestion into the HiC maps",
+                    "default": false,
+                    "fa_icon": "fas fa-check"
+                },
                 "skip_tracks": {
                     "type": "string",
                     "description": "Skip generation for specified tracks",

From 6e69c94f7cf5b9b635eabb00f869b1143161d6e1 Mon Sep 17 00:00:00 2001
From: DLBPointon <damonlbp@hotmail.co.uk>
Date: Mon, 4 Aug 2025 17:23:37 +0100
Subject: [PATCH 06/58] Update CHANGELOG

---
 CHANGELOG.md | 37 +++++++++++++++++++++++++++++++++++++
 1 file changed, 37 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index c0ef4bc2..e2c8ccf9 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -3,6 +3,43 @@
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [[1.5.0](https://github.com/sanger-tol/curationpretext/releases/tag/1.5.0)] - UNSC Punic - [2025-08-04]
+
+### Added and Fixed
+
+- Addition of the `--split_telomere` boolean flag, which is `false` by default.
+  - When `true`, the pipeline will split the telomere file into separate 5-prime and 3-prime files.
+- Update `ACCESSORY_FILES` subworkflow:
+  - Remove `GET_LARGEST_SCAFFOLD` as we no longer need it, this was needed for TABIX so that the correct index file was used. This was used by the `TELO_FINDER` and `GAP_FINDER` subworkflows.
+- Update `TELO_FINDER` subworkflow:
+  - Remove `GAWK_MAP_TELO` as it is no longer needed.
+  - Remove `GAWK_CLEAN_TELOMERE` as it is no longer needed. The reason for its inclusion has been fixed.
+  - Update `EXTRACT_TELO` to `EXTRACT_TELOMERE` which also removed the use of the `cat {file} | awk` pattern, replacing it with just `awk`. This was supposed to happen in `1.4.0`, but was forgotten with the files lying dormant in the repo.
+  - Refactor of the `TELO_FINDER` subworkflow, introducing the `TELO_EXTRACTION` subworkflow which is run per telo file. With the introduction of `split_telomere` this can be 3 files.
+- Update `LONGREAD_COVERAGE` subworkflow:
+  - Remove `GRAPH_OVERALL_COVERAGE` as it is not in use.
+- Better formatting in some files.
+- Moved `GAWK_UPPER_SEQUENCE` from the `TELO_FINDER` subworkflow to the first step of the main `curationpretext` workflow, this simply makes more sense.
+
+### Parameters
+
+| Old Version | New Versions     |
+| ----------- | ---------------- |
+| NA          | --split_telomere |
+
+### Software Dependencies
+
+Note, since the pipeline is using Nextflow DSL2, each process will be run with its own Biocontainer. This means that on occasion it is entirely possible for the pipeline to be using different versions of the same tool. However, the overall software dependency changes compared to the last release have been listed below for reference.
+
+| Module                   | Old Version   | New Versions  |
+| ------------------------ | ------------- | ------------- |
+| `GRAPH_OVERALL_COVERAGE` | perl=5.26.2   | REMOVED       |
+| `EXTRACT_TELO`           | coreutils=9.1 | REMOVED       |
+| `EXTRACT_TELOMERE`       | NA            | coreutils=9.1 |
+| `GAWK_CLEAN_TELOMERE`    | 5.3.0         | REMOVED       |
+| `GAWK_MAP_TELO`          | 5.3.0         | REMOVED       |
+| `GET_LARGEST_SCAFF`      | coreutils=9.1 | REMOVED       |
+
 ## [[1.4.2](https://github.com/sanger-tol/curationpretext/releases/tag/1.4.2)] - UNSC Nereid (H2) - [2025-07-28]
 
 ### Added and Fixed

From fb289182ad88cd11b339915bd5ed4b05851e9149 Mon Sep 17 00:00:00 2001
From: DLBPointon <damonlbp@hotmail.co.uk>
Date: Mon, 4 Aug 2025 17:23:51 +0100
Subject: [PATCH 07/58] Removed modules

---
 modules.json | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/modules.json b/modules.json
index cf41394e..30d74ced 100644
--- a/modules.json
+++ b/modules.json
@@ -35,11 +35,6 @@
                         "git_sha": "a29f18660f5e3748d44d6f716241e70c942c065d",
                         "installed_by": ["modules"]
                     },
-                    "cat/cat": {
-                        "branch": "master",
-                        "git_sha": "41dfa3f7c0ffabb96a6a813fe321c6d1cc5b6e46",
-                        "installed_by": ["modules"]
-                    },
                     "gawk": {
                         "branch": "master",
                         "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d",
@@ -97,11 +92,6 @@
                         "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d",
                         "installed_by": ["modules"]
                     },
-                    "tabix/bgziptabix": {
-                        "branch": "master",
-                        "git_sha": "f2cfcf9d3f6a2d123e6c44aefa788aa232204a7a",
-                        "installed_by": ["modules"]
-                    },
                     "ucsc/bedgraphtobigwig": {
                         "branch": "master",
                         "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d",

From e7478473e9a25284745182002b32227e1f81fdf1 Mon Sep 17 00:00:00 2001
From: DLBPointon <damonlbp@hotmail.co.uk>
Date: Mon, 4 Aug 2025 17:24:25 +0100
Subject: [PATCH 08/58] Added GAWK_UPPER_SEQUENCE to main workflow

---
 workflows/curationpretext.nf | 25 ++++++++++++++++++++-----
 1 file changed, 20 insertions(+), 5 deletions(-)

diff --git a/workflows/curationpretext.nf b/workflows/curationpretext.nf
index 324dc90c..6e63803d 100644
--- a/workflows/curationpretext.nf
+++ b/workflows/curationpretext.nf
@@ -5,11 +5,14 @@
 */
 
 include { SAMTOOLS_FAIDX                            } from '../modules/nf-core/samtools/faidx/main'
-include { GENERATE_MAPS                             } from '../subworkflows/local/generate_maps/main'
-include { ACCESSORY_FILES                           } from '../subworkflows/local/accessory_files/main'
+include { GAWK as GAWK_UPPER_SEQUENCE               } from '../modules/nf-core/gawk/main'
+
 include { PRETEXT_GRAPH as PRETEXT_INGEST_SNDRD     } from '../modules/local/pretext/graph/main'
 include { PRETEXT_GRAPH as PRETEXT_INGEST_HIRES     } from '../modules/local/pretext/graph/main'
 
+include { GENERATE_MAPS                             } from '../subworkflows/local/generate_maps/main'
+include { ACCESSORY_FILES                           } from '../subworkflows/local/accessory_files/main'
+
 include { paramsSummaryMap                          } from 'plugin/nf-schema'
 include { paramsSummaryMultiqc                      } from '../subworkflows/nf-core/utils_nfcore_pipeline'
 include { softwareVersionsToYAML                    } from '../subworkflows/nf-core/utils_nfcore_pipeline'
@@ -32,11 +35,23 @@ workflow CURATIONPRETEXT {
     ch_empty_file       = Channel.fromPath("${baseDir}/assets/EMPTY.txt")
 
 
+    //
+    // MODULE: UPPERCASE THE REFERENCE SEQUENCE
+    //
+    GAWK_UPPER_SEQUENCE(
+        ch_reference,
+        [],
+        false,
+    )
+    ch_upper_ref    = GAWK_UPPER_SEQUENCE.out.output
+    ch_versions     = ch_versions.mix( GAWK_UPPER_SEQUENCE.out.versions )
+
+
     //
     // MODULE: GENERATE INDEX OF REFERENCE FASTA
     //
     SAMTOOLS_FAIDX (
-        ch_reference,
+        ch_upper_ref,
         [[],[]],
         false
     )
@@ -76,7 +91,7 @@ workflow CURATIONPRETEXT {
         // SUBWORKFLOW: GENERATE SUPPLEMENTARY FILES FOR PRETEXT INGESTION
         //
         ACCESSORY_FILES (
-            ch_reference,
+            ch_upper_ref,
             ch_reads,
             val_teloseq,
             SAMTOOLS_FAIDX.out.fai
@@ -96,7 +111,7 @@ workflow CURATIONPRETEXT {
     //              - GENERATE_MAPS IS THE MINIMAL OUTPUT EXPECTED FROM THIS PIPELLINE
     //
     GENERATE_MAPS (
-        ch_reference,
+        ch_upper_ref,
         ch_cram_reads,
         SAMTOOLS_FAIDX.out.fai
     )

From 1ef9c58133491daff343c606ca968fc87d0dbd89 Mon Sep 17 00:00:00 2001
From: DLBPointon <damonlbp@hotmail.co.uk>
Date: Mon, 4 Aug 2025 17:26:13 +0100
Subject: [PATCH 09/58] Update subworkflows to remove modules, reorganise files
 and add split_telo support

---
 subworkflows/local/accessory_files/main.nf   | 16 +-----
 subworkflows/local/gap_finder/main.nf        |  1 -
 subworkflows/local/longread_coverage/main.nf | 14 ++----
 subworkflows/local/repeat_density/main.nf    | 15 ++++++
 subworkflows/local/telo_extraction/main.nf   | 53 +++-----------------
 subworkflows/local/telo_finder/main.nf       | 16 +-----
 6 files changed, 28 insertions(+), 87 deletions(-)

diff --git a/subworkflows/local/accessory_files/main.nf b/subworkflows/local/accessory_files/main.nf
index 83c0af65..1f04bb04 100644
--- a/subworkflows/local/accessory_files/main.nf
+++ b/subworkflows/local/accessory_files/main.nf
@@ -9,7 +9,6 @@ include { REPEAT_DENSITY                    } from '../repeat_density/main'
 include { LONGREAD_COVERAGE                 } from '../longread_coverage/main'
 
 include { GAWK as GAWK_GENERATE_GENOME_FILE } from '../../../modules/nf-core/gawk/main'
-include { GET_LARGEST_SCAFFOLD              } from '../../../modules/local/get/largest_scaffold/main'
 
 workflow ACCESSORY_FILES {
     take:
@@ -42,17 +41,6 @@ workflow ACCESSORY_FILES {
     ch_versions         = ch_versions.mix( GAWK_GENERATE_GENOME_FILE.out.versions )
 
 
-    //
-    // MODULE: Cut out the largest scaffold size and use as comparator against 512MB
-    //          This is the cut off for TABIX using tbi indexes
-    //          TODO: Investigate this as a pure groovy function.
-    //
-    GET_LARGEST_SCAFFOLD (
-        GAWK_GENERATE_GENOME_FILE.out.output
-    )
-    ch_versions         = ch_versions.mix( GET_LARGEST_SCAFFOLD.out.versions )
-
-
     //
     // SUBWORKFLOW: GENERATES A GAP.BED FILE TO ID THE LOCATIONS OF GAPS
     //
@@ -60,8 +48,7 @@ workflow ACCESSORY_FILES {
         gap_file            = ch_empty_file
     } else {
         GAP_FINDER (
-            reference_tuple,
-            GET_LARGEST_SCAFFOLD.out.scaff_size.map{it -> it[1].toInteger()}
+            reference_tuple
         )
         ch_versions         = ch_versions.mix(GAP_FINDER.out.versions)
         gap_file            = GAP_FINDER.out.gap_file.map{ it -> it[1] }
@@ -75,7 +62,6 @@ workflow ACCESSORY_FILES {
         telo_file       = ch_empty_file
     } else {
         TELO_FINDER (
-            GET_LARGEST_SCAFFOLD.out.scaff_size.map{it -> it[1].toInteger()},
             reference_tuple,
             val_teloseq
         )
diff --git a/subworkflows/local/gap_finder/main.nf b/subworkflows/local/gap_finder/main.nf
index 406f7173..10ca907c 100644
--- a/subworkflows/local/gap_finder/main.nf
+++ b/subworkflows/local/gap_finder/main.nf
@@ -9,7 +9,6 @@ include { GAWK as GAWK_GAP_LENGTH   } from '../../../modules/nf-core/gawk/main'
 workflow GAP_FINDER {
     take:
     reference_tuple     // Channel [ val(meta), path(fasta) ]
-    max_scaff_size      // val(size of largest scaffold in bp)
 
     main:
     ch_versions     = Channel.empty()
diff --git a/subworkflows/local/longread_coverage/main.nf b/subworkflows/local/longread_coverage/main.nf
index e2e988e3..9fd1f927 100644
--- a/subworkflows/local/longread_coverage/main.nf
+++ b/subworkflows/local/longread_coverage/main.nf
@@ -11,7 +11,6 @@ include { SAMTOOLS_MERGE                                } from '../../../modules
 include { SAMTOOLS_SORT                                 } from '../../../modules/nf-core/samtools/sort/main'
 include { SAMTOOLS_VIEW as SAMTOOLS_VIEW_FILTER_PRIMARY } from '../../../modules/nf-core/samtools/view/main'
 include { UCSC_BEDGRAPHTOBIGWIG                         } from '../../../modules/nf-core/ucsc/bedgraphtobigwig/main'
-include { GRAPH_OVERALL_COVERAGE                        } from '../../../modules/local/graph/overall_coverage/main'
 
 
 workflow LONGREAD_COVERAGE {
@@ -97,7 +96,9 @@ workflow LONGREAD_COVERAGE {
     //
     // MODULE: BAM TO PRIMARY BED
     //
-    BEDTOOLS_BAMTOBED(SAMTOOLS_VIEW_FILTER_PRIMARY.out.bam)
+    BEDTOOLS_BAMTOBED(
+        SAMTOOLS_VIEW_FILTER_PRIMARY.out.bam
+    )
     ch_versions         = ch_versions.mix(BEDTOOLS_BAMTOBED.out.versions)
 
 
@@ -140,15 +141,6 @@ workflow LONGREAD_COVERAGE {
     ch_versions         = ch_versions.mix( GNU_SORT.out.versions )
 
 
-    //
-    // MODULE: GENERATE DEPTHGRAPH
-    //
-    GRAPH_OVERALL_COVERAGE(
-        GNU_SORT.out.sorted
-    )
-    ch_versions         = ch_versions.mix( GRAPH_OVERALL_COVERAGE.out.versions )
-
-
     //
     // LOGIC: PREPARING NORMAL COVERAGE INPUT
     //
diff --git a/subworkflows/local/repeat_density/main.nf b/subworkflows/local/repeat_density/main.nf
index b691a5d0..ce4400d2 100644
--- a/subworkflows/local/repeat_density/main.nf
+++ b/subworkflows/local/repeat_density/main.nf
@@ -25,12 +25,15 @@ workflow REPEAT_DENSITY {
 
     main:
     ch_versions         = Channel.empty()
+
+
     //
     // MODULE: MARK UP THE REPEAT REGIONS OF THE REFERENCE GENOME
     //
     WINDOWMASKER_MKCOUNTS ( reference_tuple )
     ch_versions         = ch_versions.mix( WINDOWMASKER_MKCOUNTS.out.versions )
 
+
     //
     // MODULE: CALCULATE THE STATISTICS OF THE MARKED UP REGIONS
     //
@@ -38,18 +41,21 @@ workflow REPEAT_DENSITY {
                         reference_tuple )
     ch_versions         = ch_versions.mix( WINDOWMASKER_USTAT.out.versions )
 
+
     //
     // MODULE: USE USTAT OUTPUT TO EXTRACT REPEATS FROM FASTA
     //
     EXTRACT_REPEAT( WINDOWMASKER_USTAT.out.intervals )
     ch_versions         = ch_versions.mix( EXTRACT_REPEAT.out.versions )
 
+
     //
     // MODULE: CREATE WINDOWS FROM .GENOME FILE
     //
     BEDTOOLS_MAKEWINDOWS( dot_genome )
     ch_versions         = ch_versions.mix( BEDTOOLS_MAKEWINDOWS.out.versions )
 
+
     //
     // LOGIC: COMBINE TWO CHANNELS AND OUTPUT tuple(meta, windows_file, repeat_file)
     //
@@ -63,6 +69,7 @@ workflow REPEAT_DENSITY {
         }
         .set { intervals }
 
+
     //
     // MODULE: GENERATES THE REPEAT FILE FROM THE WINDOW FILE AND GENOME FILE
     //
@@ -72,6 +79,7 @@ workflow REPEAT_DENSITY {
     )
     ch_versions         = ch_versions.mix( BEDTOOLS_INTERSECT.out.versions )
 
+
     //
     // MODULE: FIXES IDS FOR REPEATS
     //
@@ -82,6 +90,7 @@ workflow REPEAT_DENSITY {
     )
     ch_versions         = ch_versions.mix( GAWK_RENAME_IDS.out.versions )
 
+
     //
     // MODULE: SORTS THE ABOVE BED FILES
     //
@@ -94,6 +103,7 @@ workflow REPEAT_DENSITY {
     GNU_SORT_C ( BEDTOOLS_MAKEWINDOWS.out.bed ) // windows file
     ch_versions         = ch_versions.mix( GNU_SORT_C.out.versions )
 
+
     //
     // MODULE: ADDS 4TH COLUMN TO BED FILE USED IN THE REPEAT DENSITY GRAPH
     //
@@ -104,6 +114,7 @@ workflow REPEAT_DENSITY {
     )
     ch_versions         = ch_versions.mix( GAWK_REFORMAT_INTERSECT.out.versions )
 
+
     //
     // LOGIC: COMBINES THE REFORMATTED INTERSECT FILE AND WINDOWS FILE CHANNELS AND SORTS INTO
     //        tuple(intersect_meta, windows file, intersect file)
@@ -118,6 +129,7 @@ workflow REPEAT_DENSITY {
         }
         .set { for_mapping }
 
+
     //
     // MODULE: MAPS THE REPEATS AGAINST THE REFERENCE GENOME
     //
@@ -127,6 +139,7 @@ workflow REPEAT_DENSITY {
     )
     ch_versions         = ch_versions.mix( BEDTOOLS_MAP.out.versions )
 
+
     //
     // MODULE: REPLACES . WITH 0 IN MAPPED FILE
     //
@@ -137,6 +150,7 @@ workflow REPEAT_DENSITY {
     )
     ch_versions         = ch_versions.mix( GAWK_REPLACE_DOTS.out.versions )
 
+
     //
     // MODULE: CONVERTS GENOME FILE AND BED INTO A BIGWIG FILE
     //
@@ -146,6 +160,7 @@ workflow REPEAT_DENSITY {
     )
     ch_versions         = ch_versions.mix( UCSC_BEDGRAPHTOBIGWIG.out.versions )
 
+
     emit:
     repeat_density      = UCSC_BEDGRAPHTOBIGWIG.out.bigwig
     versions            = ch_versions
diff --git a/subworkflows/local/telo_extraction/main.nf b/subworkflows/local/telo_extraction/main.nf
index 73a955e6..e5bfd667 100644
--- a/subworkflows/local/telo_extraction/main.nf
+++ b/subworkflows/local/telo_extraction/main.nf
@@ -1,8 +1,5 @@
-include { GAWK as GAWK_CLEAN_TELOMERE   } from '../../../modules/nf-core/gawk/main'
-include { GAWK as GAWK_MAP_TELO         } from '../../../modules/nf-core/gawk/main'
 include { FIND_TELOMERE_WINDOWS         } from '../../../modules/local/find/telomere_windows/main'
-include { EXTRACT_TELO                  } from '../../../modules/local/extract/telo/main'
-include { TABIX_BGZIPTABIX              } from '../../../modules/nf-core/tabix/bgziptabix'
+include { EXTRACT_TELOMERE              } from '../../../modules/local/extract/telomere/main'
 
 workflow TELO_EXTRACTION {
     take:
@@ -11,19 +8,6 @@ workflow TELO_EXTRACTION {
     main:
     ch_versions         = Channel.empty()
 
-    //
-    // MODULE: CLEAN THE .TELOMERE FILE IF CONTAINS "you screwed up" ERROR MESSAGE
-    //          (LIKELY WHEN USING LOWERCASE LETTERS OR BAD MOTIF)
-    //          WORKS BE RETURNING LINES THAT START WITH '>'
-    //
-    GAWK_CLEAN_TELOMERE (
-        telomere_file,
-        [],
-        false
-    )
-    ch_versions         = ch_versions.mix( GAWK_CLEAN_TELOMERE.out.versions )
-
-
     //
     // MODULE: GENERATES A WINDOWS FILE FROM THE ABOVE
     //
@@ -34,41 +18,20 @@ workflow TELO_EXTRACTION {
 
 
     def windows_file    = FIND_TELOMERE_WINDOWS.out.windows
-    def fallback_file   = GAWK_CLEAN_TELOMERE.out.output
-
-    // Use EXTRACT_TELO if windows_file has content, otherwise fallback to GAWK_MAP_TELO
     def safe_windows    = windows_file.ifEmpty { Channel.empty() }
-    def fallback_valid  = fallback_file.ifEmpty { Channel.empty() }
-
-    EXTRACT_TELO(
-        safe_windows
-    )
-    ch_versions         = ch_versions.mix( EXTRACT_TELO.out.versions )
-
-    GAWK_MAP_TELO(
-        fallback_valid,
-        [],
-        false
-    )
-    ch_gawk_output      = GAWK_MAP_TELO.out.output.ifEmpty( Channel.empty() )
-    ch_versions         = ch_versions.mix( GAWK_MAP_TELO.out.versions )
 
     //
-    // MODULE: Merge bed files into one for TABIX_BGZIPTABIX
+    // MODULE: Extract the telomere data from the FIND_TELOMERE
+    //          file and reformat into bed
     //
-    // EXTRACT_TELO is the more important of the two, then we go to fallback, then just stop no point in running on empty file.
-    def merged_bed      = EXTRACT_TELO.out.bed.ifEmpty { ch_gawk_output }
-
-
-    TABIX_BGZIPTABIX(
-        merged_bed
+    EXTRACT_TELOMERE(
+        safe_windows
     )
-    ch_versions     = ch_versions.mix( TABIX_BGZIPTABIX.out.versions )
+    ch_versions         = ch_versions.mix( EXTRACT_TELOMERE.out.versions )
+
 
     emit:
-    bed_file        = merged_bed
-    bed_gz_tbi      = TABIX_BGZIPTABIX.out.gz_tbi
-    bedgraph_file   = EXTRACT_TELO.out.bedgraph
+    bedgraph_file   = EXTRACT_TELOMERE.out.bedgraph
     versions        = ch_versions
 
 }
diff --git a/subworkflows/local/telo_finder/main.nf b/subworkflows/local/telo_finder/main.nf
index 7b9893a8..ce827a3d 100644
--- a/subworkflows/local/telo_finder/main.nf
+++ b/subworkflows/local/telo_finder/main.nf
@@ -3,7 +3,6 @@
 //
 // MODULE IMPORT BLOCK
 //
-include { GAWK as GAWK_UPPER_SEQUENCE   } from '../../../modules/nf-core/gawk/main'
 include { FIND_TELOMERE_REGIONS         } from '../../../modules/local/find/telomere_regions/main'
 include { GAWK as GAWK_SPLIT_DIRECTIONS } from '../../../modules/local/gawk/main'
 
@@ -12,7 +11,6 @@ include { TELO_EXTRACTION               } from '../../../subworkflows/local/telo
 workflow TELO_FINDER {
 
     take:
-    max_scaff_size      // val(size of largest scaffold in bp)
     reference_tuple     // Channel [ val(meta), path(fasta) ]
     teloseq
 
@@ -20,21 +18,11 @@ workflow TELO_FINDER {
     ch_versions     = Channel.empty()
 
 
-    //
-    // MODULE: UPPERCASE THE REFERENCE SEQUENCE
-    //
-    GAWK_UPPER_SEQUENCE(
-        reference_tuple,
-        [],
-        false,
-    )
-    ch_versions     = ch_versions.mix( GAWK_UPPER_SEQUENCE.out.versions )
-
     //
     // MODULE: FINDS THE TELOMERIC SEQEUNCE IN REFERENCE
     //
     FIND_TELOMERE_REGIONS (
-        GAWK_UPPER_SEQUENCE.out.output,
+        reference_tuple,
         teloseq
     )
     ch_versions     = ch_versions.mix( FIND_TELOMERE_REGIONS.out.versions )
@@ -94,8 +82,6 @@ workflow TELO_FINDER {
 
 
     emit:
-    bed_file        = TELO_EXTRACTION.out.bed_file.collect()    // Not used anymore
-    bed_gz_tbi      = TELO_EXTRACTION.out.bed_gz_tbi.collect()  // Not used anymore
     bedgraph_file   = telo_bedgraphs                            // Used in pretext_graph
     versions        = ch_versions
 }

From a298944dc9ba1da667371c6a8649f5f1963ba195 Mon Sep 17 00:00:00 2001
From: DLBPointon <damonlbp@hotmail.co.uk>
Date: Mon, 4 Aug 2025 17:26:30 +0100
Subject: [PATCH 10/58] Update files

---
 conf/modules.config   | 12 ------------
 conf/test.config      | 19 ++++++++++---------
 conf/test_full.config | 20 +++++++++++---------
 3 files changed, 21 insertions(+), 30 deletions(-)

diff --git a/conf/modules.config b/conf/modules.config
index ccdb5010..02b8162f 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -85,18 +85,6 @@ process {
         ext.suffix = 'fasta'
     }
 
-    withName: 'GAWK_CLEAN_TELOMERE' {
-        ext.args2 = "'/^>/'"
-        ext.prefix = { "${meta.id}_CLEAN" }
-        ext.suffix = 'telomere'
-    }
-
-    withName: 'GAWK_MAP_TELO' {
-        ext.args2 = { "-v OFS=\"\t\" 'BEGIN { sub(/^>/, \"\"); print \$1, \$4, \$5, \$6 }'" }
-        ext.prefix   = { "${meta.id}_map_telo" }
-        ext.suffix   = 'bed'
-    }
-
     withName: 'GAWK_SPLIT_DIRECTIONS' {
         ext.prefix  = { "${input}_telo" }
         ext.suffix  = 'telomere'
diff --git a/conf/test.config b/conf/test.config
index 80d23e85..f98582dd 100644
--- a/conf/test.config
+++ b/conf/test.config
@@ -22,13 +22,14 @@ params {
     config_profile_name        = 'Full test profile'
     config_profile_description = 'Full test dataset to check pipeline function'
 
-    input       = "${baseDir}/TreeValTinyData/assembly/draft/grTriPseu1.fa"
-    reads       = "${baseDir}/TreeValTinyData/genomic_data/pacbio/"
-    cram        = "${baseDir}/TreeValTinyData/genomic_data/hic-arima/"
-    sample      = "CurationPretextTest"
-    teloseq     = "TTAGGG"
-    aligner     = "bwamem2"
-    all_output  = false
-    skip_tracks = "NONE"
-    run_hires   = false
+    input           = "${baseDir}/TreeValTinyData/assembly/draft/grTriPseu1.fa"
+    reads           = "${baseDir}/TreeValTinyData/genomic_data/pacbio/"
+    cram            = "${baseDir}/TreeValTinyData/genomic_data/hic-arima/"
+    sample          = "CurationPretextTest"
+    teloseq         = "TTAGGG"
+    aligner         = "bwamem2"
+    all_output      = false
+    skip_tracks     = "NONE"
+    run_hires       = false
+    split_telomere  = true
 }
diff --git a/conf/test_full.config b/conf/test_full.config
index e164c0aa..3166bfd1 100644
--- a/conf/test_full.config
+++ b/conf/test_full.config
@@ -20,13 +20,15 @@ params {
     // Input data for full size test
     // Limit resources so that this can run on GitHub Actions
 
-    sample      = "testing"
-    input       = "/nfs/treeoflife-01/resources/nextflow/test-data/resources/treeval/TreeValTinyData/assembly/draft/grTriPseu1.fa"
-    reads       = "/nfs/treeoflife-01/resources/nextflow/test-data/resources/treeval/TreeValTinyData/genomic_data/pacbio/"
-    cram        = "/nfs/treeoflife-01/resources/nextflow/test-data/resources/treeval/TreeValTinyData/genomic_data/hic-arima/"
-    sample      = "CurationPretextTest"
-    teloseq     = "TTAGGG"
-    aligner     = "bwamem2"
-    all_output  = true
-    skip_tracks = "NONE"
+    sample          = "testing"
+    input           = "/nfs/treeoflife-01/resources/nextflow/test-data/resources/treeval/TreeValTinyData/assembly/draft/grTriPseu1.fa"
+    reads           = "/nfs/treeoflife-01/resources/nextflow/test-data/resources/treeval/TreeValTinyData/genomic_data/pacbio/"
+    cram            = "/nfs/treeoflife-01/resources/nextflow/test-data/resources/treeval/TreeValTinyData/genomic_data/hic-arima/"
+    sample          = "CurationPretextTest"
+    teloseq         = "TTAGGG"
+    aligner         = "bwamem2"
+    all_output      = true
+    skip_tracks     = "NONE"
+    split_telomere  = true
+
 }

From 6d6f75724ed4916b0ba8974a9288a14bda9a84a3 Mon Sep 17 00:00:00 2001
From: DLBPointon <damonlbp@hotmail.co.uk>
Date: Mon, 4 Aug 2025 17:32:05 +0100
Subject: [PATCH 11/58] support split_telomere

---
 bin/gawk_split_directions.awk | 8 ++++++++
 1 file changed, 8 insertions(+)
 create mode 100644 bin/gawk_split_directions.awk

diff --git a/bin/gawk_split_directions.awk b/bin/gawk_split_directions.awk
new file mode 100644
index 00000000..df82aa10
--- /dev/null
+++ b/bin/gawk_split_directions.awk
@@ -0,0 +1,8 @@
+## Split a tab-separated telomere file into one output file per distinct value of column 3
+## Each record is written unchanged to "direction.<column 3>.telomere". Date: 03/07/2025
+
+BEGIN {
+    FS="\t"; OFS="\t"
+} {
+    print > ("direction." $3 ".telomere")   # parenthesised: bare concatenation after ">" is not portable awk
+}

From 88bea91760f9ea7500a72f6f9fe467d03797c8b5 Mon Sep 17 00:00:00 2001
From: DLBPointon <damonlbp@hotmail.co.uk>
Date: Mon, 4 Aug 2025 17:34:23 +0100
Subject: [PATCH 12/58] Additions

---
 CHANGELOG.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index e2c8ccf9..0e297302 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -20,6 +20,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
   - Remove `GRAPH_OVERALL_COVERAGE` as it is not in use.
 - Better formatting in some files.
 - Moved `GAWK_UPPER_SEQUENCE` from the `TELO_FINDER` subworkflow to the first step of the main `curationpretext` workflow, this simply makes more sense.
+- Removed no longer needed scripts from bin.
+- Added the `gawk_split_directions.awk` script for split telomere.
 
 ### Paramters
 

From 39dc704a7132942319d4e38229d49e68bc79f267 Mon Sep 17 00:00:00 2001
From: DLBPointon <damonlbp@hotmail.co.uk>
Date: Mon, 4 Aug 2025 17:40:26 +0100
Subject: [PATCH 13/58] Update Version

---
 nextflow.config | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/nextflow.config b/nextflow.config
index eb54c2ee..9b225f43 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -260,7 +260,7 @@ manifest {
     mainScript      = 'main.nf'
     defaultBranch   = 'main'
     nextflowVersion = '!>=24.04.2'
-    version         = '1.4.2'
+    version         = '1.5.0'
     doi             = '10.5281/zenodo.12773958'
 }
 

From 426325392294a4426f232e1bedc7ff1fd8df3535 Mon Sep 17 00:00:00 2001
From: DLBPointon <damonlbp@hotmail.co.uk>
Date: Mon, 4 Aug 2025 19:58:39 +0100
Subject: [PATCH 14/58] Updated test

---
 tests/main.nf.test.snap | 43 ++++++++++++++++++++++-------------------
 1 file changed, 23 insertions(+), 20 deletions(-)

diff --git a/tests/main.nf.test.snap b/tests/main.nf.test.snap
index 7815abe3..32259ee9 100644
--- a/tests/main.nf.test.snap
+++ b/tests/main.nf.test.snap
@@ -1,7 +1,7 @@
 {
     "Full run": {
         "content": [
-            40,
+            42,
             {
                 "BEDTOOLS_BAMTOBED": {
                     "bedtools": "2.31.1"
@@ -43,9 +43,6 @@
                 "FIND_TELOMERE_WINDOWS": {
                     "telomere": 1.0
                 },
-                "GAWK_CLEAN_TELOMERE": {
-                    "gawk": "5.3.0"
-                },
                 "GAWK_GAP_LENGTH": {
                     "gawk": "5.3.0"
                 },
@@ -61,12 +58,11 @@
                 "GAWK_REPLACE_DOTS": {
                     "gawk": "5.3.0"
                 },
-                "GAWK_UPPER_SEQUENCE": {
+                "GAWK_SPLIT_DIRECTIONS": {
                     "gawk": "5.3.0"
                 },
-                "GET_LARGEST_SCAFFOLD": {
-                    "get_largest_scaffold": 2.0,
-                    "coreutils": 9.1
+                "GAWK_UPPER_SEQUENCE": {
+                    "gawk": "5.3.0"
                 },
                 "GNU_SORT": {
                     "coreutils": 9.3
@@ -80,10 +76,6 @@
                 "GNU_SORT_C": {
                     "coreutils": 9.3
                 },
-                "GRAPH_OVERALL_COVERAGE": {
-                    "perl": "(v5.26.2))",
-                    "graph_overall_coverage.pl": 1.0
-                },
                 "MINIMAP2_ALIGN": {
                     "minimap2": "2.28-r1209",
                     "samtools": 1.2
@@ -125,13 +117,17 @@
                     "windowmasker": "1.0.0"
                 },
                 "Workflow": {
-                    "sanger-tol/curationpretext": "v1.4.2"
+                    "sanger-tol/curationpretext": "v1.5.0"
                 }
             },
             [
                 "accessory_files",
                 "accessory_files/CurationPretextTest.bigWig",
                 "accessory_files/CurationPretextTest.gap.bedgraph",
+                "accessory_files/CurationPretextTest_3P_telomere.bed",
+                "accessory_files/CurationPretextTest_3P_telomere.bedgraph",
+                "accessory_files/CurationPretextTest_5P_telomere.bed",
+                "accessory_files/CurationPretextTest_5P_telomere.bedgraph",
                 "accessory_files/CurationPretextTest_telomere.bed",
                 "accessory_files/CurationPretextTest_telomere.bedgraph",
                 "accessory_files/coverage.bigWig",
@@ -139,24 +135,31 @@
                 "pipeline_info/sanger-tol_curationpretext_software_versions.yml",
                 "pretext_maps_processed",
                 "pretext_maps_processed/CurationPretextTest_normal.pretext",
+                "pretext_maps_processed/telo_0.pretext",
+                "pretext_maps_processed/telo_1.pretext",
+                "pretext_maps_processed/telo_2.pretext",
                 "pretext_maps_raw",
                 "pretext_maps_raw/CurationPretextTest_normal_pi.pretext",
                 "pretext_snapshot",
                 "pretext_snapshot/CurationPretextTest_normalFullMap.png"
             ],
-            14,
+            21,
             [
                 "CurationPretextTest.bigWig:md5,3f66a9152d793a62f877b733c2336dfd",
                 "CurationPretextTest.gap.bedgraph:md5,d41d8cd98f00b204e9800998ecf8427e",
+                "CurationPretextTest_3P_telomere.bed:md5,d41d8cd98f00b204e9800998ecf8427e",
+                "CurationPretextTest_3P_telomere.bedgraph:md5,d41d8cd98f00b204e9800998ecf8427e",
+                "CurationPretextTest_5P_telomere.bed:md5,d41d8cd98f00b204e9800998ecf8427e",
+                "CurationPretextTest_5P_telomere.bedgraph:md5,d41d8cd98f00b204e9800998ecf8427e",
                 "CurationPretextTest_telomere.bed:md5,d41d8cd98f00b204e9800998ecf8427e",
                 "CurationPretextTest_telomere.bedgraph:md5,d41d8cd98f00b204e9800998ecf8427e",
-                "coverage.bigWig:md5,2e474506c957152b231ac63c859f0b17"
+                "coverage.bigWig:md5,39b3e8b7751b33758087cafc9a3c689e"
             ],
-            5,
+            9,
             1,
             false,
             true,
-            1,
+            4,
             false,
             true,
             1,
@@ -164,8 +167,8 @@
         ],
         "meta": {
             "nf-test": "0.9.2",
-            "nextflow": "24.04.4"
+            "nextflow": "25.04.1"
         },
-        "timestamp": "2025-04-16T11:23:34.556355"
+        "timestamp": "2025-08-04T17:47:27.212054464"
     }
-}
+}
\ No newline at end of file

From 6f1f44955077acbeae28c7178ef7671ecfe7e245 Mon Sep 17 00:00:00 2001
From: DLBPointon <damonlbp@hotmail.co.uk>
Date: Mon, 4 Aug 2025 22:38:21 +0100
Subject: [PATCH 15/58] Update tests

---
 .nf-core.yml                                  |   2 +-
 CITATION.cff                                  |   4 +-
 bin/findHalfcoverage.py                       | 177 ---------------
 bin/get_avgcov.sh                             |  17 --
 bin/graph_overall_coverage.pl                 |  34 ---
 bin/longread_cov_log.py                       |  43 ----
 modules/local/get/largest_scaffold/main.nf    |  43 ----
 modules/local/graph/overall_coverage/main.nf  |  43 ----
 modules/nf-core/cat/cat/environment.yml       |   7 -
 modules/nf-core/cat/cat/main.nf               |  78 -------
 modules/nf-core/cat/cat/meta.yml              |  46 ----
 modules/nf-core/cat/cat/tests/main.nf.test    | 191 ----------------
 .../nf-core/cat/cat/tests/main.nf.test.snap   | 147 -------------
 .../cat/tests/nextflow_unzipped_zipped.config |   6 -
 .../cat/tests/nextflow_zipped_unzipped.config |   8 -
 .../nf-core/tabix/bgziptabix/environment.yml  |   8 -
 modules/nf-core/tabix/bgziptabix/main.nf      |  48 ----
 modules/nf-core/tabix/bgziptabix/meta.yml     |  74 -------
 .../tabix/bgziptabix/tests/main.nf.test       | 123 -----------
 .../tabix/bgziptabix/tests/main.nf.test.snap  | 206 ------------------
 .../tabix/bgziptabix/tests/tabix_csi.config   |   5 -
 .../tabix/bgziptabix/tests/tabix_tbi.config   |   5 -
 tests/main.nf.test                            |   1 +
 23 files changed, 4 insertions(+), 1312 deletions(-)
 delete mode 100755 bin/findHalfcoverage.py
 delete mode 100755 bin/get_avgcov.sh
 delete mode 100755 bin/graph_overall_coverage.pl
 delete mode 100755 bin/longread_cov_log.py
 delete mode 100644 modules/local/get/largest_scaffold/main.nf
 delete mode 100644 modules/local/graph/overall_coverage/main.nf
 delete mode 100644 modules/nf-core/cat/cat/environment.yml
 delete mode 100644 modules/nf-core/cat/cat/main.nf
 delete mode 100644 modules/nf-core/cat/cat/meta.yml
 delete mode 100644 modules/nf-core/cat/cat/tests/main.nf.test
 delete mode 100644 modules/nf-core/cat/cat/tests/main.nf.test.snap
 delete mode 100644 modules/nf-core/cat/cat/tests/nextflow_unzipped_zipped.config
 delete mode 100644 modules/nf-core/cat/cat/tests/nextflow_zipped_unzipped.config
 delete mode 100644 modules/nf-core/tabix/bgziptabix/environment.yml
 delete mode 100644 modules/nf-core/tabix/bgziptabix/main.nf
 delete mode 100644 modules/nf-core/tabix/bgziptabix/meta.yml
 delete mode 100644 modules/nf-core/tabix/bgziptabix/tests/main.nf.test
 delete mode 100644 modules/nf-core/tabix/bgziptabix/tests/main.nf.test.snap
 delete mode 100644 modules/nf-core/tabix/bgziptabix/tests/tabix_csi.config
 delete mode 100644 modules/nf-core/tabix/bgziptabix/tests/tabix_tbi.config

diff --git a/.nf-core.yml b/.nf-core.yml
index e3d2362b..2f446907 100644
--- a/.nf-core.yml
+++ b/.nf-core.yml
@@ -48,4 +48,4 @@ template:
     - seqera_platform
     - multiqc
     - rocrate
-  version: 1.4.2
+  version: 1.5.0
diff --git a/CITATION.cff b/CITATION.cff
index 9d72b971..0abe02de 100644
--- a/CITATION.cff
+++ b/CITATION.cff
@@ -30,6 +30,6 @@ identifiers:
     value: 10.5281/zenodo.12773958
 repository-code: "https://github.com/sanger-tol/curationpretext"
 license: MIT
-version: 1.4.2
-date-released: "2025-07-28"
+version: 1.5.0
+date-released: "2025-08-04"
 url: "https://pipelines.tol.sanger.ac.uk/curationpretext"
diff --git a/bin/findHalfcoverage.py b/bin/findHalfcoverage.py
deleted file mode 100755
index f83fdcc5..00000000
--- a/bin/findHalfcoverage.py
+++ /dev/null
@@ -1,177 +0,0 @@
-#! /usr/bin/env python3
-
-import re
-import sys
-from optparse import OptionParser
-
-
-def load_scafsize(file):
-    # example is my.genome file, "scaffold\tsize"
-
-    scafkey = {}
-    scaffile = open(file, "r")
-    for line in scaffile:
-        line = line.replace("\n", "")
-        name, size = re.split("\t", line)
-        scafkey[name] = size
-
-    scaffile.close()
-    return scafkey
-
-
-def getTotallength_undercov(file, cov, wiggleroom):
-    # example is bed file of coverage,
-    # scaffold_100_arrow      0       2       18
-
-    coverage_cutoff = cov + wiggleroom
-
-    myfile = open(file, "r")
-
-    lowcoverage_sum = 0
-    prev_scaf = ""
-    scaf_lc = {}
-
-    for line in myfile:
-        line = line.replace("\n", "")
-        objContents = re.split("\t", line)
-
-        if prev_scaf != objContents[0]:
-            scaf_lc[prev_scaf] = lowcoverage_sum
-            lowcoverage_sum = 0
-
-        if float(objContents[3]) < coverage_cutoff:
-            length = float(objContents[2]) - float(objContents[1])
-            lowcoverage_sum += length
-
-        prev_scaf = objContents[0]
-
-    scaf_lc[prev_scaf] = lowcoverage_sum
-    myfile.close()
-
-    return scaf_lc
-
-
-def get_cov_peaks(file):
-    # example is depthgraph.txt, "coverage\tbasepair count"
-
-    myPeakFile = open(file, "r")
-
-    rows = []
-    for line in myPeakFile:
-        line = line.replace("\n", "")
-        items = re.split("\t", line)
-        rows.append(items)
-
-    myPeakFile.close()
-    # print(rows[0])
-    peakCov = sorted(rows, key=lambda cov: int(cov[1]), reverse=1)[0][0]
-
-    if int(peakCov) == 0:
-        peakCov = sorted(rows, key=lambda cov: int(cov[1]), reverse=1)[1][0]
-
-    halfPeak = int(peakCov) / 2
-    qrtPeak = int(peakCov) / 4
-
-    print("#Coverage Peak is %s, HalfPeak is %s, QuarterPeak is %s " % (peakCov, halfPeak, qrtPeak))
-
-    return (peakCov, halfPeak, qrtPeak)
-
-
-def calc_coverage(scafsize, totallowcov):
-    # calculate the % for lowcov coverage over entire scaffold.
-    return totallowcov / scafsize * 100
-
-
-def getArguments():
-    # get indivudual arguments from user
-
-    parser = OptionParser(version="%prog 1.0")
-    parser.add_option(
-        "-c", "--coveragefile", action="store", type="string", dest="covfile", help="Scaffold Coverage filename"
-    )
-    parser.add_option(
-        "-m", "--mygenome", action="store", type="string", dest="mygenome", help="mygenome file, scaffold - size file"
-    )
-    parser.add_option(
-        "-d",
-        "--depthgraph",
-        action="store",
-        type="string",
-        dest="depth",
-        help="depthgraph file, bp count at each depth",
-    )
-    parser.add_option(
-        "-w",
-        "--wiggle",
-        action="store",
-        type="float",
-        dest="wig",
-        default=5,
-        help="wiggle room to add to depth cutoff ie 30X + wiggleroom. Default is 5X",
-    )
-    parser.add_option(
-        "--cut",
-        action="store",
-        type="float",
-        dest="covcut",
-        default=60,
-        help="%Number for coverage cutoff to include in results.  ie 50% of scaffold needs to be under diploid peak etc.  Default is 60%",
-    )
-    parser.add_option(
-        "-t",
-        "--totalsize",
-        action="store",
-        type="int",
-        dest="totsize",
-        default=250000,
-        help="total size that determines max coverage boundary.",
-    )
-
-    (options, args) = parser.parse_args()
-
-    if options.covfile == None or options.mygenome == None or options.depth == None:
-        print("Missing Options")
-        exit()
-
-    return options
-
-
-def main():
-    # main program
-
-    options = getArguments()
-
-    scaffold_sizes = load_scafsize(options.mygenome)
-    (hapCov, dipCov, tetCov) = get_cov_peaks(options.depth)
-    scaffold_lowcovsum = getTotallength_undercov(options.covfile, dipCov, options.wig)
-
-    for scaffoldName in scaffold_lowcovsum:
-        if scaffoldName == "":
-            continue
-
-        # print("==" + scaffoldName)
-        totalSize = float(scaffold_sizes[scaffoldName])
-        lowcovSize = float(scaffold_lowcovsum[scaffoldName])
-
-        coverage = calc_coverage(totalSize, lowcovSize)
-
-        if coverage > options.covcut:
-            if totalSize > options.totsize:
-                print(
-                    "**\t"
-                    + "\t".join(
-                        [str(i) for i in [scaffoldName, int(totalSize), int(lowcovSize), "{:.1f}".format(coverage)]]
-                    )
-                )
-            else:
-                print(
-                    "==\t"
-                    + "\t".join(
-                        [str(i) for i in [scaffoldName, int(totalSize), int(lowcovSize), "{:.1f}".format(coverage)]]
-                    )
-                )
-
-
-# -- script execuation -- #
-if __name__ == "__main__":
-    main()
diff --git a/bin/get_avgcov.sh b/bin/get_avgcov.sh
deleted file mode 100755
index 2eac5ca5..00000000
--- a/bin/get_avgcov.sh
+++ /dev/null
@@ -1,17 +0,0 @@
-#!/bin/bash
-
-# get_avgcov.sh
-# -------------------
-# A shell script to calculate average coverage for each scaffold
-# into bed format for use
-# -------------------
-# Author = yy5
-# Modified = dp24
-# -------------------
-version='1.0.0'
-if [ $1 == '-v' ];
-then
-    echo "$version"
-else
-    awk '{OFS="\t"; $5=$4*($3-$2); print}' $1|awk '{OFS="\t"; sum[$1]+=$5} END {for (chrom in sum) print chrom, sum[chrom]}'|awk 'BEGIN {FS="\t"; OFS="\t"} NR==FNR {genome[$1]=$2; next} {if ($1 in genome) print $1, genome[$1], $2, $3; else print $1, "NA", $2, $3}' -  $2| awk '{OFS="\t"; print $1,"0",$3,($2/$3)}' | awk 'BEGIN {FS="\t"; OFS="\t"} {printf "%s\t%s\t%s\t%.0f\n", $1, $2, $3, int($4 + 0.5)}'|sort -T $4 -k1,1 -k2,2n> $3
-fi
diff --git a/bin/graph_overall_coverage.pl b/bin/graph_overall_coverage.pl
deleted file mode 100755
index 174e61b7..00000000
--- a/bin/graph_overall_coverage.pl
+++ /dev/null
@@ -1,34 +0,0 @@
-#!/usr/bin/env perl
-
-# Script originally developed by Yumi Sims (yy5@sanger.ac.uk)
-
-use warnings;
-
-# my $file = shift;
-
-my ($file) = @ARGV;
-
-if (!@ARGV || ($ARGV[0] eq '--version')) {
-    print "1.0\n";
-    exit 0;
-}
-
-open (FILE, $file) || die "can't open file $file\n";
-
-my %depthcount;
-while (my $line = <FILE>) {
-    chomp $line;
-    my ($id, $start, $end, $depth) = split ("\t", $line);
-    my $length = $end - $start;
-
-    if ($depthcount{$depth}){
-        $depthcount{$depth} += $length;
-    }
-    else {
-        $depthcount{$depth} = $length;
-    }
-}
-
-foreach my $depth (sort {$a<=>$b} keys %depthcount){
-    print join("\t", $depth, $depthcount{$depth}) ."\n";
-}
diff --git a/bin/longread_cov_log.py b/bin/longread_cov_log.py
deleted file mode 100755
index d5cc177c..00000000
--- a/bin/longread_cov_log.py
+++ /dev/null
@@ -1,43 +0,0 @@
-#!/usr/bin/env python
-
-import optparse
-import math
-
-# Script originally developed by Will Eagles (we3@sanger.ac.uk)
-
-
-def process_line(line):
-    line_values = line.rsplit(None, 1)
-
-    try:
-        cov_val = float(line_values[1])
-    except:
-        cov_val = 0
-
-    if cov_val > 0:
-        log_cov_val = math.log(cov_val)
-    else:
-        log_cov_val = 0
-
-    return line_values[0] + "\t" + str(round(log_cov_val, 2))
-
-
-def main():
-    parser = optparse.OptionParser(version="%prog 1.0")
-    parser.add_option(
-        "-i",
-        "--inputfile",
-        dest="inputfile",
-        default="default.input",
-    )
-
-    options, remainder = parser.parse_args()
-
-    cov_bed = open(options.inputfile, "r")
-
-    for line in cov_bed:
-        print(process_line(line))
-
-
-if __name__ == "__main__":
-    main()
diff --git a/modules/local/get/largest_scaffold/main.nf b/modules/local/get/largest_scaffold/main.nf
deleted file mode 100644
index a496a800..00000000
--- a/modules/local/get/largest_scaffold/main.nf
+++ /dev/null
@@ -1,43 +0,0 @@
-process GET_LARGEST_SCAFFOLD {
-
-    tag "$meta.id"
-    label 'process_low'
-
-    conda "conda-forge::coreutils=9.1"
-    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/ubuntu:20.04' :
-        'docker.io/ubuntu:20.04' }"
-
-    input:
-    tuple val( meta ), path( file )
-
-    output:
-    env largest_scaff,          emit: scaff_size
-    path "versions.yml",        emit: versions
-
-    script:
-    def LARGEST_SCAFF_VERSION   = "2.0"
-    def VERSION                 = "9.1" // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions.
-    """
-    largest_scaff=\$(head -n 1 "${file}" | cut -d\$'\t' -f2)
-
-    cat <<-END_VERSIONS > versions.yml
-    "${task.process}":
-        get_largest_scaffold: $LARGEST_SCAFF_VERSION
-        coreutils: $VERSION
-    END_VERSIONS
-    """
-
-    stub:
-    def LARGEST_SCAFF_VERSION   = "2.0"
-    def VERSION                 = "9.1" // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions.
-    """
-    largest_scaff=1000000
-
-    cat <<-END_VERSIONS > versions.yml
-    "${task.process}":
-        get_largest_scaff: $LARGEST_SCAFF_VERSION
-        coreutils: $VERSION
-    END_VERSIONS
-    """
-}
diff --git a/modules/local/graph/overall_coverage/main.nf b/modules/local/graph/overall_coverage/main.nf
deleted file mode 100644
index 87892813..00000000
--- a/modules/local/graph/overall_coverage/main.nf
+++ /dev/null
@@ -1,43 +0,0 @@
-process GRAPH_OVERALL_COVERAGE {
-    tag "$meta.id"
-    label 'process_single'
-
-    conda "conda-forge::perl=5.26.2"
-    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/perl:5.26.2' :
-        'biocontainers/perl:5.26.2' }"
-
-    input:
-    tuple val(meta), path(bed)
-
-    output:
-    tuple val(meta), path("*.part") , emit: part
-    path "versions.yml"             , emit: versions
-
-    when:
-    task.ext.when == null || task.ext.when
-
-    script:
-    def args = task.ext.args ?: ''
-    def prefix = task.ext.prefix ?: "${meta.id}"
-    """
-    graph_overall_coverage.pl $bed > ${prefix}.part
-
-    cat <<-END_VERSIONS > versions.yml
-    "${task.process}":
-        perl: \$(echo \$(perl --version 2>&1) | awk '/This/ {print \$9}'))
-        graph_overall_coverage.pl: \$(graph_overall_coverage.pl --version)
-    END_VERSIONS
-    """
-
-    stub:
-    def prefix = task.ext.prefix ?: "${meta.id}"
-    """
-    touch ${prefix}.part
-
-    cat <<-END_VERSIONS > versions.yml
-    "${task.process}":
-        perl: \$(echo \$(perl --version 2>&1) | awk '/This/ {print \$9}'))        graph_overall_coverage.pl: \$(graph_overall_coverage.pl --version)
-    END_VERSIONS
-    """
-}
diff --git a/modules/nf-core/cat/cat/environment.yml b/modules/nf-core/cat/cat/environment.yml
deleted file mode 100644
index 50c2059a..00000000
--- a/modules/nf-core/cat/cat/environment.yml
+++ /dev/null
@@ -1,7 +0,0 @@
----
-# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
-channels:
-  - conda-forge
-  - bioconda
-dependencies:
-  - conda-forge::pigz=2.3.4
diff --git a/modules/nf-core/cat/cat/main.nf b/modules/nf-core/cat/cat/main.nf
deleted file mode 100644
index 2862c64c..00000000
--- a/modules/nf-core/cat/cat/main.nf
+++ /dev/null
@@ -1,78 +0,0 @@
-process CAT_CAT {
-    tag "$meta.id"
-    label 'process_low'
-
-    conda "${moduleDir}/environment.yml"
-    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/pigz:2.3.4' :
-        'biocontainers/pigz:2.3.4' }"
-
-    input:
-    tuple val(meta), path(files_in)
-
-    output:
-    tuple val(meta), path("${prefix}"), emit: file_out
-    path "versions.yml"               , emit: versions
-
-    when:
-    task.ext.when == null || task.ext.when
-
-    script:
-    def args = task.ext.args ?: ''
-    def args2 = task.ext.args2 ?: ''
-    def file_list = files_in.collect { it.toString() }
-
-    // choose appropriate concatenation tool depending on input and output format
-
-    // | input     | output     | command1 | command2 |
-    // |-----------|------------|----------|----------|
-    // | gzipped   | gzipped    | cat      |          |
-    // | ungzipped | ungzipped  | cat      |          |
-    // | gzipped   | ungzipped  | zcat     |          |
-    // | ungzipped | gzipped    | cat      | pigz     |
-
-    // Use input file ending as default
-    prefix   = task.ext.prefix ?: "${meta.id}${getFileSuffix(file_list[0])}"
-    out_zip  = prefix.endsWith('.gz')
-    in_zip   = file_list[0].endsWith('.gz')
-    command1 = (in_zip && !out_zip) ? 'zcat' : 'cat'
-    command2 = (!in_zip && out_zip) ? "| pigz -c -p $task.cpus $args2" : ''
-    if(file_list.contains(prefix.trim())) {
-        error "The name of the input file can't be the same as for the output prefix in the " +
-        "module CAT_CAT (currently `$prefix`). Please choose a different one."
-    }
-    """
-    $command1 \\
-        $args \\
-        ${file_list.join(' ')} \\
-        $command2 \\
-        > ${prefix}
-
-    cat <<-END_VERSIONS > versions.yml
-    "${task.process}":
-        pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' )
-    END_VERSIONS
-    """
-
-    stub:
-    def file_list   = files_in.collect { it.toString() }
-    prefix          = task.ext.prefix ?: "${meta.id}${file_list[0].substring(file_list[0].lastIndexOf('.'))}"
-    if(file_list.contains(prefix.trim())) {
-        error "The name of the input file can't be the same as for the output prefix in the " +
-        "module CAT_CAT (currently `$prefix`). Please choose a different one."
-    }
-    """
-    touch $prefix
-
-    cat <<-END_VERSIONS > versions.yml
-    "${task.process}":
-        pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' )
-    END_VERSIONS
-    """
-}
-
-// for .gz files also include the second to last extension if it is present. E.g., .fasta.gz
-def getFileSuffix(filename) {
-    def match = filename =~ /^.*?((\.\w{1,5})?(\.\w{1,5}\.gz$))/
-    return match ? match[0][1] : filename.substring(filename.lastIndexOf('.'))
-}
diff --git a/modules/nf-core/cat/cat/meta.yml b/modules/nf-core/cat/cat/meta.yml
deleted file mode 100644
index 2a9284d7..00000000
--- a/modules/nf-core/cat/cat/meta.yml
+++ /dev/null
@@ -1,46 +0,0 @@
-name: cat_cat
-description: A module for concatenation of gzipped or uncompressed files
-keywords:
-  - concatenate
-  - gzip
-  - cat
-tools:
-  - cat:
-      description: Just concatenation
-      documentation: https://man7.org/linux/man-pages/man1/cat.1.html
-      licence: ["GPL-3.0-or-later"]
-      identifier: ""
-input:
-  - - meta:
-        type: map
-        description: |
-          Groovy Map containing sample information
-          e.g. [ id:'test', single_end:false ]
-    - files_in:
-        type: file
-        description: List of compressed / uncompressed files
-        pattern: "*"
-        ontologies: []
-output:
-  file_out:
-    - - meta:
-          type: map
-          description: Groovy Map containing sample information
-      - ${prefix}:
-          type: file
-          description: Concatenated file. Will be gzipped if file_out ends with ".gz"
-          pattern: "${file_out}"
-          ontologies: []
-  versions:
-    - versions.yml:
-        type: file
-        description: File containing software versions
-        pattern: "versions.yml"
-        ontologies:
-          - edam: http://edamontology.org/format_3750 # YAML
-authors:
-  - "@erikrikarddaniel"
-  - "@FriederikeHanssen"
-maintainers:
-  - "@erikrikarddaniel"
-  - "@FriederikeHanssen"
diff --git a/modules/nf-core/cat/cat/tests/main.nf.test b/modules/nf-core/cat/cat/tests/main.nf.test
deleted file mode 100644
index 9cb16178..00000000
--- a/modules/nf-core/cat/cat/tests/main.nf.test
+++ /dev/null
@@ -1,191 +0,0 @@
-nextflow_process {
-
-    name "Test Process CAT_CAT"
-    script "../main.nf"
-    process "CAT_CAT"
-    tag "modules"
-    tag "modules_nfcore"
-    tag "cat"
-    tag "cat/cat"
-
-    test("test_cat_name_conflict") {
-        when {
-            params {
-                outdir   = "${outputDir}"
-            }
-            process {
-                """
-                input[0] =
-                    [
-                        [ id:'genome', single_end:true ],
-                        [
-                            file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true),
-                            file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.sizes', checkIfExists: true)
-                        ]
-                    ]
-                """
-            }
-        }
-        then {
-            assertAll(
-                { assert !process.success },
-                { assert process.stdout.toString().contains("The name of the input file can't be the same as for the output prefix") },
-                { assert snapshot(process.out.versions).match() }
-            )
-        }
-    }
-
-    test("test_cat_unzipped_unzipped") {
-        when {
-            params {
-                outdir   = "${outputDir}"
-            }
-            process {
-                """
-                input[0] =
-                    [
-                        [ id:'test', single_end:true ],
-                        [
-                            file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true),
-                            file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.sizes', checkIfExists: true)
-                        ]
-                    ]
-                """
-            }
-        }
-        then {
-            assertAll(
-                { assert process.success },
-                { assert snapshot(process.out).match() }
-            )
-        }
-    }
-
-
-    test("test_cat_zipped_zipped") {
-        when {
-            params {
-                outdir   = "${outputDir}"
-            }
-            process {
-                """
-                input[0] =
-                    [
-                        [ id:'test', single_end:true ],
-                        [
-                            file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.gff3.gz', checkIfExists: true),
-                            file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/alignment/last/contigs.genome.maf.gz', checkIfExists: true)
-                        ]
-                    ]
-                """
-            }
-        }
-        then {
-            def lines = path(process.out.file_out.get(0).get(1)).linesGzip
-            assertAll(
-                { assert process.success },
-                { assert snapshot(
-                    lines[0..5],
-                    lines.size(),
-                    process.out.versions
-                    ).match()
-                }
-            )
-        }
-    }
-
-    test("test_cat_zipped_unzipped") {
-        config './nextflow_zipped_unzipped.config'
-
-        when {
-            params {
-                outdir   = "${outputDir}"
-            }
-            process {
-                """
-                input[0] =
-                    [
-                        [ id:'test', single_end:true ],
-                        [
-                            file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.gff3.gz', checkIfExists: true),
-                            file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/alignment/last/contigs.genome.maf.gz', checkIfExists: true)
-                        ]
-                    ]
-                """
-            }
-        }
-
-        then {
-            assertAll(
-                { assert process.success },
-                { assert snapshot(process.out).match() }
-            )
-        }
-
-    }
-
-    test("test_cat_unzipped_zipped") {
-        config './nextflow_unzipped_zipped.config'
-        when {
-            params {
-                outdir   = "${outputDir}"
-            }
-            process {
-                """
-                input[0] =
-                    [
-                        [ id:'test', single_end:true ],
-                        [
-                            file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true),
-                            file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.sizes', checkIfExists: true)
-                        ]
-                    ]
-                """
-            }
-        }
-        then {
-            def lines = path(process.out.file_out.get(0).get(1)).linesGzip
-            assertAll(
-                { assert process.success },
-                { assert snapshot(
-                    lines[0..5],
-                    lines.size(),
-                    process.out.versions
-                    ).match()
-                }
-            )
-        }
-    }
-
-    test("test_cat_one_file_unzipped_zipped") {
-        config './nextflow_unzipped_zipped.config'
-        when {
-            params {
-                outdir   = "${outputDir}"
-            }
-            process {
-                """
-                input[0] =
-                    [
-                        [ id:'test', single_end:true ],
-                        [
-                            file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)
-                        ]
-                    ]
-                """
-            }
-        }
-        then {
-            def lines = path(process.out.file_out.get(0).get(1)).linesGzip
-            assertAll(
-                { assert process.success },
-                { assert snapshot(
-                    lines[0..5],
-                    lines.size(),
-                    process.out.versions
-                    ).match()
-                }
-            )
-        }
-    }
-}
diff --git a/modules/nf-core/cat/cat/tests/main.nf.test.snap b/modules/nf-core/cat/cat/tests/main.nf.test.snap
deleted file mode 100644
index b7623ee6..00000000
--- a/modules/nf-core/cat/cat/tests/main.nf.test.snap
+++ /dev/null
@@ -1,147 +0,0 @@
-{
-    "test_cat_unzipped_unzipped": {
-        "content": [
-            {
-                "0": [
-                    [
-                        {
-                            "id": "test",
-                            "single_end": true
-                        },
-                        "test.fasta:md5,f44b33a0e441ad58b2d3700270e2dbe2"
-                    ]
-                ],
-                "1": [
-                    "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894"
-                ],
-                "file_out": [
-                    [
-                        {
-                            "id": "test",
-                            "single_end": true
-                        },
-                        "test.fasta:md5,f44b33a0e441ad58b2d3700270e2dbe2"
-                    ]
-                ],
-                "versions": [
-                    "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894"
-                ]
-            }
-        ],
-        "meta": {
-            "nf-test": "0.8.4",
-            "nextflow": "24.04.3"
-        },
-        "timestamp": "2023-10-16T14:32:18.500464399"
-    },
-    "test_cat_zipped_unzipped": {
-        "content": [
-            {
-                "0": [
-                    [
-                        {
-                            "id": "test",
-                            "single_end": true
-                        },
-                        "cat.txt:md5,c439d3b60e7bc03e8802a451a0d9a5d9"
-                    ]
-                ],
-                "1": [
-                    "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894"
-                ],
-                "file_out": [
-                    [
-                        {
-                            "id": "test",
-                            "single_end": true
-                        },
-                        "cat.txt:md5,c439d3b60e7bc03e8802a451a0d9a5d9"
-                    ]
-                ],
-                "versions": [
-                    "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894"
-                ]
-            }
-        ],
-        "meta": {
-            "nf-test": "0.8.4",
-            "nextflow": "24.04.3"
-        },
-        "timestamp": "2023-10-16T14:32:49.642741302"
-    },
-    "test_cat_zipped_zipped": {
-        "content": [
-            [
-                "MT192765.1\tGenbank\ttranscript\t259\t29667\t.\t+\t.\tID=unknown_transcript_1;geneID=orf1ab;gene_name=orf1ab",
-                "MT192765.1\tGenbank\tgene\t259\t21548\t.\t+\t.\tParent=unknown_transcript_1",
-                "MT192765.1\tGenbank\tCDS\t259\t13461\t.\t+\t0\tParent=unknown_transcript_1;exception=\"ribosomal slippage\";gbkey=CDS;gene=orf1ab;note=\"pp1ab;translated=by -1 ribosomal frameshift\";product=\"orf1ab polyprotein\";protein_id=QIK50426.1",
-                "MT192765.1\tGenbank\tCDS\t13461\t21548\t.\t+\t0\tParent=unknown_transcript_1;exception=\"ribosomal slippage\";gbkey=CDS;gene=orf1ab;note=\"pp1ab;translated=by -1 ribosomal frameshift\";product=\"orf1ab polyprotein\";protein_id=QIK50426.1",
-                "MT192765.1\tGenbank\tCDS\t21556\t25377\t.\t+\t0\tParent=unknown_transcript_1;gbkey=CDS;gene=S;note=\"structural protein\";product=\"surface glycoprotein\";protein_id=QIK50427.1",
-                "MT192765.1\tGenbank\tgene\t21556\t25377\t.\t+\t.\tParent=unknown_transcript_1"
-            ],
-            78,
-            [
-                "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894"
-            ]
-        ],
-        "meta": {
-            "nf-test": "0.8.4",
-            "nextflow": "24.04.3"
-        },
-        "timestamp": "2024-07-22T11:51:46.802978"
-    },
-    "test_cat_name_conflict": {
-        "content": [
-            [
-                
-            ]
-        ],
-        "meta": {
-            "nf-test": "0.8.4",
-            "nextflow": "24.04.3"
-        },
-        "timestamp": "2024-07-22T11:51:29.45394"
-    },
-    "test_cat_one_file_unzipped_zipped": {
-        "content": [
-            [
-                ">MT192765.1 Severe acute respiratory syndrome coronavirus 2 isolate SARS-CoV-2/human/USA/PC00101P/2020, complete genome",
-                "GTTTATACCTTCCCAGGTAACAAACCAACCAACTTTCGATCTCTTGTAGATCTGTTCTCTAAACGAACTTTAAAATCTGT",
-                "GTGGCTGTCACTCGGCTGCATGCTTAGTGCACTCACGCAGTATAATTAATAACTAATTACTGTCGTTGACAGGACACGAG",
-                "TAACTCGTCTATCTTCTGCAGGCTGCTTACGGTTTCGTCCGTGTTGCAGCCGATCATCAGCACATCTAGGTTTTGTCCGG",
-                "GTGTGACCGAAAGGTAAGATGGAGAGCCTTGTCCCTGGTTTCAACGAGAAAACACACGTCCAACTCAGTTTGCCTGTTTT",
-                "ACAGGTTCGCGACGTGCTCGTACGTGGCTTTGGAGACTCCGTGGAGGAGGTCTTATCAGAGGCACGTCAACATCTTAAAG"
-            ],
-            374,
-            [
-                "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894"
-            ]
-        ],
-        "meta": {
-            "nf-test": "0.8.4",
-            "nextflow": "24.04.3"
-        },
-        "timestamp": "2024-07-22T11:52:02.774016"
-    },
-    "test_cat_unzipped_zipped": {
-        "content": [
-            [
-                ">MT192765.1 Severe acute respiratory syndrome coronavirus 2 isolate SARS-CoV-2/human/USA/PC00101P/2020, complete genome",
-                "GTTTATACCTTCCCAGGTAACAAACCAACCAACTTTCGATCTCTTGTAGATCTGTTCTCTAAACGAACTTTAAAATCTGT",
-                "GTGGCTGTCACTCGGCTGCATGCTTAGTGCACTCACGCAGTATAATTAATAACTAATTACTGTCGTTGACAGGACACGAG",
-                "TAACTCGTCTATCTTCTGCAGGCTGCTTACGGTTTCGTCCGTGTTGCAGCCGATCATCAGCACATCTAGGTTTTGTCCGG",
-                "GTGTGACCGAAAGGTAAGATGGAGAGCCTTGTCCCTGGTTTCAACGAGAAAACACACGTCCAACTCAGTTTGCCTGTTTT",
-                "ACAGGTTCGCGACGTGCTCGTACGTGGCTTTGGAGACTCCGTGGAGGAGGTCTTATCAGAGGCACGTCAACATCTTAAAG"
-            ],
-            375,
-            [
-                "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894"
-            ]
-        ],
-        "meta": {
-            "nf-test": "0.8.4",
-            "nextflow": "24.04.3"
-        },
-        "timestamp": "2024-07-22T11:51:57.581523"
-    }
-}
\ No newline at end of file
diff --git a/modules/nf-core/cat/cat/tests/nextflow_unzipped_zipped.config b/modules/nf-core/cat/cat/tests/nextflow_unzipped_zipped.config
deleted file mode 100644
index ec26b0fd..00000000
--- a/modules/nf-core/cat/cat/tests/nextflow_unzipped_zipped.config
+++ /dev/null
@@ -1,6 +0,0 @@
-
-process {
-    withName: CAT_CAT {
-        ext.prefix = 'cat.txt.gz'
-    }
-}
diff --git a/modules/nf-core/cat/cat/tests/nextflow_zipped_unzipped.config b/modules/nf-core/cat/cat/tests/nextflow_zipped_unzipped.config
deleted file mode 100644
index fbc79783..00000000
--- a/modules/nf-core/cat/cat/tests/nextflow_zipped_unzipped.config
+++ /dev/null
@@ -1,8 +0,0 @@
-
-process {
-
-    withName: CAT_CAT {
-        ext.prefix = 'cat.txt'
-    }
-
-}
diff --git a/modules/nf-core/tabix/bgziptabix/environment.yml b/modules/nf-core/tabix/bgziptabix/environment.yml
deleted file mode 100644
index 771b1387..00000000
--- a/modules/nf-core/tabix/bgziptabix/environment.yml
+++ /dev/null
@@ -1,8 +0,0 @@
----
-# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
-channels:
-  - conda-forge
-  - bioconda
-
-dependencies:
-  - bioconda::htslib=1.21
diff --git a/modules/nf-core/tabix/bgziptabix/main.nf b/modules/nf-core/tabix/bgziptabix/main.nf
deleted file mode 100644
index f295c7f2..00000000
--- a/modules/nf-core/tabix/bgziptabix/main.nf
+++ /dev/null
@@ -1,48 +0,0 @@
-process TABIX_BGZIPTABIX {
-    tag "$meta.id"
-    label 'process_single'
-
-    conda "${moduleDir}/environment.yml"
-    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/92/92859404d861ae01afb87e2b789aebc71c0ab546397af890c7df74e4ee22c8dd/data' :
-        'community.wave.seqera.io/library/htslib:1.21--ff8e28a189fbecaa' }"
-
-    input:
-    tuple val(meta), path(input)
-
-    output:
-    tuple val(meta), path("*.gz"), path("*.tbi"), optional: true, emit: gz_tbi
-    tuple val(meta), path("*.gz"), path("*.csi"), optional: true, emit: gz_csi
-    path  "versions.yml" ,                        emit: versions
-
-    when:
-    task.ext.when == null || task.ext.when
-
-    script:
-    def args = task.ext.args ?: ''
-    def args2 = task.ext.args2 ?: ''
-    def prefix = task.ext.prefix ?: "${meta.id}"
-    """
-    bgzip  --threads ${task.cpus} -c $args $input > ${prefix}.${input.getExtension()}.gz
-    tabix --threads ${task.cpus} $args2 ${prefix}.${input.getExtension()}.gz
-
-    cat <<-END_VERSIONS > versions.yml
-    "${task.process}":
-        tabix: \$(echo \$(tabix -h 2>&1) | sed 's/^.*Version: //; s/ .*\$//')
-    END_VERSIONS
-    """
-
-    stub:
-    def prefix = task.ext.prefix ?: "${meta.id}"
-    def args2 = task.ext.args2 ?: ''
-    def index = args2.contains("-C ") || args2.contains("--csi") ? "csi" : "tbi"
-    """
-    echo "" | gzip > ${prefix}.${input.getExtension()}.gz
-    touch ${prefix}.${input.getExtension()}.gz.${index}
-
-    cat <<-END_VERSIONS > versions.yml
-    "${task.process}":
-        tabix: \$(echo \$(tabix -h 2>&1) | sed 's/^.*Version: //; s/ .*\$//')
-    END_VERSIONS
-    """
-}
diff --git a/modules/nf-core/tabix/bgziptabix/meta.yml b/modules/nf-core/tabix/bgziptabix/meta.yml
deleted file mode 100644
index 9c2c46d1..00000000
--- a/modules/nf-core/tabix/bgziptabix/meta.yml
+++ /dev/null
@@ -1,74 +0,0 @@
-name: tabix_bgziptabix
-description: bgzip a sorted tab-delimited genome file and then create tabix index
-keywords:
-  - bgzip
-  - compress
-  - index
-  - tabix
-  - vcf
-tools:
-  - tabix:
-      description: Generic indexer for TAB-delimited genome position files.
-      homepage: https://www.htslib.org/doc/tabix.html
-      documentation: https://www.htslib.org/doc/tabix.1.html
-      doi: 10.1093/bioinformatics/btq671
-      licence: ["MIT"]
-      identifier: biotools:tabix
-input:
-  - - meta:
-        type: map
-        description: |
-          Groovy Map containing sample information
-          e.g. [ id:'test', single_end:false ]
-    - input:
-        type: file
-        description: Sorted tab-delimited genome file
-        ontologies: []
-output:
-  gz_tbi:
-    - - meta:
-          type: map
-          description: |
-            Groovy Map containing sample information
-            e.g. [ id:'test', single_end:false ]
-      - "*.gz":
-          type: file
-          description: bgzipped tab-delimited genome file
-          pattern: "*.gz"
-          ontologies:
-            - edam: http://edamontology.org/format_3989 # GZIP format
-      - "*.tbi":
-          type: file
-          description: tabix index file
-          pattern: "*.tbi"
-          ontologies: []
-  gz_csi:
-    - - meta:
-          type: map
-          description: |
-            Groovy Map containing sample information
-            e.g. [ id:'test', single_end:false ]
-      - "*.gz":
-          type: file
-          description: bgzipped tab-delimited genome file
-          pattern: "*.gz"
-          ontologies:
-            - edam: http://edamontology.org/format_3989 # GZIP format
-      - "*.csi":
-          type: file
-          description: csi index file
-          pattern: "*.csi"
-          ontologies: []
-  versions:
-    - versions.yml:
-        type: file
-        description: File containing software versions
-        pattern: "versions.yml"
-        ontologies:
-          - edam: http://edamontology.org/format_3750 # YAML
-authors:
-  - "@maxulysse"
-  - "@DLBPointon"
-maintainers:
-  - "@maxulysse"
-  - "@DLBPointon"
diff --git a/modules/nf-core/tabix/bgziptabix/tests/main.nf.test b/modules/nf-core/tabix/bgziptabix/tests/main.nf.test
deleted file mode 100644
index cdb016e5..00000000
--- a/modules/nf-core/tabix/bgziptabix/tests/main.nf.test
+++ /dev/null
@@ -1,123 +0,0 @@
-nextflow_process {
-
-    name "Test Process TABIX_BGZIPTABIX"
-    script "../main.nf"
-    process "TABIX_BGZIPTABIX"
-
-    tag "modules"
-    tag "modules_nfcore"
-    tag "tabix"
-    tag "tabix/bgziptabix"
-
-    test("sarscov2_bed_tbi") {
-        config "./tabix_tbi.config"
-
-        when {
-            process {
-                """
-                input[0] = [
-                                [ id:'tbi_test' ],
-                                [ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test.bed', checkIfExists: true) ]
-                ]
-                """
-            }
-        }
-
-        then {
-            assertAll (
-                { assert process.success },
-                { assert snapshot(process.out).match() },
-                { assert snapshot(
-                            file(process.out.gz_tbi[0][1]).name
-                                ).match("tbi_test")
-                }
-            )
-        }
-    }
-
-    test("sarscov2_bed_csi") {
-        config "./tabix_csi.config"
-
-        when {
-            process {
-                """
-                input[0] = [
-                                [ id:'csi_test' ],
-                                [ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test.bed', checkIfExists: true) ]
-                ]
-                """
-            }
-        }
-
-        then {
-            assertAll (
-                { assert process.success },
-                { assert snapshot(process.out).match() },
-                { assert snapshot(
-                            file(process.out.gz_csi[0][1]).name
-                                ).match("csi_test")
-                }
-            )
-        }
-
-    }
-
-    test("sarscov2_bed_csi_stub") {
-        config "./tabix_csi.config"
-
-        options "-stub"
-
-        when {
-            process {
-                """
-                input[0] = [
-                                [ id:'test' ],
-                                [ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test.bed', checkIfExists: true) ]
-                ]
-                """
-            }
-        }
-
-        then {
-            assertAll (
-                { assert process.success },
-                { assert snapshot(process.out).match() },
-                { assert snapshot(
-                            file(process.out.gz_csi[0][1]).name
-                                ).match("csi_stub")
-                }
-            )
-        }
-
-    }
-
-    test("sarscov2_bed_tbi_stub") {
-        config "./tabix_tbi.config"
-
-        options "-stub"
-
-        when {
-            process {
-                """
-                input[0] = [
-                                [ id:'test' ],
-                                [ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test.bed', checkIfExists: true) ]
-                ]
-                """
-            }
-        }
-
-        then {
-            assertAll (
-                { assert process.success },
-                { assert snapshot(process.out).match() },
-                { assert snapshot(
-                            file(process.out.gz_tbi[0][1]).name
-                                ).match("tbi_stub")
-                }
-            )
-        }
-
-    }
-
-}
diff --git a/modules/nf-core/tabix/bgziptabix/tests/main.nf.test.snap b/modules/nf-core/tabix/bgziptabix/tests/main.nf.test.snap
deleted file mode 100644
index 5f818045..00000000
--- a/modules/nf-core/tabix/bgziptabix/tests/main.nf.test.snap
+++ /dev/null
@@ -1,206 +0,0 @@
-{
-    "sarscov2_bed_tbi": {
-        "content": [
-            {
-                "0": [
-                    [
-                        {
-                            "id": "tbi_test"
-                        },
-                        "tbi_test.bed.gz:md5,fe4053cf4de3aebbdfc3be2efb125a74",
-                        "tbi_test.bed.gz.tbi:md5,ca06caf88b1e3c67d5fcba0a1460b52c"
-                    ]
-                ],
-                "1": [
-                    
-                ],
-                "2": [
-                    "versions.yml:md5,9a7904908d7400fc67ef0412a925e9fc"
-                ],
-                "gz_csi": [
-                    
-                ],
-                "gz_tbi": [
-                    [
-                        {
-                            "id": "tbi_test"
-                        },
-                        "tbi_test.bed.gz:md5,fe4053cf4de3aebbdfc3be2efb125a74",
-                        "tbi_test.bed.gz.tbi:md5,ca06caf88b1e3c67d5fcba0a1460b52c"
-                    ]
-                ],
-                "versions": [
-                    "versions.yml:md5,9a7904908d7400fc67ef0412a925e9fc"
-                ]
-            }
-        ],
-        "meta": {
-            "nf-test": "0.9.2",
-            "nextflow": "24.10.5"
-        },
-        "timestamp": "2025-03-26T13:52:30.53305451"
-    },
-    "sarscov2_bed_csi": {
-        "content": [
-            {
-                "0": [
-                    
-                ],
-                "1": [
-                    [
-                        {
-                            "id": "csi_test"
-                        },
-                        "csi_test.bed.gz:md5,fe4053cf4de3aebbdfc3be2efb125a74",
-                        "csi_test.bed.gz.csi:md5,c9c0377de58fdc89672bb3005a0d69f5"
-                    ]
-                ],
-                "2": [
-                    "versions.yml:md5,9a7904908d7400fc67ef0412a925e9fc"
-                ],
-                "gz_csi": [
-                    [
-                        {
-                            "id": "csi_test"
-                        },
-                        "csi_test.bed.gz:md5,fe4053cf4de3aebbdfc3be2efb125a74",
-                        "csi_test.bed.gz.csi:md5,c9c0377de58fdc89672bb3005a0d69f5"
-                    ]
-                ],
-                "gz_tbi": [
-                    
-                ],
-                "versions": [
-                    "versions.yml:md5,9a7904908d7400fc67ef0412a925e9fc"
-                ]
-            }
-        ],
-        "meta": {
-            "nf-test": "0.9.2",
-            "nextflow": "24.10.5"
-        },
-        "timestamp": "2025-03-26T13:52:34.152301569"
-    },
-    "csi_test": {
-        "content": [
-            "csi_test.bed.gz"
-        ],
-        "meta": {
-            "nf-test": "0.8.4",
-            "nextflow": "24.04.2"
-        },
-        "timestamp": "2024-02-19T14:51:00.548801"
-    },
-    "sarscov2_bed_tbi_stub": {
-        "content": [
-            {
-                "0": [
-                    [
-                        {
-                            "id": "test"
-                        },
-                        "test.bed.gz:md5,68b329da9893e34099c7d8ad5cb9c940",
-                        "test.bed.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e"
-                    ]
-                ],
-                "1": [
-                    
-                ],
-                "2": [
-                    "versions.yml:md5,9a7904908d7400fc67ef0412a925e9fc"
-                ],
-                "gz_csi": [
-                    
-                ],
-                "gz_tbi": [
-                    [
-                        {
-                            "id": "test"
-                        },
-                        "test.bed.gz:md5,68b329da9893e34099c7d8ad5cb9c940",
-                        "test.bed.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e"
-                    ]
-                ],
-                "versions": [
-                    "versions.yml:md5,9a7904908d7400fc67ef0412a925e9fc"
-                ]
-            }
-        ],
-        "meta": {
-            "nf-test": "0.9.2",
-            "nextflow": "24.10.5"
-        },
-        "timestamp": "2025-03-26T13:52:41.271812789"
-    },
-    "csi_stub": {
-        "content": [
-            "test.bed.gz"
-        ],
-        "meta": {
-            "nf-test": "0.8.4",
-            "nextflow": "24.04.2"
-        },
-        "timestamp": "2024-02-19T14:51:09.218454"
-    },
-    "tbi_stub": {
-        "content": [
-            "test.bed.gz"
-        ],
-        "meta": {
-            "nf-test": "0.9.0",
-            "nextflow": "24.04.4"
-        },
-        "timestamp": "2024-09-25T14:45:18.550930179"
-    },
-    "tbi_test": {
-        "content": [
-            "tbi_test.bed.gz"
-        ],
-        "meta": {
-            "nf-test": "0.8.4",
-            "nextflow": "24.04.2"
-        },
-        "timestamp": "2024-02-19T14:50:51.579654"
-    },
-    "sarscov2_bed_csi_stub": {
-        "content": [
-            {
-                "0": [
-                    
-                ],
-                "1": [
-                    [
-                        {
-                            "id": "test"
-                        },
-                        "test.bed.gz:md5,68b329da9893e34099c7d8ad5cb9c940",
-                        "test.bed.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e"
-                    ]
-                ],
-                "2": [
-                    "versions.yml:md5,9a7904908d7400fc67ef0412a925e9fc"
-                ],
-                "gz_csi": [
-                    [
-                        {
-                            "id": "test"
-                        },
-                        "test.bed.gz:md5,68b329da9893e34099c7d8ad5cb9c940",
-                        "test.bed.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e"
-                    ]
-                ],
-                "gz_tbi": [
-                    
-                ],
-                "versions": [
-                    "versions.yml:md5,9a7904908d7400fc67ef0412a925e9fc"
-                ]
-            }
-        ],
-        "meta": {
-            "nf-test": "0.9.2",
-            "nextflow": "24.10.5"
-        },
-        "timestamp": "2025-03-26T13:52:37.709221651"
-    }
-}
\ No newline at end of file
diff --git a/modules/nf-core/tabix/bgziptabix/tests/tabix_csi.config b/modules/nf-core/tabix/bgziptabix/tests/tabix_csi.config
deleted file mode 100644
index fb41a314..00000000
--- a/modules/nf-core/tabix/bgziptabix/tests/tabix_csi.config
+++ /dev/null
@@ -1,5 +0,0 @@
-process {
-    withName: TABIX_BGZIPTABIX {
-        ext.args2 = '-p vcf --csi'
-    }
-}
diff --git a/modules/nf-core/tabix/bgziptabix/tests/tabix_tbi.config b/modules/nf-core/tabix/bgziptabix/tests/tabix_tbi.config
deleted file mode 100644
index c1915dc4..00000000
--- a/modules/nf-core/tabix/bgziptabix/tests/tabix_tbi.config
+++ /dev/null
@@ -1,5 +0,0 @@
-process {
-    withName: TABIX_BGZIPTABIX {
-        ext.args2 = '-p vcf'
-    }
-}
\ No newline at end of file
diff --git a/tests/main.nf.test b/tests/main.nf.test
index a39a3713..269608a5 100644
--- a/tests/main.nf.test
+++ b/tests/main.nf.test
@@ -13,6 +13,7 @@ nextflow_pipeline {
                 outdir = "${outputDir}"
                 all_output = true
                 skip_tracks = "NONE"
+                split_telomere = true
             }
         }
 

From 13bd9bb0f9987b9c3af8f3caa9410606ba960b74 Mon Sep 17 00:00:00 2001
From: DLBPointon <damonlbp@hotmail.co.uk>
Date: Tue, 5 Aug 2025 12:06:25 +0100
Subject: [PATCH 16/58] Updates

---
 conf/modules.config     | 15 ++++++++++++---
 tests/main.nf.test.snap | 13 +++++--------
 2 files changed, 17 insertions(+), 11 deletions(-)

diff --git a/conf/modules.config b/conf/modules.config
index 02b8162f..f867ecc2 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -17,9 +17,18 @@ process {
     //
     withName: 'PRETEXT_INGEST_SNDRD|PRETEXT_INGEST_HIRES' {
         publishDir = [
-            path: { "${params.outdir}/pretext_maps_processed" },
-            mode: params.publish_dir_mode,
-            saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
+            [
+                path:       { "${params.outdir}/pretext_maps_processed" },
+                pattern:    "*normal.pretext",
+                mode:       params.publish_dir_mode,
+                saveAs:     { filename -> filename.equals('versions.yml') ? null : filename },
+            ],
+            [
+                path:       { "${params.outdir}/pretext_maps_processed" },
+                pattern:    "*hr.pretext",
+                mode:       params.publish_dir_mode,
+                saveAs:     { filename -> filename.equals('versions.yml') ? null : filename },
+            ],
         ]
     }
 
diff --git a/tests/main.nf.test.snap b/tests/main.nf.test.snap
index 32259ee9..5cac579d 100644
--- a/tests/main.nf.test.snap
+++ b/tests/main.nf.test.snap
@@ -135,15 +135,12 @@
                 "pipeline_info/sanger-tol_curationpretext_software_versions.yml",
                 "pretext_maps_processed",
                 "pretext_maps_processed/CurationPretextTest_normal.pretext",
-                "pretext_maps_processed/telo_0.pretext",
-                "pretext_maps_processed/telo_1.pretext",
-                "pretext_maps_processed/telo_2.pretext",
                 "pretext_maps_raw",
                 "pretext_maps_raw/CurationPretextTest_normal_pi.pretext",
                 "pretext_snapshot",
                 "pretext_snapshot/CurationPretextTest_normalFullMap.png"
             ],
-            21,
+            18,
             [
                 "CurationPretextTest.bigWig:md5,3f66a9152d793a62f877b733c2336dfd",
                 "CurationPretextTest.gap.bedgraph:md5,d41d8cd98f00b204e9800998ecf8427e",
@@ -159,7 +156,7 @@
             1,
             false,
             true,
-            4,
+            1,
             false,
             true,
             1,
@@ -167,8 +164,8 @@
         ],
         "meta": {
             "nf-test": "0.9.2",
-            "nextflow": "25.04.1"
+            "nextflow": "24.04.4"
         },
-        "timestamp": "2025-08-04T17:47:27.212054464"
+        "timestamp": "2025-08-05T10:25:42.179879"
     }
-}
\ No newline at end of file
+}

From ed7c32322250fd0bda9bd1020dfdb7800379bdf8 Mon Sep 17 00:00:00 2001
From: DLBPointon <damonlbp@hotmail.co.uk>
Date: Thu, 21 Aug 2025 16:31:58 +0100
Subject: [PATCH 17/58] Update tests

---
 tests/main.nf.test      | 20 +++++++++-----------
 tests/main.nf.test.snap |  6 +++---
 2 files changed, 12 insertions(+), 14 deletions(-)

diff --git a/tests/main.nf.test b/tests/main.nf.test
index 269608a5..bede8ee8 100644
--- a/tests/main.nf.test
+++ b/tests/main.nf.test
@@ -31,25 +31,23 @@ nextflow_pipeline {
             assertAll(
                 {assert workflow.success},
                 {assert snapshot(
-                    // Test for number of successful processes - should be 29 for a full run
-                    workflow.trace.succeeded().size(),
+                    // Test for number of successful processes
+                    workflow.trace.succeeded().size(), // 42 without needing to gunzip the assembly
 
                     removeNextflowVersion("$outputDir/pipeline_info/sanger-tol_curationpretext_software_versions.yml"),
 
                     // Stable name with relative path
                     stable_name,
-                    stable_name.size(),
+                    stable_name.size(), // 18
 
                     // Accessory files
                     accessories,
-                    accessories.size(),
-
-                    // The two pretext files
-                    // Presence of files indicated presence of the raw_pretexts
-                    // we expect this to be a list of two files
-                    // we can't use their md5sum as they will be different everytime
-                    // Then double check that there are two
-                    // one is a hr and the other a normal variant
+                    accessories.size(), // 9
+
+                    // The pretext files
+                    // We only expect 1 pretext file as when using the `test` profile
+                    // we are skipping hr pretext file generation
+                    // so size will be 1 and presence of hr file is false
                     pretext_maps_raw.size(),
                     pretext_maps_raw.any{it.toString().contains("_hr_pi.pretext".toString())},
                     pretext_maps_raw.any{it.toString().contains("_normal_pi.pretext".toString())},
diff --git a/tests/main.nf.test.snap b/tests/main.nf.test.snap
index 5cac579d..4b352b5c 100644
--- a/tests/main.nf.test.snap
+++ b/tests/main.nf.test.snap
@@ -164,8 +164,8 @@
         ],
         "meta": {
             "nf-test": "0.9.2",
-            "nextflow": "24.04.4"
+            "nextflow": "25.04.6"
         },
-        "timestamp": "2025-08-05T10:25:42.179879"
+        "timestamp": "2025-08-21T16:06:51.036682"
     }
-}
+}
\ No newline at end of file

From 656d9fcdd7f954f7c6412ce362812cd50a34c01d Mon Sep 17 00:00:00 2001
From: DLBPointon <damonlbp@hotmail.co.uk>
Date: Thu, 21 Aug 2025 16:32:46 +0100
Subject: [PATCH 18/58] Add GUNZIP

---
 workflows/curationpretext.nf | 31 +++++++++++++++++++++++++++++--
 1 file changed, 29 insertions(+), 2 deletions(-)

diff --git a/workflows/curationpretext.nf b/workflows/curationpretext.nf
index 6e63803d..69ef0a1d 100644
--- a/workflows/curationpretext.nf
+++ b/workflows/curationpretext.nf
@@ -4,8 +4,9 @@
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 */
 
-include { SAMTOOLS_FAIDX                            } from '../modules/nf-core/samtools/faidx/main'
 include { GAWK as GAWK_UPPER_SEQUENCE               } from '../modules/nf-core/gawk/main'
+include { SAMTOOLS_FAIDX                            } from '../modules/nf-core/samtools/faidx/main'
+include { GUNZIP                                    } from '../modules/nf-core/gunzip/main'
 
 include { PRETEXT_GRAPH as PRETEXT_INGEST_SNDRD     } from '../modules/local/pretext/graph/main'
 include { PRETEXT_GRAPH as PRETEXT_INGEST_HIRES     } from '../modules/local/pretext/graph/main'
@@ -35,11 +36,37 @@ workflow CURATIONPRETEXT {
     ch_empty_file       = Channel.fromPath("${baseDir}/assets/EMPTY.txt")
 
 
+    ch_reference
+        .branch { meta, file ->
+            zipped: file.name.endsWith('.gz')
+            unzipped: !file.name.endsWith('.gz')
+        }
+        .set {ch_input}
+
+    //
+    // MODULE: UNZIP INPUTS IF NEEDED
+    //
+    GUNZIP (
+        ch_input.zipped
+    )
+    ch_versions = ch_versions.mix(GUNZIP.out.versions)
+
+
+    //
+    // LOGIC: MIX CHANNELS WHICH MAY OR MAY NOT BE EMPTY INTO A SINGLE QUEUE CHANNEL
+    //
+    unzipped_input = Channel.empty()
+
+    unzipped_input
+        .mix(ch_input.unzipped, GUNZIP.out.gunzip)
+        .set { unzipped_reference }
+
+
     //
     // MODULE: UPPERCASE THE REFERENCE SEQUENCE
     //
     GAWK_UPPER_SEQUENCE(
-        ch_reference,
+        unzipped_reference,
         [],
         false,
     )

From 84fa91ed2aa55a3346a28e4c95bab62bf6816ea1 Mon Sep 17 00:00:00 2001
From: DLBPointon <damonlbp@hotmail.co.uk>
Date: Thu, 21 Aug 2025 16:33:52 +0100
Subject: [PATCH 19/58] Minor Updates

---
 modules/local/pretext/graph/main.nf | 19 ++++++++-----------
 1 file changed, 8 insertions(+), 11 deletions(-)

diff --git a/modules/local/pretext/graph/main.nf b/modules/local/pretext/graph/main.nf
index ac966417..49bec139 100644
--- a/modules/local/pretext/graph/main.nf
+++ b/modules/local/pretext/graph/main.nf
@@ -85,38 +85,35 @@ process PRETEXT_GRAPH {
             esac
         done
 
-        ls telomere/*
-        echo \$file_og
-
         if [ -s "\$file_og" ]; then
-            echo "Processing OG_TELOMERE file..."
+            echo "Processing OG_TELOMERE file: \$file_og"
             PretextGraph $args -i "\$input_file" -n "og_telomere" -o telo_0.pretext < "\$file_og"
         else
-            echo "No OG TELOMERE file"
+            echo "OG TELOMERE file - Could be empty or missing"
             cp "\$input_file" telo_0.pretext
         fi
 
         if [ -s "\$file_telox" ]; then
-            echo "Processing TELOX_TELOMERE file..."
+            echo "Processing TELOX_TELOMERE file: \$file_telox"
             PretextGraph $args -i telo_0.pretext -n "telox_telomere" -o telo_1.pretext < "\$file_telox"
         else
-            echo "No TELOX file"
+            echo "TELOX file - Could be empty or missing"
             cp telo_0.pretext telo_1.pretext
         fi
 
         if [ -s "\$file_5p" ]; then
-            echo "Processing 5 Prime TELOMERE file..."
+            echo "Processing 5-Prime TELOMERE file: \$file_5p"
             PretextGraph $args -i telo_1.pretext -n "5p_telomere" -o telo_2.pretext < "\$file_5p"
         else
-            echo "No 5Prime TELOMERE file"
+            echo "5-Prime TELOMERE file - Could be empty or missing"
             cp telo_1.pretext telo_2.pretext
         fi
 
         if [ -s "\$file_3p" ]; then
-            echo "Processing 3 Prime TELOMERE file..."
+            echo "Processing 3-Prime TELOMERE file: \$file_3p"
             PretextGraph $args -i telo_2.pretext -n "3p_telomere" -o "${prefix}.pretext" < "\$file_3p"
         else
-            echo "No 3Prime TELOMERE file"
+            echo "3-Prime TELOMERE file - Could be empty or missing"
             cp telo_2.pretext "${prefix}.pretext"
         fi
 

From 908783fffc73d8ba7c87c87d5fed36b6a1cba005 Mon Sep 17 00:00:00 2001
From: DLBPointon <damonlbp@hotmail.co.uk>
Date: Thu, 21 Aug 2025 16:40:53 +0100
Subject: [PATCH 20/58] Adding Data download to setup

---
 tests/main.nf.test | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/tests/main.nf.test b/tests/main.nf.test
index bede8ee8..bb46ee51 100644
--- a/tests/main.nf.test
+++ b/tests/main.nf.test
@@ -8,6 +8,17 @@ nextflow_pipeline {
 
     test("Full run") {
 
+        setup {
+            println "\nDownloading the test data..."
+            def command = ['bash', '-c', "curl https://tolit.cog.sanger.ac.uk/test-data/resources/treeval/TreeValTinyData.tar.gz | tar xzf - -C ${projectDir}/"]
+            def process = command.execute()
+            process.waitFor()
+            // Non-zero exit from the bash pipeline: abort the test with the captured stderr.
+            if (process.exitValue() != 0) {
+                throw new RuntimeException("Error - failed to download TreeValTinyData.tar.gz: ${process.err.text}")
+            }
+        }
+
         when {
             params {
                 outdir = "${outputDir}"

From c92706dffecf0bf07231a5f275a9983e350c94e3 Mon Sep 17 00:00:00 2001
From: DLBPointon <damonlbp@hotmail.co.uk>
Date: Thu, 21 Aug 2025 16:41:56 +0100
Subject: [PATCH 21/58] Remove now duplicated data download

---
 .github/workflows/ci.yml | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 583d8add..cf35c042 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -67,11 +67,6 @@ jobs:
           mkdir -p $NXF_SINGULARITY_CACHEDIR
           mkdir -p $NXF_SINGULARITY_LIBRARYDIR
 
-      - name: Download test data
-        # Download A fungal test data set that is full enough to show some real output.
-        run: |
-          curl https://tolit.cog.sanger.ac.uk/test-data/resources/treeval/TreeValTinyData.tar.gz | tar xzf -
-
       - name: Install nf-test
         uses: nf-core/setup-nf-test@v1
 

From 848cb1c4f8a8effd9436854bf7ad79e573dacc25 Mon Sep 17 00:00:00 2001
From: DLBPointon <damonlbp@hotmail.co.uk>
Date: Thu, 21 Aug 2025 16:43:09 +0100
Subject: [PATCH 22/58] Update CHANGELOG

---
 CHANGELOG.md | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0e297302..3956adb4 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ### Added and Fixed
 
+- Template update to 3.3.3. <TODO in next PR>.
 - Addition of the `--split_telomere` boolean flag, this is false by default.
   - When `true` the pipeline will split the telomere file into a 5 and 3 prime file.
 - Update `ACCESSORY_FILES` subworkflow:
@@ -22,6 +23,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Moved `GAWK_UPPER_SEQUENCE` from the `TELO_FINDER` subworkflow to the first step of the main `curationpretext` workflow, this simply makes more sense.
 - Removed no longer needed scripts from bin.
 - Added the `gawk_split_directions.awk` script for split telomere.
+- Addition of GUNZIP for the input reference genome.
+- Update tests.
 
 ### Paramters
 
@@ -41,6 +44,8 @@ Note, since the pipeline is using Nextflow DSL2, each process will be run with i
 | `GAWK_CLEAN_TELOMERE`    | 5.3.0         | REMOVED       |
 | `GAWK_MAP_TELO`          | 5.3.0         | REMOVED       |
 | `GET_LARGEST_SCAFF`      | coreutils=9.1 | REMOVED       |
+| `GUNZIP`                 | NA            | 1.13          |
+
 
 ## [[1.4.2](https://github.com/sanger-tol/curationpretext/releases/tag/1.4.2)] - UNSC Nereid (H2) - [2025-07-28]
 

From e335ab85ea3298e59c9da3eaecc2e79dfb71df39 Mon Sep 17 00:00:00 2001
From: DLBPointon <damonlbp@hotmail.co.uk>
Date: Thu, 21 Aug 2025 16:43:52 +0100
Subject: [PATCH 23/58] Update modules

---
 modules.json | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/modules.json b/modules.json
index 30d74ced..61f01451 100644
--- a/modules.json
+++ b/modules.json
@@ -45,6 +45,11 @@
                         "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1",
                         "installed_by": ["modules"]
                     },
+                    "gunzip": {
+                        "branch": "master",
+                        "git_sha": "41dfa3f7c0ffabb96a6a813fe321c6d1cc5b6e46",
+                        "installed_by": ["modules"]
+                    },
                     "minimap2/align": {
                         "branch": "master",
                         "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d",

From d549bc510a90a46504da95eb249eb8269787ba59 Mon Sep 17 00:00:00 2001
From: DLBPointon <damonlbp@hotmail.co.uk>
Date: Thu, 21 Aug 2025 16:46:52 +0100
Subject: [PATCH 24/58] Update

---
 modules/nf-core/gunzip/environment.yml        |  12 ++
 modules/nf-core/gunzip/main.nf                |  55 +++++++
 modules/nf-core/gunzip/meta.yml               |  52 +++++++
 modules/nf-core/gunzip/tests/main.nf.test     | 121 ++++++++++++++++
 .../nf-core/gunzip/tests/main.nf.test.snap    | 134 ++++++++++++++++++
 modules/nf-core/gunzip/tests/nextflow.config  |   5 +
 6 files changed, 379 insertions(+)
 create mode 100644 modules/nf-core/gunzip/environment.yml
 create mode 100644 modules/nf-core/gunzip/main.nf
 create mode 100644 modules/nf-core/gunzip/meta.yml
 create mode 100644 modules/nf-core/gunzip/tests/main.nf.test
 create mode 100644 modules/nf-core/gunzip/tests/main.nf.test.snap
 create mode 100644 modules/nf-core/gunzip/tests/nextflow.config

diff --git a/modules/nf-core/gunzip/environment.yml b/modules/nf-core/gunzip/environment.yml
new file mode 100644
index 00000000..9b926b1f
--- /dev/null
+++ b/modules/nf-core/gunzip/environment.yml
@@ -0,0 +1,12 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+channels:
+  - conda-forge
+  - bioconda
+dependencies:
+  - conda-forge::coreutils=9.5
+  - conda-forge::grep=3.11
+  - conda-forge::gzip=1.13
+  - conda-forge::lbzip2=2.5
+  - conda-forge::sed=4.8
+  - conda-forge::tar=1.34
diff --git a/modules/nf-core/gunzip/main.nf b/modules/nf-core/gunzip/main.nf
new file mode 100644
index 00000000..3ffc8e92
--- /dev/null
+++ b/modules/nf-core/gunzip/main.nf
@@ -0,0 +1,55 @@
+process GUNZIP {
+    tag "${archive}"
+    label 'process_single'
+
+    conda "${moduleDir}/environment.yml"
+    container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container
+        ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/52/52ccce28d2ab928ab862e25aae26314d69c8e38bd41ca9431c67ef05221348aa/data'
+        : 'community.wave.seqera.io/library/coreutils_grep_gzip_lbzip2_pruned:838ba80435a629f8'}"
+
+    input:
+    tuple val(meta), path(archive)
+
+    output:
+    tuple val(meta), path("${gunzip}"), emit: gunzip
+    path "versions.yml", emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def extension = (archive.toString() - '.gz').tokenize('.')[-1]
+    def name = archive.toString() - '.gz' - ".${extension}"
+    def prefix = task.ext.prefix ?: name
+    gunzip = prefix + ".${extension}"
+    """
+    # Not calling gunzip itself because it creates files
+    # with the original group ownership rather than the
+    # default one for that user / the work directory
+    gzip \\
+        -cd \\
+        ${args} \\
+        ${archive} \\
+        > ${gunzip}
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        gunzip: \$(echo \$(gunzip --version 2>&1) | sed 's/^.*(gzip) //; s/ Copyright.*\$//')
+    END_VERSIONS
+    """
+
+    stub:
+    def args = task.ext.args ?: ''
+    def extension = (archive.toString() - '.gz').tokenize('.')[-1]
+    def name = archive.toString() - '.gz' - ".${extension}"
+    def prefix = task.ext.prefix ?: name
+    gunzip = prefix + ".${extension}"
+    """
+    touch ${gunzip}
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        gunzip: \$(echo \$(gunzip --version 2>&1) | sed 's/^.*(gzip) //; s/ Copyright.*\$//')
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/gunzip/meta.yml b/modules/nf-core/gunzip/meta.yml
new file mode 100644
index 00000000..926bb22a
--- /dev/null
+++ b/modules/nf-core/gunzip/meta.yml
@@ -0,0 +1,52 @@
+name: gunzip
+description: Decompresses gzip-compressed files.
+keywords:
+  - gunzip
+  - compression
+  - decompression
+tools:
+  - gunzip:
+      description: |
+        gzip is a file format and a software application used for file compression and decompression.
+      documentation: https://www.gnu.org/software/gzip/manual/gzip.html
+      licence: ["GPL-3.0-or-later"]
+      identifier: ""
+input:
+  - - meta:
+        type: map
+        description: |
+          Optional groovy Map containing meta information
+          e.g. [ id:'test', single_end:false ]
+    - archive:
+        type: file
+        description: Gzip-compressed file to be decompressed
+        pattern: "*.*"
+        ontologies: []
+output:
+  gunzip:
+    - - meta:
+          type: map
+          description: |
+            Groovy Map containing meta information
+            e.g. [ id:'test', single_end:false ]
+      - ${gunzip}:
+          type: file
+          description: Decompressed file
+          pattern: "*.*"
+          ontologies: []
+  versions:
+    - versions.yml:
+        type: file
+        description: File containing software versions
+        pattern: "versions.yml"
+        ontologies:
+          - edam: http://edamontology.org/format_3750 # YAML
+authors:
+  - "@joseespinosa"
+  - "@drpatelh"
+  - "@jfy133"
+maintainers:
+  - "@joseespinosa"
+  - "@drpatelh"
+  - "@jfy133"
+  - "@gallvp"
diff --git a/modules/nf-core/gunzip/tests/main.nf.test b/modules/nf-core/gunzip/tests/main.nf.test
new file mode 100644
index 00000000..776211ad
--- /dev/null
+++ b/modules/nf-core/gunzip/tests/main.nf.test
@@ -0,0 +1,121 @@
+nextflow_process { // nf-test suite for the GUNZIP process; four cases: real/stub run, each with and without a prefix override
+
+    name "Test Process GUNZIP"
+    script "../main.nf" // module under test, relative to this tests/ directory
+    process "GUNZIP"
+    tag "gunzip"
+    tag "modules_nfcore"
+    tag "modules"
+
+    test("Should run without failures") { // real run; meta is passed as an empty list, so no ext.prefix override applies
+
+        when {
+            params {
+                outdir = "$outputDir"
+            }
+            process {
+                """
+                input[0] = Channel.of([
+                        [],
+                        file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true)
+                    ]
+                )
+                """
+            }
+        }
+
+        then {
+            assertAll(
+            { assert process.success },
+            { assert snapshot(process.out).match() } // compares every output channel against main.nf.test.snap
+            )
+        }
+
+    }
+
+    test("Should run without failures - prefix") { // real run with ext.prefix supplied by the test config
+
+        config './nextflow.config' // sets ext.prefix to "<meta.id>.xyz" for GUNZIP, hence meta needs an id below
+
+        when {
+            params {
+                outdir = "$outputDir"
+            }
+            process {
+                """
+                input[0] = Channel.of([
+                        [ id: 'test' ],
+                        file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true)
+                    ]
+                )
+                """
+            }
+        }
+
+        then {
+            assertAll(
+            { assert process.success },
+            { assert snapshot(process.out).match() } // snapshot records the output as test.xyz.fastq
+            )
+        }
+
+    }
+
+    test("Should run without failures - stub") { // stub counterpart of the first case
+
+        options '-stub' // extra Nextflow CLI option: run the process stub block instead of the script block
+
+        when {
+            params {
+                outdir = "$outputDir"
+            }
+            process {
+                """
+                input[0] = Channel.of([
+                        [],
+                        file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true)
+                    ]
+                )
+                """
+            }
+        }
+
+        then {
+            assertAll(
+            { assert process.success },
+            { assert snapshot(process.out).match() } // stub output is an empty file (md5 d41d8cd9... in the snapshot)
+            )
+        }
+
+    }
+
+    test("Should run without failures - prefix - stub") { // stub counterpart of the prefix case
+
+        options '-stub' // extra Nextflow CLI option: run the process stub block instead of the script block
+        config './nextflow.config' // same ext.prefix override as the non-stub prefix case
+
+        when {
+            params {
+                outdir = "$outputDir"
+            }
+            process {
+                """
+                input[0] = Channel.of([
+                        [ id: 'test' ],
+                        file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true)
+                    ]
+                )
+                """
+            }
+        }
+
+        then {
+            assertAll(
+            { assert process.success },
+            { assert snapshot(process.out).match() } // snapshot records an empty test.xyz.fastq
+            )
+        }
+
+    }
+
+}
diff --git a/modules/nf-core/gunzip/tests/main.nf.test.snap b/modules/nf-core/gunzip/tests/main.nf.test.snap
new file mode 100644
index 00000000..a0f0e67e
--- /dev/null
+++ b/modules/nf-core/gunzip/tests/main.nf.test.snap
@@ -0,0 +1,134 @@
+{
+    "Should run without failures - prefix - stub": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.xyz.fastq:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "1": [
+                    "versions.yml:md5,d327e4a19a6d5c5e974136cef8999d8c"
+                ],
+                "gunzip": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.xyz.fastq:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "versions": [
+                    "versions.yml:md5,d327e4a19a6d5c5e974136cef8999d8c"
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.9.2",
+            "nextflow": "24.10.2"
+        },
+        "timestamp": "2024-12-13T11:48:22.080222697"
+    },
+    "Should run without failures - stub": {
+        "content": [
+            {
+                "0": [
+                    [
+                        [
+                            
+                        ],
+                        "test_1.fastq:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "1": [
+                    "versions.yml:md5,d327e4a19a6d5c5e974136cef8999d8c"
+                ],
+                "gunzip": [
+                    [
+                        [
+                            
+                        ],
+                        "test_1.fastq:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "versions": [
+                    "versions.yml:md5,d327e4a19a6d5c5e974136cef8999d8c"
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.9.2",
+            "nextflow": "24.10.2"
+        },
+        "timestamp": "2024-12-13T11:48:14.593020264"
+    },
+    "Should run without failures": {
+        "content": [
+            {
+                "0": [
+                    [
+                        [
+                            
+                        ],
+                        "test_1.fastq:md5,4161df271f9bfcd25d5845a1e220dbec"
+                    ]
+                ],
+                "1": [
+                    "versions.yml:md5,d327e4a19a6d5c5e974136cef8999d8c"
+                ],
+                "gunzip": [
+                    [
+                        [
+                            
+                        ],
+                        "test_1.fastq:md5,4161df271f9bfcd25d5845a1e220dbec"
+                    ]
+                ],
+                "versions": [
+                    "versions.yml:md5,d327e4a19a6d5c5e974136cef8999d8c"
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.9.2",
+            "nextflow": "24.10.2"
+        },
+        "timestamp": "2024-12-13T11:48:01.295397925"
+    },
+    "Should run without failures - prefix": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.xyz.fastq:md5,4161df271f9bfcd25d5845a1e220dbec"
+                    ]
+                ],
+                "1": [
+                    "versions.yml:md5,d327e4a19a6d5c5e974136cef8999d8c"
+                ],
+                "gunzip": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.xyz.fastq:md5,4161df271f9bfcd25d5845a1e220dbec"
+                    ]
+                ],
+                "versions": [
+                    "versions.yml:md5,d327e4a19a6d5c5e974136cef8999d8c"
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.9.2",
+            "nextflow": "24.10.2"
+        },
+        "timestamp": "2024-12-13T11:48:07.414271387"
+    }
+}
\ No newline at end of file
diff --git a/modules/nf-core/gunzip/tests/nextflow.config b/modules/nf-core/gunzip/tests/nextflow.config
new file mode 100644
index 00000000..dec77642
--- /dev/null
+++ b/modules/nf-core/gunzip/tests/nextflow.config
@@ -0,0 +1,5 @@
+process { // test-only configuration, loaded by the "prefix" cases in main.nf.test via config './nextflow.config'
+    withName: GUNZIP { // selector: applies only to the GUNZIP process
+        ext.prefix = { "${meta.id}.xyz" } // closure, so meta is resolved per task; GUNZIP appends ".<extension>" to this prefix
+    }
+}

From eb53abe819672e253aebf40b4ae84fa821b2fe8e Mon Sep 17 00:00:00 2001
From: DLBPointon <damonlbp@hotmail.co.uk>
Date: Thu, 21 Aug 2025 20:55:54 +0100
Subject: [PATCH 25/58] Update

---
 modules.json | 98 ++++++++++++++++++++++++++++++++++++++--------------
 1 file changed, 73 insertions(+), 25 deletions(-)

diff --git a/modules.json b/modules.json
index 61f01451..1fddd1b6 100644
--- a/modules.json
+++ b/modules.json
@@ -8,109 +8,151 @@
                     "bedtools/bamtobed": {
                         "branch": "master",
                         "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d",
-                        "installed_by": ["modules"]
+                        "installed_by": [
+                            "modules"
+                        ]
                     },
                     "bedtools/genomecov": {
                         "branch": "master",
                         "git_sha": "81880787133db07d9b4c1febd152c090eb8325dc",
-                        "installed_by": ["modules"]
+                        "installed_by": [
+                            "modules"
+                        ]
                     },
                     "bedtools/intersect": {
                         "branch": "master",
                         "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d",
-                        "installed_by": ["modules"]
+                        "installed_by": [
+                            "modules"
+                        ]
                     },
                     "bedtools/makewindows": {
                         "branch": "master",
                         "git_sha": "81880787133db07d9b4c1febd152c090eb8325dc",
-                        "installed_by": ["modules"]
+                        "installed_by": [
+                            "modules"
+                        ]
                     },
                     "bedtools/map": {
                         "branch": "master",
                         "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d",
-                        "installed_by": ["modules"]
+                        "installed_by": [
+                            "modules"
+                        ]
                     },
                     "bwamem2/index": {
                         "branch": "master",
                         "git_sha": "a29f18660f5e3748d44d6f716241e70c942c065d",
-                        "installed_by": ["modules"]
+                        "installed_by": [
+                            "modules"
+                        ]
                     },
                     "gawk": {
                         "branch": "master",
                         "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d",
-                        "installed_by": ["modules"]
+                        "installed_by": [
+                            "modules"
+                        ]
                     },
                     "gnu/sort": {
                         "branch": "master",
                         "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1",
-                        "installed_by": ["modules"]
+                        "installed_by": [
+                            "modules"
+                        ]
                     },
                     "gunzip": {
                         "branch": "master",
                         "git_sha": "41dfa3f7c0ffabb96a6a813fe321c6d1cc5b6e46",
-                        "installed_by": ["modules"]
+                        "installed_by": [
+                            "modules"
+                        ]
                     },
                     "minimap2/align": {
                         "branch": "master",
                         "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d",
-                        "installed_by": ["modules"]
+                        "installed_by": [
+                            "modules"
+                        ]
                     },
                     "minimap2/index": {
                         "branch": "master",
                         "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d",
-                        "installed_by": ["modules"]
+                        "installed_by": [
+                            "modules"
+                        ]
                     },
                     "pretextmap": {
                         "branch": "master",
                         "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d",
-                        "installed_by": ["modules"],
+                        "installed_by": [
+                            "modules"
+                        ],
                         "patch": "modules/nf-core/pretextmap/pretextmap.diff"
                     },
                     "pretextsnapshot": {
                         "branch": "master",
                         "git_sha": "81880787133db07d9b4c1febd152c090eb8325dc",
-                        "installed_by": ["modules"],
+                        "installed_by": [
+                            "modules"
+                        ],
                         "patch": "modules/nf-core/pretextsnapshot/pretextsnapshot.diff"
                     },
                     "samtools/faidx": {
                         "branch": "master",
                         "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d",
-                        "installed_by": ["modules"]
+                        "installed_by": [
+                            "modules"
+                        ]
                     },
                     "samtools/merge": {
                         "branch": "master",
                         "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d",
-                        "installed_by": ["modules"]
+                        "installed_by": [
+                            "modules"
+                        ]
                     },
                     "samtools/sort": {
                         "branch": "master",
                         "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d",
-                        "installed_by": ["modules"]
+                        "installed_by": [
+                            "modules"
+                        ]
                     },
                     "samtools/view": {
                         "branch": "master",
                         "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d",
-                        "installed_by": ["modules"]
+                        "installed_by": [
+                            "modules"
+                        ]
                     },
                     "seqtk/cutn": {
                         "branch": "master",
                         "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d",
-                        "installed_by": ["modules"]
+                        "installed_by": [
+                            "modules"
+                        ]
                     },
                     "ucsc/bedgraphtobigwig": {
                         "branch": "master",
                         "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d",
-                        "installed_by": ["modules"]
+                        "installed_by": [
+                            "modules"
+                        ]
                     },
                     "windowmasker/mkcounts": {
                         "branch": "master",
                         "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d",
-                        "installed_by": ["modules"]
+                        "installed_by": [
+                            "modules"
+                        ]
                     },
                     "windowmasker/ustat": {
                         "branch": "master",
                         "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d",
-                        "installed_by": ["modules"]
+                        "installed_by": [
+                            "modules"
+                        ]
                     }
                 }
             },
@@ -119,20 +161,26 @@
                     "utils_nextflow_pipeline": {
                         "branch": "master",
                         "git_sha": "c2b22d85f30a706a3073387f30380704fcae013b",
-                        "installed_by": ["subworkflows"]
+                        "installed_by": [
+                            "subworkflows"
+                        ]
                     },
                     "utils_nfcore_pipeline": {
                         "branch": "master",
                         "git_sha": "51ae5406a030d4da1e49e4dab49756844fdd6c7a",
-                        "installed_by": ["subworkflows"]
+                        "installed_by": [
+                            "subworkflows"
+                        ]
                     },
                     "utils_nfschema_plugin": {
                         "branch": "master",
                         "git_sha": "2fd2cd6d0e7b273747f32e465fdc6bcc3ae0814e",
-                        "installed_by": ["subworkflows"]
+                        "installed_by": [
+                            "subworkflows"
+                        ]
                     }
                 }
             }
         }
     }
-}
+}
\ No newline at end of file

From 2398af56ec7694075813c136750a07f5fff57100 Mon Sep 17 00:00:00 2001
From: DLBPointon <damonlbp@hotmail.co.uk>
Date: Fri, 22 Aug 2025 10:45:27 +0100
Subject: [PATCH 26/58] Update Tests

---
 tests/main.nf.test.snap | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/main.nf.test.snap b/tests/main.nf.test.snap
index 4b352b5c..8d8f6ea3 100644
--- a/tests/main.nf.test.snap
+++ b/tests/main.nf.test.snap
@@ -150,7 +150,7 @@
                 "CurationPretextTest_5P_telomere.bedgraph:md5,d41d8cd98f00b204e9800998ecf8427e",
                 "CurationPretextTest_telomere.bed:md5,d41d8cd98f00b204e9800998ecf8427e",
                 "CurationPretextTest_telomere.bedgraph:md5,d41d8cd98f00b204e9800998ecf8427e",
-                "coverage.bigWig:md5,39b3e8b7751b33758087cafc9a3c689e"
+                "coverage.bigWig:md5,2e474506c957152b231ac63c859f0b17"
             ],
             9,
             1,
@@ -166,6 +166,6 @@
             "nf-test": "0.9.2",
             "nextflow": "25.04.6"
         },
-        "timestamp": "2025-08-21T16:06:51.036682"
+        "timestamp": "2025-08-21T21:25:49.92252227"
     }
 }
\ No newline at end of file

From 08ff1fa61e4a0fe3bfe2b95a36dbd4a5e5b0b5ad Mon Sep 17 00:00:00 2001
From: DLBPointon <damonlbp@hotmail.co.uk>
Date: Fri, 22 Aug 2025 11:04:45 +0100
Subject: [PATCH 27/58] Prettier!

---
 CHANGELOG.md | 1 -
 1 file changed, 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 3956adb4..b04b2082 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -46,7 +46,6 @@ Note, since the pipeline is using Nextflow DSL2, each process will be run with i
 | `GET_LARGEST_SCAFF`      | coreutils=9.1 | REMOVED       |
 | `GUNZIP`                 | NA            | 1.13          |
 
-
 ## [[1.4.2](https://github.com/sanger-tol/curationpretext/releases/tag/1.4.2)] - UNSC Nereid (H2) - [2025-07-28]
 
 ### Added and Fixed

From 6feb191e5d7ef3805c0fc9d2a3a33ccdbe23c209 Mon Sep 17 00:00:00 2001
From: DLBPointon <damonlbp@hotmail.co.uk>
Date: Fri, 22 Aug 2025 11:28:56 +0100
Subject: [PATCH 28/58] Prettier

---
 modules.json | 98 ++++++++++++++--------------------------------------
 1 file changed, 25 insertions(+), 73 deletions(-)

diff --git a/modules.json b/modules.json
index 1fddd1b6..61f01451 100644
--- a/modules.json
+++ b/modules.json
@@ -8,151 +8,109 @@
                     "bedtools/bamtobed": {
                         "branch": "master",
                         "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d",
-                        "installed_by": [
-                            "modules"
-                        ]
+                        "installed_by": ["modules"]
                     },
                     "bedtools/genomecov": {
                         "branch": "master",
                         "git_sha": "81880787133db07d9b4c1febd152c090eb8325dc",
-                        "installed_by": [
-                            "modules"
-                        ]
+                        "installed_by": ["modules"]
                     },
                     "bedtools/intersect": {
                         "branch": "master",
                         "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d",
-                        "installed_by": [
-                            "modules"
-                        ]
+                        "installed_by": ["modules"]
                     },
                     "bedtools/makewindows": {
                         "branch": "master",
                         "git_sha": "81880787133db07d9b4c1febd152c090eb8325dc",
-                        "installed_by": [
-                            "modules"
-                        ]
+                        "installed_by": ["modules"]
                     },
                     "bedtools/map": {
                         "branch": "master",
                         "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d",
-                        "installed_by": [
-                            "modules"
-                        ]
+                        "installed_by": ["modules"]
                     },
                     "bwamem2/index": {
                         "branch": "master",
                         "git_sha": "a29f18660f5e3748d44d6f716241e70c942c065d",
-                        "installed_by": [
-                            "modules"
-                        ]
+                        "installed_by": ["modules"]
                     },
                     "gawk": {
                         "branch": "master",
                         "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d",
-                        "installed_by": [
-                            "modules"
-                        ]
+                        "installed_by": ["modules"]
                     },
                     "gnu/sort": {
                         "branch": "master",
                         "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1",
-                        "installed_by": [
-                            "modules"
-                        ]
+                        "installed_by": ["modules"]
                     },
                     "gunzip": {
                         "branch": "master",
                         "git_sha": "41dfa3f7c0ffabb96a6a813fe321c6d1cc5b6e46",
-                        "installed_by": [
-                            "modules"
-                        ]
+                        "installed_by": ["modules"]
                     },
                     "minimap2/align": {
                         "branch": "master",
                         "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d",
-                        "installed_by": [
-                            "modules"
-                        ]
+                        "installed_by": ["modules"]
                     },
                     "minimap2/index": {
                         "branch": "master",
                         "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d",
-                        "installed_by": [
-                            "modules"
-                        ]
+                        "installed_by": ["modules"]
                     },
                     "pretextmap": {
                         "branch": "master",
                         "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d",
-                        "installed_by": [
-                            "modules"
-                        ],
+                        "installed_by": ["modules"],
                         "patch": "modules/nf-core/pretextmap/pretextmap.diff"
                     },
                     "pretextsnapshot": {
                         "branch": "master",
                         "git_sha": "81880787133db07d9b4c1febd152c090eb8325dc",
-                        "installed_by": [
-                            "modules"
-                        ],
+                        "installed_by": ["modules"],
                         "patch": "modules/nf-core/pretextsnapshot/pretextsnapshot.diff"
                     },
                     "samtools/faidx": {
                         "branch": "master",
                         "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d",
-                        "installed_by": [
-                            "modules"
-                        ]
+                        "installed_by": ["modules"]
                     },
                     "samtools/merge": {
                         "branch": "master",
                         "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d",
-                        "installed_by": [
-                            "modules"
-                        ]
+                        "installed_by": ["modules"]
                     },
                     "samtools/sort": {
                         "branch": "master",
                         "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d",
-                        "installed_by": [
-                            "modules"
-                        ]
+                        "installed_by": ["modules"]
                     },
                     "samtools/view": {
                         "branch": "master",
                         "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d",
-                        "installed_by": [
-                            "modules"
-                        ]
+                        "installed_by": ["modules"]
                     },
                     "seqtk/cutn": {
                         "branch": "master",
                         "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d",
-                        "installed_by": [
-                            "modules"
-                        ]
+                        "installed_by": ["modules"]
                     },
                     "ucsc/bedgraphtobigwig": {
                         "branch": "master",
                         "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d",
-                        "installed_by": [
-                            "modules"
-                        ]
+                        "installed_by": ["modules"]
                     },
                     "windowmasker/mkcounts": {
                         "branch": "master",
                         "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d",
-                        "installed_by": [
-                            "modules"
-                        ]
+                        "installed_by": ["modules"]
                     },
                     "windowmasker/ustat": {
                         "branch": "master",
                         "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d",
-                        "installed_by": [
-                            "modules"
-                        ]
+                        "installed_by": ["modules"]
                     }
                 }
             },
@@ -161,26 +119,20 @@
                     "utils_nextflow_pipeline": {
                         "branch": "master",
                         "git_sha": "c2b22d85f30a706a3073387f30380704fcae013b",
-                        "installed_by": [
-                            "subworkflows"
-                        ]
+                        "installed_by": ["subworkflows"]
                     },
                     "utils_nfcore_pipeline": {
                         "branch": "master",
                         "git_sha": "51ae5406a030d4da1e49e4dab49756844fdd6c7a",
-                        "installed_by": [
-                            "subworkflows"
-                        ]
+                        "installed_by": ["subworkflows"]
                     },
                     "utils_nfschema_plugin": {
                         "branch": "master",
                         "git_sha": "2fd2cd6d0e7b273747f32e465fdc6bcc3ae0814e",
-                        "installed_by": [
-                            "subworkflows"
-                        ]
+                        "installed_by": ["subworkflows"]
                     }
                 }
             }
         }
     }
-}
\ No newline at end of file
+}

From ab458848c03ca248d1c321432398489fe1a5a293 Mon Sep 17 00:00:00 2001
From: DLBPointon <damonlbp@hotmail.co.uk>
Date: Fri, 22 Aug 2025 16:55:53 +0100
Subject: [PATCH 29/58] Correct the direction of the telo

---
 modules/local/gawk/main.nf | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/modules/local/gawk/main.nf b/modules/local/gawk/main.nf
index f7f34b2e..f8e631ea 100644
--- a/modules/local/gawk/main.nf
+++ b/modules/local/gawk/main.nf
@@ -13,8 +13,8 @@ process GAWK {
     val(disable_redirect_output)
 
     output:
-    tuple val(meta), path("direction.0.${suffix}"), emit: prime3
-    tuple val(meta), path("direction.1.${suffix}"), emit: prime5
+    tuple val(meta), path("direction.0.${suffix}"), emit: prime5
+    tuple val(meta), path("direction.1.${suffix}"), emit: prime3
     path "versions.yml"                           , emit: versions
 
     when:

From d2ace1fe7326afa16720ac6a8863c363536f447b Mon Sep 17 00:00:00 2001
From: DLBPointon <damonlbp@hotmail.co.uk>
Date: Tue, 26 Aug 2025 14:44:09 +0100
Subject: [PATCH 30/58] Update based on comments

---
 modules/local/extract/telo/main.nf     | 43 ----------------
 modules/local/extract/telomere/main.nf |  2 +-
 modules/local/gawk/environment.yml     |  7 ---
 modules/local/gawk/main.nf             | 68 --------------------------
 modules/local/gawk/meta.yml            | 63 ------------------------
 subworkflows/local/telo_finder/main.nf |  5 +-
 6 files changed, 3 insertions(+), 185 deletions(-)
 delete mode 100755 modules/local/extract/telo/main.nf
 delete mode 100644 modules/local/gawk/environment.yml
 delete mode 100644 modules/local/gawk/main.nf
 delete mode 100644 modules/local/gawk/meta.yml

diff --git a/modules/local/extract/telo/main.nf b/modules/local/extract/telo/main.nf
deleted file mode 100755
index 380c1acf..00000000
--- a/modules/local/extract/telo/main.nf
+++ /dev/null
@@ -1,43 +0,0 @@
-process EXTRACT_TELO {
-    tag "${meta.id}"
-    label 'process_low'
-
-    conda "conda-forge::coreutils=9.1"
-    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-    'https://depot.galaxyproject.org/singularity/ubuntu:20.04' :
-    'docker.io/ubuntu:20.04' }"
-
-    input:
-    tuple val( meta ), path( file )
-
-    output:
-    tuple val( meta ), file( "*bed" )   , emit: bed
-    tuple val( meta ), file("*bedgraph"), emit: bedgraph
-    path "versions.yml"                 , emit: versions
-
-    script:
-    def prefix  = task.ext.prefix ?: "${meta.id}"
-    def VERSION = "9.1" // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions.
-    """
-    cat "${file}" | awk '{print \$2"\\t"\$4"\\t"\$5}' | sed 's/>//g' > ${prefix}_telomere.bed
-    cat "${file}" | awk '{print \$2"\\t"\$4"\\t"\$5"\\t"(((\$5-\$4)<0)?-(\$5-\$4):(\$5-\$4))}' | sed 's/>//g' > ${prefix}_telomere.bedgraph
-
-    cat <<-END_VERSIONS > versions.yml
-    "${task.process}":
-        coreutils: $VERSION
-    END_VERSIONS
-    """
-
-    stub:
-    def prefix  = task.ext.prefix ?: "${meta.id}"
-    def VERSION = "9.1" // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions.
-    """
-    touch ${prefix}_telomere.bed
-    touch ${prefix}_telomere.bedgraph
-
-    cat <<-END_VERSIONS > versions.yml
-    "${task.process}":
-        coreutils: $VERSION
-    END_VERSIONS
-    """
-}
diff --git a/modules/local/extract/telomere/main.nf b/modules/local/extract/telomere/main.nf
index a0ce237d..41022f00 100644
--- a/modules/local/extract/telomere/main.nf
+++ b/modules/local/extract/telomere/main.nf
@@ -1,6 +1,6 @@
 process EXTRACT_TELOMERE {
     tag "${meta.id}"
-    label 'process_low'
+    label 'process_single'
 
     conda "conda-forge::coreutils=9.1"
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
diff --git a/modules/local/gawk/environment.yml b/modules/local/gawk/environment.yml
deleted file mode 100644
index f52109e8..00000000
--- a/modules/local/gawk/environment.yml
+++ /dev/null
@@ -1,7 +0,0 @@
----
-# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
-channels:
-  - conda-forge
-  - bioconda
-dependencies:
-  - conda-forge::gawk=5.3.0
diff --git a/modules/local/gawk/main.nf b/modules/local/gawk/main.nf
deleted file mode 100644
index f8e631ea..00000000
--- a/modules/local/gawk/main.nf
+++ /dev/null
@@ -1,68 +0,0 @@
-process GAWK {
-    tag "$meta.id"
-    label 'process_single'
-
-    conda "${moduleDir}/environment.yml"
-    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/gawk:5.3.0' :
-        'biocontainers/gawk:5.3.0' }"
-
-    input:
-    tuple val(meta), path(input, arity: '0..*')
-    path(program_file)
-    val(disable_redirect_output)
-
-    output:
-    tuple val(meta), path("direction.0.${suffix}"), emit: prime5
-    tuple val(meta), path("direction.1.${suffix}"), emit: prime3
-    path "versions.yml"                           , emit: versions
-
-    when:
-    task.ext.when == null || task.ext.when
-
-    script:
-    def args  = task.ext.args  ?: '' // args is used for the main arguments of the tool
-    def args2 = task.ext.args2 ?: '' // args2 is used to specify a program when no program file has been given
-    prefix    = task.ext.prefix ?: "${meta.id}"
-    suffix    = task.ext.suffix ?: "${input.collect{ it.getExtension()}.get(0)}" // use the first extension of the input files
-
-    program    = program_file ? "-f ${program_file}" : "${args2}"
-    lst_gz     = input.findResults{ it.getExtension().endsWith("gz") ? it.toString() : null }
-    unzip      = lst_gz ? "gunzip -q -f ${lst_gz.join(" ")}" : ""
-    input_cmd  = input.collect { it.toString() - ~/\.gz$/ }.join(" ")
-    cleanup    = lst_gz ? "rm ${lst_gz.collect{ it - ~/\.gz$/ }.join(" ")}" : ""
-
-    input.collect{
-        assert it.name != "${prefix}.${suffix}" : "Input and output names are the same, set prefix in module configuration to disambiguate!"
-    }
-
-    """
-    ${unzip}
-
-    awk \\
-        ${args} \\
-        ${program} \\
-        ${input_cmd}
-
-    ${cleanup}
-
-    cat <<-END_VERSIONS > versions.yml
-    "${task.process}":
-        gawk: \$(awk -Wversion | sed '1!d; s/.*Awk //; s/,.*//')
-    END_VERSIONS
-    """
-
-    stub:
-    prefix = task.ext.prefix ?: "${meta.id}"
-    suffix = task.ext.suffix ?: "${input.getExtension()}"
-    def create_cmd = suffix.endsWith("gz") ? "echo '' | gzip >" : "touch"
-
-    """
-    ${create_cmd} ${prefix}.${suffix}
-
-    cat <<-END_VERSIONS > versions.yml
-    "${task.process}":
-        gawk: \$(awk -Wversion | sed '1!d; s/.*Awk //; s/,.*//')
-    END_VERSIONS
-    """
-}
diff --git a/modules/local/gawk/meta.yml b/modules/local/gawk/meta.yml
deleted file mode 100644
index 34c50b12..00000000
--- a/modules/local/gawk/meta.yml
+++ /dev/null
@@ -1,63 +0,0 @@
-name: "gawk"
-description: |
-  If you are like many computer users, you would frequently like to make changes in various text files
-  wherever certain patterns appear, or extract data from parts of certain lines while discarding the rest.
-  The job is easy with awk, especially the GNU implementation gawk.
-keywords:
-  - gawk
-  - awk
-  - txt
-  - text
-  - file parsing
-tools:
-  - "gawk":
-      description: "GNU awk"
-      homepage: "https://www.gnu.org/software/gawk/"
-      documentation: "https://www.gnu.org/software/gawk/manual/"
-      tool_dev_url: "https://www.gnu.org/prep/ftp.html"
-      licence: ["GPL v3"]
-      identifier: ""
-input:
-  - - meta:
-        type: map
-        description: |
-          Groovy Map containing sample information
-          e.g. [ id:'test', single_end:false ]
-    - input:
-        type: file
-        description: The input file - Specify the logic that needs to be executed on
-          this file on the `ext.args2` or in the program file.
-          If the files have a `.gz` extension, they will be unzipped using `zcat`.
-        pattern: "*"
-  - - program_file:
-        type: file
-        description: Optional file containing logic for awk to execute. If you don't
-          wish to use a file, you can use `ext.args2` to specify the logic.
-        pattern: "*"
-  - - disable_redirect_output:
-        type: boolean
-        description: Disable the redirection of awk output to a given file. This is
-          useful if you want to use awk's built-in redirect to write files instead
-          of the shell's redirect.
-output:
-  - output:
-      - meta:
-          type: map
-          description: |
-            Groovy Map containing sample information
-            e.g. [ id:'test', single_end:false ]
-      - "*.${suffix}":
-          type: file
-          description: The output file - if using shell redirection, specify the name of this
-            file using `ext.prefix` and the extension using `ext.suffix`. Otherwise, ensure
-            the awk program produces files with the extension in `ext.suffix`.
-          pattern: "*"
-  - versions:
-      - versions.yml:
-          type: file
-          description: File containing software versions
-          pattern: "versions.yml"
-authors:
-  - "@nvnieuwk"
-maintainers:
-  - "@nvnieuwk"
diff --git a/subworkflows/local/telo_finder/main.nf b/subworkflows/local/telo_finder/main.nf
index ce827a3d..cdf0d223 100644
--- a/subworkflows/local/telo_finder/main.nf
+++ b/subworkflows/local/telo_finder/main.nf
@@ -4,7 +4,7 @@
 // MODULE IMPORT BLOCK
 //
 include { FIND_TELOMERE_REGIONS         } from '../../../modules/local/find/telomere_regions/main'
-include { GAWK as GAWK_SPLIT_DIRECTIONS } from '../../../modules/local/gawk/main'
+include { GAWK_SPLIT_DIRECTIONS         } from '../../../modules/local/gawk_split_directions/main'
 
 include { TELO_EXTRACTION               } from '../../../subworkflows/local/telo_extraction/main'
 
@@ -35,8 +35,7 @@ workflow TELO_FINDER {
     if (params.split_telomere) {
         GAWK_SPLIT_DIRECTIONS (
             FIND_TELOMERE_REGIONS.out.telomere,
-            file("${projectDir}/bin/gawk_split_directions.awk"),
-            false
+            file("${projectDir}/bin/gawk_split_directions.awk")
         )
         ch_versions     = ch_versions.mix( GAWK_SPLIT_DIRECTIONS.out.versions )
 

From 17590d3f6f5395f5edd03a6c8778fe6566f289ab Mon Sep 17 00:00:00 2001
From: DLBPointon <damonlbp@hotmail.co.uk>
Date: Tue, 26 Aug 2025 14:44:38 +0100
Subject: [PATCH 31/58] Update based on comments

---
 .../gawk_split_directions/environment.yml     |  7 +++
 modules/local/gawk_split_directions/main.nf   | 58 +++++++++++++++++
 modules/local/gawk_split_directions/meta.yml  | 63 +++++++++++++++++++
 3 files changed, 128 insertions(+)
 create mode 100644 modules/local/gawk_split_directions/environment.yml
 create mode 100644 modules/local/gawk_split_directions/main.nf
 create mode 100644 modules/local/gawk_split_directions/meta.yml

diff --git a/modules/local/gawk_split_directions/environment.yml b/modules/local/gawk_split_directions/environment.yml
new file mode 100644
index 00000000..f52109e8
--- /dev/null
+++ b/modules/local/gawk_split_directions/environment.yml
@@ -0,0 +1,7 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+channels:
+  - conda-forge
+  - bioconda
+dependencies:
+  - conda-forge::gawk=5.3.0
diff --git a/modules/local/gawk_split_directions/main.nf b/modules/local/gawk_split_directions/main.nf
new file mode 100644
index 00000000..29b4af8a
--- /dev/null
+++ b/modules/local/gawk_split_directions/main.nf
@@ -0,0 +1,58 @@
+process GAWK_SPLIT_DIRECTIONS {
+    tag "$meta.id"
+    label 'process_single'
+    // Runs awk over `input`. stdout is not redirected, so the awk program itself must write the direction.0.* / direction.1.* files declared under output:.
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/gawk:5.3.0' :
+        'biocontainers/gawk:5.3.0' }"
+
+    input:
+    tuple val(meta), path(input) // meta map plus the file handed to awk as its only operand
+    path(program_file)           // awk program passed with -f; when falsy, ext.args2 is used as the program text instead
+
+    output:
+    tuple val(meta), path("direction.0.${suffix}"), emit: prime5
+    tuple val(meta), path("direction.1.${suffix}"), emit: prime3
+    path "versions.yml"                           , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args  = task.ext.args  ?: '' // args is used for the main arguments of the tool
+    def args2 = task.ext.args2 ?: '' // args2 is used to specify a program when no program file has been given
+    prefix    = task.ext.prefix ?: "${meta.id}"
+    suffix    = task.ext.suffix ?: "${input.collect{ it.getExtension()}.get(0)}" // extension of the input file; also the extension of both outputs
+
+    program    = program_file ? "-f ${program_file}" : "${args2}"
+    // NOTE(review): the guard below compares against <prefix>.<suffix>, a name this module never writes (outputs are direction.N.*) - confirm it is still wanted.
+    input.collect{
+        assert it.name != "${prefix}.${suffix}" : "Input and output names are the same, set prefix in module configuration to disambiguate!"
+    }
+
+    """
+    awk \\
+        ${args} \\
+        ${program} \\
+        ${input}
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        gawk: \$(awk -Wversion | sed '1!d; s/.*Awk //; s/,.*//')
+    END_VERSIONS
+    """
+
+    stub:
+    // The stub must create exactly the files named under output:, otherwise -stub runs fail on missing outputs.
+    suffix = task.ext.suffix ?: "${input.getExtension()}"
+
+    """
+    touch direction.0.${suffix} direction.1.${suffix}
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        gawk: \$(awk -Wversion | sed '1!d; s/.*Awk //; s/,.*//')
+    END_VERSIONS
+    """
+}
diff --git a/modules/local/gawk_split_directions/meta.yml b/modules/local/gawk_split_directions/meta.yml
new file mode 100644
index 00000000..34c50b12
--- /dev/null
+++ b/modules/local/gawk_split_directions/meta.yml
@@ -0,0 +1,63 @@
+name: "gawk_split_directions"
+description: |
+  If you are like many computer users, you would frequently like to make changes in various text files
+  wherever certain patterns appear, or extract data from parts of certain lines while discarding the rest.
+  The job is easy with awk, especially the GNU implementation gawk.
+keywords:
+  - gawk
+  - awk
+  - txt
+  - text
+  - file parsing
+tools:
+  - "gawk":
+      description: "GNU awk"
+      homepage: "https://www.gnu.org/software/gawk/"
+      documentation: "https://www.gnu.org/software/gawk/manual/"
+      tool_dev_url: "https://www.gnu.org/prep/ftp.html"
+      licence: ["GPL v3"]
+      identifier: ""
+input:
+  - - meta:
+        type: map
+        description: |
+          Groovy Map containing sample information
+          e.g. [ id:'test', single_end:false ]
+    - input:
+        type: file
+        description: The input file - Specify the logic that needs to be executed on
+          this file on the `ext.args2` or in the program file.
+          The file is passed to awk as-is; files with a `.gz` extension are not decompressed.
+        pattern: "*"
+  - - program_file:
+        type: file
+        description: Optional file containing logic for awk to execute. If you don't
+          wish to use a file, you can use `ext.args2` to specify the logic.
+        pattern: "*"
+  - - disable_redirect_output:
+        type: boolean
+        description: Disable the redirection of awk output to a given file. This is
+          useful if you want to use awk's built-in redirect to write files instead
+          of the shell's redirect.
+output:
+  - output:
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. [ id:'test', single_end:false ]
+      - "*.${suffix}":
+          type: file
+          description: The output file - if using shell redirection, specify the name of this
+            file using `ext.prefix` and the extension using `ext.suffix`. Otherwise, ensure
+            the awk program produces files with the extension in `ext.suffix`.
+          pattern: "*"
+  - versions:
+      - versions.yml:
+          type: file
+          description: File containing software versions
+          pattern: "versions.yml"
+authors:
+  - "@nvnieuwk"
+maintainers:
+  - "@nvnieuwk"

From 2c8a2715ba7ba458caeb3ed2d3807b6d7ebaad4c Mon Sep 17 00:00:00 2001
From: DLBPointon <damonlbp@hotmail.co.uk>
Date: Tue, 26 Aug 2025 14:49:47 +0100
Subject: [PATCH 32/58] Update

---
 CHANGELOG.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index b04b2082..c7ef5be3 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -22,6 +22,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Better formatting in some files.
 - Moved `GAWK_UPPER_SEQUENCE` from the `TELO_FINDER` subworkflow to the first step of the main `curationpretext` workflow, this simply makes more sense.
 - Removed no longer needed scripts from bin.
+- Added the `GAWK_SPLIT_DIRECTIONS` module, a local copy of the nf-core `GAWK` module.
 - Added the `gawk_split_directions.awk` script for split telomere.
 - Addition of GUNZIP for the input reference genome.
 - Update tests.
@@ -45,6 +46,7 @@ Note, since the pipeline is using Nextflow DSL2, each process will be run with i
 | `GAWK_MAP_TELO`          | 5.3.0         | REMOVED       |
 | `GET_LARGEST_SCAFF`      | coreutils=9.1 | REMOVED       |
 | `GUNZIP`                 | NA            | 1.13          |
+| `GAWK_SPLIT_DIRECTIONS   | NA            | 5.3.0         |
 
 ## [[1.4.2](https://github.com/sanger-tol/curationpretext/releases/tag/1.4.2)] - UNSC Nereid (H2) - [2025-07-28]
 

From f4b52e7989cc3dff37cef8d923939191bf34ff81 Mon Sep 17 00:00:00 2001
From: Damon-Lee Pointon <51855558+DLBPointon@users.noreply.github.com>
Date: Tue, 26 Aug 2025 17:26:53 +0100
Subject: [PATCH 33/58] Update CHANGELOG.md

Missed a '`'
---
 CHANGELOG.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index c7ef5be3..e7f8b932 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -46,7 +46,7 @@ Note, since the pipeline is using Nextflow DSL2, each process will be run with i
 | `GAWK_MAP_TELO`          | 5.3.0         | REMOVED       |
 | `GET_LARGEST_SCAFF`      | coreutils=9.1 | REMOVED       |
 | `GUNZIP`                 | NA            | 1.13          |
-| `GAWK_SPLIT_DIRECTIONS   | NA            | 5.3.0         |
+| `GAWK_SPLIT_DIRECTIONS`  | NA            | 5.3.0         |
 
 ## [[1.4.2](https://github.com/sanger-tol/curationpretext/releases/tag/1.4.2)] - UNSC Nereid (H2) - [2025-07-28]
 

From ea76b009a1bd99c97b1f5e587f04368f35428d3e Mon Sep 17 00:00:00 2001
From: DLBPointon <damonlbp@hotmail.co.uk>
Date: Thu, 28 Aug 2025 11:58:33 +0100
Subject: [PATCH 34/58] Patch

---
 conf/base.config | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/conf/base.config b/conf/base.config
index 9add5450..0e65e0a4 100644
--- a/conf/base.config
+++ b/conf/base.config
@@ -21,6 +21,7 @@ process {
     withName:SAMTOOLS_MERGE {
         cpus    = { 16                          }
         memory  = { 50.GB     * task.attempt    }
+        time    = { 20.h      * task.attempt    }
     }
 
     withName: '.*:.*:LONGREAD_COVERAGE:(MINIMAP2_ALIGN|MINIMAP2_ALIGN_SPLIT)' {
@@ -86,6 +87,11 @@ process {
         memory = { 1.GB      * task.attempt }
     }
 
+    withName: BEDTOOLS_INTERSECT {
+        memory = { 10.GB * task.attempt }
+        time   = { 20.h  * task.attempt }
+    }
+
     // Process-specific resource requirements
     // NOTE - Please try and reuse the labels below as much as possible.
     //        These labels are used and recognised by default in DSL2 files hosted on nf-core/modules.
@@ -100,7 +106,7 @@ process {
     withLabel:process_low {
         cpus   = { 2     * task.attempt }
         memory = { 12.GB * task.attempt }
-        time   = { 4.h   * task.attempt }
+        time   = { 20.h  * task.attempt }
     }
     withLabel:process_medium {
         cpus   = { 6     * task.attempt }

From bb0308d39e8f43fb5d5d98ab952fabe06cb2fe48 Mon Sep 17 00:00:00 2001
From: DLBPointon <damonlbp@hotmail.co.uk>
Date: Thu, 28 Aug 2025 11:59:18 +0100
Subject: [PATCH 35/58] Patch

---
 conf/base.config | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/conf/base.config b/conf/base.config
index 0e65e0a4..e250d8dc 100644
--- a/conf/base.config
+++ b/conf/base.config
@@ -106,7 +106,7 @@ process {
     withLabel:process_low {
         cpus   = { 2     * task.attempt }
         memory = { 12.GB * task.attempt }
-        time   = { 20.h  * task.attempt }
+        time   = { 4.h   * task.attempt }
     }
     withLabel:process_medium {
         cpus   = { 6     * task.attempt }

From a80391efbe45f95305b78a44185dcfdf3e334af8 Mon Sep 17 00:00:00 2001
From: DLBPointon <damonlbp@hotmail.co.uk>
Date: Thu, 28 Aug 2025 16:47:54 +0100
Subject: [PATCH 36/58] Template update for nf-core/tools version 3.3.2

---
 .editorconfig                                 |  37 -----
 .github/CONTRIBUTING.md                       |   2 +-
 .github/actions/get-shards/action.yml         |  69 +++++++++
 .github/actions/nf-test/action.yml            | 109 ++++++++++++++
 .github/workflows/ci.yml                      |  88 -----------
 .github/workflows/clean-up.yml                |   2 +-
 .github/workflows/download_pipeline.yml       |  20 +--
 .../{fix-linting.yml => fix_linting.yml}      |   4 +-
 .github/workflows/linting.yml                 |  17 +--
 .github/workflows/linting_comment.yml         |   4 +-
 .github/workflows/nf-test.yml                 | 140 ++++++++++++++++++
 ...mment.yml => template-version-comment.yml} |   2 +-
 .nf-core.yml                                  |   4 +-
 .pre-commit-config.yaml                       |  26 +++-
 .prettierrc.yml                               |   5 +
 CHANGELOG.md                                  |   2 +-
 README.md                                     |   7 +-
 assets/schema_input.json                      |   4 +-
 conf/base.config                              |   6 +-
 nextflow.config                               |  22 ++-
 nf-test.config                                |  24 +++
 .../main.nf                                   |   1 -
 .../tests/nextflow.config                     |   2 +-
 tests/.nftignore                              |   2 +
 tests/default.nf.test                         |  35 +++++
 tests/nextflow.config                         |  14 ++
 26 files changed, 472 insertions(+), 176 deletions(-)
 delete mode 100644 .editorconfig
 create mode 100644 .github/actions/get-shards/action.yml
 create mode 100644 .github/actions/nf-test/action.yml
 delete mode 100644 .github/workflows/ci.yml
 rename .github/workflows/{fix-linting.yml => fix_linting.yml} (96%)
 create mode 100644 .github/workflows/nf-test.yml
 rename .github/workflows/{template_version_comment.yml => template-version-comment.yml} (95%)
 create mode 100644 nf-test.config
 create mode 100644 tests/.nftignore
 create mode 100644 tests/default.nf.test
 create mode 100644 tests/nextflow.config

diff --git a/.editorconfig b/.editorconfig
deleted file mode 100644
index 6d9b74cc..00000000
--- a/.editorconfig
+++ /dev/null
@@ -1,37 +0,0 @@
-root = true
-
-[*]
-charset = utf-8
-end_of_line = lf
-insert_final_newline = true
-trim_trailing_whitespace = true
-indent_size = 4
-indent_style = space
-
-[*.{md,yml,yaml,html,css,scss,js}]
-indent_size = 2
-
-# These files are edited and tested upstream in nf-core/modules
-[/modules/nf-core/**]
-charset = unset
-end_of_line = unset
-insert_final_newline = unset
-trim_trailing_whitespace = unset
-indent_style = unset
-[/subworkflows/nf-core/**]
-charset = unset
-end_of_line = unset
-insert_final_newline = unset
-trim_trailing_whitespace = unset
-indent_style = unset
-
-[/assets/email*]
-indent_size = unset
-
-# ignore python and markdown
-[*.{py,md}]
-indent_style = unset
-
-# ignore ro-crate metadata files
-[**/ro-crate-metadata.json]
-insert_final_newline = unset
diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md
index d7a0a69f..e3f6b58c 100644
--- a/.github/CONTRIBUTING.md
+++ b/.github/CONTRIBUTING.md
@@ -71,7 +71,7 @@ If you wish to contribute a new step, please use the following coding standards:
 5. Add any new parameters to `nextflow_schema.json` with help text (via the `nf-core pipelines schema build` tool).
 6. Add sanity checks and validation for all relevant parameters.
 7. Perform local tests to validate that the new code works as expected.
-8. If applicable, add a new test command in `.github/workflow/ci.yml`.
+8. If applicable, add a new test in the `tests` directory.
 
 ### Default values
 
diff --git a/.github/actions/get-shards/action.yml b/.github/actions/get-shards/action.yml
new file mode 100644
index 00000000..34085279
--- /dev/null
+++ b/.github/actions/get-shards/action.yml
@@ -0,0 +1,69 @@
+name: "Get number of shards"
+description: "Get the number of nf-test shards for the current CI job"
+inputs:
+  max_shards:
+    description: "Maximum number of shards allowed"
+    required: true
+  paths:
+    description: "Component paths to test"
+    required: false
+  tags:
+    description: "Tags to pass as argument for nf-test --tag parameter"
+    required: false
+outputs:
+  shard:
+    description: "Array of shard numbers"
+    value: ${{ steps.shards.outputs.shard }}
+  total_shards:
+    description: "Total number of shards"
+    value: ${{ steps.shards.outputs.total_shards }}
+runs:
+  using: "composite"
+  steps:
+    - name: Install nf-test
+      uses: nf-core/setup-nf-test@v1
+      with:
+        version: ${{ env.NFT_VER }}
+    - name: Get number of shards
+      id: shards
+      shell: bash
+      run: |
+        # Run nf-test with dynamic parameter
+        nftest_output=$(nf-test test \
+          --profile +docker \
+          $(if [ -n "${{ inputs.tags }}" ]; then echo "--tag ${{ inputs.tags }}"; fi) \
+          --dry-run \
+          --ci \
+          --changed-since HEAD^) || {
+            echo "nf-test command failed with exit code $?"
+            echo "Full output: $nftest_output"
+            exit 1
+        }
+        echo "nf-test dry-run output: $nftest_output"
+
+        # Default values for shard and total_shards
+        shard="[]"
+        total_shards=0
+
+        # Check if there are related tests
+        if echo "$nftest_output" | grep -q 'No tests to execute'; then
+          echo "No related tests found."
+        else
+          # Extract the number of related tests
+          number_of_shards=$(echo "$nftest_output" | sed -n 's|.*Executed \([0-9]*\) tests.*|\1|p')
+          if [[ -n "$number_of_shards" && "$number_of_shards" -gt 0 ]]; then
+            shards_to_run=$(( $number_of_shards < ${{ inputs.max_shards }} ? $number_of_shards : ${{ inputs.max_shards }} ))
+            shard=$(seq 1 "$shards_to_run" | jq -R . | jq -c -s .)
+            total_shards="$shards_to_run"
+          else
+            echo "Unexpected output format. Falling back to default values."
+          fi
+        fi
+
+        # Write to GitHub Actions outputs
+        echo "shard=$shard" >> $GITHUB_OUTPUT
+        echo "total_shards=$total_shards" >> $GITHUB_OUTPUT
+
+        # Debugging output
+        echo "Final shard array: $shard"
+        echo "Total number of shards: $total_shards"
diff --git a/.github/actions/nf-test/action.yml b/.github/actions/nf-test/action.yml
new file mode 100644
index 00000000..bf44d961
--- /dev/null
+++ b/.github/actions/nf-test/action.yml
@@ -0,0 +1,109 @@
+name: "nf-test Action"
+description: "Runs nf-test with common setup steps"
+inputs:
+  profile:
+    description: "Profile to use"
+    required: true
+  shard:
+    description: "Shard number for this CI job"
+    required: true
+  total_shards:
+    description: "Total number of test shards(NOT the total number of matrix jobs)"
+    required: true
+  paths:
+    description: "Test paths"
+    required: true
+  tags:
+    description: "Tags to pass as argument for nf-test --tag parameter"
+    required: false
+runs:
+  using: "composite"
+  steps:
+    - name: Setup Nextflow
+      uses: nf-core/setup-nextflow@v2
+      with:
+        version: "${{ env.NXF_VERSION }}"
+
+    - name: Set up Python
+      uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5
+      with:
+        python-version: "3.13"
+
+    - name: Install nf-test
+      uses: nf-core/setup-nf-test@v1
+      with:
+        version: "${{ env.NFT_VER }}"
+        install-pdiff: true
+
+    - name: Setup apptainer
+      if: contains(inputs.profile, 'singularity')
+      uses: eWaterCycle/setup-apptainer@main
+
+    - name: Set up Singularity
+      if: contains(inputs.profile, 'singularity')
+      shell: bash
+      run: |
+        mkdir -p $NXF_SINGULARITY_CACHEDIR
+        mkdir -p $NXF_SINGULARITY_LIBRARYDIR
+
+    - name: Conda setup
+      if: contains(inputs.profile, 'conda')
+      uses: conda-incubator/setup-miniconda@505e6394dae86d6a5c7fbb6e3fb8938e3e863830 # v3
+      with:
+        auto-update-conda: true
+        conda-solver: libmamba
+        conda-remove-defaults: true
+
+    - name: Run nf-test
+      shell: bash
+      env:
+        NFT_WORKDIR: ${{ env.NFT_WORKDIR }}
+      run: |
+        nf-test test \
+          --profile=+${{ inputs.profile }} \
+          $(if [ -n "${{ inputs.tags }}" ]; then echo "--tag ${{ inputs.tags }}"; fi) \
+          --ci \
+          --changed-since HEAD^ \
+          --verbose \
+          --tap=test.tap \
+          --shard ${{ inputs.shard }}/${{ inputs.total_shards }}
+
+          # Save the absolute path of the test.tap file to the output
+          echo "tap_file_path=$(realpath test.tap)" >> $GITHUB_OUTPUT
+
+    - name: Generate test summary
+      if: always()
+      shell: bash
+      run: |
+        # Add header if it doesn't exist (using a token file to track this)
+        if [ ! -f ".summary_header" ]; then
+          echo "# 🚀 nf-test results" >> $GITHUB_STEP_SUMMARY
+          echo "" >> $GITHUB_STEP_SUMMARY
+          echo "| Status | Test Name | Profile | Shard |" >> $GITHUB_STEP_SUMMARY
+          echo "|:------:|-----------|---------|-------|" >> $GITHUB_STEP_SUMMARY
+          touch .summary_header
+        fi
+
+        if [ -f test.tap ]; then
+          while IFS= read -r line; do
+            if [[ $line =~ ^ok ]]; then
+              test_name="${line#ok }"
+              # Remove the test number from the beginning
+              test_name="${test_name#* }"
+              echo "| ✅ | ${test_name} | ${{ inputs.profile }} | ${{ inputs.shard }}/${{ inputs.total_shards }} |" >> $GITHUB_STEP_SUMMARY
+            elif [[ $line =~ ^not\ ok ]]; then
+              test_name="${line#not ok }"
+              # Remove the test number from the beginning
+              test_name="${test_name#* }"
+              echo "| ❌ | ${test_name} | ${{ inputs.profile }} | ${{ inputs.shard }}/${{ inputs.total_shards }} |" >> $GITHUB_STEP_SUMMARY
+            fi
+          done < test.tap
+        else
+          echo "| ⚠️ | No test results found | ${{ inputs.profile }} | ${{ inputs.shard }}/${{ inputs.total_shards }} |" >> $GITHUB_STEP_SUMMARY
+        fi
+
+    - name: Clean up
+      if: always()
+      shell: bash
+      run: |
+        sudo rm -rf /home/ubuntu/tests/
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
deleted file mode 100644
index 44033727..00000000
--- a/.github/workflows/ci.yml
+++ /dev/null
@@ -1,88 +0,0 @@
-name: nf-core CI
-# This workflow runs the pipeline with the minimal test dataset to check that it completes without any syntax errors
-on:
-  push:
-    branches:
-      - dev
-  pull_request:
-  release:
-    types: [published]
-  workflow_dispatch:
-
-env:
-  NXF_ANSI_LOG: false
-  NXF_SINGULARITY_CACHEDIR: ${{ github.workspace }}/.singularity
-  NXF_SINGULARITY_LIBRARYDIR: ${{ github.workspace }}/.singularity
-
-concurrency:
-  group: "${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}"
-  cancel-in-progress: true
-
-jobs:
-  test:
-    name: "Run pipeline with test data (${{ matrix.NXF_VER }} | ${{ matrix.test_name }} | ${{ matrix.profile }})"
-    # Only run on push if this is the nf-core dev branch (merged PRs)
-    if: "${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'sanger-tol/curationpretext') }}"
-    runs-on: ubuntu-latest
-    strategy:
-      matrix:
-        NXF_VER:
-          - "24.04.2"
-          - "latest-everything"
-        profile:
-          - "conda"
-          - "docker"
-          - "singularity"
-        test_name:
-          - "test"
-        isMaster:
-          - ${{ github.base_ref == 'master' }}
-        # Exclude conda and singularity on dev
-        exclude:
-          - isMaster: false
-            profile: "conda"
-          - isMaster: false
-            profile: "singularity"
-    steps:
-      - name: Check out pipeline code
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4
-        with:
-          fetch-depth: 0
-
-      - name: Set up Nextflow
-        uses: nf-core/setup-nextflow@v2
-        with:
-          version: "${{ matrix.NXF_VER }}"
-
-      - name: Set up Apptainer
-        if: matrix.profile == 'singularity'
-        uses: eWaterCycle/setup-apptainer@main
-
-      - name: Set up Singularity
-        if: matrix.profile == 'singularity'
-        run: |
-          mkdir -p $NXF_SINGULARITY_CACHEDIR
-          mkdir -p $NXF_SINGULARITY_LIBRARYDIR
-
-      - name: Set up Miniconda
-        if: matrix.profile == 'conda'
-        uses: conda-incubator/setup-miniconda@a4260408e20b96e80095f42ff7f1a15b27dd94ca # v3
-        with:
-          miniconda-version: "latest"
-          auto-update-conda: true
-          conda-solver: libmamba
-          channels: conda-forge,bioconda
-
-      - name: Set up Conda
-        if: matrix.profile == 'conda'
-        run: |
-          echo $(realpath $CONDA)/condabin >> $GITHUB_PATH
-          echo $(realpath python) >> $GITHUB_PATH
-
-      - name: Clean up Disk space
-        uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1
-
-      - name: "Run pipeline with test data ${{ matrix.NXF_VER }} | ${{ matrix.test_name }} | ${{ matrix.profile }}"
-        continue-on-error: ${{ matrix.NXF_VER == 'latest-everything' }}
-        run: |
-          nextflow run ${GITHUB_WORKSPACE} -profile ${{ matrix.test_name }},${{ matrix.profile }} --outdir ./results
diff --git a/.github/workflows/clean-up.yml b/.github/workflows/clean-up.yml
index 0b6b1f27..ac030fd5 100644
--- a/.github/workflows/clean-up.yml
+++ b/.github/workflows/clean-up.yml
@@ -10,7 +10,7 @@ jobs:
       issues: write
       pull-requests: write
     steps:
-      - uses: actions/stale@28ca1036281a5e5922ead5184a1bbf96e5fc984e # v9
+      - uses: actions/stale@5bef64f19d7facfb25b37b414482c7164d639639 # v9
         with:
           stale-issue-message: "This issue has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor. Remove stale label or add a comment otherwise this issue will be closed in 20 days."
           stale-pr-message: "This PR has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor. Remove stale label or add a comment if it is still useful."
diff --git a/.github/workflows/download_pipeline.yml b/.github/workflows/download_pipeline.yml
index ab06316e..999bcc38 100644
--- a/.github/workflows/download_pipeline.yml
+++ b/.github/workflows/download_pipeline.yml
@@ -12,14 +12,6 @@ on:
         required: true
         default: "dev"
   pull_request:
-    types:
-      - opened
-      - edited
-      - synchronize
-    branches:
-      - main
-      - master
-  pull_request_target:
     branches:
       - main
       - master
@@ -52,9 +44,9 @@ jobs:
       - name: Disk space cleanup
         uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1
 
-      - uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5
+      - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5
         with:
-          python-version: "3.12"
+          python-version: "3.13"
           architecture: "x64"
 
       - name: Setup Apptainer
@@ -120,6 +112,7 @@ jobs:
           echo "IMAGE_COUNT_AFTER=$image_count" >> "$GITHUB_OUTPUT"
 
       - name: Compare container image counts
+        id: count_comparison
         run: |
           if [ "${{ steps.count_initial.outputs.IMAGE_COUNT_INITIAL }}" -ne "${{ steps.count_afterwards.outputs.IMAGE_COUNT_AFTER }}" ]; then
             initial_count=${{ steps.count_initial.outputs.IMAGE_COUNT_INITIAL }}
@@ -132,3 +125,10 @@ jobs:
           else
             echo "The pipeline can be downloaded successfully!"
           fi
+
+      - name: Upload Nextflow logfile for debugging purposes
+        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4
+        with:
+          name: nextflow_logfile.txt
+          path: .nextflow.log*
+          include-hidden-files: true
diff --git a/.github/workflows/fix-linting.yml b/.github/workflows/fix_linting.yml
similarity index 96%
rename from .github/workflows/fix-linting.yml
rename to .github/workflows/fix_linting.yml
index 94c929ba..1c97b461 100644
--- a/.github/workflows/fix-linting.yml
+++ b/.github/workflows/fix_linting.yml
@@ -32,9 +32,9 @@ jobs:
           GITHUB_TOKEN: ${{ secrets.nf_core_bot_auth_token }}
 
       # Install and run pre-commit
-      - uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5
+      - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5
         with:
-          python-version: "3.12"
+          python-version: "3.13"
 
       - name: Install pre-commit
         run: pip install pre-commit
diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml
index dbd52d5a..8b0f88c3 100644
--- a/.github/workflows/linting.yml
+++ b/.github/workflows/linting.yml
@@ -3,9 +3,6 @@ name: nf-core linting
 # It runs the `nf-core pipelines lint` and markdown lint tests to ensure
 # that the code meets the nf-core guidelines.
 on:
-  push:
-    branches:
-      - dev
   pull_request:
   release:
     types: [published]
@@ -16,10 +13,10 @@ jobs:
     steps:
       - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4
 
-      - name: Set up Python 3.12
-        uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5
+      - name: Set up Python 3.13
+        uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5
         with:
-          python-version: "3.12"
+          python-version: "3.13"
 
       - name: Install pre-commit
         run: pip install pre-commit
@@ -36,13 +33,13 @@ jobs:
       - name: Install Nextflow
         uses: nf-core/setup-nextflow@v2
 
-      - uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5
+      - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5
         with:
-          python-version: "3.12"
+          python-version: "3.13"
           architecture: "x64"
 
       - name: read .nf-core.yml
-        uses: pietrobolcato/action-read-yaml@1.1.0
+        uses: pietrobolcato/action-read-yaml@9f13718d61111b69f30ab4ac683e67a56d254e1d # 1.1.0
         id: read_yml
         with:
           config: ${{ github.workspace }}/.nf-core.yml
@@ -74,7 +71,7 @@ jobs:
 
       - name: Upload linting log file artifact
         if: ${{ always() }}
-        uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4
+        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4
         with:
           name: linting-logs
           path: |
diff --git a/.github/workflows/linting_comment.yml b/.github/workflows/linting_comment.yml
index 95b6b6af..d43797d9 100644
--- a/.github/workflows/linting_comment.yml
+++ b/.github/workflows/linting_comment.yml
@@ -11,7 +11,7 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - name: Download lint results
-        uses: dawidd6/action-download-artifact@20319c5641d495c8a52e688b7dc5fada6c3a9fbc # v8
+        uses: dawidd6/action-download-artifact@ac66b43f0e6a346234dd65d4d0c8fbb31cb316e5 # v11
         with:
           workflow: linting.yml
           workflow_conclusion: completed
@@ -21,7 +21,7 @@ jobs:
         run: echo "pr_number=$(cat linting-logs/PR_number.txt)" >> $GITHUB_OUTPUT
 
       - name: Post PR comment
-        uses: marocchino/sticky-pull-request-comment@331f8f5b4215f0445d3c07b4967662a32a2d3e31 # v2
+        uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728 # v2
         with:
           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
           number: ${{ steps.pr_number.outputs.pr_number }}
diff --git a/.github/workflows/nf-test.yml b/.github/workflows/nf-test.yml
new file mode 100644
index 00000000..593c9360
--- /dev/null
+++ b/.github/workflows/nf-test.yml
@@ -0,0 +1,140 @@
+name: Run nf-test
+on:
+  pull_request:
+    paths-ignore:
+      - "docs/**"
+      - "**/meta.yml"
+      - "**/*.md"
+      - "**/*.png"
+      - "**/*.svg"
+  release:
+    types: [published]
+  workflow_dispatch:
+
+# Cancel if a newer run is started
+concurrency:
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
+  cancel-in-progress: true
+
+env:
+  GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+  NFT_VER: "0.9.2"
+  NFT_WORKDIR: "~"
+  NXF_ANSI_LOG: false
+  NXF_SINGULARITY_CACHEDIR: ${{ github.workspace }}/.singularity
+  NXF_SINGULARITY_LIBRARYDIR: ${{ github.workspace }}/.singularity
+
+jobs:
+  nf-test-changes:
+    name: nf-test-changes
+    runs-on: # use GitHub runners
+      - "ubuntu-latest"
+    outputs:
+      shard: ${{ steps.set-shards.outputs.shard }}
+      total_shards: ${{ steps.set-shards.outputs.total_shards }}
+    steps:
+      - name: Clean Workspace # Purge the workspace in case it's running on a self-hosted runner
+        run: |
+          ls -la ./
+          rm -rf ./* || true
+          rm -rf ./.??* || true
+          ls -la ./
+      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4
+        with:
+          fetch-depth: 0
+
+      - name: get number of shards
+        id: set-shards
+        uses: ./.github/actions/get-shards
+        env:
+          NFT_VER: ${{ env.NFT_VER }}
+        with:
+          max_shards: 7
+
+      - name: debug
+        run: |
+          echo ${{ steps.set-shards.outputs.shard }}
+          echo ${{ steps.set-shards.outputs.total_shards }}
+
+  nf-test:
+    name: "${{ matrix.profile }} | ${{ matrix.NXF_VER }} | ${{ matrix.shard }}/${{ needs.nf-test-changes.outputs.total_shards }}"
+    needs: [nf-test-changes]
+    if: ${{ needs.nf-test-changes.outputs.total_shards != '0' }}
+    runs-on: # use GitHub runners
+      - "ubuntu-latest"
+    strategy:
+      fail-fast: false
+      matrix:
+        shard: ${{ fromJson(needs.nf-test-changes.outputs.shard) }}
+        profile: [conda, docker, singularity]
+        isMain:
+          - ${{ github.base_ref == 'master' || github.base_ref == 'main' }}
+        # Exclude conda and singularity on dev
+        exclude:
+          - isMain: false
+            profile: "conda"
+          - isMain: false
+            profile: "singularity"
+        NXF_VER:
+          - "24.10.5"
+          - "latest-everything"
+    env:
+      NXF_ANSI_LOG: false
+      TOTAL_SHARDS: ${{ needs.nf-test-changes.outputs.total_shards }}
+
+    steps:
+      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4
+        with:
+          fetch-depth: 0
+
+      - name: Run nf-test
+        id: run_nf_test
+        uses: ./.github/actions/nf-test
+        continue-on-error: ${{ matrix.NXF_VER == 'latest-everything' }}
+        env:
+          NFT_WORKDIR: ${{ env.NFT_WORKDIR }}
+        with:
+          profile: ${{ matrix.profile }}
+          shard: ${{ matrix.shard }}
+          total_shards: ${{ env.TOTAL_SHARDS }}
+
+      - name: Report test status
+        if: ${{ always() }}
+        run: |
+          if [[ "${{ steps.run_nf_test.outcome }}" == "failure" ]]; then
+            echo "::error::Test with ${{ matrix.NXF_VER }} failed"
+            # Add to workflow summary
+            echo "## ❌ Test failed: ${{ matrix.profile }} | ${{ matrix.NXF_VER }} | Shard ${{ matrix.shard }}/${{ env.TOTAL_SHARDS }}" >> $GITHUB_STEP_SUMMARY
+            if [[ "${{ matrix.NXF_VER }}" == "latest-everything" ]]; then
+              echo "::warning::Test with latest-everything failed but will not cause workflow failure. Please check if the error is expected or if it needs fixing."
+            fi
+            if [[ "${{ matrix.NXF_VER }}" != "latest-everything" ]]; then
+              exit 1
+            fi
+          fi
+
+  confirm-pass:
+    needs: [nf-test]
+    if: always()
+    runs-on: # use GitHub runners
+      - "ubuntu-latest"
+    steps:
+      - name: One or more tests failed (excluding latest-everything)
+        if: ${{ contains(needs.*.result, 'failure') }}
+        run: exit 1
+
+      - name: One or more tests cancelled
+        if: ${{ contains(needs.*.result, 'cancelled') }}
+        run: exit 1
+
+      - name: All tests ok
+        if: ${{ contains(needs.*.result, 'success') }}
+        run: exit 0
+
+      - name: debug-print
+        if: always()
+        run: |
+          echo "::group::DEBUG: `needs` Contents"
+          echo "DEBUG: toJSON(needs) = ${{ toJSON(needs) }}"
+          echo "DEBUG: toJSON(needs.*.result) = ${{ toJSON(needs.*.result) }}"
+          echo "::endgroup::"
diff --git a/.github/workflows/template_version_comment.yml b/.github/workflows/template-version-comment.yml
similarity index 95%
rename from .github/workflows/template_version_comment.yml
rename to .github/workflows/template-version-comment.yml
index 537529bc..beb5c77f 100644
--- a/.github/workflows/template_version_comment.yml
+++ b/.github/workflows/template-version-comment.yml
@@ -14,7 +14,7 @@ jobs:
           ref: ${{ github.event.pull_request.head.sha }}
 
       - name: Read template version from .nf-core.yml
-        uses: nichmor/minimal-read-yaml@v0.0.2
+        uses: nichmor/minimal-read-yaml@1f7205277e25e156e1f63815781db80a6d490b8f # v0.0.2
         id: read_yml
         with:
           config: ${{ github.workspace }}/.nf-core.yml
diff --git a/.nf-core.yml b/.nf-core.yml
index 259a3724..485f5487 100644
--- a/.nf-core.yml
+++ b/.nf-core.yml
@@ -30,7 +30,7 @@ lint:
   nextflow_config:
     - manifest.name
     - manifest.homePage
-nf_core_version: 3.2.1
+nf_core_version: 3.3.2
 repository_type: pipeline
 template:
   author: Damon-Lee B Pointon (@DLBPointon)
@@ -48,4 +48,4 @@ template:
     - seqera_platform
     - multiqc
     - rocrate
-  version: 1.4.1
+  version: 1.4.2
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 1dec8650..bb41beec 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -4,10 +4,24 @@ repos:
     hooks:
       - id: prettier
         additional_dependencies:
-          - prettier@3.2.5
-
-  - repo: https://github.com/editorconfig-checker/editorconfig-checker.python
-    rev: "3.1.2"
+          - prettier@3.6.2
+  - repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v5.0.0
     hooks:
-      - id: editorconfig-checker
-        alias: ec
+      - id: trailing-whitespace
+        args: [--markdown-linebreak-ext=md]
+        exclude: |
+          (?x)^(
+              .*ro-crate-metadata.json$|
+              modules/nf-core/.*|
+              subworkflows/nf-core/.*|
+              .*\.snap$
+          )$
+      - id: end-of-file-fixer
+        exclude: |
+          (?x)^(
+              .*ro-crate-metadata.json$|
+              modules/nf-core/.*|
+              subworkflows/nf-core/.*|
+              .*\.snap$
+          )$
diff --git a/.prettierrc.yml b/.prettierrc.yml
index c81f9a76..07dbd8bb 100644
--- a/.prettierrc.yml
+++ b/.prettierrc.yml
@@ -1 +1,6 @@
 printWidth: 120
+tabWidth: 4
+overrides:
+  - files: "*.{md,yml,yaml,html,css,scss,js,cff}"
+    options:
+      tabWidth: 2
diff --git a/CHANGELOG.md b/CHANGELOG.md
index bc3e350d..a38609fc 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -3,7 +3,7 @@
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
-## v1.4.1 - [date]
+## v1.4.2 - [date]
 
 Initial release of sanger-tol/curationpretext, created with the [nf-core](https://nf-co.re/) template.
 
diff --git a/README.md b/README.md
index 86c881cb..3b3843c7 100644
--- a/README.md
+++ b/README.md
@@ -1,10 +1,11 @@
 # sanger-tol/curationpretext
 
-[![GitHub Actions CI Status](https://github.com/sanger-tol/curationpretext/actions/workflows/ci.yml/badge.svg)](https://github.com/sanger-tol/curationpretext/actions/workflows/ci.yml)
+[![GitHub Actions CI Status](https://github.com/sanger-tol/curationpretext/actions/workflows/nf-test.yml/badge.svg)](https://github.com/sanger-tol/curationpretext/actions/workflows/nf-test.yml)
 [![GitHub Actions Linting Status](https://github.com/sanger-tol/curationpretext/actions/workflows/linting.yml/badge.svg)](https://github.com/sanger-tol/curationpretext/actions/workflows/linting.yml)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX)
 [![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)
 
-[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A524.04.2-23aa62.svg)](https://www.nextflow.io/)
+[![Nextflow](https://img.shields.io/badge/version-%E2%89%A524.10.5-green?style=flat&logo=nextflow&logoColor=white&color=%230DC09D&link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/)
+[![nf-core template version](https://img.shields.io/badge/nf--core_template-3.3.2-green?style=flat&logo=nfcore&logoColor=white&color=%2324B064&link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.3.2)
 [![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)
 [![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)
 [![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)
@@ -21,7 +22,7 @@
 -->
 
 <!-- TODO nf-core: Include a figure that guides the user through the major workflow steps. Many nf-core
-     workflows use the "tube map" design for that. See https://nf-co.re/docs/contributing/design_guidelines#examples for examples.   -->
+     workflows use the "tube map" design for that. See https://nf-co.re/docs/guidelines/graphic_design/workflow_diagrams#examples for examples.   -->
 <!-- TODO nf-core: Fill in short bullet-pointed list of the default steps in the pipeline -->
 
 ## Usage
diff --git a/assets/schema_input.json b/assets/schema_input.json
index 7dea6e86..c21d7515 100644
--- a/assets/schema_input.json
+++ b/assets/schema_input.json
@@ -17,14 +17,14 @@
                 "type": "string",
                 "format": "file-path",
                 "exists": true,
-                "pattern": "^\\S+\\.f(ast)?q\\.gz$",
+                "pattern": "^([\\S\\s]*\\/)?[^\\s\\/]+\\.f(ast)?q\\.gz$",
                 "errorMessage": "FastQ file for reads 1 must be provided, cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'"
             },
             "fastq_2": {
                 "type": "string",
                 "format": "file-path",
                 "exists": true,
-                "pattern": "^\\S+\\.f(ast)?q\\.gz$",
+                "pattern": "^([\\S\\s]*\\/)?[^\\s\\/]+\\.f(ast)?q\\.gz$",
                 "errorMessage": "FastQ file for reads 2 cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'"
             }
         },
diff --git a/conf/base.config b/conf/base.config
index 45a8a625..8370150f 100644
--- a/conf/base.config
+++ b/conf/base.config
@@ -15,7 +15,7 @@ process {
     memory = { 6.GB   * task.attempt }
     time   = { 4.h    * task.attempt }
 
-    errorStrategy = { task.exitStatus in ((130..145) + 104) ? 'retry' : 'finish' }
+    errorStrategy = { task.exitStatus in ((130..145) + 104 + 175) ? 'retry' : 'finish' }
     maxRetries    = 1
     maxErrors     = '-1'
 
@@ -59,4 +59,8 @@ process {
         errorStrategy = 'retry'
         maxRetries    = 2
     }
+    withLabel: process_gpu {
+        ext.use_gpu = { workflow.profile.contains('gpu') }
+        accelerator = { workflow.profile.contains('gpu') ? 1 : null }
+    }
 }
diff --git a/nextflow.config b/nextflow.config
index db46f10c..396dc039 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -148,16 +148,25 @@ profiles {
             ]
         }
     }
+    gpu {
+        docker.runOptions       = '-u $(id -u):$(id -g) --gpus all'
+        apptainer.runOptions    = '--nv'
+        singularity.runOptions  = '--nv'
+    }
     test      { includeConfig 'conf/test.config'      }
     test_full { includeConfig 'conf/test_full.config' }
 }
 
-// Load nf-core custom profiles from different Institutions
-includeConfig !System.getenv('NXF_OFFLINE') && params.custom_config_base ? "${params.custom_config_base}/nfcore_custom.config" : "/dev/null"
+// Load nf-core custom profiles from different institutions
+
+// If params.custom_config_base is set AND either the NXF_OFFLINE environment variable is not set or params.custom_config_base is a local path, the nfcore_custom.config file from the specified base path is included.
+// Load sanger-tol/curationpretext custom profiles from different institutions.
+includeConfig params.custom_config_base && (!System.getenv('NXF_OFFLINE') || !params.custom_config_base.startsWith('http')) ? "${params.custom_config_base}/nfcore_custom.config" : "/dev/null"
+
 
 // Load sanger-tol/curationpretext custom profiles from different institutions.
 // TODO nf-core: Optionally, you can add a pipeline-specific nf-core config at https://github.com/nf-core/configs
-// includeConfig !System.getenv('NXF_OFFLINE') && params.custom_config_base ? "${params.custom_config_base}/pipeline/curationpretext.config" : "/dev/null"
+// includeConfig params.custom_config_base && (!System.getenv('NXF_OFFLINE') || !params.custom_config_base.startsWith('http')) ? "${params.custom_config_base}/pipeline/curationpretext.config" : "/dev/null"
 
 // Set default registry for Apptainer, Docker, Podman, Charliecloud and Singularity independent of -profile
 // Will not be used unless Apptainer / Docker / Podman / Charliecloud / Singularity are enabled
@@ -213,7 +222,6 @@ dag {
 
 manifest {
     name            = 'sanger-tol/curationpretext'
-    author          = """Damon-Lee B Pointon (@DLBPointon)""" // The author field is deprecated from Nextflow version 24.10.0, use contributors instead
     contributors    = [
         // TODO nf-core: Update the field with the details of the contributors to your pipeline. New with Nextflow version 24.10.0
         [
@@ -229,14 +237,14 @@ manifest {
     description     = """A simple pipeline to generate pretext files for genomic curation."""
     mainScript      = 'main.nf'
     defaultBranch   = 'master'
-    nextflowVersion = '!>=24.04.2'
-    version         = '1.4.1'
+    nextflowVersion = '!>=24.10.5'
+    version         = '1.4.2'
     doi             = ''
 }
 
 // Nextflow plugins
 plugins {
-    id 'nf-schema@2.2.0' // Validation of pipeline parameters and creation of an input channel from a sample sheet
+    id 'nf-schema@2.4.2' // Validation of pipeline parameters and creation of an input channel from a sample sheet
 }
 
 validation {
diff --git a/nf-test.config b/nf-test.config
new file mode 100644
index 00000000..3a1fff59
--- /dev/null
+++ b/nf-test.config
@@ -0,0 +1,24 @@
+config {
+    // location for all nf-test tests
+    testsDir "."
+
+    // nf-test directory including temporary files for each test
+    workDir System.getenv("NFT_WORKDIR") ?: ".nf-test"
+
+    // location of an optional nextflow.config file specific for executing tests
+    configFile "tests/nextflow.config"
+
+    // ignore tests coming from the nf-core/modules repo
+    ignore 'modules/nf-core/**/tests/*', 'subworkflows/nf-core/**/tests/*'
+
+    // run all tests with the defined profile(s) from the main nextflow.config
+    profile "test"
+
+    // list of filenames or patterns that should trigger a full test run
+    triggers 'nextflow.config', 'nf-test.config', 'conf/test.config', 'tests/nextflow.config', 'tests/.nftignore'
+
+    // load the necessary plugins
+    plugins {
+        load "nft-utils@0.0.3"
+    }
+}
diff --git a/subworkflows/local/utils_nfcore_curationpretext_pipeline/main.nf b/subworkflows/local/utils_nfcore_curationpretext_pipeline/main.nf
index aa3f2b7d..40153083 100644
--- a/subworkflows/local/utils_nfcore_curationpretext_pipeline/main.nf
+++ b/subworkflows/local/utils_nfcore_curationpretext_pipeline/main.nf
@@ -219,4 +219,3 @@ def methodsDescriptionText(mqc_methods_yaml) {
 
     return description_html.toString()
 }
-
diff --git a/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow.config b/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow.config
index 0907ac58..09ef842a 100644
--- a/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow.config
+++ b/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow.config
@@ -1,5 +1,5 @@
 plugins {
-    id "nf-schema@2.1.0"
+    id "nf-schema@2.4.2"
 }
 
 validation {
diff --git a/tests/.nftignore b/tests/.nftignore
new file mode 100644
index 00000000..73eb92f7
--- /dev/null
+++ b/tests/.nftignore
@@ -0,0 +1,2 @@
+.DS_Store
+pipeline_info/*.{html,json,txt,yml}
diff --git a/tests/default.nf.test b/tests/default.nf.test
new file mode 100644
index 00000000..5c9f4cf2
--- /dev/null
+++ b/tests/default.nf.test
@@ -0,0 +1,35 @@
+nextflow_pipeline {
+
+    name "Test pipeline"
+    script "../main.nf"
+    tag "pipeline"
+
+    test("-profile test") {
+
+        when {
+            params {
+                outdir = "$outputDir"
+            }
+        }
+
+        then {
+            // stable_name: All files + folders in ${params.outdir}/ with a stable name
+            def stable_name = getAllFilesFromDir(params.outdir, relative: true, includeDir: true, ignore: ['pipeline_info/*.{html,json,txt}'])
+            // stable_path: All files in ${params.outdir}/ with stable content
+            def stable_path = getAllFilesFromDir(params.outdir, ignoreFile: 'tests/.nftignore')
+            assertAll(
+                { assert workflow.success},
+                { assert snapshot(
+                    // Number of successful tasks
+                    workflow.trace.succeeded().size(),
+                    // pipeline versions.yml file for multiqc from which Nextflow version is removed because we test pipelines on multiple Nextflow versions
+                    removeNextflowVersion("$outputDir/pipeline_info/curationpretext_software_mqc_versions.yml"),
+                    // All stable path name, with a relative path
+                    stable_name,
+                    // All files with stable contents
+                    stable_path
+                ).match() }
+            )
+        }
+    }
+}
diff --git a/tests/nextflow.config b/tests/nextflow.config
new file mode 100644
index 00000000..e3be3550
--- /dev/null
+++ b/tests/nextflow.config
@@ -0,0 +1,14 @@
+/*
+========================================================================================
+    Nextflow config file for running nf-test tests
+========================================================================================
+*/
+
+// TODO nf-core: Specify any additional parameters here
+// Or any resource requirements
+params {
+    modules_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/'
+    pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/curationpretext'
+}
+
+aws.client.anonymous = true // fixes S3 access issues on self-hosted runners

From 84d9bb96df56c9df8042e70c7b59c1ad02a554a8 Mon Sep 17 00:00:00 2001
From: DLBPointon <damonlbp@hotmail.co.uk>
Date: Thu, 28 Aug 2025 17:04:48 +0100
Subject: [PATCH 37/58] Template Update 332

---
 tests/default.nf.test.snap | 171 +++++++++++++++++++++++++++++++++++++
 1 file changed, 171 insertions(+)
 create mode 100644 tests/default.nf.test.snap

diff --git a/tests/default.nf.test.snap b/tests/default.nf.test.snap
new file mode 100644
index 00000000..7815abe3
--- /dev/null
+++ b/tests/default.nf.test.snap
@@ -0,0 +1,171 @@
+{
+    "Full run": {
+        "content": [
+            40,
+            {
+                "BEDTOOLS_BAMTOBED": {
+                    "bedtools": "2.31.1"
+                },
+                "BEDTOOLS_GENOMECOV": {
+                    "bedtools": "2.31.1"
+                },
+                "BEDTOOLS_INTERSECT": {
+                    "bedtools": "2.31.1"
+                },
+                "BEDTOOLS_MAKEWINDOWS": {
+                    "bedtools": "2.31.1"
+                },
+                "BEDTOOLS_MAP": {
+                    "bedtools": "2.31.1"
+                },
+                "BWAMEM2_INDEX": {
+                    "bwamem2": "2.2.1"
+                },
+                "CRAM_FILTER_ALIGN_BWAMEM2_FIXMATE_SORT": {
+                    "samtools": 1.17,
+                    "bwa-mem2": "2.2.1",
+                    "staden_io_lib": "1.14.14"
+                },
+                "CRAM_GENERATE_CSV": {
+                    "samtools": 1.17
+                },
+                "EXTRACT_REPEAT": {
+                    "perl": "(v5.26.2))",
+                    "extract_repeat.pl": 1.0
+                },
+                "EXTRACT_TELOMERE": {
+                    "extract_telomere": 2.0,
+                    "coreutils": 9.1
+                },
+                "FIND_TELOMERE_REGIONS": {
+                    "find_telomere": 1.0
+                },
+                "FIND_TELOMERE_WINDOWS": {
+                    "telomere": 1.0
+                },
+                "GAWK_CLEAN_TELOMERE": {
+                    "gawk": "5.3.0"
+                },
+                "GAWK_GAP_LENGTH": {
+                    "gawk": "5.3.0"
+                },
+                "GAWK_GENERATE_GENOME_FILE": {
+                    "gawk": "5.3.0"
+                },
+                "GAWK_REFORMAT_INTERSECT": {
+                    "gawk": "5.3.0"
+                },
+                "GAWK_RENAME_IDS": {
+                    "gawk": "5.3.0"
+                },
+                "GAWK_REPLACE_DOTS": {
+                    "gawk": "5.3.0"
+                },
+                "GAWK_UPPER_SEQUENCE": {
+                    "gawk": "5.3.0"
+                },
+                "GET_LARGEST_SCAFFOLD": {
+                    "get_largest_scaffold": 2.0,
+                    "coreutils": 9.1
+                },
+                "GNU_SORT": {
+                    "coreutils": 9.3
+                },
+                "GNU_SORT_A": {
+                    "coreutils": 9.3
+                },
+                "GNU_SORT_B": {
+                    "coreutils": 9.3
+                },
+                "GNU_SORT_C": {
+                    "coreutils": 9.3
+                },
+                "GRAPH_OVERALL_COVERAGE": {
+                    "perl": "(v5.26.2))",
+                    "graph_overall_coverage.pl": 1.0
+                },
+                "MINIMAP2_ALIGN": {
+                    "minimap2": "2.28-r1209",
+                    "samtools": 1.2
+                },
+                "PRETEXTMAP_STANDRD": {
+                    "pretextmap": "0.1.9",
+                    "samtools": 1.21
+                },
+                "PRETEXT_INGEST_SNDRD": {
+                    "PretextGraph": "0.0.9",
+                    "PretextMap": "0.1.9",
+                    "bigWigToBedGraph": 447
+                },
+                "SAMTOOLS_FAIDX": {
+                    "samtools": 1.21
+                },
+                "SAMTOOLS_MERGE": {
+                    "samtools": 1.21
+                },
+                "SAMTOOLS_SORT": {
+                    "samtools": 1.21
+                },
+                "SAMTOOLS_VIEW_FILTER_PRIMARY": {
+                    "samtools": 1.21
+                },
+                "SEQTK_CUTN": {
+                    "seqtk": "1.4-r122"
+                },
+                "SNAPSHOT_SRES": {
+                    "pretextsnapshot": "0.0.4"
+                },
+                "UCSC_BEDGRAPHTOBIGWIG": {
+                    "ucsc": 469
+                },
+                "WINDOWMASKER_MKCOUNTS": {
+                    "windowmasker": "1.0.0"
+                },
+                "WINDOWMASKER_USTAT": {
+                    "windowmasker": "1.0.0"
+                },
+                "Workflow": {
+                    "sanger-tol/curationpretext": "v1.4.2"
+                }
+            },
+            [
+                "accessory_files",
+                "accessory_files/CurationPretextTest.bigWig",
+                "accessory_files/CurationPretextTest.gap.bedgraph",
+                "accessory_files/CurationPretextTest_telomere.bed",
+                "accessory_files/CurationPretextTest_telomere.bedgraph",
+                "accessory_files/coverage.bigWig",
+                "pipeline_info",
+                "pipeline_info/sanger-tol_curationpretext_software_versions.yml",
+                "pretext_maps_processed",
+                "pretext_maps_processed/CurationPretextTest_normal.pretext",
+                "pretext_maps_raw",
+                "pretext_maps_raw/CurationPretextTest_normal_pi.pretext",
+                "pretext_snapshot",
+                "pretext_snapshot/CurationPretextTest_normalFullMap.png"
+            ],
+            14,
+            [
+                "CurationPretextTest.bigWig:md5,3f66a9152d793a62f877b733c2336dfd",
+                "CurationPretextTest.gap.bedgraph:md5,d41d8cd98f00b204e9800998ecf8427e",
+                "CurationPretextTest_telomere.bed:md5,d41d8cd98f00b204e9800998ecf8427e",
+                "CurationPretextTest_telomere.bedgraph:md5,d41d8cd98f00b204e9800998ecf8427e",
+                "coverage.bigWig:md5,2e474506c957152b231ac63c859f0b17"
+            ],
+            5,
+            1,
+            false,
+            true,
+            1,
+            false,
+            true,
+            1,
+            true
+        ],
+        "meta": {
+            "nf-test": "0.9.2",
+            "nextflow": "24.04.4"
+        },
+        "timestamp": "2025-04-16T11:23:34.556355"
+    }
+}

From 0832a1a24c5ca0e55c2c853b23e5f5600b1d7a68 Mon Sep 17 00:00:00 2001
From: DLBPointon <damonlbp@hotmail.co.uk>
Date: Fri, 29 Aug 2025 09:36:39 +0100
Subject: [PATCH 38/58] updated ro-crate

---
 ro-crate-metadata.json | 27 ++++++++++++++++++++-------
 1 file changed, 20 insertions(+), 7 deletions(-)

diff --git a/ro-crate-metadata.json b/ro-crate-metadata.json
index 50ea4f24..61ee727e 100644
--- a/ro-crate-metadata.json
+++ b/ro-crate-metadata.json
@@ -23,7 +23,7 @@
             "@type": "Dataset",
             "creativeWorkStatus": "Stable",
             "datePublished": "2025-05-27T09:34:43+00:00",
-            "description": "# ![sanger-tol/curationpretext](docs/images/curationpretext-light.png#gh-light-mode-only) ![sanger-tol/curationpretext](docs/images/curationpretext-dark.png#gh-dark-mode-only)\n\n[![GitHub Actions CI Status](https://github.com/sanger-tol/curationpretext/workflows/nf-core%20CI/badge.svg)](https://github.com/sanger-tol/curationpretext/actions?query=workflow%3A%22nf-core+CI%22)\n[![GitHub Actions Linting Status](https://github.com/sanger-tol/curationpretext/workflows/nf-core%20linting/badge.svg)](https://github.com/sanger-tol/curationpretext/actions?query=workflow%3A%22nf-core+linting%22)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.12773958-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.12773958)\n\n[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A524.04.2-23aa62.svg)](https://www.nextflow.io/)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/sanger-tol/curationpretext)\n\n## Introduction\n\n**sanger-tol/curationpretext** is a bioinformatics pipeline typically used in conjunction with [TreeVal](https://github.com/sanger-tol/treeval) to generate pretext maps (and optionally telomeric, gap, coverage, and repeat density plots which can be ingested into pretext) for the manual curation of high quality genomes.\n\nThis is intended as a supplementary pipeline for the [treeval](https://github.com/sanger-tol/treeval) project. 
This pipeline can be simply used to generate pretext maps, information on how to run this pipeline can be found in the [usage documentation](https://pipelines.tol.sanger.ac.uk/curationpretext/usage).\n\n![Workflow Diagram](./docs/images/CurationPretext_1_3_0.png)\n\n1. Generate Maps - Generates pretext maps as well as a static image.\n\n2. Accessory files - Generates the repeat density, gap, telomere, and coverage tracks.\n\n## Usage\n\n> [!NOTE]\n> If you are new to Nextflow and nf-core, please refer to [this page](https://pipelines.tol.sanger.ac.uk/curationpretext/1.4.0/usage) on how to set-up Nextflow. Make sure to [test your setup](https://pipelines.tol.sanger.ac.uk/curationpretext/1.4.0/usage) with `-profile test` before running the workflow on actual data.\n\nCurrently, the pipeline uses the following flags:\n\n- `--input`\n\n  - The absolute path to the assembled genome in, e.g., `/path/to/assembly.fa`\n\n- `--reads`\n\n  - The directory of the fasta files generated from longread reads, e.g., `/path/to/fasta/`\n\n- `--read_type`\n\n  - The type of longread data you are utilising, e.g., ont, illumina, hifi.\n\n- `--aligner`\n\n  - The aligner yopu wish to use for the coverage generation, defaults to bwamem2 but minimap2 is also supported.\n\n- `--cram`\n\n  - The directory of the cram _and_ cram.crai files, e.g., `/path/to/cram/`\n\n- `--map_order`\n\n  - hic map scaffold order, input either `length` or `unsorted`\n\n- `--teloseq`\n\n  - A telomeric sequence, e.g., `TTAGGG`\n\n- `--all_output`\n\n  - An option to output all maps + accessory files, the default will only output the pretextmaps where ingestion has occured.\n\nNow, you can run the pipeline using:\n\n```bash\nnextflow run sanger-tol/curationpretext \\\n  --input { input.fasta } \\\n  --cram { path/to/cram/ } \\\n  --reads { path/to/longread/fasta/ } \\\n  --read_type { default is \"hifi\" }\n  --sample { default is \"pretext_rerun\" } \\\n  --teloseq { default is \"TTAGGG\" } \\\n  --map_order { 
default is \"unsorted\" } \\\n  --all_output <true/false> \\\n  --outdir { OUTDIR } \\\n  -profile <docker/singularity/{institute}>\n\n```\n\n> **Warning:**\n> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those\n> provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_;\n\nFor more details, please refer to the [usage documentation](https://pipelines.tol.sanger.ac.uk/curationpretext/usage) and the [parameter documentation](https://pipelines.tol.sanger.ac.uk/curationpretext/parameters).\n\n## Pipeline output\n\nTo see the the results of a test run with a full size dataset refer to the [results](https://pipelines.tol.sanger.ac.uk/curationpretext/results) tab on the sanger-tol/curationpretext website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://pipelines.tol.sanger.ac.uk/curationpretext/output).\n\n## Credits\n\nsanger-tol/curationpretext was originally written by Damon-Lee B Pointon (@DLBPointon).\n\nWe thank the following people for their extensive assistance in the development of this pipeline:\n\n- @muffato - For reviews.\n\n- @yumisims - TreeVal and Software.\n\n- @weaglesBio - TreeVal and Software.\n\n- @josieparis - Help with better docs and testing.\n\n- @mahesh-panchal - Large support with 1.2.0 in making the pipeline more robust with other HPC environments.\n\n- @GRIT - For feedback and feature requests.\n\n- @prototaxites - Support with 1.3.0 and showing me the power of GAWK.\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\n## Citations\n\nIf you use sanger-tol/curationpretext for your analysis, please cite it using the following doi: [10.5281/zenodo.12773958](https://doi.org/10.5281/zenodo.12773958)\n\nAn extensive list of references for the tools used by the 
pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nThis pipeline uses code and infrastructure developed and maintained by the [nf-core](https://nf-co.re) community, reused here under the [MIT license](https://github.com/nf-core/tools/blob/main/LICENSE).\n\n> **The nf-core framework for community-curated bioinformatics pipelines.**\n>\n> Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven Nahnsen.\n>\n> _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n",
+            "description": "# ![sanger-tol/curationpretext](docs/images/curationpretext-light.png#gh-light-mode-only) ![sanger-tol/curationpretext](docs/images/curationpretext-dark.png#gh-dark-mode-only)\n\n[![GitHub Actions CI Status](https://github.com/sanger-tol/curationpretext/actions/workflows/nf-test.yml/badge.svg)](https://github.com/sanger-tol/curationpretext/actions/workflows/nf-test.yml)\n[![GitHub Actions Linting Status](https://github.com/sanger-tol/curationpretext/actions/workflows/linting.yml/badge.svg)](https://github.com/sanger-tol/curationpretext/actions/workflows/linting.yml)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.12773958-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.12773958)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/version-%E2%89%A524.10.5-green?style=flat&logo=nextflow&logoColor=white&color=%230DC09D&link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/)\n[![nf-core template version](https://img.shields.io/badge/nf--core_template-3.3.2-green?style=flat&logo=nfcore&logoColor=white&color=%2324B064&link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.3.2)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/sanger-tol/curationpretext)\n\n## Introduction\n\n**sanger-tol/curationpretext** is a bioinformatics pipeline typically used in conjunction with 
[TreeVal](https://github.com/sanger-tol/treeval) to generate pretext maps (and optionally telomeric, gap, coverage, and repeat density plots which can be ingested into pretext) for the manual curation of high quality genomes.\n\nThis is intended as a supplementary pipeline for the [treeval](https://github.com/sanger-tol/treeval) project. This pipeline can be simply used to generate pretext maps, information on how to run this pipeline can be found in the [usage documentation](https://pipelines.tol.sanger.ac.uk/curationpretext/usage).\n\n![Workflow Diagram](./docs/images/CurationPretext_1_3_0.png)\n\n1. Generate Maps - Generates pretext maps as well as a static image.\n\n2. Accessory files - Generates the repeat density, gap, telomere, and coverage tracks.\n\n## Usage\n\n> [!NOTE]\n> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\nCurrently, the pipeline uses the following flags:\n\n- `--input`\n\n  - The absolute path to the assembled genome in, e.g., `/path/to/assembly.fa`\n\n- `--reads`\n\n  - The directory of the fasta files generated from longread reads, e.g., `/path/to/fasta/`\n\n- `--read_type`\n\n  - The type of longread data you are utilising, e.g., ont, illumina, hifi.\n\n- `--aligner`\n\n  - The aligner yopu wish to use for the coverage generation, defaults to bwamem2 but minimap2 is also supported.\n\n- `--cram`\n\n  - The directory of the cram _and_ cram.crai files, e.g., `/path/to/cram/`\n\n- `--map_order`\n\n  - hic map scaffold order, input either `length` or `unsorted`\n\n- `--teloseq`\n\n  - A telomeric sequence, e.g., `TTAGGG`\n\n- `--all_output`\n\n  - An option to output all maps + accessory files, the default will only output the pretextmaps where ingestion has occured.\n\nNow, you can run the 
pipeline using:\n\n```bash\nnextflow run sanger-tol/curationpretext \\\n  --input { input.fasta } \\\n  --cram { path/to/cram/ } \\\n  --reads { path/to/longread/fasta/ } \\\n  --read_type { default is \"hifi\" }\n  --sample { default is \"pretext_rerun\" } \\\n  --teloseq { default is \"TTAGGG\" } \\\n  --map_order { default is \"unsorted\" } \\\n  --all_output <true/false> \\\n  --outdir { OUTDIR } \\\n  -profile <docker/singularity/{institute}>\n\n```\n\n> **Warning:**\n> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those\n> provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_;\n\nFor more details, please refer to the [usage documentation](https://pipelines.tol.sanger.ac.uk/curationpretext/usage) and the [parameter documentation](https://pipelines.tol.sanger.ac.uk/curationpretext/parameters).\n\n## Pipeline output\n\nTo see the the results of a test run with a full size dataset refer to the [results](https://pipelines.tol.sanger.ac.uk/curationpretext/results) tab on the sanger-tol/curationpretext website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://pipelines.tol.sanger.ac.uk/curationpretext/output).\n\n## Credits\n\nsanger-tol/curationpretext was originally written by Damon-Lee B Pointon (@DLBPointon).\n\nWe thank the following people for their extensive assistance in the development of this pipeline:\n\n- @muffato - For reviews.\n\n- @yumisims - TreeVal and Software.\n\n- @weaglesBio - TreeVal and Software.\n\n- @josieparis - Help with better docs and testing.\n\n- @mahesh-panchal - Large support with 1.2.0 in making the pipeline more robust with other HPC environments.\n\n- @GRIT - For feedback and feature requests.\n\n- @prototaxites - Support with 1.3.0 and showing me the power of GAWK.\n\n## Contributions and Support\n\nIf you would like to contribute to this 
pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\n## Citations\n\nIf you use sanger-tol/curationpretext for your analysis, please cite it using the following doi: [10.5281/zenodo.12773958](https://doi.org/10.5281/zenodo.12773958)\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nThis pipeline uses code and infrastructure developed and maintained by the [nf-core](https://nf-co.re) community, reused here under the [MIT license](https://github.com/nf-core/tools/blob/main/LICENSE).\n\n> **The nf-core framework for community-curated bioinformatics pipelines.**\n>\n> Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven Nahnsen.\n>\n> _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n",
             "hasPart": [
                 {
                     "@id": "main.nf"
@@ -124,7 +124,11 @@
         },
         {
             "@id": "main.nf",
-            "@type": ["File", "SoftwareSourceCode", "ComputationalWorkflow"],
+            "@type": [
+                "File",
+                "SoftwareSourceCode",
+                "ComputationalWorkflow"
+            ],
             "creator": [
                 {
                     "@id": "https://orcid.org/0000-0002-7860-3560"
@@ -133,9 +137,16 @@
             "dateCreated": "",
             "dateModified": "2025-05-27T10:34:43Z",
             "dct:conformsTo": "https://bioschemas.org/profiles/ComputationalWorkflow/1.0-RELEASE/",
-            "keywords": ["nf-core", "nextflow"],
-            "license": ["MIT"],
-            "name": ["sanger-tol/curationpretext"],
+            "keywords": [
+                "nf-core",
+                "nextflow"
+            ],
+            "license": [
+                "MIT"
+            ],
+            "name": [
+                "sanger-tol/curationpretext"
+            ],
             "programmingLanguage": {
                 "@id": "https://w3id.org/workflowhub/workflow-ro-crate#nextflow"
             },
@@ -146,7 +157,9 @@
                 "https://github.com/sanger-tol/curationpretext",
                 "https://pipelines.tol.sanger.ac.uk/sanger-tol/curationpretext/1.4.2/"
             ],
-            "version": ["1.4.2"]
+            "version": [
+                "1.4.2"
+            ]
         },
         {
             "@id": "https://w3id.org/workflowhub/workflow-ro-crate#nextflow",
@@ -332,4 +345,4 @@
             "name": "Josie Paris"
         }
     ]
-}
+}
\ No newline at end of file

From 7d8c1390428bb8edf6b24ecc3f5c06fd03a78b1c Mon Sep 17 00:00:00 2001
From: DLBPointon <damonlbp@hotmail.co.uk>
Date: Fri, 29 Aug 2025 09:41:36 +0100
Subject: [PATCH 39/58] Pre-Commit Prettier

---
 CHANGELOG.md | 2 --
 README.md    | 8 --------
 2 files changed, 10 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index e7f8b932..fe9a0345 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -126,11 +126,9 @@ Note, since the pipeline is using Nextflow DSL2, each process will be run with i
 ### Added and Fixed
 
 - GRIT found a bug in `pretext_graph` ingestion code where null values were being introduced as the track name
-
   - This has now need hardcoded, there was no need for dynamic naming anyway
 
 - GRIT found a bug in `pretext_graph` ingestion where gap and telomere tracks stopped being ingested correctly and would no longer display or be zeroed out.
-
   - I'm not entirely sure of the cause of this but i think it is a mix of how pretext handles unnamed tracks, assuming their datatype so a null named gap track would be treated as a repeat track, and incorrect logic in the pretext_graph module.
 
 - Added GAWK module (as GAWK_CLEAN_TELOMERE) to remove "you screwed up" (this is a legacy error message which will be changed to something more informative and professional) error lines which can appear with some telo motifs or lower case motifs. These will otherwise cause the FIND_TELOMERE_WINDOWS process to crash.
diff --git a/README.md b/README.md
index 74b503de..d68799fe 100644
--- a/README.md
+++ b/README.md
@@ -31,35 +31,27 @@ This is intended as a supplementary pipeline for the [treeval](https://github.co
 Currently, the pipeline uses the following flags:
 
 - `--input`
-
   - The absolute path to the assembled genome in, e.g., `/path/to/assembly.fa`
 
 - `--reads`
-
   - The directory of the fasta files generated from longread reads, e.g., `/path/to/fasta/`
 
 - `--read_type`
-
   - The type of longread data you are utilising, e.g., ont, illumina, hifi.
 
 - `--aligner`
-
   - The aligner yopu wish to use for the coverage generation, defaults to bwamem2 but minimap2 is also supported.
 
 - `--cram`
-
   - The directory of the cram _and_ cram.crai files, e.g., `/path/to/cram/`
 
 - `--map_order`
-
   - hic map scaffold order, input either `length` or `unsorted`
 
 - `--teloseq`
-
   - A telomeric sequence, e.g., `TTAGGG`
 
 - `--all_output`
-
   - An option to output all maps + accessory files, the default will only output the pretextmaps where ingestion has occured.
 
 Now, you can run the pipeline using:

From cebb3ca789cbe7447acb01f7617313b6194810fb Mon Sep 17 00:00:00 2001
From: DLBPointon <damonlbp@hotmail.co.uk>
Date: Fri, 29 Aug 2025 09:43:05 +0100
Subject: [PATCH 40/58] Pre-Commit Prettier

---
 ro-crate-metadata.json | 25 ++++++-------------------
 1 file changed, 6 insertions(+), 19 deletions(-)

diff --git a/ro-crate-metadata.json b/ro-crate-metadata.json
index 61ee727e..86ee1f8c 100644
--- a/ro-crate-metadata.json
+++ b/ro-crate-metadata.json
@@ -124,11 +124,7 @@
         },
         {
             "@id": "main.nf",
-            "@type": [
-                "File",
-                "SoftwareSourceCode",
-                "ComputationalWorkflow"
-            ],
+            "@type": ["File", "SoftwareSourceCode", "ComputationalWorkflow"],
             "creator": [
                 {
                     "@id": "https://orcid.org/0000-0002-7860-3560"
@@ -137,16 +133,9 @@
             "dateCreated": "",
             "dateModified": "2025-05-27T10:34:43Z",
             "dct:conformsTo": "https://bioschemas.org/profiles/ComputationalWorkflow/1.0-RELEASE/",
-            "keywords": [
-                "nf-core",
-                "nextflow"
-            ],
-            "license": [
-                "MIT"
-            ],
-            "name": [
-                "sanger-tol/curationpretext"
-            ],
+            "keywords": ["nf-core", "nextflow"],
+            "license": ["MIT"],
+            "name": ["sanger-tol/curationpretext"],
             "programmingLanguage": {
                 "@id": "https://w3id.org/workflowhub/workflow-ro-crate#nextflow"
             },
@@ -157,9 +146,7 @@
                 "https://github.com/sanger-tol/curationpretext",
                 "https://pipelines.tol.sanger.ac.uk/sanger-tol/curationpretext/1.4.2/"
             ],
-            "version": [
-                "1.4.2"
-            ]
+            "version": ["1.4.2"]
         },
         {
             "@id": "https://w3id.org/workflowhub/workflow-ro-crate#nextflow",
@@ -345,4 +332,4 @@
             "name": "Josie Paris"
         }
     ]
-}
\ No newline at end of file
+}

From 78075a28b0d6b4c5ec113ed154529abb62b6c979 Mon Sep 17 00:00:00 2001
From: DLBPointon <damonlbp@hotmail.co.uk>
Date: Fri, 29 Aug 2025 11:09:49 +0100
Subject: [PATCH 41/58] Another rocrate update

---
 ro-crate-metadata.json | 27 ++++++++++++++++++++-------
 1 file changed, 20 insertions(+), 7 deletions(-)

diff --git a/ro-crate-metadata.json b/ro-crate-metadata.json
index 86ee1f8c..05ff8efd 100644
--- a/ro-crate-metadata.json
+++ b/ro-crate-metadata.json
@@ -23,7 +23,7 @@
             "@type": "Dataset",
             "creativeWorkStatus": "Stable",
             "datePublished": "2025-05-27T09:34:43+00:00",
-            "description": "# ![sanger-tol/curationpretext](docs/images/curationpretext-light.png#gh-light-mode-only) ![sanger-tol/curationpretext](docs/images/curationpretext-dark.png#gh-dark-mode-only)\n\n[![GitHub Actions CI Status](https://github.com/sanger-tol/curationpretext/actions/workflows/nf-test.yml/badge.svg)](https://github.com/sanger-tol/curationpretext/actions/workflows/nf-test.yml)\n[![GitHub Actions Linting Status](https://github.com/sanger-tol/curationpretext/actions/workflows/linting.yml/badge.svg)](https://github.com/sanger-tol/curationpretext/actions/workflows/linting.yml)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.12773958-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.12773958)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/version-%E2%89%A524.10.5-green?style=flat&logo=nextflow&logoColor=white&color=%230DC09D&link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/)\n[![nf-core template version](https://img.shields.io/badge/nf--core_template-3.3.2-green?style=flat&logo=nfcore&logoColor=white&color=%2324B064&link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.3.2)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/sanger-tol/curationpretext)\n\n## Introduction\n\n**sanger-tol/curationpretext** is a bioinformatics pipeline typically used in conjunction with 
[TreeVal](https://github.com/sanger-tol/treeval) to generate pretext maps (and optionally telomeric, gap, coverage, and repeat density plots which can be ingested into pretext) for the manual curation of high quality genomes.\n\nThis is intended as a supplementary pipeline for the [treeval](https://github.com/sanger-tol/treeval) project. This pipeline can be simply used to generate pretext maps, information on how to run this pipeline can be found in the [usage documentation](https://pipelines.tol.sanger.ac.uk/curationpretext/usage).\n\n![Workflow Diagram](./docs/images/CurationPretext_1_3_0.png)\n\n1. Generate Maps - Generates pretext maps as well as a static image.\n\n2. Accessory files - Generates the repeat density, gap, telomere, and coverage tracks.\n\n## Usage\n\n> [!NOTE]\n> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\nCurrently, the pipeline uses the following flags:\n\n- `--input`\n\n  - The absolute path to the assembled genome in, e.g., `/path/to/assembly.fa`\n\n- `--reads`\n\n  - The directory of the fasta files generated from longread reads, e.g., `/path/to/fasta/`\n\n- `--read_type`\n\n  - The type of longread data you are utilising, e.g., ont, illumina, hifi.\n\n- `--aligner`\n\n  - The aligner yopu wish to use for the coverage generation, defaults to bwamem2 but minimap2 is also supported.\n\n- `--cram`\n\n  - The directory of the cram _and_ cram.crai files, e.g., `/path/to/cram/`\n\n- `--map_order`\n\n  - hic map scaffold order, input either `length` or `unsorted`\n\n- `--teloseq`\n\n  - A telomeric sequence, e.g., `TTAGGG`\n\n- `--all_output`\n\n  - An option to output all maps + accessory files, the default will only output the pretextmaps where ingestion has occured.\n\nNow, you can run the 
pipeline using:\n\n```bash\nnextflow run sanger-tol/curationpretext \\\n  --input { input.fasta } \\\n  --cram { path/to/cram/ } \\\n  --reads { path/to/longread/fasta/ } \\\n  --read_type { default is \"hifi\" }\n  --sample { default is \"pretext_rerun\" } \\\n  --teloseq { default is \"TTAGGG\" } \\\n  --map_order { default is \"unsorted\" } \\\n  --all_output <true/false> \\\n  --outdir { OUTDIR } \\\n  -profile <docker/singularity/{institute}>\n\n```\n\n> **Warning:**\n> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those\n> provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_;\n\nFor more details, please refer to the [usage documentation](https://pipelines.tol.sanger.ac.uk/curationpretext/usage) and the [parameter documentation](https://pipelines.tol.sanger.ac.uk/curationpretext/parameters).\n\n## Pipeline output\n\nTo see the the results of a test run with a full size dataset refer to the [results](https://pipelines.tol.sanger.ac.uk/curationpretext/results) tab on the sanger-tol/curationpretext website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://pipelines.tol.sanger.ac.uk/curationpretext/output).\n\n## Credits\n\nsanger-tol/curationpretext was originally written by Damon-Lee B Pointon (@DLBPointon).\n\nWe thank the following people for their extensive assistance in the development of this pipeline:\n\n- @muffato - For reviews.\n\n- @yumisims - TreeVal and Software.\n\n- @weaglesBio - TreeVal and Software.\n\n- @josieparis - Help with better docs and testing.\n\n- @mahesh-panchal - Large support with 1.2.0 in making the pipeline more robust with other HPC environments.\n\n- @GRIT - For feedback and feature requests.\n\n- @prototaxites - Support with 1.3.0 and showing me the power of GAWK.\n\n## Contributions and Support\n\nIf you would like to contribute to this 
pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\n## Citations\n\nIf you use sanger-tol/curationpretext for your analysis, please cite it using the following doi: [10.5281/zenodo.12773958](https://doi.org/10.5281/zenodo.12773958)\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nThis pipeline uses code and infrastructure developed and maintained by the [nf-core](https://nf-co.re) community, reused here under the [MIT license](https://github.com/nf-core/tools/blob/main/LICENSE).\n\n> **The nf-core framework for community-curated bioinformatics pipelines.**\n>\n> Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven Nahnsen.\n>\n> _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n",
+            "description": "# ![sanger-tol/curationpretext](docs/images/curationpretext-light.png#gh-light-mode-only) ![sanger-tol/curationpretext](docs/images/curationpretext-dark.png#gh-dark-mode-only)\n\n[![GitHub Actions CI Status](https://github.com/sanger-tol/curationpretext/actions/workflows/nf-test.yml/badge.svg)](https://github.com/sanger-tol/curationpretext/actions/workflows/nf-test.yml)\n[![GitHub Actions Linting Status](https://github.com/sanger-tol/curationpretext/actions/workflows/linting.yml/badge.svg)](https://github.com/sanger-tol/curationpretext/actions/workflows/linting.yml)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.12773958-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.12773958)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/version-%E2%89%A524.10.5-green?style=flat&logo=nextflow&logoColor=white&color=%230DC09D&link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/)\n[![nf-core template version](https://img.shields.io/badge/nf--core_template-3.3.2-green?style=flat&logo=nfcore&logoColor=white&color=%2324B064&link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.3.2)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/sanger-tol/curationpretext)\n\n## Introduction\n\n**sanger-tol/curationpretext** is a bioinformatics pipeline typically used in conjunction with 
[TreeVal](https://github.com/sanger-tol/treeval) to generate pretext maps (and optionally telomeric, gap, coverage, and repeat density plots which can be ingested into pretext) for the manual curation of high quality genomes.\n\nThis is intended as a supplementary pipeline for the [treeval](https://github.com/sanger-tol/treeval) project. This pipeline can be simply used to generate pretext maps, information on how to run this pipeline can be found in the [usage documentation](https://pipelines.tol.sanger.ac.uk/curationpretext/usage).\n\n![Workflow Diagram](./docs/images/CurationPretext_1_3_0.png)\n\n1. Generate Maps - Generates pretext maps as well as a static image.\n\n2. Accessory files - Generates the repeat density, gap, telomere, and coverage tracks.\n\n## Usage\n\n> [!NOTE]\n> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\nCurrently, the pipeline uses the following flags:\n\n- `--input`\n  - The absolute path to the assembled genome in, e.g., `/path/to/assembly.fa`\n\n- `--reads`\n  - The directory of the fasta files generated from longread reads, e.g., `/path/to/fasta/`\n\n- `--read_type`\n  - The type of longread data you are utilising, e.g., ont, illumina, hifi.\n\n- `--aligner`\n  - The aligner yopu wish to use for the coverage generation, defaults to bwamem2 but minimap2 is also supported.\n\n- `--cram`\n  - The directory of the cram _and_ cram.crai files, e.g., `/path/to/cram/`\n\n- `--map_order`\n  - hic map scaffold order, input either `length` or `unsorted`\n\n- `--teloseq`\n  - A telomeric sequence, e.g., `TTAGGG`\n\n- `--all_output`\n  - An option to output all maps + accessory files, the default will only output the pretextmaps where ingestion has occured.\n\nNow, you can run the pipeline 
using:\n\n```bash\nnextflow run sanger-tol/curationpretext \\\n  --input { input.fasta } \\\n  --cram { path/to/cram/ } \\\n  --reads { path/to/longread/fasta/ } \\\n  --read_type { default is \"hifi\" }\n  --sample { default is \"pretext_rerun\" } \\\n  --teloseq { default is \"TTAGGG\" } \\\n  --map_order { default is \"unsorted\" } \\\n  --all_output <true/false> \\\n  --outdir { OUTDIR } \\\n  -profile <docker/singularity/{institute}>\n\n```\n\n> **Warning:**\n> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those\n> provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_;\n\nFor more details, please refer to the [usage documentation](https://pipelines.tol.sanger.ac.uk/curationpretext/usage) and the [parameter documentation](https://pipelines.tol.sanger.ac.uk/curationpretext/parameters).\n\n## Pipeline output\n\nTo see the the results of a test run with a full size dataset refer to the [results](https://pipelines.tol.sanger.ac.uk/curationpretext/results) tab on the sanger-tol/curationpretext website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://pipelines.tol.sanger.ac.uk/curationpretext/output).\n\n## Credits\n\nsanger-tol/curationpretext was originally written by Damon-Lee B Pointon (@DLBPointon).\n\nWe thank the following people for their extensive assistance in the development of this pipeline:\n\n- @muffato - For reviews.\n\n- @yumisims - TreeVal and Software.\n\n- @weaglesBio - TreeVal and Software.\n\n- @josieparis - Help with better docs and testing.\n\n- @mahesh-panchal - Large support with 1.2.0 in making the pipeline more robust with other HPC environments.\n\n- @GRIT - For feedback and feature requests.\n\n- @prototaxites - Support with 1.3.0 and showing me the power of GAWK.\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, 
please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\n## Citations\n\nIf you use sanger-tol/curationpretext for your analysis, please cite it using the following doi: [10.5281/zenodo.12773958](https://doi.org/10.5281/zenodo.12773958)\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nThis pipeline uses code and infrastructure developed and maintained by the [nf-core](https://nf-co.re) community, reused here under the [MIT license](https://github.com/nf-core/tools/blob/main/LICENSE).\n\n> **The nf-core framework for community-curated bioinformatics pipelines.**\n>\n> Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven Nahnsen.\n>\n> _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n",
             "hasPart": [
                 {
                     "@id": "main.nf"
@@ -124,7 +124,11 @@
         },
         {
             "@id": "main.nf",
-            "@type": ["File", "SoftwareSourceCode", "ComputationalWorkflow"],
+            "@type": [
+                "File",
+                "SoftwareSourceCode",
+                "ComputationalWorkflow"
+            ],
             "creator": [
                 {
                     "@id": "https://orcid.org/0000-0002-7860-3560"
@@ -133,9 +137,16 @@
             "dateCreated": "",
             "dateModified": "2025-05-27T10:34:43Z",
             "dct:conformsTo": "https://bioschemas.org/profiles/ComputationalWorkflow/1.0-RELEASE/",
-            "keywords": ["nf-core", "nextflow"],
-            "license": ["MIT"],
-            "name": ["sanger-tol/curationpretext"],
+            "keywords": [
+                "nf-core",
+                "nextflow"
+            ],
+            "license": [
+                "MIT"
+            ],
+            "name": [
+                "sanger-tol/curationpretext"
+            ],
             "programmingLanguage": {
                 "@id": "https://w3id.org/workflowhub/workflow-ro-crate#nextflow"
             },
@@ -146,7 +157,9 @@
                 "https://github.com/sanger-tol/curationpretext",
                 "https://pipelines.tol.sanger.ac.uk/sanger-tol/curationpretext/1.4.2/"
             ],
-            "version": ["1.4.2"]
+            "version": [
+                "1.4.2"
+            ]
         },
         {
             "@id": "https://w3id.org/workflowhub/workflow-ro-crate#nextflow",
@@ -332,4 +345,4 @@
             "name": "Josie Paris"
         }
     ]
-}
+}
\ No newline at end of file

From 521e2d609667b1e6cf3abf10d3556040683606e6 Mon Sep 17 00:00:00 2001
From: DLBPointon <damonlbp@hotmail.co.uk>
Date: Fri, 29 Aug 2025 11:10:48 +0100
Subject: [PATCH 42/58] Another rocrate update

---
 ro-crate-metadata.json | 25 ++++++-------------------
 1 file changed, 6 insertions(+), 19 deletions(-)

diff --git a/ro-crate-metadata.json b/ro-crate-metadata.json
index 05ff8efd..fd7ddd4a 100644
--- a/ro-crate-metadata.json
+++ b/ro-crate-metadata.json
@@ -124,11 +124,7 @@
         },
         {
             "@id": "main.nf",
-            "@type": [
-                "File",
-                "SoftwareSourceCode",
-                "ComputationalWorkflow"
-            ],
+            "@type": ["File", "SoftwareSourceCode", "ComputationalWorkflow"],
             "creator": [
                 {
                     "@id": "https://orcid.org/0000-0002-7860-3560"
@@ -137,16 +133,9 @@
             "dateCreated": "",
             "dateModified": "2025-05-27T10:34:43Z",
             "dct:conformsTo": "https://bioschemas.org/profiles/ComputationalWorkflow/1.0-RELEASE/",
-            "keywords": [
-                "nf-core",
-                "nextflow"
-            ],
-            "license": [
-                "MIT"
-            ],
-            "name": [
-                "sanger-tol/curationpretext"
-            ],
+            "keywords": ["nf-core", "nextflow"],
+            "license": ["MIT"],
+            "name": ["sanger-tol/curationpretext"],
             "programmingLanguage": {
                 "@id": "https://w3id.org/workflowhub/workflow-ro-crate#nextflow"
             },
@@ -157,9 +146,7 @@
                 "https://github.com/sanger-tol/curationpretext",
                 "https://pipelines.tol.sanger.ac.uk/sanger-tol/curationpretext/1.4.2/"
             ],
-            "version": [
-                "1.4.2"
-            ]
+            "version": ["1.4.2"]
         },
         {
             "@id": "https://w3id.org/workflowhub/workflow-ro-crate#nextflow",
@@ -345,4 +332,4 @@
             "name": "Josie Paris"
         }
     ]
-}
\ No newline at end of file
+}

From 26162d766d8a880481b69d4a39a8d94f103955d8 Mon Sep 17 00:00:00 2001
From: DLBPointon <damonlbp@hotmail.co.uk>
Date: Mon, 15 Sep 2025 10:44:11 +0100
Subject: [PATCH 43/58] Update to resources

---
 conf/base.config | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/conf/base.config b/conf/base.config
index 84aed456..16ee6a42 100644
--- a/conf/base.config
+++ b/conf/base.config
@@ -31,12 +31,12 @@ process {
 
     withName: CRAM_FILTER_ALIGN_BWAMEM2_FIXMATE_SORT {
         cpus    = { 16 }
-        memory  = { 1.GB     * ( reference.size() < 2e9 ? 80 : Math.ceil( ( reference.size() / 1e+9 ) * 30 ) * Math.ceil( task.attempt * 1 ) ) }
+        memory  = { 1.GB     * ( reference.size() < 2e9 ? 80 : Math.ceil( ( reference.size() / 1e+9 ) * 50 ) * Math.ceil( task.attempt * 1 ) ) }
     }
 
     withName: CRAM_FILTER_MINIMAP2_FILTER5END_FIXMATE_SORT {
         cpus    = { 16 }
-        memory  = { 1.GB     * ( reference.size() < 2e9 ? 50 : Math.ceil( ( reference.size() / 1e+9 ) * 3 ) * Math.ceil( task.attempt * 1 ) ) }
+        memory  = { 1.GB     * ( reference.size() < 2e9 ? 50 : Math.ceil( ( reference.size() / 1e+9 ) * 8 ) * Math.ceil( task.attempt * 1 ) ) }
     }
 
     withName: PRETEXT_GRAPH {

From e196eaa2bf9026accd5003d39a54828e6d573a43 Mon Sep 17 00:00:00 2001
From: DLBPointon <damonlbp@hotmail.co.uk>
Date: Mon, 15 Sep 2025 10:44:37 +0100
Subject: [PATCH 44/58] Update naming of tracks to match what pretextview
 expects

---
 modules/local/pretext/graph/main.nf | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/modules/local/pretext/graph/main.nf b/modules/local/pretext/graph/main.nf
index 49bec139..df351d1f 100644
--- a/modules/local/pretext/graph/main.nf
+++ b/modules/local/pretext/graph/main.nf
@@ -30,6 +30,20 @@ process PRETEXT_GRAPH {
     def UCSC_VERSION = '447' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions.
 
     // Using single [ ] as nextflow will use sh where possible not bash
+    //
+    // Core track names (-n) must match the keys below (taken from PretextView);
+    // this allows the use of keyboard shortcuts for the main tracks:
+    //
+    // data_type_dic{  // use this data_type
+    //     {"default", 0, },
+    //     {"repeat_density", 1},
+    //     {"gap", 2},
+    //     {"coverage", 3},
+    //     {"coverage_avg", 4},
+    //     {"telomere", 5},
+    //     {"not_weighted", 6}
+    // };
+    //
     """
     echo "PROCESSING ESSENTIAL FILES"
 
@@ -87,7 +101,9 @@ process PRETEXT_GRAPH {
 
         if [ -s "\$file_og" ]; then
             echo "Processing OG_TELOMERE file: \$file_og"
-            PretextGraph $args -i "\$input_file" -n "og_telomere" -o telo_0.pretext < "\$file_og"
+
+            # Must be named "telomere"
+            PretextGraph $args -i "\$input_file" -n "telomere" -o telo_0.pretext < "\$file_og"
         else
             echo "OG TELOMERE file - Could be empty or missing"
             cp "\$input_file" telo_0.pretext

From e102e0f4250ba504bad3d2ec4e659cb1b8d622af Mon Sep 17 00:00:00 2001
From: Damon-Lee Pointon <51855558+DLBPointon@users.noreply.github.com>
Date: Tue, 16 Sep 2025 10:37:32 +0100
Subject: [PATCH 45/58] Update nextflow.config

Correction from @muffato

Co-authored-by: Matthieu Muffato <mm49@sanger.ac.uk>
---
 nextflow.config | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/nextflow.config b/nextflow.config
index c8f7f654..cd1f3d60 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -265,7 +265,7 @@ manifest {
     homePage        = 'https://github.com/sanger-tol/curationpretext'
     description     = """A simple pipeline to generate pretext files for genomic curation."""
     mainScript      = 'main.nf'
-    defaultBranch   = 'master'
+    defaultBranch   = 'main'
     nextflowVersion = '!>=24.10.5'
     version         = '1.5.0'
     doi             = '10.5281/zenodo.12773958'

From bd428b344bbfdaf715f344509a81feb2a0eb08bd Mon Sep 17 00:00:00 2001
From: DLBPointon <damonlbp@hotmail.co.uk>
Date: Tue, 16 Sep 2025 12:28:34 +0100
Subject: [PATCH 46/58] Add switch for large genomes to switch over to MINIMAP2
 rather than end-user selection

---
 .../utils_nfcore_curationpretext_pipeline/main.nf | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/subworkflows/local/utils_nfcore_curationpretext_pipeline/main.nf b/subworkflows/local/utils_nfcore_curationpretext_pipeline/main.nf
index c91cb7d3..8a3b4670 100644
--- a/subworkflows/local/utils_nfcore_curationpretext_pipeline/main.nf
+++ b/subworkflows/local/utils_nfcore_curationpretext_pipeline/main.nf
@@ -79,17 +79,24 @@ workflow PIPELINE_INITIALISATION {
                         type: 'dir'
                     )
 
-    ch_reference    = input_fasta.map { fasta ->
+    ch_reference = input_fasta.map { fasta ->
+        def fasta_size = fasta.size()
+        def selected_aligner = (params_aligner == "AUTO") ?
+            (fasta_size > 5e9 ? "minimap2" : "bwamem2") :
+            params.aligner
+
         tuple(
-            [   id: params.sample,
-                aligner: params.aligner,
+            [
+                id: params.sample,
+                aligner: selected_aligner,
                 map_order: params.map_order,
-                ref_size: fasta.size(),
+                ref_size: fasta_size,
             ],
             fasta
         )
     }
 
+
     ch_cram_reads   = cram_dir.map { dir ->
         tuple(
             [   id: params.sample   ],

From 512ac147e39939c661078d4165dbadc6a1e4539a Mon Sep 17 00:00:00 2001
From: DLBPointon <damonlbp@hotmail.co.uk>
Date: Tue, 16 Sep 2025 12:28:52 +0100
Subject: [PATCH 47/58] Adding AUTO as option

---
 nextflow.config      | 2 +-
 nextflow_schema.json | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/nextflow.config b/nextflow.config
index cd1f3d60..b86dba7b 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -17,7 +17,7 @@ params {
     teloseq                    = "TTAGGG"
     reads                      = null
     cram                       = null
-    aligner                    = "bwamem2"
+    aligner                    = "AUTO"
     read_type                  = "hifi"
     map_order                  = "unsorted"
     all_output                 = false
diff --git a/nextflow_schema.json b/nextflow_schema.json
index b3d71f02..06b76aca 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -69,9 +69,9 @@
                 "aligner": {
                     "type": "string",
                     "description": "Aligner for use {minimap2, bwamem2} in generating map",
-                    "help_text": "Pick between {minimap2, bwamem2}. Defaults to 'minimap2'",
+                    "help_text": "Pick between {minimap2, bwamem2, AUTO}. Defaults to 'AUTO', which uses minimap2 when the reference is larger than 5Gb and bwamem2 otherwise",
                     "fa_icon": "fas fa-file-signature",
-                    "enum": ["bwamem2", "minimap2"]
+                    "enum": ["bwamem2", "minimap2", "AUTO"]
                 },
                 "run_hires": {
                     "type": "boolean",

From d7e6c97c932f973cd82ffcfd1cd7efe061d57c56 Mon Sep 17 00:00:00 2001
From: DLBPointon <damonlbp@hotmail.co.uk>
Date: Tue, 16 Sep 2025 12:29:28 +0100
Subject: [PATCH 48/58] TreeVal Parity for the resource configs

---
 conf/base.config | 37 ++++++++++++++++++++++++++++---------
 1 file changed, 28 insertions(+), 9 deletions(-)

diff --git a/conf/base.config b/conf/base.config
index 16ee6a42..9ca32fe1 100644
--- a/conf/base.config
+++ b/conf/base.config
@@ -21,22 +21,41 @@ process {
     withName:SAMTOOLS_MERGE {
         cpus    = { 16                          }
         memory  = { 50.GB     * task.attempt    }
-        time    = { 20.h      * task.attempt    }
+        time    = { 30.h      * task.attempt    }
     }
 
     withName: '.*:.*:LONGREAD_COVERAGE:(MINIMAP2_ALIGN|MINIMAP2_ALIGN_SPLIT)' {
-        cpus    = { 16                          }
-        memory  = { 1.GB     * ( reference.size() < 2e9 ? 50 : Math.ceil( ( reference.size() / 1e+9 ) * 20 ) * Math.ceil( task.attempt * 1 )) }
+        cpus   = { 20   * 1 }
+        memory = {
+                1.GB * (
+                reference.size() < 2e9 ? 30 :
+                (reference.size() < 5e9 ? 40 :
+                (reference.size() < 10e9 ? 60 :
+                Math.ceil((reference.size() / 1e9) * 3)
+                    )
+                )
+            ) * Math.ceil(task.attempt * 1)
+        }
+        time   = { 1.h  * ( reference.size() < 1e9 ? 10 : reference.size() < 10e9 ? 30 : 48) }
     }
 
     withName: CRAM_FILTER_ALIGN_BWAMEM2_FIXMATE_SORT {
-        cpus    = { 16 }
-        memory  = { 1.GB     * ( reference.size() < 2e9 ? 80 : Math.ceil( ( reference.size() / 1e+9 ) * 50 ) * Math.ceil( task.attempt * 1 ) ) }
+        cpus    = { 16   * 1 }
+        memory  = { 1.GB * ( reference.size() < 2e9 ? 80 : Math.ceil( ( reference.size() / 1e+9 ) * 30 ) * Math.ceil( task.attempt * 1 ) ) }
     }
 
     withName: CRAM_FILTER_MINIMAP2_FILTER5END_FIXMATE_SORT {
-        cpus    = { 16 }
-        memory  = { 1.GB     * ( reference.size() < 2e9 ? 50 : Math.ceil( ( reference.size() / 1e+9 ) * 8 ) * Math.ceil( task.attempt * 1 ) ) }
+        cpus   = { 16   * 1 }
+        memory = {
+                1.GB * (
+                reference.size() < 2e9 ? 30 :
+                (reference.size() < 5e9 ? 40 :
+                (reference.size() < 10e9 ? 60 :
+                Math.ceil((reference.size() / 1e9) * 3)
+                    )
+                )
+            ) * Math.ceil(task.attempt * 1)
+        }
     }
 
     withName: PRETEXT_GRAPH {
@@ -44,9 +63,9 @@ process {
     }
 
     withName: PRETEXTMAP_STANDRD{
-        cpus    = { 8        * task.attempt }
+        cpus    = { 8        * 1 }
         memory  = { 3.GB     * task.attempt }
-        time   =  { 1.h      * ( ( fasta.size() < 4e9 ? 24 : 48 ) * Math.ceil( task.attempt * 1 ) ) }
+        time    = { 1.h      * ( ( fasta.size() < 4e9 ? 24 : 48 ) * task.attempt ) }
     }
 
     withName: PRETEXTMAP_HIGHRES {

From 08462959ec31fd396a9e1b5657d85a09f72b3d2e Mon Sep 17 00:00:00 2001
From: DLBPointon <damonlbp@hotmail.co.uk>
Date: Tue, 16 Sep 2025 12:36:50 +0100
Subject: [PATCH 49/58] Fat finger spelling mistake

---
 .../local/utils_nfcore_curationpretext_pipeline/main.nf         | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/subworkflows/local/utils_nfcore_curationpretext_pipeline/main.nf b/subworkflows/local/utils_nfcore_curationpretext_pipeline/main.nf
index 8a3b4670..f0882ce6 100644
--- a/subworkflows/local/utils_nfcore_curationpretext_pipeline/main.nf
+++ b/subworkflows/local/utils_nfcore_curationpretext_pipeline/main.nf
@@ -81,7 +81,7 @@ workflow PIPELINE_INITIALISATION {
 
     ch_reference = input_fasta.map { fasta ->
         def fasta_size = fasta.size()
-        def selected_aligner = (params_aligner == "AUTO") ?
+        def selected_aligner = (params.aligner == "AUTO") ?
             (fasta_size > 5e9 ? "minimap2" : "bwamem2") :
             params.aligner
 

From f799ad8a533f2aaedd79d9e633aded8d3a16a3ec Mon Sep 17 00:00:00 2001
From: DLBPointon <damonlbp@hotmail.co.uk>
Date: Tue, 16 Sep 2025 12:51:10 +0100
Subject: [PATCH 50/58] Pipeline doesn't support conda

---
 .github/workflows/nf-test.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/nf-test.yml b/.github/workflows/nf-test.yml
index 593c9360..f43d5b74 100644
--- a/.github/workflows/nf-test.yml
+++ b/.github/workflows/nf-test.yml
@@ -71,8 +71,8 @@ jobs:
           - ${{ github.base_ref == 'master' || github.base_ref == 'main' }}
         # Exclude conda and singularity on dev
         exclude:
-          - isMain: false
-            profile: "conda"
+          # - isMain: false
+          #   profile: "conda"
           - isMain: false
             profile: "singularity"
         NXF_VER:

From 48f7b5ce7d0530e3b0c19b0eaad51d4d15057045 Mon Sep 17 00:00:00 2001
From: DLBPointon <damonlbp@hotmail.co.uk>
Date: Tue, 16 Sep 2025 13:40:23 +0100
Subject: [PATCH 51/58] Update CHANGELOG

---
 CHANGELOG.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index fe9a0345..901bff0c 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -26,6 +26,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Added the `gawk_split_directions.awk` script for split telomere.
 - Addition of GUNZIP for the input reference genome.
 - Update tests.
+- Added an "AUTO" value to the `--aligner` arg. If a genome is >5Gb it will use minimap2 else bwamem2.
+- Parity update for the base.config to match TreeVal.
 
 ### Paramters
 

From f39eecfb6ac189bee8c3bb4d618df2b24792ce23 Mon Sep 17 00:00:00 2001
From: DLBPointon <damonlbp@hotmail.co.uk>
Date: Tue, 16 Sep 2025 13:41:21 +0100
Subject: [PATCH 52/58] remove conda this time

---
 .github/workflows/nf-test.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/nf-test.yml b/.github/workflows/nf-test.yml
index f43d5b74..e113a611 100644
--- a/.github/workflows/nf-test.yml
+++ b/.github/workflows/nf-test.yml
@@ -66,7 +66,7 @@ jobs:
       fail-fast: false
       matrix:
         shard: ${{ fromJson(needs.nf-test-changes.outputs.shard) }}
-        profile: [conda, docker, singularity]
+        profile: [docker, singularity]
         isMain:
           - ${{ github.base_ref == 'master' || github.base_ref == 'main' }}
         # Exclude conda and singularity on dev

From dfa8f0b01388fe3840de57afaaf635b26bfd147d Mon Sep 17 00:00:00 2001
From: DLBPointon <damonlbp@hotmail.co.uk>
Date: Fri, 19 Sep 2025 11:45:02 +0100
Subject: [PATCH 53/58] Update some text

---
 README.md | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/README.md b/README.md
index d68799fe..8553501e 100644
--- a/README.md
+++ b/README.md
@@ -33,8 +33,12 @@ Currently, the pipeline uses the following flags:
 - `--input`
   - The absolute path to the assembled genome in, e.g., `/path/to/assembly.fa`
 
+- `--sample`
+  - Sample is the naming prefix of the output files, e.g. iyTipFemo
+
 - `--reads`
   - The directory of the fasta files generated from longread reads, e.g., `/path/to/fasta/`
+  - This folder *must* contain files in a `.fasta.gz` format, or they will be skipped by the internal file search function.
 
 - `--read_type`
   - The type of longread data you are utilising, e.g., ont, illumina, hifi.

From 39d021ff1d6d9e7037c8894a3f2deddbaccc59b6 Mon Sep 17 00:00:00 2001
From: DLBPointon <damonlbp@hotmail.co.uk>
Date: Fri, 19 Sep 2025 11:47:20 +0100
Subject: [PATCH 54/58] add line on sample

---
 docs/usage.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/docs/usage.md b/docs/usage.md
index 842b4cdd..c723c594 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -10,6 +10,8 @@ Currently, the pipeline expects input data to be in a specific format.
 
 The `--input` should be `.fasta` or `.fa` (the same format but differing suffix).
 
+The `--sample` should be the naming prefix you want for the output files.
+
 The `--cram` should point to the folder containing `.cram` files along with a `.crai` per `.cram`.
 
 The `--reads` should point to the folder containing `.fasta.gz` files.

From be1c6920893e130ba69f6156f000988eb272569b Mon Sep 17 00:00:00 2001
From: DLBPointon <damonlbp@hotmail.co.uk>
Date: Fri, 19 Sep 2025 11:53:06 +0100
Subject: [PATCH 55/58] linting

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 8553501e..52b99db1 100644
--- a/README.md
+++ b/README.md
@@ -38,7 +38,7 @@ Currently, the pipeline uses the following flags:
 
 - `--reads`
   - The directory of the fasta files generated from longread reads, e.g., `/path/to/fasta/`
-  - This folder *must* contain files in a `.fasta.gz` format, or they will be skipped by the internal file search function.
+  - This folder _must_ contain files in a `.fasta.gz` format, or they will be skipped by the internal file search function.
 
 - `--read_type`
   - The type of longread data you are utilising, e.g., ont, illumina, hifi.

From 8aa53411de1450a324a7f8f470a2194fc5a1d1b8 Mon Sep 17 00:00:00 2001
From: DLBPointon <damonlbp@hotmail.co.uk>
Date: Fri, 19 Sep 2025 12:14:35 +0100
Subject: [PATCH 56/58] Update

---
 ro-crate-metadata.json | 27 ++++++++++++++++++++-------
 1 file changed, 20 insertions(+), 7 deletions(-)

diff --git a/ro-crate-metadata.json b/ro-crate-metadata.json
index fd7ddd4a..b39134f9 100644
--- a/ro-crate-metadata.json
+++ b/ro-crate-metadata.json
@@ -23,7 +23,7 @@
             "@type": "Dataset",
             "creativeWorkStatus": "Stable",
             "datePublished": "2025-05-27T09:34:43+00:00",
-            "description": "# ![sanger-tol/curationpretext](docs/images/curationpretext-light.png#gh-light-mode-only) ![sanger-tol/curationpretext](docs/images/curationpretext-dark.png#gh-dark-mode-only)\n\n[![GitHub Actions CI Status](https://github.com/sanger-tol/curationpretext/actions/workflows/nf-test.yml/badge.svg)](https://github.com/sanger-tol/curationpretext/actions/workflows/nf-test.yml)\n[![GitHub Actions Linting Status](https://github.com/sanger-tol/curationpretext/actions/workflows/linting.yml/badge.svg)](https://github.com/sanger-tol/curationpretext/actions/workflows/linting.yml)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.12773958-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.12773958)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/version-%E2%89%A524.10.5-green?style=flat&logo=nextflow&logoColor=white&color=%230DC09D&link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/)\n[![nf-core template version](https://img.shields.io/badge/nf--core_template-3.3.2-green?style=flat&logo=nfcore&logoColor=white&color=%2324B064&link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.3.2)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/sanger-tol/curationpretext)\n\n## Introduction\n\n**sanger-tol/curationpretext** is a bioinformatics pipeline typically used in conjunction with 
[TreeVal](https://github.com/sanger-tol/treeval) to generate pretext maps (and optionally telomeric, gap, coverage, and repeat density plots which can be ingested into pretext) for the manual curation of high quality genomes.\n\nThis is intended as a supplementary pipeline for the [treeval](https://github.com/sanger-tol/treeval) project. This pipeline can be simply used to generate pretext maps, information on how to run this pipeline can be found in the [usage documentation](https://pipelines.tol.sanger.ac.uk/curationpretext/usage).\n\n![Workflow Diagram](./docs/images/CurationPretext_1_3_0.png)\n\n1. Generate Maps - Generates pretext maps as well as a static image.\n\n2. Accessory files - Generates the repeat density, gap, telomere, and coverage tracks.\n\n## Usage\n\n> [!NOTE]\n> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\nCurrently, the pipeline uses the following flags:\n\n- `--input`\n  - The absolute path to the assembled genome in, e.g., `/path/to/assembly.fa`\n\n- `--reads`\n  - The directory of the fasta files generated from longread reads, e.g., `/path/to/fasta/`\n\n- `--read_type`\n  - The type of longread data you are utilising, e.g., ont, illumina, hifi.\n\n- `--aligner`\n  - The aligner yopu wish to use for the coverage generation, defaults to bwamem2 but minimap2 is also supported.\n\n- `--cram`\n  - The directory of the cram _and_ cram.crai files, e.g., `/path/to/cram/`\n\n- `--map_order`\n  - hic map scaffold order, input either `length` or `unsorted`\n\n- `--teloseq`\n  - A telomeric sequence, e.g., `TTAGGG`\n\n- `--all_output`\n  - An option to output all maps + accessory files, the default will only output the pretextmaps where ingestion has occured.\n\nNow, you can run the pipeline 
using:\n\n```bash\nnextflow run sanger-tol/curationpretext \\\n  --input { input.fasta } \\\n  --cram { path/to/cram/ } \\\n  --reads { path/to/longread/fasta/ } \\\n  --read_type { default is \"hifi\" }\n  --sample { default is \"pretext_rerun\" } \\\n  --teloseq { default is \"TTAGGG\" } \\\n  --map_order { default is \"unsorted\" } \\\n  --all_output <true/false> \\\n  --outdir { OUTDIR } \\\n  -profile <docker/singularity/{institute}>\n\n```\n\n> **Warning:**\n> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those\n> provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_;\n\nFor more details, please refer to the [usage documentation](https://pipelines.tol.sanger.ac.uk/curationpretext/usage) and the [parameter documentation](https://pipelines.tol.sanger.ac.uk/curationpretext/parameters).\n\n## Pipeline output\n\nTo see the the results of a test run with a full size dataset refer to the [results](https://pipelines.tol.sanger.ac.uk/curationpretext/results) tab on the sanger-tol/curationpretext website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://pipelines.tol.sanger.ac.uk/curationpretext/output).\n\n## Credits\n\nsanger-tol/curationpretext was originally written by Damon-Lee B Pointon (@DLBPointon).\n\nWe thank the following people for their extensive assistance in the development of this pipeline:\n\n- @muffato - For reviews.\n\n- @yumisims - TreeVal and Software.\n\n- @weaglesBio - TreeVal and Software.\n\n- @josieparis - Help with better docs and testing.\n\n- @mahesh-panchal - Large support with 1.2.0 in making the pipeline more robust with other HPC environments.\n\n- @GRIT - For feedback and feature requests.\n\n- @prototaxites - Support with 1.3.0 and showing me the power of GAWK.\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, 
please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\n## Citations\n\nIf you use sanger-tol/curationpretext for your analysis, please cite it using the following doi: [10.5281/zenodo.12773958](https://doi.org/10.5281/zenodo.12773958)\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nThis pipeline uses code and infrastructure developed and maintained by the [nf-core](https://nf-co.re) community, reused here under the [MIT license](https://github.com/nf-core/tools/blob/main/LICENSE).\n\n> **The nf-core framework for community-curated bioinformatics pipelines.**\n>\n> Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven Nahnsen.\n>\n> _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n",
+            "description": "# ![sanger-tol/curationpretext](docs/images/curationpretext-light.png#gh-light-mode-only) ![sanger-tol/curationpretext](docs/images/curationpretext-dark.png#gh-dark-mode-only)\n\n[![GitHub Actions CI Status](https://github.com/sanger-tol/curationpretext/actions/workflows/nf-test.yml/badge.svg)](https://github.com/sanger-tol/curationpretext/actions/workflows/nf-test.yml)\n[![GitHub Actions Linting Status](https://github.com/sanger-tol/curationpretext/actions/workflows/linting.yml/badge.svg)](https://github.com/sanger-tol/curationpretext/actions/workflows/linting.yml)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.12773958-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.12773958)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/version-%E2%89%A524.10.5-green?style=flat&logo=nextflow&logoColor=white&color=%230DC09D&link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/)\n[![nf-core template version](https://img.shields.io/badge/nf--core_template-3.3.2-green?style=flat&logo=nfcore&logoColor=white&color=%2324B064&link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.3.2)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/sanger-tol/curationpretext)\n\n## Introduction\n\n**sanger-tol/curationpretext** is a bioinformatics pipeline typically used in conjunction with 
[TreeVal](https://github.com/sanger-tol/treeval) to generate pretext maps (and optionally telomeric, gap, coverage, and repeat density plots which can be ingested into pretext) for the manual curation of high quality genomes.\n\nThis is intended as a supplementary pipeline for the [treeval](https://github.com/sanger-tol/treeval) project. This pipeline can be simply used to generate pretext maps, information on how to run this pipeline can be found in the [usage documentation](https://pipelines.tol.sanger.ac.uk/curationpretext/usage).\n\n![Workflow Diagram](./docs/images/CurationPretext_1_3_0.png)\n\n1. Generate Maps - Generates pretext maps as well as a static image.\n\n2. Accessory files - Generates the repeat density, gap, telomere, and coverage tracks.\n\n## Usage\n\n> [!NOTE]\n> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\nCurrently, the pipeline uses the following flags:\n\n- `--input`\n  - The absolute path to the assembled genome in, e.g., `/path/to/assembly.fa`\n\n- `--sample`\n  - Sample is the naming prefix of the output files, e.g. 
iyTipFemo\n\n- `--reads`\n  - The directory of the fasta files generated from longread reads, e.g., `/path/to/fasta/`\n  - This folder _must_ contain files in a `.fasta.gz` format, or they will be skipped by the internal file search function.\n\n- `--read_type`\n  - The type of longread data you are utilising, e.g., ont, illumina, hifi.\n\n- `--aligner`\n  - The aligner yopu wish to use for the coverage generation, defaults to bwamem2 but minimap2 is also supported.\n\n- `--cram`\n  - The directory of the cram _and_ cram.crai files, e.g., `/path/to/cram/`\n\n- `--map_order`\n  - hic map scaffold order, input either `length` or `unsorted`\n\n- `--teloseq`\n  - A telomeric sequence, e.g., `TTAGGG`\n\n- `--all_output`\n  - An option to output all maps + accessory files, the default will only output the pretextmaps where ingestion has occured.\n\nNow, you can run the pipeline using:\n\n```bash\nnextflow run sanger-tol/curationpretext \\\n  --input { input.fasta } \\\n  --cram { path/to/cram/ } \\\n  --reads { path/to/longread/fasta/ } \\\n  --read_type { default is \"hifi\" }\n  --sample { default is \"pretext_rerun\" } \\\n  --teloseq { default is \"TTAGGG\" } \\\n  --map_order { default is \"unsorted\" } \\\n  --all_output <true/false> \\\n  --outdir { OUTDIR } \\\n  -profile <docker/singularity/{institute}>\n\n```\n\n> **Warning:**\n> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. 
Custom config files including those\n> provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_;\n\nFor more details, please refer to the [usage documentation](https://pipelines.tol.sanger.ac.uk/curationpretext/usage) and the [parameter documentation](https://pipelines.tol.sanger.ac.uk/curationpretext/parameters).\n\n## Pipeline output\n\nTo see the the results of a test run with a full size dataset refer to the [results](https://pipelines.tol.sanger.ac.uk/curationpretext/results) tab on the sanger-tol/curationpretext website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://pipelines.tol.sanger.ac.uk/curationpretext/output).\n\n## Credits\n\nsanger-tol/curationpretext was originally written by Damon-Lee B Pointon (@DLBPointon).\n\nWe thank the following people for their extensive assistance in the development of this pipeline:\n\n- @muffato - For reviews.\n\n- @yumisims - TreeVal and Software.\n\n- @weaglesBio - TreeVal and Software.\n\n- @josieparis - Help with better docs and testing.\n\n- @mahesh-panchal - Large support with 1.2.0 in making the pipeline more robust with other HPC environments.\n\n- @GRIT - For feedback and feature requests.\n\n- @prototaxites - Support with 1.3.0 and showing me the power of GAWK.\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\n## Citations\n\nIf you use sanger-tol/curationpretext for your analysis, please cite it using the following doi: [10.5281/zenodo.12773958](https://doi.org/10.5281/zenodo.12773958)\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nThis pipeline uses code and infrastructure developed and maintained by the [nf-core](https://nf-co.re) community, reused here under the [MIT 
license](https://github.com/nf-core/tools/blob/main/LICENSE).\n\n> **The nf-core framework for community-curated bioinformatics pipelines.**\n>\n> Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven Nahnsen.\n>\n> _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n",
             "hasPart": [
                 {
                     "@id": "main.nf"
@@ -124,7 +124,11 @@
         },
         {
             "@id": "main.nf",
-            "@type": ["File", "SoftwareSourceCode", "ComputationalWorkflow"],
+            "@type": [
+                "File",
+                "SoftwareSourceCode",
+                "ComputationalWorkflow"
+            ],
             "creator": [
                 {
                     "@id": "https://orcid.org/0000-0002-7860-3560"
@@ -133,9 +137,16 @@
             "dateCreated": "",
             "dateModified": "2025-05-27T10:34:43Z",
             "dct:conformsTo": "https://bioschemas.org/profiles/ComputationalWorkflow/1.0-RELEASE/",
-            "keywords": ["nf-core", "nextflow"],
-            "license": ["MIT"],
-            "name": ["sanger-tol/curationpretext"],
+            "keywords": [
+                "nf-core",
+                "nextflow"
+            ],
+            "license": [
+                "MIT"
+            ],
+            "name": [
+                "sanger-tol/curationpretext"
+            ],
             "programmingLanguage": {
                 "@id": "https://w3id.org/workflowhub/workflow-ro-crate#nextflow"
             },
@@ -146,7 +157,9 @@
                 "https://github.com/sanger-tol/curationpretext",
                 "https://pipelines.tol.sanger.ac.uk/sanger-tol/curationpretext/1.4.2/"
             ],
-            "version": ["1.4.2"]
+            "version": [
+                "1.4.2"
+            ]
         },
         {
             "@id": "https://w3id.org/workflowhub/workflow-ro-crate#nextflow",
@@ -332,4 +345,4 @@
             "name": "Josie Paris"
         }
     ]
-}
+}
\ No newline at end of file

From a74b9e12fee0c3d92abafd8e07999bea68bf1ccf Mon Sep 17 00:00:00 2001
From: DLBPointon <damonlbp@hotmail.co.uk>
Date: Fri, 19 Sep 2025 13:26:27 +0100
Subject: [PATCH 57/58] Pre-commit linting

---
 ro-crate-metadata.json | 25 ++++++-------------------
 1 file changed, 6 insertions(+), 19 deletions(-)

diff --git a/ro-crate-metadata.json b/ro-crate-metadata.json
index b39134f9..c8de9e84 100644
--- a/ro-crate-metadata.json
+++ b/ro-crate-metadata.json
@@ -124,11 +124,7 @@
         },
         {
             "@id": "main.nf",
-            "@type": [
-                "File",
-                "SoftwareSourceCode",
-                "ComputationalWorkflow"
-            ],
+            "@type": ["File", "SoftwareSourceCode", "ComputationalWorkflow"],
             "creator": [
                 {
                     "@id": "https://orcid.org/0000-0002-7860-3560"
@@ -137,16 +133,9 @@
             "dateCreated": "",
             "dateModified": "2025-05-27T10:34:43Z",
             "dct:conformsTo": "https://bioschemas.org/profiles/ComputationalWorkflow/1.0-RELEASE/",
-            "keywords": [
-                "nf-core",
-                "nextflow"
-            ],
-            "license": [
-                "MIT"
-            ],
-            "name": [
-                "sanger-tol/curationpretext"
-            ],
+            "keywords": ["nf-core", "nextflow"],
+            "license": ["MIT"],
+            "name": ["sanger-tol/curationpretext"],
             "programmingLanguage": {
                 "@id": "https://w3id.org/workflowhub/workflow-ro-crate#nextflow"
             },
@@ -157,9 +146,7 @@
                 "https://github.com/sanger-tol/curationpretext",
                 "https://pipelines.tol.sanger.ac.uk/sanger-tol/curationpretext/1.4.2/"
             ],
-            "version": [
-                "1.4.2"
-            ]
+            "version": ["1.4.2"]
         },
         {
             "@id": "https://w3id.org/workflowhub/workflow-ro-crate#nextflow",
@@ -345,4 +332,4 @@
             "name": "Josie Paris"
         }
     ]
-}
\ No newline at end of file
+}

From b921914fa9757083ef9a2f7712689611a7db6a0a Mon Sep 17 00:00:00 2001
From: DLBPointon <damonlbp@hotmail.co.uk>
Date: Fri, 19 Sep 2025 13:50:09 +0100
Subject: [PATCH 58/58] Updates

---
 CHANGELOG.md | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 901bff0c..77dbc52b 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,7 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ### Added and Fixed
 
-- Template update to 3.3.3. <TODO in next PR>.
+- Template update to 3.3.2.
 - Addition of the `--split_telomere` boolean flag, this is false by default.
   - When `true` the pipeline will split the telomere file into a 5 and 3 prime file.
 - Update `ACCESSORY_FILES` subworkflow:
@@ -28,6 +28,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Update tests.
 - Added an "AUTO" value to the `--aligner` arg. If a genome is >5Gb it will use minimap2 else bwamem2.
 - Parity update for the base.config to match TreeVal.
+- Minor Doc updates.
+- Comment out the CONDA workflow requirement, as the pipeline does not support conda.
 
 ### Paramters