From bd428b344bbfdaf715f344509a81feb2a0eb08bd Mon Sep 17 00:00:00 2001
From: DLBPointon <damonlbp@hotmail.co.uk>
Date: Tue, 16 Sep 2025 12:28:34 +0100
Subject: [PATCH 01/13] Add switch for large genomes to switch over to MINIMAP2
 rather than end-user selection

---
 .../utils_nfcore_curationpretext_pipeline/main.nf | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/subworkflows/local/utils_nfcore_curationpretext_pipeline/main.nf b/subworkflows/local/utils_nfcore_curationpretext_pipeline/main.nf
index c91cb7d3..8a3b4670 100644
--- a/subworkflows/local/utils_nfcore_curationpretext_pipeline/main.nf
+++ b/subworkflows/local/utils_nfcore_curationpretext_pipeline/main.nf
@@ -79,17 +79,24 @@ workflow PIPELINE_INITIALISATION {
                         type: 'dir'
                     )
 
-    ch_reference    = input_fasta.map { fasta ->
+    ch_reference = input_fasta.map { fasta ->
+        def fasta_size = fasta.size()
+        def selected_aligner = (params_aligner == "AUTO") ?
+            (fasta_size > 5e9 ? "minimap2" : "bwamem2") :
+            params.aligner
+
         tuple(
-            [   id: params.sample,
-                aligner: params.aligner,
+            [
+                id: params.sample,
+                aligner: selected_aligner,
                 map_order: params.map_order,
-                ref_size: fasta.size(),
+                ref_size: fasta_size,
             ],
             fasta
         )
     }
 
+
     ch_cram_reads   = cram_dir.map { dir ->
         tuple(
             [   id: params.sample   ],

From 512ac147e39939c661078d4165dbadc6a1e4539a Mon Sep 17 00:00:00 2001
From: DLBPointon <damonlbp@hotmail.co.uk>
Date: Tue, 16 Sep 2025 12:28:52 +0100
Subject: [PATCH 02/13] Adding AUTO as option

---
 nextflow.config      | 2 +-
 nextflow_schema.json | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/nextflow.config b/nextflow.config
index cd1f3d60..b86dba7b 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -17,7 +17,7 @@ params {
     teloseq                    = "TTAGGG"
     reads                      = null
     cram                       = null
-    aligner                    = "bwamem2"
+    aligner                    = "AUTO"
     read_type                  = "hifi"
     map_order                  = "unsorted"
     all_output                 = false
diff --git a/nextflow_schema.json b/nextflow_schema.json
index b3d71f02..06b76aca 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -69,9 +69,9 @@
                 "aligner": {
                     "type": "string",
                     "description": "Aligner for use {minimap2, bwamem2} in generating map",
-                    "help_text": "Pick between {minimap2, bwamem2}. Defaults to 'minimap2'",
+                    "help_text": "Pick between {minimap2, bwamem2, AUTO}. Defaults to 'AUTO', which selects minimap2 for references larger than 5Gb and bwamem2 otherwise.",
                     "fa_icon": "fas fa-file-signature",
-                    "enum": ["bwamem2", "minimap2"]
+                    "enum": ["bwamem2", "minimap2", "AUTO"]
                 },
                 "run_hires": {
                     "type": "boolean",

From d7e6c97c932f973cd82ffcfd1cd7efe061d57c56 Mon Sep 17 00:00:00 2001
From: DLBPointon <damonlbp@hotmail.co.uk>
Date: Tue, 16 Sep 2025 12:29:28 +0100
Subject: [PATCH 03/13] TreeVal Parity for the resource configs

---
 conf/base.config | 37 ++++++++++++++++++++++++++++---------
 1 file changed, 28 insertions(+), 9 deletions(-)

diff --git a/conf/base.config b/conf/base.config
index 16ee6a42..9ca32fe1 100644
--- a/conf/base.config
+++ b/conf/base.config
@@ -21,22 +21,41 @@ process {
     withName:SAMTOOLS_MERGE {
         cpus    = { 16                          }
         memory  = { 50.GB     * task.attempt    }
-        time    = { 20.h      * task.attempt    }
+        time    = { 30.h      * task.attempt    }
     }
 
     withName: '.*:.*:LONGREAD_COVERAGE:(MINIMAP2_ALIGN|MINIMAP2_ALIGN_SPLIT)' {
-        cpus    = { 16                          }
-        memory  = { 1.GB     * ( reference.size() < 2e9 ? 50 : Math.ceil( ( reference.size() / 1e+9 ) * 20 ) * Math.ceil( task.attempt * 1 )) }
+        cpus   = { 20   * 1 }
+        memory = {
+                1.GB * (
+                reference.size() < 2e9 ? 30 :
+                (reference.size() < 5e9 ? 40 :
+                (reference.size() < 10e9 ? 60 :
+                Math.ceil((reference.size() / 1e9) * 3)
+                    )
+                )
+            ) * Math.ceil(task.attempt * 1)
+        }
+        time   = { 1.h  * ( reference.size() < 1e9 ? 10 : reference.size() < 10e9 ? 30 : 48) }
     }
 
     withName: CRAM_FILTER_ALIGN_BWAMEM2_FIXMATE_SORT {
-        cpus    = { 16 }
-        memory  = { 1.GB     * ( reference.size() < 2e9 ? 80 : Math.ceil( ( reference.size() / 1e+9 ) * 50 ) * Math.ceil( task.attempt * 1 ) ) }
+        cpus    = { 16   * 1 }
+        memory  = { 1.GB * ( reference.size() < 2e9 ? 80 : Math.ceil( ( reference.size() / 1e+9 ) * 30 ) ) * Math.ceil( task.attempt * 1 ) } // attempt multiplier sits outside the ternary so retries scale memory for both size branches
     }
 
     withName: CRAM_FILTER_MINIMAP2_FILTER5END_FIXMATE_SORT {
-        cpus    = { 16 }
-        memory  = { 1.GB     * ( reference.size() < 2e9 ? 50 : Math.ceil( ( reference.size() / 1e+9 ) * 8 ) * Math.ceil( task.attempt * 1 ) ) }
+        cpus   = { 16   * 1 }
+        memory = {
+                1.GB * (
+                reference.size() < 2e9 ? 30 :
+                (reference.size() < 5e9 ? 40 :
+                (reference.size() < 10e9 ? 60 :
+                Math.ceil((reference.size() / 1e9) * 3)
+                    )
+                )
+            ) * Math.ceil(task.attempt * 1)
+        }
     }
 
     withName: PRETEXT_GRAPH {
@@ -44,9 +63,9 @@ process {
     }
 
     withName: PRETEXTMAP_STANDRD{
-        cpus    = { 8        * task.attempt }
+        cpus    = { 8        * 1 }
         memory  = { 3.GB     * task.attempt }
-        time   =  { 1.h      * ( ( fasta.size() < 4e9 ? 24 : 48 ) * Math.ceil( task.attempt * 1 ) ) }
+        time    = { 1.h      * ( ( fasta.size() < 4e9 ? 24 : 48 ) * task.attempt ) }
     }
 
     withName: PRETEXTMAP_HIGHRES {

From 08462959ec31fd396a9e1b5657d85a09f72b3d2e Mon Sep 17 00:00:00 2001
From: DLBPointon <damonlbp@hotmail.co.uk>
Date: Tue, 16 Sep 2025 12:36:50 +0100
Subject: [PATCH 04/13] Fat finger spelling mistake

---
 .../local/utils_nfcore_curationpretext_pipeline/main.nf         | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/subworkflows/local/utils_nfcore_curationpretext_pipeline/main.nf b/subworkflows/local/utils_nfcore_curationpretext_pipeline/main.nf
index 8a3b4670..f0882ce6 100644
--- a/subworkflows/local/utils_nfcore_curationpretext_pipeline/main.nf
+++ b/subworkflows/local/utils_nfcore_curationpretext_pipeline/main.nf
@@ -81,7 +81,7 @@ workflow PIPELINE_INITIALISATION {
 
     ch_reference = input_fasta.map { fasta ->
         def fasta_size = fasta.size()
-        def selected_aligner = (params_aligner == "AUTO") ?
+        def selected_aligner = (params.aligner == "AUTO") ?
             (fasta_size > 5e9 ? "minimap2" : "bwamem2") :
             params.aligner
 

From f799ad8a533f2aaedd79d9e633aded8d3a16a3ec Mon Sep 17 00:00:00 2001
From: DLBPointon <damonlbp@hotmail.co.uk>
Date: Tue, 16 Sep 2025 12:51:10 +0100
Subject: [PATCH 05/13] Pipeline doesn't support conda

---
 .github/workflows/nf-test.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/nf-test.yml b/.github/workflows/nf-test.yml
index 593c9360..f43d5b74 100644
--- a/.github/workflows/nf-test.yml
+++ b/.github/workflows/nf-test.yml
@@ -71,8 +71,8 @@ jobs:
           - ${{ github.base_ref == 'master' || github.base_ref == 'main' }}
         # Exclude conda and singularity on dev
         exclude:
-          - isMain: false
-            profile: "conda"
+          # - isMain: false
+          #   profile: "conda"
           - isMain: false
             profile: "singularity"
         NXF_VER:

From 48f7b5ce7d0530e3b0c19b0eaad51d4d15057045 Mon Sep 17 00:00:00 2001
From: DLBPointon <damonlbp@hotmail.co.uk>
Date: Tue, 16 Sep 2025 13:40:23 +0100
Subject: [PATCH 06/13] Update CHANGELOG

---
 CHANGELOG.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index fe9a0345..901bff0c 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -26,6 +26,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Added the `gawk_split_directions.awk` script for split telomere.
 - Addition of GUNZIP for the input reference genome.
 - Update tests.
+- Added an "AUTO" value to the `--aligner` arg. If a genome is >5Gb it will use minimap2 else bwamem2.
+- Parity update for the base.config to match TreeVal.
 
 ### Paramters
 

From f39eecfb6ac189bee8c3bb4d618df2b24792ce23 Mon Sep 17 00:00:00 2001
From: DLBPointon <damonlbp@hotmail.co.uk>
Date: Tue, 16 Sep 2025 13:41:21 +0100
Subject: [PATCH 07/13] remove conda this time

---
 .github/workflows/nf-test.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/nf-test.yml b/.github/workflows/nf-test.yml
index f43d5b74..e113a611 100644
--- a/.github/workflows/nf-test.yml
+++ b/.github/workflows/nf-test.yml
@@ -66,7 +66,7 @@ jobs:
       fail-fast: false
       matrix:
         shard: ${{ fromJson(needs.nf-test-changes.outputs.shard) }}
-        profile: [conda, docker, singularity]
+        profile: [docker, singularity]
         isMain:
           - ${{ github.base_ref == 'master' || github.base_ref == 'main' }}
         # Exclude conda and singularity on dev

From dfa8f0b01388fe3840de57afaaf635b26bfd147d Mon Sep 17 00:00:00 2001
From: DLBPointon <damonlbp@hotmail.co.uk>
Date: Fri, 19 Sep 2025 11:45:02 +0100
Subject: [PATCH 08/13] Document --sample and the .fasta.gz requirement for --reads in README

---
 README.md | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/README.md b/README.md
index d68799fe..8553501e 100644
--- a/README.md
+++ b/README.md
@@ -33,8 +33,12 @@ Currently, the pipeline uses the following flags:
 - `--input`
   - The absolute path to the assembled genome in, e.g., `/path/to/assembly.fa`
 
+- `--sample`
+  - Sample is the naming prefix of the output files, e.g. iyTipFemo
+
 - `--reads`
   - The directory of the fasta files generated from longread reads, e.g., `/path/to/fasta/`
+  - This folder *must* contain files in a `.fasta.gz` format, or they will be skipped by the internal file search function.
 
 - `--read_type`
   - The type of longread data you are utilising, e.g., ont, illumina, hifi.

From 39d021ff1d6d9e7037c8894a3f2deddbaccc59b6 Mon Sep 17 00:00:00 2001
From: DLBPointon <damonlbp@hotmail.co.uk>
Date: Fri, 19 Sep 2025 11:47:20 +0100
Subject: [PATCH 09/13] add line on sample

---
 docs/usage.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/docs/usage.md b/docs/usage.md
index 842b4cdd..c723c594 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -10,6 +10,8 @@ Currently, the pipeline expects input data to be in a specific format.
 
 The `--input` should be `.fasta` or `.fa` (the same format but differing suffix).
 
+The `--sample` is the prefix used to name the output files.
+
 The `--cram` should point to the folder containing `.cram` files along with a `.crai` per `.cram`.
 
 The `--reads` should point to the folder containing `.fasta.gz` files.

From be1c6920893e130ba69f6156f000988eb272569b Mon Sep 17 00:00:00 2001
From: DLBPointon <damonlbp@hotmail.co.uk>
Date: Fri, 19 Sep 2025 11:53:06 +0100
Subject: [PATCH 10/13] linting

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 8553501e..52b99db1 100644
--- a/README.md
+++ b/README.md
@@ -38,7 +38,7 @@ Currently, the pipeline uses the following flags:
 
 - `--reads`
   - The directory of the fasta files generated from longread reads, e.g., `/path/to/fasta/`
-  - This folder *must* contain files in a `.fasta.gz` format, or they will be skipped by the internal file search function.
+  - This folder _must_ contain files in a `.fasta.gz` format, or they will be skipped by the internal file search function.
 
 - `--read_type`
   - The type of longread data you are utilising, e.g., ont, illumina, hifi.

From 8aa53411de1450a324a7f8f470a2194fc5a1d1b8 Mon Sep 17 00:00:00 2001
From: DLBPointon <damonlbp@hotmail.co.uk>
Date: Fri, 19 Sep 2025 12:14:35 +0100
Subject: [PATCH 11/13] Update ro-crate-metadata.json description to match README

---
 ro-crate-metadata.json | 27 ++++++++++++++++++++-------
 1 file changed, 20 insertions(+), 7 deletions(-)

diff --git a/ro-crate-metadata.json b/ro-crate-metadata.json
index fd7ddd4a..b39134f9 100644
--- a/ro-crate-metadata.json
+++ b/ro-crate-metadata.json
@@ -23,7 +23,7 @@
             "@type": "Dataset",
             "creativeWorkStatus": "Stable",
             "datePublished": "2025-05-27T09:34:43+00:00",
-            "description": "# ![sanger-tol/curationpretext](docs/images/curationpretext-light.png#gh-light-mode-only) ![sanger-tol/curationpretext](docs/images/curationpretext-dark.png#gh-dark-mode-only)\n\n[![GitHub Actions CI Status](https://github.com/sanger-tol/curationpretext/actions/workflows/nf-test.yml/badge.svg)](https://github.com/sanger-tol/curationpretext/actions/workflows/nf-test.yml)\n[![GitHub Actions Linting Status](https://github.com/sanger-tol/curationpretext/actions/workflows/linting.yml/badge.svg)](https://github.com/sanger-tol/curationpretext/actions/workflows/linting.yml)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.12773958-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.12773958)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/version-%E2%89%A524.10.5-green?style=flat&logo=nextflow&logoColor=white&color=%230DC09D&link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/)\n[![nf-core template version](https://img.shields.io/badge/nf--core_template-3.3.2-green?style=flat&logo=nfcore&logoColor=white&color=%2324B064&link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.3.2)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/sanger-tol/curationpretext)\n\n## Introduction\n\n**sanger-tol/curationpretext** is a bioinformatics pipeline typically used in conjunction with 
[TreeVal](https://github.com/sanger-tol/treeval) to generate pretext maps (and optionally telomeric, gap, coverage, and repeat density plots which can be ingested into pretext) for the manual curation of high quality genomes.\n\nThis is intended as a supplementary pipeline for the [treeval](https://github.com/sanger-tol/treeval) project. This pipeline can be simply used to generate pretext maps, information on how to run this pipeline can be found in the [usage documentation](https://pipelines.tol.sanger.ac.uk/curationpretext/usage).\n\n![Workflow Diagram](./docs/images/CurationPretext_1_3_0.png)\n\n1. Generate Maps - Generates pretext maps as well as a static image.\n\n2. Accessory files - Generates the repeat density, gap, telomere, and coverage tracks.\n\n## Usage\n\n> [!NOTE]\n> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\nCurrently, the pipeline uses the following flags:\n\n- `--input`\n  - The absolute path to the assembled genome in, e.g., `/path/to/assembly.fa`\n\n- `--reads`\n  - The directory of the fasta files generated from longread reads, e.g., `/path/to/fasta/`\n\n- `--read_type`\n  - The type of longread data you are utilising, e.g., ont, illumina, hifi.\n\n- `--aligner`\n  - The aligner yopu wish to use for the coverage generation, defaults to bwamem2 but minimap2 is also supported.\n\n- `--cram`\n  - The directory of the cram _and_ cram.crai files, e.g., `/path/to/cram/`\n\n- `--map_order`\n  - hic map scaffold order, input either `length` or `unsorted`\n\n- `--teloseq`\n  - A telomeric sequence, e.g., `TTAGGG`\n\n- `--all_output`\n  - An option to output all maps + accessory files, the default will only output the pretextmaps where ingestion has occured.\n\nNow, you can run the pipeline 
using:\n\n```bash\nnextflow run sanger-tol/curationpretext \\\n  --input { input.fasta } \\\n  --cram { path/to/cram/ } \\\n  --reads { path/to/longread/fasta/ } \\\n  --read_type { default is \"hifi\" }\n  --sample { default is \"pretext_rerun\" } \\\n  --teloseq { default is \"TTAGGG\" } \\\n  --map_order { default is \"unsorted\" } \\\n  --all_output <true/false> \\\n  --outdir { OUTDIR } \\\n  -profile <docker/singularity/{institute}>\n\n```\n\n> **Warning:**\n> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those\n> provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_;\n\nFor more details, please refer to the [usage documentation](https://pipelines.tol.sanger.ac.uk/curationpretext/usage) and the [parameter documentation](https://pipelines.tol.sanger.ac.uk/curationpretext/parameters).\n\n## Pipeline output\n\nTo see the the results of a test run with a full size dataset refer to the [results](https://pipelines.tol.sanger.ac.uk/curationpretext/results) tab on the sanger-tol/curationpretext website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://pipelines.tol.sanger.ac.uk/curationpretext/output).\n\n## Credits\n\nsanger-tol/curationpretext was originally written by Damon-Lee B Pointon (@DLBPointon).\n\nWe thank the following people for their extensive assistance in the development of this pipeline:\n\n- @muffato - For reviews.\n\n- @yumisims - TreeVal and Software.\n\n- @weaglesBio - TreeVal and Software.\n\n- @josieparis - Help with better docs and testing.\n\n- @mahesh-panchal - Large support with 1.2.0 in making the pipeline more robust with other HPC environments.\n\n- @GRIT - For feedback and feature requests.\n\n- @prototaxites - Support with 1.3.0 and showing me the power of GAWK.\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, 
please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\n## Citations\n\nIf you use sanger-tol/curationpretext for your analysis, please cite it using the following doi: [10.5281/zenodo.12773958](https://doi.org/10.5281/zenodo.12773958)\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nThis pipeline uses code and infrastructure developed and maintained by the [nf-core](https://nf-co.re) community, reused here under the [MIT license](https://github.com/nf-core/tools/blob/main/LICENSE).\n\n> **The nf-core framework for community-curated bioinformatics pipelines.**\n>\n> Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven Nahnsen.\n>\n> _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n",
+            "description": "# ![sanger-tol/curationpretext](docs/images/curationpretext-light.png#gh-light-mode-only) ![sanger-tol/curationpretext](docs/images/curationpretext-dark.png#gh-dark-mode-only)\n\n[![GitHub Actions CI Status](https://github.com/sanger-tol/curationpretext/actions/workflows/nf-test.yml/badge.svg)](https://github.com/sanger-tol/curationpretext/actions/workflows/nf-test.yml)\n[![GitHub Actions Linting Status](https://github.com/sanger-tol/curationpretext/actions/workflows/linting.yml/badge.svg)](https://github.com/sanger-tol/curationpretext/actions/workflows/linting.yml)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.12773958-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.12773958)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/version-%E2%89%A524.10.5-green?style=flat&logo=nextflow&logoColor=white&color=%230DC09D&link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/)\n[![nf-core template version](https://img.shields.io/badge/nf--core_template-3.3.2-green?style=flat&logo=nfcore&logoColor=white&color=%2324B064&link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.3.2)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/sanger-tol/curationpretext)\n\n## Introduction\n\n**sanger-tol/curationpretext** is a bioinformatics pipeline typically used in conjunction with 
[TreeVal](https://github.com/sanger-tol/treeval) to generate pretext maps (and optionally telomeric, gap, coverage, and repeat density plots which can be ingested into pretext) for the manual curation of high quality genomes.\n\nThis is intended as a supplementary pipeline for the [treeval](https://github.com/sanger-tol/treeval) project. This pipeline can be simply used to generate pretext maps, information on how to run this pipeline can be found in the [usage documentation](https://pipelines.tol.sanger.ac.uk/curationpretext/usage).\n\n![Workflow Diagram](./docs/images/CurationPretext_1_3_0.png)\n\n1. Generate Maps - Generates pretext maps as well as a static image.\n\n2. Accessory files - Generates the repeat density, gap, telomere, and coverage tracks.\n\n## Usage\n\n> [!NOTE]\n> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\nCurrently, the pipeline uses the following flags:\n\n- `--input`\n  - The absolute path to the assembled genome in, e.g., `/path/to/assembly.fa`\n\n- `--sample`\n  - Sample is the naming prefix of the output files, e.g. 
iyTipFemo\n\n- `--reads`\n  - The directory of the fasta files generated from longread reads, e.g., `/path/to/fasta/`\n  - This folder _must_ contain files in a `.fasta.gz` format, or they will be skipped by the internal file search function.\n\n- `--read_type`\n  - The type of longread data you are utilising, e.g., ont, illumina, hifi.\n\n- `--aligner`\n  - The aligner yopu wish to use for the coverage generation, defaults to bwamem2 but minimap2 is also supported.\n\n- `--cram`\n  - The directory of the cram _and_ cram.crai files, e.g., `/path/to/cram/`\n\n- `--map_order`\n  - hic map scaffold order, input either `length` or `unsorted`\n\n- `--teloseq`\n  - A telomeric sequence, e.g., `TTAGGG`\n\n- `--all_output`\n  - An option to output all maps + accessory files, the default will only output the pretextmaps where ingestion has occured.\n\nNow, you can run the pipeline using:\n\n```bash\nnextflow run sanger-tol/curationpretext \\\n  --input { input.fasta } \\\n  --cram { path/to/cram/ } \\\n  --reads { path/to/longread/fasta/ } \\\n  --read_type { default is \"hifi\" }\n  --sample { default is \"pretext_rerun\" } \\\n  --teloseq { default is \"TTAGGG\" } \\\n  --map_order { default is \"unsorted\" } \\\n  --all_output <true/false> \\\n  --outdir { OUTDIR } \\\n  -profile <docker/singularity/{institute}>\n\n```\n\n> **Warning:**\n> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. 
Custom config files including those\n> provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_;\n\nFor more details, please refer to the [usage documentation](https://pipelines.tol.sanger.ac.uk/curationpretext/usage) and the [parameter documentation](https://pipelines.tol.sanger.ac.uk/curationpretext/parameters).\n\n## Pipeline output\n\nTo see the the results of a test run with a full size dataset refer to the [results](https://pipelines.tol.sanger.ac.uk/curationpretext/results) tab on the sanger-tol/curationpretext website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://pipelines.tol.sanger.ac.uk/curationpretext/output).\n\n## Credits\n\nsanger-tol/curationpretext was originally written by Damon-Lee B Pointon (@DLBPointon).\n\nWe thank the following people for their extensive assistance in the development of this pipeline:\n\n- @muffato - For reviews.\n\n- @yumisims - TreeVal and Software.\n\n- @weaglesBio - TreeVal and Software.\n\n- @josieparis - Help with better docs and testing.\n\n- @mahesh-panchal - Large support with 1.2.0 in making the pipeline more robust with other HPC environments.\n\n- @GRIT - For feedback and feature requests.\n\n- @prototaxites - Support with 1.3.0 and showing me the power of GAWK.\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\n## Citations\n\nIf you use sanger-tol/curationpretext for your analysis, please cite it using the following doi: [10.5281/zenodo.12773958](https://doi.org/10.5281/zenodo.12773958)\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nThis pipeline uses code and infrastructure developed and maintained by the [nf-core](https://nf-co.re) community, reused here under the [MIT 
license](https://github.com/nf-core/tools/blob/main/LICENSE).\n\n> **The nf-core framework for community-curated bioinformatics pipelines.**\n>\n> Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven Nahnsen.\n>\n> _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n",
             "hasPart": [
                 {
                     "@id": "main.nf"
@@ -124,7 +124,11 @@
         },
         {
             "@id": "main.nf",
-            "@type": ["File", "SoftwareSourceCode", "ComputationalWorkflow"],
+            "@type": [
+                "File",
+                "SoftwareSourceCode",
+                "ComputationalWorkflow"
+            ],
             "creator": [
                 {
                     "@id": "https://orcid.org/0000-0002-7860-3560"
@@ -133,9 +137,16 @@
             "dateCreated": "",
             "dateModified": "2025-05-27T10:34:43Z",
             "dct:conformsTo": "https://bioschemas.org/profiles/ComputationalWorkflow/1.0-RELEASE/",
-            "keywords": ["nf-core", "nextflow"],
-            "license": ["MIT"],
-            "name": ["sanger-tol/curationpretext"],
+            "keywords": [
+                "nf-core",
+                "nextflow"
+            ],
+            "license": [
+                "MIT"
+            ],
+            "name": [
+                "sanger-tol/curationpretext"
+            ],
             "programmingLanguage": {
                 "@id": "https://w3id.org/workflowhub/workflow-ro-crate#nextflow"
             },
@@ -146,7 +157,9 @@
                 "https://github.com/sanger-tol/curationpretext",
                 "https://pipelines.tol.sanger.ac.uk/sanger-tol/curationpretext/1.4.2/"
             ],
-            "version": ["1.4.2"]
+            "version": [
+                "1.4.2"
+            ]
         },
         {
             "@id": "https://w3id.org/workflowhub/workflow-ro-crate#nextflow",
@@ -332,4 +345,4 @@
             "name": "Josie Paris"
         }
     ]
-}
+}
\ No newline at end of file

From a74b9e12fee0c3d92abafd8e07999bea68bf1ccf Mon Sep 17 00:00:00 2001
From: DLBPointon <damonlbp@hotmail.co.uk>
Date: Fri, 19 Sep 2025 13:26:27 +0100
Subject: [PATCH 12/13] Pre-commit linting

---
 ro-crate-metadata.json | 25 ++++++-------------------
 1 file changed, 6 insertions(+), 19 deletions(-)

diff --git a/ro-crate-metadata.json b/ro-crate-metadata.json
index b39134f9..c8de9e84 100644
--- a/ro-crate-metadata.json
+++ b/ro-crate-metadata.json
@@ -124,11 +124,7 @@
         },
         {
             "@id": "main.nf",
-            "@type": [
-                "File",
-                "SoftwareSourceCode",
-                "ComputationalWorkflow"
-            ],
+            "@type": ["File", "SoftwareSourceCode", "ComputationalWorkflow"],
             "creator": [
                 {
                     "@id": "https://orcid.org/0000-0002-7860-3560"
@@ -137,16 +133,9 @@
             "dateCreated": "",
             "dateModified": "2025-05-27T10:34:43Z",
             "dct:conformsTo": "https://bioschemas.org/profiles/ComputationalWorkflow/1.0-RELEASE/",
-            "keywords": [
-                "nf-core",
-                "nextflow"
-            ],
-            "license": [
-                "MIT"
-            ],
-            "name": [
-                "sanger-tol/curationpretext"
-            ],
+            "keywords": ["nf-core", "nextflow"],
+            "license": ["MIT"],
+            "name": ["sanger-tol/curationpretext"],
             "programmingLanguage": {
                 "@id": "https://w3id.org/workflowhub/workflow-ro-crate#nextflow"
             },
@@ -157,9 +146,7 @@
                 "https://github.com/sanger-tol/curationpretext",
                 "https://pipelines.tol.sanger.ac.uk/sanger-tol/curationpretext/1.4.2/"
             ],
-            "version": [
-                "1.4.2"
-            ]
+            "version": ["1.4.2"]
         },
         {
             "@id": "https://w3id.org/workflowhub/workflow-ro-crate#nextflow",
@@ -345,4 +332,4 @@
             "name": "Josie Paris"
         }
     ]
-}
\ No newline at end of file
+}

From b921914fa9757083ef9a2f7712689611a7db6a0a Mon Sep 17 00:00:00 2001
From: DLBPointon <damonlbp@hotmail.co.uk>
Date: Fri, 19 Sep 2025 13:50:09 +0100
Subject: [PATCH 13/13] Update CHANGELOG with template version, doc and CI notes

---
 CHANGELOG.md | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 901bff0c..77dbc52b 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,7 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ### Added and Fixed
 
-- Template update to 3.3.3. <TODO in next PR>.
+- Template update to 3.3.2.
 - Addition of the `--split_telomere` boolean flag, this is false by default.
   - When `true` the pipeline will split the telomere file into a 5 and 3 prime file.
 - Update `ACCESSORY_FILES` subworkflow:
@@ -28,6 +28,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Update tests.
 - Added an "AUTO" value to the `--aligner` arg. If a genome is >5Gb it will use minimap2 else bwamem2.
 - Parity update for the base.config to match TreeVal.
+- Minor Doc updates.
+- Remove conda from the nf-test CI profile matrix, as the pipeline does not support conda.
 
 ### Paramters