sanger-tol · DLBPointon · Sep 22, 2025 · Sep 16, 2025 · Sep 16, 2025 · Sep 16, 2025
diff --git a/.github/workflows/nf-test.yml b/.github/workflows/nf-test.yml
@@ -66,13 +66,13 @@ jobs:
       fail-fast: false
       matrix:
         shard: ${{ fromJson(needs.nf-test-changes.outputs.shard) }}
-        profile: [conda, docker, singularity]
+        profile: [docker, singularity]
         isMain:
           - ${{ github.base_ref == 'master' || github.base_ref == 'main' }}
         # Exclude conda and singularity on dev
         exclude:
-          - isMain: false
-            profile: "conda"
+          # - isMain: false
+          #   profile: "conda"
           - isMain: false
             profile: "singularity"
         NXF_VER:

diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -7,7 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ### Added and Fixed
 
-- Template update to 3.3.3. <TODO in next PR>.
+- Template update to 3.3.2.
 - Addition of the `--split_telomere` boolean flag, this is false by default.
   - When `true` the pipeline will split the telomere file into a 5 and 3 prime file.
 - Update `ACCESSORY_FILES` subworkflow:
@@ -26,6 +26,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Added the `gawk_split_directions.awk` script for split telomere.
 - Addition of GUNZIP for the input reference genome.
 - Update tests.
+- Added an "AUTO" value to the `--aligner` argument. If the genome is >5Gb, the pipeline will use minimap2; otherwise it will use bwamem2.
+- Parity update for the base.config to match TreeVal.
+- Minor Doc updates.
+- Commented out the conda profile in the CI test workflow, as the pipeline does not support conda.
 
 ### Paramters
 

diff --git a/README.md b/README.md
@@ -33,8 +33,12 @@ Currently, the pipeline uses the following flags:
 - `--input`
   - The absolute path to the assembled genome in, e.g., `/path/to/assembly.fa`
 
+- `--sample`
+  - Sample is the naming prefix of the output files, e.g. iyTipFemo
+
 - `--reads`
   - The directory of the fasta files generated from longread reads, e.g., `/path/to/fasta/`
+  - This folder _must_ contain files in a `.fasta.gz` format, or they will be skipped by the internal file search function.
 
 - `--read_type`
   - The type of longread data you are utilising, e.g., ont, illumina, hifi.

diff --git a/conf/base.config b/conf/base.config
@@ -21,32 +21,51 @@ process {
     withName:SAMTOOLS_MERGE {
         cpus    = { 16                          }
         memory  = { 50.GB     * task.attempt    }
-        time    = { 20.h      * task.attempt    }
+        time    = { 30.h      * task.attempt    }
     }
 
     withName: '.*:.*:LONGREAD_COVERAGE:(MINIMAP2_ALIGN|MINIMAP2_ALIGN_SPLIT)' {
-        cpus    = { 16                          }
-        memory  = { 1.GB     * ( reference.size() < 2e9 ? 50 : Math.ceil( ( reference.size() / 1e+9 ) * 20 ) * Math.ceil( task.attempt * 1 )) }
+        cpus   = { 20   * 1 }
+        memory = {
+                1.GB * (
+                reference.size() < 2e9 ? 30 :
+                (reference.size() < 5e9 ? 40 :
+                (reference.size() < 10e9 ? 60 :
+                Math.ceil((reference.size() / 1e9) * 3)
+                    )
+                )
+            ) * Math.ceil(task.attempt * 1)
+        }
+        time   = { 1.h  * ( reference.size() < 1e9 ? 10 : reference.size() < 10e9 ? 30 : 48) }
     }
 
     withName: CRAM_FILTER_ALIGN_BWAMEM2_FIXMATE_SORT {
-        cpus    = { 16 }
-        memory  = { 1.GB     * ( reference.size() < 2e9 ? 80 : Math.ceil( ( reference.size() / 1e+9 ) * 50 ) * Math.ceil( task.attempt * 1 ) ) }
+        cpus    = { 16   * 1 }
+        memory  = { 1.GB * ( reference.size() < 2e9 ? 80 : Math.ceil( ( reference.size() / 1e+9 ) * 30 ) * Math.ceil( task.attempt * 1 ) ) }
     }
 
     withName: CRAM_FILTER_MINIMAP2_FILTER5END_FIXMATE_SORT {
-        cpus    = { 16 }
-        memory  = { 1.GB     * ( reference.size() < 2e9 ? 50 : Math.ceil( ( reference.size() / 1e+9 ) * 8 ) * Math.ceil( task.attempt * 1 ) ) }
+        cpus   = { 16   * 1 }
+        memory = {
+                1.GB * (
+                reference.size() < 2e9 ? 30 :
+                (reference.size() < 5e9 ? 40 :
+                (reference.size() < 10e9 ? 60 :
+                Math.ceil((reference.size() / 1e9) * 3)
+                    )
+                )
+            ) * Math.ceil(task.attempt * 1)
+        }
     }
 
     withName: PRETEXT_GRAPH {
         memory  = { 128.MB   * Math.ceil( task.attempt * 1.5 ) }
     }
 
     withName: PRETEXTMAP_STANDRD{
-        cpus    = { 8        * task.attempt }
+        cpus    = { 8        * 1 }
         memory  = { 3.GB     * task.attempt }
-        time   =  { 1.h      * ( ( fasta.size() < 4e9 ? 24 : 48 ) * Math.ceil( task.attempt * 1 ) ) }
+        time    = { 1.h      * ( ( fasta.size() < 4e9 ? 24 : 48 ) * task.attempt ) }
     }
 
     withName: PRETEXTMAP_HIGHRES {

diff --git a/docs/usage.md b/docs/usage.md
@@ -10,6 +10,8 @@ Currently, the pipeline expects input data to be in a specific format.
 
 The `--input` should be `.fasta` or `.fa` (the same format but differing suffix).
 
+The `--sample` should be your chosen naming prefix for the output files.
+
 The `--cram` should point to the folder containing `.cram` files along with a `.crai` per `.cram`.
 
 The `--reads` should point to the folder containing `.fasta.gz` files.

diff --git a/nextflow.config b/nextflow.config
@@ -17,7 +17,7 @@ params {
     teloseq                    = "TTAGGG"
     reads                      = null
     cram                       = null
-    aligner                    = "bwamem2"
+    aligner                    = "AUTO"
     read_type                  = "hifi"
     map_order                  = "unsorted"
     all_output                 = false

diff --git a/nextflow_schema.json b/nextflow_schema.json
@@ -69,9 +69,9 @@
                 "aligner": {
                     "type": "string",
                     "description": "Aligner for use {minimap2, bwamem2} in generating map",
-                    "help_text": "Pick between {minimap2, bwamem2}. Defaults to 'minimap2'",
+                    "help_text": "Pick between {minimap2, bwamem2, AUTO}. Defaults to 'AUTO'",
                     "fa_icon": "fas fa-file-signature",
-                    "enum": ["bwamem2", "minimap2"]
+                    "enum": ["bwamem2", "minimap2", "AUTO"]
                 },
                 "run_hires": {
                     "type": "boolean",

diff --git a/ro-crate-metadata.json b/ro-crate-metadata.json
@@ -23,7 +23,7 @@
             "@type": "Dataset",
             "creativeWorkStatus": "Stable",
             "datePublished": "2025-05-27T09:34:43+00:00",
-            "description": "# ![sanger-tol/curationpretext](docs/images/curationpretext-light.png#gh-light-mode-only) ![sanger-tol/curationpretext](docs/images/curationpretext-dark.png#gh-dark-mode-only)\n\n[![GitHub Actions CI Status](https://github.com/sanger-tol/curationpretext/actions/workflows/nf-test.yml/badge.svg)](https://github.com/sanger-tol/curationpretext/actions/workflows/nf-test.yml)\n[![GitHub Actions Linting Status](https://github.com/sanger-tol/curationpretext/actions/workflows/linting.yml/badge.svg)](https://github.com/sanger-tol/curationpretext/actions/workflows/linting.yml)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.12773958-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.12773958)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/version-%E2%89%A524.10.5-green?style=flat&logo=nextflow&logoColor=white&color=%230DC09D&link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/)\n[![nf-core template version](https://img.shields.io/badge/nf--core_template-3.3.2-green?style=flat&logo=nfcore&logoColor=white&color=%2324B064&link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.3.2)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/sanger-tol/curationpretext)\n\n## Introduction\n\n**sanger-tol/curationpretext** is a bioinformatics pipeline typically used in conjunction with 
[TreeVal](https://github.com/sanger-tol/treeval) to generate pretext maps (and optionally telomeric, gap, coverage, and repeat density plots which can be ingested into pretext) for the manual curation of high quality genomes.\n\nThis is intended as a supplementary pipeline for the [treeval](https://github.com/sanger-tol/treeval) project. This pipeline can be simply used to generate pretext maps, information on how to run this pipeline can be found in the [usage documentation](https://pipelines.tol.sanger.ac.uk/curationpretext/usage).\n\n![Workflow Diagram](./docs/images/CurationPretext_1_3_0.png)\n\n1. Generate Maps - Generates pretext maps as well as a static image.\n\n2. Accessory files - Generates the repeat density, gap, telomere, and coverage tracks.\n\n## Usage\n\n> [!NOTE]\n> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\nCurrently, the pipeline uses the following flags:\n\n- `--input`\n  - The absolute path to the assembled genome in, e.g., `/path/to/assembly.fa`\n\n- `--reads`\n  - The directory of the fasta files generated from longread reads, e.g., `/path/to/fasta/`\n\n- `--read_type`\n  - The type of longread data you are utilising, e.g., ont, illumina, hifi.\n\n- `--aligner`\n  - The aligner yopu wish to use for the coverage generation, defaults to bwamem2 but minimap2 is also supported.\n\n- `--cram`\n  - The directory of the cram _and_ cram.crai files, e.g., `/path/to/cram/`\n\n- `--map_order`\n  - hic map scaffold order, input either `length` or `unsorted`\n\n- `--teloseq`\n  - A telomeric sequence, e.g., `TTAGGG`\n\n- `--all_output`\n  - An option to output all maps + accessory files, the default will only output the pretextmaps where ingestion has occured.\n\nNow, you can run the pipeline 
using:\n\n```bash\nnextflow run sanger-tol/curationpretext \\\n  --input { input.fasta } \\\n  --cram { path/to/cram/ } \\\n  --reads { path/to/longread/fasta/ } \\\n  --read_type { default is \"hifi\" }\n  --sample { default is \"pretext_rerun\" } \\\n  --teloseq { default is \"TTAGGG\" } \\\n  --map_order { default is \"unsorted\" } \\\n  --all_output <true/false> \\\n  --outdir { OUTDIR } \\\n  -profile <docker/singularity/{institute}>\n\n```\n\n> **Warning:**\n> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those\n> provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_;\n\nFor more details, please refer to the [usage documentation](https://pipelines.tol.sanger.ac.uk/curationpretext/usage) and the [parameter documentation](https://pipelines.tol.sanger.ac.uk/curationpretext/parameters).\n\n## Pipeline output\n\nTo see the the results of a test run with a full size dataset refer to the [results](https://pipelines.tol.sanger.ac.uk/curationpretext/results) tab on the sanger-tol/curationpretext website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://pipelines.tol.sanger.ac.uk/curationpretext/output).\n\n## Credits\n\nsanger-tol/curationpretext was originally written by Damon-Lee B Pointon (@DLBPointon).\n\nWe thank the following people for their extensive assistance in the development of this pipeline:\n\n- @muffato - For reviews.\n\n- @yumisims - TreeVal and Software.\n\n- @weaglesBio - TreeVal and Software.\n\n- @josieparis - Help with better docs and testing.\n\n- @mahesh-panchal - Large support with 1.2.0 in making the pipeline more robust with other HPC environments.\n\n- @GRIT - For feedback and feature requests.\n\n- @prototaxites - Support with 1.3.0 and showing me the power of GAWK.\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, 
please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\n## Citations\n\nIf you use sanger-tol/curationpretext for your analysis, please cite it using the following doi: [10.5281/zenodo.12773958](https://doi.org/10.5281/zenodo.12773958)\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nThis pipeline uses code and infrastructure developed and maintained by the [nf-core](https://nf-co.re) community, reused here under the [MIT license](https://github.com/nf-core/tools/blob/main/LICENSE).\n\n> **The nf-core framework for community-curated bioinformatics pipelines.**\n>\n> Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven Nahnsen.\n>\n> _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n",
+            "description": "# ![sanger-tol/curationpretext](docs/images/curationpretext-light.png#gh-light-mode-only) ![sanger-tol/curationpretext](docs/images/curationpretext-dark.png#gh-dark-mode-only)\n\n[![GitHub Actions CI Status](https://github.com/sanger-tol/curationpretext/actions/workflows/nf-test.yml/badge.svg)](https://github.com/sanger-tol/curationpretext/actions/workflows/nf-test.yml)\n[![GitHub Actions Linting Status](https://github.com/sanger-tol/curationpretext/actions/workflows/linting.yml/badge.svg)](https://github.com/sanger-tol/curationpretext/actions/workflows/linting.yml)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.12773958-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.12773958)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/version-%E2%89%A524.10.5-green?style=flat&logo=nextflow&logoColor=white&color=%230DC09D&link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/)\n[![nf-core template version](https://img.shields.io/badge/nf--core_template-3.3.2-green?style=flat&logo=nfcore&logoColor=white&color=%2324B064&link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.3.2)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/sanger-tol/curationpretext)\n\n## Introduction\n\n**sanger-tol/curationpretext** is a bioinformatics pipeline typically used in conjunction with 
[TreeVal](https://github.com/sanger-tol/treeval) to generate pretext maps (and optionally telomeric, gap, coverage, and repeat density plots which can be ingested into pretext) for the manual curation of high quality genomes.\n\nThis is intended as a supplementary pipeline for the [treeval](https://github.com/sanger-tol/treeval) project. This pipeline can be simply used to generate pretext maps, information on how to run this pipeline can be found in the [usage documentation](https://pipelines.tol.sanger.ac.uk/curationpretext/usage).\n\n![Workflow Diagram](./docs/images/CurationPretext_1_3_0.png)\n\n1. Generate Maps - Generates pretext maps as well as a static image.\n\n2. Accessory files - Generates the repeat density, gap, telomere, and coverage tracks.\n\n## Usage\n\n> [!NOTE]\n> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\nCurrently, the pipeline uses the following flags:\n\n- `--input`\n  - The absolute path to the assembled genome in, e.g., `/path/to/assembly.fa`\n\n- `--sample`\n  - Sample is the naming prefix of the output files, e.g. 
iyTipFemo\n\n- `--reads`\n  - The directory of the fasta files generated from longread reads, e.g., `/path/to/fasta/`\n  - This folder _must_ contain files in a `.fasta.gz` format, or they will be skipped by the internal file search function.\n\n- `--read_type`\n  - The type of longread data you are utilising, e.g., ont, illumina, hifi.\n\n- `--aligner`\n  - The aligner yopu wish to use for the coverage generation, defaults to bwamem2 but minimap2 is also supported.\n\n- `--cram`\n  - The directory of the cram _and_ cram.crai files, e.g., `/path/to/cram/`\n\n- `--map_order`\n  - hic map scaffold order, input either `length` or `unsorted`\n\n- `--teloseq`\n  - A telomeric sequence, e.g., `TTAGGG`\n\n- `--all_output`\n  - An option to output all maps + accessory files, the default will only output the pretextmaps where ingestion has occured.\n\nNow, you can run the pipeline using:\n\n```bash\nnextflow run sanger-tol/curationpretext \\\n  --input { input.fasta } \\\n  --cram { path/to/cram/ } \\\n  --reads { path/to/longread/fasta/ } \\\n  --read_type { default is \"hifi\" }\n  --sample { default is \"pretext_rerun\" } \\\n  --teloseq { default is \"TTAGGG\" } \\\n  --map_order { default is \"unsorted\" } \\\n  --all_output <true/false> \\\n  --outdir { OUTDIR } \\\n  -profile <docker/singularity/{institute}>\n\n```\n\n> **Warning:**\n> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. 
Custom config files including those\n> provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_;\n\nFor more details, please refer to the [usage documentation](https://pipelines.tol.sanger.ac.uk/curationpretext/usage) and the [parameter documentation](https://pipelines.tol.sanger.ac.uk/curationpretext/parameters).\n\n## Pipeline output\n\nTo see the the results of a test run with a full size dataset refer to the [results](https://pipelines.tol.sanger.ac.uk/curationpretext/results) tab on the sanger-tol/curationpretext website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://pipelines.tol.sanger.ac.uk/curationpretext/output).\n\n## Credits\n\nsanger-tol/curationpretext was originally written by Damon-Lee B Pointon (@DLBPointon).\n\nWe thank the following people for their extensive assistance in the development of this pipeline:\n\n- @muffato - For reviews.\n\n- @yumisims - TreeVal and Software.\n\n- @weaglesBio - TreeVal and Software.\n\n- @josieparis - Help with better docs and testing.\n\n- @mahesh-panchal - Large support with 1.2.0 in making the pipeline more robust with other HPC environments.\n\n- @GRIT - For feedback and feature requests.\n\n- @prototaxites - Support with 1.3.0 and showing me the power of GAWK.\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\n## Citations\n\nIf you use sanger-tol/curationpretext for your analysis, please cite it using the following doi: [10.5281/zenodo.12773958](https://doi.org/10.5281/zenodo.12773958)\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nThis pipeline uses code and infrastructure developed and maintained by the [nf-core](https://nf-co.re) community, reused here under the [MIT 
license](https://github.com/nf-core/tools/blob/main/LICENSE).\n\n> **The nf-core framework for community-curated bioinformatics pipelines.**\n>\n> Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven Nahnsen.\n>\n> _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n",
             "hasPart": [
                 {
                     "@id": "main.nf"