From bd428b344bbfdaf715f344509a81feb2a0eb08bd Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Tue, 16 Sep 2025 12:28:34 +0100 Subject: [PATCH 01/13] Add switch for large genomes to switch over to MINIMAP2 rather than end-user selection --- .../utils_nfcore_curationpretext_pipeline/main.nf | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/subworkflows/local/utils_nfcore_curationpretext_pipeline/main.nf b/subworkflows/local/utils_nfcore_curationpretext_pipeline/main.nf index c91cb7d3..8a3b4670 100644 --- a/subworkflows/local/utils_nfcore_curationpretext_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_curationpretext_pipeline/main.nf @@ -79,17 +79,24 @@ workflow PIPELINE_INITIALISATION { type: 'dir' ) - ch_reference = input_fasta.map { fasta -> + ch_reference = input_fasta.map { fasta -> + def fasta_size = fasta.size() + def selected_aligner = (params_aligner == "AUTO") ? + (fasta_size > 5e9 ? "minimap2" : "bwamem2") : + params.aligner + tuple( - [ id: params.sample, - aligner: params.aligner, + [ + id: params.sample, + aligner: selected_aligner, map_order: params.map_order, - ref_size: fasta.size(), + ref_size: fasta_size, ], fasta ) } + ch_cram_reads = cram_dir.map { dir -> tuple( [ id: params.sample ], From 512ac147e39939c661078d4165dbadc6a1e4539a Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Tue, 16 Sep 2025 12:28:52 +0100 Subject: [PATCH 02/13] Adding AUTO as option --- nextflow.config | 2 +- nextflow_schema.json | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/nextflow.config b/nextflow.config index cd1f3d60..b86dba7b 100644 --- a/nextflow.config +++ b/nextflow.config @@ -17,7 +17,7 @@ params { teloseq = "TTAGGG" reads = null cram = null - aligner = "bwamem2" + aligner = "AUTO" read_type = "hifi" map_order = "unsorted" all_output = false diff --git a/nextflow_schema.json b/nextflow_schema.json index b3d71f02..06b76aca 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -69,9 +69,9 @@ 
"aligner": { "type": "string", "description": "Aligner for use {minimap2, bwamem2} in generating map", - "help_text": "Pick between {minimap2, bwamem2}. Defaults to 'minimap2'", + "help_text": "Pick between {minimap2, bwamem2, AUTO}. Defaults to 'AUTO', which selects minimap2 for genomes larger than 5Gb and bwamem2 otherwise.", "fa_icon": "fas fa-file-signature", - "enum": ["bwamem2", "minimap2"] + "enum": ["bwamem2", "minimap2", "AUTO"] }, "run_hires": { "type": "boolean", From d7e6c97c932f973cd82ffcfd1cd7efe061d57c56 Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Tue, 16 Sep 2025 12:29:28 +0100 Subject: [PATCH 03/13] TreeVal Parity for the resource configs --- conf/base.config | 37 ++++++++++++++++++++++++++++--------- 1 file changed, 28 insertions(+), 9 deletions(-) diff --git a/conf/base.config b/conf/base.config index 16ee6a42..9ca32fe1 100644 --- a/conf/base.config +++ b/conf/base.config @@ -21,22 +21,41 @@ process { withName:SAMTOOLS_MERGE { cpus = { 16 } memory = { 50.GB * task.attempt } - time = { 20.h * task.attempt } + time = { 30.h * task.attempt } } withName: '.*:.*:LONGREAD_COVERAGE:(MINIMAP2_ALIGN|MINIMAP2_ALIGN_SPLIT)' { - cpus = { 16 } - memory = { 1.GB * ( reference.size() < 2e9 ? 50 : Math.ceil( ( reference.size() / 1e+9 ) * 20 ) * Math.ceil( task.attempt * 1 )) } + cpus = { 20 * 1 } + memory = { + 1.GB * ( + reference.size() < 2e9 ? 30 : + (reference.size() < 5e9 ? 40 : + (reference.size() < 10e9 ? 60 : + Math.ceil((reference.size() / 1e9) * 3) + ) + ) + ) * Math.ceil(task.attempt * 1) + } + time = { 1.h * ( reference.size() < 1e9 ? 10 : reference.size() < 10e9 ? 30 : 48) } } withName: CRAM_FILTER_ALIGN_BWAMEM2_FIXMATE_SORT { - cpus = { 16 } - memory = { 1.GB * ( reference.size() < 2e9 ? 80 : Math.ceil( ( reference.size() / 1e+9 ) * 50 ) * Math.ceil( task.attempt * 1 ) ) } + cpus = { 16 * 1 } + memory = { 1.GB * ( reference.size() < 2e9 ? 
80 : Math.ceil( ( reference.size() / 1e+9 ) * 30 ) * Math.ceil( task.attempt * 1 ) ) } } withName: CRAM_FILTER_MINIMAP2_FILTER5END_FIXMATE_SORT { - cpus = { 16 } - memory = { 1.GB * ( reference.size() < 2e9 ? 50 : Math.ceil( ( reference.size() / 1e+9 ) * 8 ) * Math.ceil( task.attempt * 1 ) ) } + cpus = { 16 * 1 } + memory = { + 1.GB * ( + reference.size() < 2e9 ? 30 : + (reference.size() < 5e9 ? 40 : + (reference.size() < 10e9 ? 60 : + Math.ceil((reference.size() / 1e9) * 3) + ) + ) + ) * Math.ceil(task.attempt * 1) + } } withName: PRETEXT_GRAPH { @@ -44,9 +63,9 @@ process { } withName: PRETEXTMAP_STANDRD{ - cpus = { 8 * task.attempt } + cpus = { 8 * 1 } memory = { 3.GB * task.attempt } - time = { 1.h * ( ( fasta.size() < 4e9 ? 24 : 48 ) * Math.ceil( task.attempt * 1 ) ) } + time = { 1.h * ( ( fasta.size() < 4e9 ? 24 : 48 ) * task.attempt ) } } withName: PRETEXTMAP_HIGHRES { From 08462959ec31fd396a9e1b5657d85a09f72b3d2e Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Tue, 16 Sep 2025 12:36:50 +0100 Subject: [PATCH 04/13] Fat finger spelling mistake --- .../local/utils_nfcore_curationpretext_pipeline/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subworkflows/local/utils_nfcore_curationpretext_pipeline/main.nf b/subworkflows/local/utils_nfcore_curationpretext_pipeline/main.nf index 8a3b4670..f0882ce6 100644 --- a/subworkflows/local/utils_nfcore_curationpretext_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_curationpretext_pipeline/main.nf @@ -81,7 +81,7 @@ workflow PIPELINE_INITIALISATION { ch_reference = input_fasta.map { fasta -> def fasta_size = fasta.size() - def selected_aligner = (params_aligner == "AUTO") ? + def selected_aligner = (params.aligner == "AUTO") ? (fasta_size > 5e9 ? 
"minimap2" : "bwamem2") : params.aligner From f799ad8a533f2aaedd79d9e633aded8d3a16a3ec Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Tue, 16 Sep 2025 12:51:10 +0100 Subject: [PATCH 05/13] Pipeline doesn't support conda --- .github/workflows/nf-test.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/nf-test.yml b/.github/workflows/nf-test.yml index 593c9360..f43d5b74 100644 --- a/.github/workflows/nf-test.yml +++ b/.github/workflows/nf-test.yml @@ -71,8 +71,8 @@ jobs: - ${{ github.base_ref == 'master' || github.base_ref == 'main' }} # Exclude conda and singularity on dev exclude: - - isMain: false - profile: "conda" + # - isMain: false + # profile: "conda" - isMain: false profile: "singularity" NXF_VER: From 48f7b5ce7d0530e3b0c19b0eaad51d4d15057045 Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Tue, 16 Sep 2025 13:40:23 +0100 Subject: [PATCH 06/13] Update CHANGELOG --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index fe9a0345..901bff0c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -26,6 +26,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Added the `gawk_split_directions.awk` script for split telomere. - Addition of GUNZIP for the input reference genome. - Update tests. +- Added an "AUTO" value to the `--aligner` arg. If a genome is >5Gb it will use minimap2 else bwamem2. +- Parity update for the base.config to match TreeVal. 
### Paramters From f39eecfb6ac189bee8c3bb4d618df2b24792ce23 Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Tue, 16 Sep 2025 13:41:21 +0100 Subject: [PATCH 07/13] remove conda this time --- .github/workflows/nf-test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/nf-test.yml b/.github/workflows/nf-test.yml index f43d5b74..e113a611 100644 --- a/.github/workflows/nf-test.yml +++ b/.github/workflows/nf-test.yml @@ -66,7 +66,7 @@ jobs: fail-fast: false matrix: shard: ${{ fromJson(needs.nf-test-changes.outputs.shard) }} - profile: [conda, docker, singularity] + profile: [docker, singularity] isMain: - ${{ github.base_ref == 'master' || github.base_ref == 'main' }} # Exclude conda and singularity on dev From dfa8f0b01388fe3840de57afaaf635b26bfd147d Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Fri, 19 Sep 2025 11:45:02 +0100 Subject: [PATCH 08/13] Update some text --- README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README.md b/README.md index d68799fe..8553501e 100644 --- a/README.md +++ b/README.md @@ -33,8 +33,12 @@ Currently, the pipeline uses the following flags: - `--input` - The absolute path to the assembled genome in, e.g., `/path/to/assembly.fa` +- `--sample` + - Sample is the naming prefix of the output files, e.g. iyTipFemo + - `--reads` - The directory of the fasta files generated from longread reads, e.g., `/path/to/fasta/` + - This folder *must* contain files in a `.fasta.gz` format, or they will be skipped by the internal file search function. - `--read_type` - The type of longread data you are utilising, e.g., ont, illumina, hifi. 
From 39d021ff1d6d9e7037c8894a3f2deddbaccc59b6 Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Fri, 19 Sep 2025 11:47:20 +0100 Subject: [PATCH 09/13] add line on sample --- docs/usage.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/usage.md b/docs/usage.md index 842b4cdd..c723c594 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -10,6 +10,8 @@ Currently, the pipeline expects input data to be in a specific format. The `--input` should be `.fasta` or `.fa` (the same format but differing suffix). +The `--sample` is your chosen naming prefix for the output files. + The `--cram` should point to the folder containing `.cram` files along with a `.crai` per `.cram`. The `--reads` should point to the folder containing `.fasta.gz` files. From be1c6920893e130ba69f6156f000988eb272569b Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Fri, 19 Sep 2025 11:53:06 +0100 Subject: [PATCH 10/13] linting --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 8553501e..52b99db1 100644 --- a/README.md +++ b/README.md @@ -38,7 +38,7 @@ Currently, the pipeline uses the following flags: - `--reads` - The directory of the fasta files generated from longread reads, e.g., `/path/to/fasta/` - - This folder *must* contain files in a `.fasta.gz` format, or they will be skipped by the internal file search function. + - This folder _must_ contain files in a `.fasta.gz` format, or they will be skipped by the internal file search function. - `--read_type` - The type of longread data you are utilising, e.g., ont, illumina, hifi. 
From 8aa53411de1450a324a7f8f470a2194fc5a1d1b8 Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Fri, 19 Sep 2025 12:14:35 +0100 Subject: [PATCH 11/13] Update --- ro-crate-metadata.json | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/ro-crate-metadata.json b/ro-crate-metadata.json index fd7ddd4a..b39134f9 100644 --- a/ro-crate-metadata.json +++ b/ro-crate-metadata.json @@ -23,7 +23,7 @@ "@type": "Dataset", "creativeWorkStatus": "Stable", "datePublished": "2025-05-27T09:34:43+00:00", - "description": "# ![sanger-tol/curationpretext](docs/images/curationpretext-light.png#gh-light-mode-only) ![sanger-tol/curationpretext](docs/images/curationpretext-dark.png#gh-dark-mode-only)\n\n[![GitHub Actions CI Status](https://github.com/sanger-tol/curationpretext/actions/workflows/nf-test.yml/badge.svg)](https://github.com/sanger-tol/curationpretext/actions/workflows/nf-test.yml)\n[![GitHub Actions Linting Status](https://github.com/sanger-tol/curationpretext/actions/workflows/linting.yml/badge.svg)](https://github.com/sanger-tol/curationpretext/actions/workflows/linting.yml)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.12773958-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.12773958)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/version-%E2%89%A524.10.5-green?style=flat&logo=nextflow&logoColor=white&color=%230DC09D&link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/)\n[![nf-core template version](https://img.shields.io/badge/nf--core_template-3.3.2-green?style=flat&logo=nfcore&logoColor=white&color=%2324B064&link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.3.2)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with 
docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/sanger-tol/curationpretext)\n\n## Introduction\n\n**sanger-tol/curationpretext** is a bioinformatics pipeline typically used in conjunction with [TreeVal](https://github.com/sanger-tol/treeval) to generate pretext maps (and optionally telomeric, gap, coverage, and repeat density plots which can be ingested into pretext) for the manual curation of high quality genomes.\n\nThis is intended as a supplementary pipeline for the [treeval](https://github.com/sanger-tol/treeval) project. This pipeline can be simply used to generate pretext maps, information on how to run this pipeline can be found in the [usage documentation](https://pipelines.tol.sanger.ac.uk/curationpretext/usage).\n\n![Workflow Diagram](./docs/images/CurationPretext_1_3_0.png)\n\n1. Generate Maps - Generates pretext maps as well as a static image.\n\n2. Accessory files - Generates the repeat density, gap, telomere, and coverage tracks.\n\n## Usage\n\n> [!NOTE]\n> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. 
Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\nCurrently, the pipeline uses the following flags:\n\n- `--input`\n - The absolute path to the assembled genome in, e.g., `/path/to/assembly.fa`\n\n- `--reads`\n - The directory of the fasta files generated from longread reads, e.g., `/path/to/fasta/`\n\n- `--read_type`\n - The type of longread data you are utilising, e.g., ont, illumina, hifi.\n\n- `--aligner`\n - The aligner yopu wish to use for the coverage generation, defaults to bwamem2 but minimap2 is also supported.\n\n- `--cram`\n - The directory of the cram _and_ cram.crai files, e.g., `/path/to/cram/`\n\n- `--map_order`\n - hic map scaffold order, input either `length` or `unsorted`\n\n- `--teloseq`\n - A telomeric sequence, e.g., `TTAGGG`\n\n- `--all_output`\n - An option to output all maps + accessory files, the default will only output the pretextmaps where ingestion has occured.\n\nNow, you can run the pipeline using:\n\n```bash\nnextflow run sanger-tol/curationpretext \\\n --input { input.fasta } \\\n --cram { path/to/cram/ } \\\n --reads { path/to/longread/fasta/ } \\\n --read_type { default is \"hifi\" }\n --sample { default is \"pretext_rerun\" } \\\n --teloseq { default is \"TTAGGG\" } \\\n --map_order { default is \"unsorted\" } \\\n --all_output \\\n --outdir { OUTDIR } \\\n -profile \n\n```\n\n> **Warning:**\n> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. 
Custom config files including those\n> provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_;\n\nFor more details, please refer to the [usage documentation](https://pipelines.tol.sanger.ac.uk/curationpretext/usage) and the [parameter documentation](https://pipelines.tol.sanger.ac.uk/curationpretext/parameters).\n\n## Pipeline output\n\nTo see the the results of a test run with a full size dataset refer to the [results](https://pipelines.tol.sanger.ac.uk/curationpretext/results) tab on the sanger-tol/curationpretext website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://pipelines.tol.sanger.ac.uk/curationpretext/output).\n\n## Credits\n\nsanger-tol/curationpretext was originally written by Damon-Lee B Pointon (@DLBPointon).\n\nWe thank the following people for their extensive assistance in the development of this pipeline:\n\n- @muffato - For reviews.\n\n- @yumisims - TreeVal and Software.\n\n- @weaglesBio - TreeVal and Software.\n\n- @josieparis - Help with better docs and testing.\n\n- @mahesh-panchal - Large support with 1.2.0 in making the pipeline more robust with other HPC environments.\n\n- @GRIT - For feedback and feature requests.\n\n- @prototaxites - Support with 1.3.0 and showing me the power of GAWK.\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\n## Citations\n\nIf you use sanger-tol/curationpretext for your analysis, please cite it using the following doi: [10.5281/zenodo.12773958](https://doi.org/10.5281/zenodo.12773958)\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nThis pipeline uses code and infrastructure developed and maintained by the [nf-core](https://nf-co.re) community, reused here under the [MIT 
license](https://github.com/nf-core/tools/blob/main/LICENSE).\n\n> **The nf-core framework for community-curated bioinformatics pipelines.**\n>\n> Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven Nahnsen.\n>\n> _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n", + "description": "# ![sanger-tol/curationpretext](docs/images/curationpretext-light.png#gh-light-mode-only) ![sanger-tol/curationpretext](docs/images/curationpretext-dark.png#gh-dark-mode-only)\n\n[![GitHub Actions CI Status](https://github.com/sanger-tol/curationpretext/actions/workflows/nf-test.yml/badge.svg)](https://github.com/sanger-tol/curationpretext/actions/workflows/nf-test.yml)\n[![GitHub Actions Linting Status](https://github.com/sanger-tol/curationpretext/actions/workflows/linting.yml/badge.svg)](https://github.com/sanger-tol/curationpretext/actions/workflows/linting.yml)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.12773958-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.12773958)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/version-%E2%89%A524.10.5-green?style=flat&logo=nextflow&logoColor=white&color=%230DC09D&link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/)\n[![nf-core template version](https://img.shields.io/badge/nf--core_template-3.3.2-green?style=flat&logo=nfcore&logoColor=white&color=%2324B064&link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.3.2)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)\n[![run with 
singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/sanger-tol/curationpretext)\n\n## Introduction\n\n**sanger-tol/curationpretext** is a bioinformatics pipeline typically used in conjunction with [TreeVal](https://github.com/sanger-tol/treeval) to generate pretext maps (and optionally telomeric, gap, coverage, and repeat density plots which can be ingested into pretext) for the manual curation of high quality genomes.\n\nThis is intended as a supplementary pipeline for the [treeval](https://github.com/sanger-tol/treeval) project. This pipeline can be simply used to generate pretext maps, information on how to run this pipeline can be found in the [usage documentation](https://pipelines.tol.sanger.ac.uk/curationpretext/usage).\n\n![Workflow Diagram](./docs/images/CurationPretext_1_3_0.png)\n\n1. Generate Maps - Generates pretext maps as well as a static image.\n\n2. Accessory files - Generates the repeat density, gap, telomere, and coverage tracks.\n\n## Usage\n\n> [!NOTE]\n> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\nCurrently, the pipeline uses the following flags:\n\n- `--input`\n - The absolute path to the assembled genome in, e.g., `/path/to/assembly.fa`\n\n- `--sample`\n - Sample is the naming prefix of the output files, e.g. 
iyTipFemo\n\n- `--reads`\n - The directory of the fasta files generated from longread reads, e.g., `/path/to/fasta/`\n - This folder _must_ contain files in a `.fasta.gz` format, or they will be skipped by the internal file search function.\n\n- `--read_type`\n - The type of longread data you are utilising, e.g., ont, illumina, hifi.\n\n- `--aligner`\n - The aligner yopu wish to use for the coverage generation, defaults to bwamem2 but minimap2 is also supported.\n\n- `--cram`\n - The directory of the cram _and_ cram.crai files, e.g., `/path/to/cram/`\n\n- `--map_order`\n - hic map scaffold order, input either `length` or `unsorted`\n\n- `--teloseq`\n - A telomeric sequence, e.g., `TTAGGG`\n\n- `--all_output`\n - An option to output all maps + accessory files, the default will only output the pretextmaps where ingestion has occured.\n\nNow, you can run the pipeline using:\n\n```bash\nnextflow run sanger-tol/curationpretext \\\n --input { input.fasta } \\\n --cram { path/to/cram/ } \\\n --reads { path/to/longread/fasta/ } \\\n --read_type { default is \"hifi\" }\n --sample { default is \"pretext_rerun\" } \\\n --teloseq { default is \"TTAGGG\" } \\\n --map_order { default is \"unsorted\" } \\\n --all_output \\\n --outdir { OUTDIR } \\\n -profile \n\n```\n\n> **Warning:**\n> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. 
Custom config files including those\n> provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_;\n\nFor more details, please refer to the [usage documentation](https://pipelines.tol.sanger.ac.uk/curationpretext/usage) and the [parameter documentation](https://pipelines.tol.sanger.ac.uk/curationpretext/parameters).\n\n## Pipeline output\n\nTo see the the results of a test run with a full size dataset refer to the [results](https://pipelines.tol.sanger.ac.uk/curationpretext/results) tab on the sanger-tol/curationpretext website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://pipelines.tol.sanger.ac.uk/curationpretext/output).\n\n## Credits\n\nsanger-tol/curationpretext was originally written by Damon-Lee B Pointon (@DLBPointon).\n\nWe thank the following people for their extensive assistance in the development of this pipeline:\n\n- @muffato - For reviews.\n\n- @yumisims - TreeVal and Software.\n\n- @weaglesBio - TreeVal and Software.\n\n- @josieparis - Help with better docs and testing.\n\n- @mahesh-panchal - Large support with 1.2.0 in making the pipeline more robust with other HPC environments.\n\n- @GRIT - For feedback and feature requests.\n\n- @prototaxites - Support with 1.3.0 and showing me the power of GAWK.\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\n## Citations\n\nIf you use sanger-tol/curationpretext for your analysis, please cite it using the following doi: [10.5281/zenodo.12773958](https://doi.org/10.5281/zenodo.12773958)\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nThis pipeline uses code and infrastructure developed and maintained by the [nf-core](https://nf-co.re) community, reused here under the [MIT 
license](https://github.com/nf-core/tools/blob/main/LICENSE).\n\n> **The nf-core framework for community-curated bioinformatics pipelines.**\n>\n> Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven Nahnsen.\n>\n> _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n", "hasPart": [ { "@id": "main.nf" @@ -124,7 +124,11 @@ }, { "@id": "main.nf", - "@type": ["File", "SoftwareSourceCode", "ComputationalWorkflow"], + "@type": [ + "File", + "SoftwareSourceCode", + "ComputationalWorkflow" + ], "creator": [ { "@id": "https://orcid.org/0000-0002-7860-3560" @@ -133,9 +137,16 @@ "dateCreated": "", "dateModified": "2025-05-27T10:34:43Z", "dct:conformsTo": "https://bioschemas.org/profiles/ComputationalWorkflow/1.0-RELEASE/", - "keywords": ["nf-core", "nextflow"], - "license": ["MIT"], - "name": ["sanger-tol/curationpretext"], + "keywords": [ + "nf-core", + "nextflow" + ], + "license": [ + "MIT" + ], + "name": [ + "sanger-tol/curationpretext" + ], "programmingLanguage": { "@id": "https://w3id.org/workflowhub/workflow-ro-crate#nextflow" }, @@ -146,7 +157,9 @@ "https://github.com/sanger-tol/curationpretext", "https://pipelines.tol.sanger.ac.uk/sanger-tol/curationpretext/1.4.2/" ], - "version": ["1.4.2"] + "version": [ + "1.4.2" + ] }, { "@id": "https://w3id.org/workflowhub/workflow-ro-crate#nextflow", @@ -332,4 +345,4 @@ "name": "Josie Paris" } ] -} +} \ No newline at end of file From a74b9e12fee0c3d92abafd8e07999bea68bf1ccf Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Fri, 19 Sep 2025 13:26:27 +0100 Subject: [PATCH 12/13] Pre-commit linting --- ro-crate-metadata.json | 25 ++++++------------------- 1 file changed, 6 insertions(+), 19 deletions(-) diff --git a/ro-crate-metadata.json b/ro-crate-metadata.json index b39134f9..c8de9e84 100644 --- a/ro-crate-metadata.json +++ b/ro-crate-metadata.json @@ -124,11 +124,7 @@ }, { "@id": 
"main.nf", - "@type": [ - "File", - "SoftwareSourceCode", - "ComputationalWorkflow" - ], + "@type": ["File", "SoftwareSourceCode", "ComputationalWorkflow"], "creator": [ { "@id": "https://orcid.org/0000-0002-7860-3560" @@ -137,16 +133,9 @@ "dateCreated": "", "dateModified": "2025-05-27T10:34:43Z", "dct:conformsTo": "https://bioschemas.org/profiles/ComputationalWorkflow/1.0-RELEASE/", - "keywords": [ - "nf-core", - "nextflow" - ], - "license": [ - "MIT" - ], - "name": [ - "sanger-tol/curationpretext" - ], + "keywords": ["nf-core", "nextflow"], + "license": ["MIT"], + "name": ["sanger-tol/curationpretext"], "programmingLanguage": { "@id": "https://w3id.org/workflowhub/workflow-ro-crate#nextflow" }, @@ -157,9 +146,7 @@ "https://github.com/sanger-tol/curationpretext", "https://pipelines.tol.sanger.ac.uk/sanger-tol/curationpretext/1.4.2/" ], - "version": [ - "1.4.2" - ] + "version": ["1.4.2"] }, { "@id": "https://w3id.org/workflowhub/workflow-ro-crate#nextflow", @@ -345,4 +332,4 @@ "name": "Josie Paris" } ] -} \ No newline at end of file +} From b921914fa9757083ef9a2f7712689611a7db6a0a Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Fri, 19 Sep 2025 13:50:09 +0100 Subject: [PATCH 13/13] Updates --- CHANGELOG.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 901bff0c..77dbc52b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,7 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added and Fixed -- Template update to 3.3.3. . +- Template update to 3.3.2. - Addition of the `--split_telomere` boolean flag, this is false by default. - When `true` the pipeline will split the telomere file into a 5 and 3 prime file. - Update `ACCESSORY_FILES` subworkflow: @@ -28,6 +28,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Update tests. - Added an "AUTO" value to the `--aligner` arg. If a genome is >5Gb it will use minimap2 else bwamem2. 
- Parity update for the base.config to match TreeVal. +- Minor Doc updates. +- Comment out the CONDA workflow requirement, pipeline does not support conda. ### Paramters