diff --git a/.nf-core.yml b/.nf-core.yml index c51b9619..1a8f8840 100644 --- a/.nf-core.yml +++ b/.nf-core.yml @@ -48,4 +48,4 @@ template: - seqera_platform - multiqc - rocrate - version: 1.6.0 + version: 1.6.1 diff --git a/CHANGELOG.md b/CHANGELOG.md index ac220849..2de3eb50 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,26 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [[1.6.1](https://github.com/sanger-tol/curationpretext/releases/tag/1.6.1)] - UNSC Trafalgar (H1) - [2025-03-13] + +## Added and Fixed + +- Update to add `--run_ultra` parameter + - Enum of ["yes", "no", "force"] + - When run as 'yes', ultra resolution maps will be generated if the genome is > 4 Gb. + - When run as 'force', an ultra resolution map will be generated regardless of genome size. +- Update `pretextmap` to a version that supports `--ultraRes` +- Update to config file to support the above. +- Addition of example params file `assets/example_params_file.yaml` + +### Software Dependencies + +Note, since the pipeline is using Nextflow DSL2, each process will be run with its own Biocontainer. This means that on occasion it is entirely possible for the pipeline to be using different versions of the same tool. However, the overall software dependency changes compared to the last release have been listed below for reference. + +| Module | Old Version | New Versions | +| ------------ | ----------- | ------------ | +| `PRETEXTMAP` | 0.1.9 | 0.2.4 | + ## [[1.6.0](https://github.com/sanger-tol/curationpretext/releases/tag/1.6.0)] - UNSC Trafalgar - [2025-02-19] ## Added and Fixed diff --git a/README.md b/README.md index cd38ec33..a2fa5d85 100644 --- a/README.md +++ b/README.md @@ -77,6 +77,12 @@ Currently, the pipeline uses the following flags: - `--cram_chunk_size` - The number of records in a cram file should be chunked into, defaults to 10000. 
+- `--run_hires` + - A boolean to run the pipeline in hires mode, i.e., generate high resolution maps. Default is `true`. + +- `--run_ultra` + - A string argument to run the pipeline in ultra resolution mode, i.e., generate ultra resolution maps. Options are: `yes`, `force`, `no`. Default is `yes`, which generates ultra resolution maps only if the genome file is > 4 Gb; `force` generates them regardless of genome size and `no` disables them. + Now, you can run the pipeline using: ```bash diff --git a/assets/example_params_file.yaml b/assets/example_params_file.yaml new file mode 100644 index 00000000..bba4a9ea --- /dev/null +++ b/assets/example_params_file.yaml @@ -0,0 +1,14 @@ +sample: "CurationPretextTest_MIN" +teloseq: "TTA" +aligner: "bwamem2" +all_output: false +skip_tracks: "NONE" +run_hires: false +run_ultra: "force" +split_telomere: true +input: "https://tolit.cog.sanger.ac.uk/test-data/resources/treeval/TreeValTinyData/assembly/draft/grTriPseu1.fa" +reads: + - "https://tolit.cog.sanger.ac.uk/test-data/resources/treeval/TreeValTinyData/genomic_data/pacbio/seqkitPacbio50000.fasta.gz" +cram: + - "https://tolit.cog.sanger.ac.uk/test-data/resources/treeval/TreeValTinyData/genomic_data/hic-arima/SUBSET-1000.cram" + - "https://tolit.cog.sanger.ac.uk/test-data/resources/treeval/TreeValTinyData/genomic_data/hic-arima/SUBSET-2000.cram" diff --git a/assets/schema_input.json b/assets/schema_input.json deleted file mode 100644 index 4c47266a..00000000 --- a/assets/schema_input.json +++ /dev/null @@ -1,33 +0,0 @@ -{ - "$schema": "https://json-schema.org/draft/2020-12/schema", - "$id": "https://raw.githubusercontent.com/sanger-tol/curationpretext/main/assets/schema_input.json", - "title": "sanger-tol/curationpretext pipeline - params.input schema", - "description": "Schema for the file provided with params.input", - "type": "array", - "items": { - "type": "object", - "properties": { - "sample": { - "type": "string", - "pattern": "^\\S+$", - "errorMessage": "Sample name must be provided and cannot contain spaces", - "meta": ["id"] - }, - "fastq_1": { - 
"type": "string", - "format": "file-path", - "exists": true, - "pattern": "^([\\S\\s]*\\/)?[^\\s\\/]+\\.f(ast)?q\\.gz$", - "errorMessage": "FastQ file for reads 1 must be provided, cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'" - }, - "fastq_2": { - "type": "string", - "format": "file-path", - "exists": true, - "pattern": "^([\\S\\s]*\\/)?[^\\s\\/]+\\.f(ast)?q\\.gz$", - "errorMessage": "FastQ file for reads 2 cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'" - } - }, - "required": ["sample", "fastq_1"] - } -} diff --git a/conf/base.config b/conf/base.config index dca4223c..b1ab5ed0 100644 --- a/conf/base.config +++ b/conf/base.config @@ -69,14 +69,20 @@ process { } withName: ".*:CREATE_MAPS_STDRD:PRETEXTMAP" { - cpus = { 8 * 1 } + cpus = { 6 * 1 } memory = { 3.GB * task.attempt } time = { 1.h * ( ( fasta.size() < 4e9 ? 24 : 48 ) * task.attempt ) } } withName: ".*:CREATE_MAPS_HIRES:PRETEXTMAP" { - cpus = { 8 * task.attempt } - memory = { 20.GB * Math.ceil( task.attempt * 2.6 ) } + cpus = { 6 * task.attempt } + memory = { 20.GB * task.attempt } + time = { 1.h * ( ( fasta.size() < 4e9 ? 24 : 48 ) * Math.ceil( task.attempt * 1 ) ) } + } + + withName: ".*:CREATE_MAPS_ULTRA:PRETEXTMAP" { + cpus = { 6 * task.attempt } + memory = { 40.GB * task.attempt } time = { 1.h * ( ( fasta.size() < 4e9 ? 24 : 48 ) * Math.ceil( task.attempt * 1 ) ) } } diff --git a/conf/modules.config b/conf/modules.config index cfe7ae5f..214bde8e 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -15,7 +15,7 @@ process { // // NOTE: GROUP THE PUBLISHDIR DIRECTIVES // - withName: 'PRETEXT_INGEST_SNDRD|PRETEXT_INGEST_HIRES' { + withName: 'PRETEXT_INGEST_SNDRD|PRETEXT_INGEST_HIRES|PRETEXT_INGEST_ULTRA' { publishDir = [ [ path: { "${params.outdir}/pretext_maps_processed" }, @@ -29,6 +29,12 @@ process { mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename }, ], + [ + path: { "${params.outdir}/pretext_maps_processed" }, + pattern: "*ultra.pretext", + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + ], ] } @@ -170,6 +176,11 @@ process { ext.prefix = { "${meta.id}_hr" } } + withName: "PRETEXT_INGEST_ULTRA" { + ext.args = { "--textureBuffer 1G" } + ext.prefix = { "${meta.id}_ultra" } + } + withName: "PRETEXTSNAPSHOT" { ext.args = { "--sequences '=full' --resolution 1440 --memory ${task.memory}" } ext.prefix = { "${meta.id}_normal" } @@ -238,6 +249,11 @@ process { ext.prefix = { "${meta.id}_hr_pi" } } + withName: ".*:CREATE_MAPS_ULTRA:PRETEXTMAP" { + ext.args = { "${meta.map_order.equals("length") ? "--sortby length": "--sortby nosort" } --ultraRes --mapq ${params.multi_mapping}" } + ext.prefix = { "${meta.id}_ultra_pi" } + } + withName: ".*:ALIGN_CRAM:CRAMALIGN_BWAMEM2ALIGNHIC" { ext.args = "" ext.args2 = "-F0xB00 -nt" diff --git a/conf/test.config b/conf/test.config index 2013e915..4dbddc07 100644 --- a/conf/test.config +++ b/conf/test.config @@ -32,5 +32,6 @@ params { all_output = false skip_tracks = "NONE" run_hires = false + run_ultra = "no" split_telomere = true } diff --git a/conf/test_full.config b/conf/test_full.config index 37179351..c694065b 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -29,6 +29,7 @@ params { all_output = true skip_tracks = "NONE" run_hires = true + run_ultra = "force" split_telomere = true } diff --git a/main.nf b/main.nf index 4dd04dc6..5c4bf6f4 100644 --- a/main.nf +++ b/main.nf @@ -39,6 +39,7 @@ workflow SANGER_TOL_CURATIONPRETEXT { aligner skip_tracks run_hires + run_ultra split_telomere cram_chunk_size @@ -54,6 +55,7 @@ workflow SANGER_TOL_CURATIONPRETEXT { aligner, skip_tracks, run_hires, + run_ultra, split_telomere, cram_chunk_size ) @@ -98,6 +100,7 @@ workflow { params.aligner, params.skip_tracks, params.run_hires, + params.run_ultra, params.split_telomere, params.cram_chunk_size ) diff 
--git a/modules.json b/modules.json index e4e53e4e..3b097660 100644 --- a/modules.json +++ b/modules.json @@ -77,9 +77,8 @@ }, "pretextmap": { "branch": "master", - "git_sha": "000647dd5c075642ac90213b17f67f76236a7346", - "installed_by": ["modules", "pairs_create_contact_maps"], - "patch": "modules/nf-core/pretextmap/pretextmap.diff" + "git_sha": "b94e5739438038828eae63cdb55f8d8ac2d99bf6", + "installed_by": ["modules", "pairs_create_contact_maps"] }, "pretextsnapshot": { "branch": "master", diff --git a/modules/nf-core/pretextmap/environment.yml b/modules/nf-core/pretextmap/environment.yml index 721f0e74..845f12d6 100644 --- a/modules/nf-core/pretextmap/environment.yml +++ b/modules/nf-core/pretextmap/environment.yml @@ -5,5 +5,5 @@ channels: - bioconda dependencies: - bioconda::htslib=1.23 - - bioconda::pretextmap=0.1.9 + - bioconda::pretextmap=0.2.4 - bioconda::samtools=1.23 diff --git a/modules/nf-core/pretextmap/main.nf b/modules/nf-core/pretextmap/main.nf index 55f251ca..7ef6fe04 100644 --- a/modules/nf-core/pretextmap/main.nf +++ b/modules/nf-core/pretextmap/main.nf @@ -4,9 +4,9 @@ process PRETEXTMAP { label 'process_low' conda "${moduleDir}/environment.yml" - - container "quay.io/sanger-tol/pretext:0.0.9-yy5-c2" - + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/f6/f6b88d972aaf27c7e748e2c3b5ee86065dc5ed7824d7d13937c65844242211e2/data': + 'community.wave.seqera.io/library/htslib_pretextmap_samtools:8a29f6d0f55f98f9' }" input: tuple val(meta), path(input) diff --git a/modules/nf-core/pretextmap/pretextmap.diff b/modules/nf-core/pretextmap/pretextmap.diff deleted file mode 100644 index 1128d2a7..00000000 --- a/modules/nf-core/pretextmap/pretextmap.diff +++ /dev/null @@ -1,33 +0,0 @@ -Changes in component 'nf-core/pretextmap' -'modules/nf-core/pretextmap/meta.yml' is unchanged -Changes in 'pretextmap/environment.yml': ---- modules/nf-core/pretextmap/environment.yml -+++ modules/nf-core/pretextmap/environment.yml -@@ -5,5 +5,5 @@ - - bioconda - dependencies: - - bioconda::htslib=1.23 -- - bioconda::pretextmap=0.2.3 -+ - bioconda::pretextmap=0.1.9 - - bioconda::samtools=1.23 - -Changes in 'pretextmap/main.nf': ---- modules/nf-core/pretextmap/main.nf -+++ modules/nf-core/pretextmap/main.nf -@@ -4,9 +4,9 @@ - label 'process_low' - - conda "${moduleDir}/environment.yml" -- container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
-- 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/e9/e9e6a49d9810ef0101a4a003afeda9b32c1d0d06b196ec13a5c9f5919bd1869e/data': -- 'community.wave.seqera.io/library/htslib_pretextmap_samtools:6d973e19ac7b0a1f' }" -+ -+ container "quay.io/sanger-tol/pretext:0.0.9-yy5-c2" -+ - - input: - tuple val(meta), path(input) - -'modules/nf-core/pretextmap/tests/main.nf.test' is unchanged -'modules/nf-core/pretextmap/tests/main.nf.test.snap' is unchanged -************************************************************ diff --git a/modules/nf-core/pretextmap/tests/main.nf.test.snap b/modules/nf-core/pretextmap/tests/main.nf.test.snap index 5b78b8cf..13e2d7d3 100644 --- a/modules/nf-core/pretextmap/tests/main.nf.test.snap +++ b/modules/nf-core/pretextmap/tests/main.nf.test.snap @@ -8,14 +8,14 @@ "id": "test", "single_end": false }, - "test.pretext:md5,36105209541b09098a2d070cce5866a9" + "test.pretext:md5,b32bf7fc9f91a85e2d37886daf500e1b" ] ], "1": [ [ "PRETEXTMAP", "PretextMap", - "0.2.3" + "0.2.4" ] ], "2": [ @@ -31,14 +31,14 @@ "id": "test", "single_end": false }, - "test.pretext:md5,36105209541b09098a2d070cce5866a9" + "test.pretext:md5,b32bf7fc9f91a85e2d37886daf500e1b" ] ], "versions_pretextmap": [ [ "PRETEXTMAP", "PretextMap", - "0.2.3" + "0.2.4" ] ], "versions_samtools": [ @@ -52,9 +52,9 @@ ], "meta": { "nf-test": "0.9.3", - "nextflow": "25.10.0" + "nextflow": "25.10.2" }, - "timestamp": "2026-01-26T08:48:03.239812" + "timestamp": "2026-03-09T11:00:02.297422554" }, "homo_cram_map_stub": { "content": [ @@ -72,7 +72,7 @@ [ "PRETEXTMAP", "PretextMap", - "0.2.3" + "0.2.4" ] ], "2": [ @@ -95,7 +95,7 @@ [ "PRETEXTMAP", "PretextMap", - "0.2.3" + "0.2.4" ] ], "versions_samtools": [ @@ -109,9 +109,9 @@ ], "meta": { "nf-test": "0.9.3", - "nextflow": "25.10.0" + "nextflow": "25.10.2" }, - "timestamp": "2026-01-26T08:56:52.454859" + "timestamp": "2026-03-09T11:13:12.262693811" }, "homo_cram_map_nofai": { "content": [ @@ -122,14 +122,14 @@ "id": "test", "single_end": 
false }, - "test.pretext:md5,36105209541b09098a2d070cce5866a9" + "test.pretext:md5,b32bf7fc9f91a85e2d37886daf500e1b" ] ], "1": [ [ "PRETEXTMAP", "PretextMap", - "0.2.3" + "0.2.4" ] ], "2": [ @@ -145,14 +145,14 @@ "id": "test", "single_end": false }, - "test.pretext:md5,36105209541b09098a2d070cce5866a9" + "test.pretext:md5,b32bf7fc9f91a85e2d37886daf500e1b" ] ], "versions_pretextmap": [ [ "PRETEXTMAP", "PretextMap", - "0.2.3" + "0.2.4" ] ], "versions_samtools": [ @@ -166,9 +166,9 @@ ], "meta": { "nf-test": "0.9.3", - "nextflow": "25.10.0" + "nextflow": "25.10.2" }, - "timestamp": "2026-01-26T08:50:51.096526" + "timestamp": "2026-03-09T11:04:12.368177716" }, "mapped_pairs_map": { "content": [ @@ -186,7 +186,7 @@ [ "PRETEXTMAP", "PretextMap", - "0.2.3" + "0.2.4" ] ], "2": [ @@ -209,7 +209,7 @@ [ "PRETEXTMAP", "PretextMap", - "0.2.3" + "0.2.4" ] ], "versions_samtools": [ @@ -223,9 +223,9 @@ ], "meta": { "nf-test": "0.9.3", - "nextflow": "25.10.0" + "nextflow": "25.10.2" }, - "timestamp": "2026-01-26T08:56:43.224672" + "timestamp": "2026-03-09T11:13:01.188693704" }, "homo_bam_map": { "content": [ @@ -236,14 +236,14 @@ "id": "test", "single_end": false }, - "test.pretext:md5,36105209541b09098a2d070cce5866a9" + "test.pretext:md5,b32bf7fc9f91a85e2d37886daf500e1b" ] ], "1": [ [ "PRETEXTMAP", "PretextMap", - "0.2.3" + "0.2.4" ] ], "2": [ @@ -259,14 +259,14 @@ "id": "test", "single_end": false }, - "test.pretext:md5,36105209541b09098a2d070cce5866a9" + "test.pretext:md5,b32bf7fc9f91a85e2d37886daf500e1b" ] ], "versions_pretextmap": [ [ "PRETEXTMAP", "PretextMap", - "0.2.3" + "0.2.4" ] ], "versions_samtools": [ @@ -280,8 +280,8 @@ ], "meta": { "nf-test": "0.9.3", - "nextflow": "25.10.0" + "nextflow": "25.10.2" }, - "timestamp": "2026-01-26T08:53:42.104066" + "timestamp": "2026-03-09T11:08:26.562100038" } } \ No newline at end of file diff --git a/nextflow.config b/nextflow.config index d4129c05..7e186303 100644 --- a/nextflow.config +++ b/nextflow.config @@ -22,6 +22,7 @@ params 
{ map_order = "unsorted" all_output = false run_hires = true + run_ultra = "yes" multi_mapping = 0 cram_chunk_size = 10000 pre_mapped_bam = null diff --git a/nextflow_schema.json b/nextflow_schema.json index 632f2422..cb3df322 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -86,10 +86,17 @@ }, "run_hires": { "type": "boolean", - "description": "Run HiRes pretext maps", + "description": "Run High Resolution pretext maps", "help_text": "Boolean to switch off HiRes pretextmap generation", "fa_icon": "fas fa-check" }, + "run_ultra": { + "type": "string", + "description": "Run Ultra resolution pretext maps", + "help_text": "Switch for Ultra resolution pretextmap generation", + "fa_icon": "fas fa-check", + "enum": ["yes", "no", "force"] + }, "cram_chunk_size": { "type": "integer", "description": "Chunk size for splitting CRAM files", diff --git a/tests/default.nf.test.snap b/tests/default.nf.test.snap index 2e707513..e288dd78 100644 --- a/tests/default.nf.test.snap +++ b/tests/default.nf.test.snap @@ -69,8 +69,8 @@ "minimap2": "2.29-r1283" }, "PRETEXTMAP": { - "PretextMap": "0.1.9", - "samtools": 1.21 + "PretextMap": "0.2.4", + "samtools": 1.23 }, "PRETEXTSNAPSHOT": { "PretextSnapshot": "0.0.5" @@ -170,4 +170,4 @@ }, "timestamp": "2026-03-05T13:59:35.465106708" } -} \ No newline at end of file +} diff --git a/workflows/curationpretext.nf b/workflows/curationpretext.nf index d6f8b641..56f22748 100644 --- a/workflows/curationpretext.nf +++ b/workflows/curationpretext.nf @@ -12,6 +12,7 @@ include { GUNZIP } from '../modules/n //LOCAL MODULES include { PRETEXT_GRAPH as PRETEXT_INGEST_SNDRD } from '../modules/local/pretext/graph/main' include { PRETEXT_GRAPH as PRETEXT_INGEST_HIRES } from '../modules/local/pretext/graph/main' +include { PRETEXT_GRAPH as PRETEXT_INGEST_ULTRA } from '../modules/local/pretext/graph/main' // LOCAL SUBWORKFLOWS include { ACCESSORY_FILES } from '../subworkflows/local/accessory_files/main' @@ -20,8 +21,9 @@ include { ACCESSORY_FILES 
} from '../subworkfl include { CRAM_MAP_ILLUMINA_HIC as ALIGN_CRAM } from '../subworkflows/sanger-tol/cram_map_illumina_hic/main' include { PAIRS_CREATE_CONTACT_MAPS as CREATE_MAPS_STDRD } from '../subworkflows/sanger-tol/pairs_create_contact_maps/main' include { PAIRS_CREATE_CONTACT_MAPS as CREATE_MAPS_HIRES } from '../subworkflows/sanger-tol/pairs_create_contact_maps/main' +include { PAIRS_CREATE_CONTACT_MAPS as CREATE_MAPS_ULTRA } from '../subworkflows/sanger-tol/pairs_create_contact_maps/main' - +// FUNCTION IMPORTS include { paramsSummaryMap } from 'plugin/nf-schema' include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline' include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' @@ -43,6 +45,7 @@ workflow CURATIONPRETEXT { val_aligner val_skip_tracks val_run_hires + val_run_ultra val_split_telomere val_cram_chunk_size @@ -193,6 +196,30 @@ workflow CURATIONPRETEXT { [] ) + // + // SUBWORKFLOW: MAP THE PRETEXT FILE + // IF val_run_ultra IS "yes" (THE DEFAULT) MAP ULTRA ONLY WHEN THE REF FILE IS > 4GB; "no" NEVER MAPS ULTRA + // IF val_run_ultra IS "force" MAP ULTRA REGARDLESS OF REF SIZE + // + def ultra_input = mapped_bam + .combine(ch_reference) + .filter { _mapped_meta, _bam, _ref_meta, ref_fasta -> + val_run_ultra == "force" || (val_run_ultra == "yes" && ref_fasta.size() > 4.GB.toBytes()) + } + .map { mapped_meta, bam, _ref_meta, _ref_fasta -> + [mapped_meta, bam] + } + + CREATE_MAPS_ULTRA ( + ultra_input, + [[:],[]], + true, + false, + false, + false, + [] + ) + // // MODULE: INGEST ACCESSORY FILES INTO PRETEXT BY DEFAULT @@ -213,7 +240,21 @@ workflow CURATIONPRETEXT { // - ADAPTED FROM TREEVAL // PRETEXT_INGEST_HIRES ( - CREATE_MAPS_HIRES.out.pretext.filter { val_run_hires && !dont_generate_tracks.contains("ALL") }, + CREATE_MAPS_HIRES.out.pretext.filter { !dont_generate_tracks.contains("ALL") }, + gaps_file, + cove_file, + telo_file, + rept_file, + val_split_telomere + ) + + + // + // MODULE: INGEST ACCESSORY FILES INTO PRETEXT BY DEFAULT + // - ADAPTED FROM 
TREEVAL + // + PRETEXT_INGEST_ULTRA ( + CREATE_MAPS_ULTRA.out.pretext.filter { !dont_generate_tracks.contains("ALL") }, gaps_file, cove_file, telo_file,