diff --git a/CHANGELOG.md b/CHANGELOG.md index 7df7a85d4..a2af31746 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,11 +8,20 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Added` - Parameter `cadd_prescored` to pass a directory of pre-scored CADD indel annotations to the CADD process in genome and mitochondrial SNV annotation subworkflows +- Parameters `manta_call_regions` and `manta_call_regions_tbi` to restrict Manta SV calling to specified regions (e.g. primary chromosomes) via a bgzipped BED file and its tabix index, reducing runtime without affecting other callers ### `Fixed` - Add a bcftools norm split-multiallelics step after merging standard and shifted MT calls to handle new multiallelic sites introduced by bcftools merge [#855](https://github.com/nf-core/raredisease/pull/855) +### Parameters + +| Old parameter | New parameter | +| ------------- | ---------------------- | +| | cadd_prescored | +| | manta_call_regions | +| | manta_call_regions_tbi | + ## 3.0.0 - Mario [2026-05-12] ### `Added` diff --git a/docs/usage.md b/docs/usage.md index e5963d4d2..3007cbdf6 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -263,10 +263,14 @@ The mandatory and optional parameters for each category are tabulated below. ##### 5. Variant calling - Structural variants -| Mandatory | Optional | -| --------- | ---------- | -| | target_bed | -| | bwa | +| Mandatory | Optional | +| --------- | ---------------------------------- | +| | target_bed | +| | bwa | +| | manta_call_regions<sup>1</sup> | +| | manta_call_regions_tbi<sup>1</sup> | + +<sup>1</sup> A bgzipped BED file (`.bed.gz`) and its tabix index (`.bed.gz.tbi`) restricting Manta's SV calling to specific regions. Both parameters must be supplied together. Only applied for WGS; for WES, Manta always uses `target_bed` and these parameters have no effect. Useful for reducing runtime on references with many short contigs such as GRCh38 by limiting analysis to primary chromosomes. ##### 6. 
Copy number variant calling diff --git a/main.nf b/main.nf index 881cd401b..ab4402c72 100644 --- a/main.nf +++ b/main.nf @@ -74,6 +74,8 @@ workflow NFCORE_RAREDISEASE { val_known_dbsnp_tbi val_light_strand_origin_end val_light_strand_origin_start + val_manta_call_regions + val_manta_call_regions_tbi val_mbuffer_mem val_mito_length val_mito_name @@ -262,6 +264,16 @@ workflow NFCORE_RAREDISEASE { ch_cadd_header = channel.fromPath("$projectDir/assets/cadd_to_vcf_header_-1.0-.txt", checkIfExists: true).collect() ch_foundin_header = channel.fromPath("$projectDir/assets/foundin.hdr", checkIfExists: true).collect() + // Manta call regions: WGS uses the optional manta_call_regions BED/index pair (empty placeholders when unset); other analysis types reuse target_bed. + // Fail early when only one of the pair is given, rather than passing a null path to file() or silently ignoring the lone parameter. + if (val_analysis_type.equals("wgs") && (val_manta_call_regions || val_manta_call_regions_tbi) && !(val_manta_call_regions && val_manta_call_regions_tbi)) { + error("Parameters --manta_call_regions and --manta_call_regions_tbi must be supplied together.") + } + ch_manta_regions = val_analysis_type.equals("wgs") + ? (val_manta_call_regions + ? channel.value([file(val_manta_call_regions), file(val_manta_call_regions_tbi)]) + : channel.value([[], []])) + : ch_target_bed.map { _meta, bed, tbi -> [bed, tbi] } ch_ngsbits_method = channel.value(val_ngsbits_samplegender_method) ch_sentieon_pcr_indel_model = channel.value(val_sentieon_dnascope_pcr_indel_model) ch_subdepth = channel.value(val_subdepth) @@ -404,6 +416,7 @@ workflow NFCORE_RAREDISEASE { ch_hgnc_ids, ch_intervals_wgs, ch_intervals_y, + ch_manta_regions, ch_me_references, ch_me_svdb_resources, ch_ml_model, @@ -595,6 +608,8 @@ workflow { params.known_dbsnp_tbi, params.light_strand_origin_end, params.light_strand_origin_start, + params.manta_call_regions, + params.manta_call_regions_tbi, params.mbuffer_mem, params.mito_length, params.mito_name, diff --git a/nextflow.config b/nextflow.config index 6c6fdb6d4..858381fdf 100644 --- a/nextflow.config +++ b/nextflow.config @@ -55,6 +55,8 @@ params { call_interval = null cadd_prescored = null cadd_resources = null + manta_call_regions = null + manta_call_regions_tbi = null gcnvcaller_model = null gens_interval_list = null gens_pon_female = null diff --git a/nextflow_schema.json b/nextflow_schema.json index e6016072f..5529d2a94 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -243,6 +243,23 @@ "description": 
"Local directory base for genome references that map to the config.", "help_text": "This folder is a flat structure with file names that map to the config." }, + "manta_call_regions": { + "type": "string", + "exists": true, + "format": "file-path", + "fa_icon": "fas fa-file", + "pattern": "^\\S+\\.bed\\.gz$", + "description": "Path to a bgzipped BED file restricting Manta SV calling to specific regions (e.g. primary chromosomes). Only applied for WGS; for WES, Manta always uses target_bed instead.", + "help_text": "Must be supplied together with --manta_call_regions_tbi. Only used when analysis_type is wgs — for wes, Manta uses the target_bed supplied via --target_bed and this parameter has no effect. Useful for reducing runtime on references with many short contigs such as GRCh38." + }, + "manta_call_regions_tbi": { + "type": "string", + "exists": true, + "format": "file-path", + "fa_icon": "fas fa-file", + "pattern": "^\\S+\\.bed\\.gz\\.tbi$", + "description": "Tabix index (.bed.gz.tbi) for the file supplied via --manta_call_regions. Must be supplied together with --manta_call_regions; only used when analysis_type is wgs." 
+ }, "mito_name": { "type": "string", "description": "Name of the mitochondrial contig in the reference fasta file", diff --git a/subworkflows/local/call_structural_variants/main.nf b/subworkflows/local/call_structural_variants/main.nf index b5407b11e..871914722 100644 --- a/subworkflows/local/call_structural_variants/main.nf +++ b/subworkflows/local/call_structural_variants/main.nf @@ -24,6 +24,7 @@ workflow CALL_STRUCTURAL_VARIANTS { ch_genome_fai // channel: [mandatory] [ val(meta), path(fai) ] ch_genome_fasta // channel: [mandatory] [ val(meta), path(fasta) ] ch_genome_hisat2index // channel: [mandatory] [ val(meta), path(hisat2index) ] + ch_manta_regions // channel: [mandatory] [ path(bed), path(tbi) ] ch_mitosalt_config // channel: [mandatory] [val(mitosalt_breakspan),val(mitosalt_breakthreshold),...,val(mitosalt_split_length)] ch_mt_bam_bai // channel: [mandatory] [ val(meta), path(bam), path(bai) ] ch_mt_fai // channel: [mandatory] [ val(meta), path(mtfai) ] @@ -34,7 +35,6 @@ workflow CALL_STRUCTURAL_VARIANTS { ch_reads // channel: [mandatory] [ val(meta), [path(reads)] ] ch_subdepth // channel: [mandatory] [ val(mitosalt_depth) ] ch_svcaller_priority // channel: [mandatory] [ val(["var caller tag 1", ...]) ] - ch_target_bed // channel: [mandatory for WES] [ val(meta), path(bed), path(tbi) ] skip_germlinecnvcaller // boolean skip_mitosalt // boolean val_analysis_type // string: "wes", "wgs", or "mito" @@ -59,7 +59,7 @@ workflow CALL_STRUCTURAL_VARIANTS { ch_tiddit_vcf = channel.empty() if (!val_analysis_type.equals("mito")) { - CALL_SV_MANTA (ch_genome_bam, ch_genome_bai, ch_genome_fasta, ch_genome_fai, ch_case_info, ch_target_bed, val_analysis_type) + CALL_SV_MANTA (ch_genome_bam, ch_genome_bai, ch_genome_fasta, ch_genome_fai, ch_case_info, ch_manta_regions) .filtered_diploid_sv_vcf .collect{ _meta, vcf -> vcf } .set{ ch_manta_vcf } diff --git a/subworkflows/local/call_structural_variants/tests/main.nf.test 
b/subworkflows/local/call_structural_variants/tests/main.nf.test index f2d69db9d..f4287b53b 100644 --- a/subworkflows/local/call_structural_variants/tests/main.nf.test +++ b/subworkflows/local/call_structural_variants/tests/main.nf.test @@ -42,27 +42,27 @@ nextflow_workflow { input[8] = channel.of([[id:'genome'], file(params.pipelines_testdata_base_path + 'reference/reference.fasta.fai', checkIfExists: true)]).collect() input[9] = channel.of([[id:'genome'], file(params.pipelines_testdata_base_path + 'reference/reference.fasta', checkIfExists: true)]).collect() input[10] = Channel.from("\$PWD").map { dir -> [[id:'genome'], file(dir)] } - input[11] = channel.of([15, 2, 5, 30000, 30, 0.00001, 5, 1000, 80, 10000, 5, 15]) - input[12] = channel.of([ + input[11] = channel.value([[], []]) + input[12] = channel.of([15, 2, 5, 30000, 30, 0.00001, 5, 1000, 80, 10000, 5, 15]) + input[13] = channel.of([ [id:'earlycasualcaiman', sample:'earlycasualcaiman', single_end:false, num_lanes:1, read_group: "'@RG\\\\tID:earlycasualcaiman\\\\tPL:illumina\\\\tSM:earlycasualcaiman'", lane:1, sex:1, phenotype:1, paternal:0, maternal:0, case_id:'justhusky'], file(params.pipelines_testdata_base_path + 'testdata/earlycasualcaiman_sorted_md.bam', checkIfExists: true), file(params.pipelines_testdata_base_path + 'testdata/earlycasualcaiman_sorted_md.bam.bai', checkIfExists: true) ]) - input[13] = channel.of([[id:'genome'], file(params.pipelines_testdata_base_path + 'reference/reference_mt.fa.fai', checkIfExists: true)]).collect() - input[14] = channel.of([[id:'genome'], file(params.pipelines_testdata_base_path + 'reference/reference_mt.fa', checkIfExists: true)]).collect() - input[15] = Channel.from("${projectDir}/subworkflows/local/variant_evaluation/tests").map { dir -> [[id:'mt'], file(dir)] } - input[16] = channel.of([[id:'ploidy'], []]) - input[17] = channel.of([[]]) - input[18] = channel.of([ + input[14] = channel.of([[id:'genome'], file(params.pipelines_testdata_base_path + 
'reference/reference_mt.fa.fai', checkIfExists: true)]).collect() + input[15] = channel.of([[id:'genome'], file(params.pipelines_testdata_base_path + 'reference/reference_mt.fa', checkIfExists: true)]).collect() + input[16] = Channel.from("${projectDir}/subworkflows/local/variant_evaluation/tests").map { dir -> [[id:'mt'], file(dir)] } + input[17] = channel.of([[id:'ploidy'], []]) + input[18] = channel.of([[]]) + input[19] = channel.of([ [id:'earlycasualcaiman', sample:'earlycasualcaiman', single_end:false, num_lanes:1, read_group: "'@RG\\\\tID:earlycasualcaiman\\\\tPL:illumina\\\\tSM:earlycasualcaiman'", lane:1, sex:1, phenotype:1, paternal:0, maternal:0, case_id:'justhusky'], [ file(params.pipelines_testdata_base_path + 'testdata/earlycasualcaiman_mt_1.fastq.gz', checkIfExists: true), file(params.pipelines_testdata_base_path + 'testdata/earlycasualcaiman_mt_2.fastq.gz', checkIfExists: true) ] ]) - input[19] = channel.value(10000000) - input[20] = channel.value(['manta', 'tiddit', 'cnvnator']) - input[21] = channel.of([[id:'target'], [], []]) + input[20] = channel.value(10000000) + input[21] = channel.value(['manta', 'tiddit', 'cnvnator']) input[22] = true input[23] = false input[24] = 'wgs' diff --git a/subworkflows/local/call_structural_variants/tests/main.nf.test.snap b/subworkflows/local/call_structural_variants/tests/main.nf.test.snap index d3b9e9234..3b8a5f36c 100644 --- a/subworkflows/local/call_structural_variants/tests/main.nf.test.snap +++ b/subworkflows/local/call_structural_variants/tests/main.nf.test.snap @@ -16,10 +16,10 @@ "justhusky_sv.vcf.gz.tbi" ] ], + "timestamp": "2026-04-14T15:51:47.077800376", "meta": { "nf-test": "0.9.3", "nextflow": "25.10.4" - }, - "timestamp": "2026-04-14T15:51:47.077800376" + } } } \ No newline at end of file diff --git a/subworkflows/local/call_sv_manta/main.nf b/subworkflows/local/call_sv_manta/main.nf index 3a514c3ec..2c01d251f 100644 --- a/subworkflows/local/call_sv_manta/main.nf +++ 
b/subworkflows/local/call_sv_manta/main.nf @@ -7,13 +7,12 @@ include { MANTA_GERMLINE as MANTA } from '../../../modules/nf-core/ workflow CALL_SV_MANTA { take: - ch_bam // channel: [mandatory] [ val(meta), path(bam) ] - ch_bai // channel: [mandatory] [ val(meta), path(bai) ] - ch_genome_fasta // channel: [mandatory] [ val(meta), path(fasta) ] - ch_genome_fai // channel: [mandatory] [ val(meta), path(fai) ] - ch_case_info // channel: [mandatory] [ val(case_info) ] - ch_bed // channel: [mandatory for WES] [ val(meta), path(bed), path(tbi) ] - val_analysis_type // string: "wes", "wgs", or "mito" + ch_bam // channel: [mandatory] [ val(meta), path(bam) ] + ch_bai // channel: [mandatory] [ val(meta), path(bai) ] + ch_genome_fasta // channel: [mandatory] [ val(meta), path(fasta) ] + ch_genome_fai // channel: [mandatory] [ val(meta), path(fai) ] + ch_case_info // channel: [mandatory] [ val(case_info) ] + ch_regions // channel: [mandatory] [ path(bed), path(tbi) ] main: ch_bam.map{ _meta, bam -> bam } @@ -26,24 +25,11 @@ workflow CALL_SV_MANTA { .toList() .set { bai_file_list } - ch_bed.map { - _id, bed_file, index -> - return [bed_file, index]} - .set { bed_input } - - if (val_analysis_type.equals("wgs")) { - ch_case_info.combine(bam_file_list) - .combine(bai_file_list) - .map { meta, input, index -> [meta, input, index] + [ [], [] ] } - .set { manta_input } - MANTA ( manta_input, ch_genome_fasta, ch_genome_fai, [] ) - } else { - ch_case_info.combine(bam_file_list) - .combine(bai_file_list) - .combine(bed_input) - .set { manta_input } - MANTA ( manta_input, ch_genome_fasta, ch_genome_fai, [] ) - } + ch_case_info.combine(bam_file_list) + .combine(bai_file_list) + .combine(ch_regions) + .set { manta_input } + MANTA ( manta_input, ch_genome_fasta, ch_genome_fai, [] ) MANTA.out.diploid_sv_vcf .join(MANTA.out.diploid_sv_vcf_tbi) diff --git a/subworkflows/local/call_sv_manta/tests/main.nf.test b/subworkflows/local/call_sv_manta/tests/main.nf.test index fd43a3cda..1757244d9 
100644 --- a/subworkflows/local/call_sv_manta/tests/main.nf.test +++ b/subworkflows/local/call_sv_manta/tests/main.nf.test @@ -12,7 +12,7 @@ nextflow_workflow { config "./nextflow.config" - test("CALL_SV_MANTA - wgs") { + test("CALL_SV_MANTA") { when { workflow { @@ -36,8 +36,7 @@ nextflow_workflow { input[2] = channel.of([id:'genome'], file(params.pipelines_testdata_base_path + 'reference/reference.fasta', checkIfExists: true)).collect() input[3] = channel.of([id:'genome'], file(params.pipelines_testdata_base_path + 'reference/reference.fasta.fai', checkIfExists: true)).collect() input[4] = channel.of([id:'justhusky']) - input[5] = channel.of([[id:'target'], [], []]) - input[6] = 'wgs' + input[5] = channel.value([[], []]) """ } } @@ -52,7 +51,7 @@ nextflow_workflow { ) } } - test("CALL_SV_MANTA - wgs, stub") { + test("CALL_SV_MANTA - stub") { options "-stub" @@ -78,8 +77,7 @@ nextflow_workflow { input[2] = channel.of([id:'genome'], file(params.pipelines_testdata_base_path + 'reference/reference.fasta', checkIfExists: true)).collect() input[3] = channel.of([id:'genome'], file(params.pipelines_testdata_base_path + 'reference/reference.fasta.fai', checkIfExists: true)).collect() input[4] = channel.of([id:'justhusky']) - input[5] = channel.of([[id:'target'], [], []]) - input[6] = 'wgs' + input[5] = channel.value([[], []]) """ } } diff --git a/subworkflows/local/call_sv_manta/tests/main.nf.test.snap b/subworkflows/local/call_sv_manta/tests/main.nf.test.snap index cbc481726..b2b600081 100644 --- a/subworkflows/local/call_sv_manta/tests/main.nf.test.snap +++ b/subworkflows/local/call_sv_manta/tests/main.nf.test.snap @@ -1,16 +1,16 @@ { - "CALL_SV_MANTA - wgs": { + "CALL_SV_MANTA": { "content": [ "9f37331609347a1685ba4862d8583b2f", "2111e88e54fdd01f0492901606bcea6f" ], - "timestamp": "2026-03-01T20:55:33.556714847", + "timestamp": "2026-06-03T11:38:41.791162924", "meta": { - "nf-test": "0.9.4", + "nf-test": "0.9.5", "nextflow": "25.10.4" } }, - "CALL_SV_MANTA - wgs, 
stub": { + "CALL_SV_MANTA - stub": { "content": [ { "0": [ @@ -127,9 +127,9 @@ ] } ], - "timestamp": "2026-03-05T12:58:54.578278212", + "timestamp": "2026-06-03T11:39:05.440330127", "meta": { - "nf-test": "0.9.4", + "nf-test": "0.9.5", "nextflow": "25.10.4" } } diff --git a/workflows/raredisease.nf b/workflows/raredisease.nf index b514dd6c5..420c43c7f 100644 --- a/workflows/raredisease.nf +++ b/workflows/raredisease.nf @@ -104,6 +104,7 @@ workflow RAREDISEASE { ch_hgnc_ids ch_intervals_wgs ch_intervals_y + ch_manta_regions ch_me_references ch_me_svdb_resources ch_ml_model @@ -636,6 +637,7 @@ workflow RAREDISEASE { ch_genome_fai, ch_genome_fasta, ch_genome_hisat2index, + ch_manta_regions, ch_mitosalt_config, ch_mapped.mt_bam_bai, ch_mt_fai, @@ -646,7 +648,6 @@ workflow RAREDISEASE { ch_input_fastqs, ch_subdepth, ch_svcaller_priority, - ch_target_bed, skip_germlinecnvcaller, skip_mitosalt, val_analysis_type,