Skip to content

Commit a68931d

Browse files
authored
Merge pull request #823 from nf-core/bcftools_norm
Add an option to skip multiallelics split step in SNV calling workflows.
2 parents 59a7eb2 + d771f2f commit a68931d

12 files changed

Lines changed: 68 additions & 32 deletions

File tree

CHANGELOG.md

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
3030
- Env variable NXF_SINGULARITY_NEW_PID_NAMESPACE = false to accommodate hisat2 running with latest Nextflow and Singularity [#775](https://github.com/nf-core/raredisease/pull/775)
3131
- Parameter `exclude_alt` to filter alignments to alt/unplaced contigs after alignment using samtools view, retaining only primary chromosomes (GRCh37: 1-22,X,Y,MT / GRCh38: chr1-chr22,chrX,chrY,chrM). Note that enabling this will restrict variant calling to these chromosomes [#803](https://github.com/nf-core/raredisease/pull/803)
3232
- Parameters `save_all_mapped_as_cram` and `save_noalt_mapped_as_cram` to replace `save_mapped_as_cram`, allowing independent control over publishing unfiltered and alt-filtered alignment files as CRAM [#807](https://github.com/nf-core/raredisease/pull/807)
33-
- Parameter `run_vcfanno_db_sanity_check` to check vcfanno database files for zero records and remove the corresponding annotation blocks from the TOML config before running vcfanno [#802](https://github.com/nf-core/raredisease/pull/821)
33+
- Parameter `run_vcfanno_db_sanity_check` to check vcfanno database files for zero records and remove the corresponding annotation blocks from the TOML config before running vcfanno [#821](https://github.com/nf-core/raredisease/pull/821)
34+
- Parameter `skip_split_multiallelics` to skip the `bcftools norm --multiallelics -both` step in SNV calling (DeepVariant and Sentieon), which can degrade indel quality in single-interval runs [#823](https://github.com/nf-core/raredisease/pull/823)
3435

3536
### `Changed`
3637

@@ -87,6 +88,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
8788
| | save_all_mapped_as_cram |
8889
| | save_noalt_mapped_as_cram |
8990
| | run_vcfanno_db_sanity_check |
91+
| | skip_split_multiallelics |
9092

9193
### Tool updates
9294

docs/usage.md

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -246,18 +246,20 @@ The mandatory and optional parameters for each category are tabulated below.
246246

247247
##### 4. Variant calling - SNV
248248

249-
| Mandatory | Optional |
250-
| -------------------------- | --------------------------- |
251-
| variant_caller<sup>1</sup> | known_dbsnp<sup>2</sup> |
252-
| ml_model<sup>2</sup> | known_dbsnp_tbi<sup>2</sup> |
253-
| analysis_type<sup>3</sup> | call_interval<sup>2</sup> |
254-
| | known_dbsnp_tbi<sup>2</sup> |
255-
| | par_bed<sup>4</sup> |
249+
| Mandatory | Optional |
250+
| -------------------------- | ------------------------------------ |
251+
| variant_caller<sup>1</sup> | known_dbsnp<sup>2</sup> |
252+
| ml_model<sup>2</sup> | known_dbsnp_tbi<sup>2</sup> |
253+
| analysis_type<sup>3</sup> | call_interval<sup>2</sup> |
254+
| | known_dbsnp_tbi<sup>2</sup> |
255+
| | par_bed<sup>4</sup> |
256+
| | skip_split_multiallelics<sup>5</sup> |
256257

257258
<sup>1</sup>Default variant caller is DeepVariant, but you have the option to use Sentieon as well.<br />
258259
<sup>2</sup>These parameters are only used by Sentieon.<br />
259260
<sup>3</sup>Default is `WGS`, but you have the option to choose `WES` and `mito` as well.<br />
260261
<sup>4</sup>This parameter is only used by DeepVariant.<br />
262+
<sup>5</sup>Skips `bcftools norm --multiallelics -both` in both DeepVariant and Sentieon SNV calling. Recommended for single-interval runs to avoid indel quality degradation. See [#813](https://github.com/nf-core/raredisease/issues/813) for details.<br />
261263

262264
##### 5. Variant calling - Structural variants
263265

main.nf

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ workflow NFCORE_RAREDISEASE {
5151
val_cadd_resources
5252
val_call_interval
5353
val_concatenate_snv_calls
54+
val_skip_split_multiallelics
5455
val_exclude_alt
5556
val_extract_alignments
5657
val_fai
@@ -479,6 +480,7 @@ workflow NFCORE_RAREDISEASE {
479480
val_analysis_type,
480481
val_cadd_resources,
481482
val_concatenate_snv_calls,
483+
val_skip_split_multiallelics,
482484
val_exclude_alt,
483485
val_extract_alignments,
484486
val_genome,
@@ -567,6 +569,7 @@ workflow {
567569
params.cadd_resources,
568570
params.call_interval,
569571
params.concatenate_snv_calls,
572+
params.skip_split_multiallelics,
570573
params.exclude_alt,
571574
params.extract_alignments,
572575
params.fai,

nextflow.config

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ params {
2828
// Main options
2929
analysis_type = 'wgs'
3030
bait_padding = 100
31+
cadd_resources = null
3132
concatenate_snv_calls = false
3233
exclude_alt = false
3334
extract_alignments = false
@@ -40,9 +41,9 @@ params {
4041
save_all_mapped_as_cram = false
4142
save_noalt_mapped_as_cram = false
4243
scatter_count = 20
43-
skip_tools = null
44+
skip_split_multiallelics = false
4445
skip_subworkflows = null
45-
cadd_resources = null
46+
skip_tools = null
4647
platform = 'illumina'
4748

4849
// reference file options

nextflow_schema.json

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -602,6 +602,12 @@
602602
"description": "Number of intervals to split your genome into (used to parallelize annotations)",
603603
"fa_icon": "fas fa-less-than"
604604
},
605+
"skip_split_multiallelics": {
606+
"type": "boolean",
607+
"fa_icon": "fas fa-forward",
608+
"description": "Skip the split multiallelics step in SNV calling.",
609+
"help_text": "Skips bcftools norm --multiallelics -both in CALL_SNV_SENTIEON and CALL_SNV_DEEPVARIANT. Useful for single-interval runs where the step is unnecessary and can degrade indel quality."
610+
},
605611
"skip_subworkflows": {
606612
"type": "string",
607613
"fa_icon": "fas fa-forward",

subworkflows/local/call_snv/main.nf

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -36,10 +36,11 @@ workflow CALL_SNV {
3636
ch_par_bed // channel: [optional] [ val(meta), path(bed) ]
3737
ch_pcr_indel_model // channel: [optional] [ val(sentieon_dnascope_pcr_indel_model) ]
3838
ch_target_bed // channel: [mandatory] [ val(meta), path(bed), path(index) ]
39-
val_analysis_type // string: 'wgs', 'wes', or 'mito'
40-
val_concatenate_snv_calls // boolean
41-
val_run_mt_for_wes // boolean
42-
val_variant_caller // string: 'deepvariant' or 'sentieon'
39+
val_analysis_type // string: 'wgs', 'wes', or 'mito'
40+
val_concatenate_snv_calls // boolean
41+
val_run_mt_for_wes // boolean
42+
val_skip_split_multiallelics // boolean
43+
val_variant_caller // string: 'deepvariant' or 'sentieon'
4344

4445
main:
4546
ch_concat_publish = channel.empty()
@@ -67,7 +68,8 @@ workflow CALL_SNV {
6768
ch_genome_fasta,
6869
ch_par_bed,
6970
ch_target_bed,
70-
val_analysis_type
71+
val_analysis_type,
72+
val_skip_split_multiallelics
7173
)
7274
ch_deepvar_vcf = CALL_SNV_DEEPVARIANT.out.vcf
7375
ch_deepvar_tbi = CALL_SNV_DEEPVARIANT.out.tabix
@@ -86,7 +88,8 @@ workflow CALL_SNV {
8688
ch_genome_fai,
8789
ch_genome_fasta,
8890
ch_ml_model,
89-
ch_pcr_indel_model
91+
ch_pcr_indel_model,
92+
val_skip_split_multiallelics
9093
)
9194
ch_sentieon_vcf = CALL_SNV_SENTIEON.out.vcf
9295
ch_sentieon_tbi = CALL_SNV_SENTIEON.out.tabix

subworkflows/local/call_snv/tests/main.nf.test

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,8 @@ nextflow_workflow {
5959
input[24] = 'wgs'
6060
input[25] = false
6161
input[26] = false
62-
input[27] = 'deepvariant'
62+
input[27] = false
63+
input[28] = 'deepvariant'
6364
"""
6465
}
6566
}
@@ -127,7 +128,8 @@ nextflow_workflow {
127128
input[24] = 'wgs'
128129
input[25] = false
129130
input[26] = false
130-
input[27] = 'deepvariant'
131+
input[27] = false
132+
input[28] = 'deepvariant'
131133
"""
132134
}
133135
}

subworkflows/local/call_snv_deepvariant/main.nf

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,10 @@ workflow CALL_SNV_DEEPVARIANT {
1818
ch_genome_chrsizes // channel: [mandatory] [ path(chrsizes) ]
1919
ch_genome_fai // channel: [mandatory] [ val(meta), path(fai) ]
2020
ch_genome_fasta // channel: [mandatory] [ val(meta), path(fasta) ]
21-
ch_par_bed // channel: [optional] [ val(meta), path(bed) ]
22-
ch_target_bed // channel: [mandatory] [ val(meta), path(bed), path(index) ]
23-
val_analysis_type // boolean
21+
ch_par_bed // channel: [optional] [ val(meta), path(bed) ]
22+
ch_target_bed // channel: [mandatory] [ val(meta), path(bed), path(index) ]
23+
val_analysis_type // string: 'wgs', 'wes', or 'mito'
24+
val_skip_split_multiallelics // boolean
2425

2526
main:
2627

@@ -55,11 +56,15 @@ workflow CALL_SNV_DEEPVARIANT {
5556
ch_split_multi_in = GLNEXUS.out.bcf
5657
.map{ meta, bcf ->
5758
return [meta, bcf, []] }
58-
SPLIT_MULTIALLELICS_GL (ch_split_multi_in, ch_genome_fasta)
5959

60-
ch_remove_dup_in = SPLIT_MULTIALLELICS_GL.out.vcf
61-
.map{ meta, vcf ->
62-
return [meta, vcf, []] }
60+
if (!val_skip_split_multiallelics) {
61+
SPLIT_MULTIALLELICS_GL (ch_split_multi_in, ch_genome_fasta)
62+
ch_remove_dup_in = SPLIT_MULTIALLELICS_GL.out.vcf
63+
.map{ meta, vcf ->
64+
return [meta, vcf, []] }
65+
} else {
66+
ch_remove_dup_in = ch_split_multi_in
67+
}
6368
REMOVE_DUPLICATES_GL (ch_remove_dup_in, ch_genome_fasta)
6469

6570
ch_genome_chrsizes.flatten().map{chromsizes ->

subworkflows/local/call_snv_deepvariant/tests/main.nf.test

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ nextflow_workflow {
3737
input[6] = channel.of([[id:'par'], []])
3838
input[7] = channel.of([[id:'target'], [], []])
3939
input[8] = 'wgs'
40+
input[9] = false
4041
"""
4142
}
4243
}
@@ -73,6 +74,7 @@ nextflow_workflow {
7374
input[6] = channel.of([[id:'par'], []])
7475
input[7] = channel.of([[id:'target'], [], []])
7576
input[8] = 'wgs'
77+
input[9] = false
7678
"""
7779
}
7880
}

subworkflows/local/call_snv_sentieon/main.nf

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,9 @@ workflow CALL_SNV_SENTIEON {
2323
ch_genome_chrsizes // channel: [mandatory] [ path(chrsizes) ]
2424
ch_genome_fai // channel: [mandatory] [ val(meta), path(fai) ]
2525
ch_genome_fasta // channel: [mandatory] [ val(meta), path(fasta) ]
26-
ch_ml_model // channel: [mandatory] [ val(meta), path(model) ]
27-
ch_pcr_indel_model // channel: [optional] [ val(sentieon_dnascope_pcr_indel_model) ]
26+
ch_ml_model // channel: [mandatory] [ val(meta), path(model) ]
27+
ch_pcr_indel_model // channel: [optional] [ val(sentieon_dnascope_pcr_indel_model) ]
28+
val_skip_split_multiallelics // boolean
2829

2930
main:
3031
// Combine bam and intervals
@@ -79,11 +80,16 @@ workflow CALL_SNV_SENTIEON {
7980

8081
ch_vcf_idx_case = ch_vcf_idx_merge_in.single.mix(ch_split_multi_in)
8182

82-
SPLIT_MULTIALLELICS_SEN(ch_vcf_idx_case, ch_genome_fasta)
83-
84-
ch_remove_dup_in = SPLIT_MULTIALLELICS_SEN.out.vcf
85-
.map{meta, vcf ->
86-
return [meta, vcf, []]}
83+
if (!val_skip_split_multiallelics) {
84+
SPLIT_MULTIALLELICS_SEN(ch_vcf_idx_case, ch_genome_fasta)
85+
ch_remove_dup_in = SPLIT_MULTIALLELICS_SEN.out.vcf
86+
.map{meta, vcf ->
87+
return [meta, vcf, []]}
88+
} else {
89+
ch_remove_dup_in = ch_vcf_idx_case
90+
.map{meta, vcf, _idx ->
91+
return [meta, vcf, []]}
92+
}
8793

8894
REMOVE_DUPLICATES_SEN(ch_remove_dup_in, ch_genome_fasta)
8995

0 commit comments

Comments
 (0)