Skip to content

Commit aef6ac9

Browse files
Filter wham-only DELs and scramble-only SVAs in CleanVcf & docs updates (#740)
1 parent 6ea99cf commit aef6ac9

File tree

24 files changed

+357
-390
lines changed

24 files changed

+357
-390
lines changed

.github/.dockstore.yml

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -198,6 +198,15 @@ workflows:
198198
tags:
199199
- /.*/
200200

201+
- subclass: WDL
202+
name: VisualizeCnvs
203+
primaryDescriptorPath: /wdl/VisualizeCnvs.wdl
204+
filters:
205+
branches:
206+
- main
207+
tags:
208+
- /.*/
209+
201210
- subclass: WDL
202211
name: SingleSamplePipeline
203212
primaryDescriptorPath: /wdl/GATKSVPipelineSingleSample.wdl

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
A structural variation discovery pipeline for Illumina short-read whole-genome sequencing (WGS) data.
44

5-
For technical documentation on GATK-SV, including how to run the pipeline, please refer to our website.
5+
For technical documentation on GATK-SV, including how to run the pipeline, please refer to our [website](https://broadinstitute.github.io/gatk-sv/).
66

77
## Repository structure
88
* `/carrot`: [Carrot](https://github.com/broadinstitute/carrot) tests

inputs/templates/terra_workspaces/cohort_mode/cohort_mode_workspace_dashboard.md.tmpl

Lines changed: 4 additions & 214 deletions
Large diffs are not rendered by default.

inputs/templates/terra_workspaces/cohort_mode/samples_1kgp_156.tsv.tmpl

Lines changed: 157 additions & 0 deletions
Large diffs are not rendered by default.

inputs/templates/terra_workspaces/cohort_mode/samples_1kgp.tsv.tmpl renamed to inputs/templates/terra_workspaces/cohort_mode/samples_1kgp_312.tsv.tmpl

File renamed without changes.
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
{
2+
"VisualizeCnvs.vcf_or_bed": "${this.filtered_vcf}",
3+
"VisualizeCnvs.prefix": "${this.sample_set_set_id}",
4+
"VisualizeCnvs.median_files": "${this.sample_sets.median_cov}",
5+
"VisualizeCnvs.rd_files": "${this.sample_sets.merged_bincov}",
6+
"VisualizeCnvs.ped_file": "${workspace.cohort_ped_file}",
7+
"VisualizeCnvs.min_size": 50000,
8+
"VisualizeCnvs.flags": "-s 999999999",
9+
"VisualizeCnvs.sv_pipeline_docker": "${workspace.sv_pipeline_docker}"
10+
}

inputs/templates/test/VisualizeCnvs/VisualizeCnvs.json.tmpl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,6 @@
55
"VisualizeCnvs.rd_files": [{{ test_batch.merged_coverage_file | tojson }}],
66
"VisualizeCnvs.ped_file": {{ test_batch.ped_file | tojson }},
77
"VisualizeCnvs.min_size": 50000,
8-
"VisualizeCnvs.flags": "",
8+
"VisualizeCnvs.flags": "-s 999999999",
99
"VisualizeCnvs.sv_pipeline_docker": {{ dockers.sv_pipeline_docker | tojson }}
1010
}

scripts/test/terra_validation.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,7 @@ def main():
113113
parser.add_argument("-j", "--womtool-jar", help="Path to womtool jar", required=True)
114114
parser.add_argument("-n", "--num-input-jsons",
115115
help="Number of Terra input JSONs expected",
116-
required=False, default=25, type=int)
116+
required=False, default=26, type=int)
117117
parser.add_argument("--log-level",
118118
help="Specify level of logging information, ie. info, warning, error (not case-sensitive)",
119119
required=False, default="INFO")

wdl/CleanVcfChromosome.wdl

Lines changed: 62 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ workflow CleanVcfChromosome {
5353
RuntimeAttr? runtime_override_stitch_fragmented_cnvs
5454
RuntimeAttr? runtime_override_final_cleanup
5555
RuntimeAttr? runtime_override_rescue_me_dels
56+
RuntimeAttr? runtime_attr_add_high_fp_rate_filters
5657

5758
# Clean vcf 1b
5859
RuntimeAttr? runtime_attr_override_subset_large_cnvs_1b
@@ -299,9 +300,17 @@ workflow CleanVcfChromosome {
299300
runtime_attr_override = runtime_override_rescue_me_dels
300301
}
301302
302-
call FinalCleanup {
303+
call AddHighFDRFilters {
303304
input:
304305
vcf=RescueMobileElementDeletions.out,
306+
prefix="~{prefix}.high_fdr_filtered",
307+
sv_pipeline_docker=sv_pipeline_docker,
308+
runtime_attr_override=runtime_attr_add_high_fp_rate_filters
309+
}
310+
311+
call FinalCleanup {
312+
input:
313+
vcf=AddHighFDRFilters.out,
305314
contig=contig,
306315
prefix="~{prefix}.final_cleanup",
307316
sv_pipeline_docker=sv_pipeline_docker,
@@ -799,6 +808,58 @@ task StitchFragmentedCnvs {
799808
}
800809
}
801810

811+
# Add the HIGH_ALGORITHM_FDR FILTER status to categories of variants with a high false-positive rate: Wham-only DELs and Scramble-only SVA insertions with HIGH_SR_BACKGROUND
812+
task AddHighFDRFilters {
813+
input {
814+
File vcf
815+
String prefix
816+
String sv_pipeline_docker
817+
RuntimeAttr? runtime_attr_override
818+
}
819+
820+
Float input_size = size(vcf, "GiB")
821+
RuntimeAttr runtime_default = object {
822+
mem_gb: 3.75,
823+
disk_gb: ceil(10.0 + input_size * 3.0),
824+
cpu_cores: 1,
825+
preemptible_tries: 3,
826+
max_retries: 1,
827+
boot_disk_gb: 10
828+
}
829+
RuntimeAttr runtime_override = select_first([runtime_attr_override, runtime_default])
830+
runtime {
831+
memory: "~{select_first([runtime_override.mem_gb, runtime_default.mem_gb])} GB"
832+
disks: "local-disk ~{select_first([runtime_override.disk_gb, runtime_default.disk_gb])} HDD"
833+
cpu: select_first([runtime_override.cpu_cores, runtime_default.cpu_cores])
834+
preemptible: select_first([runtime_override.preemptible_tries, runtime_default.preemptible_tries])
835+
maxRetries: select_first([runtime_override.max_retries, runtime_default.max_retries])
836+
docker: sv_pipeline_docker
837+
bootDiskSizeGb: select_first([runtime_override.boot_disk_gb, runtime_default.boot_disk_gb])
838+
}
839+
840+
command <<<
841+
set -euo pipefail
842+
843+
python <<CODE
844+
import pysam
845+
with pysam.VariantFile("~{vcf}", 'r') as fin:
846+
header = fin.header
847+
header.add_line("##FILTER=<ID=HIGH_ALGORITHM_FDR,Description=\"Categories of variants with low precision including Wham-only deletions and certain Scramble SVAs\">")
848+
with pysam.VariantFile("~{prefix}.vcf.gz", 'w', header=header) as fo:
849+
for record in fin:
850+
if (record.info['ALGORITHMS'] == ('wham',) and record.info['SVTYPE'] == 'DEL') or \
851+
(record.info['ALGORITHMS'] == ('scramble',) and record.info['HIGH_SR_BACKGROUND'] and record.alts == ('<INS:ME:SVA>',)):
852+
record.filter.add('HIGH_ALGORITHM_FDR')
853+
fo.write(record)
854+
CODE
855+
>>>
856+
857+
output {
858+
File out = "~{prefix}.vcf.gz"
859+
}
860+
}
861+
862+
802863

803864
# Final VCF cleanup
804865
task FinalCleanup {

website/.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
# Generated files
88
.docusaurus
99
.cache-loader
10+
package-lock.json
1011

1112
# Misc
1213
.DS_Store

0 commit comments

Comments
 (0)