Skip to content

Commit 3ce0ddd

Browse files
authored
Modify genotype filtering to ingest cutoffs from a table (#864)
1 parent 8d48278 commit 3ce0ddd

29 files changed

+566
-428
lines changed

.github/.dockstore.yml

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,15 @@ workflows:
171171
tags:
172172
- /.*/
173173

174+
- subclass: WDL
175+
name: ScoreGenotypes
176+
primaryDescriptorPath: /wdl/ScoreGenotypes.wdl
177+
filters:
178+
branches:
179+
- main
180+
tags:
181+
- /.*/
182+
174183
- subclass: WDL
175184
name: FilterGenotypes
176185
primaryDescriptorPath: /wdl/FilterGenotypes.wdl

inputs/templates/terra_workspaces/cohort_mode/workflow_configurations/FilterGenotypes.json.tmpl

Lines changed: 2 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -2,24 +2,8 @@
22
"FilterGenotypes.vcf": "${this.concordance_vcf}",
33
"FilterGenotypes.output_prefix": "${this.sample_set_set_id}",
44
"FilterGenotypes.ploidy_table": "${this.ploidy_table}",
5-
"FilterGenotypes.gq_recalibrator_model_file": "${workspace.recalibrate_gq_model_file}",
6-
"FilterGenotypes.sl_filter_args": "--small-del-threshold 93 --medium-del-threshold 150 --small-dup-threshold -51 --medium-dup-threshold -4 --ins-threshold -13 --inv-threshold -19",
7-
8-
"FilterGenotypes.genome_tracks": [
9-
{{ reference_resources.recalibrate_gq_genome_track_repeatmasker | tojson }},
10-
{{ reference_resources.recalibrate_gq_genome_track_segdup | tojson }},
11-
{{ reference_resources.recalibrate_gq_genome_track_simple_repeats | tojson }},
12-
{{ reference_resources.recalibrate_gq_genome_track_umap100 | tojson }},
13-
{{ reference_resources.recalibrate_gq_genome_track_umap24 | tojson }}
14-
],
15-
"FilterGenotypes.recalibrate_gq_args": [
16-
"--keep-homvar false",
17-
"--keep-homref true",
18-
"--keep-multiallelic true",
19-
"--skip-genotype-filtering true",
20-
"--min-samples-to-estimate-allele-frequency -1"
21-
],
22-
5+
6+
"FilterGenotypes.sl_cutoff_table": "${workspace.sl_cutoff_table}",
237
"FilterGenotypes.ped_file": "${workspace.cohort_ped_file}",
248
"FilterGenotypes.primary_contigs_fai": "${workspace.primary_contigs_fai}",
259
"FilterGenotypes.site_level_comparison_datasets": [
@@ -34,8 +18,6 @@
3418
"disk_gb": 100
3519
},
3620

37-
"FilterGenotypes.linux_docker": "${workspace.linux_docker}",
38-
"FilterGenotypes.gatk_docker": "${workspace.gq_recalibrator_docker}",
3921
"FilterGenotypes.sv_base_mini_docker": "${workspace.sv_base_mini_docker}",
4022
"FilterGenotypes.sv_pipeline_docker": "${workspace.sv_pipeline_docker}"
4123
}
inputs/templates/terra_workspaces/cohort_mode/workflow_configurations/ScoreGenotypes.json.tmpl

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
{
2+
"ScoreGenotypes.vcf": "${this.concordance_vcf}",
3+
"ScoreGenotypes.output_prefix": "${this.sample_set_set_id}",
4+
"ScoreGenotypes.gq_recalibrator_model_file": "${workspace.recalibrate_gq_model_file}",
5+
6+
"ScoreGenotypes.recalibrate_gq_args": [
7+
"--keep-homvar false",
8+
"--keep-homref true",
9+
"--keep-multiallelic true",
10+
"--skip-genotype-filtering true",
11+
"--min-samples-to-estimate-allele-frequency -1"
12+
],
13+
"ScoreGenotypes.genome_tracks": [
14+
{{ reference_resources.recalibrate_gq_genome_track_repeatmasker | tojson }},
15+
{{ reference_resources.recalibrate_gq_genome_track_segdup | tojson }},
16+
{{ reference_resources.recalibrate_gq_genome_track_simple_repeats | tojson }},
17+
{{ reference_resources.recalibrate_gq_genome_track_umap100 | tojson }},
18+
{{ reference_resources.recalibrate_gq_genome_track_umap24 | tojson }}
19+
],
20+
21+
"ScoreGenotypes.linux_docker": "${workspace.linux_docker}",
22+
"ScoreGenotypes.gatk_docker": "${workspace.gq_recalibrator_docker}",
23+
"ScoreGenotypes.sv_base_mini_docker": "${workspace.sv_base_mini_docker}",
24+
"ScoreGenotypes.sv_pipeline_docker": "${workspace.sv_pipeline_docker}"
25+
}

inputs/templates/terra_workspaces/cohort_mode/workspace.tsv.tmpl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@ reference_version {{ reference_resources.reference_version }}
6060
rmsk {{ reference_resources.rmsk }}
6161
segdups {{ reference_resources.segdups }}
6262
seed_cutoffs {{ reference_resources.seed_cutoffs }}
63+
sl_cutoff_table {{ reference_resources.sl_cutoff_table }}
6364
stratification_config_part1 {{ reference_resources.stratification_config_part1 }}
6465
stratification_config_part2 {{ reference_resources.stratification_config_part2 }}
6566
wgd_scoring_mask {{ reference_resources.wgd_scoring_mask }}

inputs/templates/terra_workspaces/single_sample/GATKSVPipelineSingleSample.json.tmpl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,7 @@
112112
"GATKSVPipelineSingleSample.RefineComplexVariants.n_per_split": "15000",
113113

114114
"GATKSVPipelineSingleSample.gq_recalibrator_model_file": "${workspace.recalibrate_gq_model_file}",
115+
"GATKSVPipelineSingleSample.sl_cutoff_table": "${workspace.sl_cutoff_table}",
115116
"GATKSVPipelineSingleSample.sl_filter_args": "--gq-scale-factor 1.012072 --upper-sl-cap 1500 --lower-sl-cap -1000 --sl-shift 300",
116117
"GATKSVPipelineSingleSample.genome_tracks": [
117118
{{ reference_resources.recalibrate_gq_genome_track_repeatmasker | tojson }},

inputs/templates/terra_workspaces/single_sample/workspace.tsv.tmpl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,5 +84,6 @@ reference_segdups {{ reference_resources.segdups }}
8484
reference_seed_cutoffs {{ reference_resources.seed_cutoffs }}
8585
reference_wgd_scoring_mask {{ reference_resources.wgd_scoring_mask }}
8686
reference_wham_include_list_bed_file {{ reference_resources.wham_include_list_bed_file }}
87+
sl_cutoff_table {{ reference_resources.sl_cutoff_table }}
8788
stratification_config_part1 {{ reference_resources.stratification_config_part1 }}
8889
stratification_config_part2 {{ reference_resources.stratification_config_part2 }}

inputs/templates/test/FilterGenotypes/FilterGenotypes.fixed_cutoffs.json.tmpl

Lines changed: 1 addition & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -2,24 +2,8 @@
22
"FilterGenotypes.vcf": {{ test_batch.concordance_vcf | tojson }},
33
"FilterGenotypes.output_prefix": {{ test_batch.name | tojson }},
44
"FilterGenotypes.ploidy_table": {{ test_batch.ploidy_table | tojson }},
5-
"FilterGenotypes.gq_recalibrator_model_file": {{ reference_resources.aou_recalibrate_gq_model_file | tojson }},
6-
"FilterGenotypes.sl_filter_args": "--small-del-threshold 93 --medium-del-threshold 150 --small-dup-threshold -51 --medium-dup-threshold -4 --ins-threshold -13 --inv-threshold -19",
7-
8-
"FilterGenotypes.genome_tracks": [
9-
{{ reference_resources.recalibrate_gq_genome_track_repeatmasker | tojson }},
10-
{{ reference_resources.recalibrate_gq_genome_track_segdup | tojson }},
11-
{{ reference_resources.recalibrate_gq_genome_track_simple_repeats | tojson }},
12-
{{ reference_resources.recalibrate_gq_genome_track_umap100 | tojson }},
13-
{{ reference_resources.recalibrate_gq_genome_track_umap24 | tojson }}
14-
],
15-
"FilterGenotypes.recalibrate_gq_args": [
16-
"--keep-homvar false",
17-
"--keep-homref true",
18-
"--keep-multiallelic true",
19-
"--skip-genotype-filtering true",
20-
"--min-samples-to-estimate-allele-frequency -1"
21-
],
225

6+
"FilterGenotypes.sl_cutoff_table": {{ test_batch.sl_cutoff_table | tojson }},
237
"FilterGenotypes.ped_file": {{ test_batch.ped_file | tojson }},
248
"FilterGenotypes.primary_contigs_fai": {{ reference_resources.primary_contigs_fai | tojson }},
259
"FilterGenotypes.site_level_comparison_datasets": [
@@ -38,8 +22,6 @@
3822
"disk_gb": 100
3923
},
4024

41-
"FilterGenotypes.linux_docker": {{ dockers.linux_docker | tojson }},
42-
"FilterGenotypes.gatk_docker": {{ dockers.gq_recalibrator_docker | tojson }},
4325
"FilterGenotypes.sv_base_mini_docker": {{ dockers.sv_base_mini_docker | tojson }},
4426
"FilterGenotypes.sv_pipeline_docker": {{ dockers.sv_pipeline_docker | tojson }}
4527
}

inputs/templates/test/FilterGenotypes/FilterGenotypes.optimize_cutoffs.json.tmpl

Lines changed: 2 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -2,26 +2,10 @@
22
"FilterGenotypes.vcf": {{ test_batch.concordance_vcf | tojson }},
33
"FilterGenotypes.output_prefix": {{ test_batch.name | tojson }},
44
"FilterGenotypes.ploidy_table": {{ test_batch.ploidy_table | tojson }},
5-
"FilterGenotypes.truth_json": {{ test_batch.recalibrate_gq_truth_json | tojson }},
6-
7-
"FilterGenotypes.primary_contigs_fai": {{ reference_resources.primary_contigs_fai | tojson }},
8-
"FilterGenotypes.gq_recalibrator_model_file": {{ reference_resources.aou_recalibrate_gq_model_file | tojson }},
9-
"FilterGenotypes.genome_tracks": [
10-
{{ reference_resources.recalibrate_gq_genome_track_repeatmasker | tojson }},
11-
{{ reference_resources.recalibrate_gq_genome_track_segdup | tojson }},
12-
{{ reference_resources.recalibrate_gq_genome_track_simple_repeats | tojson }},
13-
{{ reference_resources.recalibrate_gq_genome_track_umap100 | tojson }},
14-
{{ reference_resources.recalibrate_gq_genome_track_umap24 | tojson }}
15-
],
16-
"FilterGenotypes.recalibrate_gq_args": [
17-
"--keep-homvar false",
18-
"--keep-homref true",
19-
"--keep-multiallelic true",
20-
"--skip-genotype-filtering true",
21-
"--min-samples-to-estimate-allele-frequency -1"
22-
],
235

6+
"FilterGenotypes.sl_cutoff_table": {{ test_batch.sl_cutoff_table | tojson }},
247
"FilterGenotypes.ped_file": {{ test_batch.ped_file | tojson }},
8+
"FilterGenotypes.primary_contigs_fai": {{ reference_resources.primary_contigs_fai | tojson }},
259
"FilterGenotypes.site_level_comparison_datasets": [
2610
{{ reference_resources.ccdg_abel_site_level_benchmarking_dataset | tojson }},
2711
{{ reference_resources.gnomad_v4_site_level_benchmarking_dataset | tojson }},
@@ -38,8 +22,6 @@
3822
"disk_gb": 50
3923
},
4024

41-
"FilterGenotypes.linux_docker": {{ dockers.linux_docker | tojson }},
42-
"FilterGenotypes.gatk_docker": {{ dockers.gq_recalibrator_docker | tojson }},
4325
"FilterGenotypes.sv_base_mini_docker": {{ dockers.sv_base_mini_docker | tojson }},
4426
"FilterGenotypes.sv_pipeline_docker": {{ dockers.sv_pipeline_docker | tojson }}
4527
}

inputs/templates/test/GATKSVPipelineSingleSample/GATKSVPipelineSingleSample.json.tmpl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,7 @@
114114

115115
"GATKSVPipelineSingleSample.gq_recalibrator_model_file": {{ reference_resources.hgsvc_release3_gq_model_file | tojson }},
116116
"GATKSVPipelineSingleSample.sl_filter_args": "--gq-scale-factor 1.012072 --upper-sl-cap 1500 --lower-sl-cap -1000 --sl-shift 300",
117+
"GATKSVPipelineSingleSample.sl_cutoff_table": {{ ref_panel.sl_cutoff_table | tojson }},
117118
"GATKSVPipelineSingleSample.genome_tracks": [
118119
{{ reference_resources.recalibrate_gq_genome_track_repeatmasker | tojson }},
119120
{{ reference_resources.recalibrate_gq_genome_track_segdup | tojson }},
inputs/templates/test/ScoreGenotypes/ScoreGenotypes.json.tmpl

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
{
2+
"ScoreGenotypes.vcf": {{ test_batch.concordance_vcf | tojson }},
3+
"ScoreGenotypes.output_prefix": {{ test_batch.name | tojson }},
4+
"ScoreGenotypes.gq_recalibrator_model_file": {{ reference_resources.aou_recalibrate_gq_model_file | tojson }},
5+
6+
"ScoreGenotypes.recalibrate_gq_args": [
7+
"--keep-homvar false",
8+
"--keep-homref true",
9+
"--keep-multiallelic true",
10+
"--skip-genotype-filtering true",
11+
"--min-samples-to-estimate-allele-frequency -1"
12+
],
13+
"ScoreGenotypes.genome_tracks": [
14+
{{ reference_resources.recalibrate_gq_genome_track_repeatmasker | tojson }},
15+
{{ reference_resources.recalibrate_gq_genome_track_segdup | tojson }},
16+
{{ reference_resources.recalibrate_gq_genome_track_simple_repeats | tojson }},
17+
{{ reference_resources.recalibrate_gq_genome_track_umap100 | tojson }},
18+
{{ reference_resources.recalibrate_gq_genome_track_umap24 | tojson }}
19+
],
20+
21+
"ScoreGenotypes.linux_docker": {{ dockers.linux_docker | tojson }},
22+
"ScoreGenotypes.gatk_docker": {{ dockers.gq_recalibrator_docker | tojson }},
23+
"ScoreGenotypes.sv_base_mini_docker": {{ dockers.sv_base_mini_docker | tojson }},
24+
"ScoreGenotypes.sv_pipeline_docker": {{ dockers.sv_pipeline_docker | tojson }}
25+
}

0 commit comments

Comments
 (0)