Skip to content

Commit 044b12a

Browse files
authored
Standardize SVTYPE nomenclature for INS variants after minGQ (#106)
1 parent 560875f commit 044b12a

File tree

4 files changed

+11
-3
lines changed

4 files changed

+11
-3
lines changed

input_values/dockers.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
"sv_base_docker" : "us.gcr.io/broad-dsde-methods/gatk-sv/sv-base:mw-gnomad-02-6a66c96",
1414
"sv_base_mini_docker" : "us.gcr.io/broad-dsde-methods/gatk-sv/sv-base-mini:mw-gnomad-02-6a66c96",
1515
"sv_pipeline_base_docker" : "us.gcr.io/broad-dsde-methods/cwhelan/sv-pipeline-base:cw-pesrgtfilter-5bb73a",
16-
"sv_pipeline_docker" : "us.gcr.io/broad-dsde-methods/cwhelan/sv-pipeline:cw-end2header-69e9e19",
16+
"sv_pipeline_docker" : "us.gcr.io/broad-dsde-methods/markw/sv-pipeline:rlc-vcf-qc-mei-2fd09c8",
1717
"sv_pipeline_qc_docker" : "us.gcr.io/broad-dsde-methods/markw/sv-pipeline-qc:mw-xz-fixes-7cbffee",
1818
"sv_pipeline_rdtest_docker" : "us.gcr.io/broad-dsde-methods/gatk-sv/sv-pipeline-rdtest:mw-gnomad-02-6a66c96",
1919
"wham_docker" : "us.gcr.io/broad-dsde-methods/wham:8645aa"

src/sv-pipeline/scripts/downstream_analysis_and_filtering/apply_minGQ_filter.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -357,6 +357,9 @@ def main():
357357
parser.add_argument('--dropEmpties', default=False, action='store_true',
358358
help='After GT reassignments, drop any SV with no remaining ' +
359359
' non-ref samples (default: keep all SV).')
360+
parser.add_argument('--simplify-INS-SVTYPEs', default=False, action='store_true',
361+
help='Resets the SVTYPE of all INS variants, including MEIs, ' +
362+
'to be SVTYPE=INS (default: keep original SVTYPEs).')
360363
parser.add_argument('--maxNCR', help='Max no-call rate among all ' +
361364
'samples before adding a flag to the record\'s FILTER field' +
362365
' (default: 0.005)',
@@ -414,7 +417,11 @@ def main():
414417
for key in 'AN AC AF N_BI_GENOS N_HOMREF N_HET N_HOMALT FREQ_HOMREF FREQ_HET FREQ_HOMALT'.split(' '):
415418
if key in record.info.keys():
416419
record.info.pop(key)
417-
420+
421+
# Standardize SVTYPE for all INS variants, if requested via --simplify-INS-SVTYPEs
422+
if args.simplify_INS_SVTYPEs and 'INS' in record.info['SVTYPE']:
423+
record.info['SVTYPE'] = 'INS'
424+
418425
if args.dropEmpties:
419426
samps = svu.get_called_samples(record, include_null=False)
420427
if len(samps) > 0:

wdl/MasterVcfQc.wdl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ import "PerSampleExternalBenchmark.wdl" as PerSampleExternalBenchmark
1717
import "Tasks0506.wdl" as MiniTasks
1818

1919
# Master workflow to perform comprehensive quality control (QC) on
20-
# an SV VCF output by the Talkowski lab SV pipeline
20+
# an SV VCF output by GATK-SV
2121
workflow MasterVcfQc {
2222
input {
2323
File vcf

wdl/minGQ.wdl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -792,6 +792,7 @@ task ApplyMinGQFilter {
792792
/opt/sv-pipeline/scripts/downstream_analysis_and_filtering/apply_minGQ_filter.py \
793793
--minGQ "~{global_minGQ}" \
794794
--maxNCR "~{maxNCR}" \
795+
--simplify-INS-SVTYPEs \
795796
--cleanAFinfo \
796797
--prefix "~{PCR_status}" \
797798
"~{vcf}" \

0 commit comments

Comments
 (0)