Skip to content

Commit ca4b3de

Browse files
committed
test add sage metrics for Msi calculation
1 parent 0089151 commit ca4b3de

2 files changed

Lines changed: 49 additions & 0 deletions

File tree

bolt/common/constants.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,11 @@ class VcfInfo(enum.Enum):
7777
SAGE_NOVEL = 'SAGE_NOVEL'
7878
SAGE_RESCUE = 'SAGE_RESCUE'
7979

80+
RC_REPC = 'RC_REPC'
81+
RC_REPS = 'RC_REPS'
82+
REP_C = 'REP_C'
83+
REP_S = 'REP_S'
84+
8085
PCGR_TIER = 'PCGR_TIER'
8186
PCGR_CSQ = 'PCGR_CSQ'
8287
PCGR_MUTATION_HOTSPOT = 'PCGR_MUTATION_HOTSPOT'
@@ -226,6 +231,27 @@ def namespace(self):
226231
'Description': 'Variant rescued by a matching SAGE call',
227232
},
228233

234+
VcfInfo.RC_REPC: {
235+
'Number': '1',
236+
'Type': 'Integer',
237+
'Description': 'Repeat count from read context',
238+
},
239+
VcfInfo.RC_REPS: {
240+
'Number': '1',
241+
'Type': 'String',
242+
'Description': 'Repeat sequence from read context',
243+
},
244+
VcfInfo.REP_C: {
245+
'Number': '1',
246+
'Type': 'Integer',
247+
'Description': 'Repeat sequence count',
248+
},
249+
VcfInfo.REP_S: {
250+
'Number': '1',
251+
'Type': 'String',
252+
'Description': 'Repeat sequence',
253+
},
254+
229255
VcfInfo.PCGR_TIER: {
230256
'Number': '1',
231257
'Type': 'String',

bolt/workflows/smlv_somatic/rescue.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,11 @@ def annotate_existing_sage_calls(input_fp, tumor_name, sage_vcf_fp, output_dir):
112112
util.add_vcf_header_entry(input_fh, constants.VcfInfo.SAGE_HOTSPOT)
113113
util.add_vcf_header_entry(input_fh, constants.VcfInfo.SAGE_RESCUE)
114114

115+
util.add_vcf_header_entry(input_fh, constants.VcfInfo.RC_REPC)
116+
util.add_vcf_header_entry(input_fh, constants.VcfInfo.RC_REPS)
117+
util.add_vcf_header_entry(input_fh, constants.VcfInfo.REP_C)
118+
util.add_vcf_header_entry(input_fh, constants.VcfInfo.REP_S)
119+
115120
# TODO(SW): check that defined header descriptions match those in the SAGE fp; collect as list
116121
# here and iterate to check and then add to input_fp header also in another loop
117122

@@ -155,6 +160,16 @@ def annotate_existing_sage_calls(input_fp, tumor_name, sage_vcf_fp, output_dir):
155160
record.set_format(constants.VcfFormat.SAGE_DP.value, sage_record.format('DP'))
156161
record.set_format(constants.VcfFormat.SAGE_SB.value, sage_record.format('SB'))
157162

163+
# Transfer SAGE INFO fields for repeat context
164+
if sage_record.INFO.get('RC_REPC') is not None:
165+
record.INFO[constants.VcfInfo.RC_REPC.value] = sage_record.INFO['RC_REPC']
166+
if sage_record.INFO.get('RC_REPS') is not None:
167+
record.INFO[constants.VcfInfo.RC_REPS.value] = sage_record.INFO['RC_REPS']
168+
if sage_record.INFO.get('REP_C') is not None:
169+
record.INFO[constants.VcfInfo.REP_C.value] = sage_record.INFO['REP_C']
170+
if sage_record.INFO.get('REP_S') is not None:
171+
record.INFO[constants.VcfInfo.REP_S.value] = sage_record.INFO['REP_S']
172+
158173
output_fh.write_record(record)
159174

160175
# Explicitly close to flush buffer then index output file
@@ -196,6 +211,10 @@ def prepare_sage_novel(input_fp, tumor_name, output_dir):
196211
annotations_retain_info = (
197212
util.get_qualified_vcf_annotation(constants.VcfInfo.SAGE_HOTSPOT),
198213
util.get_qualified_vcf_annotation(constants.VcfInfo.SAGE_NOVEL),
214+
util.get_qualified_vcf_annotation(constants.VcfInfo.RC_REPC),
215+
util.get_qualified_vcf_annotation(constants.VcfInfo.RC_REPS),
216+
util.get_qualified_vcf_annotation(constants.VcfInfo.REP_C),
217+
util.get_qualified_vcf_annotation(constants.VcfInfo.REP_S),
199218
)
200219
annotations_retain_format = (
201220
util.get_qualified_vcf_annotation(constants.VcfFormat.SAGE_SB),
@@ -215,6 +234,10 @@ def prepare_sage_novel(input_fp, tumor_name, output_dir):
215234
with header_entries_fp.open('w') as fh:
216235
print(util.get_vcf_header_line(constants.VcfInfo.SAGE_HOTSPOT), file=fh)
217236
print(util.get_vcf_header_line(constants.VcfInfo.SAGE_NOVEL), file=fh)
237+
print(util.get_vcf_header_line(constants.VcfInfo.RC_REPC), file=fh)
238+
print(util.get_vcf_header_line(constants.VcfInfo.RC_REPS), file=fh)
239+
print(util.get_vcf_header_line(constants.VcfInfo.REP_C), file=fh)
240+
print(util.get_vcf_header_line(constants.VcfInfo.REP_S), file=fh)
218241

219242
# NOTE(SW): the fill-tags BCFtools plugin would have been useful here instead of iterating with
220243
# awk but unfortunately it doesn't seem to support anything other than integer and float INFO

0 commit comments

Comments
 (0)