Skip to content

Commit 97ad9f7

Browse files
committed
Add tests for is_hypermutated dragen count fix
Covers count_variant_process: dragen vs filter_pass threshold, SAGE_NOVEL exclusion, and annotation filter exclusion.
1 parent 7f01001 commit 97ad9f7

1 file changed

Lines changed: 95 additions & 0 deletions

File tree

tests/test_pcgr_hypermutated.py

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -410,5 +410,100 @@ def fake_run_somatic(*args, **kwargs):
410410
'chunk_nbr was not forwarded correctly')
411411

412412

413+
class TestCountVariantProcess(unittest.TestCase):
    """Verify count_variant_process counts and is_hypermutated flag (bolt #27).

    is_hypermutated must use the 'dragen' count (raw, pre-bolt-filter), not
    'filter_pass'. A sample with many DRAGEN variants that are mostly filtered
    away must still be flagged as hypermutated.
    """

    # Minimal header for count_variant_process: needs FILTER tags + SAGE_NOVEL INFO
    COUNT_HEADER = (
        '##fileformat=VCFv4.2\n'
        '##FILTER=<ID=PASS,Description="All filters passed">\n'
        f'##FILTER=<ID={constants.VcfFilter.MIN_AF.value},Description="">\n'
        f'##FILTER=<ID={constants.VcfFilter.PON.value},Description="">\n'
        f'##FILTER=<ID={constants.VcfFilter.MAX_VARIANTS_NON_PASS.value},Description="">\n'
        f'##INFO=<ID={constants.VcfInfo.SAGE_NOVEL.value},Number=0,Type=Flag,Description="">\n'
        f'##INFO=<ID={constants.VcfInfo.RESCUED_FILTERS_EXISTING.value},Number=1,Type=String,Description="">\n'
        '##contig=<ID=chr1,length=248956422>\n'
        '#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\n'
    )

    def _write_count_vcf(self, path, rows):
        """Write COUNT_HEADER followed by one chr1 A>T record per row to ``path``.

        rows: iterable of (pos, filter_str, info_str) tuples.

        Records are written in ascending position order regardless of the
        order they are supplied in, because the VCF specification requires
        positions to be sorted within a contig. The sort is stable, so rows
        sharing a position keep their relative order.
        """
        with open(path, 'w') as fh:
            fh.write(self.COUNT_HEADER)
            for pos, filt, info in sorted(rows, key=lambda row: row[0]):
                fh.write(f'chr1\t{pos}\t.\tA\tT\t.\t{filt}\t{info}\n')

    def _count_with_limit(self, rows, max_variants):
        """Return count_variant_process output for ``rows`` under a patched limit.

        Writes ``rows`` to a VCF inside a temporary directory, patches
        bolt.common.constants.MAX_SOMATIC_VARIANTS to ``max_variants`` for the
        duration of the call, and returns whatever count_variant_process
        returns. The temporary directory is removed before returning.
        """
        with tempfile.TemporaryDirectory() as tmp:
            vcf_fp = pathlib.Path(tmp) / 'test.vcf'
            self._write_count_vcf(vcf_fp, rows)

            with patch('bolt.common.constants.MAX_SOMATIC_VARIANTS', max_variants):
                return report_mod.count_variant_process(vcf_fp)

    def test_is_hypermutated_uses_dragen_count(self):
        """is_hypermutated=True when dragen count > MAX_SOMATIC_VARIANTS even if filter_pass is below."""
        min_af = constants.VcfFilter.MIN_AF.value
        # 3 DRAGEN PASS variants + 2 filtered by bolt (MIN_AF) — filter_pass=3, dragen=5
        rows = [(i * 10, 'PASS', '.') for i in range(1, 4)]
        rows += [(i * 10 + 5, min_af, '.') for i in range(1, 3)]

        counts = self._count_with_limit(rows, max_variants=4)

        self.assertEqual(counts['dragen'], 5)
        self.assertEqual(counts['filter_pass'], 3)
        # dragen(5) > MAX(4) → hypermutated, even though filter_pass(3) ≤ MAX(4)
        self.assertTrue(counts['is_hypermutated'])

    def test_is_hypermutated_false_when_dragen_within_limit(self):
        """is_hypermutated=False when dragen count ≤ MAX_SOMATIC_VARIANTS."""
        rows = [(i * 10, 'PASS', '.') for i in range(1, 4)]

        counts = self._count_with_limit(rows, max_variants=10)

        self.assertEqual(counts['dragen'], 3)
        self.assertFalse(counts['is_hypermutated'])

    def test_sage_novel_excluded_from_dragen_count(self):
        """SAGE_NOVEL variants are not counted as DRAGEN variants."""
        sage_novel_info = constants.VcfInfo.SAGE_NOVEL.value
        rows = [
            (10, 'PASS', '.'),              # dragen
            (20, 'PASS', sage_novel_info),  # sage novel — not dragen
            (30, 'PASS', '.'),              # dragen
        ]

        counts = self._count_with_limit(rows, max_variants=100)

        self.assertEqual(counts['dragen'], 2)
        self.assertEqual(counts['sage'], 3)

    def test_annotation_filter_excluded_from_annotated_count(self):
        """Variants with bolt annotation filters are excluded from annotated count."""
        annot_filter = constants.VcfFilter.MAX_VARIANTS_NON_PASS.value
        rows = [
            (10, 'PASS', '.'),        # annotated
            (20, annot_filter, '.'),  # not annotated (bolt annotation filter)
        ]

        counts = self._count_with_limit(rows, max_variants=100)

        self.assertEqual(counts['annotated'], 1)
        self.assertEqual(counts['dragen'], 2)
506+
507+
413508
# Run the tests in this module when the file is executed directly as a script.
if __name__ == '__main__':
    unittest.main()

0 commit comments

Comments
 (0)