Skip to content

Commit bbb465e

Browse files
tomwardio authored and copybara-github committed
Add properties for whether a variant is an indel, frameshift or SV.
PiperOrigin-RevId: 864449441
Change-Id: I4fc9f6f05dd65f1e5226d99842d684d367fd92ae
1 parent 002832c commit bbb465e

2 files changed

Lines changed: 49 additions & 0 deletions

File tree

src/alphagenome/data/genome.py

Lines changed: 17 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -749,6 +749,23 @@ def is_insertion(self) -> bool:
749749
"""Return if the variant is an insertion."""
750750
return len(self.reference_bases) < len(self.alternate_bases)
751751

752+
@property
753+
def is_frameshift(self) -> bool:
754+
"""Return if the variant is a frameshift."""
755+
indel_size = abs(len(self.reference_bases) - len(self.alternate_bases))
756+
return indel_size > 0 and indel_size % 3 != 0
757+
758+
@property
759+
def is_indel(self) -> bool:
760+
"""Return if the variant is an insertion or deletion."""
761+
return self.is_insertion or self.is_deletion
762+
763+
@property
764+
def is_structural(self) -> bool:
765+
"""Return if the variant is a structural variant."""
766+
indel_size = abs(len(self.reference_bases) - len(self.alternate_bases))
767+
return indel_size >= 50
768+
752769
def copy(self) -> Self:
753770
"""Returns a deep copy of the variant."""
754771
return copy.deepcopy(self)

src/alphagenome/data/genome_test.py

Lines changed: 32 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -781,6 +781,38 @@ def test_is_insertion(self, ref, alt, expected):
781781
v = genome.Variant('chr1', 10, ref, alt)
782782
self.assertEqual(v.is_insertion, expected)
783783

784+
@parameterized.parameters(
785+
('A', 'C', False),
786+
('AC', 'A', True),
787+
('A', 'AC', True),
788+
('AC', 'GT', False),
789+
('', 'A', True),
790+
('A', '', True),
791+
)
792+
def test_is_indel(self, ref, alt, expected):
793+
v = genome.Variant('chr1', 10, ref, alt)
794+
self.assertEqual(v.is_indel, expected)
795+
796+
@parameterized.parameters(
797+
('A', 'C', False),
798+
('AC', 'GTA', True),
799+
('', 'AAA', False),
800+
('AAAA', 'C', False),
801+
('ACG', 'ACGTACGTN', False),
802+
)
803+
def test_is_frameshift(self, ref, alt, expected):
804+
v = genome.Variant('chr1', 10, ref, alt)
805+
self.assertEqual(v.is_frameshift, expected)
806+
807+
@parameterized.parameters(
808+
('', 'C' * 50, True),
809+
('A' * 51, 'C', True),
810+
('A', 'AC', False),
811+
)
812+
def test_is_structural(self, ref, alt, expected):
813+
v = genome.Variant('chr1', 10, ref, alt)
814+
self.assertEqual(v.is_structural, expected)
815+
784816
@parameterized.parameters(
785817
('chr1:1:AAA>GGG', 'TTTNNNN', 'chr1:1:AAA>GGG'),
786818
('chr1:1:ATCG>ATCC', 'ATCGNNN', 'chr1:4:G>C'),

0 commit comments

Comments
 (0)