Skip to content

Commit 88d1133

Browse files
authored
Merge pull request #91 from bcgsc/bugfix/DEVSU-2793-remove-kbvar-match-when-no-stmt
Bugfix/devsu 2793 remove kbvar match when no stmt
2 parents 7bbb86b + 524ad6b commit 88d1133

File tree

4 files changed

+73
-5
lines changed

4 files changed

+73
-5
lines changed

pori_python/ipr/ipr.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,7 @@ def convert_statements_to_alterations(
168168
diseases = [c for c in statement["conditions"] if c["@class"] == "Disease"]
169169
disease_match = len(diseases) == 1 and diseases[0]["@rid"] in disease_matches
170170
reference = ";".join([e["displayName"] for e in statement["evidence"]])
171+
171172
if statement['relevance']['name'] == 'eligibility':
172173
reference = ";".join([e["sourceId"] for e in statement["evidence"]])
173174

@@ -624,6 +625,20 @@ def get_kb_matches_sections(
624625
kb_statement_matched_conditions = get_kb_statement_matched_conditions(
625626
gkb_matches, allow_partial_matches
626627
)
628+
629+
if not allow_partial_matches:
630+
# remove kb_matches that are not part of any fully matched condition set
631+
unique_kb_variant_ids = list(
632+
set(
633+
[
634+
item['kbVariantId']
635+
for conditionSet in kb_statement_matched_conditions
636+
for item in conditionSet['matchedConditions']
637+
]
638+
)
639+
)
640+
kb_variants = [item for item in kb_variants if item['kbVariantId'] in unique_kb_variant_ids]
641+
627642
return {
628643
"kbMatches": kb_variants,
629644
"kbMatchedStatements": kb_matched_statements,

pori_python/ipr/main.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -467,9 +467,6 @@ def ipr_report(
467467
gkb_matches = [Hashabledict(match) for match in custom_kb_match_filter(gkb_matches)]
468468
logger.info(f"\t custom_kb_match_filter left {len(gkb_matches)} variants")
469469

470-
# KEY ALTERATIONS
471-
key_alterations, variant_counts = create_key_alterations(gkb_matches, all_variants)
472-
473470
# GENE INFORMATION
474471
logger.info("fetching gene annotations")
475472
gene_information = get_gene_information(graphkb_conn, sorted(genes_with_variants))
@@ -514,6 +511,10 @@ def ipr_report(
514511
gkb_matches, allow_partial_matches=allow_partial_matches
515512
)
516513

514+
# KEY ALTERATIONS
515+
# must be done after kbMatches have been pruned for kb_matched_sections
516+
key_alterations, variant_counts = create_key_alterations(gkb_matches, all_variants)
517+
517518
# OUTPUT CONTENT
518519
# thread safe deep-copy the original content
519520
output = json.loads(json.dumps(content))

tests/test_ipr/test_data/expression.short.tab

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,3 +25,4 @@ KLHL25 27.39 outlier_high increased expression 100.0 6.97 46.01 100.0 210.66 8.
2525
ZNRF3-IT1 0.2 no_category 6.75 0.12 34 -1.816 2.225 -2.505 1.744 26 0.623 -1.272 0.2 0.3 0.4 0.5
2626
PTP4A3 1.33 high_percentile increased expression 100.0 9.74 99.62 100.0 32.2 6.09 -2.217 87 1.863 2.013 0.286 -0.767 66 -0.091 -1.824 0.2 0.3 0.4 0.5
2727
ERBB2 0.05 no_category 67.0 0.62 51.71 100.0 1.01 4.8 -0.551 61 -2.009 -2.216 0.147 -2.385 86 2.299 -0.953 0.2 0.3 0.4 0.5
28+
DPYD 1.12 increased rna expression increased expression 97.0 4.75 73.38 100.0 50.19 0.99 -0.193 92 2.786 0.537 2.113 1.888 31 0.915 0.861 0.2 0.3 0.4 0.5

tests/test_ipr/test_ipr.py

Lines changed: 53 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
get_kb_matched_statements,
1111
get_kb_statement_matched_conditions,
1212
get_kb_variants,
13+
get_kb_matches_sections,
1314
)
1415
from pori_python.types import Statement
1516

@@ -696,9 +697,12 @@ def test_partial_matches_omitted(self):
696697
for item in input_fields: # we don't care about these for this test
697698
item["variantType"] = "test"
698699
item["kbVariant"] = "test"
700+
699701
gkb_matches = create_gkb_matches(input_fields)
700-
stmts = get_kb_matched_statements(gkb_matches)
701-
kbcs = get_kb_statement_matched_conditions(gkb_matches)
702+
sections = get_kb_matches_sections(gkb_matches, allow_partial_matches=False)
703+
704+
stmts = sections['kbMatchedStatements']
705+
kbcs = sections['kbStatementMatchedConditions']
702706
assert len(stmts) == 2
703707
assert len(kbcs) == 1 # X only
704708
assert kbcs[0]["kbStatementId"] == "X"
@@ -748,6 +752,53 @@ def test_partial_matches_omitted_even_when_var_used_elsewhere(self):
748752
assert len(kbcs) == 2 # X and Z but not Y
749753
assert "Y" not in [item["kbStatementId"] for item in kbcs]
750754

755+
def test_kbvariants_removed_from_set_when_not_part_of_full_conditionset_match(self):
756+
"""When there is a variant that fulfills one part of a statement's condition set,
757+
but isn't part of any fully satisfied condition set,
758+
the kbvariant record should be removed from the kbvariants list
759+
"""
760+
input_fields = [
761+
{
762+
"variant": "A",
763+
"kbVariantId": "test1",
764+
"kbStatementId": "X",
765+
"requiredKbMatches": ["test1", "test2", "test3"],
766+
},
767+
{
768+
"variant": "B",
769+
"kbVariantId": "test2",
770+
"kbStatementId": "X",
771+
"requiredKbMatches": ["test1", "test2", "test3"],
772+
},
773+
{
774+
"variant": "A",
775+
"kbVariantId": "test1",
776+
"kbStatementId": "Y",
777+
"requiredKbMatches": ["test4", "test1"],
778+
},
779+
{
780+
"variant": "D",
781+
"kbVariantId": "test4",
782+
"kbStatementId": "Y",
783+
"requiredKbMatches": ["test4", "test1"],
784+
},
785+
]
786+
for item in input_fields: # we don't care about these for this test
787+
item["variantType"] = "test"
788+
item["kbVariant"] = "test"
789+
gkb_matches = create_gkb_matches(input_fields)
790+
sections1 = get_kb_matches_sections(gkb_matches, allow_partial_matches=False)
791+
kbcs1 = sections1['kbStatementMatchedConditions']
792+
kbvars1 = sections1['kbMatches']
793+
assert len(kbcs1) == 1 # only fully matched condition sets included
794+
assert len(kbvars1) == 2 # therefore, the kbvar matched only by partially satisfied stmt X (test2) is pruned
795+
796+
sections2 = get_kb_matches_sections(gkb_matches, allow_partial_matches=True)
797+
kbcs2 = sections2['kbStatementMatchedConditions']
798+
kbvars2 = sections2['kbMatches']
799+
assert len(kbcs2) == 2 # all condition sets included
800+
assert len(kbvars2) == 3 # therefore, no pruning
801+
751802
def test_partial_matches_included(self):
752803
"""check statements that are only partially supported
753804
are included when allow_partial_matches=True"""

0 commit comments

Comments
 (0)