Skip to content

Commit b051b27

Browse files
committed
Merge branch 'develop' of https://github.com/bcgsc/pori_python into feat/DEVSU-2797-remove-gsc-defaults_dustin
2 parents 9cd51c3 + 1219643 commit b051b27

File tree

6 files changed

+123
-12
lines changed

6 files changed

+123
-12
lines changed

pori_python/graphkb/genes.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -370,7 +370,7 @@ def get_pharmacogenomic_info(
370370

371371

372372
def get_gene_linked_pharmacogenomic_info(
373-
conn: GraphKBConnection, source: str
373+
conn: GraphKBConnection, source: str = PREFERRED_GENE_SOURCE_NAME
374374
) -> Tuple[List[str], Dict[str, Tuple[str, List[str]]]]:
375375
"""
376376
Return two lists from GraphKB, one of pharmacogenomic genes and one of associated variants.

pori_python/ipr/ipr.py

Lines changed: 31 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -268,11 +268,22 @@ def select_expression_plots(
268268

269269

270270
def create_key_alterations(
271-
kb_matches: List[Hashabledict], all_variants: Sequence[IprVariant]
271+
kb_matches: List[Hashabledict],
272+
all_variants: Sequence[IprVariant],
273+
included_kb_matches: List[KbVariantMatch],
272274
) -> Tuple[List[Dict], Dict]:
273275
"""Create the list of significant variants matched by the KB.
274276
275277
This list of matches is also used to create the variant counts.
278+
279+
kb_matches: the full list of matched kb objects found for the reported variants
280+
all_variants: the full list of all reported variants, matched or unmatched
281+
included_kb_matches: the list of kb matches whose kbVariantId values are allowed in the key alterations table;
282+
this is all kb_variants if partially matched statements are allowed, or
283+
the subset of kb_variants that are conditions for at least one
284+
fully satisfied statement condition set, if partially matched statements
285+
are not allowed (ie, kb_variants that are not part of any fully satisfied
286+
statement condition set are excluded)
276287
"""
277288
alterations = []
278289
type_mapping = {
@@ -283,7 +294,12 @@ def create_key_alterations(
283294
}
284295
counts: Dict[str, Set] = {v: set() for v in type_mapping.values()}
285296
skipped_variant_types = []
297+
298+
included_kbvariant_ids = list(set([item['kbVariantId'] for item in included_kb_matches]))
299+
286300
for kb_match in kb_matches:
301+
if kb_match['kbVariantId'] not in included_kbvariant_ids:
302+
continue
287303
variant_type = kb_match["variantType"]
288304
variant_key = kb_match["variant"]
289305
if kb_match["category"] == "unknown":
@@ -624,6 +640,20 @@ def get_kb_matches_sections(
624640
kb_statement_matched_conditions = get_kb_statement_matched_conditions(
625641
gkb_matches, allow_partial_matches
626642
)
643+
644+
if not allow_partial_matches:
645+
# remove kb_matches that are not part of any fully matched condition set
646+
unique_kb_variant_ids = list(
647+
set(
648+
[
649+
item['kbVariantId']
650+
for conditionSet in kb_statement_matched_conditions
651+
for item in conditionSet['matchedConditions']
652+
]
653+
)
654+
)
655+
kb_variants = [item for item in kb_variants if item['kbVariantId'] in unique_kb_variant_ids]
656+
627657
return {
628658
"kbMatches": kb_variants,
629659
"kbMatchedStatements": kb_matched_statements,

pori_python/ipr/main.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -477,9 +477,6 @@ def ipr_report(
477477
gkb_matches = [Hashabledict(match) for match in custom_kb_match_filter(gkb_matches)]
478478
logger.info(f"\t custom_kb_match_filter left {len(gkb_matches)} variants")
479479

480-
# KEY ALTERATIONS
481-
key_alterations, variant_counts = create_key_alterations(gkb_matches, all_variants)
482-
483480
# GENE INFORMATION
484481
logger.info("fetching gene annotations")
485482
gene_information = get_gene_information(graphkb_conn, sorted(genes_with_variants))
@@ -526,6 +523,11 @@ def ipr_report(
526523
gkb_matches, allow_partial_matches=allow_partial_matches
527524
)
528525

526+
# KEY ALTERATIONS
527+
key_alterations, variant_counts = create_key_alterations(
528+
gkb_matches, all_variants, kb_matched_sections['kbMatches']
529+
)
530+
529531
# OUTPUT CONTENT
530532
# thread safe deep-copy the original content
531533
output = json.loads(json.dumps(content))
@@ -571,9 +573,10 @@ def ipr_report(
571573
upload_error = None
572574

573575
# UPLOAD TO IPR
576+
574577
if ipr_upload:
575578
if not ipr_conn:
576-
raise ValueError("ipr_url required to upload_report")
579+
raise ValueError("ipr_url required to upload report")
577580
ipr_spec = ipr_conn.get_spec()
578581
output = clean_unsupported_content(output, ipr_spec)
579582
try:

tests/test_graphkb/test_genes.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -172,7 +172,7 @@ def test_get_pharmacogenomic_info(conn):
172172
EXCLUDE_BCGSC_TESTS, reason="excluding BCGSC-specific tests (requires CGL loader))"
173173
)
174174
def test_get_gene_linked_pharmacogenomic_info(conn):
175-
genes, matches = get_gene_linked_pharmacogenomic_info(conn, PREFERRED_GENE_SOURCE_NAME)
175+
genes, matches = get_gene_linked_pharmacogenomic_info(conn)
176176
for gene in PHARMACOGENOMIC_INITIAL_GENES:
177177
assert gene in genes, f"{gene} not found in get_pharmacogenomic_info"
178178
for rid, variant_info in matches.items():
@@ -198,7 +198,7 @@ def test_get_cancer_predisposition_info(conn):
198198
EXCLUDE_BCGSC_TESTS, reason="excluding BCGSC-specific tests (requires CGL loader))"
199199
)
200200
def test_get_gene_linked_cancer_predisposition_info(conn):
201-
genes, matches = get_gene_linked_cancer_predisposition_info(conn, PREFERRED_GENE_SOURCE_NAME)
201+
genes, matches = get_gene_linked_cancer_predisposition_info(conn)
202202
for gene in CANCER_PREDISP_INITIAL_GENES:
203203
assert gene in genes, f"{gene} not found in get_cancer_predisposition_info"
204204

tests/test_ipr/test_data/expression.short.tab

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,3 +25,4 @@ KLHL25 27.39 outlier_high increased expression 100.0 6.97 46.01 100.0 210.66 8.
2525
ZNRF3-IT1 0.2 no_category 6.75 0.12 34 -1.816 2.225 -2.505 1.744 26 0.623 -1.272 0.2 0.3 0.4 0.5
2626
PTP4A3 1.33 high_percentile increased expression 100.0 9.74 99.62 100.0 32.2 6.09 -2.217 87 1.863 2.013 0.286 -0.767 66 -0.091 -1.824 0.2 0.3 0.4 0.5
2727
ERBB2 0.05 no_category 67.0 0.62 51.71 100.0 1.01 4.8 -0.551 61 -2.009 -2.216 0.147 -2.385 86 2.299 -0.953 0.2 0.3 0.4 0.5
28+
DPYD 1.12 increased rna expression increased expression 97.0 4.75 73.38 100.0 50.19 0.99 -0.193 92 2.786 0.537 2.113 1.888 31 0.915 0.861 0.2 0.3 0.4 0.5

tests/test_ipr/test_ipr.py

Lines changed: 81 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@
1010
get_kb_matched_statements,
1111
get_kb_statement_matched_conditions,
1212
get_kb_variants,
13+
get_kb_matches_sections,
14+
create_key_alterations,
1315
)
1416
from pori_python.types import Statement
1517

@@ -483,9 +485,9 @@ def test_germline_kb_matches(self):
483485
"kbContextId": "#135:8764",
484486
"kbRelevanceId": "#147:32",
485487
"kbStatementId": "#155:13511",
486-
"requiredKbMatches": ["#159:5426", "#161:938"],
488+
"requiredKbMatches": ["#159:54261", "#161:9381"],
487489
"kbVariant": "BRCA1 mutation",
488-
"kbVariantId": "#161:938",
490+
"kbVariantId": "#161:9381",
489491
"matchedCancer": False,
490492
"reference": "MOAlmanac FDA-56",
491493
"relevance": "therapy",
@@ -494,6 +496,13 @@ def test_germline_kb_matches(self):
494496
},
495497
]
496498

499+
ALL_VARIANTS = [
500+
{"variant": "var1", "key": '1', "variantType": 'mut'},
501+
{"variant": "var2", "key": '2', "variantType": 'mut'},
502+
{"variant": "var3", "key": '3', "variantType": 'mut'},
503+
{"variant": "var4", "key": '4', "variantType": 'mut'},
504+
]
505+
497506
BASIC_GKB_MATCH = {
498507
"approvedTherapy": False,
499508
"category": "test",
@@ -696,9 +705,12 @@ def test_partial_matches_omitted(self):
696705
for item in input_fields: # we don't care about these for this test
697706
item["variantType"] = "test"
698707
item["kbVariant"] = "test"
708+
699709
gkb_matches = create_gkb_matches(input_fields)
700-
stmts = get_kb_matched_statements(gkb_matches)
701-
kbcs = get_kb_statement_matched_conditions(gkb_matches)
710+
sections = get_kb_matches_sections(gkb_matches, allow_partial_matches=False)
711+
712+
stmts = sections['kbMatchedStatements']
713+
kbcs = sections['kbStatementMatchedConditions']
702714
assert len(stmts) == 2
703715
assert len(kbcs) == 1 # X only
704716
assert kbcs[0]["kbStatementId"] == "X"
@@ -748,6 +760,53 @@ def test_partial_matches_omitted_even_when_var_used_elsewhere(self):
748760
assert len(kbcs) == 2 # X and Z but not Y
749761
assert "Y" not in [item["kbStatementId"] for item in kbcs]
750762

763+
def test_kbvariants_removed_from_set_when_not_part_of_full_conditionset_match(self):
764+
"""When there is a variant that fulfills one part of a statement's condition set,
765+
but isn't part of any fully satisfied condition set,
766+
the kbvariant record should be removed from the kbvariants list
767+
"""
768+
input_fields = [
769+
{
770+
"variant": "A",
771+
"kbVariantId": "test1",
772+
"kbStatementId": "X",
773+
"requiredKbMatches": ["test1", "test2", "test3"],
774+
},
775+
{
776+
"variant": "B",
777+
"kbVariantId": "test2",
778+
"kbStatementId": "X",
779+
"requiredKbMatches": ["test1", "test2", "test3"],
780+
},
781+
{
782+
"variant": "A",
783+
"kbVariantId": "test1",
784+
"kbStatementId": "Y",
785+
"requiredKbMatches": ["test4", "test1"],
786+
},
787+
{
788+
"variant": "D",
789+
"kbVariantId": "test4",
790+
"kbStatementId": "Y",
791+
"requiredKbMatches": ["test4", "test1"],
792+
},
793+
]
794+
for item in input_fields: # we don't care about these for this test
795+
item["variantType"] = "test"
796+
item["kbVariant"] = "test"
797+
gkb_matches = create_gkb_matches(input_fields)
798+
sections1 = get_kb_matches_sections(gkb_matches, allow_partial_matches=False)
799+
kbcs1 = sections1['kbStatementMatchedConditions']
800+
kbvars1 = sections1['kbMatches']
801+
assert len(kbcs1) == 1 # only fully matched condition sets included
802+
assert len(kbvars1) == 2 # therefore, the kbvar used only by partially matched stmt X (test2) is pruned
803+
804+
sections2 = get_kb_matches_sections(gkb_matches, allow_partial_matches=True)
805+
kbcs2 = sections2['kbStatementMatchedConditions']
806+
kbvars2 = sections2['kbMatches']
807+
assert len(kbcs2) == 2 # all condition sets included
808+
assert len(kbvars2) == 3 # therefore, no pruning
809+
751810
def test_partial_matches_included(self):
752811
"""check statements that are only partially supported
753812
are included when allow_partial_matches=True"""
@@ -779,3 +838,21 @@ def test_partial_matches_included(self):
779838
kbcs = get_kb_statement_matched_conditions(gkb_matches, allow_partial_matches=True)
780839
assert len(stmts) == 2 # X and Y
781840
assert len(kbcs) == 2
841+
842+
def test_create_key_alterations_includes_only_pruned_kbmatches(self):
843+
gkb_matches = create_gkb_matches(GKB_MATCHES)
844+
845+
sections1 = get_kb_matches_sections(gkb_matches, allow_partial_matches=False)
846+
key_alts1, counts1 = create_key_alterations(
847+
gkb_matches, ALL_VARIANTS, sections1['kbMatches']
848+
)
849+
850+
sections2 = get_kb_matches_sections(gkb_matches, allow_partial_matches=True)
851+
key_alts2, counts2 = create_key_alterations(
852+
gkb_matches, ALL_VARIANTS, sections2['kbMatches']
853+
)
854+
855+
# check partial-match-only variants are not included in key alterations when
856+
# allow_partial_matches is False
857+
assert len(key_alts1) == 3
858+
assert len(key_alts2) == 4

0 commit comments

Comments
 (0)