From 4c4ed711f2afe4f8cbb0cafa75cdb265b855e9e1 Mon Sep 17 00:00:00 2001 From: mathieulemieux Date: Wed, 30 Apr 2025 10:45:21 -0700 Subject: [PATCH 1/8] Update get_kb_disease_matches() --- pori_python/ipr/ipr.py | 44 ++++++++++++++++++++++++++++++++++-------- 1 file changed, 36 insertions(+), 8 deletions(-) diff --git a/pori_python/ipr/ipr.py b/pori_python/ipr/ipr.py index 499cb28..e40d970 100644 --- a/pori_python/ipr/ipr.py +++ b/pori_python/ipr/ipr.py @@ -11,6 +11,7 @@ from pori_python.graphkb import GraphKBConnection from pori_python.graphkb import statement as gkb_statement from pori_python.graphkb import vocab as gkb_vocab +from pori_python.graphkb import util as gkb_util from pori_python.types import ( Hashabledict, ImageDefinition, @@ -638,14 +639,41 @@ def get_kb_disease_matches( if verbose: logger.info(f"Matching disease ({kb_disease_match}) to graphkb") - disease_matches = { - r["@rid"] - for r in gkb_vocab.get_term_tree( - graphkb_conn, - kb_disease_match, - ontology_class="Disease", + disease_matches = [] + try: + # KBDEV-1306 + # Matching disease(s) from name, then tree traversal for ancestors & descendants. + # Leverage new 'similarToExtended' queryType + base_records = gkb_util.convert_to_rid_list( + graphkb_conn.query( + gkb_vocab.query_by_name( + 'Disease', + kb_disease_match, + ) + ) ) - } + if base_records: + disease_matches = list({ + r["@rid"] + for r in graphkb_conn.query({ + "target": base_records, + "queryType": "similarToExtended", + "matchType": "Disease", + "edges": ["AliasOf", "CrossReferenceOf", "DeprecatedBy"], + "treeEdges": ["subClassOf"], + "returnProperties": ["@rid"] + }) + }) + except: + # Previous solution w/ get_term_tree() -> 'similarTo' queryType + disease_matches = list({ + r["@rid"] + for r in gkb_vocab.get_term_tree( + graphkb_conn, + kb_disease_match, + ontology_class="Disease", + ) + }) if not disease_matches: msg = f"failed to match disease ({kb_disease_match}) to graphkb" @@ -653,4 +681,4 @@ def get_kb_disease_matches( logger.error(msg) raise ValueError(msg) - return list(disease_matches) + return disease_matches From 9200d440c0eab56c6e95e2b331bf678e78ea06bb Mon Sep 17 00:00:00 2001 From: mathieulemieux Date: Wed, 30 Apr 2025 11:19:27 -0700 Subject: [PATCH 2/8] Add error msg to get_kb_disease_matches() --- pori_python/ipr/ipr.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/pori_python/ipr/ipr.py b/pori_python/ipr/ipr.py index e40d970..7f09fc8 100644 --- a/pori_python/ipr/ipr.py +++ b/pori_python/ipr/ipr.py @@ -7,7 +7,9 @@ from itertools import product from copy import copy from typing import Dict, Iterable, List, Sequence, Set, Tuple, cast +from requests.exceptions import HTTPError import uuid + from pori_python.graphkb import GraphKBConnection from pori_python.graphkb import statement as gkb_statement from pori_python.graphkb import vocab as gkb_vocab @@ -664,7 +666,10 @@ def get_kb_disease_matches( "returnProperties": ["@rid"] }) }) - except: + except HTTPError: + if verbose: + logger.info(f"Failed at using 'similarToExtended' queryType. Trying again with get_term_tree()") + # Previous solution w/ get_term_tree() -> 'similarTo' queryType disease_matches = list({ r["@rid"] From 2b63eadef3cec69f2cc7af0d65a2878be443096a Mon Sep 17 00:00:00 2001 From: mathieulemieux Date: Fri, 2 May 2025 09:10:57 -0700 Subject: [PATCH 3/8] formatting with black --- pori_python/ipr/ipr.py | 66 ++++++++++++++++++++++++------------------ 1 file changed, 38 insertions(+), 28 deletions(-) diff --git a/pori_python/ipr/ipr.py b/pori_python/ipr/ipr.py index 7f09fc8..41ffdaa 100644 --- a/pori_python/ipr/ipr.py +++ b/pori_python/ipr/ipr.py @@ -4,16 +4,18 @@ """ from __future__ import annotations -from itertools import product -from copy import copy -from typing import Dict, Iterable, List, Sequence, Set, Tuple, cast + from requests.exceptions import HTTPError + import uuid +from copy import copy +from itertools import product +from typing import Dict, Iterable, List, Sequence, Set, Tuple, cast from pori_python.graphkb import GraphKBConnection from pori_python.graphkb import statement as gkb_statement -from pori_python.graphkb import vocab as gkb_vocab from pori_python.graphkb import util as gkb_util +from pori_python.graphkb import vocab as gkb_vocab from pori_python.types import ( Hashabledict, ImageDefinition, @@ -21,12 +23,12 @@ IprGene, IprVariant, KbMatch, - Statement, - Variant, - KbVariantMatch, KbMatchedStatement, KbMatchedStatementConditionSet, KbMatchSections, + KbVariantMatch, + Statement, + Variant, ) from .constants import GERMLINE_BASE_TERMS, VARIANT_CLASSES @@ -615,7 +617,7 @@ def get_kb_statement_matched_conditions( def get_kb_matches_sections( gkb_matches: List[KbMatch] | List[Hashabledict], - allow_partial_matches=False, + allow_partial_matches: bool = False, ) -> KbMatchSections: kb_variants = get_kb_variants(gkb_matches) kb_matched_statements = get_kb_matched_statements(gkb_matches) @@ -655,30 +657,38 @@ def get_kb_disease_matches( ) ) if base_records: - disease_matches = list({ - r["@rid"] - for r in graphkb_conn.query({ - "target": base_records, - "queryType": "similarToExtended", - "matchType": "Disease", - "edges": ["AliasOf", "CrossReferenceOf", "DeprecatedBy"], - "treeEdges": ["subClassOf"], - "returnProperties": ["@rid"] - }) - }) + disease_matches = list( + { + r["@rid"] + for r in graphkb_conn.query( + { + "target": base_records, + "queryType": "similarToExtended", + "matchType": "Disease", + "edges": ["AliasOf", "CrossReferenceOf", "DeprecatedBy"], + "treeEdges": ["subClassOf"], + "returnProperties": ["@rid"], + } + ) + } + ) except HTTPError: if verbose: - logger.info(f"Failed at using 'similarToExtended' queryType. Trying again with get_term_tree()") + logger.info( + "Failed at using 'similarToExtended' queryType. Trying again with get_term_tree()" + ) # Previous solution w/ get_term_tree() -> 'similarTo' queryType - disease_matches = list({ - r["@rid"] - for r in gkb_vocab.get_term_tree( - graphkb_conn, - kb_disease_match, - ontology_class="Disease", - ) - }) + disease_matches = list( + { + r["@rid"] + for r in gkb_vocab.get_term_tree( + graphkb_conn, + kb_disease_match, + ontology_class="Disease", + ) + } + ) if not disease_matches: msg = f"failed to match disease ({kb_disease_match}) to graphkb" From 9896afb9e9fc4e49755cee50582b5df1cfe3a384 Mon Sep 17 00:00:00 2001 From: mathieulemieux Date: Fri, 2 May 2025 09:28:36 -0700 Subject: [PATCH 4/8] Update logger msg in get_kb_disease_matches() --- pori_python/ipr/ipr.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pori_python/ipr/ipr.py b/pori_python/ipr/ipr.py index 41ffdaa..9fdb4bc 100644 --- a/pori_python/ipr/ipr.py +++ b/pori_python/ipr/ipr.py @@ -641,7 +641,9 @@ def get_kb_disease_matches( logger.warning(f"No disease provided; will use '{kb_disease_match}'") if verbose: - logger.info(f"Matching disease ({kb_disease_match}) to graphkb") + logger.info( + f"Matching disease ({kb_disease_match}) to graphkb using 'similarToExtended' queryType." + ) disease_matches = [] try: From ab4eb020825c11436972ef19ffc05ce6de10034c Mon Sep 17 00:00:00 2001 From: mathieulemieux Date: Fri, 2 May 2025 13:19:37 -0700 Subject: [PATCH 5/8] Add similarToExtended flag to get_kb_disease_matches() --- pori_python/ipr/ipr.py | 81 +++++++++++++++++++++++------------------- 1 file changed, 44 insertions(+), 37 deletions(-) diff --git a/pori_python/ipr/ipr.py b/pori_python/ipr/ipr.py index 9fdb4bc..1e59e10 100644 --- a/pori_python/ipr/ipr.py +++ b/pori_python/ipr/ipr.py @@ -632,54 +632,61 @@ def get_kb_matches_sections( def get_kb_disease_matches( - graphkb_conn: GraphKBConnection, kb_disease_match: str = None, verbose: bool = True + graphkb_conn: GraphKBConnection, + kb_disease_match: str = None, + verbose: bool = True, + similarToExtended: bool = True, ) -> list[str]: + disease_matches = [] + if not kb_disease_match: kb_disease_match = 'cancer' if verbose: logger.warning(f"No disease provided; will use '{kb_disease_match}'") - if verbose: - logger.info( - f"Matching disease ({kb_disease_match}) to graphkb using 'similarToExtended' queryType." - ) - - disease_matches = [] - try: - # KBDEV-1306 - # Matching disease(s) from name, then tree traversal for ancestors & descendants. - # Leverage new 'similarToExtended' queryType - base_records = gkb_util.convert_to_rid_list( - graphkb_conn.query( - gkb_vocab.query_by_name( - 'Disease', - kb_disease_match, - ) - ) - ) - if base_records: - disease_matches = list( - { - r["@rid"] - for r in graphkb_conn.query( - { - "target": base_records, - "queryType": "similarToExtended", - "matchType": "Disease", - "edges": ["AliasOf", "CrossReferenceOf", "DeprecatedBy"], - "treeEdges": ["subClassOf"], - "returnProperties": ["@rid"], - } - ) - } - ) - except HTTPError: + if similarToExtended: if verbose: logger.info( - "Failed at using 'similarToExtended' queryType. Trying again with get_term_tree()" + f"Matching disease ({kb_disease_match}) to graphkb using 'similarToExtended' queryType." ) + try: + # KBDEV-1306 + # Matching disease(s) from name, then tree traversal for ancestors & descendants. + # Leverage new 'similarToExtended' queryType + base_records = gkb_util.convert_to_rid_list( + graphkb_conn.query( + gkb_vocab.query_by_name( + 'Disease', + kb_disease_match, + ) + ) + ) + if base_records: + disease_matches = list( + { + r["@rid"] + for r in graphkb_conn.query( + { + "target": base_records, + "queryType": "similarToExtended", + "matchType": "Disease", + "edges": ["AliasOf", "CrossReferenceOf", "DeprecatedBy"], + "treeEdges": ["subClassOf"], + "returnProperties": ["@rid"], + } + ) + } + ) + except HTTPError: + if verbose: + logger.info("Failed at using 'similarToExtended' queryType.") + similarToExtended = False + + if not similarToExtended: + if verbose: + logger.info("Matching disease ({kb_disease_match}) to graphkb using get_term_tree()") # Previous solution w/ get_term_tree() -> 'similarTo' queryType disease_matches = list( { From 66e1a4f0efdc68f8b25c36fefab120862f19e013 Mon Sep 17 00:00:00 2001 From: mathieulemieux Date: Fri, 2 May 2025 13:47:54 -0700 Subject: [PATCH 6/8] fix f-string --- pori_python/ipr/ipr.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pori_python/ipr/ipr.py b/pori_python/ipr/ipr.py index 1e59e10..cd1f784 100644 --- a/pori_python/ipr/ipr.py +++ b/pori_python/ipr/ipr.py @@ -686,7 +686,7 @@ def get_kb_disease_matches( if not similarToExtended: if verbose: - logger.info("Matching disease ({kb_disease_match}) to graphkb using get_term_tree()") + logger.info(f"Matching disease ({kb_disease_match}) to graphkb using get_term_tree()") # Previous solution w/ get_term_tree() -> 'similarTo' queryType disease_matches = list( { From 7338551bbe804388388a54d95aee25b7df758f90 Mon Sep 17 00:00:00 2001 From: mathieulemieux Date: Fri, 2 May 2025 16:31:58 -0700 Subject: [PATCH 7/8] Add TestGetKbDiseaseMatches to test_ipr.py --- tests/test_ipr/test_ipr.py | 69 ++++++++++++++++++++++++++------------ 1 file changed, 48 insertions(+), 21 deletions(-) diff --git a/tests/test_ipr/test_ipr.py b/tests/test_ipr/test_ipr.py index 5c55d9d..f27932c 100644 --- a/tests/test_ipr/test_ipr.py +++ b/tests/test_ipr/test_ipr.py @@ -6,6 +6,7 @@ from pori_python.ipr.ipr import ( convert_statements_to_alterations, germline_kb_matches, + get_kb_disease_matches, get_kb_matched_statements, get_kb_statement_matched_conditions, get_kb_variants, @@ -172,27 +173,36 @@ @pytest.fixture def graphkb_conn(): - class QueryMock: - return_values = [ - # get approved evidence levels - [{"@rid": v} for v in APPROVED_EVIDENCE_RIDS] - ] - index = -1 - - def __call__(self, *args, **kwargs): - self.index += 1 - ret_val = self.return_values[self.index] if self.index < len(self.return_values) else [] - return ret_val - - class PostMock: - def __call__(self, *args, **kwargs): - # custom return tailored for multi_variant_filtering() testing - return {"result": KB_MATCHES_STATEMENTS} - + # Mock for the 'query' method + query_mock = Mock() + query_return_values = [ + [{"@rid": v} for v in APPROVED_EVIDENCE_RIDS] + ] + query_index = {"value": -1} # Mutable index for closure + + def query_side_effect(*args, **kwargs): + if args: + # for TestGetKbDiseaseMatches + return [{'@rid': '#123:45'}] + query_index["value"] += 1 + idx = query_index["value"] + return query_return_values[idx] if idx < len(query_return_values) else [] + + query_mock.side_effect = query_side_effect + + # Mock for the 'post' method + post_mock = Mock(return_value={"result": KB_MATCHES_STATEMENTS}) + + # 'get_source' remains a plain function def mock_get_source(source): return {"@rid": 0} - conn = Mock(query=QueryMock(), cache={}, get_source=mock_get_source, post=PostMock()) + # Create the connection mock with attributes + conn = Mock() + conn.query = query_mock + conn.post = post_mock + conn.cache = {} + conn.get_source = mock_get_source return conn @@ -233,10 +243,9 @@ def base_graphkb_statement(disease_id: str = "disease", relevance_rid: str = "ot @pytest.fixture(autouse=True) def mock_get_term_tree(monkeypatch): - def mock_func(*pos, **kwargs): - return [{"@rid": d} for d in DISEASE_RIDS] - + mock_func = Mock(return_value=[{"@rid": d} for d in DISEASE_RIDS]) monkeypatch.setattr(gkb_vocab, "get_term_tree", mock_func) + return mock_func @pytest.fixture(autouse=True) @@ -255,6 +264,24 @@ def mock_func(_, relevance_id): monkeypatch.setattr(gkb_statement, "categorize_relevance", mock_func) +class TestGetKbDiseaseMatches: + def test_get_kb_disease_matches_similarToExtended(self, graphkb_conn) -> None: + get_kb_disease_matches(graphkb_conn, 'Breast Cancer') + assert graphkb_conn.query.called + assert not gkb_vocab.get_term_tree.called + + def test_get_kb_disease_matches_get_term_tree(self, graphkb_conn) -> None: + get_kb_disease_matches(graphkb_conn, 'Breast Cancer', similarToExtended=False) + assert gkb_vocab.get_term_tree.called + assert not graphkb_conn.query.called + + def test_get_kb_disease_matches_default(self, graphkb_conn) -> None: + get_kb_disease_matches(graphkb_conn) + assert graphkb_conn.query.call_args_list[0].args == ({ + 'target': 'Disease', 'filters': {'name': 'cancer'} + },) + + class TestConvertStatementsToAlterations: def test_disease_match(self, graphkb_conn) -> None: statement = base_graphkb_statement(DISEASE_RIDS[0]) From 5ecc23e6fefcd2bc5207111bc6a82a0d918f5047 Mon Sep 17 00:00:00 2001 From: mathieulemieux Date: Fri, 2 May 2025 16:36:29 -0700 Subject: [PATCH 8/8] Black formatting --- tests/test_ipr/test_ipr.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/tests/test_ipr/test_ipr.py b/tests/test_ipr/test_ipr.py index f27932c..03628e0 100644 --- a/tests/test_ipr/test_ipr.py +++ b/tests/test_ipr/test_ipr.py @@ -175,9 +175,7 @@ def graphkb_conn(): # Mock for the 'query' method query_mock = Mock() - query_return_values = [ - [{"@rid": v} for v in APPROVED_EVIDENCE_RIDS] - ] + query_return_values = [[{"@rid": v} for v in APPROVED_EVIDENCE_RIDS]] query_index = {"value": -1} # Mutable index for closure def query_side_effect(*args, **kwargs): @@ -277,9 +275,9 @@ def test_get_kb_disease_matches_get_term_tree(self, graphkb_conn) -> None: def test_get_kb_disease_matches_default(self, graphkb_conn) -> None: get_kb_disease_matches(graphkb_conn) - assert graphkb_conn.query.call_args_list[0].args == ({ - 'target': 'Disease', 'filters': {'name': 'cancer'} - },) + assert graphkb_conn.query.call_args_list[0].args == ( + {'target': 'Disease', 'filters': {'name': 'cancer'}}, + ) class TestConvertStatementsToAlterations: