From bba294764c640f58f3379eeb5f41e9e53e9469a3 Mon Sep 17 00:00:00 2001 From: Charles Tapley Hoyt Date: Mon, 28 Apr 2025 12:45:46 +0200 Subject: [PATCH 1/2] Add testing for graph I/O --- src/semra/io/__init__.py | 3 ++- src/semra/io/graph.py | 18 ++++++++++++++++++ tests/test_io.py | 37 ++++++++++++++++++++++++++++++------- 3 files changed, 50 insertions(+), 8 deletions(-) diff --git a/src/semra/io/__init__.py b/src/semra/io/__init__.py index e18ec385..cbfdb242 100644 --- a/src/semra/io/__init__.py +++ b/src/semra/io/__init__.py @@ -1,6 +1,6 @@ """I/O functions for SeMRA.""" -from .graph import from_digraph, to_digraph, to_multidigraph +from .graph import from_digraph, from_multidigraph, to_digraph, to_multidigraph from .io import ( from_bioontologies, from_cache_df, @@ -21,6 +21,7 @@ "from_cache_df", "from_digraph", "from_jsonl", + "from_multidigraph", "from_pickle", "from_pyobo", "from_sssom", diff --git a/src/semra/io/graph.py b/src/semra/io/graph.py index c2ea87ce..c53ea116 100644 --- a/src/semra/io/graph.py +++ b/src/semra/io/graph.py @@ -14,6 +14,7 @@ "DIGRAPH_DATA_KEY", "MULTIDIGRAPH_DATA_KEY", "from_digraph", + "from_multidigraph", "to_digraph", "to_multidigraph", ] @@ -94,3 +95,20 @@ def to_multidigraph(mappings: t.Iterable[Mapping], *, progress: bool = False) -> **{MULTIDIGRAPH_DATA_KEY: mapping.evidence}, ) return graph + + +def from_multidigraph(graph: nx.MultiDiGraph) -> list[Mapping]: + """Extract mappings from a multi-directed graph data model.""" + return [ + mapping + for s, o, p in graph.edges(keys=True) + for mapping in _from_multidigraph_edge(graph, s, p, o) + ] + + +def _from_multidigraph_edge( + graph: nx.MultiDiGraph, s: Reference, p: Reference, o: Reference +) -> t.Iterable[Mapping]: + data = graph[s][o][p] + for evidence in data[MULTIDIGRAPH_DATA_KEY]: + yield Mapping(s=s, p=p, o=o, evidence=evidence) diff --git a/tests/test_io.py b/tests/test_io.py index 5e0c1a63..8f5a61a4 100644 --- a/tests/test_io.py +++ b/tests/test_io.py @@ -9,7 +9,16 
@@ import pandas as pd from semra import Mapping, MappingSet, ReasonedEvidence, Reference, SimpleEvidence -from semra.io import from_jsonl, from_pyobo, from_sssom_df, write_jsonl +from semra.io import ( + from_digraph, + from_jsonl, + from_multidigraph, + from_pyobo, + from_sssom_df, + to_digraph, + to_multidigraph, + write_jsonl, +) from semra.rules import ( BEN_ORCID, CHAIN_MAPPING, @@ -44,7 +53,7 @@ def test_from_pyobo(self) -> None: self.assertEqual("mesh", mapping.o.prefix) -class TestIO(unittest.TestCase): +class TestSSSOM(unittest.TestCase): """Tests for I/O functions.""" def test_from_sssom_df(self) -> None: @@ -173,8 +182,12 @@ def test_from_sssom_df_with_license(self) -> None: ) self.assertEqual(expected_mappings, actual_mappings) - def test_jsonl(self) -> None: - """Test JSONL I/O.""" + +class TestIO(unittest.TestCase): + """Test I/O functions.""" + + def setUp(self) -> None: + """Set up the test case.""" r1 = Reference.from_curie("mesh:C406527", name="R 115866") r2 = Reference.from_curie("chebi:101854", name="talarozole") r3 = Reference.from_curie("chembl.compound:CHEMBL459505", name="TALAROZOLE") @@ -214,13 +227,23 @@ def test_jsonl(self) -> None: m3_e1 = ReasonedEvidence(justification=CHAIN_MAPPING, mappings=[m1, m2]) m3 = Mapping.from_triple(t3, evidence=[m3_e1]) - mappings = [m1, m2, m3] + self.mappings = [m1, m2, m3] + def test_jsonl(self) -> None: + """Test JSONL I/O.""" with tempfile.TemporaryDirectory() as directory_: for path in [ Path(directory_).joinpath("test.jsonl"), Path(directory_).joinpath("test.jsonl.gz"), ]: - write_jsonl(mappings, path) + write_jsonl(self.mappings, path) new_mappings = from_jsonl(path, show_progress=False) - self.assertEqual(mappings, new_mappings) + self.assertEqual(self.mappings, new_mappings) + + def test_digraph(self) -> None: + """Test I/O to a directed graph.""" + self.assertEqual(self.mappings, from_digraph(to_digraph(self.mappings))) + + def test_multidigraph(self) -> None: + """Test I/O with multi-directed
graph.""" + self.assertEqual(self.mappings, from_multidigraph(to_multidigraph(self.mappings))) From 994073ddb71d569ed6ef87f4be25fe0a98c4e10d Mon Sep 17 00:00:00 2001 From: Charles Tapley Hoyt Date: Mon, 28 Apr 2025 12:50:39 +0200 Subject: [PATCH 2/2] Fix --- src/semra/io/graph.py | 13 ++----------- src/semra/struct.py | 3 +++ tests/test_io.py | 6 ++++-- 3 files changed, 9 insertions(+), 13 deletions(-) diff --git a/src/semra/io/graph.py b/src/semra/io/graph.py index c53ea116..aad910bf 100644 --- a/src/semra/io/graph.py +++ b/src/semra/io/graph.py @@ -100,15 +100,6 @@ def to_multidigraph(mappings: t.Iterable[Mapping], *, progress: bool = False) -> def from_multidigraph(graph: nx.MultiDiGraph) -> list[Mapping]: """Extract mappings from a multi-directed graph data model.""" return [ - mapping - for s, o, p in graph.edges(keys=True) - for mapping in _from_multidigraph_edge(graph, s, p, o) + Mapping(s=s, p=p, o=o, evidence=data[MULTIDIGRAPH_DATA_KEY]) + for s, o, p, data in graph.edges(keys=True, data=True) ] - - -def _from_multidigraph_edge( - graph: nx.MultiDiGraph, s: Reference, p: Reference, o: Reference -) -> t.Iterable[Mapping]: - data = graph[s][o][p] - for evidence in data[MULTIDIGRAPH_DATA_KEY]: - yield Mapping(s=s, p=p, o=o, evidence=evidence) diff --git a/src/semra/struct.py b/src/semra/struct.py index ff5c95a9..fa83ec06 100644 --- a/src/semra/struct.py +++ b/src/semra/struct.py @@ -359,6 +359,9 @@ def key(self) -> StrTriple: """Get a hashable key for the mapping, based on the subject, predicate, and object.""" return triple_key(self.triple) + def __lt__(self, other: Mapping) -> bool: + return self.triple < other.triple + @classmethod def from_triple(cls, triple: Triple, evidence: list[Evidence] | None = None) -> Mapping: """Instantiate a mapping from a triple.""" diff --git a/tests/test_io.py b/tests/test_io.py index 8f5a61a4..c44003dd 100644 --- a/tests/test_io.py +++ b/tests/test_io.py @@ -242,8 +242,10 @@ def test_jsonl(self) -> None: def 
test_digraph(self) -> None: """Test I/O to a directed graph.""" - self.assertEqual(self.mappings, from_digraph(to_digraph(self.mappings))) + self.assertEqual(sorted(self.mappings), sorted(from_digraph(to_digraph(self.mappings)))) def test_multidigraph(self) -> None: """Test I/O with multi-directed graph.""" - self.assertEqual(self.mappings, from_multidigraph(to_multidigraph(self.mappings))) + self.assertEqual( + sorted(self.mappings), sorted(from_multidigraph(to_multidigraph(self.mappings))) + )