Skip to content

Commit bffc263

Browse files
authored
Add tests for prioritization (#89)
1 parent 671df41 commit bffc263

2 files changed

Lines changed: 108 additions & 12 deletions

File tree

src/semra/api.py

Lines changed: 44 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -513,8 +513,16 @@ def prioritize(
513513
) -> list[Mapping]:
514514
"""Get a priority star graph.
515515
516-
:param mappings: An iterable of mappings
517-
:param priority: A priority list of prefixes, where earlier in the list means the priority is higher
516+
:param mappings: An iterable of mappings.
517+
518+
.. warning::
519+
520+
This assumes that inference and inversion have already been run.
521+
This means that if there exists any exact match mapping path between
522+
``A`` and ``B``, then there exists an edge ``A, exact, B``. Further,
523+
if there exists a mapping ``A, exact, B``, there must be a ``B, exact, A``.
524+
525+
:param priority: A priority list of prefixes, where earlier in the list means the priority is higher.
518526
:return:
519527
A list of mappings representing a "prioritization", meaning that each element only
520528
appears as subject once. This condition means that the prioritization mapping can be applied
@@ -524,11 +532,31 @@ def prioritize(
524532
525533
1. Get the subset of exact matches from the input mapping list
526534
2. Convert the exact matches to an undirected mapping graph
527-
3. Extract connected components
535+
3. Extract connected components.
536+
537+
.. note::
538+
539+
Because of construction, connected components might contain
540+
just two mappings, ``A, exact, B`` and ``B, exact, A``.
541+
528542
4. For each component
529543
1. Get the "priority" reference using :func:`get_priority_reference`
530544
2. Construct new mappings where all references in the component are the subject
531545
and the priority reference is the object (skip the self mapping)
546+
547+
Here's an example usage, where inference is run ahead of prioritization.
548+
549+
>>> from semra import DB_XREF, EXACT_MATCH, Reference
550+
>>> from semra.inference import infer_reversible, infer_chains
551+
>>> curies = "doid:0050577", "mesh:C562966", "umls:C4551571"
552+
>>> r1, r2, r3 = (Reference.from_curie(c) for c in curies)
553+
>>> m1 = Mapping.from_triple((r1, EXACT_MATCH, r2))
554+
>>> m2 = Mapping.from_triple((r2, EXACT_MATCH, r3))
555+
>>> m3 = Mapping.from_triple((r1, EXACT_MATCH, r3))
556+
>>> mappings = [m1, m2, m3]
557+
>>> mappings = infer_reversible(mappings)
558+
>>> mappings = infer_chains(mappings)
559+
>>> prioritize(mappings, ["mesh", "doid", "umls"])
532560
"""
533561
original_mappings = len(mappings)
534562
mappings = [m for m in mappings if m.predicate == EXACT_MATCH]
@@ -543,15 +571,19 @@ def prioritize(
543571
o = get_priority_reference(component, priority)
544572
if o is None:
545573
continue
546-
rv.extend(
547-
mapping
548-
# TODO should this work even if s-o edge not exists?
549-
# can also do "inference" here, but also might be
550-
# because of negative edge filtering
551-
for s in component
552-
if s != o and graph.has_edge(s, o)
553-
for mapping in _from_digraph_edge(graph, s, o)
554-
)
574+
for s in component:
575+
if s == o: # don't add self-edges
576+
continue
577+
if not graph.has_edge(s, o):
578+
# TODO should this work even if the s-o edge does not exist?
579+
# can also do "inference" here, but also might be
580+
# because of negative edge filtering
581+
raise NotImplementedError(
582+
"prioritize() should only be called on fully inferred graphs, meaning "
583+
"that in a given component, it is a full clique (i.e., there are edges "
584+
"in both directions between all nodes)"
585+
)
586+
rv.extend(_from_digraph_edge(graph, s, o))
555587

556588
# sort such that the mappings are ordered by object by priority order
557589
# then identifier of object, then subject prefix in alphabetical order

tests/test_api.py

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
get_index,
1919
get_many_to_many,
2020
keep_prefixes,
21+
prioritize,
2122
prioritize_df,
2223
project,
2324
)
@@ -485,6 +486,69 @@ def test_prioritize_df(self) -> None:
485486
list(df["curie_prioritized"]),
486487
)
487488

489+
def test_prioritize(self) -> None:
490+
"""Test prioritize."""
491+
a1 = Reference(prefix=PREFIX_A, identifier="0000001")
492+
b1 = Reference(prefix=PREFIX_B, identifier="0000002")
493+
c1 = Reference(prefix=PREFIX_C, identifier="0000003")
494+
ev = SimpleEvidence(confidence=0.95, mapping_set=MS)
495+
m1 = Mapping(subject=a1, predicate=EXACT_MATCH, object=b1, evidence=[ev])
496+
m1_rev = Mapping(subject=b1, predicate=EXACT_MATCH, object=a1, evidence=[ev])
497+
m2 = Mapping(subject=b1, predicate=EXACT_MATCH, object=c1, evidence=[ev])
498+
m2_rev = Mapping(subject=c1, predicate=EXACT_MATCH, object=b1, evidence=[ev])
499+
m3 = Mapping(subject=a1, predicate=EXACT_MATCH, object=c1, evidence=[ev])
500+
m3_rev = Mapping(subject=c1, predicate=EXACT_MATCH, object=a1, evidence=[ev])
501+
502+
# can't address priority
503+
self.assert_same_triples(
504+
[],
505+
prioritize([m1, m1_rev, m2, m2_rev, m3, m3_rev], [PREFIX_D], progress=False),
506+
)
507+
508+
# has unusable priority first, but then defaults
509+
self.assert_same_triples(
510+
[m1_rev, m3_rev],
511+
prioritize([m1, m1_rev, m2, m2_rev, m3, m3_rev], [PREFIX_D, PREFIX_A], progress=False),
512+
)
513+
514+
self.assert_same_triples(
515+
[m1_rev, m3_rev],
516+
prioritize([m1, m1_rev, m2, m2_rev, m3, m3_rev], [PREFIX_A], progress=False),
517+
)
518+
self.assert_same_triples(
519+
[m1, m2_rev],
520+
prioritize([m1, m1_rev, m2, m2_rev, m3, m3_rev], [PREFIX_B], progress=False),
521+
)
522+
self.assert_same_triples(
523+
[m2, m3],
524+
prioritize([m1, m1_rev, m2, m2_rev, m3, m3_rev], [PREFIX_C], progress=False),
525+
)
526+
527+
# test on component with only 1
528+
self.assert_same_triples(
529+
[m1_rev],
530+
prioritize([m1, m1_rev], [PREFIX_A], progress=False),
531+
)
532+
self.assert_same_triples(
533+
[m1],
534+
prioritize([m1, m1_rev], [PREFIX_B], progress=False),
535+
)
536+
self.assert_same_triples(
537+
[],
538+
prioritize([m1, m1_rev], [PREFIX_C], progress=False),
539+
)
540+
541+
# the following three tests reflect that the prioritize() function
542+
# is not implemented in cases when inference hasn't been fully done
543+
with self.assertRaises(NotImplementedError):
544+
prioritize([m1, m2], [PREFIX_A], progress=False)
545+
with self.assertRaises(NotImplementedError):
546+
prioritize([m1, m2], [PREFIX_C], progress=False)
547+
548+
# this one is able to complete, by chance, but it's not part of
549+
# the contract, so just left here for later
550+
# self.assertEqual([m1, m2_rev], prioritize([m1, m2], [PREFIX_B], progress=False))
551+
488552

489553
class TestUpgrades(unittest.TestCase):
490554
"""Test inferring mutations."""

0 commit comments

Comments
 (0)