Skip to content

Commit 37643cc

Browse files
authored
Add valid URI checks in mapping service graph (#45)
Bioregistry has some synonyms that contain spaces, which currently get propagated to the `curies.Converter` instance in the Bioregistry's default `bioregistry.Manager`. This is an issue since it results in `https://bioregistry.io/<prefix>:<identifier>` variant URIs with spaces. This PR makes sure that the ones containing spaces get filtered out. It also does some refactoring of the logic for the custom `triples()` function to streamline it and reduce code duplication.
1 parent a873fef commit 37643cc

2 files changed

Lines changed: 35 additions & 26 deletions

File tree

src/curies/mapping_service/__init__.py

Lines changed: 19 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,7 @@
107107
from typing import TYPE_CHECKING, Any, Collection, Iterable, List, Set, Tuple, Union, cast
108108

109109
from rdflib import OWL, Graph, URIRef
110+
from rdflib.term import _is_valid_uri
110111

111112
from .rdflib_custom import MappingServiceSPARQLProcessor # type: ignore
112113
from ..api import Converter
@@ -201,37 +202,29 @@ def __init__(
201202
self.predicates = _prepare_predicates(predicates)
202203
super().__init__(*args, **kwargs)
203204

205+
def _expand_pair_all(self, uri_in: str) -> List[URIRef]:
206+
prefix, identifier = self.converter.parse_uri(uri_in)
207+
if prefix is None or identifier is None:
208+
return []
209+
uris = cast(Collection[str], self.converter.expand_pair_all(prefix, identifier))
210+
# do _is_valid_uri check because some configurations e.g. from Bioregistry might
211+
# produce invalid URIs e.g., containing spaces
212+
return [URIRef(uri) for uri in uris if _is_valid_uri(uri)]
213+
204214
def triples(
205215
self, triple: Tuple[URIRef, URIRef, URIRef]
206216
) -> Iterable[Tuple[URIRef, URIRef, URIRef]]:
207217
"""Generate triples, overriden to dynamically generate mappings based on this graph's converter."""
208218
subj_query, pred_query, obj_query = triple
209-
if pred_query not in self.predicates:
210-
return
211-
if subj_query is None and obj_query is None:
212-
return # can't generate based on this
213-
if subj_query is None and obj_query is not None:
214-
prefix, identifier = self.converter.parse_uri(obj_query)
215-
if prefix is None or identifier is None:
216-
return
217-
subjects = [
218-
URIRef(sub)
219-
for sub in cast(Collection[str], self.converter.expand_pair_all(prefix, identifier))
220-
]
221-
for subj, pred in itt.product(subjects, self.predicates):
222-
yield subj, pred, obj_query
223-
elif subj_query is not None and obj_query is None:
224-
prefix, identifier = self.converter.parse_uri(subj_query)
225-
if prefix is None or identifier is None:
226-
return
227-
objects = [
228-
URIRef(obj)
229-
for obj in cast(Collection[str], self.converter.expand_pair_all(prefix, identifier))
230-
]
231-
for obj, pred in itt.product(objects, self.predicates):
232-
yield subj_query, pred, obj
233-
else: # subj_query is not None and obj_query is not None
234-
return # too much specification? maybe just return one triple then?
219+
if pred_query in self.predicates:
220+
if subj_query is None and obj_query is not None:
221+
subjects = self._expand_pair_all(obj_query)
222+
for subj, pred in itt.product(subjects, self.predicates):
223+
yield subj, pred, obj_query
224+
elif subj_query is not None and obj_query is None:
225+
objects = self._expand_pair_all(subj_query)
226+
for obj, pred in itt.product(objects, self.predicates):
227+
yield subj_query, pred, obj
235228

236229

237230
def get_flask_mapping_blueprint(

tests/test_mapping_service.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,22 @@ def test_missing(self):
151151
"""
152152
self.assertEqual([], list(self.graph.query(sparql, processor=self.processor)))
153153

154+
def test_safe_expand(self):
155+
"""Test that expansion to invalid prefixes doesn't happen."""
156+
ppm = {
157+
"CHEBI": [
158+
"http://purl.obolibrary.org/obo/CHEBI_",
159+
"http://identifiers.org/chebi/",
160+
"http://identifiers.org/chebi/nope nope:",
161+
],
162+
}
163+
converter = Converter.from_priority_prefix_map(ppm)
164+
graph = MappingServiceGraph(converter=converter)
165+
self.assertEqual(
166+
{"http://purl.obolibrary.org/obo/CHEBI_1", "http://identifiers.org/chebi/1"},
167+
set(map(str, graph._expand_pair_all("http://purl.obolibrary.org/obo/CHEBI_1"))),
168+
)
169+
154170

155171
class ConverterMixin(unittest.TestCase):
156172
"""A mixin that has a converter."""

0 commit comments

Comments
 (0)