Skip to content

Commit d770635

Browse files
committed
Rename components
1 parent b1903d4 commit d770635

4 files changed

Lines changed: 72 additions & 48 deletions

File tree

docs/source/preprocessing.rst

Lines changed: 29 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,11 +11,10 @@ For example, you always want to fix legacy references to the ``OBO_REL`` namespa
1111
.. code-block:: python
1212
1313
import curies
14-
from curies import PreprocessingRules, PreprocessingConverter
15-
from curies.wrapped import Rewrites
14+
from curies import PreprocessingRules, PreprocessingConverter, PreprocessingRewrites
1615
1716
rules = PreprocessingRules(
18-
rewrites=Rewrites(
17+
rewrites=PreprocessingRewrites(
1918
full={"OBO_REL:is_a": "rdfs:subClassOf"}
2019
)
2120
)
@@ -27,3 +26,30 @@ For example, you always want to fix legacy references to the ``OBO_REL`` namespa
2726
2827
>>> converter.parse_curie("OBO_REL:is_a")
2928
ReferenceTuple('rdfs', 'subClassOf')
29+
30+
Similarly, there may be a whole class of references that need to be fixed
31+
based on their prefix, such as the ``APOLLO:SV_`` references that are mangled
32+
by the OWLAPI due to the OBO Foundry's PURL rules:
33+
34+
.. code-block:: python
35+
36+
import curies
37+
from curies import PreprocessingRules, PreprocessingConverter, PreprocessingRewrites
38+
39+
rules = PreprocessingRules(
40+
rewrites=PreprocessingRewrites(
41+
prefix={"APOLLO:SV_": "APOLLO_SV:"}
42+
)
43+
)
44+
45+
converter = curies.get_obo_converter()
46+
converter = PreprocessingConverter.from_converter(
47+
converter, rules=rules
48+
)
49+
50+
>>> converter.parse_curie("APOLLO:SV_1234567")
51+
ReferenceTuple('APOLLO_SV', '1234567')
52+
53+
Some rewrite rules only apply to a specific resource, because of its own quirks
54+
in curation or encoding. For example, CHMO encodes OrangeBook entries with ``orange``
55+
as a prefix, which is not typically specific enough to rewrite globally, so the rule should only be applied in the context of CHMO.

src/curies/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
)
2727
from .discovery import discover, discover_from_rdf
2828
from .preprocessing import (
29-
PreprocessingBlacklist,
29+
PreprocessingBlocklists,
3030
PreprocessingConverter,
3131
PreprocessingRewrites,
3232
PreprocessingRules,
@@ -51,7 +51,7 @@
5151
"NamedReference",
5252
"Prefix",
5353
"PrefixMap",
54-
"PreprocessingBlacklist",
54+
"PreprocessingBlocklists",
5555
"PreprocessingConverter",
5656
"PreprocessingRewrites",
5757
"PreprocessingRules",

src/curies/preprocessing.py

Lines changed: 29 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,8 @@
1212
from .api import Converter, Reference, ReferenceTuple
1313

1414
__all__ = [
15-
"BlacklistError",
16-
"PreprocessingBlacklist",
15+
"BlocklistError",
16+
"PreprocessingBlocklists",
1717
"PreprocessingConverter",
1818
"PreprocessingRewrites",
1919
"PreprocessingRules",
@@ -22,8 +22,8 @@
2222
X = TypeVar("X", bound=Reference)
2323

2424

25-
class PreprocessingBlacklist(BaseModel):
26-
"""A model for prefix and full blacklists."""
25+
class PreprocessingBlocklists(BaseModel):
26+
"""A model for prefix and full blocklists."""
2727

2828
full: list[str] = Field(default_factory=list)
2929
resource_full: dict[str, list[str]] = Field(default_factory=dict)
@@ -40,34 +40,32 @@ def _sort(self) -> None:
4040
for v in self.resource_prefix.values():
4141
v.sort()
4242

43-
def str_has_blacklisted_prefix(
43+
def str_has_blocked_prefix(
4444
self, str_or_curie_or_uri: str, *, context: str | None = None
4545
) -> bool:
46-
"""Check if the CURIE string has a blacklisted prefix."""
46+
"""Check if the CURIE string has a blocklisted prefix."""
4747
if context:
4848
prefixes: list[str] = self.resource_prefix.get(context, [])
4949
if prefixes and any(str_or_curie_or_uri.startswith(prefix) for prefix in prefixes):
5050
return True
5151
return any(str_or_curie_or_uri.startswith(prefix) for prefix in self.prefix)
5252

53-
def str_has_blacklisted_suffix(self, str_or_curie_or_uri: str) -> bool:
54-
"""Check if the CURIE string has a blacklisted suffix."""
53+
def str_has_blocked_suffix(self, str_or_curie_or_uri: str) -> bool:
54+
"""Check if the CURIE string has a blocklisted suffix."""
5555
return any(str_or_curie_or_uri.endswith(suffix) for suffix in self.suffix)
5656

57-
def str_is_blacklisted_full(
58-
self, str_or_curie_or_uri: str, *, context: str | None = None
59-
) -> bool:
60-
"""Check if the full CURIE string is blacklisted."""
57+
def str_is_blocked_full(self, str_or_curie_or_uri: str, *, context: str | None = None) -> bool:
58+
"""Check if the full CURIE string is blocklisted."""
6159
if context and str_or_curie_or_uri in self.resource_full.get(context, set()):
6260
return True
6361
return str_or_curie_or_uri in self.full
6462

65-
def str_is_blacklisted(self, str_or_curie_or_uri: str, *, context: str | None = None) -> bool:
66-
"""Check if the full CURIE string is blacklisted."""
63+
def str_is_blocked(self, str_or_curie_or_uri: str, *, context: str | None = None) -> bool:
64+
"""Check if the string has a blocklisted prefix or suffix, or is blocklisted in full."""
6765
return (
68-
self.str_has_blacklisted_prefix(str_or_curie_or_uri, context=context)
69-
or self.str_has_blacklisted_suffix(str_or_curie_or_uri)
70-
or self.str_is_blacklisted_full(str_or_curie_or_uri, context=context)
66+
self.str_has_blocked_prefix(str_or_curie_or_uri, context=context)
67+
or self.str_has_blocked_suffix(str_or_curie_or_uri)
68+
or self.str_is_blocked_full(str_or_curie_or_uri, context=context)
7169
)
7270

7371

@@ -118,17 +116,17 @@ def remap_prefix(self, str_or_curie_or_uri: str, *, context: str | None = None)
118116

119117

120118
class PreprocessingRules(BaseModel):
121-
"""A model for blacklists and rewrites."""
119+
"""A model for blocklists and rewrites."""
122120

123-
blacklists: PreprocessingBlacklist
121+
blocklists: PreprocessingBlocklists
124122
rewrites: PreprocessingRewrites
125123

126124
@classmethod
127125
def lint_file(cls, path: str | Path) -> None:
128126
"""Lint a file, in place, given a file path."""
129127
path = Path(path).expanduser().resolve()
130128
rules = cls.model_validate_json(path.read_text())
131-
rules.blacklists._sort()
129+
rules.blocklists._sort()
132130
path.write_text(
133131
json.dumps(
134132
rules.model_dump(exclude_unset=True, exclude_defaults=True),
@@ -137,9 +135,9 @@ def lint_file(cls, path: str | Path) -> None:
137135
)
138136
)
139137

140-
def str_is_blacklisted(self, str_or_curie_or_uri: str, *, context: str | None = None) -> bool:
141-
"""Check if the CURIE string is blacklisted."""
142-
return self.blacklists.str_is_blacklisted(str_or_curie_or_uri, context=context)
138+
def str_is_blocked(self, str_or_curie_or_uri: str, *, context: str | None = None) -> bool:
139+
"""Check if the CURIE string is blocked."""
140+
return self.blocklists.str_is_blocked(str_or_curie_or_uri, context=context)
143141

144142
def remap_full(
145143
self,
@@ -165,8 +163,8 @@ def _load_rules(rules: str | Path | PreprocessingRules) -> PreprocessingRules:
165163
return rules
166164

167165

168-
class BlacklistError(ValueError):
169-
"""An error for blacklist."""
166+
class BlocklistError(ValueError):
167+
"""An error raised when a string is blocklisted."""
170168

171169

172170
class PreprocessingConverter(Converter):
@@ -228,8 +226,8 @@ def parse(
228226
# Remap node's prefix (if necessary)
229227
str_or_uri_or_curie = self.rules.remap_prefix(str_or_uri_or_curie, context=context)
230228

231-
if self.rules.str_is_blacklisted(str_or_uri_or_curie, context=context):
232-
raise BlacklistError
229+
if self.rules.str_is_blocked(str_or_uri_or_curie, context=context):
230+
raise BlocklistError
233231

234232
if strict:
235233
return super().parse(str_or_uri_or_curie, strict=strict)
@@ -256,20 +254,20 @@ def parse_curie(
256254
:param strict: If the CURIE can't be parsed, should an error be thrown? Defaults
257255
to false.
258256
:param context: Is there a context, e.g., an ontology prefix that should be
259-
applied to the remapping and blacklist rules?
257+
applied to the remapping and blocklist rules?
260258
261259
:returns: A tuple representing a parsed and standardized CURIE
262260
263-
:raises BlacklistError: If the CURIE is blacklisted
261+
:raises BlocklistError: If the CURIE is blocked
264262
"""
265263
if r1 := self.rules.remap_full(curie, reference_cls=self._reference_cls, context=context):
266264
return r1.pair
267265

268266
# Remap node's prefix (if necessary)
269267
curie = self.rules.remap_prefix(curie, context=context)
270268

271-
if self.rules.str_is_blacklisted(curie, context=context):
272-
raise BlacklistError
269+
if self.rules.str_is_blocked(curie, context=context):
270+
raise BlocklistError
273271

274272
if strict:
275273
return super().parse_curie(curie, strict=strict)

tests/test_preprocessing.py

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,8 @@
99

1010
from curies import Converter, ReferenceTuple
1111
from curies.preprocessing import (
12-
BlacklistError,
13-
PreprocessingBlacklist,
12+
BlocklistError,
13+
PreprocessingBlocklists,
1414
PreprocessingConverter,
1515
PreprocessingRewrites,
1616
PreprocessingRules,
@@ -55,7 +55,7 @@ def setUpClass(cls) -> None:
5555
},
5656
},
5757
),
58-
blacklists=PreprocessingBlacklist(
58+
blocklists=PreprocessingBlocklists(
5959
full=["rdf:NOPE"],
6060
resource_prefix={
6161
"chebi": [
@@ -155,8 +155,8 @@ def test_resource_prefix_rewrite(self) -> None:
155155
with self.assertRaises(ValueError):
156156
self.assertIsNone(self.converter.parse_curie("j1234", context="chebi"))
157157

158-
def test_resource_specific_blacklist(self) -> None:
159-
"""Test resource-specific blacklist."""
158+
def test_resource_specific_blocklist(self) -> None:
159+
"""Test resource-specific blocklist."""
160160
self.assertEqual(
161161
ReferenceTuple("pubmed", "1234"),
162162
self.converter.parse_curie("pubmed:1234"),
@@ -165,17 +165,17 @@ def test_resource_specific_blacklist(self) -> None:
165165
ReferenceTuple("pubmed", "1234"),
166166
self.converter.parse_curie("pubmed:1234", context="doid"),
167167
)
168-
with self.assertRaises(BlacklistError):
168+
with self.assertRaises(BlocklistError):
169169
self.converter.parse_curie("pubmed:1234", context="chebi")
170170

171171
self.converter.parse_curie("omim:1234", context="chebi")
172-
# normally, OMIM works, but we configured a specific one for the blacklist
173-
with self.assertRaises(BlacklistError):
172+
# normally, OMIM works, but we configured a specific one for the blocklist
173+
with self.assertRaises(BlocklistError):
174174
self.converter.parse_curie("omim:1356", context="chebi")
175175

176-
def test_global_blacklist(self) -> None:
177-
"""Test global blacklist."""
178-
with self.assertRaises(BlacklistError):
176+
def test_global_blocklist(self) -> None:
177+
"""Test global blocklist."""
178+
with self.assertRaises(BlocklistError):
179179
self.converter.parse("rdf:NOPE")
180-
with self.assertRaises(BlacklistError):
180+
with self.assertRaises(BlocklistError):
181181
self.converter.parse_curie("rdf:NOPE")

0 commit comments

Comments
 (0)