Skip to content

Commit 0cea138

Browse files
committed
Update preprocessing.py
1 parent 431fc08 commit 0cea138

1 file changed

Lines changed: 15 additions & 1 deletion

File tree

src/curies/preprocessing.py

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
import json
66
from pathlib import Path
7-
from typing import Any, Literal, TypeVar, overload
7+
from typing import Any, Callable, Literal, TypeVar, overload
88

99
from pydantic import BaseModel, Field
1010
from typing_extensions import Never, Self, TypeAlias
@@ -171,6 +171,10 @@ class BlocklistError(ValueError):
171171
"""An error for block list."""
172172

173173

174+
def _identity(x: str) -> str:
175+
return x
176+
177+
174178
class PreprocessingConverter(Converter):
175179
"""A converter with pre-processing rules."""
176180

@@ -179,6 +183,7 @@ def __init__(
179183
*args: Any,
180184
rules: PreprocessingRules | str | Path,
181185
reference_cls: type[X] | None = None,
186+
preclean: Callable[[str], str] | None = None,
182187
**kwargs: Any,
183188
) -> None:
184189
"""Instantiate a converter with a ruleset for pre-processing.
@@ -187,11 +192,14 @@ def __init__(
187192
:param rules: A set of rules
188193
:param reference_cls: The reference class to use. Defaults to
189194
:class:`curies.Reference`.
195+
:param preclean: An optional function used to preprocess strings, CURIEs, and
196+
URIs before parsing
190197
:param kwargs: Keyword arguments passed to :meth:`curies.Converter.__init__`
191198
"""
192199
super().__init__(*args, **kwargs)
193200
self.rules = _load_rules(rules)
194201
self._reference_cls = Reference if reference_cls is None else reference_cls
202+
self._preclean = preclean if preclean is not None else _identity
195203

196204
@classmethod
197205
def from_converter(cls, converter: Converter, rules: PreprocessingRules | str | Path) -> Self:
@@ -237,6 +245,8 @@ def parse(
237245
block_action: BlockAction = "raise",
238246
) -> ReferenceTuple | None:
239247
"""Parse a string, CURIE, or URI."""
248+
str_or_uri_or_curie = self._preclean(str_or_uri_or_curie)
249+
240250
if r1 := self.rules.remap_full(
241251
str_or_uri_or_curie, reference_cls=self._reference_cls, context=context
242252
):
@@ -301,6 +311,8 @@ def parse_curie(
301311
302312
:raises BlocklistError: If the CURIE is blocked
303313
"""
314+
curie = self._preclean(curie)
315+
304316
if r1 := self.rules.remap_full(curie, reference_cls=self._reference_cls, context=context):
305317
return r1.pair
306318

@@ -384,6 +396,8 @@ def parse_uri(
384396
if not return_none:
385397
raise NotImplementedError
386398

399+
uri = self._preclean(uri)
400+
387401
if r1 := self.rules.remap_full(uri, reference_cls=self._reference_cls, context=context):
388402
return r1.pair
389403

0 commit comments

Comments
 (0)