44
55import json
66from pathlib import Path
7- from typing import Any , Literal , TypeVar , overload
7+ from typing import Any , Callable , Literal , TypeVar , overload
88
99from pydantic import BaseModel , Field
1010from typing_extensions import Never , Self , TypeAlias
@@ -171,6 +171,10 @@ class BlocklistError(ValueError):
171171 """An error for block list."""
172172
173173
174+ def _identity (x : str ) -> str :
175+ return x
176+
177+
174178class PreprocessingConverter (Converter ):
175179 """A converter with pre-processing rules."""
176180
@@ -179,6 +183,7 @@ def __init__(
179183 * args : Any ,
180184 rules : PreprocessingRules | str | Path ,
181185 reference_cls : type [X ] | None = None ,
186+ preclean : Callable [[str ], str ] | None = None ,
182187 ** kwargs : Any ,
183188 ) -> None :
184189 """Instantiate a converter with a ruleset for pre-processing.
@@ -187,11 +192,14 @@ def __init__(
187192 :param rules: A set of rules
188193 :param reference_cls: The reference class to use. Defaults to
189194 :class:`curies.Reference`.
195+ :param preclean: An optional function used to preprocess strings, CURIEs, and
196+ URIs before parsing
190197 :param kwargs: Keyword arguments passed to :meth:`curies.Converter.__init__`
191198 """
192199 super ().__init__ (* args , ** kwargs )
193200 self .rules = _load_rules (rules )
194201 self ._reference_cls = Reference if reference_cls is None else reference_cls
202+ self ._preclean = preclean if preclean is not None else _identity
195203
196204 @classmethod
197205 def from_converter (cls , converter : Converter , rules : PreprocessingRules | str | Path ) -> Self :
@@ -237,6 +245,8 @@ def parse(
237245 block_action : BlockAction = "raise" ,
238246 ) -> ReferenceTuple | None :
239247 """Parse a string, CURIE, or URI."""
248+ str_or_uri_or_curie = self ._preclean (str_or_uri_or_curie )
249+
240250 if r1 := self .rules .remap_full (
241251 str_or_uri_or_curie , reference_cls = self ._reference_cls , context = context
242252 ):
@@ -301,6 +311,8 @@ def parse_curie(
301311
302312 :raises BlocklistError: If the CURIE is blocked
303313 """
314+ curie = self ._preclean (curie )
315+
304316 if r1 := self .rules .remap_full (curie , reference_cls = self ._reference_cls , context = context ):
305317 return r1 .pair
306318
@@ -384,6 +396,8 @@ def parse_uri(
384396 if not return_none :
385397 raise NotImplementedError
386398
399+ uri = self ._preclean (uri )
400+
387401 if r1 := self .rules .remap_full (uri , reference_cls = self ._reference_cls , context = context ):
388402 return r1 .pair
389403
0 commit comments