99from collections .abc import Iterable
1010from hashlib import md5
1111from itertools import islice
12- from typing import Annotated , Any , ClassVar , Literal
12+ from typing import Annotated , Any , ClassVar , Generic , Literal , NamedTuple , ParamSpec , TypeVar , Union
1313
1414import pydantic
1515from more_itertools import triplewise
3131 "triple_key" ,
3232]
3333
34+ P = ParamSpec ("P" )
35+ X = TypeVar ("X" )
36+
3437#: A type annotation for a subject-predicate-object triple
3538Triple = tuple [Reference , Reference , Reference ]
3639
3740
38- def triple_key (triple : Triple ) -> tuple [str , str , str ]:
class StrTriple(NamedTuple):
    """A triple of CURIE strings.

    Being a tuple of plain strings, instances compare and sort field by field.
    """

    #: First element of the triple
    subject: str
    #: Second element of the triple
    predicate: str
    #: Third element of the triple (the name shadows the builtin only inside this class body)
    object: str
47+
48+
def triple_key(triple: Triple) -> StrTriple:
    """Get a sortable key for a triple.

    :param triple: A subject-predicate-object tuple of references.
    :returns: The CURIEs of the three references packed into a :class:`StrTriple`.
    """
    # NOTE(review): the CURIEs are passed positionally as (subject, object, predicate),
    # which is the order the earlier plain-tuple key used. As a consequence the
    # ``predicate`` field holds the object's CURIE and the ``object`` field holds the
    # predicate's CURIE. The order is kept so existing sort orders stay stable --
    # confirm whether the field names or the order should change.
    return StrTriple(triple[0].curie, triple[2].curie, triple[1].curie)
4152
4253
4354def _md5_hexdigest (picklable : object ) -> str :
@@ -46,7 +57,7 @@ def _md5_hexdigest(picklable: object) -> str:
4657 return hasher .hexdigest ()
4758
4859
49- class KeyedMixin (ABC ):
60+ class KeyedMixin (ABC , Generic [ P , X ] ):
5061 """A mixin for a class that can be hashed and CURIE-encoded."""
5162
5263 #: The prefix for CURIEs for instances of this class
@@ -56,23 +67,23 @@ def __init_subclass__(cls, *, prefix: str, **kwargs: Any) -> None:
5667 cls ._prefix = prefix
5768
@abstractmethod
def key(self, *args: P.args, **kwargs: P.kwargs) -> X:
    """Return a picklable key.

    Subclasses must override this. The accepted arguments and the return type are
    fixed by the ``P`` and ``X`` parameters the subclass supplies to the mixin.

    :raises NotImplementedError: Always, in this abstract base implementation.
    """
    raise NotImplementedError
6273
def hexdigest(self, *args: P.args, **kwargs: P.kwargs) -> str:
    """Get a hex string for the MD5 hash of the pickled key() for this class.

    :param args: Positional arguments forwarded unchanged to :meth:`key`.
    :param kwargs: Keyword arguments forwarded unchanged to :meth:`key`.
    :returns: The hexadecimal digest computed from the key.
    """
    key = self.key(*args, **kwargs)
    return _md5_hexdigest(key)
6778
def get_reference(self, *args: P.args, **kwargs: P.kwargs) -> Reference:
    """Get a CURIE reference using this class's prefix and its hexadecimal representation.

    :param args: Positional arguments forwarded unchanged to :meth:`hexdigest`.
    :param kwargs: Keyword arguments forwarded unchanged to :meth:`hexdigest`.
    :returns: A reference whose prefix is the class's ``_prefix`` and whose
        identifier is the hex digest.
    """
    identifier = self.hexdigest(*args, **kwargs)
    return Reference(prefix=self._prefix, identifier=identifier)
7182
@property
def curie(self) -> str:
    """Get a string representing the CURIE.

    Attribute access can never supply arguments, so the reference is resolved
    with none. For a subclass whose ``key()`` requires arguments, call
    ``get_reference(...)`` with them and read ``.curie`` from the result instead.
    """
    # The getter previously declared ``*args``/``**kwargs``; a property getter only
    # ever receives ``self``, so those were always empty and have been removed.
    return self.get_reference().curie
7687
7788
7889class ConfidenceMixin :
@@ -104,7 +115,20 @@ def mapping_set_names(self) -> set[str]:
104115 raise NotImplementedError
105116
106117
107- class MappingSet (pydantic .BaseModel , ConfidenceMixin , KeyedMixin , prefix = SEMRA_MAPPING_SET_PREFIX ):
class MappingSetKey(NamedTuple):
    """The key used for a mapping set."""

    #: Name of the mapping set
    name: str
    #: Version of the mapping set
    version: str
    #: License of the mapping set
    license: str
124+
125+
126+ class MappingSet (
127+ pydantic .BaseModel ,
128+ ConfidenceMixin ,
129+ KeyedMixin [[], MappingSetKey ],
130+ prefix = SEMRA_MAPPING_SET_PREFIX ,
131+ ):
108132 """Represents a set of semantic mappings.
109133
110134 For example, this might correspond to:
@@ -121,17 +145,30 @@ class MappingSet(pydantic.BaseModel, ConfidenceMixin, KeyedMixin, prefix=SEMRA_M
121145 license : str | None = Field (default = None , description = "License name or URL for mapping set" )
122146 confidence : float = Field (..., description = "Mapping set level confidence" )
123147
def key(self) -> MappingSetKey:
    """Get a picklable key representing the mapping set.

    :returns: The name, version, and license of the mapping set. A missing
        version or license is represented by an empty string so every field
        of the key is a string.
    """
    return MappingSetKey(
        name=self.name,
        version=self.version or "",
        license=self.license or "",
    )
127151
def get_confidence(self) -> float:
    """Get the explicit confidence for the mapping set.

    :returns: The value of the ``confidence`` field, unchanged.
    """
    return self.confidence
131155
132156
class SimpleEvidenceKey(NamedTuple):
    """The key used for a simple evidence."""

    #: The kind of evidence; as the first field it is compared first when keys are sorted
    evidence_type: str
    #: Justification of the evidence, as a string
    justification: str
    #: Author of the evidence, as a string
    author: str
    #: Key of the mapping set the evidence belongs to
    mapping_set: MappingSetKey
164+
165+
133166class SimpleEvidence (
134- pydantic .BaseModel , KeyedMixin , EvidenceMixin , ConfidenceMixin , prefix = SEMRA_EVIDENCE_PREFIX
167+ pydantic .BaseModel ,
168+ KeyedMixin [[Union [Triple , "Mapping" ]], tuple [StrTriple , SimpleEvidenceKey ]],
169+ EvidenceMixin ,
170+ ConfidenceMixin ,
171+ prefix = SEMRA_EVIDENCE_PREFIX ,
135172):
136173 """Evidence for a mapping.
137174
@@ -158,19 +195,24 @@ class SimpleEvidence(
158195 uuid : UUID4 = Field (default_factory = uuid .uuid4 )
159196 confidence : float | None = Field (None , description = "The confidence" )
160197
161- def key (self ) -> object :
def _simple_key(self) -> SimpleEvidenceKey:
    """Build the part of the key that depends only on the evidence itself.

    :returns: The evidence type, the justification CURIE, the author CURIE
        (an empty string when there is no author), and the mapping set's key.
    """
    author_curie = self.author.curie if self.author else ""
    return SimpleEvidenceKey(
        self.evidence_type,
        self.justification.curie,
        author_curie,
        self.mapping_set.key(),
    )
205+
def key(self, triple: Triple | Mapping) -> tuple[StrTriple, SimpleEvidenceKey]:
    """Get a key suitable for hashing the evidence.

    :param triple: The triple this evidence supports, given either directly
        or as a mapping whose ``triple`` is used.
    :returns: A key for deduplication made of the triple's key followed by
        the evidence's own key.

    Note: this should be extended to include basically _all_ fields
    """
    # Accept a whole mapping for convenience and reduce it to its core triple.
    core = triple.triple if isinstance(triple, Mapping) else triple
    return (
        triple_key(core),
        self._simple_key(),
    )
175217
176218 @property
@@ -183,8 +225,28 @@ def get_confidence(self) -> float:
183225 return self .confidence if self .confidence is not None else self .mapping_set .confidence
184226
185227
228+ def _sort_evidence_key (ev : Evidence ) -> tuple [Any , ...]:
229+ # the first element of the simple key is the type of evidence,
230+ # so they can be compared
231+ return ev ._simple_key ()
232+
233+
class ReasonedEvidenceKey(NamedTuple):
    """The key used for a reasoned evidence."""

    #: The kind of evidence; as the first field it is compared first when keys are sorted
    evidence_type: str
    #: Justification of the evidence, as a string
    justification: str
    #: A tuple of tuples of evidence keys. Each evidence key pairs a triple key
    #: with either a simple or a (nested) reasoned evidence key. The annotation
    #: refers to this class itself, so it must not be evaluated eagerly.
    rest: tuple[
        tuple[tuple[StrTriple, ReasonedEvidenceKey] | tuple[StrTriple, SimpleEvidenceKey], ...], ...
    ]
242+
243+
186244class ReasonedEvidence (
187- pydantic .BaseModel , KeyedMixin , EvidenceMixin , ConfidenceMixin , prefix = SEMRA_EVIDENCE_PREFIX
245+ pydantic .BaseModel ,
246+ KeyedMixin [[Union [Triple , "Mapping" ]], tuple [StrTriple , ReasonedEvidenceKey ]],
247+ EvidenceMixin ,
248+ ConfidenceMixin ,
249+ prefix = SEMRA_EVIDENCE_PREFIX ,
188250):
189251 """A complex evidence based on multiple mappings."""
190252
@@ -200,12 +262,29 @@ class ReasonedEvidence(
200262 1.0 , description = "The probability that the reasoning method is correct"
201263 )
202264
203- def key (self ) -> object :
204- """Get a key for reasoned evidence."""
205- return (
def _simple_key(self) -> ReasonedEvidenceKey:
    """Build the part of the key that depends only on the evidence itself.

    :returns: The evidence type, the justification CURIE, and one tuple of
        evidence keys per underlying mapping.
    """
    # Sort the mappings by their triple key and each mapping's evidences by
    # their own keys, so the result does not depend on the order in which
    # they are stored.
    ordered_mappings = sorted(self.mappings, key=lambda m: triple_key(m.triple))
    return ReasonedEvidenceKey(
        self.evidence_type,
        self.justification.curie,
        tuple(
            tuple(
                evidence.key(mapping)
                for evidence in sorted(mapping.evidence, key=_sort_evidence_key)
            )
            for mapping in ordered_mappings
        ),
    )
277+
def key(self, triple: Triple | Mapping) -> tuple[StrTriple, ReasonedEvidenceKey]:
    """Get a key suitable for hashing the evidence.

    :param triple: The triple this evidence supports, given either directly
        or as a mapping whose ``triple`` is used.
    :returns: A key for deduplication made of the triple's key followed by
        the evidence's own key.

    Note: this should be extended to include basically _all_ fields
    """
    # Accept a whole mapping for convenience and reduce it to its core triple.
    core = triple.triple if isinstance(triple, Mapping) else triple
    return (
        triple_key(core),
        self._simple_key(),
    )
210289
211290 def get_confidence (self ) -> float :
@@ -256,7 +335,12 @@ def explanation(self) -> str:
256335]
257336
258337
259- class Mapping (pydantic .BaseModel , ConfidenceMixin , KeyedMixin , prefix = SEMRA_MAPPING_PREFIX ):
338+ class Mapping (
339+ pydantic .BaseModel ,
340+ ConfidenceMixin ,
341+ KeyedMixin [[], StrTriple ],
342+ prefix = SEMRA_MAPPING_PREFIX ,
343+ ):
260344 """A semantic mapping."""
261345
262346 model_config = ConfigDict (frozen = True )
@@ -271,9 +355,9 @@ def triple(self) -> Triple:
271355 """Get the mapping's core triple as a tuple."""
272356 return self .s , self .p , self .o
273357
def key(self) -> StrTriple:
    """Get a hashable key for the mapping, based on the subject, predicate, and object.

    :returns: The result of :func:`triple_key` applied to the mapping's core
        triple, i.e. CURIE strings rather than reference objects.
    """
    return triple_key(self.triple)
277361
278362 @classmethod
279363 def from_triple (cls , triple : Triple , evidence : list [Evidence ] | None = None ) -> Mapping :
0 commit comments