from __future__ import annotations

import math
import pickle
from abc import ABC, abstractmethod
from collections.abc import Iterable
from hashlib import md5
from hashlib import sha256
from typing import Annotated, Any, ClassVar, Generic, Literal, NamedTuple, ParamSpec, TypeVar, Union

import pydantic
117116X = TypeVar ("X" )
118117
119118
120- def _md5_hexdigest (picklable : object ) -> str :
121- hasher = md5 () # noqa: S324
122- hasher .update (pickle .dumps (picklable ))
123- return hasher .hexdigest ()
124-
125-
126- class KeyedMixin (ABC , Generic [P , X ]):
119+ class KeyedMixin (ABC , Generic [P ]):
127120 """A mixin for a class that can be hashed and CURIE-encoded."""
128121
129122 #: The prefix for CURIEs for instances of this class
@@ -133,14 +126,16 @@ def __init_subclass__(cls, *, prefix: str, **kwargs: Any) -> None:
133126 cls ._prefix = prefix
134127
@abstractmethod
def key(self, *args: P.args, **kwargs: P.kwargs) -> str:
    """Return a string key.

    :param args: Positional arguments accepted by the concrete implementation.
    :param kwargs: Keyword arguments accepted by the concrete implementation.
    :returns: A string key identifying this instance.
    :raises NotImplementedError: Always, since this is the abstract placeholder
        that concrete classes must override.
    """
    raise NotImplementedError
139132
def hexdigest(self, *args: P.args, **kwargs: P.kwargs) -> str:
    """Get a hex string for the SHA-256 hash of the UTF-8 encoded key() for this class.

    :param args: Positional arguments forwarded unchanged to :meth:`key`.
    :param kwargs: Keyword arguments forwarded unchanged to :meth:`key`.
    :returns: The hexadecimal SHA-256 digest of the string returned by :meth:`key`.
    """
    text = self.key(*args, **kwargs)
    # The key is a plain string, so it is encoded explicitly before hashing.
    return sha256(text.encode("utf-8")).hexdigest()
144139
145140 def get_reference (self , * args : P .args , ** kwargs : P .kwargs ) -> Reference :
146141 """Get a CURIE reference using this class's prefix and its hexadecimal representation."""
@@ -193,7 +188,7 @@ class MappingSetKey(NamedTuple):
193188class MappingSet (
194189 pydantic .BaseModel ,
195190 ConfidenceMixin ,
196- KeyedMixin [[], MappingSetKey ],
191+ KeyedMixin [[]],
197192 prefix = SEMRA_MAPPING_SET_PREFIX ,
198193):
199194 """Represents a set of semantic mappings.
@@ -239,9 +234,9 @@ class MappingSet(
239234 description = "Mapping set level confidence. Corresponds to optional SSSOM field https://mapping-commons.github.io/sssom/mapping_set_confidence/" ,
240235 )
241236
def key(self) -> str:
    """Get a string key representing the mapping set.

    :returns: The PURL, name, version, and license joined with newlines, in
        that order. A falsy PURL, version, or license contributes an empty
        string so the number of fields is always the same.
    """
    fields = (
        self.purl or "",
        self.name,
        self.version or "",
        self.license or "",
    )
    return "\n".join(fields)
245240
246241 def get_confidence (self ) -> float :
247242 """Get the explicit confidence for the mapping set."""
@@ -259,7 +254,7 @@ class SimpleEvidenceKey(NamedTuple):
259254
260255class SimpleEvidence (
261256 pydantic .BaseModel ,
262- KeyedMixin [[Union [Triple , "Mapping" ]], tuple [ StrTriple , SimpleEvidenceKey ] ],
257+ KeyedMixin [[Union [Triple , "Mapping" ]]],
263258 EvidenceMixin ,
264259 ConfidenceMixin ,
265260 prefix = SEMRA_EVIDENCE_PREFIX ,
@@ -288,25 +283,24 @@ class SimpleEvidence(
288283 )
289284 confidence : float | None = Field (None , description = "The confidence" )
290285
def _simple_key(self) -> str:
    """Build the triple-independent part of this evidence's key.

    :returns: The evidence type, the justification's CURIE, the author's
        CURIE (an empty string when there is no author), and the mapping
        set's key, joined with tabs in that order.
    """
    author_curie = self.author.curie if self.author else ""
    return "\t".join(
        (
            self.evidence_type,
            self.justification.curie,
            author_curie,
            self.mapping_set.key(),
        )
    )
298295
def key(self, triple: Triple | Mapping) -> str:
    """Get a key suitable for hashing the evidence.

    :param triple: The triple or mapping this evidence is attached to; its
        ``as_str_triple()`` parts are placed at the front of the key.
    :returns: A key for deduplication based on the mapping set, made of the
        triple's string parts followed by this evidence's ``_simple_key()``,
        joined with tabs.

    Note: this should be extended to include basically _all_ fields
    """
    parts = (*triple.as_str_triple(), self._simple_key())
    return "\t".join(parts)
310304
311305 @property
312306 def mapping_set_names (self ) -> set [str ]:
@@ -318,12 +312,6 @@ def get_confidence(self) -> float:
318312 return self .confidence if self .confidence is not None else self .mapping_set .confidence
319313
320314
321- def _sort_evidence_key (ev : Evidence ) -> tuple [Any , ...]:
322- # the first element of the simple key is the type of evidence,
323- # so they can be compared
324- return ev ._simple_key ()
325-
326-
327315class ReasonedEvidenceKey (NamedTuple ):
328316 """The key used for a reasoned evidence."""
329317
@@ -336,7 +324,7 @@ class ReasonedEvidenceKey(NamedTuple):
336324
337325class ReasonedEvidence (
338326 pydantic .BaseModel ,
339- KeyedMixin [[Union [Triple , "Mapping" ]], tuple [ StrTriple , ReasonedEvidenceKey ] ],
327+ KeyedMixin [[Union [Triple , "Mapping" ]]],
340328 EvidenceMixin ,
341329 ConfidenceMixin ,
342330 prefix = SEMRA_EVIDENCE_PREFIX ,
@@ -355,30 +343,29 @@ class ReasonedEvidence(
355343 1.0 , description = "The probability that the reasoning method is correct"
356344 )
357345
def _simple_key(self) -> str:
    """Build the triple-independent part of this reasoned evidence's key.

    :returns: The evidence type and the justification's CURIE, followed by
        one field per mapping in ``sorted(self.mappings)``, all joined with
        tabs. Each mapping's field is the ``|``-joined keys of its evidence,
        taken in order of each evidence's own ``_simple_key()``.
    """

    def _mapping_field(mapping) -> str:
        # Order the evidence by its own simple key so the field does not
        # depend on the order in which the evidence was stored.
        ordered = sorted(mapping.evidence, key=lambda e: e._simple_key())
        return "|".join(evidence.key(mapping) for evidence in ordered)

    return "\t".join(
        (
            self.evidence_type,
            self.justification.curie,
            *(_mapping_field(mapping) for mapping in sorted(self.mappings)),
        )
    )
370360
def key(self, triple: Triple | Mapping) -> str:
    """Get a key suitable for hashing the evidence.

    :param triple: The triple or mapping this evidence is attached to; its
        ``as_str_triple()`` parts are placed at the front of the key.
    :returns: A key for deduplication based on the mapping set, made of the
        triple's string parts followed by this evidence's ``_simple_key()``,
        joined with tabs.

    Note: this should be extended to include basically _all_ fields
    """
    parts = (*triple.as_str_triple(), self._simple_key())
    return "\t".join(parts)
382369
383370 def get_confidence (self ) -> float :
384371 r"""Calculate confidence for the reasoned evidence.
@@ -433,7 +420,7 @@ def explanation(self) -> str:
433420class Mapping (
434421 Triple ,
435422 ConfidenceMixin ,
436- KeyedMixin [[], StrTriple ],
423+ KeyedMixin [[]],
437424 prefix = SEMRA_MAPPING_PREFIX ,
438425):
439426 """A semantic mapping.
@@ -455,9 +442,11 @@ def triple(self) -> Triple:
455442 """Get the mapping's core triple as a tuple."""
456443 return Triple (subject = self .subject , predicate = self .predicate , object = self .object )
457444
# TODO converge on reusing the curies definition of a triple/mapping identifier

def key(self) -> str:
    """Get a string key for the mapping, based on the subject, predicate, and object.

    :returns: The parts returned by ``as_str_triple()`` joined with tabs.
    """
    return "\t".join(self.as_str_triple())
461450
462451 @classmethod
463452 def from_triple (
0 commit comments