@@ -274,8 +274,15 @@ def _build_person_lookup(
274274 persons : list [dict [str , Any ]],
275275 * ,
276276 person_derivations : Any = None ,
277- ) -> dict [str , str ]:
277+ ) -> tuple [dict [str , str ], dict [str , str ]]:
278+ """Build the alias→person_id lookup plus an infoscience-authority→person_id map.
279+
280+ The authority lookup is keyed on raw DSpace authority UUIDs (``pulse:infosciencePersonIdentifier``)
281+ so article authors can be mapped directly without name fuzzy-matching when DSpace returned
282+ an authority on the publication.
283+ """
278284 lookup : dict [str , str ] = {}
285+ authority_lookup : dict [str , str ] = {}
279286 derivation_by_id : dict [str , dict [str , Any ]] = {}
280287 if isinstance (person_derivations , list ):
281288 for derivation in person_derivations :
@@ -296,7 +303,17 @@ def _build_person_lookup(
296303 derivation_by_id = derivation_by_id ,
297304 ):
298305 _register_lookup_token (lookup , alias_token , person_id )
299- return lookup
306+
307+ infoscience_id = _as_string (person .get ("pulse:infosciencePersonIdentifier" ))
308+ if infoscience_id is None :
309+ identifiers = person .get ("identifiers" )
310+ if isinstance (identifiers , dict ):
311+ infoscience_id = _as_string (
312+ identifiers .get ("pulse:infosciencePersonIdentifier" ),
313+ )
314+ if infoscience_id :
315+ authority_lookup .setdefault (infoscience_id , person_id )
316+ return lookup , authority_lookup
300317
301318
302319def _build_organization_lookup (organizations : list [dict [str , Any ]]) -> dict [str , str ]: # noqa: C901
@@ -489,6 +506,7 @@ def _map_author_ids(
489506 publication : dict [str , Any ],
490507 * ,
491508 person_lookup : dict [str , str ],
509+ authority_lookup : dict [str , str ],
492510 publication_reference : str ,
493511) -> tuple [list [str ], list [str ], list [str ], int , int ]:
494512 warnings : list [str ] = []
@@ -497,8 +515,22 @@ def _map_author_ids(
497515 matched_authors = 0
498516 unresolved_count = 0
499517 author_names = _as_string_list (publication .get ("authors" ))
500- for author_name in author_names :
501- resolved_author = _resolve_lookup_token (person_lookup , author_name )
518+ raw_authorities = publication .get ("author_authorities" )
519+ authority_values : list [str | None ]
520+ if isinstance (raw_authorities , list ) and len (raw_authorities ) == len (author_names ):
521+ authority_values = [
522+ item if isinstance (item , str ) and item .strip () else None
523+ for item in raw_authorities
524+ ]
525+ else :
526+ authority_values = [None ] * len (author_names )
527+
528+ for author_name , authority in zip (author_names , authority_values , strict = False ):
529+ resolved_author : str | None = None
530+ if authority is not None :
531+ resolved_author = authority_lookup .get (authority )
532+ if resolved_author is None :
533+ resolved_author = _resolve_lookup_token (person_lookup , author_name )
502534 if isinstance (resolved_author , str ):
503535 mapped_authors .append (resolved_author )
504536 matched_authors += 1
@@ -749,7 +781,7 @@ async def run( # noqa: C901, PLR0912, PLR0915
749781 stats = {"queries" : queries , "articles" : []},
750782 )
751783
752- person_lookup = _build_person_lookup (
784+ person_lookup , authority_lookup = _build_person_lookup (
753785 _collect_known_persons (context ),
754786 person_derivations = context .get ("person_derivations" ),
755787 )
@@ -773,6 +805,7 @@ async def run( # noqa: C901, PLR0912, PLR0915
773805 ) = _map_author_ids (
774806 candidate .publication ,
775807 person_lookup = person_lookup ,
808+ authority_lookup = authority_lookup ,
776809 publication_reference = publication_reference ,
777810 )
778811 for unresolved_author in unresolved_authors :
0 commit comments