11import asyncio
22import os
33from typing import Any , List , Literal , Optional , Tuple
4- from time import sleep
54
65from semanticscholar import SemanticScholar
76
87from ..orcid import orcid_to_author_name
9- from ..self_citations import CitationResult
8+ from ..self_citations import CitationResult , self_citations_paper
109from ..self_references import ReferenceResult , self_references_paper
1110from ..utils import author_name_to_ssaid , get_papers_for_author
1211from .core import Entity , EntityResult
1312
1413
1514class ResearcherResult (EntityResult ):
1615 name : str
17- ssid : int
16+ ssaid : int
1817 orcid : Optional [str ] = None
1918
2019 def _ordered_items (self ) -> List [Tuple [str , Any ]]:
@@ -27,7 +26,7 @@ def _ordered_items(self) -> List[Tuple[str, Any]]:
2726 ("num_citations" , self .num_citations ),
2827 ("self_references" , self .self_references ),
2928 ("self_citations" , self .self_citations ),
30- ("ssid " , self .ssid ),
29+ ("ssaid " , self .ssaid ),
3130 ("orcid" , self .orcid ),
3231 ]
3332
@@ -46,8 +45,9 @@ def __str__(self) -> str:
4645
4746class Researcher (Entity ):
4847 name : str
49- ssid : int
48+ ssaid : int
5049 orcid : Optional [str ] = None
50+ ssids : List [int ] = []
5151
5252 def __init__ (self , input : str , mode : ModeType = "infer" ):
5353 """
@@ -78,22 +78,31 @@ def __init__(self, input: str, mode: ModeType = "infer"):
7878 mode = "name"
7979 if mode == "ssaid" :
8080 self .name = sch .get_author (input )._name
81- self .ssid = input
81+ self .ssaid = input
8282 elif mode == "orcid" :
8383 orcid_name = orcid_to_author_name (input )
8484 self .orcid = input
85- self .ssid , self .name = author_name_to_ssaid (orcid_name )
85+ self .ssaid , self .name = author_name_to_ssaid (orcid_name )
8686 elif mode == "name" :
87- name = input
88- self .ssid , self .name = author_name_to_ssaid (input )
87+ self .name = input
88+ self .ssaid , self .name = author_name_to_ssaid (input )
89+
90+ self .result = ResearcherResult (
91+ name = self .name ,
92+ ssaid = int (self .ssaid ),
93+ orcid = self .orcid ,
94+ num_citations = - 1 ,
95+ num_references = - 1 ,
96+ )
8997
9098 async def _self_references_async (
9199 self , verbose : bool = False
92100 ) -> List [ReferenceResult ]:
93101 """Async version of self_references."""
94- if self .ssid == '-1' :
102+ if self .ssaid == "-1" :
95103 return []
96- self .ssids = await get_papers_for_author (self .ssid )
104+ if self .ssids == []:
105+ self .ssids = await get_papers_for_author (self .ssaid )
97106
98107 results : List [ReferenceResult ] = await self_references_paper (
99108 self .ssids , verbose = verbose
@@ -122,36 +131,91 @@ def self_references(self, verbose: bool = False) -> ResearcherResult:
122131 reference_results = asyncio .run (self ._self_references_async (verbose = verbose ))
123132
124133 individual_self_references = {
125- getattr (result , "title" ): getattr (result , "self_references" ).get (self .name , 0.0 )
134+ getattr (result , "title" ): getattr (result , "self_references" ).get (
135+ self .name , 0.0
136+ )
126137 for result in reference_results
127138 }
128- reference_ratio = sum (individual_self_references .values ()) / max (1 , len (
129- individual_self_references
130- ))
131- return ResearcherResult (
132- name = self .name ,
133- ssid = int (self .ssid ),
134- orcid = self .orcid ,
135- num_references = sum (r .num_references for r in reference_results ),
136- num_citations = - 1 ,
137- self_references = dict (
138- sorted (
139- individual_self_references .items (), key = lambda x : x [1 ], reverse = True
140- )
141- ),
142- self_citations = {},
143- self_reference_ratio = round (reference_ratio , 3 ),
144- self_citation_ratio = - 1.0 ,
139+ reference_ratio = sum (individual_self_references .values ()) / max (
140+ 1 , len (individual_self_references )
145141 )
146142
147- def self_citations (self ) -> ResearcherResult :
143+ self .result = self .result .model_copy (
144+ update = {
145+ "num_references" : sum (r .num_references for r in reference_results ),
146+ "self_references" : dict (
147+ sorted (
148+ individual_self_references .items (),
149+ key = lambda x : x [1 ],
150+ reverse = True ,
151+ )
152+ ),
153+ "self_reference_ratio" : round (reference_ratio , 3 ),
154+ }
155+ )
156+
157+ return self .result
158+
async def _self_citations_async(
    self, verbose: bool = False
) -> List[CitationResult]:
    """
    Async version of self_citations.

    Args:
        verbose: Forwarded unchanged to `self_citations_paper`.

    Returns:
        The per-paper citation results, restricted to papers that have at
        least one citation and whose title contains neither "erratum" nor
        "corrigendum". An empty list when the author ID is the "-1" sentinel.
    """
    # The ID is annotated as an int but compared against the string "-1";
    # normalise it so that both spellings of the sentinel are recognised.
    if str(self.ssaid) == "-1":
        return []
    # Reuse paper IDs already stored on this instance; fetch only if missing.
    if not self.ssids:
        self.ssids = await get_papers_for_author(self.ssaid)

    results: List[CitationResult] = await self_citations_paper(
        self.ssids, verbose=verbose
    )
    # Remove papers with zero citations or that are erratum/corrigendum
    excluded_markers = ("erratum", "corrigendum")
    return [
        r
        for r in results
        if r.num_citations > 0
        and not any(marker in r.title.lower() for marker in excluded_markers)
    ]
181+
def self_citations(self, verbose: bool = False) -> ResearcherResult:
    """
    Sifts through all papers of a researcher and finds how often they are self-cited.

    Args:
        verbose: Forwarded to the async collection step.

    Returns:
        The researcher's result with `num_citations`, `self_citations`
        (title -> this researcher's value, highest first) and
        `self_citation_ratio` (mean of those values, rounded to 3 digits)
        filled in. The same object is also stored on `self.result`.
    """
    papers = asyncio.run(self._self_citations_async(verbose=verbose))

    # Per-title value for this researcher; papers that do not list the
    # researcher count as 0.0. A repeated title keeps the last value seen.
    per_title = {}
    for paper in papers:
        per_title[paper.title] = paper.self_citations.get(self.name, 0.0)

    # Average over the distinct titles; an empty mapping yields 0 rather
    # than dividing by zero.
    mean_ratio = sum(per_title.values()) / (len(per_title) or 1)

    ranked = sorted(per_title.items(), key=lambda entry: entry[1], reverse=True)
    total_citations = sum(paper.num_citations for paper in papers)

    updates = {
        "num_citations": total_citations,
        "self_citations": dict(ranked),
        "self_citation_ratio": round(mean_ratio, 3),
    }
    self.result = self.result.model_copy(update=updates)
    return self.result
152212
def get_result(self) -> ResearcherResult:
    """
    Provides the result of the analysis.

    Runs whichever of the two analyses has not been run yet and returns
    the combined result.

    Returns:
        The researcher's result with both the self-reference and the
        self-citation fields filled in.
    """
    # The constructor seeds num_references / num_citations with -1 and each
    # analysis overwrites its own counter, so a negative value means that
    # analysis is still pending. Checking these avoids repeating the costly
    # lookups on every call.
    if self.result.num_references < 0:
        self.self_references()
    if self.result.num_citations < 0:
        self.self_citations()
    return self.result
0 commit comments