Skip to content

Commit d05af6e

Browse files
authored
Bump impactor dependency (#93)
* chore: bump impact factor
* robustify
* wip
* increase max retry
1 parent 3443579 commit d05af6e

File tree

6 files changed: +38 -14 lines changed

6 files changed: +38 -14 lines changed

paperscraper/citations/entity/researcher.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import asyncio
22
import os
33
from typing import Any, List, Literal, Optional, Tuple
4+
from time import sleep
45

56
from semanticscholar import SemanticScholar
67

@@ -90,6 +91,8 @@ async def _self_references_async(
9091
self, verbose: bool = False
9192
) -> List[ReferenceResult]:
9293
"""Async version of self_references."""
94+
if self.ssid == '-1':
95+
return []
9396
self.ssids = await get_papers_for_author(self.ssid)
9497

9598
results: List[ReferenceResult] = await self_references_paper(

paperscraper/citations/self_citations.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,7 @@ async def _process_single(client: httpx.AsyncClient, identifier: str) -> Citatio
9696

9797

9898
@optional_async
99-
@retry_with_exponential_backoff(max_retries=4, base_delay=1.0)
99+
@retry_with_exponential_backoff(max_retries=10, base_delay=1.0)
100100
async def self_citations_paper(
101101
inputs: Union[str, List[str]], verbose: bool = False
102102
) -> Union[CitationResult, List[CitationResult]]:

paperscraper/citations/utils.py

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,8 @@ async def get_title_and_id_from_doi(doi: str) -> Dict[str, str] | None:
111111

112112

113113
@optional_async
114-
async def author_name_to_ssaid(author_name: str) -> Optional[Tuple[str, str]]:
114+
@retry_with_exponential_backoff(max_retries=10, base_delay=1.0)
115+
async def author_name_to_ssaid(author_name: str) -> Tuple[str, str]:
115116
"""
116117
Given an author name, returns the Semantic Scholar author ID.
117118
@@ -128,16 +129,17 @@ async def author_name_to_ssaid(author_name: str) -> Optional[Tuple[str, str]]:
128129
params={"query": author_name, "fields": "name", "limit": 1},
129130
headers=HEADERS,
130131
)
131-
if response.status_code == 200:
132-
data = response.json()
133-
authors = data.get("data", [])
134-
if authors:
135-
# Return the Semantic Scholar author ID from the first result.
136-
return authors[0].get("authorId"), authors[0].get("name")
132+
response.raise_for_status()
133+
data = response.json()
134+
authors = data.get("data", [])
135+
if authors:
136+
# Return the Semantic Scholar author ID from the first result.
137+
return authors[0]["authorId"], authors[0]["name"]
137138

138139
logger.error(
139140
f"Error in retrieving name from SS Author ID: {response.status_code} - {response.text}"
140141
)
142+
return ('-1', 'N.A.')
141143

142144

143145
def determine_paper_input_type(input: str) -> Literal["ssid", "doi", "title"]:

paperscraper/tests/test_impactor.py

Lines changed: 23 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -47,10 +47,17 @@ def test_return_all_fields(self, impactor: Impactor):
4747

4848
def test_quantum_information_search(self, impactor):
4949
expected_results = [
50-
{"journal": "Innovation", "factor": 33.2, "score": 70},
51-
{"journal": "InfoMat", "factor": 22.7, "score": 71},
52-
{"journal": "Information Fusion", "factor": 14.7, "score": 71},
53-
{"journal": "PRX Quantum", "factor": 9.3, "score": 78},
50+
{"journal": "Innovation", "factor": 25.7, "score": 70},
51+
{"journal": "Exploration", "factor": 22.5, "score": 74},
52+
{"journal": "InfoMat", "factor": 22.3, "score": 71},
53+
{"journal": "Information Fusion", "factor": 15.5, "score": 71},
54+
{"journal": "PRX Quantum", "factor": 11.0, "score": 78},
55+
{
56+
"journal": "International Journal of Applied Earth Observation and Geoinformation",
57+
"factor": 8.6,
58+
"score": 76,
59+
},
60+
{"journal": "npj Quantum Information", "factor": 8.3, "score": 95},
5461
]
5562

5663
results = impactor.search(
@@ -70,6 +77,18 @@ def test_quantum_information_search(self, impactor):
7077
expected["score"] == actual["score"]
7178
), f"Score does not match for {expected['journal']}"
7279

80+
results = impactor.search(
81+
"Quantum information", threshold=90, sort_by="score", min_impact=2
82+
)
83+
expected_results = [
84+
{"journal": "Quantum", "factor": 5.4, "score": 100},
85+
{"journal": "npj Quantum Information", "factor": 8.3, "score": 95},
86+
{"journal": "Information", "factor": 2.9, "score": 95},
87+
{"journal": "Quantum Information Processing", "factor": 2.2, "score": 95},
88+
]
89+
assert len(results) == len(expected_results), "Number of results does not match"
90+
assert results == expected_results
91+
7392
def test_type_error(self, impactor: Impactor):
7493
with pytest.raises(TypeError):
7594
impactor.search(123, threshold=99) # query is not a str

requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ seaborn>=0.11.0
88
matplotlib>=3.3.2
99
matplotlib-venn>=0.11.5
1010
bs4>=0.0.1
11-
impact-factor>=1.1.1,<1.1.3
11+
impact-factor>=1.1.3
1212
thefuzz>=0.20.0
1313
pytest
1414
tldextract

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@
3737
"matplotlib",
3838
"matplotlib_venn",
3939
"bs4",
40-
"impact-factor>=1.1.1,<1.1.3",
40+
"impact-factor>=1.1.3",
4141
"thefuzz",
4242
"pytest",
4343
"tldextract",

0 commit comments

Comments
 (0)