Bump impact-factor dependency (#93)

jannisborn · web-flow · commit d05af6e91c08 · 2025-11-29T00:16:46.000+01:00
* chore: bump impact-factor dependency

* robustify

* wip

* increase max retry
diff --git a/paperscraper/citations/entity/researcher.py b/paperscraper/citations/entity/researcher.py
@@ -1,6 +1,7 @@
 import asyncio
 import os
 from typing import Any, List, Literal, Optional, Tuple
+from time import sleep
 
 from semanticscholar import SemanticScholar
 
@@ -90,6 +91,8 @@ async def _self_references_async(
         self, verbose: bool = False
     ) -> List[ReferenceResult]:
         """Async version of self_references."""
+        if self.ssid == '-1':
+            return []
         self.ssids = await get_papers_for_author(self.ssid)
 
         results: List[ReferenceResult] = await self_references_paper(
diff --git a/paperscraper/citations/self_citations.py b/paperscraper/citations/self_citations.py
@@ -96,7 +96,7 @@ async def _process_single(client: httpx.AsyncClient, identifier: str) -> Citatio
 
 
 @optional_async
-@retry_with_exponential_backoff(max_retries=4, base_delay=1.0)
+@retry_with_exponential_backoff(max_retries=10, base_delay=1.0)
 async def self_citations_paper(
     inputs: Union[str, List[str]], verbose: bool = False
 ) -> Union[CitationResult, List[CitationResult]]:
diff --git a/paperscraper/citations/utils.py b/paperscraper/citations/utils.py
@@ -111,7 +111,8 @@ async def get_title_and_id_from_doi(doi: str) -> Dict[str, str] | None:
 
 
 @optional_async
-async def author_name_to_ssaid(author_name: str) -> Optional[Tuple[str, str]]:
+@retry_with_exponential_backoff(max_retries=10, base_delay=1.0)
+async def author_name_to_ssaid(author_name: str) -> Tuple[str, str]:
     """
     Given an author name, returns the Semantic Scholar author ID.
 
@@ -128,16 +129,17 @@ async def author_name_to_ssaid(author_name: str) -> Optional[Tuple[str, str]]:
             params={"query": author_name, "fields": "name", "limit": 1},
             headers=HEADERS,
         )
-        if response.status_code == 200:
-            data = response.json()
-            authors = data.get("data", [])
-            if authors:
-                # Return the Semantic Scholar author ID from the first result.
-                return authors[0].get("authorId"), authors[0].get("name")
+        response.raise_for_status()
+        data = response.json()
+        authors = data.get("data", [])
+        if authors:
+            # Return the Semantic Scholar author ID from the first result.
+            return authors[0]["authorId"], authors[0]["name"]
 
         logger.error(
             f"Error in retrieving name from SS Author ID: {response.status_code} - {response.text}"
         )
+        return ('-1', 'N.A.')
 
 
 def determine_paper_input_type(input: str) -> Literal["ssid", "doi", "title"]:
diff --git a/paperscraper/tests/test_impactor.py b/paperscraper/tests/test_impactor.py
@@ -47,10 +47,17 @@ def test_return_all_fields(self, impactor: Impactor):
 
     def test_quantum_information_search(self, impactor):
         expected_results = [
-            {"journal": "Innovation", "factor": 33.2, "score": 70},
-            {"journal": "InfoMat", "factor": 22.7, "score": 71},
-            {"journal": "Information Fusion", "factor": 14.7, "score": 71},
-            {"journal": "PRX Quantum", "factor": 9.3, "score": 78},
+            {"journal": "Innovation", "factor": 25.7, "score": 70},
+            {"journal": "Exploration", "factor": 22.5, "score": 74},
+            {"journal": "InfoMat", "factor": 22.3, "score": 71},
+            {"journal": "Information Fusion", "factor": 15.5, "score": 71},
+            {"journal": "PRX Quantum", "factor": 11.0, "score": 78},
+            {
+                "journal": "International Journal of Applied Earth Observation and Geoinformation",
+                "factor": 8.6,
+                "score": 76,
+            },
+            {"journal": "npj Quantum Information", "factor": 8.3, "score": 95},
         ]
 
         results = impactor.search(
@@ -70,6 +77,18 @@ def test_quantum_information_search(self, impactor):
                 expected["score"] == actual["score"]
             ), f"Score does not match for {expected['journal']}"
 
+        results = impactor.search(
+            "Quantum information", threshold=90, sort_by="score", min_impact=2
+        )
+        expected_results = [
+            {"journal": "Quantum", "factor": 5.4, "score": 100},
+            {"journal": "npj Quantum Information", "factor": 8.3, "score": 95},
+            {"journal": "Information", "factor": 2.9, "score": 95},
+            {"journal": "Quantum Information Processing", "factor": 2.2, "score": 95},
+        ]
+        assert len(results) == len(expected_results), "Number of results does not match"
+        assert results == expected_results
+
     def test_type_error(self, impactor: Impactor):
         with pytest.raises(TypeError):
             impactor.search(123, threshold=99)  # query is not a str
diff --git a/requirements.txt b/requirements.txt
@@ -8,7 +8,7 @@ seaborn>=0.11.0
 matplotlib>=3.3.2
 matplotlib-venn>=0.11.5
 bs4>=0.0.1
-impact-factor>=1.1.1,<1.1.3
+impact-factor>=1.1.3
 thefuzz>=0.20.0
 pytest
 tldextract
diff --git a/setup.py b/setup.py
@@ -37,7 +37,7 @@
         "matplotlib",
         "matplotlib_venn",
         "bs4",
-        "impact-factor>=1.1.1,<1.1.3",
+        "impact-factor>=1.1.3",
         "thefuzz",
         "pytest",
         "tldextract",