Skip to content

Commit 62629a8

Browse files
Add CROSSREF_MODE=local support via LocalCrossrefAdapter (#1095)
* Add CROSSREF_MODE=local support via LocalCrossrefAdapter
  - Extract _CrossrefHttpClient wrapping the existing aiohttp logic
  - Add module-level create_crossref_client() factory: returns LocalCrossrefAdapter when CROSSREF_MODE=local, otherwise the HTTP client
  - _search_by_issn() now delegates to the factory instead of calling _get_journal_by_issn() directly
  - aletheia-crossref-adapter is an optional dependency; ImportError is raised with a clear message if CROSSREF_MODE=local but the package is not installed
* Fix mypy type errors in crossref_analyzer local-mode integration
  - Add dict[str, str] type params to _CrossrefHttpClient.__init__ headers
  - Add return type annotation to create_crossref_client factory function
  - Add explicit result type annotation to suppress no-any-return error
  - Add mypy ignore_missing_imports override for aletheia_crossref_adapter
---------
Co-authored-by: Andreas Florath <Andreas.Florath@telekom.de>
1 parent 675eb37 commit 62629a8

File tree

2 files changed

+79
-32
lines changed

2 files changed

+79
-32
lines changed

pyproject.toml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,10 @@ ignore_missing_imports = true
152152
module = "aletheia_opencitations_adapter"
153153
ignore_missing_imports = true
154154

155+
[[tool.mypy.overrides]]
156+
module = "aletheia_crossref_adapter"
157+
ignore_missing_imports = true
158+
155159
[tool.ruff]
156160
target-version = "py310"
157161
line-length = 88

src/aletheia_probe/backends/crossref_analyzer.py

Lines changed: 75 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
"""Crossref backend with metadata quality analysis for predatory journal detection."""
33

44
import asyncio
5+
import os
56
from typing import Any
67

78
import aiohttp
@@ -81,6 +82,75 @@
8182
_YEARS_THRESHOLD_EXPLOSION = 2
8283

8384

85+
class _CrossrefHttpClient:
    """Async context-manager client for the live Crossref REST API.

    Entering and leaving the context performs no work; every lookup opens
    its own ``aiohttp`` session and closes it before returning.
    """

    def __init__(self, email: str, base_url: str, headers: dict[str, str]) -> None:
        # ``email`` is kept on the instance; the lookup in this class only
        # reads the base URL and the headers.
        self._headers = headers
        self._base_url = base_url
        self._email = email

    async def __aenter__(self) -> "_CrossrefHttpClient":
        return self

    async def __aexit__(self, *_: Any) -> None:
        # Nothing to release: sessions are scoped to individual requests.
        return None

    @async_retry_with_backoff(
        max_retries=3,
        exceptions=(RateLimitError, aiohttp.ClientError, asyncio.TimeoutError),
    )
    async def get_journal_by_issn(self, issn: str) -> dict[str, Any] | None:
        """Fetch the journal record for ``issn`` from ``{base_url}/journals/{issn}``.

        Args:
            issn: ISSN interpolated verbatim into the request path.

        Returns:
            On HTTP 200, the ``message`` member of the JSON body (an empty
            dict when that member is not a dict); ``None`` on HTTP 404.

        Raises:
            RateLimitError: Propagated from ``_check_rate_limit_response``.
            BackendError: For any status other than 200 or 404; the message
                carries the first 200 characters of the response body.
        """
        url = f"{self._base_url}/journals/{issn}"
        request_timeout = aiohttp.ClientTimeout(total=_API_TIMEOUT)

        async with aiohttp.ClientSession(
            headers=self._headers,
            timeout=request_timeout,
            trust_env=True,
        ) as session, session.get(url) as response:
            # Rate limiting is checked before any other status handling.
            _check_rate_limit_response(response)

            if response.status == 404:
                return None

            if response.status != 200:
                error_text = await response.text()
                raise BackendError(
                    f"Crossref API returned status {response.status}. Response: {error_text[:200]}",
                    backend_name="crossref_analyzer",
                )

            payload = await response.json()
            message = payload.get("message", {})
            if isinstance(message, dict):
                return message
            return {}
125+
126+
127+
def _check_rate_limit_response(response: aiohttp.ClientResponse) -> None:
    """Reject a rate-limited Crossref response.

    Args:
        response: The HTTP response whose status code is inspected.

    Raises:
        RateLimitError: If the response status is 429.
    """
    if response.status != 429:
        return
    raise RateLimitError("Crossref API rate limit exceeded")
131+
132+
133+
def create_crossref_client(
    email: str, base_url: str, headers: dict[str, str]
) -> "_CrossrefHttpClient | Any":
    """Return the Crossref client selected by the ``CROSSREF_MODE`` env var.

    When ``CROSSREF_MODE`` is ``local`` (compared case-insensitively, with
    surrounding whitespace ignored) the adapter backed by the local
    PostgreSQL DB is returned; for any other value, or when the variable is
    unset, the live HTTP client is used.

    Args:
        email: Passed through to the HTTP client.
        base_url: Passed through to the HTTP client.
        headers: Passed through to the HTTP client.

    Returns:
        A ``LocalCrossrefAdapter`` in local mode, otherwise a
        ``_CrossrefHttpClient``.

    Raises:
        ImportError: If local mode is requested but the optional
            ``aletheia_crossref_adapter`` module cannot be imported.
    """
    # Normalise the value so that e.g. "Local" or " local " does not
    # silently fall back to the live HTTP API.
    mode = os.environ.get("CROSSREF_MODE", "remote").strip().lower()
    if mode != "local":
        return _CrossrefHttpClient(email=email, base_url=base_url, headers=headers)

    # Only the import is guarded: an error raised while constructing the
    # adapter must not be reported as "package not installed".
    try:
        from aletheia_crossref_adapter import LocalCrossrefAdapter  # noqa: PLC0415
    except ImportError as exc:
        raise ImportError(
            "CROSSREF_MODE=local requires the aletheia-crossref-adapter package to be installed"
        ) from exc
    return LocalCrossrefAdapter()
152+
153+
84154
class CrossrefAnalyzerBackend(ApiBackendWithCache, FallbackStrategyMixin):
85155
"""Backend that analyzes Crossref metadata quality to assess journal legitimacy."""
86156

@@ -197,7 +267,11 @@ async def _search_by_issn(self, issn: str) -> dict[str, Any] | None:
197267
Journal data if found, None if no match
198268
"""
199269
self.detail_logger.debug(f"Crossref: Searching by ISSN {issn}")
200-
return await self._get_journal_by_issn(issn)
270+
async with create_crossref_client(
271+
self.email, self.base_url, self.headers
272+
) as client:
273+
result: dict[str, Any] | None = await client.get_journal_by_issn(issn)
274+
return result
201275

202276
@code_is_used # Overrides FallbackStrategyMixin method
203277
async def _search_by_name(self, name: str, exact: bool = True) -> Any | None:
@@ -213,37 +287,6 @@ async def _search_by_name(self, name: str, exact: bool = True) -> Any | None:
213287
# Crossref API only supports ISSN-based journal lookup, not name search
214288
return None
215289

216-
@async_retry_with_backoff(
    max_retries=3,
    exceptions=(RateLimitError, aiohttp.ClientError, asyncio.TimeoutError),
)
async def _get_journal_by_issn(self, issn: str) -> dict[str, Any] | None:
    """Fetch the journal record for ``issn`` from the Crossref API.

    The request URL and the response status are written to the detail
    logger at debug level.

    Args:
        issn: ISSN interpolated verbatim into the request path.

    Returns:
        On HTTP 200, the ``message`` member of the JSON body (an empty dict
        when that member is not a dict); ``None`` on HTTP 404.

    Raises:
        BackendError: For any status other than 200 or 404; the message
            carries the first 200 characters of the response body.
    """
    url = f"{self.base_url}/journals/{issn}"
    self.detail_logger.debug(f"Crossref API request: GET {url}")
    request_timeout = aiohttp.ClientTimeout(total=_API_TIMEOUT)

    async with aiohttp.ClientSession(
        headers=self.headers,
        timeout=request_timeout,
        trust_env=True,
    ) as session, session.get(url) as response:
        self.detail_logger.debug(f"Crossref API response: {response.status}")
        # Rate limiting is checked before any other status handling.
        self._check_rate_limit_response(response)

        if response.status == 404:
            return None

        if response.status != 200:
            error_text = await response.text()
            raise BackendError(
                f"Crossref API returned status {response.status}. Response: {error_text[:200]}",
                backend_name=self.get_name(),
            )

        payload = await response.json()
        message = payload.get("message", {})
        if isinstance(message, dict):
            return message
        return {}
246-
247290
# Result building methods for automatic fallback framework
248291
def _build_success_result_with_chain(
249292
self,

0 commit comments

Comments
 (0)