|
9 | 9 | from ..logging_config import get_detail_logger, get_status_logger |
10 | 10 | from ..models import BackendResult, BackendStatus, QueryInput |
11 | 11 | from ..openalex import get_publication_stats |
12 | | -from .base import CachedBackend, get_backend_registry |
| 12 | +from .base import HybridBackend, get_backend_registry |
13 | 13 |
|
14 | 14 |
|
# Loggers obtained from the package's shared logging configuration
# (see the `..logging_config` import above); used throughout this backend.
detail_logger = get_detail_logger()
status_logger = get_status_logger()
17 | 17 |
|
18 | 18 |
|
19 | | -class RetractionWatchBackend(CachedBackend): |
| 19 | +class RetractionWatchBackend(HybridBackend): |
20 | 20 | """Backend that checks retraction history from Retraction Watch database.""" |
21 | 21 |
|
22 | | - def __init__(self) -> None: |
23 | | - super().__init__( |
24 | | - source_name="retraction_watch", |
25 | | - list_type="quality_indicator", |
26 | | - cache_ttl_hours=24 * 7, # Weekly cache |
27 | | - ) |
| 22 | + def __init__(self, cache_ttl_hours: int = 24) -> None: |
| 23 | + """Initialize backend with configurable cache TTL. |
| 24 | +
|
| 25 | + Args: |
| 26 | + cache_ttl_hours: Cache time-to-live in hours (default: 24) |
| 27 | + """ |
| 28 | + super().__init__(cache_ttl_hours=cache_ttl_hours) |
| 29 | + self.source_name = "retraction_watch" |
| 30 | + self.list_type = "quality_indicator" |
28 | 31 |
|
    def get_name(self) -> str:
        """Return the unique name this backend registers under."""
        return "retraction_watch"
31 | 34 |
|
    def get_description(self) -> str:
        """Return a one-line human-readable description of this backend."""
        return "Checks journal retraction history from Retraction Watch database"
34 | 37 |
|
35 | | - async def query(self, query_input: QueryInput) -> BackendResult: |
| 38 | + async def _query_api(self, query_input: QueryInput) -> BackendResult: |
36 | 39 | """Query retraction data for journal information. |
37 | 40 |
|
38 | | - Overrides CachedBackend.query to provide custom result formatting |
39 | | - with retraction-specific metadata. Fetches OpenAlex publication data |
40 | | - on-demand for rate calculation. |
| 41 | + This method performs the actual query against the Retraction Watch database |
| 42 | + and OpenAlex API. Results are automatically cached by the HybridBackend parent. |
| 43 | +
|
| 44 | + Args: |
| 45 | + query_input: Normalized query input with journal information |
| 46 | +
|
| 47 | + Returns: |
| 48 | + BackendResult with retraction assessment and metadata |
41 | 49 | """ |
42 | 50 | start_time = time.time() |
43 | 51 |
|
@@ -228,6 +236,52 @@ async def _get_openalex_data_cached( |
228 | 236 | get_cache_manager().set_cached_value(cache_key, "null", ttl_hours=24) |
229 | 237 | return None |
230 | 238 |
|
| 239 | + def _search_exact_match(self, name: str) -> list[dict[str, Any]]: |
| 240 | + """Search for exact journal name matches only.""" |
| 241 | + # Get all journals from this source and filter for exact matches |
| 242 | + all_results = get_cache_manager().search_journals( |
| 243 | + source_name=self.source_name, assessment=self.list_type |
| 244 | + ) |
| 245 | + |
| 246 | + # Filter for exact matches (case insensitive) |
| 247 | + exact_matches = [] |
| 248 | + name_lower = name.lower().strip() |
| 249 | + |
| 250 | + for result in all_results: |
| 251 | + journal_name = result.get("journal_name", "").lower().strip() |
| 252 | + normalized_name = result.get("normalized_name", "").lower().strip() |
| 253 | + |
| 254 | + # Exact match on either original or normalized name |
| 255 | + if journal_name == name_lower or normalized_name == name_lower: |
| 256 | + exact_matches.append(result) |
| 257 | + |
| 258 | + return exact_matches |
| 259 | + |
| 260 | + def _calculate_confidence( |
| 261 | + self, query_input: QueryInput, match: dict[str, Any] |
| 262 | + ) -> float: |
| 263 | + """Calculate confidence based on match quality - exact matches only.""" |
| 264 | + |
| 265 | + # High confidence for exact ISSN match |
| 266 | + if ( |
| 267 | + query_input.identifiers.get("issn") |
| 268 | + and match.get("issn") == query_input.identifiers["issn"] |
| 269 | + ): |
| 270 | + return 0.95 |
| 271 | + |
| 272 | + # High confidence for exact name match (case insensitive) |
| 273 | + if query_input.normalized_name: |
| 274 | + query_name = query_input.normalized_name.lower().strip() |
| 275 | + match_name = match.get("normalized_name", "").lower().strip() |
| 276 | + original_name = match.get("journal_name", "").lower().strip() |
| 277 | + |
| 278 | + if query_name == match_name or query_name == original_name: |
| 279 | + return 0.90 |
| 280 | + |
| 281 | + # If we get here, it means we have a match but it's not exact |
| 282 | + # This shouldn't happen with our new exact matching, so low confidence |
| 283 | + return 0.3 |
| 284 | + |
231 | 285 | def _calculate_risk_level( |
232 | 286 | self, |
233 | 287 | total: int, |
@@ -258,5 +312,7 @@ def _calculate_risk_level( |
258 | 312 |
|
# Register the backend factory
def _retraction_watch_factory(cache_ttl_hours: int = 24) -> RetractionWatchBackend:
    """Build a RetractionWatchBackend for the registry (TTL is configurable)."""
    return RetractionWatchBackend(cache_ttl_hours=cache_ttl_hours)


get_backend_registry().register_factory(
    "retraction_watch",
    _retraction_watch_factory,
    default_config={"cache_ttl_hours": 24},
)
0 commit comments