Skip to content

Commit 14dc5ec

Browse files
committed
refactor: Replace direct logging.getLogger() with centralized project loggers
Resolves issue #2 by refactoring all modules to use the project's centralized logging system (detail_logger and status_logger) instead of direct logging.getLogger(__name__) calls. ## Changes Made ### Refactored 12 files to use centralized loggers: - Core modules: dispatcher.py, article_retraction_checker.py, retry_utils.py - Backend modules: doaj.py, retraction_watch.py - Updater modules: predatoryjournals.py, custom.py, algerian_helpers/*, bealls_helpers/* - Utility modules: bibtex_parser.py ### Logging categorization: - detail_logger: Technical details, debug info, internal processing (file only) - status_logger: User-facing progress, status, errors (console + file) ### Added enforcement infrastructure: - scripts/check-logging.py: Automated consistency checker - Integrated logging checks into scripts/run-quality-checks.sh ## Benefits - ✅ Consistent logging practices across entire codebase - ✅ Clear separation of technical vs user-facing messages - ✅ Improved maintainability and user experience - ✅ Automated prevention of future regressions [AI-assisted]
1 parent 2153ad6 commit 14dc5ec

File tree

14 files changed

+234
-92
lines changed

14 files changed

+234
-92
lines changed

scripts/check-logging.py

Lines changed: 138 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,138 @@
1+
#!/usr/bin/env python3
2+
"""Script to enforce consistent logging practices in the codebase.
3+
4+
This script checks that no files use direct logging.getLogger() and instead
5+
use the project's centralized detail_logger and status_logger.
6+
7+
Usage:
8+
python scripts/check-logging.py
9+
10+
Exit code:
11+
0: All checks pass
12+
1: Violations found
13+
"""
14+
15+
import re
import subprocess
import sys
from pathlib import Path
18+
19+
20+
def check_direct_logger_usage(src_dir="src"):
    """Check for direct ``logging.getLogger(__name__)`` usage.

    Scans every ``*.py`` file under *src_dir* (skipping ``__pycache__``)
    and reports each line that calls ``logging.getLogger(__name__)``
    instead of the project's centralized detail/status loggers.

    Implemented in pure Python rather than shelling out to ``grep`` so the
    check is portable (Windows, minimal CI images) and never matches
    non-Python or compiled ``.pyc`` files.

    Args:
        src_dir: Root directory to scan. Defaults to ``"src"`` relative to
            the current working directory (the project root).

    Returns:
        True when no violations were found; False when violations exist or
        when *src_dir* does not exist.
    """
    src_dir = Path(src_dir)
    if not src_dir.exists():
        print("ERROR: src directory not found. Run this script from the project root.")
        return False

    violations = []
    # Sorted for deterministic, reviewable output across platforms.
    for py_file in sorted(src_dir.rglob("*.py")):
        if "__pycache__" in py_file.parts:
            continue
        try:
            # errors="replace" so a stray mis-encoded file cannot crash the check.
            text = py_file.read_text(encoding="utf-8", errors="replace")
        except OSError as e:
            print(f"WARNING: could not read {py_file}: {e}")
            continue
        for lineno, line in enumerate(text.splitlines(), start=1):
            if "logging.getLogger(__name__)" in line:
                # Mirror grep's file:line:content format for familiarity.
                violations.append(f"{py_file}:{lineno}:{line}")

    if violations:
        print("❌ LOGGING VIOLATIONS FOUND:")
        print()
        print("The following files use direct logging.getLogger() instead of project loggers:")
        print()
        for violation in violations:
            print(f"  {violation}")
        print()
        print("Fix these by:")
        print("1. Remove 'import logging' and 'logger = logging.getLogger(__name__)'")
        print("2. Add: from .logging_config import get_detail_logger, get_status_logger")
        print("3. Add: detail_logger = get_detail_logger(); status_logger = get_status_logger()")
        print("4. Use detail_logger for technical details, status_logger for user messages")
        print()
        return False

    print("✅ All logging checks pass!")
    return True
65+
66+
67+
def check_logger_imports(src_dir="src"):
    """Check that files using the project loggers import from logging_config.

    Every ``*.py`` file under *src_dir* that mentions ``detail_logger`` or
    ``status_logger`` must contain a ``from ... logging_config import``
    line. ``logging_config.py`` itself and anything under ``__pycache__``
    are exempt.

    Implemented in pure Python (no ``grep`` subprocess per file) so it is
    portable and spawns no external processes.

    Args:
        src_dir: Root directory to scan. Defaults to ``"src"`` relative to
            the current working directory (the project root).

    Returns:
        True when every logger-using file imports from logging_config, or
        when no file uses the loggers at all; False otherwise.
    """
    src_dir = Path(src_dir)
    if not src_dir.exists():
        # No source tree to scan — nothing can be in violation.
        return True

    # Same pattern grep was given originally: any "from ... logging_config import".
    import_pattern = re.compile(r"from.*logging_config import")
    violations = []

    for py_file in sorted(src_dir.rglob("*.py")):
        # Skip the logging_config module itself and compiled-cache leftovers.
        if py_file.name == "logging_config.py" or "__pycache__" in py_file.parts:
            continue
        try:
            text = py_file.read_text(encoding="utf-8", errors="replace")
        except OSError:
            continue
        uses_loggers = "detail_logger" in text or "status_logger" in text
        if uses_loggers and not import_pattern.search(text):
            violations.append(str(py_file))

    if violations:
        print("❌ LOGGING IMPORT VIOLATIONS:")
        print()
        print("Files use loggers but don't import from logging_config:")
        for violation in violations:
            print(f"  {violation}")
        print()
        return False

    return True
110+
111+
112+
def main():
    """Run every logging consistency check.

    Returns:
        0 when all checks pass, 1 when any violation was found (suitable
        for use as a process exit code).
    """
    print("Checking logging consistency...")
    print()

    # Run both checks unconditionally so all violations are reported at once.
    results = [check_direct_logger_usage()]
    print()
    results.append(check_logger_imports())

    if all(results):
        print("✅ All logging checks passed!")
        return 0

    print("❌ Logging violations found. Please fix them before committing.")
    return 1
135+
136+
137+
# Script entry point: propagate main()'s status to the shell exit code.
if __name__ == "__main__":
    sys.exit(main())

scripts/run-quality-checks.sh

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,9 @@ run_check "Mypy type checking" mypy src/ --strict || true
4949
# 4. Pytest with coverage
5050
run_check "Pytest with coverage" pytest --cov=src --cov-report=term-missing tests/ || true
5151

52+
# 5. Logging consistency check
53+
run_check "Logging consistency" python scripts/check-logging.py || true
54+
5255
# Final summary
5356
echo -e "${BLUE}========================================${NC}"
5457
if [ $FAILED -eq 0 ]; then

src/aletheia_probe/article_retraction_checker.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,13 @@
11
"""Article-level retraction checking using multiple data sources."""
22

33
import asyncio
4-
import logging
54
from typing import Any
65

76
import aiohttp
87

98
from .cache import get_cache_manager
109
from .logging_config import get_detail_logger, get_status_logger
1110

12-
logger = logging.getLogger(__name__)
1311
status_logger = get_status_logger()
1412
detail_logger = get_detail_logger()
1513

src/aletheia_probe/backends/doaj.py

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,20 @@
11
"""DOAJ (Directory of Open Access Journals) backend for legitimate journal verification."""
22

33
import asyncio
4-
import logging
54
import time
65
from typing import Any
76
from urllib.parse import quote
87

98
import aiohttp
109

1110
from ..enums import AssessmentType
11+
from ..logging_config import get_detail_logger, get_status_logger
1212
from ..models import BackendResult, BackendStatus, QueryInput
1313
from ..retry_utils import async_retry_with_backoff
1414
from .base import HybridBackend, get_backend_registry
1515

16-
logger = logging.getLogger(__name__)
16+
detail_logger = get_detail_logger()
17+
status_logger = get_status_logger()
1718

1819

1920
class RateLimitError(Exception):
@@ -63,7 +64,7 @@ async def _query_api(self, query_input: QueryInput) -> BackendResult:
6364
)
6465

6566
except RateLimitError as e:
66-
logger.warning(f"DOAJ API rate limit exceeded: {e}")
67+
status_logger.warning(f"DOAJ API rate limit exceeded: {e}")
6768
return BackendResult(
6869
backend_name=self.get_name(),
6970
status=BackendStatus.RATE_LIMITED,
@@ -73,7 +74,7 @@ async def _query_api(self, query_input: QueryInput) -> BackendResult:
7374
response_time=time.time() - start_time,
7475
)
7576
except aiohttp.ClientError as e:
76-
logger.error(f"DOAJ API network error: {e}")
77+
status_logger.error(f"DOAJ API network error: {e}")
7778
return BackendResult(
7879
backend_name=self.get_name(),
7980
status=BackendStatus.ERROR,
@@ -83,7 +84,7 @@ async def _query_api(self, query_input: QueryInput) -> BackendResult:
8384
response_time=time.time() - start_time,
8485
)
8586
except Exception as e:
86-
logger.error(f"DOAJ API unexpected error: {e}")
87+
status_logger.error(f"DOAJ API unexpected error: {e}")
8788
return BackendResult(
8889
backend_name=self.get_name(),
8990
status=BackendStatus.ERROR,
@@ -127,7 +128,7 @@ async def _fetch_from_doaj_api(
127128
retry_after = response.headers.get("Retry-After")
128129
retry_seconds = int(retry_after) if retry_after else 60
129130

130-
logger.warning(
131+
detail_logger.debug(
131132
f"DOAJ API rate limit hit. Retry-After: {retry_seconds}s. URL: {url}"
132133
)
133134

src/aletheia_probe/backends/retraction_watch.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,16 @@
11
"""Retraction Watch backend for journal quality assessment based on retraction data."""
22

33
import json
4-
import logging
54
from typing import Any
65

76
from ..cache import get_cache_manager
7+
from ..logging_config import get_detail_logger, get_status_logger
88
from ..models import BackendResult, BackendStatus, QueryInput
99
from ..openalex import get_publication_stats
1010
from .base import CachedBackend, get_backend_registry
1111

12-
logger = logging.getLogger(__name__)
12+
detail_logger = get_detail_logger()
13+
status_logger = get_status_logger()
1314

1415

1516
class RetractionWatchBackend(CachedBackend):
@@ -202,11 +203,11 @@ async def _get_openalex_data_cached(
202203
# Check cache first
203204
cached = get_cache_manager().get_cached_value(cache_key)
204205
if cached is not None:
205-
logger.debug(f"OpenAlex cache hit for {journal_name}")
206+
detail_logger.debug(f"OpenAlex cache hit for {journal_name}")
206207
return json.loads(cached) if cached != "null" else None
207208

208209
# Fetch from OpenAlex API
209-
logger.info(f"Fetching OpenAlex data on-demand for: {journal_name}")
210+
status_logger.info(f"Fetching OpenAlex data on-demand for: {journal_name}")
210211
try:
211212
openalex_data = await get_publication_stats(journal_name, issn)
212213

@@ -219,7 +220,7 @@ async def _get_openalex_data_cached(
219220
return openalex_data
220221

221222
except Exception as e:
222-
logger.warning(f"Failed to fetch OpenAlex data for {journal_name}: {e}")
223+
status_logger.warning(f"Failed to fetch OpenAlex data for {journal_name}: {e}")
223224
# Cache the failure for 1 day to avoid repeated API calls
224225
get_cache_manager().set_cached_value(cache_key, "null", ttl_hours=24)
225226
return None

src/aletheia_probe/bibtex_parser.py

Lines changed: 14 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
"""BibTeX parsing utilities for journal assessment."""
22

3-
import logging
43
from pathlib import Path
54

65
from pybtex.database import ( # type: ignore
@@ -11,9 +10,11 @@
1110
)
1211
from pybtex.scanner import PybtexError, PybtexSyntaxError # type: ignore
1312

13+
from .logging_config import get_detail_logger, get_status_logger
1414
from .models import BibtexEntry
1515

16-
logger = logging.getLogger(__name__)
16+
detail_logger = get_detail_logger()
17+
status_logger = get_status_logger()
1718

1819

1920
class BibtexParser:
@@ -56,7 +57,7 @@ def parse_bibtex_file(file_path: Path) -> list[BibtexEntry]:
5657

5758
for encoding, description in encoding_strategies:
5859
try:
59-
logger.debug(f"Attempting to parse {file_path.name} with {description}")
60+
detail_logger.debug(f"Attempting to parse {file_path.name} with {description}")
6061

6162
if description.endswith("with errors='replace'"):
6263
# For the final attempt, use error handling to replace problematic characters
@@ -81,19 +82,19 @@ def parse_bibtex_file(file_path: Path) -> list[BibtexEntry]:
8182
else:
8283
skipped_entries += 1
8384
except Exception as e:
84-
logger.warning(
85+
status_logger.warning(
8586
f"Skipping entry '{entry_key}' due to processing error: {e}"
8687
)
8788
skipped_entries += 1
8889
continue
8990

9091
if skipped_entries > 0:
91-
logger.info(
92+
status_logger.info(
9293
f"Successfully parsed {len(entries)} entries from {file_path.name} "
9394
f"with {description}, skipped {skipped_entries} problematic entries"
9495
)
9596
else:
96-
logger.debug(
97+
detail_logger.debug(
9798
f"Successfully parsed {len(entries)} entries from {file_path.name} "
9899
f"with {description}"
99100
)
@@ -102,7 +103,7 @@ def parse_bibtex_file(file_path: Path) -> list[BibtexEntry]:
102103

103104
except UnicodeDecodeError as e:
104105
last_error = e
105-
logger.debug(f"{description} decoding failed for {file_path.name}: {e}")
106+
detail_logger.debug(f"{description} decoding failed for {file_path.name}: {e}")
106107
continue
107108

108109
except PybtexSyntaxError as e:
@@ -116,7 +117,7 @@ def parse_bibtex_file(file_path: Path) -> list[BibtexEntry]:
116117

117118
except PybtexError as e:
118119
last_error = e
119-
logger.debug(
120+
detail_logger.debug(
120121
f"PyBTeX error with {description} for {file_path.name}: {e}"
121122
)
122123
continue
@@ -182,7 +183,7 @@ def _process_entry_safely(entry_key: str, entry: Entry) -> BibtexEntry | None:
182183
raw_entry=entry,
183184
)
184185
except Exception as e:
185-
logger.warning(f"Error processing entry {entry_key}: {e}")
186+
detail_logger.debug(f"Error processing entry {entry_key}: {e}")
186187
return None
187188

188189
@staticmethod
@@ -275,7 +276,7 @@ def _extract_authors_safely(entry: Entry) -> str | None:
275276
author_str = str(person)
276277
authors.append(author_str)
277278
except (UnicodeDecodeError, UnicodeEncodeError) as e:
278-
logger.debug(f"Encoding error in author field: {e}")
279+
detail_logger.debug(f"Encoding error in author field: {e}")
279280
# Try to get a safe representation
280281
author_str = repr(person)
281282
authors.append(author_str)
@@ -286,7 +287,7 @@ def _extract_authors_safely(entry: Entry) -> str | None:
286287
# Fallback to raw field value
287288
return BibtexParser._get_field_safely(entry, "author")
288289
except Exception as e:
289-
logger.debug(f"Error extracting authors: {e}")
290+
detail_logger.debug(f"Error extracting authors: {e}")
290291
return None
291292

292293
@staticmethod
@@ -319,7 +320,7 @@ def _get_field_safely(entry: Entry, field_name: str) -> str | None:
319320
cleaned = value.strip("{}").strip()
320321
return cleaned if cleaned else None
321322
except (UnicodeDecodeError, UnicodeEncodeError) as e:
322-
logger.debug(f"Encoding error in field '{field_name}': {e}")
323+
detail_logger.debug(f"Encoding error in field '{field_name}': {e}")
323324
# Try to get a safe representation
324325
try:
325326
# Attempt to encode/decode safely
@@ -333,7 +334,7 @@ def _get_field_safely(entry: Entry, field_name: str) -> str | None:
333334
return repr(value).strip("'\"")
334335
return None
335336
except Exception as e:
336-
logger.debug(f"Error getting field '{field_name}': {e}")
337+
detail_logger.debug(f"Error getting field '{field_name}': {e}")
337338
return None
338339

339340
@staticmethod

0 commit comments

Comments
 (0)