Skip to content

Commit e33839c

Browse files
test: Significantly improve test coverage for issue #6 [AI-assisted] (#197)
* test: Significantly improve test coverage for issue #6 [AI-assisted] - Add comprehensive tests for RetractionWatchSource (17% → 94% coverage) - Add comprehensive tests for PredatoryJournalsSource (19% → 89% coverage) - Total reduction: ~240 missing lines of code coverage - Tests cover error handling, async operations, CSV parsing, and edge cases * test: Significantly improve test coverage for issue #6 [AI-assisted] Increased test coverage for three critical files: 1. bealls_helpers/parser.py: 18% → 100% coverage (+82%) - Created comprehensive test suite with 32 tests - Covers HTML parsing, table parsing, and extraction logic - Tests edge cases, error handling, and metadata processing 2. algerian.py: 30% → 100% coverage (+70%) - Created comprehensive test suite with 22 tests - Covers PDF processing, URL construction, and data extraction - Tests error conditions and edge cases 3. cache.py: 76% → 80% coverage (+4%) - Extended existing test suite with 15+ new tests - Covers metadata storage, journal searching, and caching - Tests error handling and data type conversions New test files: - tests/unit/updater/test_bealls_parser.py (32 tests) - tests/unit/updater/test_algerian_source.py (22 tests) All tests follow existing patterns and coding standards. All quality checks pass (ruff, mypy, etc.). * fix: Move imports to top of test file [AI-assisted] Move hashlib and json imports from inline usage to the top of the file as per project coding standards. Imports should be placed at the top of files whenever possible. --------- Co-authored-by: florath-ai-assistant[bot] <Andreas.Florath@telekom.de>
1 parent e7706f4 commit e33839c

File tree

6 files changed

+2088
-0
lines changed

6 files changed

+2088
-0
lines changed

tests/unit/test_cache.py

Lines changed: 254 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
# SPDX-License-Identifier: MIT
22
"""Tests for the cache management module."""
33

4+
import hashlib
5+
import json
46
import logging
57
import sqlite3
68
import tempfile
@@ -985,3 +987,255 @@ def test_get_source_statistics_comprehensive(self, temp_cache):
985987

986988
# Test that source_0 appears in the statistics
987989
assert "source_0" in stats
990+
991+
def test_add_journal_entry_unregistered_source(self, temp_cache):
992+
"""Test adding journal entry with unregistered source raises error."""
993+
entry = JournalEntryData(
994+
source_name="unregistered_source",
995+
assessment=AssessmentType.PREDATORY,
996+
journal_name="Test Journal",
997+
normalized_name="test_journal",
998+
)
999+
1000+
with pytest.raises(ValueError, match="Source.*not registered"):
1001+
temp_cache.add_journal_entry(source_name="unregistered_source", entry=entry)
1002+
1003+
def test_add_journal_entry_metadata_integer_type(self, temp_cache):
1004+
"""Test adding journal entry with integer metadata."""
1005+
temp_cache.register_data_source(
1006+
name="test_source",
1007+
display_name="Test Source",
1008+
source_type="list",
1009+
)
1010+
1011+
entry = JournalEntryData(
1012+
source_name="test_source",
1013+
assessment=AssessmentType.PREDATORY,
1014+
journal_name="Test Journal",
1015+
normalized_name="test_journal",
1016+
metadata={"count": 42},
1017+
)
1018+
1019+
temp_cache.add_journal_entry(source_name="test_source", entry=entry)
1020+
1021+
# Verify metadata was stored
1022+
results = temp_cache.search_journals(normalized_name="test_journal")
1023+
assert len(results) > 0
1024+
1025+
def test_add_journal_entry_metadata_boolean_type(self, temp_cache):
1026+
"""Test adding journal entry with boolean metadata."""
1027+
temp_cache.register_data_source(
1028+
name="test_source",
1029+
display_name="Test Source",
1030+
source_type="list",
1031+
)
1032+
1033+
entry = JournalEntryData(
1034+
source_name="test_source",
1035+
assessment=AssessmentType.PREDATORY,
1036+
journal_name="Test Journal",
1037+
normalized_name="test_journal",
1038+
metadata={"is_active": True},
1039+
)
1040+
1041+
temp_cache.add_journal_entry(source_name="test_source", entry=entry)
1042+
1043+
results = temp_cache.search_journals(normalized_name="test_journal")
1044+
assert len(results) > 0
1045+
1046+
def test_search_journals_with_journal_name_filter(self, temp_cache):
1047+
"""Test searching journals with journal_name filter."""
1048+
temp_cache.register_data_source(
1049+
name="test_source",
1050+
display_name="Test Source",
1051+
source_type="list",
1052+
)
1053+
1054+
entry = JournalEntryData(
1055+
source_name="test_source",
1056+
assessment=AssessmentType.PREDATORY,
1057+
journal_name="International Journal of Testing",
1058+
normalized_name="international_journal_testing",
1059+
)
1060+
1061+
temp_cache.add_journal_entry(source_name="test_source", entry=entry)
1062+
1063+
# Search with journal_name parameter
1064+
results = temp_cache.search_journals(journal_name="Testing")
1065+
1066+
assert len(results) > 0
1067+
assert any("Testing" in r.get("display_name", "") for r in results)
1068+
1069+
def test_search_journals_metadata_integer_conversion(self, temp_cache):
1070+
"""Test that integer metadata is converted correctly."""
1071+
temp_cache.register_data_source(
1072+
name="test_source",
1073+
display_name="Test Source",
1074+
source_type="list",
1075+
)
1076+
1077+
entry = JournalEntryData(
1078+
source_name="test_source",
1079+
assessment=AssessmentType.PREDATORY,
1080+
journal_name="Test Journal",
1081+
normalized_name="test_journal",
1082+
metadata={"year": 2023},
1083+
)
1084+
1085+
temp_cache.add_journal_entry(source_name="test_source", entry=entry)
1086+
1087+
results = temp_cache.search_journals(normalized_name="test_journal")
1088+
assert len(results) > 0
1089+
if results[0].get("metadata"):
1090+
metadata = json.loads(results[0]["metadata"])
1091+
assert metadata.get("year") == 2023
1092+
1093+
def test_find_conflicts(self, temp_cache):
1094+
"""Test finding journals with conflicting assessments."""
1095+
temp_cache.register_data_source(
1096+
name="source1",
1097+
display_name="Source 1",
1098+
source_type="list",
1099+
)
1100+
1101+
temp_cache.register_data_source(
1102+
name="source2",
1103+
display_name="Source 2",
1104+
source_type="list",
1105+
)
1106+
1107+
# Add same journal to both sources with different assessments
1108+
entry1 = JournalEntryData(
1109+
source_name="source1",
1110+
assessment=AssessmentType.PREDATORY,
1111+
journal_name="Test Journal",
1112+
normalized_name="test_journal",
1113+
)
1114+
1115+
entry2 = JournalEntryData(
1116+
source_name="source2",
1117+
assessment=AssessmentType.LEGITIMATE,
1118+
journal_name="Test Journal",
1119+
normalized_name="test_journal",
1120+
)
1121+
1122+
temp_cache.add_journal_entry(source_name="source1", entry=entry1)
1123+
temp_cache.add_journal_entry(source_name="source2", entry=entry2)
1124+
1125+
conflicts = temp_cache.find_conflicts()
1126+
1127+
assert len(conflicts) > 0
1128+
assert any(c["normalized_name"] == "test_journal" for c in conflicts)
1129+
1130+
def test_get_assessment_cache_count(self, temp_cache, sample_assessment_result):
1131+
"""Test getting assessment cache count."""
1132+
# Initially empty
1133+
assert temp_cache.get_assessment_cache_count() == 0
1134+
1135+
# Add an assessment
1136+
query_hash = hashlib.md5(b"Test Journal").hexdigest()
1137+
temp_cache.cache_assessment_result(
1138+
query_hash=query_hash,
1139+
query_input="Test Journal",
1140+
result=sample_assessment_result,
1141+
)
1142+
1143+
# Should be 1
1144+
assert temp_cache.get_assessment_cache_count() == 1
1145+
1146+
def test_clear_assessment_cache(self, temp_cache, sample_assessment_result):
1147+
"""Test clearing assessment cache."""
1148+
# Add some assessments
1149+
query_hash1 = hashlib.md5(b"Test Journal 1").hexdigest()
1150+
temp_cache.cache_assessment_result(
1151+
query_hash=query_hash1,
1152+
query_input="Test Journal 1",
1153+
result=sample_assessment_result,
1154+
)
1155+
query_hash2 = hashlib.md5(b"Test Journal 2").hexdigest()
1156+
temp_cache.cache_assessment_result(
1157+
query_hash=query_hash2,
1158+
query_input="Test Journal 2",
1159+
result=sample_assessment_result,
1160+
)
1161+
1162+
# Clear cache
1163+
count = temp_cache.clear_assessment_cache()
1164+
assert count == 2
1165+
assert temp_cache.get_assessment_cache_count() == 0
1166+
1167+
def test_cache_and_get_value(self, temp_cache):
1168+
"""Test key-value cache functionality."""
1169+
# Cache a value
1170+
temp_cache.set_cached_value(key="test_key", value="test_value", ttl_hours=24)
1171+
1172+
# Retrieve it
1173+
result = temp_cache.get_cached_value(key="test_key")
1174+
assert result == "test_value"
1175+
1176+
def test_get_cached_value_nonexistent(self, temp_cache):
1177+
"""Test that non-existent key returns None."""
1178+
result = temp_cache.get_cached_value(key="nonexistent_key")
1179+
assert result is None
1180+
1181+
def test_get_article_retraction(self, temp_cache):
1182+
"""Test getting cached article retraction."""
1183+
# Cache a retraction
1184+
temp_cache.cache_article_retraction(
1185+
doi="10.1234/test",
1186+
is_retracted=True,
1187+
source="test_source",
1188+
retraction_type="full",
1189+
retraction_date="2023-01-01",
1190+
retraction_doi="10.1234/retraction",
1191+
retraction_reason="Fraud",
1192+
metadata={"note": "Test retraction"},
1193+
)
1194+
1195+
# Retrieve it
1196+
result = temp_cache.get_article_retraction(doi="10.1234/test")
1197+
1198+
assert result is not None
1199+
assert result["is_retracted"] # SQLite stores booleans as integers
1200+
assert result["retraction_type"] == "full"
1201+
assert result["metadata"]["note"] == "Test retraction"
1202+
1203+
def test_get_article_retraction_nonexistent(self, temp_cache):
1204+
"""Test that non-existent DOI returns None."""
1205+
result = temp_cache.get_article_retraction(doi="10.1234/nonexistent")
1206+
assert result is None
1207+
1208+
def test_get_article_retraction_invalid_json_metadata(self, temp_cache):
1209+
"""Test handling of invalid JSON in metadata field."""
1210+
# Manually insert invalid JSON
1211+
with sqlite3.connect(temp_cache.db_path) as conn:
1212+
conn.execute(
1213+
"""
1214+
INSERT INTO article_retractions
1215+
(doi, is_retracted, source, metadata, checked_at, expires_at)
1216+
VALUES (?, ?, ?, ?, CURRENT_TIMESTAMP, datetime('now', '+30 days'))
1217+
""",
1218+
("10.1234/test", True, "test_source", "invalid{json"),
1219+
)
1220+
conn.commit()
1221+
1222+
result = temp_cache.get_article_retraction(doi="10.1234/test")
1223+
1224+
# Should still return result but with unparsed metadata
1225+
assert result is not None
1226+
assert result["is_retracted"] # SQLite stores booleans as integers
1227+
1228+
def test_cache_article_retraction_with_metadata(self, temp_cache):
1229+
"""Test caching article retraction with metadata."""
1230+
temp_cache.cache_article_retraction(
1231+
doi="10.1234/test",
1232+
is_retracted=True,
1233+
source="test_source",
1234+
metadata={"key1": "value1", "key2": 123},
1235+
)
1236+
1237+
result = temp_cache.get_article_retraction(doi="10.1234/test")
1238+
1239+
assert result is not None
1240+
assert result["metadata"]["key1"] == "value1"
1241+
assert result["metadata"]["key2"] == 123

tests/unit/updater/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
# SPDX-License-Identifier: MIT
2+
"""Tests for updater module."""

0 commit comments

Comments
 (0)