|
1 | 1 | # SPDX-License-Identifier: MIT |
2 | 2 | """Tests for the cache management module.""" |
3 | 3 |
|
| 4 | +import hashlib |
| 5 | +import json |
4 | 6 | import logging |
5 | 7 | import sqlite3 |
6 | 8 | import tempfile |
@@ -985,3 +987,255 @@ def test_get_source_statistics_comprehensive(self, temp_cache): |
985 | 987 |
|
986 | 988 | # Test that source_0 appears in the statistics |
987 | 989 | assert "source_0" in stats |
| 990 | + |
def test_add_journal_entry_unregistered_source(self, temp_cache):
    """Adding an entry for a source that was never registered must fail.

    No ``register_data_source`` call is made beforehand, so
    ``add_journal_entry`` is expected to raise ``ValueError`` whose message
    matches ``Source.*not registered``.
    """
    source = "unregistered_source"
    orphan_entry = JournalEntryData(
        source_name=source,
        assessment=AssessmentType.PREDATORY,
        journal_name="Test Journal",
        normalized_name="test_journal",
    )

    with pytest.raises(ValueError, match="Source.*not registered"):
        temp_cache.add_journal_entry(source_name=source, entry=orphan_entry)
| 1002 | + |
def test_add_journal_entry_metadata_integer_type(self, temp_cache):
    """Add a journal entry carrying an integer metadata value.

    After registering the source and adding the entry, the test only checks
    that searching by the normalized name returns at least one row.
    """
    source = "test_source"
    temp_cache.register_data_source(
        name=source,
        display_name="Test Source",
        source_type="list",
    )

    int_entry = JournalEntryData(
        source_name=source,
        assessment=AssessmentType.PREDATORY,
        journal_name="Test Journal",
        normalized_name="test_journal",
        metadata={"count": 42},
    )
    temp_cache.add_journal_entry(source_name=source, entry=int_entry)

    # The entry must be discoverable once it has been added.
    hits = temp_cache.search_journals(normalized_name="test_journal")
    assert len(hits) > 0
| 1024 | + |
def test_add_journal_entry_metadata_boolean_type(self, temp_cache):
    """Add a journal entry carrying a boolean metadata value.

    After registering the source and adding the entry, the test only checks
    that searching by the normalized name returns at least one row.
    """
    source = "test_source"
    temp_cache.register_data_source(
        name=source,
        display_name="Test Source",
        source_type="list",
    )

    bool_entry = JournalEntryData(
        source_name=source,
        assessment=AssessmentType.PREDATORY,
        journal_name="Test Journal",
        normalized_name="test_journal",
        metadata={"is_active": True},
    )
    temp_cache.add_journal_entry(source_name=source, entry=bool_entry)

    hits = temp_cache.search_journals(normalized_name="test_journal")
    assert len(hits) > 0
| 1045 | + |
def test_search_journals_with_journal_name_filter(self, temp_cache):
    """Search by ``journal_name`` and expect the stored journal to match.

    A single journal named "International Journal of Testing" is stored and
    then looked up with ``journal_name="Testing"``; at least one returned
    row must have "Testing" in its ``display_name``.
    """
    source = "test_source"
    temp_cache.register_data_source(
        name=source,
        display_name="Test Source",
        source_type="list",
    )

    temp_cache.add_journal_entry(
        source_name=source,
        entry=JournalEntryData(
            source_name=source,
            assessment=AssessmentType.PREDATORY,
            journal_name="International Journal of Testing",
            normalized_name="international_journal_testing",
        ),
    )

    hits = temp_cache.search_journals(journal_name="Testing")
    assert len(hits) > 0

    # Stop at the first row whose display name contains the search term.
    matched = False
    for row in hits:
        if "Testing" in row.get("display_name", ""):
            matched = True
            break
    assert matched
| 1068 | + |
def test_search_journals_metadata_integer_conversion(self, temp_cache):
    """Check that an integer metadata value comes back as an integer.

    The entry is stored with ``{"year": 2023}``; when the first search
    result exposes a truthy ``metadata`` field it is decoded as JSON and
    ``year`` must equal ``2023``.
    """
    source = "test_source"
    temp_cache.register_data_source(
        name=source,
        display_name="Test Source",
        source_type="list",
    )

    year_entry = JournalEntryData(
        source_name=source,
        assessment=AssessmentType.PREDATORY,
        journal_name="Test Journal",
        normalized_name="test_journal",
        metadata={"year": 2023},
    )
    temp_cache.add_journal_entry(source_name=source, entry=year_entry)

    hits = temp_cache.search_journals(normalized_name="test_journal")
    assert len(hits) > 0

    # NOTE(review): the year check is skipped when the first row has no
    # (or empty) metadata, so this test can pass without verifying the
    # conversion — confirm whether that is intended.
    raw_metadata = hits[0].get("metadata")
    if raw_metadata:
        decoded = json.loads(raw_metadata)
        assert decoded.get("year") == 2023
| 1092 | + |
def test_find_conflicts(self, temp_cache):
    """Detect a journal that two sources assess differently.

    The same normalized journal is stored as PREDATORY under ``source1``
    and as LEGITIMATE under ``source2``; ``find_conflicts`` must then
    report at least one conflict for ``test_journal``.
    """
    for name, label in (("source1", "Source 1"), ("source2", "Source 2")):
        temp_cache.register_data_source(
            name=name,
            display_name=label,
            source_type="list",
        )

    # Same journal, opposite verdicts — one entry per source.
    verdicts = (
        ("source1", AssessmentType.PREDATORY),
        ("source2", AssessmentType.LEGITIMATE),
    )
    entries = [
        (
            name,
            JournalEntryData(
                source_name=name,
                assessment=verdict,
                journal_name="Test Journal",
                normalized_name="test_journal",
            ),
        )
        for name, verdict in verdicts
    ]
    for name, entry in entries:
        temp_cache.add_journal_entry(source_name=name, entry=entry)

    conflicts = temp_cache.find_conflicts()

    assert len(conflicts) > 0
    assert any(item["normalized_name"] == "test_journal" for item in conflicts)
| 1129 | + |
def test_get_assessment_cache_count(self, temp_cache, sample_assessment_result):
    """Count cached assessments before and after storing one result."""
    # A fresh cache holds no assessments.
    assert temp_cache.get_assessment_cache_count() == 0

    query_text = "Test Journal"
    digest = hashlib.md5(query_text.encode()).hexdigest()
    temp_cache.cache_assessment_result(
        query_hash=digest,
        query_input=query_text,
        result=sample_assessment_result,
    )

    # Exactly the one result stored above is counted.
    assert temp_cache.get_assessment_cache_count() == 1
| 1145 | + |
def test_clear_assessment_cache(self, temp_cache, sample_assessment_result):
    """Clear the assessment cache and check the reported removal count.

    Two results are cached under distinct query hashes;
    ``clear_assessment_cache`` must return ``2`` and leave the count at 0.
    """
    for query_text in ("Test Journal 1", "Test Journal 2"):
        temp_cache.cache_assessment_result(
            query_hash=hashlib.md5(query_text.encode()).hexdigest(),
            query_input=query_text,
            result=sample_assessment_result,
        )

    removed = temp_cache.clear_assessment_cache()

    assert removed == 2
    assert temp_cache.get_assessment_cache_count() == 0
| 1166 | + |
def test_cache_and_get_value(self, temp_cache):
    """Round-trip a value through the key-value cache."""
    cache_key = "test_key"
    temp_cache.set_cached_value(key=cache_key, value="test_value", ttl_hours=24)

    # The value stored above must be returned unchanged.
    assert temp_cache.get_cached_value(key=cache_key) == "test_value"
| 1175 | + |
def test_get_cached_value_nonexistent(self, temp_cache):
    """Looking up a key that was never set yields ``None``."""
    missing = temp_cache.get_cached_value(key="nonexistent_key")
    assert missing is None
| 1180 | + |
def test_get_article_retraction(self, temp_cache):
    """Cache a fully populated retraction record and read it back.

    Checks the retraction flag, the retraction type and that the metadata
    comes back as a mapping containing the stored ``note``.
    """
    doi = "10.1234/test"
    retraction_fields = {
        "doi": doi,
        "is_retracted": True,
        "source": "test_source",
        "retraction_type": "full",
        "retraction_date": "2023-01-01",
        "retraction_doi": "10.1234/retraction",
        "retraction_reason": "Fraud",
        "metadata": {"note": "Test retraction"},
    }
    temp_cache.cache_article_retraction(**retraction_fields)

    record = temp_cache.get_article_retraction(doi=doi)

    assert record is not None
    # Truthiness rather than ``is True``: SQLite stores booleans as integers.
    assert record["is_retracted"]
    assert record["retraction_type"] == "full"
    assert record["metadata"]["note"] == "Test retraction"
| 1202 | + |
def test_get_article_retraction_nonexistent(self, temp_cache):
    """Looking up a DOI that was never cached yields ``None``."""
    missing = temp_cache.get_article_retraction(doi="10.1234/nonexistent")
    assert missing is None
| 1207 | + |
def test_get_article_retraction_invalid_json_metadata(self, temp_cache):
    """A row with malformed JSON metadata must still be retrievable.

    The row is inserted directly with ``sqlite3`` so that the ``metadata``
    column can hold a string that is not valid JSON. The lookup is then
    expected to return a record with a truthy ``is_retracted``; the
    metadata value itself is not asserted here.
    """
    doi = "10.1234/test"

    # ``with sqlite3.connect(...)`` only commits or rolls back; it never
    # closes the connection. Close it explicitly so the handle on the
    # temporary database file is not leaked.
    conn = sqlite3.connect(temp_cache.db_path)
    try:
        conn.execute(
            """
            INSERT INTO article_retractions
            (doi, is_retracted, source, metadata, checked_at, expires_at)
            VALUES (?, ?, ?, ?, CURRENT_TIMESTAMP, datetime('now', '+30 days'))
            """,
            (doi, True, "test_source", "invalid{json"),
        )
        conn.commit()
    finally:
        conn.close()

    result = temp_cache.get_article_retraction(doi=doi)

    assert result is not None
    # Truthiness rather than ``is True``: SQLite stores booleans as integers.
    assert result["is_retracted"]
| 1227 | + |
def test_cache_article_retraction_with_metadata(self, temp_cache):
    """Cache a retraction with mixed-type metadata and read it back.

    Both the string value and the integer value must come back unchanged
    in the ``metadata`` mapping of the retrieved record.
    """
    doi = "10.1234/test"
    stored_metadata = {"key1": "value1", "key2": 123}
    temp_cache.cache_article_retraction(
        doi=doi,
        is_retracted=True,
        source="test_source",
        metadata=stored_metadata,
    )

    record = temp_cache.get_article_retraction(doi=doi)

    assert record is not None
    returned_metadata = record["metadata"]
    assert returned_metadata["key1"] == "value1"
    assert returned_metadata["key2"] == 123
0 commit comments