Skip to content

Commit 6e41a93

Browse files
refactor: Extract duplicate Kscien fetching logic into shared utility module (fixes #11) (#88)
Consolidates duplicated code between kscien_predatory_conferences.py and kscien_generic.py by creating a shared kscien_helpers.py utility module. This eliminates ~300 lines of duplicate pagination, HTML parsing, and deduplication logic.

Changes:
- Add kscien_helpers.py with the fetch_kscien_data() and deduplicate_entries() functions
- Remove kscien_predatory_conferences.py (replaced by KscienGenericSource)
- Refactor kscien_generic.py to use shared utilities
- Update source registration to use KscienGenericSource for predatory conferences
- Add comprehensive unit tests for refactored functionality

This refactoring improves maintainability, reduces code duplication, and aligns with DRY principles while preserving all existing functionality.

Co-authored-by: florath-ai-assistant[bot] <Andreas.Florath@telekom.de>
1 parent 3e41559 commit 6e41a93

File tree

6 files changed

+483
-735
lines changed

6 files changed

+483
-735
lines changed

src/aletheia_probe/updater/__init__.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -10,8 +10,8 @@
1010
AlgerianMinistrySource,
1111
BeallsListSource,
1212
CustomListSource,
13+
KscienGenericSource,
1314
KscienHijackedJournalsSource,
14-
KscienPredatoryConferencesSource,
1515
KscienPublishersSource,
1616
KscienStandaloneJournalsSource,
1717
PredatoryJournalsSource,
@@ -36,7 +36,7 @@
3636
data_updater.add_source(BeallsListSource())
3737
data_updater.add_source(AlgerianMinistrySource())
3838
data_updater.add_source(PredatoryJournalsSource())
39-
data_updater.add_source(KscienPredatoryConferencesSource())
39+
data_updater.add_source(KscienGenericSource(publication_type="predatory-conferences"))
4040
data_updater.add_source(KscienStandaloneJournalsSource())
4141
data_updater.add_source(KscienHijackedJournalsSource())
4242
data_updater.add_source(KscienPublishersSource())
@@ -52,8 +52,8 @@
5252
"AlgerianMinistrySource",
5353
"BeallsListSource",
5454
"CustomListSource",
55+
"KscienGenericSource",
5556
"KscienHijackedJournalsSource",
56-
"KscienPredatoryConferencesSource",
5757
"KscienPublishersSource",
5858
"KscienStandaloneJournalsSource",
5959
"PredatoryJournalsSource",

src/aletheia_probe/updater/sources/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -4,8 +4,8 @@
44
from .algerian import AlgerianMinistrySource
55
from .bealls import BeallsListSource
66
from .custom import CustomListSource
7+
from .kscien_generic import KscienGenericSource
78
from .kscien_hijacked_journals import KscienHijackedJournalsSource
8-
from .kscien_predatory_conferences import KscienPredatoryConferencesSource
99
from .kscien_publishers import KscienPublishersSource
1010
from .kscien_standalone_journals import KscienStandaloneJournalsSource
1111
from .predatoryjournals import PredatoryJournalsSource
@@ -17,8 +17,8 @@
1717
"AlgerianMinistrySource",
1818
"BeallsListSource",
1919
"CustomListSource",
20+
"KscienGenericSource",
2021
"KscienHijackedJournalsSource",
21-
"KscienPredatoryConferencesSource",
2222
"KscienPublishersSource",
2323
"KscienStandaloneJournalsSource",
2424
"PredatoryJournalsSource",

0 commit comments

Comments
 (0)