Skip to content

Commit 8dd4409

Browse files
feat: Add 'suspicious' as evaluation result for heuristic-only assessments (issue #65) (#66)
## Summary

Implements the 'suspicious' evaluation result to distinguish between definitive predatory classifications (based on predatory lists) and heuristic-based assessments (based on indicators like retraction rates).

## Changes Made

### Core Feature
- Added SUSPICIOUS to AssessmentType enum
- Created EvidenceType enum (PREDATORY_LIST, LEGITIMATE_LIST, HEURISTIC)
- Enhanced Backend architecture with get_evidence_type() abstract method
- Updated models with suspicious counters and evidence_type field

### Classification Logic (per issue #65)
- PREDATORY: Only when journal found in predatory lists
- SUSPICIOUS: When assessment based solely on heuristics
- Mixed case: Predatory list + negative heuristics = PREDATORY
- Backend-centric evidence classification (no central lists)

### Display Updates
- Added ⚠️ emoji for suspicious results in batch output
- Updated CLI to support suspicious in custom list types
- Enhanced batch summaries with suspicious counts
- Lower confidence scoring for heuristic-only assessments

### Architecture Improvements
- CachedBackend automatically detects evidence type from list_type
- HybridBackend defaults to HEURISTIC evidence
- DOAJ overridden to return LEGITIMATE_LIST evidence
- Extensible design for new backends

### Test Updates
- Fixed all mock backends to implement get_evidence_type()
- Added evidence_type field to all BackendResult test instances
- Updated test expectations for new classification logic
- All 254 tests passing

## Technical Details
- Backward compatible with existing functionality
- Self-declaring backends eliminate maintenance of central lists
- Proper confidence scoring adjustments for different evidence types
- Comprehensive error handling and validation

Co-authored-by: florath-ai-assistant[bot] <Andreas.Florath@telekom.de>
1 parent 82cc634 commit 8dd4409

File tree

10 files changed

+174
-24
lines changed

10 files changed

+174
-24
lines changed

src/aletheia_probe/backends/base.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
from typing import Any
1111

1212
from ..cache import get_cache_manager
13+
from ..enums import EvidenceType
1314
from ..models import AssessmentResult, BackendResult, BackendStatus, QueryInput
1415

1516

@@ -42,6 +43,11 @@ def get_description(self) -> str:
4243
"""Return a description of what this backend checks."""
4344
pass
4445

46+
@abstractmethod
47+
def get_evidence_type(self) -> EvidenceType:
48+
"""Return the type of evidence this backend provides."""
49+
pass
50+
4551
async def query_with_timeout(
4652
self, query_input: QueryInput, timeout: int = 10
4753
) -> BackendResult:
@@ -91,6 +97,16 @@ def __init__(self, source_name: str, list_type: str, cache_ttl_hours: int = 24):
9197
self.source_name = source_name
9298
self.list_type = list_type
9399

100+
def get_evidence_type(self) -> EvidenceType:
101+
"""Return evidence type based on list type."""
102+
if self.list_type == "predatory":
103+
return EvidenceType.PREDATORY_LIST
104+
elif self.list_type == "legitimate":
105+
return EvidenceType.LEGITIMATE_LIST
106+
else:
107+
# Default to heuristic for unknown list types
108+
return EvidenceType.HEURISTIC
109+
94110
async def query(self, query_input: QueryInput) -> BackendResult:
95111
"""Query cached data for journal information."""
96112
start_time = time.time()
@@ -202,6 +218,10 @@ class HybridBackend(Backend):
202218
def __init__(self, cache_ttl_hours: int = 24):
203219
super().__init__(cache_ttl_hours)
204220

221+
def get_evidence_type(self) -> EvidenceType:
222+
"""HybridBackend provides heuristic evidence by default."""
223+
return EvidenceType.HEURISTIC
224+
205225
async def query(self, query_input: QueryInput) -> BackendResult:
206226
"""Check cache first, then query live API if needed."""
207227
start_time = time.time()

src/aletheia_probe/backends/doaj.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88

99
import aiohttp
1010

11-
from ..enums import AssessmentType
11+
from ..enums import AssessmentType, EvidenceType
1212
from ..logging_config import get_detail_logger, get_status_logger
1313
from ..models import BackendResult, BackendStatus, QueryInput
1414
from ..retry_utils import async_retry_with_backoff
@@ -40,6 +40,9 @@ def get_name(self) -> str:
4040
def get_description(self) -> str:
4141
return "Checks DOAJ (Directory of Open Access Journals) for legitimate journals"
4242

43+
def get_evidence_type(self) -> EvidenceType:
44+
return EvidenceType.LEGITIMATE_LIST
45+
4346
async def _query_api(self, query_input: QueryInput) -> BackendResult:
4447
"""Query DOAJ API for journal information with retry logic."""
4548
start_time = time.time()

src/aletheia_probe/batch_assessor.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,12 +93,15 @@ async def assess_bibtex_file(
9393
predatory_count=0,
9494
legitimate_count=0,
9595
insufficient_data_count=0,
96+
suspicious_count=0,
9697
conference_entries=0,
9798
conference_predatory=0,
9899
conference_legitimate=0,
100+
conference_suspicious=0,
99101
journal_entries=0,
100102
journal_predatory=0,
101103
journal_legitimate=0,
104+
journal_suspicious=0,
102105
has_predatory_journals=False,
103106
retracted_articles_count=0,
104107
articles_checked_for_retraction=0,
@@ -182,6 +185,12 @@ async def assess_bibtex_file(
182185
result.conference_legitimate += 1
183186
else:
184187
result.journal_legitimate += 1
188+
elif assessment.assessment == AssessmentType.SUSPICIOUS:
189+
result.suspicious_count += 1
190+
if is_conference:
191+
result.conference_suspicious += 1
192+
else:
193+
result.journal_suspicious += 1
185194
else:
186195
result.insufficient_data_count += 1
187196

@@ -246,6 +255,15 @@ def format_summary(result: BibtexAssessmentResult, verbose: bool = False) -> str
246255
summary_lines.append(
247256
f" 🎤 Conferences: {result.conference_predatory}/{result.conference_entries}"
248257
)
258+
summary_lines.append(f" Suspicious: {result.suspicious_count} total")
259+
if result.journal_entries > 0:
260+
summary_lines.append(
261+
f" 📄 Journals: {result.journal_suspicious}/{result.journal_entries}"
262+
)
263+
if result.conference_entries > 0:
264+
summary_lines.append(
265+
f" 🎤 Conferences: {result.conference_suspicious}/{result.conference_entries}"
266+
)
249267
summary_lines.append(f" Legitimate: {result.legitimate_count} total")
250268
if result.journal_entries > 0:
251269
summary_lines.append(
@@ -294,6 +312,7 @@ def format_summary(result: BibtexAssessmentResult, verbose: bool = False) -> str
294312
emoji_map: dict[str, str] = {
295313
AssessmentType.PREDATORY.value: "❌",
296314
AssessmentType.LEGITIMATE.value: "✅",
315+
AssessmentType.SUSPICIOUS.value: "⚠️",
297316
AssessmentType.UNKNOWN.value: "❓",
298317
}
299318
status_emoji = emoji_map.get(assessment.assessment, "❓")

src/aletheia_probe/cli.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -230,7 +230,12 @@ def status() -> None:
230230
@click.option(
231231
"--list-type",
232232
type=click.Choice(
233-
[AssessmentType.PREDATORY, AssessmentType.LEGITIMATE, AssessmentType.UNKNOWN]
233+
[
234+
AssessmentType.PREDATORY,
235+
AssessmentType.LEGITIMATE,
236+
AssessmentType.SUSPICIOUS,
237+
AssessmentType.UNKNOWN,
238+
]
234239
),
235240
default=AssessmentType.PREDATORY,
236241
help="Type of journals in the list",

src/aletheia_probe/dispatcher.py

Lines changed: 72 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
AGREEMENT_BONUS_AMOUNT,
1212
CONFIDENCE_THRESHOLD_HIGH,
1313
)
14-
from .enums import AssessmentType
14+
from .enums import AssessmentType, EvidenceType
1515
from .logging_config import get_detail_logger, get_status_logger
1616
from .models import AssessmentResult, BackendResult, BackendStatus, QueryInput
1717

@@ -205,6 +205,7 @@ async def _query_backends(
205205
assessment=None,
206206
error_message=str(result),
207207
response_time=0.0,
208+
evidence_type="heuristic", # Default for error cases
208209
)
209210
backend_results.append(error_result)
210211
elif isinstance(result, BackendResult):
@@ -227,6 +228,7 @@ async def _query_backends(
227228
assessment=None,
228229
error_message=f"Unexpected result type: {type(result)}",
229230
response_time=0.0,
231+
evidence_type="heuristic", # Default for error cases
230232
)
231233
backend_results.append(error_result)
232234

@@ -251,6 +253,7 @@ async def _query_backend_with_timing(
251253
# response_time already contains the actual backend execution time
252254
result_dict = result.model_dump()
253255
result_dict["execution_time_ms"] = result.response_time * 1000
256+
result_dict["evidence_type"] = backend.get_evidence_type().value
254257
return BackendResult(**result_dict)
255258

256259
def _calculate_assessment(
@@ -460,30 +463,63 @@ def _make_final_assessment(
460463
total_weight = score_data["total_weight"]
461464
retraction_risk_level = retraction_info.get("risk_level")
462465

463-
# Decision logic
466+
# Analyze evidence types to determine classification
467+
predatory_list_evidence = []
468+
legitimate_list_evidence = []
469+
heuristic_evidence = []
470+
471+
for result in successful_results:
472+
if (
473+
result.evidence_type == EvidenceType.PREDATORY_LIST.value
474+
and result.assessment == AssessmentType.PREDATORY
475+
):
476+
predatory_list_evidence.append(result)
477+
elif (
478+
result.evidence_type == EvidenceType.LEGITIMATE_LIST.value
479+
and result.assessment == AssessmentType.LEGITIMATE
480+
):
481+
legitimate_list_evidence.append(result)
482+
elif result.evidence_type == EvidenceType.HEURISTIC.value:
483+
heuristic_evidence.append(result)
484+
485+
# Decision logic based on issue #65 requirements
464486
if total_weight == 0:
465487
assessment = AssessmentType.UNKNOWN
466488
confidence = 0.1
467489
overall_score = 0.0
468-
elif total_predatory_weight > total_legitimate_weight:
469-
assessment = AssessmentType.PREDATORY
470-
confidence = min(0.95, total_predatory_weight / total_weight)
471-
overall_score = total_predatory_weight / total_weight
472-
reasoning.insert(
473-
0,
474-
f"Classified as predatory based on {score_data['predatory_count']} source(s)",
475-
)
476-
477-
# Cross-validate with retraction data
478-
if retraction_risk_level in ["critical", "high"]:
479-
confidence = min(
480-
CONFIDENCE_THRESHOLD_HIGH, confidence + AGREEMENT_BONUS_AMOUNT
490+
reasoning.insert(0, "No assessment data available")
491+
492+
elif len(predatory_list_evidence) > 0:
493+
# Rule: If ANY predatory list evidence exists, can be PREDATORY
494+
if total_predatory_weight > total_legitimate_weight:
495+
assessment = AssessmentType.PREDATORY
496+
confidence = min(0.95, total_predatory_weight / total_weight)
497+
overall_score = total_predatory_weight / total_weight
498+
reasoning.insert(
499+
0,
500+
f"Classified as predatory based on {len(predatory_list_evidence)} predatory list(s)",
481501
)
482-
reasoning.append(
483-
"⚠️ High retraction rate corroborates predatory classification"
502+
503+
# Cross-validate with retraction data
504+
if retraction_risk_level in ["critical", "high"]:
505+
confidence = min(
506+
CONFIDENCE_THRESHOLD_HIGH, confidence + AGREEMENT_BONUS_AMOUNT
507+
)
508+
reasoning.append(
509+
"⚠️ High retraction rate corroborates predatory classification"
510+
)
511+
else:
512+
# Predatory list evidence exists but legitimate evidence is stronger
513+
assessment = AssessmentType.LEGITIMATE
514+
confidence = min(0.9, total_legitimate_weight / total_weight)
515+
overall_score = total_legitimate_weight / total_weight
516+
reasoning.insert(
517+
0,
518+
"Classified as legitimate despite predatory list match - stronger legitimate evidence",
484519
)
485520

486521
elif total_legitimate_weight > 0:
522+
# Only legitimate evidence (list or heuristic)
487523
assessment = AssessmentType.LEGITIMATE
488524
confidence = min(0.9, total_legitimate_weight / total_weight)
489525
overall_score = total_legitimate_weight / total_weight
@@ -502,6 +538,25 @@ def _make_final_assessment(
502538
"⚠️ NOTE: Moderate retraction rate - quality concerns exist"
503539
)
504540

541+
elif total_predatory_weight > 0:
542+
# Rule: Predatory assessment based ONLY on heuristics = SUSPICIOUS
543+
assessment = AssessmentType.SUSPICIOUS
544+
confidence = min(
545+
0.85, total_predatory_weight / total_weight
546+
) # Lower confidence for heuristic-only
547+
overall_score = total_predatory_weight / total_weight
548+
reasoning.insert(
549+
0,
550+
f"Classified as suspicious based on heuristic analysis only ({score_data['predatory_count']} source(s))",
551+
)
552+
553+
# Retraction data supports suspicious classification
554+
if retraction_risk_level in ["critical", "high"]:
555+
confidence = min(0.95, confidence + AGREEMENT_BONUS_AMOUNT)
556+
reasoning.append(
557+
"⚠️ High retraction rate supports suspicious classification"
558+
)
559+
505560
else:
506561
assessment = AssessmentType.UNKNOWN
507562
confidence = 0.3

src/aletheia_probe/enums.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ class AssessmentType(str, Enum):
2020

2121
PREDATORY = "predatory"
2222
LEGITIMATE = "legitimate"
23+
SUSPICIOUS = "suspicious"
2324
UNKNOWN = "unknown"
2425
QUESTIONABLE = "questionable"
2526
QUALITY_INDICATOR = "quality_indicator"
@@ -33,6 +34,14 @@ class BackendType(str, Enum):
3334
QUALITY_INDICATOR = "quality_indicator"
3435

3536

37+
class EvidenceType(str, Enum):
38+
"""Types of evidence provided by backends for classification purposes."""
39+
40+
PREDATORY_LIST = "predatory_list" # Curated lists of predatory journals
41+
LEGITIMATE_LIST = "legitimate_list" # Curated lists of legitimate journals
42+
HEURISTIC = "heuristic" # Analysis-based assessment (retraction rates, etc.)
43+
44+
3645
class RiskLevel(str, Enum):
3746
"""Risk levels for retraction watch data."""
3847

src/aletheia_probe/models.py

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ class BackendResult(BaseModel):
3939
..., ge=0.0, le=1.0, description="Confidence score 0.0-1.0"
4040
)
4141
assessment: str | None = Field(
42-
None, description="predatory, legitimate, or unknown"
42+
None, description="predatory, legitimate, suspicious, or unknown"
4343
)
4444
data: dict[str, Any] = Field(
4545
default_factory=dict, description="Backend-specific raw data"
@@ -55,6 +55,10 @@ class BackendResult(BaseModel):
5555
execution_time_ms: float | None = Field(
5656
None, description="Backend execution time in milliseconds"
5757
)
58+
evidence_type: str | None = Field(
59+
None,
60+
description="Type of evidence: predatory_list, legitimate_list, or heuristic",
61+
)
5862

5963

6064
class JournalMetadata(BaseModel):
@@ -79,7 +83,7 @@ class AssessmentResult(BaseModel):
7983

8084
input_query: str = Field(..., description="Original query string")
8185
assessment: str = Field(
82-
..., description="predatory, legitimate, or insufficient_data"
86+
..., description="predatory, legitimate, suspicious, or insufficient_data"
8387
)
8488
confidence: float = Field(
8589
..., ge=0.0, le=1.0, description="Overall confidence score"
@@ -179,6 +183,9 @@ class BibtexAssessmentResult(BaseModel):
179183
insufficient_data_count: int = Field(
180184
0, description="Number of entries with insufficient data"
181185
)
186+
suspicious_count: int = Field(
187+
0, description="Number of entries with suspicious journals/conferences"
188+
)
182189
# Conference-specific counters
183190
conference_entries: int = Field(
184191
0,
@@ -188,12 +195,16 @@ class BibtexAssessmentResult(BaseModel):
188195
conference_legitimate: int = Field(
189196
0, description="Number of legitimate conferences"
190197
)
198+
conference_suspicious: int = Field(
199+
0, description="Number of suspicious conferences"
200+
)
191201
# Journal-specific counters
192202
journal_entries: int = Field(
193203
0, description="Number of journal entries (article, etc.)"
194204
)
195205
journal_predatory: int = Field(0, description="Number of predatory journals")
196206
journal_legitimate: int = Field(0, description="Number of legitimate journals")
207+
journal_suspicious: int = Field(0, description="Number of suspicious journals")
197208
has_predatory_journals: bool = Field(
198209
False, description="Whether any predatory journals/conferences were found"
199210
)

0 commit comments

Comments (0)