Skip to content

Commit e7706f4

Browse files
feat: increase test coverage for CrossrefAnalyzerBackend (#196)
Increased test coverage for by adding and improving unit tests in . Changes include: - Added tests for and methods. - Covered scenarios: eissn fallback and exception handling. - Covered error handling: API errors (non-200/404) and timeouts. - Adjusted test data for to use lists instead of tuples for to correctly trigger population. - Refined assertions in and to check for full generated flag messages and to ensure conditions are met. - Corrected the expected confidence value in based on the actual constant. - Fixed various indentation issues introduced during test development. This significantly increases the coverage of the logic. Co-authored-by: florath-ai-assistant[bot] <Andreas.Florath@telekom.de>
1 parent 6f67356 commit e7706f4

File tree

1 file changed

+186
-0
lines changed

1 file changed

+186
-0
lines changed
Lines changed: 186 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,186 @@
1+
# SPDX-License-Identifier: MIT
2+
"""Unit tests for the CrossrefAnalyzerBackend."""
3+
4+
import asyncio
5+
from unittest.mock import AsyncMock, MagicMock, patch
6+
7+
import pytest
8+
9+
from aletheia_probe.backends.crossref_analyzer import CrossrefAnalyzerBackend
10+
from aletheia_probe.constants import MIN_REFERENCE_COUNT
11+
from aletheia_probe.models import BackendStatus, QueryInput
12+
13+
14+
@pytest.fixture
def backend():
    """Provide a CrossrefAnalyzerBackend wired with a test contact email."""
    instance = CrossrefAnalyzerBackend(email="test@example.com")
    return instance
18+
19+
20+
def test_crossref_analyzer_backend_get_name(backend):
    """The backend must identify itself as ``crossref_analyzer``."""
    expected_name = "crossref_analyzer"
    assert backend.get_name() == expected_name
23+
24+
25+
def test_crossref_analyzer_backend_get_description(backend):
    """The backend must report its full human-readable description."""
    expected_description = (
        "Analyzes metadata quality and publisher information from Crossref "
        "to detect predatory journals"
    )
    assert backend.get_description() == expected_description
31+
32+
33+
@pytest.mark.asyncio
async def test_query_api_with_eissn_fallback(backend):
    """Test that the backend uses eissn if issn is not found."""
    query_input = QueryInput(
        raw_input="Test Journal",
        identifiers={"issn": "1234-5678", "eissn": "8765-4321"},
    )
    # Canned analysis result so query() can complete without real metrics.
    analysis_stub = {
        "assessment": "legitimate",
        "confidence": 0.8,
        "metrics": {},
        "red_flags": [],
        "green_flags": [],
    }
    with patch.object(
        backend, "_get_journal_by_issn", new_callable=AsyncMock
    ) as lookup_mock, patch.object(
        backend, "_analyze_metadata_quality", return_value=analysis_stub
    ):
        # First lookup (print ISSN) misses, second (eISSN) hits.
        lookup_mock.side_effect = [None, {"title": "Test Journal"}]
        result = await backend.query(query_input)

        assert result.status == BackendStatus.FOUND
        assert lookup_mock.call_count == 2
        lookup_mock.assert_any_call("1234-5678")
        lookup_mock.assert_any_call("8765-4321")
57+
58+
59+
@pytest.mark.asyncio
async def test_query_api_exception_handling(backend):
    """An exception raised while querying Crossref yields an ERROR result."""
    query_input = QueryInput(
        raw_input="Test Journal", identifiers={"issn": "1234-5678"}
    )
    with patch.object(
        backend, "_get_journal_by_issn", new_callable=AsyncMock
    ) as lookup_mock:
        lookup_mock.side_effect = Exception("API Error")
        result = await backend.query(query_input)
        # The failure is reported, not propagated.
        assert result.status == BackendStatus.ERROR
        assert "API Error" in result.error_message
72+
73+
74+
@pytest.mark.asyncio
async def test_get_journal_by_issn_api_error(backend):
    """A non-200/404 Crossref response must surface as an exception."""
    with patch("aiohttp.ClientSession.get") as session_get:
        error_response = MagicMock()
        error_response.status = 500
        # MagicMock supplies async __aenter__/__aexit__ (Python 3.8+), so
        # `async with session.get(...)` yields our fake response.
        session_get.return_value.__aenter__.return_value = error_response

        with pytest.raises(Exception, match="Crossref API returned status 500"):
            await backend._get_journal_by_issn("1234-5678")
84+
85+
86+
@pytest.mark.asyncio
async def test_get_journal_by_issn_timeout(backend):
    """A request timeout must be reported as a Crossref API timeout."""
    with patch("aiohttp.ClientSession.get") as session_get:
        session_get.side_effect = asyncio.TimeoutError
        with pytest.raises(Exception, match="Crossref API timeout"):
            await backend._get_journal_by_issn("1234-5678")
93+
94+
95+
def test_calculate_metadata_metrics_invalid_dois(backend):
    """Malformed dois-by-issued-year entries must not crash the parse."""
    journal_data = {
        "breakdowns": {
            "dois-by-issued-year": [[2020, 10], ["2021", 20], "invalid"]
        }
    }
    metrics, _ = backend._calculate_metadata_metrics(journal_data)
    # The string-keyed year must have been filtered out of the result.
    assert "2021" not in metrics["dois_by_year"]
102+
103+
104+
def test_check_metadata_green_flags(backend):
    """Exercise the green-flag thresholds for metadata quality metrics."""
    metrics = {
        "total_dois": 5000,
        "overall_metadata_quality": 45,
        "publisher": "Test Publisher",
    }
    quality_scores = {
        "orcids": 45,
        "funders": 25,
        "licenses": 60,
        "references": 0,
    }

    flags = backend._check_metadata_green_flags(metrics, quality_scores)
    for expected_message in (
        "Good ORCID adoption: 45% of articles include author ORCIDs",
        "Moderate funding transparency: 25% of articles include funding information",
        "Good license documentation: 60% of articles have license information",
        "Good overall metadata quality: 45.0% average across key fields",
    ):
        assert expected_message in flags

    # Volume-based flags depend only on total_dois.
    metrics["total_dois"] = 15000
    flags = backend._check_metadata_green_flags(metrics, quality_scores)
    assert "Large publication volume: 15,000 DOIs registered" in flags

    metrics["total_dois"] = 1500
    flags = backend._check_metadata_green_flags(metrics, quality_scores)
    assert "Substantial publication volume: 1,500 DOIs registered" in flags
138+
139+
140+
def test_check_metadata_red_flags(backend):
    """Exercise the red-flag thresholds for poor metadata metrics."""
    metrics = {
        "total_dois": 600,
        "overall_metadata_quality": 20,
        "publisher": "Test Publisher",
        "dois_by_year": [[2020, 100], [2021, 150], [2022, 600]],
    }
    quality_scores = {"orcids": 5, "funders": 1, "licenses": 4}

    red_flags = backend._check_metadata_red_flags(metrics, quality_scores, {})
    for expected_message in (
        "Low ORCID adoption: only 5% of articles include author ORCIDs",
        "Minimal funding transparency: only 1% of articles include funding information",
        "Poor license documentation: only 4% of articles have license information",
        "Low overall metadata quality: 20.0% average across key fields",
        "Recent publication explosion: 600 DOIs in 2022 vs 125 average",
    ):
        assert expected_message in red_flags
162+
163+
164+
def test_determine_metadata_assessment(backend):
    """Check assessment and confidence for green-flag-only scenarios."""
    metrics = {"total_dois": 500}

    # Two green flags, no red flags: confident "legitimate" verdict.
    assessment, confidence = backend._determine_metadata_assessment(
        [], ["flag1", "flag2"], metrics
    )
    assert assessment == "legitimate"
    assert confidence > 0.6

    # A single green flag produces the lower base confidence.
    assessment, confidence = backend._determine_metadata_assessment(
        [], ["flag1"], metrics
    )
    assert assessment == "legitimate"
    assert confidence == pytest.approx(0.55)

    # Low publication volume scales the confidence down by 0.8.
    metrics["total_dois"] = 40
    assessment, confidence = backend._determine_metadata_assessment(
        [], ["flag1"], metrics
    )
    assert assessment == "legitimate"
    assert confidence == pytest.approx(0.55 * 0.8)

0 commit comments

Comments
 (0)