# SPDX-License-Identifier: MIT
"""Unit tests for the CrossrefAnalyzerBackend."""

import asyncio
from unittest.mock import AsyncMock, MagicMock, patch

import pytest

from aletheia_probe.backends.crossref_analyzer import CrossrefAnalyzerBackend
from aletheia_probe.constants import MIN_REFERENCE_COUNT
from aletheia_probe.models import BackendStatus, QueryInput
| 13 | + |
@pytest.fixture
def backend():
    """Provide a fresh CrossrefAnalyzerBackend for each test."""
    instance = CrossrefAnalyzerBackend(email="test@example.com")
    return instance
| 18 | + |
| 19 | + |
def test_crossref_analyzer_backend_get_name(backend):
    """The backend must identify itself as the Crossref analyzer."""
    expected_name = "crossref_analyzer"
    assert backend.get_name() == expected_name
| 23 | + |
| 24 | + |
def test_crossref_analyzer_backend_get_description(backend):
    """The backend must report its expected human-readable description."""
    expected_description = (
        "Analyzes metadata quality and publisher information from Crossref "
        "to detect predatory journals"
    )
    assert backend.get_description() == expected_description
| 31 | + |
| 32 | + |
@pytest.mark.asyncio
async def test_query_api_with_eissn_fallback(backend):
    """When the ISSN lookup misses, the backend retries with the eISSN."""
    analysis_stub = {
        "assessment": "legitimate",
        "confidence": 0.8,
        "metrics": {},
        "red_flags": [],
        "green_flags": [],
    }
    query_input = QueryInput(
        raw_input="Test Journal",
        identifiers={"issn": "1234-5678", "eissn": "8765-4321"},
    )
    with patch.object(
        backend, "_get_journal_by_issn", new_callable=AsyncMock
    ) as mock_get, patch.object(
        backend, "_analyze_metadata_quality", return_value=analysis_stub
    ):
        # First call (issn) returns nothing; second call (eissn) hits.
        mock_get.side_effect = [None, {"title": "Test Journal"}]
        result = await backend.query(query_input)

    assert result.status == BackendStatus.FOUND
    assert mock_get.call_count == 2
    mock_get.assert_any_call("1234-5678")
    mock_get.assert_any_call("8765-4321")
| 57 | + |
| 58 | + |
@pytest.mark.asyncio
async def test_query_api_exception_handling(backend):
    """Exceptions raised during the lookup surface as an ERROR result."""
    query_input = QueryInput(
        raw_input="Test Journal", identifiers={"issn": "1234-5678"}
    )
    with patch.object(
        backend, "_get_journal_by_issn", new_callable=AsyncMock
    ) as mock_get:
        mock_get.side_effect = Exception("API Error")
        result = await backend.query(query_input)

    assert result.status == BackendStatus.ERROR
    assert "API Error" in result.error_message
| 72 | + |
| 73 | + |
@pytest.mark.asyncio
async def test_get_journal_by_issn_api_error(backend):
    """A non-200 Crossref response raises a descriptive exception."""
    fake_response = MagicMock()
    fake_response.status = 500

    with patch("aiohttp.ClientSession.get") as mock_get:
        # The session's async context manager yields the fake 500 response.
        mock_get.return_value.__aenter__.return_value = fake_response
        with pytest.raises(Exception, match="Crossref API returned status 500"):
            await backend._get_journal_by_issn("1234-5678")
| 84 | + |
| 85 | + |
@pytest.mark.asyncio
async def test_get_journal_by_issn_timeout(backend):
    """An aiohttp timeout is translated into a Crossref timeout error."""
    with patch("aiohttp.ClientSession.get", side_effect=asyncio.TimeoutError):
        with pytest.raises(Exception, match="Crossref API timeout"):
            await backend._get_journal_by_issn("1234-5678")
| 93 | + |
| 94 | + |
def test_calculate_metadata_metrics_invalid_dois(backend):
    """Malformed entries in dois-by-issued-year are filtered out."""
    journal_data = {
        "breakdowns": {
            "dois-by-issued-year": [[2020, 10], ["2021", 20], "invalid"]
        }
    }

    metrics, _ = backend._calculate_metadata_metrics(journal_data)

    # The entry keyed by the string "2021" must have been discarded.
    assert "2021" not in metrics["dois_by_year"]
| 102 | + |
| 103 | + |
def test_check_metadata_green_flags(backend):
    """Exercise green-flag thresholds for quality scores and volume tiers."""
    metrics = {
        "total_dois": 5000,
        "overall_metadata_quality": 45,
        "publisher": "Test Publisher",
    }
    quality_scores = {
        "orcids": 45,
        "funders": 25,
        "licenses": 60,
        "references": 0,
    }

    flags = backend._check_metadata_green_flags(metrics, quality_scores)
    expected_messages = (
        "Good ORCID adoption: 45% of articles include author ORCIDs",
        "Moderate funding transparency: 25% of articles include funding information",
        "Good license documentation: 60% of articles have license information",
        "Good overall metadata quality: 45.0% average across key fields",
    )
    for message in expected_messages:
        assert message in flags

    # Volume over 10,000 DOIs earns the "large" wording.
    metrics["total_dois"] = 15000
    flags = backend._check_metadata_green_flags(metrics, quality_scores)
    assert "Large publication volume: 15,000 DOIs registered" in flags

    # Volume over 1,000 DOIs earns the "substantial" wording.
    metrics["total_dois"] = 1500
    flags = backend._check_metadata_green_flags(metrics, quality_scores)
    assert "Substantial publication volume: 1,500 DOIs registered" in flags
| 138 | + |
| 139 | + |
def test_check_metadata_red_flags(backend):
    """Exercise red-flag thresholds for poor metadata and a volume spike."""
    metrics = {
        "total_dois": 600,
        "overall_metadata_quality": 20,
        "publisher": "Test Publisher",
        "dois_by_year": [[2020, 100], [2021, 150], [2022, 600]],
    }
    quality_scores = {"orcids": 5, "funders": 1, "licenses": 4}

    flags = backend._check_metadata_red_flags(metrics, quality_scores, {})

    expected_messages = (
        "Low ORCID adoption: only 5% of articles include author ORCIDs",
        "Minimal funding transparency: only 1% of articles include funding information",
        "Poor license documentation: only 4% of articles have license information",
        "Low overall metadata quality: 20.0% average across key fields",
        "Recent publication explosion: 600 DOIs in 2022 vs 125 average",
    )
    for message in expected_messages:
        assert message in flags
| 162 | + |
| 163 | + |
def test_determine_metadata_assessment(backend):
    """Check assessment/confidence outcomes for green flags and low volume."""
    metrics = {"total_dois": 500}

    # Two green flags and no red flags -> legitimate with high confidence.
    assessment, confidence = backend._determine_metadata_assessment(
        [], ["flag1", "flag2"], metrics
    )
    assert assessment == "legitimate"
    assert confidence > 0.6

    # A single green flag yields a lower confidence.
    assessment, confidence = backend._determine_metadata_assessment(
        [], ["flag1"], metrics
    )
    assert assessment == "legitimate"
    assert confidence == pytest.approx(0.55)

    # Low publication volume scales the confidence down by 0.8.
    metrics["total_dois"] = 40
    assessment, confidence = backend._determine_metadata_assessment(
        [], ["flag1"], metrics
    )
    assert assessment == "legitimate"
    assert confidence == pytest.approx(0.55 * 0.8)