Skip to content

Commit 0bde9d7

Browse files
fix: skip one-sided cross-validation and clean indicator reasoning (#1038)
Avoid penalizing assessments when paired backends have no matching data, and prevent empty indicator headers in OpenAlex reasoning output. [AI-assisted]

Co-authored-by: florath-ai-assistant[bot] <Andreas.Florath@telekom.de>
1 parent 342dee9 commit 0bde9d7

File tree

4 files changed

+188
-21
lines changed

4 files changed

+188
-21
lines changed

src/aletheia_probe/backends/openalex_analyzer.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -824,14 +824,16 @@ def _generate_reasoning(
824824
) -> list[str]:
825825
"""Generate human-readable reasoning for the assessment."""
826826
reasoning = []
827+
normalized_green_flags = [flag.strip() for flag in green_flags if flag.strip()]
828+
normalized_red_flags = [flag.strip() for flag in red_flags if flag.strip()]
827829

828-
if green_flags:
830+
if normalized_green_flags:
829831
reasoning.append("Positive indicators:")
830-
reasoning.extend([f" • {flag}" for flag in green_flags])
832+
reasoning.extend([f" • {flag}" for flag in normalized_green_flags])
831833

832-
if red_flags:
834+
if normalized_red_flags:
833835
reasoning.append("Warning signs:")
834-
reasoning.extend([f" • {flag}" for flag in red_flags])
836+
reasoning.extend([f" • {flag}" for flag in normalized_red_flags])
835837

836838
# Add summary statistics
837839
reasoning.append(
@@ -841,7 +843,7 @@ def _generate_reasoning(
841843
f"Citation metrics: {metrics['citation_ratio']:.2f} citations per paper on average"
842844
)
843845

844-
if not green_flags and not red_flags:
846+
if not normalized_green_flags and not normalized_red_flags:
845847
reasoning.append("Mixed or insufficient signals for clear assessment")
846848

847849
return reasoning

src/aletheia_probe/dispatcher.py

Lines changed: 33 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -163,6 +163,7 @@ def _apply_cross_validation(
163163

164164
cross_validation_applied = False
165165
adjusted_results = []
166+
max_total_adjustment = 0.25
166167

167168
for result in backend_results:
168169
if result.status != BackendStatus.FOUND:
@@ -171,8 +172,8 @@ def _apply_cross_validation(
171172
continue
172173

173174
# Check if this result can be cross-validated with any other result
174-
confidence_adjustment = 0.0
175-
cross_validation_data = None
175+
pair_adjustments: list[float] = []
176+
cross_validation_data_list: list[dict[str, Any]] = []
176177
backend_name = result.backend_name
177178

178179
for backend1, backend2 in registered_pairs:
@@ -183,21 +184,31 @@ def _apply_cross_validation(
183184
else:
184185
continue
185186

187+
# Cross-validation requires actual findings from both backends.
188+
if other_result.status != BackendStatus.FOUND:
189+
continue
190+
186191
# Apply cross-validation for this pair
187192
validation_result = self.cross_validation_registry.validate_pair(
188193
backend_name, result, other_result.backend_name, other_result
189194
)
190195

191196
if validation_result:
192-
confidence_adjustment = validation_result.get(
193-
"confidence_adjustment", 0.0
197+
adjustment = float(
198+
validation_result.get("confidence_adjustment", 0.0)
199+
)
200+
pair_adjustments.append(adjustment)
201+
cross_validation_data_list.append(
202+
{
203+
**validation_result,
204+
"paired_backend": other_result.backend_name,
205+
}
194206
)
195-
cross_validation_data = validation_result
196207
cross_validation_applied = True
197208

198209
self.detail_logger.debug(
199210
f"Cross-validation applied between {backend_name} and {other_result.backend_name}: "
200-
f"adjustment={confidence_adjustment:+.3f}"
211+
f"adjustment={adjustment:+.3f}"
201212
)
202213

203214
# Add cross-validation reasoning
@@ -211,28 +222,34 @@ def _apply_cross_validation(
211222
for reason in validation_result["reasoning"][:3]
212223
]
213224
)
214-
215-
break # Apply only first matching cross-validation
225+
confidence_adjustment = max(
226+
-max_total_adjustment,
227+
min(max_total_adjustment, sum(pair_adjustments)),
228+
)
216229

217230
# Create adjusted result
218231
new_confidence = max(
219232
0.0, min(1.0, result.confidence + confidence_adjustment)
220233
)
221234

235+
data_with_cross_validation = {**result.data}
236+
if cross_validation_data_list:
237+
data_with_cross_validation["cross_validations"] = (
238+
cross_validation_data_list
239+
)
240+
if len(cross_validation_data_list) == 1:
241+
# Backward compatibility with existing consumers/tests.
242+
data_with_cross_validation["cross_validation"] = (
243+
cross_validation_data_list[0]
244+
)
245+
222246
# Create new result with adjusted confidence and cross-validation data
223247
adjusted_result = BackendResult(
224248
backend_name=result.backend_name,
225249
status=result.status,
226250
confidence=new_confidence,
227251
assessment=result.assessment,
228-
data={
229-
**result.data,
230-
**(
231-
{"cross_validation": cross_validation_data}
232-
if cross_validation_data
233-
else {}
234-
),
235-
},
252+
data=data_with_cross_validation,
236253
sources=result.sources,
237254
error_message=result.error_message,
238255
response_time=result.response_time,
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
# SPDX-License-Identifier: MIT
2+
"""Tests for OpenAlex analyzer backend."""
3+
4+
from aletheia_probe.backends.openalex_analyzer import OpenAlexAnalyzerBackend
5+
6+
7+
class TestOpenAlexAnalyzerReasoning:
8+
"""Tests for OpenAlex reasoning generation."""
9+
10+
def test_generate_reasoning_skips_empty_indicator_headers(self):
11+
"""Do not render indicator headers when flags are empty or blank."""
12+
backend = OpenAlexAnalyzerBackend()
13+
metrics = {"total_publications": 100, "years_active": 10, "citation_ratio": 2.5}
14+
15+
reasoning = backend._generate_reasoning(
16+
red_flags=["", " "],
17+
green_flags=[" "],
18+
metrics=metrics,
19+
)
20+
21+
assert "Positive indicators:" not in reasoning
22+
assert "Warning signs:" not in reasoning
23+
assert "Mixed or insufficient signals for clear assessment" in reasoning

tests/unit/test_dispatcher.py

Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,26 @@
1818
)
1919

2020

21+
def _make_backend_result(
22+
backend_name: str,
23+
confidence: float,
24+
status: BackendStatus = BackendStatus.FOUND,
25+
assessment: AssessmentType | None = AssessmentType.LEGITIMATE,
26+
) -> BackendResult:
27+
"""Create a minimal BackendResult for dispatcher cross-validation tests."""
28+
return BackendResult(
29+
fallback_chain=QueryFallbackChain([]),
30+
backend_name=backend_name,
31+
status=status,
32+
confidence=confidence,
33+
assessment=assessment,
34+
data={},
35+
sources=[backend_name],
36+
response_time=0.1,
37+
evidence_type=EvidenceType.HEURISTIC.value,
38+
)
39+
40+
2141
@pytest.fixture
2242
def dispatcher():
2343
"""Create a QueryDispatcher instance for testing."""
@@ -487,3 +507,108 @@ def test_get_enabled_backends_without_email_config(self, dispatcher):
487507
# Verify that get_backend was called (not create_backend)
488508
mock_registry.get_backend.assert_called_once_with("doaj")
489509
mock_registry.create_backend.assert_not_called()
510+
511+
def test_apply_cross_validation_aggregates_multiple_pairs(self, dispatcher):
512+
"""Test that cross-validation aggregates adjustments from all matching pairs."""
513+
backend_results = [
514+
_make_backend_result("backend_a", confidence=0.5),
515+
_make_backend_result("backend_b", confidence=0.6),
516+
_make_backend_result("backend_c", confidence=0.7),
517+
]
518+
reasoning: list[str] = []
519+
520+
with (
521+
patch.object(
522+
dispatcher.cross_validation_registry,
523+
"get_registered_pairs",
524+
return_value=[("backend_a", "backend_b"), ("backend_a", "backend_c")],
525+
),
526+
patch.object(
527+
dispatcher.cross_validation_registry, "validate_pair"
528+
) as mock_validate_pair,
529+
):
530+
mock_validate_pair.side_effect = [
531+
{"confidence_adjustment": 0.10, "reasoning": ["AB agreement"]},
532+
{"confidence_adjustment": 0.08, "reasoning": ["AC agreement"]},
533+
None,
534+
None,
535+
]
536+
537+
adjusted_results = dispatcher._apply_cross_validation(
538+
backend_results, reasoning
539+
)
540+
541+
adjusted_a = next(r for r in adjusted_results if r.backend_name == "backend_a")
542+
assert adjusted_a.confidence == pytest.approx(0.68)
543+
assert "cross_validations" in adjusted_a.data
544+
assert len(adjusted_a.data["cross_validations"]) == 2
545+
assert "AB agreement" in " ".join(reasoning)
546+
assert "AC agreement" in " ".join(reasoning)
547+
548+
def test_apply_cross_validation_caps_total_adjustment(self, dispatcher):
549+
"""Test that aggregated cross-validation adjustment is capped."""
550+
backend_results = [
551+
_make_backend_result("backend_a", confidence=0.5),
552+
_make_backend_result("backend_b", confidence=0.6),
553+
_make_backend_result("backend_c", confidence=0.7),
554+
]
555+
reasoning: list[str] = []
556+
557+
with (
558+
patch.object(
559+
dispatcher.cross_validation_registry,
560+
"get_registered_pairs",
561+
return_value=[("backend_a", "backend_b"), ("backend_a", "backend_c")],
562+
),
563+
patch.object(
564+
dispatcher.cross_validation_registry, "validate_pair"
565+
) as mock_validate_pair,
566+
):
567+
mock_validate_pair.side_effect = [
568+
{"confidence_adjustment": 0.20, "reasoning": ["AB agreement"]},
569+
{"confidence_adjustment": 0.20, "reasoning": ["AC agreement"]},
570+
None,
571+
None,
572+
]
573+
574+
adjusted_results = dispatcher._apply_cross_validation(
575+
backend_results, reasoning
576+
)
577+
578+
adjusted_a = next(r for r in adjusted_results if r.backend_name == "backend_a")
579+
# Capped at +0.25 total adjustment for a single backend.
580+
assert adjusted_a.confidence == pytest.approx(0.75)
581+
582+
def test_apply_cross_validation_skips_pairs_without_found_match(self, dispatcher):
583+
"""Test cross-validation is not applied when paired backend is not FOUND."""
584+
backend_results = [
585+
_make_backend_result(
586+
"backend_a", confidence=0.5, status=BackendStatus.FOUND
587+
),
588+
_make_backend_result(
589+
"backend_b",
590+
confidence=0.0,
591+
status=BackendStatus.NOT_FOUND,
592+
assessment=None,
593+
),
594+
]
595+
reasoning: list[str] = []
596+
597+
with (
598+
patch.object(
599+
dispatcher.cross_validation_registry,
600+
"get_registered_pairs",
601+
return_value=[("backend_a", "backend_b")],
602+
),
603+
patch.object(
604+
dispatcher.cross_validation_registry, "validate_pair"
605+
) as mock_validate_pair,
606+
):
607+
adjusted_results = dispatcher._apply_cross_validation(
608+
backend_results, reasoning
609+
)
610+
611+
adjusted_a = next(r for r in adjusted_results if r.backend_name == "backend_a")
612+
assert adjusted_a.confidence == pytest.approx(0.5)
613+
assert "cross_validations" not in adjusted_a.data
614+
mock_validate_pair.assert_not_called()

0 commit comments

Comments (0)