Skip to content

Commit 6b75f40

Browse files
authored
test(#972): backfill observability regression coverage (#1086)
Backfills focused regression tests originally scoped to PRs #947, #949, #951 that were superseded by the consolidated #953 merge: - OTEL_SERVICE_NAME propagation via configure_logging and FoundryTracer - AZURE_TRACING_ENABLED guard when no App Insights exporter is configured - Critical operational logging for fail-open memory, model invocation, circuit breaker state transitions, and event-handler error paths
1 parent 15a2894 commit 6b75f40

6 files changed

Lines changed: 330 additions & 12 deletions

File tree

apps/search-enrichment-agent/tests/test_event_handlers.py

Lines changed: 43 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
from __future__ import annotations
44

55
import json
6+
import logging
67
from unittest.mock import AsyncMock, MagicMock
78

89
import pytest
@@ -20,6 +21,20 @@ def _clear_service_traces() -> None:
2021
tracer.clear()
2122

2223

24+
@pytest.fixture
25+
def event_logger(monkeypatch: pytest.MonkeyPatch) -> logging.Logger:
26+
"""Return a capturable logger for Event Hub handler assertions."""
27+
logger = logging.getLogger("search_enrichment_event_handler_test")
28+
logger.handlers.clear()
29+
logger.setLevel(logging.INFO)
30+
logger.propagate = True
31+
monkeypatch.setattr(
32+
"search_enrichment_agent.event_handlers.configure_logging",
33+
lambda app_name: logger,
34+
)
35+
return logger
36+
37+
2338
def _eventhub_liveness_events() -> list[dict[str, object]]:
2439
return [
2540
event
@@ -36,7 +51,11 @@ async def test_build_event_handlers_includes_search_enrichment_jobs() -> None:
3651

3752

3853
@pytest.mark.asyncio
39-
async def test_search_enrichment_event_handler_processes_job_with_mocks() -> None:
54+
async def test_search_enrichment_event_handler_processes_job_with_mocks(
55+
event_logger: logging.Logger,
56+
caplog: pytest.LogCaptureFixture,
57+
) -> None:
58+
caplog.set_level(logging.INFO, logger=event_logger.name)
4059
approved_truth = AsyncMock()
4160
approved_truth.get_approved_data = AsyncMock(
4261
return_value={
@@ -80,10 +99,18 @@ async def test_search_enrichment_event_handler_processes_job_with_mocks() -> Non
8099
"entity_id": "SKU-9",
81100
"status": "enriched",
82101
}
102+
assert any(
103+
"search_enrichment_event_processed entity_id=SKU-9 status=enriched" in record.getMessage()
104+
for record in caplog.records
105+
)
83106

84107

85108
@pytest.mark.asyncio
86-
async def test_search_enrichment_event_handler_traces_missing_entity() -> None:
109+
async def test_search_enrichment_event_handler_traces_missing_entity(
110+
event_logger: logging.Logger,
111+
caplog: pytest.LogCaptureFixture,
112+
) -> None:
113+
caplog.set_level(logging.INFO, logger=event_logger.name)
87114
approved_truth = AsyncMock()
88115
approved_truth.get_approved_data = AsyncMock(return_value=None)
89116

@@ -119,10 +146,18 @@ async def test_search_enrichment_event_handler_traces_missing_entity() -> None:
119146
"eventhub": "search-enrichment-jobs",
120147
"status": "skipped",
121148
}
149+
assert any(
150+
"search_enrichment_event_skipped_missing_entity" in record.getMessage()
151+
for record in caplog.records
152+
)
122153

123154

124155
@pytest.mark.asyncio
125-
async def test_search_enrichment_event_handler_traces_error_when_orchestrator_raises() -> None:
156+
async def test_search_enrichment_event_handler_traces_error_when_orchestrator_raises(
157+
event_logger: logging.Logger,
158+
caplog: pytest.LogCaptureFixture,
159+
) -> None:
160+
caplog.set_level(logging.INFO, logger=event_logger.name)
126161
approved_truth = AsyncMock()
127162
approved_truth.get_approved_data = AsyncMock(side_effect=RuntimeError("eventhub boom"))
128163

@@ -160,3 +195,8 @@ async def test_search_enrichment_event_handler_traces_error_when_orchestrator_ra
160195
"entity_id": "SKU-9",
161196
"status": "error",
162197
}
198+
assert any(
199+
"search_enrichment_event_processing_failed entity_id=SKU-9 error=eventhub boom"
200+
in record.getMessage()
201+
for record in caplog.records
202+
)

lib/tests/test_agents_base.py

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
"""Tests for base agent functionality."""
22

3+
import asyncio
4+
import logging
35
from unittest.mock import AsyncMock, Mock
46

57
import pytest
@@ -182,6 +184,54 @@ async def test_invoke_model_with_routing(self, slm_target, llm_target):
182184
assert result is not None
183185
assert result.get("_target") == "test-slm"
184186

187+
@pytest.mark.asyncio
188+
async def test_invoke_model_logs_provider_failure(self, caplog):
189+
"""Provider failures emit an error log before propagating."""
190+
191+
async def failing_invoker(**_kwargs):
192+
raise RuntimeError("provider down")
193+
194+
slm = ModelTarget(name="slm", model="small", invoker=failing_invoker)
195+
deps = AgentDependencies(slm=slm, llm=None, service_name="critical-log-test")
196+
agent = SimpleTestAgent(config=deps)
197+
198+
with caplog.at_level(logging.ERROR, logger="holiday_peak_lib.agents.base_agent"):
199+
with pytest.raises(RuntimeError, match="provider down"):
200+
await agent.invoke_model(
201+
{"query": "hello"},
202+
[{"role": "user", "content": "hello"}],
203+
)
204+
205+
assert any(
206+
"agent_model_invocation_failed service=critical-log-test "
207+
"model=small error=provider down" in record.getMessage()
208+
for record in caplog.records
209+
)
210+
211+
@pytest.mark.asyncio
212+
async def test_invoke_model_logs_provider_timeout(self, caplog):
213+
"""Provider timeouts emit an error log and return the timeout fallback."""
214+
215+
async def timeout_invoker(**_kwargs):
216+
raise asyncio.TimeoutError
217+
218+
slm = ModelTarget(name="slm", model="small", invoker=timeout_invoker)
219+
deps = AgentDependencies(slm=slm, llm=None, service_name="critical-log-test")
220+
agent = SimpleTestAgent(config=deps)
221+
222+
with caplog.at_level(logging.ERROR, logger="holiday_peak_lib.agents.base_agent"):
223+
result = await agent.invoke_model(
224+
{"query": "hello"},
225+
[{"role": "user", "content": "hello"}],
226+
)
227+
228+
assert result["error"] == "timeout"
229+
assert any(
230+
"agent_model_invocation_timeout service=critical-log-test model=small"
231+
in record.getMessage()
232+
for record in caplog.records
233+
)
234+
185235
@pytest.mark.asyncio
186236
async def test_foundry_governance_strips_system_prompt(self):
187237
"""Test Foundry governance strips local system prompts from messages."""

lib/tests/test_app_factory.py

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
"""Tests for app_factory module."""
22

3+
import os
34
from unittest.mock import AsyncMock, patch
45

56
import pytest
@@ -953,3 +954,83 @@ async def handle(self, request):
953954
client = TestClient(app)
954955
response = client.post("/invoke", json={"test": "data"})
955956
assert response.json()["custom"] is True
957+
958+
959+
class TestAzureTracingGuard:
960+
"""Tests for the AZURE_TRACING_ENABLED env-var guard."""
961+
962+
def test_guard_sets_env_when_no_appinsights_and_no_tracing_var(
963+
self, mock_hot_memory, mock_warm_memory, mock_cold_memory, monkeypatch
964+
):
965+
"""build_service_app disables Azure SDK tracing when no exporter is configured."""
966+
monkeypatch.setenv("PROJECT_ENDPOINT", TEST_PROJECT_ENDPOINT)
967+
monkeypatch.setenv("FOUNDRY_AGENT_ID_FAST", "agent-fast-123")
968+
monkeypatch.setenv("MODEL_DEPLOYMENT_NAME_FAST", "gpt-4o-mini")
969+
monkeypatch.setenv("FOUNDRY_TRACING_ENABLED", "false")
970+
monkeypatch.delenv("APPLICATIONINSIGHTS_CONNECTION_STRING", raising=False)
971+
monkeypatch.delenv("APPINSIGHTS_CONNECTION_STRING", raising=False)
972+
monkeypatch.delenv("AZURE_TRACING_ENABLED", raising=False)
973+
974+
build_service_app(
975+
service_name="guard-test",
976+
agent_class=SampleServiceAgent,
977+
hot_memory=mock_hot_memory,
978+
warm_memory=mock_warm_memory,
979+
cold_memory=mock_cold_memory,
980+
)
981+
982+
assert os.environ.get("AZURE_TRACING_ENABLED") == "false"
983+
984+
def test_guard_does_not_override_explicit_tracing_enabled(
985+
self, mock_hot_memory, mock_warm_memory, mock_cold_memory, monkeypatch
986+
):
987+
"""Explicit AZURE_TRACING_ENABLED values remain authoritative."""
988+
monkeypatch.setenv("PROJECT_ENDPOINT", TEST_PROJECT_ENDPOINT)
989+
monkeypatch.setenv("FOUNDRY_AGENT_ID_FAST", "agent-fast-123")
990+
monkeypatch.setenv("MODEL_DEPLOYMENT_NAME_FAST", "gpt-4o-mini")
991+
monkeypatch.setenv("FOUNDRY_TRACING_ENABLED", "false")
992+
monkeypatch.delenv("APPLICATIONINSIGHTS_CONNECTION_STRING", raising=False)
993+
monkeypatch.delenv("APPINSIGHTS_CONNECTION_STRING", raising=False)
994+
monkeypatch.setenv("AZURE_TRACING_ENABLED", "true")
995+
996+
build_service_app(
997+
service_name="guard-test-no-override",
998+
agent_class=SampleServiceAgent,
999+
hot_memory=mock_hot_memory,
1000+
warm_memory=mock_warm_memory,
1001+
cold_memory=mock_cold_memory,
1002+
)
1003+
1004+
assert os.environ.get("AZURE_TRACING_ENABLED") == "true"
1005+
1006+
def test_guard_does_not_activate_when_appinsights_configured(
1007+
self, mock_hot_memory, mock_warm_memory, mock_cold_memory, monkeypatch
1008+
):
1009+
"""The guard stays inactive when Application Insights is configured."""
1010+
import azure.monitor.opentelemetry as azure_monitor
1011+
1012+
monkeypatch.setattr(
1013+
azure_monitor,
1014+
"configure_azure_monitor",
1015+
lambda **_kwargs: None,
1016+
)
1017+
monkeypatch.setenv("PROJECT_ENDPOINT", TEST_PROJECT_ENDPOINT)
1018+
monkeypatch.setenv("FOUNDRY_AGENT_ID_FAST", "agent-fast-123")
1019+
monkeypatch.setenv("MODEL_DEPLOYMENT_NAME_FAST", "gpt-4o-mini")
1020+
monkeypatch.setenv("FOUNDRY_TRACING_ENABLED", "false")
1021+
monkeypatch.setenv(
1022+
"APPLICATIONINSIGHTS_CONNECTION_STRING",
1023+
"InstrumentationKey=00000000-0000-0000-0000-000000000000",
1024+
)
1025+
monkeypatch.delenv("APPINSIGHTS_CONNECTION_STRING", raising=False)
1026+
monkeypatch.delenv("AZURE_TRACING_ENABLED", raising=False)
1027+
1028+
build_service_app(
1029+
service_name="guard-test-appinsights",
1030+
agent_class=SampleServiceAgent,
1031+
hot_memory=mock_hot_memory,
1032+
warm_memory=mock_warm_memory,
1033+
cold_memory=mock_cold_memory,
1034+
)
1035+
1036+
assert os.environ.get("AZURE_TRACING_ENABLED") is None

lib/tests/test_circuit_breaker.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
"""Unit tests for the async circuit breaker."""
22

33
import asyncio
4+
import logging
45

56
import pytest
67
from holiday_peak_lib.utils.circuit_breaker import (
@@ -42,6 +43,27 @@ async def failing():
4243
assert cb.state == CircuitState.OPEN
4344
assert cb.failure_count == 3
4445

46+
async def test_failure_and_open_transition_are_logged(self, caplog):
47+
cb = CircuitBreaker("inventory-api", failure_threshold=1, recovery_timeout=30.0)
48+
49+
async def failing():
50+
raise RuntimeError("backend unavailable")
51+
52+
with caplog.at_level(logging.INFO, logger="holiday_peak_lib.utils.circuit_breaker"):
53+
with pytest.raises(RuntimeError, match="backend unavailable"):
54+
await cb.call(failing)
55+
56+
messages = [record.getMessage() for record in caplog.records]
57+
assert any(
58+
"circuit_breaker_failure name=inventory-api state=closed "
59+
"failure_count=1 threshold=1 error=backend unavailable" in message
60+
for message in messages
61+
)
62+
assert any(
63+
"circuit_breaker name=inventory-api state=open failures=1" in message
64+
for message in messages
65+
)
66+
4567
async def test_open_circuit_raises_without_fallback(self):
4668
cb = CircuitBreaker("test", failure_threshold=1, recovery_timeout=30.0)
4769

lib/tests/test_session_manager.py

Lines changed: 47 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
"""Tests for session_manager: smart session continuity logic."""
22

33
import json
4+
import logging
45
import time
56
from unittest.mock import AsyncMock
67

@@ -172,8 +173,8 @@ async def test_keyword_divergence_returns_fresh(self):
172173
assert decision.continue_session is False
173174

174175
@pytest.mark.asyncio
175-
async def test_cosmos_read_failure_still_continues(self):
176-
"""If Cosmos is unavailable, still continue but without session state."""
176+
async def test_cosmos_read_failure_still_continues(self, caplog):
177+
"""If Cosmos is unavailable, continue without state and log the failure."""
177178
hot = AsyncMock()
178179
warm = AsyncMock()
179180
summary = SessionSummary(
@@ -187,15 +188,26 @@ async def test_cosmos_read_failure_still_continues(self):
187188
)
188189
hot.get = AsyncMock(return_value=json.dumps(summary.__dict__))
189190
warm.read = AsyncMock(side_effect=Exception("Cosmos unavailable"))
190-
decision = await evaluate_session_continuity(
191-
hot,
192-
warm,
193-
{"query": "shipping eta tracking update"},
194-
service="svc",
195-
entity_id="ent1",
196-
)
191+
192+
with caplog.at_level(
193+
logging.WARNING,
194+
logger="holiday_peak_lib.agents.memory.session_manager",
195+
):
196+
decision = await evaluate_session_continuity(
197+
hot,
198+
warm,
199+
{"query": "shipping eta tracking update"},
200+
service="svc",
201+
entity_id="ent1",
202+
)
203+
197204
assert decision.continue_session is True
198205
assert decision.foundry_session_state is None
206+
assert any(
207+
"session_continuity_cosmos_read_failed session_id=svc:ent1:123 "
208+
"service=svc entity_id=ent1 error=Cosmos unavailable" in record.getMessage()
209+
for record in caplog.records
210+
)
199211

200212

201213
class TestBuildSessionSummary:
@@ -327,3 +339,29 @@ async def test_none_session_state_noop(self):
327339
summary_text="",
328340
)
329341
warm.upsert.assert_not_called()
342+
343+
@pytest.mark.asyncio
344+
async def test_cosmos_write_failure_logs_warning(self, caplog):
345+
warm = AsyncMock()
346+
warm.upsert = AsyncMock(side_effect=RuntimeError("Cosmos write unavailable"))
347+
348+
with caplog.at_level(
349+
logging.WARNING,
350+
logger="holiday_peak_lib.agents.memory.session_manager",
351+
):
352+
await persist_full_session(
353+
warm,
354+
session_id="svc:ent1:123",
355+
service="svc",
356+
entity_id="ent1",
357+
foundry_session_state={"session_id": "foundry-abc"},
358+
messages=[{"role": "user", "content": "test"}],
359+
summary_text="test summary",
360+
)
361+
362+
warm.upsert.assert_awaited_once()
363+
assert any(
364+
"session_persistence_cosmos_write_failed session_id=svc:ent1:123 "
365+
"service=svc entity_id=ent1 error=Cosmos write unavailable" in record.getMessage()
366+
for record in caplog.records
367+
)

0 commit comments

Comments
 (0)