|
| 1 | +"""Wave 1-A unified telemetry tests. |
| 2 | +
|
| 3 | +Verify that: |
| 4 | + 1. Cerebras-shaped usage (``usage.prompt_tokens_details.cached_tokens``) |
| 5 | + populates ``cache_read_input_tokens`` on ``Usage``. |
| 6 | + 2. Anthropic-shaped usage (``cache_read_input_tokens`` + |
| 7 | + ``cache_creation_input_tokens`` at the usage root) populates both |
| 8 | + cache fields on ``Usage``. |
| 9 | + 3. ``cache_supported`` stays ``True`` on the new ``TurnResult`` defaults |
| 10 | + (Cerebras gpt-oss-120b + Anthropic Claude both support prompt caching). |
| 11 | + 4. ``compute_cache_hit_pct`` returns ``None`` for missing data and the |
| 12 | + correct fraction otherwise. |
| 13 | + 5. The hermes-adapter / openclaw-adapter cache-attach helpers parse the |
| 14 | + same usage shapes onto a plain stand-in object via ``setattr``. |
| 15 | +""" |
| 16 | + |
| 17 | +from __future__ import annotations |
| 18 | + |
| 19 | +from typing import Any |
| 20 | + |
| 21 | +import pytest |
| 22 | + |
| 23 | +from eliza_lifeops_bench import compute_cache_hit_pct |
| 24 | +from eliza_lifeops_bench.clients.anthropic import AnthropicClient, ANTHROPIC_PRICING |
| 25 | +from eliza_lifeops_bench.clients.base import Usage |
| 26 | +from eliza_lifeops_bench.clients.cerebras import CerebrasClient |
| 27 | +from eliza_lifeops_bench.types import TurnResult |
| 28 | + |
| 29 | + |
| 30 | +# --------------------------------------------------------------------------- |
| 31 | +# Stand-in objects so we don't have to run any real HTTP calls. |
| 32 | +# --------------------------------------------------------------------------- |
| 33 | + |
| 34 | + |
| 35 | +class _FakeResponse: |
| 36 | + """Minimal stand-in for ``httpx.Response`` returning a fixed JSON body.""" |
| 37 | + |
| 38 | + def __init__(self, body: dict[str, Any]) -> None: |
| 39 | + self._body = body |
| 40 | + self.status_code = 200 |
| 41 | + self.text = "" |
| 42 | + |
| 43 | + def json(self) -> dict[str, Any]: |
| 44 | + return self._body |
| 45 | + |
| 46 | + |
| 47 | +class _FakeAsyncClient: |
| 48 | + """Stand-in for ``httpx.AsyncClient`` that returns the fixed response.""" |
| 49 | + |
| 50 | + def __init__(self, body: dict[str, Any]) -> None: |
| 51 | + self._body = body |
| 52 | + |
| 53 | + async def post(self, *_args: Any, **_kwargs: Any) -> _FakeResponse: # noqa: ANN401 |
| 54 | + return _FakeResponse(self._body) |
| 55 | + |
| 56 | + async def aclose(self) -> None: |
| 57 | + return None |
| 58 | + |
| 59 | + |
| 60 | +class _FakeAnthropicMessage: |
| 61 | + """Mimics the typed object returned by the Anthropic SDK.""" |
| 62 | + |
| 63 | + def __init__(self, body: dict[str, Any]) -> None: |
| 64 | + self._body = body |
| 65 | + |
| 66 | + def model_dump(self) -> dict[str, Any]: |
| 67 | + return self._body |
| 68 | + |
| 69 | + |
| 70 | +class _FakeAnthropicClient: |
| 71 | + """Stand-in for the AsyncAnthropic SDK client.""" |
| 72 | + |
| 73 | + def __init__(self, body: dict[str, Any]) -> None: |
| 74 | + self._body = body |
| 75 | + |
| 76 | + class _Messages: |
| 77 | + def __init__(self, parent: _FakeAnthropicClient) -> None: |
| 78 | + self._parent = parent |
| 79 | + |
| 80 | + async def create(self, **_kwargs: Any) -> _FakeAnthropicMessage: # noqa: ANN401 |
| 81 | + return _FakeAnthropicMessage(self._parent._body) |
| 82 | + |
| 83 | + self.messages = _Messages(self) |
| 84 | + |
| 85 | + |
| 86 | +# --------------------------------------------------------------------------- |
| 87 | +# 1. Cerebras-shaped usage |
| 88 | +# --------------------------------------------------------------------------- |
| 89 | + |
| 90 | + |
@pytest.mark.asyncio
async def test_cerebras_parses_cached_tokens_into_cache_read_input_tokens(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """``prompt_tokens_details.cached_tokens`` must surface as ``cache_read_input_tokens``."""
    from eliza_lifeops_bench.clients.base import ClientCall

    monkeypatch.setenv("CEREBRAS_API_KEY", "test")

    assistant_choice = {
        "finish_reason": "stop",
        "message": {"role": "assistant", "content": "ok"},
    }
    usage_block = {
        "prompt_tokens": 1024,
        "completion_tokens": 32,
        "total_tokens": 1056,
        "prompt_tokens_details": {"cached_tokens": 896},
    }
    payload = {"choices": [assistant_choice], "usage": usage_block}

    client = CerebrasClient(model="gpt-oss-120b", http_client=_FakeAsyncClient(payload))
    call = ClientCall(messages=[{"role": "user", "content": "hi"}])
    response = await client.complete(call)

    assert response.usage.cache_read_input_tokens == 896
    assert response.usage.cache_creation_input_tokens is None
    # Legacy compatibility: cached_tokens still surfaces the same value.
    assert response.usage.cached_tokens == 896
| 117 | + |
| 118 | + |
@pytest.mark.asyncio
async def test_cerebras_missing_cache_block_yields_none(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """A usage block without ``prompt_tokens_details`` leaves both cache fields ``None``."""
    from eliza_lifeops_bench.clients.base import ClientCall

    monkeypatch.setenv("CEREBRAS_API_KEY", "test")

    assistant_choice = {
        "finish_reason": "stop",
        "message": {"role": "assistant", "content": "ok"},
    }
    # Deliberately no prompt_tokens_details key.
    usage_block = {
        "prompt_tokens": 64,
        "completion_tokens": 8,
        "total_tokens": 72,
    }
    payload = {"choices": [assistant_choice], "usage": usage_block}

    client = CerebrasClient(model="gpt-oss-120b", http_client=_FakeAsyncClient(payload))
    call = ClientCall(messages=[{"role": "user", "content": "hi"}])
    response = await client.complete(call)

    # Per AGENTS.md Cmd #8: missing data stays None, not silent 0.
    assert response.usage.cache_read_input_tokens is None
    assert response.usage.cache_creation_input_tokens is None
| 144 | + |
| 145 | + |
| 146 | +# --------------------------------------------------------------------------- |
| 147 | +# 2. Anthropic-shaped usage |
| 148 | +# --------------------------------------------------------------------------- |
| 149 | + |
| 150 | + |
@pytest.mark.asyncio
async def test_anthropic_parses_cache_creation_and_cache_read(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Anthropic root-level cache counters reach ``Usage`` and feed the cost figure.

    Checks that ``cache_read_input_tokens`` / ``cache_creation_input_tokens``
    from the response's ``usage`` block are copied onto ``response.usage``, and
    that ``response.cost_usd`` matches the input/output/cache-read sum with the
    billable-input term floored at zero.
    """
    monkeypatch.setenv("ANTHROPIC_API_KEY", "test")
    body = {
        "content": [{"type": "text", "text": "ok"}],
        "stop_reason": "end_turn",
        "usage": {
            "input_tokens": 200,
            "output_tokens": 50,
            "cache_read_input_tokens": 1024,
            "cache_creation_input_tokens": 128,
        },
    }
    client = AnthropicClient(
        model="claude-opus-4-7",
        client=_FakeAnthropicClient(body),
    )
    # Ensure the lazy import does not need the real SDK installed.
    monkeypatch.setattr(
        "eliza_lifeops_bench.clients.anthropic._import_anthropic_sdk",
        lambda: type(
            "AnthropicStub",
            (),
            {"APIStatusError": type("APIStatusError", (Exception,), {})},
        ),
    )

    from eliza_lifeops_bench.clients.base import ClientCall

    response = await client.complete(
        ClientCall(messages=[{"role": "user", "content": "hi"}])
    )

    assert response.usage.cache_read_input_tokens == 1024
    assert response.usage.cache_creation_input_tokens == 128

    # Cost calculation should account for cache_read at the discount tier.
    pricing = ANTHROPIC_PRICING["claude-opus-4-7"]
    # billable_input clamps at 0 in the helper (200 - 1024 is negative here), so
    # the expected figure uses max(0, ...) for the input term.
    # NOTE(review): there is no cache-creation term even though 128 creation
    # tokens are reported — confirm the pricing helper leaves them unbilled.
    clamped_cost = (
        max(0, 200 - 1024) / 1_000_000 * pricing["input_per_million_usd"]
        + 50 / 1_000_000 * pricing["output_per_million_usd"]
        + 1024 / 1_000_000 * pricing["cache_read_per_million_usd"]
    )
    assert response.cost_usd >= 0
    assert response.cost_usd == pytest.approx(clamped_cost)
| 206 | + |
| 207 | + |
| 208 | +# --------------------------------------------------------------------------- |
| 209 | +# 3. cache_supported defaults + TurnResult cache fields |
| 210 | +# --------------------------------------------------------------------------- |
| 211 | + |
| 212 | + |
def test_turn_result_cache_supported_default_is_true() -> None:
    """With only the required fields, ``cache_supported`` is True and cache fields are None."""
    required_fields = {
        "turn_number": 1,
        "agent_message": "ok",
        "agent_actions": [],
        "user_response": "",
        "latency_ms": 10,
        "input_tokens": 10,
        "output_tokens": 5,
        "cost_usd": 0.0,
    }
    turn = TurnResult(**required_fields)

    # Cerebras gpt-oss-120b, OpenAI, and Anthropic all support caching →
    # the dataclass default is hard True.
    assert turn.cache_supported is True
    # Nullable cache fields default to None — never silently 0.
    for nullable in (
        turn.cache_read_input_tokens,
        turn.cache_creation_input_tokens,
        turn.cache_hit_pct,
    ):
        assert nullable is None
| 231 | + |
| 232 | + |
def test_turn_result_accepts_cache_fields() -> None:
    """Explicit cache/telemetry keyword arguments are stored on ``TurnResult`` as given."""
    hit_pct = compute_cache_hit_pct(200, 1024, 128)
    turn = TurnResult(
        turn_number=2,
        agent_message="ok",
        agent_actions=[],
        user_response="",
        latency_ms=10,
        input_tokens=200,
        output_tokens=50,
        cost_usd=0.001,
        cache_read_input_tokens=1024,
        cache_creation_input_tokens=128,
        cache_hit_pct=hit_pct,
        cache_supported=True,
        model_tier="large",
        prompt_cache_key="lifeops/cal/seed=42",
        model_name="gpt-oss-120b",
    )

    assert turn.cache_read_input_tokens == 1024
    assert turn.cache_creation_input_tokens == 128
    assert turn.cache_supported is True
    assert turn.model_tier == "large"
    assert turn.prompt_cache_key == "lifeops/cal/seed=42"
    assert turn.model_name == "gpt-oss-120b"
    # 1024 / (200 + 128 + 1024) = 1024 / 1352
    assert turn.cache_hit_pct == pytest.approx(1024 / 1352)
| 259 | + |
| 260 | + |
| 261 | +# --------------------------------------------------------------------------- |
| 262 | +# 4. compute_cache_hit_pct behavior |
| 263 | +# --------------------------------------------------------------------------- |
| 264 | + |
| 265 | + |
def test_compute_cache_hit_pct_none_when_any_input_missing() -> None:
    """A ``None`` in any one of the three positions makes the result ``None``."""
    incomplete_inputs = (
        (None, 100, 0),
        (100, None, 0),
        (100, 0, None),
    )
    for args in incomplete_inputs:
        assert compute_cache_hit_pct(*args) is None
| 270 | + |
| 271 | + |
def test_compute_cache_hit_pct_handles_zero_denominator() -> None:
    """All-zero token counts give ``0.0``."""
    pct = compute_cache_hit_pct(0, 0, 0)
    assert pct == 0.0
| 274 | + |
| 275 | + |
def test_compute_cache_hit_pct_uses_full_billed_input() -> None:
    """The ratio divides the second argument by the sum of all three arguments."""
    # denominator = 100 + 50 + 250 = 400, numerator = 250
    expected = 250 / 400
    assert compute_cache_hit_pct(100, 250, 50) == pytest.approx(expected)
| 279 | + |
| 280 | + |
| 281 | +# --------------------------------------------------------------------------- |
| 282 | +# 5. Adapter-level cache-attach helpers |
| 283 | +# --------------------------------------------------------------------------- |
| 284 | + |
| 285 | + |
| 286 | +class _AttrTurn: |
| 287 | + """Plain object that supports ``setattr`` / ``getattr`` — mirrors |
| 288 | + ``MessageTurn`` for the adapter helpers without importing eliza_lifeops_bench |
| 289 | + enum/dataclass machinery.""" |
| 290 | + |
| 291 | + |
def test_hermes_adapter_parses_cerebras_usage_onto_turn() -> None:
    """Hermes helper maps a Cerebras-shaped usage dict onto the turn object."""
    from hermes_adapter.lifeops_bench import _attach_usage_cache_fields

    cerebras_usage = {
        "prompt_tokens": 256,
        "completion_tokens": 32,
        "prompt_tokens_details": {"cached_tokens": 192},
    }
    target = _AttrTurn()
    _attach_usage_cache_fields(target, cerebras_usage)

    assert getattr(target, "input_tokens") == 256
    assert getattr(target, "output_tokens") == 32
    assert getattr(target, "cache_read_input_tokens") == 192
    assert getattr(target, "cache_creation_input_tokens") is None
    assert getattr(target, "cache_supported") is True
| 307 | + |
| 308 | + |
def test_hermes_adapter_parses_anthropic_usage_onto_turn() -> None:
    """Hermes helper maps an Anthropic-shaped usage dict onto the turn object."""
    from hermes_adapter.lifeops_bench import _attach_usage_cache_fields

    anthropic_usage = {
        "input_tokens": 200,
        "output_tokens": 50,
        "cache_read_input_tokens": 1024,
        "cache_creation_input_tokens": 128,
    }
    target = _AttrTurn()
    _attach_usage_cache_fields(target, anthropic_usage)

    assert getattr(target, "input_tokens") == 200
    assert getattr(target, "output_tokens") == 50
    assert getattr(target, "cache_read_input_tokens") == 1024
    assert getattr(target, "cache_creation_input_tokens") == 128
    assert getattr(target, "cache_supported") is True
| 325 | + |
| 326 | + |
def test_openclaw_adapter_parses_cerebras_usage_onto_turn() -> None:
    """OpenClaw helper maps Cerebras ``cached_tokens`` to ``cache_read_input_tokens``."""
    from openclaw_adapter.lifeops_bench import _attach_usage_cache_fields

    cerebras_usage = {
        "prompt_tokens": 64,
        "completion_tokens": 8,
        "prompt_tokens_details": {"cached_tokens": 32},
    }
    target = _AttrTurn()
    _attach_usage_cache_fields(target, cerebras_usage)

    assert getattr(target, "cache_read_input_tokens") == 32
    assert getattr(target, "cache_creation_input_tokens") is None
    assert getattr(target, "cache_supported") is True
| 340 | + |
| 341 | + |
def test_openclaw_adapter_anthropic_shape_round_trips() -> None:
    """OpenClaw helper copies Anthropic root-level cache counters onto the turn."""
    from openclaw_adapter.lifeops_bench import _attach_usage_cache_fields

    anthropic_usage = {
        "input_tokens": 10,
        "output_tokens": 2,
        "cache_read_input_tokens": 5,
        "cache_creation_input_tokens": 3,
    }
    target = _AttrTurn()
    _attach_usage_cache_fields(target, anthropic_usage)

    assert getattr(target, "cache_read_input_tokens") == 5
    assert getattr(target, "cache_creation_input_tokens") == 3
    assert getattr(target, "cache_supported") is True
| 356 | + |
| 357 | + |
def test_openclaw_adapter_missing_cache_stays_none() -> None:
    """With no cache keys in the usage dict, both cache attributes are set to ``None``."""
    from openclaw_adapter.lifeops_bench import _attach_usage_cache_fields

    # Token counts only; no cache information of either shape.
    bare_usage = {
        "prompt_tokens": 1,
        "completion_tokens": 1,
    }
    target = _AttrTurn()
    _attach_usage_cache_fields(target, bare_usage)

    # No silent 0 fallback — AGENTS.md Cmd #8.
    assert getattr(target, "cache_read_input_tokens") is None
    assert getattr(target, "cache_creation_input_tokens") is None
| 371 | + |
| 372 | + |
| 373 | +# --------------------------------------------------------------------------- |
| 374 | +# 6. Plain Usage dataclass — cache fields preserved through frozen=True |
| 375 | +# --------------------------------------------------------------------------- |
| 376 | + |
| 377 | + |
def test_usage_dataclass_carries_new_cache_fields() -> None:
    """``Usage`` stores the legacy ``cached_tokens`` alongside both new cache counters."""
    field_values = {
        "prompt_tokens": 100,
        "completion_tokens": 20,
        "total_tokens": 120,
        "cached_tokens": 40,
        "cache_read_input_tokens": 40,
        "cache_creation_input_tokens": 0,
    }
    record = Usage(**field_values)

    assert record.cache_read_input_tokens == 40
    assert record.cache_creation_input_tokens == 0
    assert record.cached_tokens == 40
0 commit comments