Skip to content

Commit 61c68bc

Browse files
committed
structured output retry+tool-use fallback, OpenRouter tracking
1 parent 44770a8 commit 61c68bc

File tree

7 files changed

+236
-102
lines changed

7 files changed

+236
-102
lines changed

README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,8 @@ print(is_positive) # True
1717
## Usecases
1818

1919
- **Structured outputs that work** - Same code, any provider. Pydantic models, primitives, lists, tuples.
20-
- **Zero routing config** - Model name determines provider automatically (`gemini-*`, `claude-*`, `gpt-*`)
21-
- **Know what you're spending** - Every call logged with token counts and cost. `print_usage()` for totals, `print_call_timeline()` for a visual waterfall.
20+
- **Zero routing code** - Model name determines provider automatically (`gemini-*`, `claude-*`, `gpt-*`)
21+
- **Visibility: Know what you're calling and spending** - Every call logged with token counts and cost. `print_usage()` for totals, `print_call_timeline()` for a visual waterfall.
2222

2323
## Installation
2424

covenance/_version.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@
2828
commit_id: COMMIT_ID
2929
__commit_id__: COMMIT_ID
3030

31-
__version__ = version = '0.0.3'
32-
__version_tuple__ = version_tuple = (0, 0, 3)
31+
__version__ = version = '0.0.4'
32+
__version_tuple__ = version_tuple = (0, 0, 4)
3333

3434
__commit_id__ = commit_id = None

covenance/clients/anthropic_client.py

Lines changed: 62 additions & 91 deletions
Original file line numberDiff line numberDiff line change
@@ -44,40 +44,28 @@ def _create_anthropic_client() -> "Anthropic":
4444

4545
client = LazyClient(_create_anthropic_client, label="anthropic")
4646

47-
# Global verbose flag for retry logging
4847
VERBOSE = False
4948

5049

51-
def _parse_wait_time_from_error(error: Exception) -> float | None:
52-
"""Parse wait time from Anthropic rate limit error message.
53-
54-
Anthropic may provide retry timing in error messages or headers.
55-
This function attempts to extract it, but may return None to trigger
56-
exponential backoff.
50+
def _is_rate_limit_error(error: Exception) -> bool:
51+
"""Check if error indicates a rate limit (explicit type or string match)."""
52+
from anthropic import RateLimitError
53+
if isinstance(error, RateLimitError):
54+
return True
55+
error_str = str(error)
56+
return "429" in error_str or "rate limit" in error_str.lower()
5757

58-
Args:
59-
error: The exception from Anthropic API
6058

61-
Returns:
62-
Wait time in seconds if found, None otherwise
63-
"""
64-
error_str = str(error)
65-
match = re.search(r"retry.*?(\d+(?:\.\d+)?)\s*(?:seconds?|s)", error_str.lower())
59+
def _parse_wait_time_from_error(error: Exception) -> float | None:
60+
"""Parse wait time from error message. Returns None to trigger exponential backoff."""
61+
match = re.search(r"retry.*?(\d+(?:\.\d+)?)\s*(?:seconds?|s)", str(error).lower())
6662
if match:
67-
try:
68-
wait_time = float(match.group(1))
69-
return max(wait_time, 0.1)
70-
except ValueError:
71-
pass
63+
return max(float(match.group(1)), 0.1)
7264
return None
7365

7466

7567
def set_rate_limiter_verbose(verbose: bool) -> None:
76-
"""Enable or disable verbose logging for Anthropic retry logic.
77-
78-
Args:
79-
verbose: If True, print detailed logging about retry attempts and wait times
80-
"""
68+
"""Enable or disable verbose logging for Anthropic retry logic."""
8169
global VERBOSE
8270
VERBOSE = verbose
8371

@@ -95,62 +83,60 @@ def ask_anthropic[T](
9583
"""Call Anthropic API with structured output.
9684
9785
Uses the structured outputs beta (constrained decoding, guaranteed valid JSON)
98-
when SDK >= 0.74.1. Retries on rate limit errors.
86+
when SDK >= 0.74.1. Falls back to tool-use for structured output when beta is
87+
not available. Retries on rate limit errors.
9988
10089
If response_type is str or None, returns plain text.
10190
"""
102-
from anthropic import APIError, RateLimitError
103-
10491
max_attempts = 100
10592
api_client = client_override or client # type: ignore[assignment]
10693
is_plain_text = response_type is str or response_type is None
10794
use_beta = _USE_STRUCTURED_OUTPUTS_BETA and not is_plain_text
10895

96+
# Prepare tool-use fallback for structured output when beta is not available
97+
tool_name = None
98+
tools = None
99+
if not is_plain_text and not use_beta:
100+
tool_name = getattr(response_type, "__name__", "structured_output")
101+
tools = [{
102+
"name": tool_name,
103+
"description": f"Generate output matching the {tool_name} schema",
104+
"input_schema": response_type.model_json_schema(), # type: ignore[union-attr]
105+
}]
106+
109107
messages = [{"role": "user", "content": user_msg}]
110108
total_tpm_wait = 0.0
111109
started_at = datetime.now(UTC)
112110

113111
for attempt in range(max_attempts):
114112
try:
115113
if VERBOSE and attempt > 0:
116-
print(
117-
f"[Anthropic Retry] Attempt {attempt + 1}/{max_attempts} for model {model}"
118-
)
114+
print(f"[Anthropic Retry] Attempt {attempt + 1}/{max_attempts}")
115+
116+
kwargs: dict = {"model": model, "max_tokens": 21_000, "messages": messages}
117+
if temperature is not None:
118+
kwargs["temperature"] = temperature
119119

120120
if use_beta:
121-
# Structured outputs beta: guaranteed schema-valid JSON
122-
kwargs = {
123-
"model": model,
124-
"messages": messages,
121+
kwargs.update({
125122
"betas": ["structured-outputs-2025-11-13"],
126123
"output_format": response_type,
127-
# max number allowed without streaming API
128-
"max_tokens": 21_000,
129-
}
124+
})
130125
if sys_msg is not None:
131-
# Beta API requires system as list of content blocks
132126
kwargs["system"] = [{"type": "text", "text": sys_msg}]
133-
if temperature is not None:
134-
kwargs["temperature"] = temperature
135127
response = api_client.beta.messages.parse(**kwargs)
136128
else:
137-
# Plain text
138-
kwargs = {
139-
"model": model,
140-
"max_tokens": 4096,
141-
"messages": messages,
142-
}
143129
if sys_msg is not None:
144130
kwargs["system"] = sys_msg
145-
if temperature is not None:
146-
kwargs["temperature"] = temperature
131+
if tools is not None:
132+
kwargs["tools"] = tools
133+
kwargs["tool_choice"] = {"type": "tool", "name": tool_name}
147134
response = api_client.messages.create(**kwargs)
148135

149136
ended_at = datetime.now(UTC)
150137
usage = _extract_anthropic_usage(response, model=model)
151138

152139
from covenance.record import record_llm_call
153-
154140
record_llm_call(
155141
model=model,
156142
provider="anthropic",
@@ -162,71 +148,56 @@ def ask_anthropic[T](
162148
)
163149

164150
if VERBOSE and attempt > 0:
165-
print(
166-
f"[Anthropic Retry] ✓ Successfully completed after {attempt + 1} attempt(s)"
167-
)
151+
print(f"[Anthropic Retry] ✓ Completed after {attempt + 1} attempt(s)")
168152

153+
# Extract result based on response type
169154
if use_beta:
170-
# Beta returns parsed_output directly
171155
if response.parsed_output is None:
172156
raise StructuredOutputParsingError(
173-
f"Anthropic API returned None parsed_output. "
174-
f"Model: {model}, response_type: {response_type}"
157+
f"Anthropic returned None parsed_output. Model: {model}"
175158
)
176159
return response.parsed_output
177160

178-
# Plain text response
179161
if not response.content:
180162
raise StructuredOutputParsingError(
181-
f"Anthropic API returned empty content. "
182-
f"Model: {model}, response_type: {response_type}"
163+
f"Anthropic returned empty content. Model: {model}"
183164
)
184-
return response.content[0].text # type: ignore[return-value]
185165

186-
except RateLimitError as e:
187-
if attempt == max_attempts - 1:
188-
if VERBOSE:
189-
print(f"[Anthropic Retry] ✗ Failed after {max_attempts} attempts")
190-
raise
191-
192-
explicit_wait = _parse_wait_time_from_error(e)
193-
wait_time = explicit_wait if explicit_wait else exponential_backoff(attempt)
166+
if is_plain_text:
167+
return response.content[0].text # type: ignore[return-value]
194168

195-
if VERBOSE:
196-
print(
197-
f"[Anthropic Retry] Rate limit (attempt {attempt + 1}/{max_attempts}): "
198-
f"waiting {wait_time:.2f}s"
169+
# Tool-use fallback: find and parse tool_use block
170+
tool_use_block = next(
171+
(b for b in response.content if b.type == "tool_use" and b.name == tool_name),
172+
None,
173+
)
174+
if tool_use_block is None:
175+
raise StructuredOutputParsingError(
176+
f"No tool_use block returned. Model: {model}, Content: {response.content}"
199177
)
200178

201-
time.sleep(wait_time)
202-
total_tpm_wait += wait_time
203-
204-
except APIError as e:
205-
error_str = str(e)
206-
is_rate_limit = "429" in error_str or "rate limit" in error_str.lower()
207-
208-
if not is_rate_limit:
209-
if VERBOSE:
210-
print(f"[Anthropic Retry] Non-rate-limit error: {type(e).__name__}")
211-
raise
179+
try:
180+
return response_type(**tool_use_block.input) # type: ignore[return-value]
181+
except Exception as e:
182+
raise StructuredOutputParsingError(
183+
f"Failed to parse as {response_type}: {e}. Input: {tool_use_block.input}"
184+
) from e
212185

213-
if attempt == max_attempts - 1:
186+
except Exception as e:
187+
if not _is_rate_limit_error(e) or attempt == max_attempts - 1:
214188
if VERBOSE:
215-
print(f"[Anthropic Retry] ✗ Failed after {max_attempts} attempts")
189+
print(f"[Anthropic Retry] ✗ {type(e).__name__} after {attempt + 1} attempts")
216190
raise
217191

218-
explicit_wait = _parse_wait_time_from_error(e)
219-
wait_time = explicit_wait if explicit_wait else exponential_backoff(attempt)
220-
192+
wait_time = _parse_wait_time_from_error(e) or exponential_backoff(attempt)
221193
if VERBOSE:
222-
print(
223-
f"[Anthropic Retry] Rate limit (attempt {attempt + 1}/{max_attempts}): "
224-
f"waiting {wait_time:.2f}s"
225-
)
194+
print(f"[Anthropic Retry] Rate limit, waiting {wait_time:.2f}s")
226195

227196
time.sleep(wait_time)
228197
total_tpm_wait += wait_time
229198

199+
raise RuntimeError("ask_anthropic exhausted retry loop")
200+
230201

231202
def _extract_anthropic_usage(response, model: str) -> TokenUsage:
232203
"""Extract token usage from Anthropic response and record to global stats."""

covenance/record.py

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -235,7 +235,7 @@ def usage_summary(records: list[Record] | None = None) -> dict:
235235
236236
Returns:
237237
Dict with keys: calls, tokens_input, tokens_output, tokens_cached, tokens_total,
238-
cost_usd, models (set of "provider/model" strings).
238+
cost_usd, models (set of "provider/model" strings), has_openrouter (bool).
239239
"""
240240
if records is None:
241241
records = get_records()
@@ -245,6 +245,7 @@ def usage_summary(records: list[Record] | None = None) -> dict:
245245
total_output = 0
246246
total_cached = 0
247247
models_used: set[str] = set()
248+
has_openrouter = False
248249

249250
for record in records:
250251
if record.cost_usd is not None:
@@ -253,6 +254,8 @@ def usage_summary(records: list[Record] | None = None) -> dict:
253254
total_output += record.tokens_output
254255
total_cached += record.tokens_cached
255256
models_used.add(f"{record.provider}/{record.model}")
257+
if record.provider == "openrouter":
258+
has_openrouter = True
256259

257260
return {
258261
"calls": len(records),
@@ -262,6 +265,7 @@ def usage_summary(records: list[Record] | None = None) -> dict:
262265
"tokens_total": total_input + total_output,
263266
"cost_usd": total_cost,
264267
"models": models_used,
268+
"has_openrouter": has_openrouter,
265269
}
266270

267271

@@ -303,17 +307,23 @@ def print_usage(
303307
print(f" Tokens: {summary['tokens_total']:,} ({in_part}, Out: {tokens_output:,})")
304308

305309
cost_usd = summary["cost_usd"]
310+
cost_line = " Cost: "
306311
if cost_format == "cent" and cost_usd < 0.01:
307312
cost_cents = cost_usd * 100
308-
print(f" Cost: {cost_cents:.3f}¢")
313+
cost_line += f"{cost_cents:.3f}¢"
309314
elif cost_format == "exponential" and cost_usd < 0.01:
310-
print(f" Cost: ${cost_usd:.2e}")
315+
cost_line += f"${cost_usd:.2e}"
311316
else:
312317
# Plain format: show 4 decimals for small numbers, 2 decimals otherwise
313318
if cost_usd > 0 and cost_usd < 0.01:
314-
print(f" Cost: ${cost_usd:.4f}")
319+
cost_line += f"${cost_usd:.4f}"
315320
else:
316-
print(f" Cost: ${cost_usd:.2f}")
321+
cost_line += f"${cost_usd:.2f}"
322+
323+
if summary.get("has_openrouter", False):
324+
cost_line += " (excluding OpenRouter calls)"
325+
326+
print(cost_line)
317327

318328
print(f" Models: {', '.join(sorted(summary['models']))}")
319329

0 commit comments

Comments (0)