Skip to content

Commit c576bf1

Browse files
committed
structured
1 parent ffe5fbe commit c576bf1

14 files changed

+624
-198
lines changed

README.md

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,21 @@ Provider is determined by model name prefix:
135135
| `grok-*` | xAI Grok |
136136
| `org/model` (contains `/`) | OpenRouter |
137137

138+
### Structured output reliability
139+
140+
Providers differ in how they enforce JSON schema compliance:
141+
142+
| Provider | Method | Guarantee |
143+
|----------|--------|-----------|
144+
| OpenAI | [Constrained decoding](https://openai.com/index/introducing-structured-outputs-in-the-api) | 100% schema-valid JSON |
145+
| Google Gemini | [Controlled generation](https://ai.google.dev/gemini-api/docs/structured-output) | 100% schema-valid JSON |
146+
| Grok | [Constrained decoding](https://docs.x.ai/docs/guides/structured-outputs) | 100% schema-valid JSON |
147+
| Anthropic | [Structured outputs beta](https://docs.anthropic.com/en/docs/build-with-claude/structured-outputs) | 100% schema-valid JSON* |
148+
| Mistral | [Best-effort](https://docs.mistral.ai/capabilities/structured_output) | Probabilistic |
149+
| OpenRouter | Varies | Depends on underlying model |
150+
151+
*Anthropic structured outputs require SDK >= 0.74.1 (uses `anthropic-beta: structured-outputs-2025-11-13`). Mistral uses probabilistic generation. Covenance retries automatically (up to 3 times) on JSON parse errors for Mistral.
152+
138153
## API keys
139154

140155
Set environment variables for the providers you use:

covenance/clients/anthropic_client.py

Lines changed: 69 additions & 160 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,14 @@
1-
"""Anthropic Claude client with structured output support and automatic retry."""
1+
"""Anthropic Claude client with structured output support and automatic retry.
22
3+
Uses the structured outputs beta (constrained decoding) when SDK >= 0.74.1,
4+
providing guaranteed schema-valid JSON. Falls back to tool-use for older SDKs.
5+
"""
6+
7+
import re
38
import time
49
from datetime import UTC, datetime
510
from typing import TYPE_CHECKING, TypeVar
611

7-
from pydantic import BaseModel
8-
912
from covenance._lazy_client import LazyClient
1013
from covenance.exceptions import StructuredOutputParsingError, require_provider
1114
from covenance.keys import get_anthropic_api_key, require_api_key
@@ -20,6 +23,16 @@
2023

2124
T = TypeVar("T")
2225

26+
# Check SDK version for structured outputs beta support (requires >= 0.74.1)
27+
_USE_STRUCTURED_OUTPUTS_BETA = False
28+
try:
29+
from anthropic import __version__ as _anthropic_version
30+
31+
_major, _minor, _patch = map(int, _anthropic_version.split(".")[:3])
32+
_USE_STRUCTURED_OUTPUTS_BETA = (_major, _minor, _patch) >= (0, 74, 1)
33+
except Exception:
34+
pass # Fall back to tool-use if version check fails
35+
2336

2437
def _create_anthropic_client() -> "Anthropic":
2538
require_provider("anthropic")
@@ -35,26 +48,6 @@ def _create_anthropic_client() -> "Anthropic":
3548
VERBOSE = False
3649

3750

38-
def _pydantic_to_json_schema(model: type[BaseModel]) -> dict:
39-
"""Convert a Pydantic model to JSON schema for Anthropic tools.
40-
41-
Args:
42-
model: Pydantic model class
43-
44-
Returns:
45-
JSON schema dictionary compatible with Anthropic tools API
46-
"""
47-
# Get the JSON schema from Pydantic
48-
schema = model.model_json_schema()
49-
# Anthropic expects the schema directly, not wrapped in a $defs structure
50-
# Remove $defs and inline references if present
51-
if "$defs" in schema:
52-
# For simplicity, we'll use the schema as-is and let Anthropic handle it
53-
# In practice, Anthropic should handle $ref references
54-
pass
55-
return schema
56-
57-
5851
def _parse_wait_time_from_error(error: Exception) -> float | None:
5952
"""Parse wait time from Anthropic rate limit error message.
6053
@@ -69,10 +62,6 @@ def _parse_wait_time_from_error(error: Exception) -> float | None:
6962
Wait time in seconds if found, None otherwise
7063
"""
7164
error_str = str(error)
72-
# Look for common patterns in error messages
73-
# Anthropic may include retry-after information
74-
import re
75-
7665
match = re.search(r"retry.*?(\d+(?:\.\d+)?)\s*(?:seconds?|s)", error_str.lower())
7766
if match:
7867
try:
@@ -103,47 +92,23 @@ def ask_anthropic[T](
10392
record_store: "RecordStore | None" = None,
10493
temperature: float | None = None,
10594
) -> T:
106-
"""Call Anthropic API with structured output using tools parameter.
95+
"""Call Anthropic API with structured output.
10796
108-
Uses Anthropic's tools parameter with JSON schema derived from Pydantic model
109-
to get structured output. Retries up to 100 times when encountering rate limit errors.
97+
Uses the structured outputs beta (constrained decoding, guaranteed valid JSON)
98+
when SDK >= 0.74.1. Retries on rate limit errors.
11099
111-
If response_type is str, performs a standard chat completion and returns the text.
100+
If response_type is str or None, returns plain text.
112101
"""
113102
from anthropic import APIError, RateLimitError
114103

115104
max_attempts = 100
116105
api_client = client_override or client # type: ignore[assignment]
117-
118-
# Handle plain text output
119106
is_plain_text = response_type is str or response_type is None
107+
use_beta = _USE_STRUCTURED_OUTPUTS_BETA and not is_plain_text
120108

121-
if not is_plain_text:
122-
# Convert Pydantic model to JSON schema
123-
json_schema = _pydantic_to_json_schema(response_type) # type: ignore[arg-type]
124-
125-
# Create tool definition for structured output
126-
tool_name = (
127-
response_type.__name__
128-
if hasattr(response_type, "__name__")
129-
else "structured_output"
130-
)
131-
tools = [
132-
{
133-
"name": tool_name,
134-
"description": f"Generate output matching the {tool_name} schema",
135-
"input_schema": json_schema,
136-
}
137-
]
138-
else:
139-
tools = None
140-
tool_name = None
141-
142-
# Build messages array
143109
messages = [{"role": "user", "content": user_msg}]
144-
145-
total_tpm_wait = 0.0 # Accumulate TPM retry wait time
146-
started_at = datetime.now(UTC) # Record absolute start time
110+
total_tpm_wait = 0.0
111+
started_at = datetime.now(UTC)
147112

148113
for attempt in range(max_attempts):
149114
try:
@@ -152,25 +117,36 @@ def ask_anthropic[T](
152117
f"[Anthropic Retry] Attempt {attempt + 1}/{max_attempts} for model {model}"
153118
)
154119

155-
# Call Anthropic API
156-
api_kwargs = {
157-
"model": model,
158-
"max_tokens": 4096,
159-
"messages": messages,
160-
}
161-
if not is_plain_text:
162-
api_kwargs["tools"] = tools
163-
api_kwargs["tool_choice"] = {"type": "tool", "name": tool_name}
164-
165-
if sys_msg is not None:
166-
api_kwargs["system"] = sys_msg
167-
168-
if temperature is not None:
169-
api_kwargs["temperature"] = temperature
170-
171-
response = api_client.messages.create(**api_kwargs)
172-
173-
ended_at = datetime.now(UTC) # Record absolute end time
120+
if use_beta:
121+
# Structured outputs beta: guaranteed schema-valid JSON
122+
kwargs = {
123+
"model": model,
124+
"messages": messages,
125+
"betas": ["structured-outputs-2025-11-13"],
126+
"output_format": response_type,
127+
# max number allowed without streaming API
128+
"max_tokens": 21_000
129+
}
130+
if sys_msg is not None:
131+
# Beta API requires system as list of content blocks
132+
kwargs["system"] = [{"type": "text", "text": sys_msg}]
133+
if temperature is not None:
134+
kwargs["temperature"] = temperature
135+
response = api_client.beta.messages.parse(**kwargs)
136+
else:
137+
# Plain text
138+
kwargs = {
139+
"model": model,
140+
"max_tokens": 4096,
141+
"messages": messages,
142+
}
143+
if sys_msg is not None:
144+
kwargs["system"] = sys_msg
145+
if temperature is not None:
146+
kwargs["temperature"] = temperature
147+
response = api_client.messages.create(**kwargs)
148+
149+
ended_at = datetime.now(UTC)
174150
usage = _extract_anthropic_usage(response, model=model)
175151

176152
from covenance.record import record_llm_call
@@ -190,85 +166,46 @@ def ask_anthropic[T](
190166
f"[Anthropic Retry] ✓ Successfully completed after {attempt + 1} attempt(s)"
191167
)
192168

193-
if is_plain_text:
194-
if not response.content:
169+
if use_beta:
170+
# Beta returns parsed_output directly
171+
if response.parsed_output is None:
195172
raise StructuredOutputParsingError(
196-
f"Anthropic API returned empty content. "
173+
f"Anthropic API returned None parsed_output. "
197174
f"Model: {model}, response_type: {response_type}"
198175
)
199-
return response.content[0].text # type: ignore[return-value]
176+
return response.parsed_output
200177

201-
# Extract structured output from tool use
178+
# Plain text response
202179
if not response.content:
203180
raise StructuredOutputParsingError(
204181
f"Anthropic API returned empty content. "
205182
f"Model: {model}, response_type: {response_type}"
206183
)
207-
208-
# Find the tool use block
209-
tool_use_block = None
210-
for block in response.content:
211-
if block.type == "tool_use" and block.name == tool_name:
212-
tool_use_block = block
213-
break
214-
215-
if tool_use_block is None:
216-
raise StructuredOutputParsingError(
217-
f"Anthropic API did not return tool_use block. "
218-
f"Model: {model}, response_type: {response_type}, Content: {response.content}"
219-
)
220-
221-
# Parse the input as JSON and validate against Pydantic model
222-
try:
223-
parsed_data = tool_use_block.input
224-
# Validate and create Pydantic instance
225-
parsed = response_type(**parsed_data)
226-
return parsed
227-
except Exception as e:
228-
raise StructuredOutputParsingError(
229-
f"Failed to parse Anthropic response as {response_type.__name__}: {e}. "
230-
f"Model: {model}, Input: {tool_use_block.input}"
231-
) from e
184+
return response.content[0].text # type: ignore[return-value]
232185

233186
except RateLimitError as e:
234187
if attempt == max_attempts - 1:
235188
if VERBOSE:
236189
print(f"[Anthropic Retry] ✗ Failed after {max_attempts} attempts")
237190
raise
238191

239-
# Try to parse wait time from error message first
240192
explicit_wait = _parse_wait_time_from_error(e)
241-
if explicit_wait is not None:
242-
wait_time = explicit_wait
243-
if VERBOSE:
244-
print(
245-
f"[Anthropic Retry] Rate limit error (attempt {attempt + 1}/{max_attempts}): "
246-
f"using explicit wait time {wait_time:.2f}s from error message"
247-
)
248-
else:
249-
# Use exponential backoff with jitter
250-
wait_time = exponential_backoff(attempt)
251-
if VERBOSE:
252-
print(
253-
f"[Anthropic Retry] Rate limit error (attempt {attempt + 1}/{max_attempts}): "
254-
f"exponential backoff wait {wait_time:.2f}s"
255-
)
193+
wait_time = explicit_wait if explicit_wait else exponential_backoff(attempt)
256194

257195
if VERBOSE:
258-
error_str = str(e)
259-
if len(error_str) <= 300:
260-
print(f"[Anthropic Retry] Error details: {error_str}")
196+
print(
197+
f"[Anthropic Retry] Rate limit (attempt {attempt + 1}/{max_attempts}): "
198+
f"waiting {wait_time:.2f}s"
199+
)
261200

262201
time.sleep(wait_time)
263202
total_tpm_wait += wait_time
264203

265204
except APIError as e:
266-
# Handle other API errors
267205
error_str = str(e)
268206
is_rate_limit = "429" in error_str or "rate limit" in error_str.lower()
269207

270208
if not is_rate_limit:
271-
# Not a rate limit error, re-raise immediately
272209
if VERBOSE:
273210
print(f"[Anthropic Retry] Non-rate-limit error: {type(e).__name__}")
274211
raise
@@ -278,41 +215,13 @@ def ask_anthropic[T](
278215
print(f"[Anthropic Retry] ✗ Failed after {max_attempts} attempts")
279216
raise
280217

281-
# Try to parse wait time from error message first
282218
explicit_wait = _parse_wait_time_from_error(e)
283-
if explicit_wait is not None:
284-
wait_time = explicit_wait
285-
else:
286-
wait_time = exponential_backoff(attempt)
287-
288-
if VERBOSE:
289-
print(
290-
f"[Anthropic Retry] Rate limit error (attempt {attempt + 1}/{max_attempts}): "
291-
f"waiting {wait_time:.2f}s before retry"
292-
)
293-
294-
time.sleep(wait_time)
295-
total_tpm_wait += wait_time
296-
297-
except Exception as e:
298-
# Handle other potential errors
299-
error_str = str(e)
300-
is_rate_limit = "429" in error_str or "rate limit" in error_str.lower()
301-
302-
if not is_rate_limit or attempt == max_attempts - 1:
303-
if VERBOSE:
304-
print(
305-
f"[Anthropic Retry] ✗ Unexpected error or max attempts reached: {type(e).__name__}"
306-
)
307-
raise
308-
309-
# Use exponential backoff
310-
wait_time = exponential_backoff(attempt)
219+
wait_time = explicit_wait if explicit_wait else exponential_backoff(attempt)
311220

312221
if VERBOSE:
313222
print(
314-
f"[Anthropic Retry] Unexpected rate limit error (attempt {attempt + 1}/{max_attempts}): "
315-
f"waiting {wait_time:.2f}s before retry"
223+
f"[Anthropic Retry] Rate limit (attempt {attempt + 1}/{max_attempts}): "
224+
f"waiting {wait_time:.2f}s"
316225
)
317226

318227
time.sleep(wait_time)
@@ -354,7 +263,7 @@ class MovieReview(BaseModel):
354263
result = ask_anthropic(
355264
user_msg="Review the movie 'Inception' by Christopher Nolan.",
356265
response_type=MovieReview,
357-
model=ClaudeModels.haiku,
266+
model=ClaudeModels.haiku45,
358267
)
359268

360269
print(f"Movie: {result.movie_title}")

0 commit comments

Comments
 (0)