1- """Anthropic Claude client with structured output support and automatic retry."""
1+ """Anthropic Claude client with structured output support and automatic retry.
22
3+ Uses the structured outputs beta (constrained decoding) when the anthropic SDK is >= 0.74.1,
4+ which guarantees schema-valid JSON. Older SDKs skip the beta and use the plain messages API (no tool-use fallback).
5+ """
6+
7+ import re
38import time
49from datetime import UTC , datetime
510from typing import TYPE_CHECKING , TypeVar
611
7- from pydantic import BaseModel
8-
912from covenance ._lazy_client import LazyClient
1013from covenance .exceptions import StructuredOutputParsingError , require_provider
1114from covenance .keys import get_anthropic_api_key , require_api_key
2023
2124T = TypeVar ("T" )
2225
# Minimum anthropic SDK version for which the structured outputs beta path is enabled.
_MIN_STRUCTURED_OUTPUTS_SDK = (0, 74, 1)


def _parse_sdk_version(version: str) -> tuple[int, int, int]:
    """Parse a version string into a ``(major, minor, patch)`` tuple.

    Only the leading digits of each of the first three dot-separated
    components are used, so pre-release/dev suffixes ("0.75.0rc1") do not
    break parsing, and missing components ("1.0") are padded with zeros.
    Note that a pre-release therefore compares equal to its final release.

    Args:
        version: Version string such as "0.74.1" or "1.2.0b3".

    Returns:
        The numeric (major, minor, patch) tuple.

    Raises:
        ValueError: If the string does not start with a numeric component.
    """
    numbers: list[int] = []
    for piece in version.split(".")[:3]:
        leading_digits = re.match(r"\d+", piece)
        if leading_digits is None:
            # Stop at the first non-numeric component; the rest is padded with 0.
            break
        numbers.append(int(leading_digits.group()))
    if not numbers:
        raise ValueError(f"Unrecognized version string: {version!r}")
    major, minor, patch = (numbers + [0, 0])[:3]
    return major, minor, patch


# Enable the structured outputs beta only when the installed SDK is new enough.
_USE_STRUCTURED_OUTPUTS_BETA = False
try:
    from anthropic import __version__ as _anthropic_version

    _USE_STRUCTURED_OUTPUTS_BETA = (
        _parse_sdk_version(_anthropic_version) >= _MIN_STRUCTURED_OUTPUTS_SDK
    )
except Exception:
    # Deliberately best-effort: a missing SDK or an unparseable version must not
    # break module import; the flag simply stays False (beta path disabled).
    pass
35+
2336
2437def _create_anthropic_client () -> "Anthropic" :
2538 require_provider ("anthropic" )
@@ -35,26 +48,6 @@ def _create_anthropic_client() -> "Anthropic":
3548VERBOSE = False
3649
3750
def _pydantic_to_json_schema(model: type[BaseModel]) -> dict:
    """Return a Pydantic model's JSON schema for use as an Anthropic tool schema.

    The schema is exactly what ``model.model_json_schema()`` produces. Nested
    definitions (``$defs``) and their ``$ref`` pointers are left in place; no
    inlining or other rewriting is performed.

    Args:
        model: Pydantic model class.

    Returns:
        The model's JSON schema dictionary, unmodified.
    """
    return model.model_json_schema()
56-
57-
5851def _parse_wait_time_from_error (error : Exception ) -> float | None :
5952 """Parse wait time from Anthropic rate limit error message.
6053
@@ -69,10 +62,6 @@ def _parse_wait_time_from_error(error: Exception) -> float | None:
6962 Wait time in seconds if found, None otherwise
7063 """
7164 error_str = str (error )
72- # Look for common patterns in error messages
73- # Anthropic may include retry-after information
74- import re
75-
7665 match = re .search (r"retry.*?(\d+(?:\.\d+)?)\s*(?:seconds?|s)" , error_str .lower ())
7766 if match :
7867 try :
@@ -103,47 +92,23 @@ def ask_anthropic[T](
10392 record_store : "RecordStore | None" = None ,
10493 temperature : float | None = None ,
10594) -> T :
106- """Call Anthropic API with structured output using tools parameter .
95+ """Call Anthropic API with structured output.
10796
108- Uses Anthropic's tools parameter with JSON schema derived from Pydantic model
109- to get structured output. Retries up to 100 times when encountering rate limit errors.
97+ Uses the structured outputs beta (constrained decoding, guaranteed valid JSON)
98+ when SDK >= 0.74.1. Retries on rate limit errors.
11099
111- If response_type is str, performs a standard chat completion and returns the text.
100+ If response_type is str or None, returns plain text.
112101 """
113102 from anthropic import APIError , RateLimitError
114103
115104 max_attempts = 100
116105 api_client = client_override or client # type: ignore[assignment]
117-
118- # Handle plain text output
119106 is_plain_text = response_type is str or response_type is None
107+ use_beta = _USE_STRUCTURED_OUTPUTS_BETA and not is_plain_text
120108
121- if not is_plain_text :
122- # Convert Pydantic model to JSON schema
123- json_schema = _pydantic_to_json_schema (response_type ) # type: ignore[arg-type]
124-
125- # Create tool definition for structured output
126- tool_name = (
127- response_type .__name__
128- if hasattr (response_type , "__name__" )
129- else "structured_output"
130- )
131- tools = [
132- {
133- "name" : tool_name ,
134- "description" : f"Generate output matching the { tool_name } schema" ,
135- "input_schema" : json_schema ,
136- }
137- ]
138- else :
139- tools = None
140- tool_name = None
141-
142- # Build messages array
143109 messages = [{"role" : "user" , "content" : user_msg }]
144-
145- total_tpm_wait = 0.0 # Accumulate TPM retry wait time
146- started_at = datetime .now (UTC ) # Record absolute start time
110+ total_tpm_wait = 0.0
111+ started_at = datetime .now (UTC )
147112
148113 for attempt in range (max_attempts ):
149114 try :
@@ -152,25 +117,36 @@ def ask_anthropic[T](
152117 f"[Anthropic Retry] Attempt { attempt + 1 } /{ max_attempts } for model { model } "
153118 )
154119
155- # Call Anthropic API
156- api_kwargs = {
157- "model" : model ,
158- "max_tokens" : 4096 ,
159- "messages" : messages ,
160- }
161- if not is_plain_text :
162- api_kwargs ["tools" ] = tools
163- api_kwargs ["tool_choice" ] = {"type" : "tool" , "name" : tool_name }
164-
165- if sys_msg is not None :
166- api_kwargs ["system" ] = sys_msg
167-
168- if temperature is not None :
169- api_kwargs ["temperature" ] = temperature
170-
171- response = api_client .messages .create (** api_kwargs )
172-
173- ended_at = datetime .now (UTC ) # Record absolute end time
120+ if use_beta :
121+ # Structured outputs beta: guaranteed schema-valid JSON
122+ kwargs = {
123+ "model" : model ,
124+ "messages" : messages ,
125+ "betas" : ["structured-outputs-2025-11-13" ],
126+ "output_format" : response_type ,
127+ # max number allowed without streaming API
128+ "max_tokens" : 21_000
129+ }
130+ if sys_msg is not None :
131+ # Beta API requires system as list of content blocks
132+ kwargs ["system" ] = [{"type" : "text" , "text" : sys_msg }]
133+ if temperature is not None :
134+ kwargs ["temperature" ] = temperature
135+ response = api_client .beta .messages .parse (** kwargs )
136+ else :
137+ # Plain text
138+ kwargs = {
139+ "model" : model ,
140+ "max_tokens" : 4096 ,
141+ "messages" : messages ,
142+ }
143+ if sys_msg is not None :
144+ kwargs ["system" ] = sys_msg
145+ if temperature is not None :
146+ kwargs ["temperature" ] = temperature
147+ response = api_client .messages .create (** kwargs )
148+
149+ ended_at = datetime .now (UTC )
174150 usage = _extract_anthropic_usage (response , model = model )
175151
176152 from covenance .record import record_llm_call
@@ -190,85 +166,46 @@ def ask_anthropic[T](
190166 f"[Anthropic Retry] ✓ Successfully completed after { attempt + 1 } attempt(s)"
191167 )
192168
193- if is_plain_text :
194- if not response .content :
169+ if use_beta :
170+ # Beta returns parsed_output directly
171+ if response .parsed_output is None :
195172 raise StructuredOutputParsingError (
196- f"Anthropic API returned empty content . "
173+ f"Anthropic API returned None parsed_output . "
197174 f"Model: { model } , response_type: { response_type } "
198175 )
199- return response .content [ 0 ]. text # type: ignore[return-value]
176+ return response .parsed_output
200177
201- # Extract structured output from tool use
178+ # Plain text response
202179 if not response .content :
203180 raise StructuredOutputParsingError (
204181 f"Anthropic API returned empty content. "
205182 f"Model: { model } , response_type: { response_type } "
206183 )
207-
208- # Find the tool use block
209- tool_use_block = None
210- for block in response .content :
211- if block .type == "tool_use" and block .name == tool_name :
212- tool_use_block = block
213- break
214-
215- if tool_use_block is None :
216- raise StructuredOutputParsingError (
217- f"Anthropic API did not return tool_use block. "
218- f"Model: { model } , response_type: { response_type } , Content: { response .content } "
219- )
220-
221- # Parse the input as JSON and validate against Pydantic model
222- try :
223- parsed_data = tool_use_block .input
224- # Validate and create Pydantic instance
225- parsed = response_type (** parsed_data )
226- return parsed
227- except Exception as e :
228- raise StructuredOutputParsingError (
229- f"Failed to parse Anthropic response as { response_type .__name__ } : { e } . "
230- f"Model: { model } , Input: { tool_use_block .input } "
231- ) from e
184+ return response .content [0 ].text # type: ignore[return-value]
232185
233186 except RateLimitError as e :
234187 if attempt == max_attempts - 1 :
235188 if VERBOSE :
236189 print (f"[Anthropic Retry] ✗ Failed after { max_attempts } attempts" )
237190 raise
238191
239- # Try to parse wait time from error message first
240192 explicit_wait = _parse_wait_time_from_error (e )
241- if explicit_wait is not None :
242- wait_time = explicit_wait
243- if VERBOSE :
244- print (
245- f"[Anthropic Retry] Rate limit error (attempt { attempt + 1 } /{ max_attempts } ): "
246- f"using explicit wait time { wait_time :.2f} s from error message"
247- )
248- else :
249- # Use exponential backoff with jitter
250- wait_time = exponential_backoff (attempt )
251- if VERBOSE :
252- print (
253- f"[Anthropic Retry] Rate limit error (attempt { attempt + 1 } /{ max_attempts } ): "
254- f"exponential backoff wait { wait_time :.2f} s"
255- )
193+ wait_time = explicit_wait if explicit_wait else exponential_backoff (attempt )
256194
257195 if VERBOSE :
258- error_str = str (e )
259- if len (error_str ) <= 300 :
260- print (f"[Anthropic Retry] Error details: { error_str } " )
196+ print (
197+ f"[Anthropic Retry] Rate limit (attempt { attempt + 1 } /{ max_attempts } ): "
198+ f"waiting { wait_time :.2f} s"
199+ )
261200
262201 time .sleep (wait_time )
263202 total_tpm_wait += wait_time
264203
265204 except APIError as e :
266- # Handle other API errors
267205 error_str = str (e )
268206 is_rate_limit = "429" in error_str or "rate limit" in error_str .lower ()
269207
270208 if not is_rate_limit :
271- # Not a rate limit error, re-raise immediately
272209 if VERBOSE :
273210 print (f"[Anthropic Retry] Non-rate-limit error: { type (e ).__name__ } " )
274211 raise
@@ -278,41 +215,13 @@ def ask_anthropic[T](
278215 print (f"[Anthropic Retry] ✗ Failed after { max_attempts } attempts" )
279216 raise
280217
281- # Try to parse wait time from error message first
282218 explicit_wait = _parse_wait_time_from_error (e )
283- if explicit_wait is not None :
284- wait_time = explicit_wait
285- else :
286- wait_time = exponential_backoff (attempt )
287-
288- if VERBOSE :
289- print (
290- f"[Anthropic Retry] Rate limit error (attempt { attempt + 1 } /{ max_attempts } ): "
291- f"waiting { wait_time :.2f} s before retry"
292- )
293-
294- time .sleep (wait_time )
295- total_tpm_wait += wait_time
296-
297- except Exception as e :
298- # Handle other potential errors
299- error_str = str (e )
300- is_rate_limit = "429" in error_str or "rate limit" in error_str .lower ()
301-
302- if not is_rate_limit or attempt == max_attempts - 1 :
303- if VERBOSE :
304- print (
305- f"[Anthropic Retry] ✗ Unexpected error or max attempts reached: { type (e ).__name__ } "
306- )
307- raise
308-
309- # Use exponential backoff
310- wait_time = exponential_backoff (attempt )
219+ wait_time = explicit_wait if explicit_wait else exponential_backoff (attempt )
311220
312221 if VERBOSE :
313222 print (
314- f"[Anthropic Retry] Unexpected rate limit error (attempt { attempt + 1 } /{ max_attempts } ): "
315- f"waiting { wait_time :.2f} s before retry "
223+ f"[Anthropic Retry] Rate limit (attempt { attempt + 1 } /{ max_attempts } ): "
224+ f"waiting { wait_time :.2f} s"
316225 )
317226
318227 time .sleep (wait_time )
@@ -354,7 +263,7 @@ class MovieReview(BaseModel):
354263 result = ask_anthropic (
355264 user_msg = "Review the movie 'Inception' by Christopher Nolan." ,
356265 response_type = MovieReview ,
357- model = ClaudeModels .haiku ,
266+ model = ClaudeModels .haiku45 ,
358267 )
359268
360269 print (f"Movie: { result .movie_title } " )
0 commit comments