@@ -44,40 +44,28 @@ def _create_anthropic_client() -> "Anthropic":
4444
4545client = LazyClient (_create_anthropic_client , label = "anthropic" )
4646
47- # Global verbose flag for retry logging
4847VERBOSE = False
4948
5049
51- def _parse_wait_time_from_error (error : Exception ) -> float | None :
52- """Parse wait time from Anthropic rate limit error message.
53-
54- Anthropic may provide retry timing in error messages or headers.
55- This function attempts to extract it, but may return None to trigger
56- exponential backoff.
50+ def _is_rate_limit_error (error : Exception ) -> bool :
51+ """Check if error indicates a rate limit (explicit type or string match)."""
52+ from anthropic import RateLimitError
53+ if isinstance (error , RateLimitError ):
54+ return True
55+ error_str = str (error )
56+ return "429" in error_str or "rate limit" in error_str .lower ()
5757
58- Args:
59- error: The exception from Anthropic API
6058
61- Returns:
62- Wait time in seconds if found, None otherwise
63- """
64- error_str = str (error )
65- match = re .search (r"retry.*?(\d+(?:\.\d+)?)\s*(?:seconds?|s)" , error_str .lower ())
59+ def _parse_wait_time_from_error (error : Exception ) -> float | None :
60+ """Parse wait time from error message. Returns None to trigger exponential backoff."""
61+ match = re .search (r"retry.*?(\d+(?:\.\d+)?)\s*(?:seconds?|s)" , str (error ).lower ())
6662 if match :
67- try :
68- wait_time = float (match .group (1 ))
69- return max (wait_time , 0.1 )
70- except ValueError :
71- pass
63+ return max (float (match .group (1 )), 0.1 )
7264 return None
7365
7466
def set_rate_limiter_verbose(verbose: bool) -> None:
    """Switch the module's retry logging on or off.

    Rebinds the module-level ``VERBOSE`` flag to the given value.

    Args:
        verbose: New value for the ``VERBOSE`` flag.
    """
    global VERBOSE
    VERBOSE = verbose
8371
@@ -95,62 +83,60 @@ def ask_anthropic[T](
9583 """Call Anthropic API with structured output.
9684
9785 Uses the structured outputs beta (constrained decoding, guaranteed valid JSON)
98- when SDK >= 0.74.1. Retries on rate limit errors.
86+ when SDK >= 0.74.1. Falls back to tool-use for structured output when beta is
87+ not available. Retries on rate limit errors.
9988
10089 If response_type is str or None, returns plain text.
10190 """
102- from anthropic import APIError , RateLimitError
103-
10491 max_attempts = 100
10592 api_client = client_override or client # type: ignore[assignment]
10693 is_plain_text = response_type is str or response_type is None
10794 use_beta = _USE_STRUCTURED_OUTPUTS_BETA and not is_plain_text
10895
96+ # Prepare tool-use fallback for structured output when beta is not available
97+ tool_name = None
98+ tools = None
99+ if not is_plain_text and not use_beta :
100+ tool_name = getattr (response_type , "__name__" , "structured_output" )
101+ tools = [{
102+ "name" : tool_name ,
103+ "description" : f"Generate output matching the { tool_name } schema" ,
104+ "input_schema" : response_type .model_json_schema (), # type: ignore[union-attr]
105+ }]
106+
109107 messages = [{"role" : "user" , "content" : user_msg }]
110108 total_tpm_wait = 0.0
111109 started_at = datetime .now (UTC )
112110
113111 for attempt in range (max_attempts ):
114112 try :
115113 if VERBOSE and attempt > 0 :
116- print (
117- f"[Anthropic Retry] Attempt { attempt + 1 } /{ max_attempts } for model { model } "
118- )
114+ print (f"[Anthropic Retry] Attempt { attempt + 1 } /{ max_attempts } " )
115+
116+ kwargs : dict = {"model" : model , "max_tokens" : 21_000 , "messages" : messages }
117+ if temperature is not None :
118+ kwargs ["temperature" ] = temperature
119119
120120 if use_beta :
121- # Structured outputs beta: guaranteed schema-valid JSON
122- kwargs = {
123- "model" : model ,
124- "messages" : messages ,
121+ kwargs .update ({
125122 "betas" : ["structured-outputs-2025-11-13" ],
126123 "output_format" : response_type ,
127- # max number allowed without streaming API
128- "max_tokens" : 21_000 ,
129- }
124+ })
130125 if sys_msg is not None :
131- # Beta API requires system as list of content blocks
132126 kwargs ["system" ] = [{"type" : "text" , "text" : sys_msg }]
133- if temperature is not None :
134- kwargs ["temperature" ] = temperature
135127 response = api_client .beta .messages .parse (** kwargs )
136128 else :
137- # Plain text
138- kwargs = {
139- "model" : model ,
140- "max_tokens" : 4096 ,
141- "messages" : messages ,
142- }
143129 if sys_msg is not None :
144130 kwargs ["system" ] = sys_msg
145- if temperature is not None :
146- kwargs ["temperature" ] = temperature
131+ if tools is not None :
132+ kwargs ["tools" ] = tools
133+ kwargs ["tool_choice" ] = {"type" : "tool" , "name" : tool_name }
147134 response = api_client .messages .create (** kwargs )
148135
149136 ended_at = datetime .now (UTC )
150137 usage = _extract_anthropic_usage (response , model = model )
151138
152139 from covenance .record import record_llm_call
153-
154140 record_llm_call (
155141 model = model ,
156142 provider = "anthropic" ,
@@ -162,71 +148,56 @@ def ask_anthropic[T](
162148 )
163149
164150 if VERBOSE and attempt > 0 :
165- print (
166- f"[Anthropic Retry] ✓ Successfully completed after { attempt + 1 } attempt(s)"
167- )
151+ print (f"[Anthropic Retry] ✓ Completed after { attempt + 1 } attempt(s)" )
168152
153+ # Extract result based on response type
169154 if use_beta :
170- # Beta returns parsed_output directly
171155 if response .parsed_output is None :
172156 raise StructuredOutputParsingError (
173- f"Anthropic API returned None parsed_output. "
174- f"Model: { model } , response_type: { response_type } "
157+ f"Anthropic returned None parsed_output. Model: { model } "
175158 )
176159 return response .parsed_output
177160
178- # Plain text response
179161 if not response .content :
180162 raise StructuredOutputParsingError (
181- f"Anthropic API returned empty content. "
182- f"Model: { model } , response_type: { response_type } "
163+ f"Anthropic returned empty content. Model: { model } "
183164 )
184- return response .content [0 ].text # type: ignore[return-value]
185165
186- except RateLimitError as e :
187- if attempt == max_attempts - 1 :
188- if VERBOSE :
189- print (f"[Anthropic Retry] ✗ Failed after { max_attempts } attempts" )
190- raise
191-
192- explicit_wait = _parse_wait_time_from_error (e )
193- wait_time = explicit_wait if explicit_wait else exponential_backoff (attempt )
166+ if is_plain_text :
167+ return response .content [0 ].text # type: ignore[return-value]
194168
195- if VERBOSE :
196- print (
197- f"[Anthropic Retry] Rate limit (attempt { attempt + 1 } /{ max_attempts } ): "
198- f"waiting { wait_time :.2f} s"
169+ # Tool-use fallback: find and parse tool_use block
170+ tool_use_block = next (
171+ (b for b in response .content if b .type == "tool_use" and b .name == tool_name ),
172+ None ,
173+ )
174+ if tool_use_block is None :
175+ raise StructuredOutputParsingError (
176+ f"No tool_use block returned. Model: { model } , Content: { response .content } "
199177 )
200178
201- time .sleep (wait_time )
202- total_tpm_wait += wait_time
203-
204- except APIError as e :
205- error_str = str (e )
206- is_rate_limit = "429" in error_str or "rate limit" in error_str .lower ()
207-
208- if not is_rate_limit :
209- if VERBOSE :
210- print (f"[Anthropic Retry] Non-rate-limit error: { type (e ).__name__ } " )
211- raise
179+ try :
180+ return response_type (** tool_use_block .input ) # type: ignore[return-value]
181+ except Exception as e :
182+ raise StructuredOutputParsingError (
183+ f"Failed to parse as { response_type } : { e } . Input: { tool_use_block .input } "
184+ ) from e
212185
213- if attempt == max_attempts - 1 :
186+ except Exception as e :
187+ if not _is_rate_limit_error (e ) or attempt == max_attempts - 1 :
214188 if VERBOSE :
215- print (f"[Anthropic Retry] ✗ Failed after { max_attempts } attempts" )
189+ print (f"[Anthropic Retry] ✗ { type ( e ). __name__ } after { attempt + 1 } attempts" )
216190 raise
217191
218- explicit_wait = _parse_wait_time_from_error (e )
219- wait_time = explicit_wait if explicit_wait else exponential_backoff (attempt )
220-
192+ wait_time = _parse_wait_time_from_error (e ) or exponential_backoff (attempt )
221193 if VERBOSE :
222- print (
223- f"[Anthropic Retry] Rate limit (attempt { attempt + 1 } /{ max_attempts } ): "
224- f"waiting { wait_time :.2f} s"
225- )
194+ print (f"[Anthropic Retry] Rate limit, waiting { wait_time :.2f} s" )
226195
227196 time .sleep (wait_time )
228197 total_tpm_wait += wait_time
229198
199+ raise RuntimeError ("ask_anthropic exhausted retry loop" )
200+
230201
231202def _extract_anthropic_usage (response , model : str ) -> TokenUsage :
232203 """Extract token usage from Anthropic response and record to global stats."""
0 commit comments