@@ -116,13 +116,22 @@ def _handle_api_error(
116116_refresh_token = None
117117_token_expires_at = None
118118
119+ # Circuit breaker for authentication failures
120+ _auth_failure_count = 0
121+ _auth_circuit_breaker_until = None
122+ _max_auth_failures = 5
123+ _circuit_breaker_duration = 300 # 5 minutes
124+ _last_circuit_breaker_rollbar_report = None
125+ _circuit_breaker_rollbar_cooldown = 120 # Only report to rollbar once every 2 minutes
119126
120- def retry_api_call (max_duration_minutes = 30 ):
127+
128+ def retry_api_call (max_duration_minutes = 30 , max_attempts = None ):
121129 """
122130 Decorator to retry API calls with exponential backoff for up to max_duration_minutes.
123131
124132 Args:
125133 max_duration_minutes (int): Maximum time to retry in minutes (default: 30)
134+ max_attempts (int): Maximum number of retry attempts (default: None for no limit)
126135
127136 Returns:
128137 Decorated function that will retry on failure
@@ -143,6 +152,20 @@ def wrapper(*args, **kwargs):
143152 attempt += 1
144153 elapsed_time = time .time () - start_time
145154
155+ # Check max attempts limit first
156+ if max_attempts and attempt >= max_attempts :
157+ error_msg = f"Max retry attempts ({ max_attempts } ) exceeded for { func .__name__ } "
158+ logger .error (error_msg )
159+ rollbar .report_message (
160+ f"Max retry attempts exceeded for { func .__name__ } " ,
161+ extra_data = {
162+ "attempt" : attempt ,
163+ "elapsed_time" : elapsed_time ,
164+ "error" : str (e ),
165+ },
166+ )
167+ raise e
168+
146169 # Calculate delay with exponential backoff (capped at 120 seconds)
147170 delay = min (base_delay * (2 ** (attempt - 1 )), 120 )
148171
@@ -155,6 +178,20 @@ def wrapper(*args, **kwargs):
155178 extra_data = {
156179 "attempt" : attempt ,
157180 "elapsed_time" : elapsed_time ,
181+ "error" : str (e ),
182+ },
183+ )
184+ raise e
185+
186+ # Don't retry on authentication errors (401, 403) for login function
187+ if func .__name__ == "login" and "401" in str (e ) or "403" in str (e ):
188+ error_msg = f"Authentication failed for { func .__name__ } - not retrying auth errors"
189+ logger .error (error_msg )
190+ rollbar .report_message (
191+ f"Authentication error - not retrying { func .__name__ } " ,
192+ extra_data = {
193+ "attempt" : attempt ,
194+ "error" : str (e ),
158195 },
159196 )
160197 raise e
@@ -177,7 +214,7 @@ def wrapper(*args, **kwargs):
177214 return decorator
178215
179216
180- @retry_api_call (max_duration_minutes = 10 )
217+ @retry_api_call (max_duration_minutes = 3 , max_attempts = 5 )
181218def login ():
182219 """
183220 Authenticate with the API and store both access and refresh tokens.
@@ -186,38 +223,89 @@ def login():
186223 str: The access token for immediate use
187224 """
188225 global _access_token , _refresh_token , _token_expires_at
226+ global \
227+ _auth_failure_count , \
228+ _auth_circuit_breaker_until , \
229+ _last_circuit_breaker_rollbar_report
230+
231+ # Check circuit breaker
232+ if _auth_circuit_breaker_until and time .time () < _auth_circuit_breaker_until :
233+ remaining = int (_auth_circuit_breaker_until - time .time ())
234+ error_msg = (
235+ f"Authentication circuit breaker active for { remaining } more seconds"
236+ )
237+ logger .error (error_msg )
189238
190- response = requests .post (
191- API_URL + "/auth" , json = {"email" : EMAIL , "password" : PASSWORD }
192- )
239+ # Only report to Rollbar if we haven't reported recently (prevent spam)
240+ current_time = time .time ()
241+ if (
242+ _last_circuit_breaker_rollbar_report is None
243+ or current_time - _last_circuit_breaker_rollbar_report
244+ >= _circuit_breaker_rollbar_cooldown
245+ ):
246+ rollbar .report_message (
247+ "Authentication circuit breaker active" ,
248+ extra_data = {"remaining_seconds" : remaining },
249+ )
250+ _last_circuit_breaker_rollbar_report = current_time
193251
194- _handle_api_error ( response , "logging in" )
252+ raise Exception ( error_msg )
195253
196- response_data = response .json ()
254+ try :
255+ response = requests .post (
256+ API_URL + "/auth" , json = {"email" : EMAIL , "password" : PASSWORD }
257+ )
197258
198- # Store both tokens based on the API response structure
199- _access_token = response_data ["access_token" ]
200- _refresh_token = response_data .get ("refresh_token" )
259+ _handle_api_error (response , "logging in" )
201260
202- # Calculate expiration time (subtract 60 seconds as buffer to refresh before expiry)
203- expires_in = response_data .get (
204- "expires_in" , 3600
205- ) # Default to 1 hour if not specified
206- _token_expires_at = time .time () + expires_in - 60
261+ response_data = response .json ()
207262
208- if _refresh_token :
209- logger .debug (
210- f"Successfully stored access and refresh tokens, expires in { expires_in } seconds"
211- )
212- else :
213- logger .warning (
214- "No refresh token found in login response - will fallback to full login for each request"
215- )
263+ # Store both tokens based on the API response structure
264+ _access_token = response_data ["access_token" ]
265+ _refresh_token = response_data .get ("refresh_token" )
216266
217- return _access_token
267+ # Calculate expiration time (subtract 60 seconds as buffer to refresh before expiry)
268+ expires_in = response_data .get (
269+ "expires_in" , 3600
270+ ) # Default to 1 hour if not specified
271+ _token_expires_at = time .time () + expires_in - 60
218272
273+ # Reset circuit breaker on successful login
274+ _auth_failure_count = 0
275+ _auth_circuit_breaker_until = None
219276
220- @retry_api_call (max_duration_minutes = 10 )
277+ if _refresh_token :
278+ logger .debug (
279+ f"Successfully stored access and refresh tokens, expires in { expires_in } seconds"
280+ )
281+ else :
282+ logger .warning (
283+ "No refresh token found in login response - will fallback to full login for each request"
284+ )
285+
286+ return _access_token
287+
288+ except Exception as e :
289+ # Increment failure count and activate circuit breaker if needed
290+ _auth_failure_count += 1
291+
292+ if _auth_failure_count >= _max_auth_failures :
293+ _auth_circuit_breaker_until = time .time () + _circuit_breaker_duration
294+ logger .error (
295+ f"Authentication circuit breaker activated after { _auth_failure_count } failures"
296+ )
297+ rollbar .report_message (
298+ "Authentication circuit breaker activated" ,
299+ extra_data = {
300+ "failure_count" : _auth_failure_count ,
301+ "circuit_breaker_duration" : _circuit_breaker_duration ,
302+ },
303+ )
304+
305+ raise e
306+
307+
308+ @retry_api_call (max_duration_minutes = 2 , max_attempts = 3 )
221309def refresh_access_token ():
222310 """
223311 Use the refresh token to get a new access token.
@@ -368,11 +456,22 @@ def make_authenticated_request(method, url, **kwargs):
368456 _refresh_token = None
369457 _token_expires_at = None
370458
371- jwt = get_access_token ()
372- kwargs ["headers" ]["Authorization" ] = f"Bearer { jwt } "
373-
374- # Retry the request with new token
375- response = requests .request (method , url , ** kwargs )
459+ # Try to get new token, but don't let this cause infinite retries
460+ try :
461+ jwt = get_access_token ()
462+ kwargs ["headers" ]["Authorization" ] = f"Bearer { jwt } "
463+
464+ # Retry the request with new token
465+ response = requests .request (method , url , ** kwargs )
466+ except Exception as e :
467+ # If authentication completely fails, don't retry - return the original 401
468+ logger .error (f"Failed to refresh authentication after 401 error: { str (e )} " )
469+ rollbar .report_message (
470+ "Authentication refresh failed after 401" ,
471+ extra_data = {"original_url" : url , "error" : str (e )},
472+ )
473+ # Return the original 401 response instead of retrying indefinitely
474+ pass
376475
377476 return response
378477
0 commit comments