Skip to content

Commit 7f198d5

Browse files
committed
Reduce rollbar spam and fail fast on invalid credentials
1 parent 43c1047 commit 7f198d5

File tree

1 file changed

+129
-30
lines changed

1 file changed

+129
-30
lines changed

gefcore/api.py

Lines changed: 129 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -116,13 +116,22 @@ def _handle_api_error(
116116
_refresh_token = None
117117
_token_expires_at = None
118118

119+
# Circuit breaker for authentication failures
120+
_auth_failure_count = 0
121+
_auth_circuit_breaker_until = None
122+
_max_auth_failures = 5
123+
_circuit_breaker_duration = 300 # 5 minutes
124+
_last_circuit_breaker_rollbar_report = None
125+
_circuit_breaker_rollbar_cooldown = 120 # Only report to rollbar once every 2 minutes
119126

120-
def retry_api_call(max_duration_minutes=30):
127+
128+
def retry_api_call(max_duration_minutes=30, max_attempts=None):
121129
"""
122130
Decorator to retry API calls with exponential backoff for up to max_duration_minutes.
123131
124132
Args:
125133
max_duration_minutes (int): Maximum time to retry in minutes (default: 30)
134+
max_attempts (int): Maximum number of retry attempts (default: None for no limit)
126135
127136
Returns:
128137
Decorated function that will retry on failure
@@ -143,6 +152,20 @@ def wrapper(*args, **kwargs):
143152
attempt += 1
144153
elapsed_time = time.time() - start_time
145154

155+
# Check max attempts limit first
156+
if max_attempts and attempt >= max_attempts:
157+
error_msg = f"Max retry attempts ({max_attempts}) exceeded for {func.__name__}"
158+
logger.error(error_msg)
159+
rollbar.report_message(
160+
f"Max retry attempts exceeded for {func.__name__}",
161+
extra_data={
162+
"attempt": attempt,
163+
"elapsed_time": elapsed_time,
164+
"error": str(e),
165+
},
166+
)
167+
raise e
168+
146169
# Calculate delay with exponential backoff (capped at 120 seconds)
147170
delay = min(base_delay * (2 ** (attempt - 1)), 120)
148171

@@ -155,6 +178,20 @@ def wrapper(*args, **kwargs):
155178
extra_data={
156179
"attempt": attempt,
157180
"elapsed_time": elapsed_time,
181+
"error": str(e),
182+
},
183+
)
184+
raise e
185+
186+
# Don't retry on authentication errors (401, 403) for login function
187+
if func.__name__ == "login" and "401" in str(e) or "403" in str(e):
188+
error_msg = f"Authentication failed for {func.__name__} - not retrying auth errors"
189+
logger.error(error_msg)
190+
rollbar.report_message(
191+
f"Authentication error - not retrying {func.__name__}",
192+
extra_data={
193+
"attempt": attempt,
194+
"error": str(e),
158195
},
159196
)
160197
raise e
@@ -177,7 +214,7 @@ def wrapper(*args, **kwargs):
177214
return decorator
178215

179216

180-
@retry_api_call(max_duration_minutes=10)
217+
@retry_api_call(max_duration_minutes=3, max_attempts=5)
181218
def login():
182219
"""
183220
Authenticate with the API and store both access and refresh tokens.
@@ -186,38 +223,89 @@ def login():
186223
str: The access token for immediate use
187224
"""
188225
global _access_token, _refresh_token, _token_expires_at
226+
global \
227+
_auth_failure_count, \
228+
_auth_circuit_breaker_until, \
229+
_last_circuit_breaker_rollbar_report
230+
231+
# Check circuit breaker
232+
if _auth_circuit_breaker_until and time.time() < _auth_circuit_breaker_until:
233+
remaining = int(_auth_circuit_breaker_until - time.time())
234+
error_msg = (
235+
f"Authentication circuit breaker active for {remaining} more seconds"
236+
)
237+
logger.error(error_msg)
189238

190-
response = requests.post(
191-
API_URL + "/auth", json={"email": EMAIL, "password": PASSWORD}
192-
)
239+
# Only report to Rollbar if we haven't reported recently (prevent spam)
240+
current_time = time.time()
241+
if (
242+
_last_circuit_breaker_rollbar_report is None
243+
or current_time - _last_circuit_breaker_rollbar_report
244+
>= _circuit_breaker_rollbar_cooldown
245+
):
246+
rollbar.report_message(
247+
"Authentication circuit breaker active",
248+
extra_data={"remaining_seconds": remaining},
249+
)
250+
_last_circuit_breaker_rollbar_report = current_time
193251

194-
_handle_api_error(response, "logging in")
252+
raise Exception(error_msg)
195253

196-
response_data = response.json()
254+
try:
255+
response = requests.post(
256+
API_URL + "/auth", json={"email": EMAIL, "password": PASSWORD}
257+
)
197258

198-
# Store both tokens based on the API response structure
199-
_access_token = response_data["access_token"]
200-
_refresh_token = response_data.get("refresh_token")
259+
_handle_api_error(response, "logging in")
201260

202-
# Calculate expiration time (subtract 60 seconds as buffer to refresh before expiry)
203-
expires_in = response_data.get(
204-
"expires_in", 3600
205-
) # Default to 1 hour if not specified
206-
_token_expires_at = time.time() + expires_in - 60
261+
response_data = response.json()
207262

208-
if _refresh_token:
209-
logger.debug(
210-
f"Successfully stored access and refresh tokens, expires in {expires_in} seconds"
211-
)
212-
else:
213-
logger.warning(
214-
"No refresh token found in login response - will fallback to full login for each request"
215-
)
263+
# Store both tokens based on the API response structure
264+
_access_token = response_data["access_token"]
265+
_refresh_token = response_data.get("refresh_token")
216266

217-
return _access_token
267+
# Calculate expiration time (subtract 60 seconds as buffer to refresh before expiry)
268+
expires_in = response_data.get(
269+
"expires_in", 3600
270+
) # Default to 1 hour if not specified
271+
_token_expires_at = time.time() + expires_in - 60
218272

273+
# Reset circuit breaker on successful login
274+
_auth_failure_count = 0
275+
_auth_circuit_breaker_until = None
219276

220-
@retry_api_call(max_duration_minutes=10)
277+
if _refresh_token:
278+
logger.debug(
279+
f"Successfully stored access and refresh tokens, expires in {expires_in} seconds"
280+
)
281+
else:
282+
logger.warning(
283+
"No refresh token found in login response - will fallback to full login for each request"
284+
)
285+
286+
return _access_token
287+
288+
except Exception as e:
289+
# Increment failure count and activate circuit breaker if needed
290+
_auth_failure_count += 1
291+
292+
if _auth_failure_count >= _max_auth_failures:
293+
_auth_circuit_breaker_until = time.time() + _circuit_breaker_duration
294+
logger.error(
295+
f"Authentication circuit breaker activated after {_auth_failure_count} failures"
296+
)
297+
rollbar.report_message(
298+
"Authentication circuit breaker activated",
299+
extra_data={
300+
"failure_count": _auth_failure_count,
301+
"circuit_breaker_duration": _circuit_breaker_duration,
302+
},
303+
)
304+
305+
raise e
306+
307+
308+
@retry_api_call(max_duration_minutes=2, max_attempts=3)
221309
def refresh_access_token():
222310
"""
223311
Use the refresh token to get a new access token.
@@ -368,11 +456,22 @@ def make_authenticated_request(method, url, **kwargs):
368456
_refresh_token = None
369457
_token_expires_at = None
370458

371-
jwt = get_access_token()
372-
kwargs["headers"]["Authorization"] = f"Bearer {jwt}"
373-
374-
# Retry the request with new token
375-
response = requests.request(method, url, **kwargs)
459+
# Try to get new token, but don't let this cause infinite retries
460+
try:
461+
jwt = get_access_token()
462+
kwargs["headers"]["Authorization"] = f"Bearer {jwt}"
463+
464+
# Retry the request with new token
465+
response = requests.request(method, url, **kwargs)
466+
except Exception as e:
467+
# If authentication completely fails, don't retry - return the original 401
468+
logger.error(f"Failed to refresh authentication after 401 error: {str(e)}")
469+
rollbar.report_message(
470+
"Authentication refresh failed after 401",
471+
extra_data={"original_url": url, "error": str(e)},
472+
)
473+
# Return the original 401 response instead of retrying indefinitely
474+
pass
376475

377476
return response
378477

0 commit comments

Comments
 (0)