Skip to content

Commit 643b1c4

Browse files
feat: App token refreshing (#289)
This PR implements app token refreshing! Thank you for your patience with the previous refactoring PRs that prepared for this one. - As part of `has_calls_remaining`, the amount of time left until estimated token expiry is checked, and tokens are refreshed if expiry is close. - A new config option `expiry_time_buffer` is added, controlling how many minutes before expiry the app token will be refreshed. This parallels how `rate_limit_buffer` is used to ensure we don't push tokens all the way to their limits. --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
1 parent 8c75c9b commit 643b1c4

File tree

4 files changed

+187
-16
lines changed

4 files changed

+187
-16
lines changed

README.md

+9-8
Original file line numberDiff line numberDiff line change
@@ -25,13 +25,13 @@ A list of release versions is available at https://github.com/MeltanoLabs/tap-gi
2525
This tap accepts the following configuration options:
2626

2727
- Required: One and only one of the following modes:
28-
1. `repositories`: an array of strings specifying the GitHub repositories to be included. Each element of the array should be of the form `<org>/<repository>`, e.g. `MeltanoLabs/tap-github`.
29-
2. `organizations`: an array of strings containing the github organizations to be included
30-
3. `searches`: an array of search descriptor objects with the following properties:
31-
- `name`: a human readable name for the search query
32-
- `query`: a github search string (generally the same as would come after `?q=` in the URL)
33-
4. `user_usernames`: a list of github usernames
34-
5. `user_ids`: a list of github user ids [int]
28+
1. `repositories`: An array of strings specifying the GitHub repositories to be included. Each element of the array should be of the form `<org>/<repository>`, e.g. `MeltanoLabs/tap-github`.
29+
2. `organizations`: An array of strings containing the github organizations to be included
30+
3. `searches`: An array of search descriptor objects with the following properties:
31+
- `name`: A human readable name for the search query
32+
- `query`: A github search string (generally the same as would come after `?q=` in the URL)
33+
4. `user_usernames`: A list of github usernames
34+
5. `user_ids`: A list of github user ids [int]
3535
- Highly recommended:
3636
- `auth_token` - GitHub token to authenticate with.
3737
- `additional_auth_tokens` - List of GitHub tokens to authenticate with. Streams will loop through them when hitting rate limits..
@@ -43,7 +43,8 @@ This tap accepts the following configuration options:
4343
- `metrics_log_level`
4444
- `stream_maps`
4545
- `stream_maps_config`
46-
- `rate_limit_buffer` - A buffer to avoid consuming all query points for the auth_token at hand. Defaults to 1000.",
46+
- `rate_limit_buffer` - A buffer to avoid consuming all query points for the auth_token at hand. Defaults to 1000.
47+
- `expiry_time_buffer` - A buffer used when determining when to refresh Github app tokens. Only relevant when authenticating as a GitHub app. Defaults to 10 minutes. Tokens generated by GitHub apps expire 1 hour after creation, and will be refreshed once fewer than `expiry_time_buffer` minutes remain until the anticipated expiry time.
4748

4849
Note that modes 1-3 are `repository` modes and 4-5 are `user` modes and will not run the same set of streams.
4950

meltano.yml

+2
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@ plugins:
2121
kind: array
2222
- name: rate_limit_buffer
2323
kind: integer
24+
- name: expiry_time_buffer
25+
kind: integer
2426
- name: searches
2527
kind: array
2628
- name: organizations

tap_github/authenticator.py

+45-6
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
import logging
44
import time
55
from copy import deepcopy
6-
from datetime import datetime, timedelta
6+
from datetime import datetime, timedelta, timezone
77
from os import environ
88
from random import choice, shuffle
99
from typing import Any, Dict, List, Optional, Set, Tuple
@@ -36,7 +36,7 @@ def __init__(
3636
self.logger = logger
3737
self.rate_limit = self.DEFAULT_RATE_LIMIT
3838
self.rate_limit_remaining = self.DEFAULT_RATE_LIMIT
39-
self.rate_limit_reset: Optional[int] = None
39+
self.rate_limit_reset: Optional[datetime] = None
4040
self.rate_limit_used = 0
4141
self.rate_limit_buffer = (
4242
rate_limit_buffer
@@ -47,7 +47,9 @@ def __init__(
4747
def update_rate_limit(self, response_headers: Any) -> None:
4848
self.rate_limit = int(response_headers["X-RateLimit-Limit"])
4949
self.rate_limit_remaining = int(response_headers["X-RateLimit-Remaining"])
50-
self.rate_limit_reset = int(response_headers["X-RateLimit-Reset"])
50+
self.rate_limit_reset = datetime.utcfromtimestamp(
51+
int(response_headers["X-RateLimit-Reset"])
52+
)
5153
self.rate_limit_used = int(response_headers["X-RateLimit-Used"])
5254

5355
def is_valid_token(self) -> bool:
@@ -84,7 +86,7 @@ def has_calls_remaining(self) -> bool:
8486
return True
8587
if (
8688
self.rate_limit_used > (self.rate_limit - self.rate_limit_buffer)
87-
and self.rate_limit_reset > datetime.now().timestamp()
89+
and self.rate_limit_reset > datetime.now()
8890
):
8991
return False
9092
return True
@@ -159,7 +161,13 @@ class AppTokenManager(TokenManager):
159161
DEFAULT_RATE_LIMIT = 15000
160162
DEFAULT_EXPIRY_BUFFER_MINS = 10
161163

162-
def __init__(self, env_key: str, rate_limit_buffer: Optional[int] = None, **kwargs):
164+
def __init__(
165+
self,
166+
env_key: str,
167+
rate_limit_buffer: Optional[int] = None,
168+
expiry_time_buffer: Optional[int] = None,
169+
**kwargs,
170+
):
163171
if rate_limit_buffer is None:
164172
rate_limit_buffer = self.DEFAULT_RATE_LIMIT_BUFFER
165173
super().__init__(None, rate_limit_buffer=rate_limit_buffer, **kwargs)
@@ -171,6 +179,10 @@ def __init__(self, env_key: str, rate_limit_buffer: Optional[int] = None, **kwar
171179
parts[2] if len(parts) >= 3 else None
172180
)
173181

182+
if expiry_time_buffer is None:
183+
expiry_time_buffer = self.DEFAULT_EXPIRY_BUFFER_MINS
184+
self.expiry_time_buffer = expiry_time_buffer
185+
174186
self.token_expires_at: Optional[datetime] = None
175187
self.claim_token()
176188

@@ -204,6 +216,29 @@ def claim_token(self):
204216
self.token = None
205217
self.token_expires_at = None
206218

219+
def has_calls_remaining(self) -> bool:
220+
"""Check if a token has capacity to make more calls.
221+
222+
Returns:
223+
True if the token is valid and has enough api calls remaining.
224+
"""
225+
if self.token_expires_at is not None:
226+
close_to_expiry = datetime.now() > self.token_expires_at - timedelta(
227+
minutes=self.expiry_time_buffer
228+
)
229+
230+
if close_to_expiry:
231+
self.claim_token()
232+
if self.token is None:
233+
if self.logger:
234+
self.logger.warn("GitHub app token refresh failed.")
235+
return False
236+
else:
237+
if self.logger:
238+
self.logger.info("GitHub app token refresh succeeded.")
239+
240+
return super().has_calls_remaining()
241+
207242

208243
class GitHubTokenAuthenticator(APIAuthenticatorBase):
209244
"""Base class for offloading API auth."""
@@ -217,6 +252,7 @@ def prepare_tokens(self) -> List[TokenManager]:
217252

218253
env_dict = self.get_env()
219254
rate_limit_buffer = self._config.get("rate_limit_buffer", None)
255+
expiry_time_buffer = self._config.get("expiry_time_buffer", None)
220256

221257
personal_tokens: Set[str] = set()
222258
if "auth_token" in self._config:
@@ -255,7 +291,10 @@ def prepare_tokens(self) -> List[TokenManager]:
255291
env_key = env_dict["GITHUB_APP_PRIVATE_KEY"]
256292
try:
257293
app_token_manager = AppTokenManager(
258-
env_key, rate_limit_buffer=rate_limit_buffer, logger=self.logger
294+
env_key,
295+
rate_limit_buffer=rate_limit_buffer,
296+
expiry_time_buffer=expiry_time_buffer,
297+
logger=self.logger,
259298
)
260299
if app_token_manager.is_valid_token():
261300
token_managers.append(app_token_manager)

tap_github/tests/test_authenticator.py

+131-2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import re
2-
from datetime import datetime, timedelta
2+
from datetime import datetime, timedelta, timezone
33
from unittest.mock import MagicMock, patch
44

55
import pytest
@@ -41,7 +41,7 @@ def test_update_rate_limit(self):
4141

4242
assert token_manager.rate_limit == 5000
4343
assert token_manager.rate_limit_remaining == 4999
44-
assert token_manager.rate_limit_reset == 1372700873
44+
assert token_manager.rate_limit_reset == datetime(2013, 7, 1, 17, 47, 53)
4545
assert token_manager.rate_limit_used == 1
4646

4747
def test_is_valid_token_successful(self):
@@ -165,6 +165,135 @@ def test_successful_token_generation(self):
165165
assert token_manager.token == "valid_token"
166166
assert token_manager.token_expires_at == token_time
167167

168+
def test_has_calls_remaining_regenerates_a_token_if_close_to_expiry(self):
169+
unexpired_time = datetime.now() + timedelta(days=1)
170+
expired_time = datetime.now() - timedelta(days=1)
171+
with patch.object(AppTokenManager, "is_valid_token", return_value=True), patch(
172+
"tap_github.authenticator.generate_app_access_token",
173+
return_value=("valid_token", unexpired_time),
174+
):
175+
mock_response_headers = {
176+
"X-RateLimit-Limit": "5000",
177+
"X-RateLimit-Remaining": "4999",
178+
"X-RateLimit-Reset": "1372700873",
179+
"X-RateLimit-Used": "1",
180+
}
181+
182+
token_manager = AppTokenManager("12345;;key\\ncontent;;67890")
183+
token_manager.logger = MagicMock()
184+
token_manager.token_expires_at = expired_time
185+
token_manager.update_rate_limit(mock_response_headers)
186+
187+
assert token_manager.has_calls_remaining()
188+
# calling has_calls_remaining() will trigger the token generation function to be called again,
189+
# so token_expires_at should have been reset back to the mocked unexpired_time
190+
assert token_manager.token_expires_at == unexpired_time
191+
token_manager.logger.info.assert_called_once()
192+
assert (
193+
"GitHub app token refresh succeeded."
194+
in token_manager.logger.info.call_args[0][0]
195+
)
196+
197+
def test_has_calls_remaining_logs_warning_if_token_regeneration_fails(self):
198+
unexpired_time = datetime.now() + timedelta(days=1)
199+
expired_time = datetime.now() - timedelta(days=1)
200+
with patch.object(
201+
AppTokenManager, "is_valid_token", return_value=True
202+
) as mock_is_valid, patch(
203+
"tap_github.authenticator.generate_app_access_token",
204+
return_value=("valid_token", unexpired_time),
205+
):
206+
mock_response_headers = {
207+
"X-RateLimit-Limit": "5000",
208+
"X-RateLimit-Remaining": "4999",
209+
"X-RateLimit-Reset": "1372700873",
210+
"X-RateLimit-Used": "1",
211+
}
212+
213+
token_manager = AppTokenManager("12345;;key\\ncontent;;67890")
214+
token_manager.logger = MagicMock()
215+
token_manager.token_expires_at = expired_time
216+
token_manager.update_rate_limit(mock_response_headers)
217+
218+
mock_is_valid.return_value = False
219+
assert not token_manager.has_calls_remaining()
220+
token_manager.logger.warn.assert_called_once()
221+
assert (
222+
"GitHub app token refresh failed."
223+
in token_manager.logger.warn.call_args[0][0]
224+
)
225+
226+
def test_has_calls_remaining_succeeds_if_token_new_and_never_used(self):
227+
unexpired_time = datetime.now() + timedelta(days=1)
228+
with patch.object(AppTokenManager, "is_valid_token", return_value=True), patch(
229+
"tap_github.authenticator.generate_app_access_token",
230+
return_value=("valid_token", unexpired_time),
231+
):
232+
token_manager = AppTokenManager("12345;;key\\ncontent;;67890")
233+
assert token_manager.has_calls_remaining()
234+
235+
def test_has_calls_remaining_succeeds_if_time_and_requests_left(self):
236+
unexpired_time = datetime.now() + timedelta(days=1)
237+
with patch.object(AppTokenManager, "is_valid_token", return_value=True), patch(
238+
"tap_github.authenticator.generate_app_access_token",
239+
return_value=("valid_token", unexpired_time),
240+
):
241+
mock_response_headers = {
242+
"X-RateLimit-Limit": "5000",
243+
"X-RateLimit-Remaining": "4999",
244+
"X-RateLimit-Reset": "1372700873",
245+
"X-RateLimit-Used": "1",
246+
}
247+
248+
token_manager = AppTokenManager("12345;;key\\ncontent;;67890")
249+
token_manager.update_rate_limit(mock_response_headers)
250+
251+
assert token_manager.has_calls_remaining()
252+
253+
def test_has_calls_remaining_succeeds_if_time_left_and_reset_time_reached(self):
254+
unexpired_time = datetime.now() + timedelta(days=1)
255+
with patch.object(AppTokenManager, "is_valid_token", return_value=True), patch(
256+
"tap_github.authenticator.generate_app_access_token",
257+
return_value=("valid_token", unexpired_time),
258+
):
259+
mock_response_headers = {
260+
"X-RateLimit-Limit": "5000",
261+
"X-RateLimit-Remaining": "1",
262+
"X-RateLimit-Reset": "1372700873",
263+
"X-RateLimit-Used": "4999",
264+
}
265+
266+
token_manager = AppTokenManager(
267+
"12345;;key\\ncontent;;67890", rate_limit_buffer=1000
268+
)
269+
token_manager.update_rate_limit(mock_response_headers)
270+
271+
assert token_manager.has_calls_remaining()
272+
273+
def test_has_calls_remaining_fails_if_time_left_and_few_calls_remaining_and_reset_time_not_reached(
274+
self,
275+
):
276+
unexpired_time = datetime.now() + timedelta(days=1)
277+
with patch.object(AppTokenManager, "is_valid_token", return_value=True), patch(
278+
"tap_github.authenticator.generate_app_access_token",
279+
return_value=("valid_token", unexpired_time),
280+
):
281+
mock_response_headers = {
282+
"X-RateLimit-Limit": "5000",
283+
"X-RateLimit-Remaining": "1",
284+
"X-RateLimit-Reset": str(
285+
int((datetime.now() + timedelta(days=100)).timestamp())
286+
),
287+
"X-RateLimit-Used": "4999",
288+
}
289+
290+
token_manager = AppTokenManager(
291+
"12345;;key\\ncontent;;67890", rate_limit_buffer=1000
292+
)
293+
token_manager.update_rate_limit(mock_response_headers)
294+
295+
assert not token_manager.has_calls_remaining()
296+
168297

169298
@pytest.fixture
170299
def mock_stream():

0 commit comments

Comments
 (0)