diff --git a/airbyte-integrations/connectors/source-github/metadata.yaml b/airbyte-integrations/connectors/source-github/metadata.yaml index 3899d5c35b2d..f79128988c34 100644 --- a/airbyte-integrations/connectors/source-github/metadata.yaml +++ b/airbyte-integrations/connectors/source-github/metadata.yaml @@ -10,7 +10,7 @@ data: connectorSubtype: api connectorType: source definitionId: ef69ef6e-aa7f-4af1-a01d-ef775033524e - dockerImageTag: 1.9.0 + dockerImageTag: 1.9.1-rc.1 dockerRepository: airbyte/source-github documentationUrl: https://docs.airbyte.com/integrations/sources/github erdUrl: https://dbdocs.io/airbyteio/source-github?view=relationships @@ -31,7 +31,7 @@ data: releaseStage: generally_available releases: rolloutConfiguration: - enableProgressiveRollout: false + enableProgressiveRollout: true suggestedStreams: streams: - branches diff --git a/airbyte-integrations/connectors/source-github/poetry.lock b/airbyte-integrations/connectors/source-github/poetry.lock index 4a41d984806c..d6880b251ff8 100644 --- a/airbyte-integrations/connectors/source-github/poetry.lock +++ b/airbyte-integrations/connectors/source-github/poetry.lock @@ -1,15 +1,15 @@ -# This file is automatically @generated by Poetry 2.1.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 2.1.3 and should not be changed by hand. [[package]] name = "airbyte-cdk" -version = "7.3.0" +version = "7.3.4" description = "A framework for writing Airbyte Connectors." 
optional = false python-versions = "<3.14,>=3.10" groups = ["main"] files = [ - {file = "airbyte_cdk-7.3.0-py3-none-any.whl", hash = "sha256:2efa7697ae5b608cc7025c4499f59724557b2d6ec0bf3d684c3fa7913c5903d4"}, - {file = "airbyte_cdk-7.3.0.tar.gz", hash = "sha256:a580c1ac52e35f9611d6c993caf989955d936bde35cc03d9a299155cd11733c8"}, + {file = "airbyte_cdk-7.3.4-py3-none-any.whl", hash = "sha256:03f416e06a7b390e152d6e442310a40e90d98c9b27be266ed0fd76675a432c3a"}, + {file = "airbyte_cdk-7.3.4.tar.gz", hash = "sha256:761d48a22a13eeea619893d6d0cbc05de83a4c0321ce5e18397303132dcec055"}, ] [package.dependencies] @@ -572,7 +572,7 @@ description = "Backport of PEP 654 (exception groups)" optional = false python-versions = ">=3.7" groups = ["main", "dev"] -markers = "python_version < \"3.11\"" +markers = "python_version == \"3.10\"" files = [ {file = "exceptiongroup-1.3.0-py3-none-any.whl", hash = "sha256:4d111e6e0c13d0644cad6ddaa7ed0261a0b36971f6d23e7ec9b4b9097da78a10"}, {file = "exceptiongroup-1.3.0.tar.gz", hash = "sha256:b241f5885f560bc56a59ee63ca4c6a8bfa46ae4ad651af316d4e81817bb9fd88"}, @@ -627,12 +627,12 @@ files = [ google-auth = ">=2.14.1,<3.0.0" googleapis-common-protos = ">=1.56.2,<2.0.0" grpcio = [ - {version = ">=1.33.2,<2.0.0", optional = true, markers = "extra == \"grpc\""}, {version = ">=1.49.1,<2.0.0", optional = true, markers = "python_version >= \"3.11\" and extra == \"grpc\""}, + {version = ">=1.33.2,<2.0.0", optional = true, markers = "python_version < \"3.11\" and extra == \"grpc\""}, ] grpcio-status = [ - {version = ">=1.33.2,<2.0.0", optional = true, markers = "extra == \"grpc\""}, {version = ">=1.49.1,<2.0.0", optional = true, markers = "python_version >= \"3.11\" and extra == \"grpc\""}, + {version = ">=1.33.2,<2.0.0", optional = true, markers = "extra == \"grpc\""}, ] proto-plus = ">=1.22.3,<2.0.0" protobuf = ">=3.19.5,<3.20.0 || >3.20.0,<3.20.1 || >3.20.1,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || 
>4.21.4,<4.21.5 || >4.21.5,<7.0.0" @@ -1450,8 +1450,8 @@ files = [ [package.dependencies] numpy = [ - {version = ">=1.22.4", markers = "python_version < \"3.11\""}, {version = ">=1.23.2", markers = "python_version == \"3.11\""}, + {version = ">=1.22.4", markers = "python_version < \"3.11\""}, ] python-dateutil = ">=2.8.2" pytz = ">=2020.1" @@ -2689,7 +2689,7 @@ description = "A lil' TOML parser" optional = false python-versions = ">=3.8" groups = ["dev"] -markers = "python_version < \"3.11\"" +markers = "python_version == \"3.10\"" files = [ {file = "tomli-2.2.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:678e4fa69e4575eb77d103de3df8a895e1591b48e740211bd1067378c69e8249"}, {file = "tomli-2.2.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:023aa114dd824ade0100497eb2318602af309e5a55595f76b626d6d9f3b7b0a6"}, @@ -2770,7 +2770,7 @@ files = [ {file = "typing_extensions-4.15.0-py3-none-any.whl", hash = "sha256:f0fa19c6845758ab08074a0cfa8b7aecb71c999ca73d62883bc25cc018c4e548"}, {file = "typing_extensions-4.15.0.tar.gz", hash = "sha256:0cea48d173cc12fa28ecabc3b837ea3cf6f38c6d1136f85cbaaf598984861466"}, ] -markers = {dev = "python_version < \"3.11\""} +markers = {dev = "python_version == \"3.10\""} [[package]] name = "typing-inspection" @@ -2994,4 +2994,4 @@ files = [ [metadata] lock-version = "2.1" python-versions = "^3.10,<3.12" -content-hash = "759a440d7c3f6a8ebc4fc1fcaae6801e036e824cc68d58b90044b1a7f60fbb59" +content-hash = "34d92bda85bc3079229729941b2350c8d1b92365e5f4549714a7f0949b8cb5bc" diff --git a/airbyte-integrations/connectors/source-github/pyproject.toml b/airbyte-integrations/connectors/source-github/pyproject.toml index 3e3a00590b69..2467cd0f49bb 100644 --- a/airbyte-integrations/connectors/source-github/pyproject.toml +++ b/airbyte-integrations/connectors/source-github/pyproject.toml @@ -3,7 +3,7 @@ requires = [ "poetry-core>=1.0.0",] build-backend = "poetry.core.masonry.api" [tool.poetry] -version = "1.9.0" +version = "1.9.1-rc.1" name = 
"source-github" description = "Source implementation for GitHub." authors = [ "Airbyte ",] @@ -17,7 +17,7 @@ include = "source_github" [tool.poetry.dependencies] python = "^3.10,<3.12" -airbyte-cdk = "^7" +airbyte-cdk = "^7.3.4" sgqlc = "==16.3" [tool.poetry.scripts] diff --git a/airbyte-integrations/connectors/source-github/source_github/source.py b/airbyte-integrations/connectors/source-github/source_github/source.py index 9e6246e67ff3..c7b58ee95971 100644 --- a/airbyte-integrations/connectors/source-github/source_github/source.py +++ b/airbyte-integrations/connectors/source-github/source_github/source.py @@ -9,6 +9,7 @@ from airbyte_cdk.models import FailureType from airbyte_cdk.sources import AbstractSource from airbyte_cdk.sources.streams import Stream +from airbyte_cdk.sources.streams.http.http_client import MessageRepresentationAirbyteTracedErrors from airbyte_cdk.sources.streams.http.requests_native_auth import MultipleTokenAuthenticator from airbyte_cdk.utils.traced_exception import AirbyteTracedException from source_github.utils import MultipleTokenAuthenticatorWithRateLimiter @@ -184,7 +185,7 @@ def user_friendly_error_message(self, message: str) -> str: # 404 Client Error: Not Found for url: https://api.github.com/orgs/airbytehqBLA/repos?per_page=100 org_name = message.split("https://api.github.com/orgs/")[1].split("/")[0] user_message = f'Organization name: "{org_name}" is unknown, "repository" config option should be updated. Please validate your repository config.' - elif "401 Client Error: Unauthorized for url" in message: + elif "401 Client Error: Unauthorized for url" in message or ("Error: Unauthorized" in message and "401" in message): # 401 Client Error: Unauthorized for url: https://api.github.com/orgs/datarootsio/repos?per_page=100&sort=updated&direction=desc user_message = ( "Github credentials have expired or changed, please review your credentials and re-authenticate or renew your access token." 
@@ -203,6 +204,9 @@ def check_connection(self, logger: logging.Logger, config: Mapping[str, Any]) -> ) return True, None + except MessageRepresentationAirbyteTracedErrors as e: + user_message = self.user_friendly_error_message(e.message) + return False, user_message or e.message except Exception as e: message = repr(e) user_message = self.user_friendly_error_message(message) diff --git a/airbyte-integrations/connectors/source-github/source_github/utils.py b/airbyte-integrations/connectors/source-github/source_github/utils.py index f3d916c57746..02984822f68f 100644 --- a/airbyte-integrations/connectors/source-github/source_github/utils.py +++ b/airbyte-integrations/connectors/source-github/source_github/utils.py @@ -1,7 +1,7 @@ # # Copyright (c) 2023 Airbyte, Inc., all rights reserved. # - +import logging import time from dataclasses import dataclass from datetime import timedelta @@ -10,10 +10,12 @@ import requests -from airbyte_cdk.models import SyncMode +from airbyte_cdk.models import FailureType, SyncMode from airbyte_cdk.sources.streams import Stream +from airbyte_cdk.sources.streams.http import HttpClient from airbyte_cdk.sources.streams.http.requests_native_auth import TokenAuthenticator from airbyte_cdk.sources.streams.http.requests_native_auth.abstract_token import AbstractHeaderAuthenticator +from airbyte_cdk.utils import AirbyteTracedException from airbyte_cdk.utils.datetime_helpers import AirbyteDateTime, ab_datetime_now, ab_datetime_parse @@ -59,14 +61,30 @@ class MultipleTokenAuthenticatorWithRateLimiter(AbstractHeaderAuthenticator): DURATION = timedelta(seconds=3600) # Duration at which the current rate limit window resets def __init__(self, tokens: List[str], auth_method: str = "token", auth_header: str = "Authorization"): + self._logger = logging.getLogger("airbyte") self._auth_method = auth_method self._auth_header = auth_header self._tokens = {t: Token() for t in tokens} + # It would've been nice to instantiate a single client on this 
authenticator. However, we are checking + # the limits of each token which is associated with a TokenAuthenticator. And each HttpClient can only + # correspond to one authenticator. + self._token_to_http_client: Mapping[str, HttpClient] = self._initialize_http_clients(tokens) self.check_all_tokens() self._tokens_iter = cycle(self._tokens) self._active_token = next(self._tokens_iter) self._max_time = 60 * 10 # 10 minutes as default + def _initialize_http_clients(self, tokens: List[str]) -> Mapping[str, HttpClient]: + return { + token: HttpClient( + name="token_validator", + logger=self._logger, + authenticator=TokenAuthenticator(token, auth_method=self._auth_method), + use_cache=False, # We don't want to reuse cached values because rate limit values change frequently + ) + for token in tokens + } + @property def auth_header(self) -> str: return self._auth_header @@ -114,14 +132,27 @@ def max_time(self, value: int) -> None: def _check_token_limits(self, token: str): """check that token is not limited""" - headers = {"Accept": "application/vnd.github+json", "X-GitHub-Api-Version": "2022-11-28"} - rate_limit_info = ( - requests.get( - "https://api.github.com/rate_limit", headers=headers, auth=TokenAuthenticator(token, auth_method=self._auth_method) - ) - .json() - .get("resources") + + http_client = self._token_to_http_client.get(token) + if not http_client: + raise ValueError("No HttpClient was initialized for this token. This is unexpected. Please contact Airbyte support.") + + _, response = http_client.send_request( + http_method="GET", + url="https://api.github.com/rate_limit", + headers={"Accept": "application/vnd.github+json", "X-GitHub-Api-Version": "2022-11-28"}, + request_kwargs={}, ) + + response_body = response.json() + if "resources" not in response_body: + raise AirbyteTracedException( + failure_type=FailureType.config_error, + internal_message=f"Token rate limit info response did not contain expected key: resources", + message="Unable to validate token. 
Please double check that specified authentication tokens are correct", + ) + + rate_limit_info = response_body.get("resources") token_info = self._tokens[token] remaining_info_core = rate_limit_info.get("core") token_info.count_rest, token_info.reset_at_rest = ( diff --git a/airbyte-integrations/connectors/source-github/unit_tests/test_multiple_token_authenticator.py b/airbyte-integrations/connectors/source-github/unit_tests/test_multiple_token_authenticator.py index e64b10ccfd30..ddaa949a5146 100644 --- a/airbyte-integrations/connectors/source-github/unit_tests/test_multiple_token_authenticator.py +++ b/airbyte-integrations/connectors/source-github/unit_tests/test_multiple_token_authenticator.py @@ -9,11 +9,13 @@ import pytest import responses from freezegun import freeze_time +from requests import JSONDecodeError from source_github import SourceGithub from source_github.streams import Organizations from source_github.utils import MultipleTokenAuthenticatorWithRateLimiter, read_full_refresh from airbyte_cdk.models import FailureType +from airbyte_cdk.sources.streams.http.http_client import MessageRepresentationAirbyteTracedErrors from airbyte_cdk.utils import AirbyteTracedException from airbyte_cdk.utils.datetime_helpers import ab_datetime_now @@ -148,3 +150,21 @@ def request_callback_orgs(request, context): list(read_full_refresh(stream)) sleep_mock.assert_called_once_with(ACCEPTED_WAITING_TIME_IN_SECONDS) assert [(x.count_rest, x.count_graphql) for x in authenticator._tokens.values()] == [(500, 500), (500, 500), (498, 500)] + + +def test_invalid_credentials_error_message(requests_mock): + """ + Test that validates that invalid or expired credentials are gracefully caught and surfaced back in a way + that the connector can display actionable messages back to users + """ + + requests_mock.get( + "https://api.github.com/rate_limit", + status_code=401, + json={"message": "Bad credentials", "documentation_url": "https://docs.github.com/rest", "status": "401"}, + ) + 
+ with pytest.raises(AirbyteTracedException) as e: + MultipleTokenAuthenticatorWithRateLimiter(tokens=["token1", "token2", "token3"]) + + assert "HTTP Status Code: 401" in e.value.message diff --git a/docs/integrations/sources/github.md b/docs/integrations/sources/github.md index f8d34823fad9..cc090635457f 100644 --- a/docs/integrations/sources/github.md +++ b/docs/integrations/sources/github.md @@ -225,6 +225,7 @@ Your token should have at least the `repo` scope. Depending on which streams you | Version | Date | Pull Request | Subject | |:-----------|:-----------|:------------------------------------------------------------------------------------------------------------------|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| 1.9.1-rc.1 | 2025-10-13 | [67584](https://github.com/airbytehq/airbyte/pull/67584) | Graceful error handling of invalid credentials when running operations | | 1.9.0 | 2025-10-13 | [67708](https://github.com/airbytehq/airbyte/pull/67708) | Promoting release candidate 1.9.0-rc.3 to a main version. | | 1.9.0-rc.3 | 2025-10-09 | [67589](https://github.com/airbytehq/airbyte/pull/67589) | Fix min time to wait on token rate limits | | 1.9.0-rc.2 | 2025-10-03 | [67026](https://github.com/airbytehq/airbyte/pull/67026) | Fix converting datetime in workflows stream |