22import itertools
33from datetime import datetime
44from logging import Logger
5- from typing import Any , AsyncGenerator , TypeVar
5+ from typing import Any , AsyncGenerator , Awaitable , TypeVar
66
77from braintree import BraintreeGateway
88
9- from estuary_cdk .http import HTTPSession
9+ from estuary_cdk .http import HTTPError , HTTPSession
1010
1111from .common import (
1212 HEADERS ,
2727)
2828
2929
30+ def _should_retry (
31+ status : int ,
32+ headers : dict [str , Any ],
33+ body : bytes ,
34+ attempt : int ,
35+ ) -> bool :
36+ # If the response is a 500 status code, that could mean that too much data was requested
37+ # and the connector should make a new request for less data.
38+ return status != 500
39+
40+
3041async def fetch_searchable_resource_ids_by_field_between (
3142 http : HTTPSession ,
3243 base_url : str ,
@@ -93,7 +104,17 @@ async def determine_next_searchable_resource_window_end_by_field(
93104 return end
94105
95106
96- async def _fetch_chunk (
107+ async def _process_completed_fetches (
108+ fetch_coroutines : list [Awaitable [list [dict [str , Any ]]]],
109+ ) -> AsyncGenerator [dict [str , Any ], None ]:
110+ """Helper to process fetching multiple pages of resources and yield individual resources."""
111+ for coro in asyncio .as_completed (fetch_coroutines ):
112+ result = await coro
113+ for resource in result :
114+ yield resource
115+
116+
117+ async def _fetch_resource_page (
97118 http : HTTPSession ,
98119 base_url : str ,
99120 path : str ,
@@ -102,8 +123,6 @@ async def _fetch_chunk(
102123 semaphore : asyncio .Semaphore ,
103124 log : Logger ,
104125) -> list [dict [str , Any ]]:
105- assert len (ids ) <= SEARCH_PAGE_SIZE
106-
107126 url = f"{ base_url } /{ path } /advanced_search"
108127 body = {
109128 "search" : {
@@ -114,7 +133,7 @@ async def _fetch_chunk(
114133 async with semaphore :
115134 response = response_model .model_validate (
116135 braintree_xml_to_dict (
117- await http .request (log , url , "POST" , json = body , headers = HEADERS )
136+ await http .request (log , url , "POST" , json = body , headers = HEADERS , should_retry = _should_retry )
118137 )
119138 )
120139
@@ -128,6 +147,46 @@ async def _fetch_chunk(
128147 return resources
129148
130149
async def _fetch_resource_batch(
    http: HTTPSession,
    base_url: str,
    path: str,
    response_model: type[SearchResponse],
    ids: list[str],
    semaphore: asyncio.Semaphore,
    log: Logger,
) -> list[dict[str, Any]]:
    """Fetch a batch of resources by id, falling back to per-id requests.

    First attempts to fetch all of `ids` in a single advanced_search page.
    If Braintree's API responds with a 500 — which may mean the server had
    trouble sending all resources in one response — each id is re-fetched
    individually so the server can respond successfully.

    Raises:
        HTTPError: for any non-500 error on the single-page attempt, or any
            error raised by the per-id fallback requests.
    """
    # Callers chunk ids to at most SEARCH_PAGE_SIZE before calling.
    assert len(ids) <= SEARCH_PAGE_SIZE

    try:
        # Happy path: all resources fetched in a single page.
        return await _fetch_resource_page(
            http,
            base_url,
            path,
            response_model,
            ids,
            semaphore,
            log,
        )
    except HTTPError as err:
        # Anything other than a 500 is a genuine failure — re-raise bare so
        # the original traceback is propagated unchanged.
        if err.code != 500:
            raise

        log.info(f"Received status {err.code} response when fetching {len(ids)} resources. Attempting to fetch resources individually.")

        # Fallback: one request per id, processed concurrently as they finish.
        return [
            resource
            async for resource in _process_completed_fetches(
                [
                    _fetch_resource_page(http, base_url, path, response_model, [resource_id], semaphore, log)
                    for resource_id in ids
                ]
            )
        ]
189+
131190_IncrementalDocument = TypeVar ("_IncrementalDocument" , bound = IncrementalResource | Transaction )
132191
133192
@@ -144,19 +203,15 @@ async def fetch_by_ids(
144203) -> AsyncGenerator [_IncrementalDocument , None ]:
145204 semaphore = asyncio .Semaphore (SEMAPHORE_LIMIT )
146205
147- for coro in asyncio .as_completed (
148- [
149- _fetch_chunk (http , base_url , path , response_model , list (chunk ), semaphore , log )
150- for chunk in itertools .batched (ids , SEARCH_PAGE_SIZE )
151- ]
206+ async for resource in _process_completed_fetches (
207+ [_fetch_resource_batch (http , base_url , path , response_model , list (chunk ), semaphore , log )
208+ for chunk in itertools .batched (ids , SEARCH_PAGE_SIZE )]
152209 ):
153- result = await coro
154- for resource in result :
155- yield document_model .model_validate (
156- braintree_object_to_dict (
157- braintree_class (gateway , resource )
158- )
210+ yield document_model .model_validate (
211+ braintree_object_to_dict (
212+ braintree_class (gateway , resource )
159213 )
214+ )
160215
161216
162217async def fetch_searchable_resources_created_between (
0 commit comments