Skip to content

Commit ec7e961

Browse files
authored
fix(airbyte-cdk): Fix RequestOptionsProvider for PerPartitionWithGlobalCursor (#254)
1 parent 5cc9840 commit ec7e961

File tree

3 files changed

+583
-19
lines changed

3 files changed

+583
-19
lines changed

airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py

+35-18
Original file line numberDiff line numberDiff line change
@@ -222,6 +222,8 @@ def get_request_params(
222222
next_page_token: Optional[Mapping[str, Any]] = None,
223223
) -> Mapping[str, Any]:
224224
if stream_slice:
225+
if self._to_partition_key(stream_slice.partition) not in self._cursor_per_partition:
226+
self._create_cursor_for_partition(self._to_partition_key(stream_slice.partition))
225227
return self._partition_router.get_request_params( # type: ignore # this always returns a mapping
226228
stream_state=stream_state,
227229
stream_slice=StreamSlice(partition=stream_slice.partition, cursor_slice={}),
@@ -244,6 +246,8 @@ def get_request_headers(
244246
next_page_token: Optional[Mapping[str, Any]] = None,
245247
) -> Mapping[str, Any]:
246248
if stream_slice:
249+
if self._to_partition_key(stream_slice.partition) not in self._cursor_per_partition:
250+
self._create_cursor_for_partition(self._to_partition_key(stream_slice.partition))
247251
return self._partition_router.get_request_headers( # type: ignore # this always returns a mapping
248252
stream_state=stream_state,
249253
stream_slice=StreamSlice(partition=stream_slice.partition, cursor_slice={}),
@@ -266,6 +270,8 @@ def get_request_body_data(
266270
next_page_token: Optional[Mapping[str, Any]] = None,
267271
) -> Union[Mapping[str, Any], str]:
268272
if stream_slice:
273+
if self._to_partition_key(stream_slice.partition) not in self._cursor_per_partition:
274+
self._create_cursor_for_partition(self._to_partition_key(stream_slice.partition))
269275
return self._partition_router.get_request_body_data( # type: ignore # this always returns a mapping
270276
stream_state=stream_state,
271277
stream_slice=StreamSlice(partition=stream_slice.partition, cursor_slice={}),
@@ -288,6 +294,8 @@ def get_request_body_json(
288294
next_page_token: Optional[Mapping[str, Any]] = None,
289295
) -> Mapping[str, Any]:
290296
if stream_slice:
297+
if self._to_partition_key(stream_slice.partition) not in self._cursor_per_partition:
298+
self._create_cursor_for_partition(self._to_partition_key(stream_slice.partition))
291299
return self._partition_router.get_request_body_json( # type: ignore # this always returns a mapping
292300
stream_state=stream_state,
293301
stream_slice=StreamSlice(partition=stream_slice.partition, cursor_slice={}),
@@ -303,21 +311,6 @@ def get_request_body_json(
303311
raise ValueError("A partition needs to be provided in order to get request body json")
304312

305313
def should_be_synced(self, record: Record) -> bool:
306-
if (
307-
record.associated_slice
308-
and self._to_partition_key(record.associated_slice.partition)
309-
not in self._cursor_per_partition
310-
):
311-
partition_state = (
312-
self._state_to_migrate_from
313-
if self._state_to_migrate_from
314-
else self._NO_CURSOR_STATE
315-
)
316-
cursor = self._create_cursor(partition_state)
317-
318-
self._cursor_per_partition[
319-
self._to_partition_key(record.associated_slice.partition)
320-
] = cursor
321314
return self._get_cursor(record).should_be_synced(
322315
self._convert_record_to_cursor_record(record)
323316
)
@@ -356,8 +349,32 @@ def _get_cursor(self, record: Record) -> DeclarativeCursor:
356349
)
357350
partition_key = self._to_partition_key(record.associated_slice.partition)
358351
if partition_key not in self._cursor_per_partition:
359-
raise ValueError(
360-
"Invalid state as stream slices that are emitted should refer to an existing cursor"
361-
)
352+
self._create_cursor_for_partition(partition_key)
362353
cursor = self._cursor_per_partition[partition_key]
363354
return cursor
355+
356+
def _create_cursor_for_partition(self, partition_key: str) -> None:
357+
"""
358+
Dynamically creates and initializes a cursor for the specified partition.
359+
360+
This method is required for `ConcurrentPerPartitionCursor`. For concurrent cursors,
361+
stream_slices is executed only for the concurrent cursor, so cursors per partition
362+
are not created for the declarative cursor. This method ensures that a cursor is available
363+
to create requests for the specified partition. The cursor is created from
364+
the state to migrate from when one is set, and otherwise from the
365+
`_NO_CURSOR_STATE` placeholder.
366+
367+
Note:
368+
This is a temporary workaround and should be removed once the declarative cursor
369+
is decoupled from the concurrent cursor implementation.
370+
371+
Args:
372+
partition_key (str): The unique identifier for the partition for which the cursor
373+
needs to be created.
374+
"""
375+
partition_state = (
376+
self._state_to_migrate_from if self._state_to_migrate_from else self._NO_CURSOR_STATE
377+
)
378+
cursor = self._create_cursor(partition_state)
379+
380+
self._cursor_per_partition[partition_key] = cursor

airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -2386,7 +2386,7 @@ def create_simple_retriever(
23862386
if (
23872387
not isinstance(stream_slicer, DatetimeBasedCursor)
23882388
or type(stream_slicer) is not DatetimeBasedCursor
2389-
) and not isinstance(stream_slicer, PerPartitionWithGlobalCursor):
2389+
):
23902390
# Many of the custom component implementations of DatetimeBasedCursor override get_request_params() (or other methods).
23912391
# Because we're decoupling RequestOptionsProvider from the Cursor, custom components will eventually need to reimplement
23922392
# their own RequestOptionsProvider. However, right now the existing StreamSlicer/Cursor still can act as the SimpleRetriever's

0 commit comments

Comments
 (0)