Skip to content
25 changes: 17 additions & 8 deletions airbyte_cdk/sources/declarative/datetime/min_max_datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

from airbyte_cdk.sources.declarative.datetime.datetime_parser import DatetimeParser
from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString
from airbyte_cdk.utils.datetime_helpers import ab_datetime_try_parse


@dataclass
Expand Down Expand Up @@ -65,15 +66,23 @@ def get_datetime(
if not datetime_format:
datetime_format = "%Y-%m-%dT%H:%M:%S.%f%z"

time = self._parser.parse(
str(
self.datetime.eval( # type: ignore[union-attr] # str has no attribute "eval"
config,
**additional_parameters,
datetime_str = str(
self.datetime.eval( # type: ignore[union-attr] # str has no attribute "eval"
config,
**additional_parameters,
)
)

try:
time = self._parser.parse(datetime_str, datetime_format)
except ValueError:
parsed_dt = ab_datetime_try_parse(datetime_str)
if parsed_dt is not None:
time = parsed_dt
else:
raise ValueError(
f"Unable to parse datetime '{datetime_str}' with format '{datetime_format}' or robust parsing"
)
),
datetime_format,
) # type: ignore # datetime is always cast to an interpolated string

if self.min_datetime:
min_time = str(self.min_datetime.eval(config, **additional_parameters)) # type: ignore # min_datetime is always cast to an interpolated string
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -943,8 +943,11 @@ definitions:
type: array
items:
type: string
airbyte_hidden: true
description: |
The possible formats for the cursor field, in order of preference. The first format that matches the cursor field value will be used to parse it. If not provided, the Outgoing Datetime Format will be used.
The possible formats for the cursor field, in order of preference. The first format that matches the cursor field value will be used to parse it.
If none of the specified formats match, the value is parsed with a robust fallback parser that handles most ISO 8601 / RFC 3339 compliant formats.
If no cursor datetime formats are provided, the Outgoing Datetime Format is used as the first attempt.
Use placeholders starting with "%" to describe the format the API is using. The following placeholders are available:
* **%s**: Epoch unix timestamp - `1686218963`
* **%s_as_float**: Epoch unix timestamp in seconds as float with microsecond precision - `1686218963.123456`
Expand Down Expand Up @@ -2803,6 +2806,7 @@ definitions:
- "{{ now_utc().strftime('%Y-%m-%dT%H:%M:%SZ') }}"
datetime_format:
title: Datetime Format
airbyte_hidden: true
description: |
Format of the datetime value. Defaults to "%Y-%m-%dT%H:%M:%S.%f%z" if left empty. Use placeholders starting with "%" to describe the format the API is using. The following placeholders are available:
* **%s**: Epoch unix timestamp - `1686218963`
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
)
from airbyte_cdk.sources.message import MessageRepository
from airbyte_cdk.sources.types import Config, Record, StreamSlice, StreamState
from airbyte_cdk.utils.datetime_helpers import ab_datetime_format, ab_datetime_try_parse
from airbyte_cdk.utils.mapping_helpers import _validate_component_request_option_paths


Expand Down Expand Up @@ -313,6 +314,14 @@ def parse_date(self, date: str) -> datetime.datetime:
return self._parser.parse(date, datetime_format)
except ValueError:
pass

# If we have not parsed by now, use the robust parser which handles
# all common formats, including all formats supported by ISO8601
# and RFC3339.
parsed_dt = ab_datetime_try_parse(date)
if parsed_dt is not None:
return parsed_dt

raise ValueError(f"No format in {self.cursor_datetime_formats} matching {date}")

@classmethod
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -997,6 +997,30 @@ def test_parse_date_legacy_merge_datetime_format_in_cursor_datetime_format(
["%Y-%m-%dT%H:%M:%S.%f%z", "%s"],
datetime.datetime(2021, 1, 1, 0, 0, tzinfo=datetime.timezone.utc),
),
(
"test_robust_fallback_z_suffix",
"2021-01-01T00:00:00Z",
["%Y-%m-%d"],
datetime.datetime(2021, 1, 1, 0, 0, tzinfo=datetime.timezone.utc),
),
(
"test_robust_fallback_iso_with_colon_tz",
"2021-01-01T00:00:00+00:00",
["%Y-%m-%d"],
datetime.datetime(2021, 1, 1, 0, 0, tzinfo=datetime.timezone.utc),
),
(
"test_robust_fallback_date_only",
"2021-01-01",
["%s"],
datetime.datetime(2021, 1, 1, 0, 0, tzinfo=datetime.timezone.utc),
),
(
"test_robust_fallback_unix_timestamp_string",
"1609459200",
["%Y-%m-%d"],
datetime.datetime(2021, 1, 1, 0, 0, tzinfo=datetime.timezone.utc),
),
],
)
def test_parse_date(test_name, input_date, date_formats, expected_output_date):
Expand All @@ -1021,7 +1045,26 @@ def test_given_unknown_format_when_parse_date_then_raise_error():
parameters={},
)
with pytest.raises(ValueError):
slicer.parse_date("2021-01-01T00:00:00.000000+0000")
slicer.parse_date("not-a-valid-datetime-string")


def test_minmax_datetime_robust_fallback():
    """Check that MinMaxDatetime still resolves a date when `datetime_format` does not fit the value.

    Every input below is paired with the same date-only format, which the
    original test labels as incompatible with the value. The resolved
    datetime is only checked for its calendar date (2021-01-01).
    """
    from airbyte_cdk.sources.declarative.datetime.min_max_datetime import MinMaxDatetime

    # Shared format that is deliberately a poor fit for every value below.
    mismatched_format = "%Y-%m-%d"
    raw_values = (
        "2021-01-01T00:00:00Z",
        "2021-01-01T00:00:00+00:00",
        "1609459200",
    )

    for raw_value in raw_values:
        component = MinMaxDatetime(
            datetime=raw_value,
            datetime_format=mismatched_format,
            parameters={},
        )
        resolved = component.get_datetime({})
        # Only the date part is pinned; time-of-day and tzinfo are not asserted.
        assert (resolved.year, resolved.month, resolved.day) == (2021, 1, 1)


@pytest.mark.parametrize(
Expand Down
Loading