Skip to content

feat(timestamp): improve ibis.timestamp() #11129

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions ibis/backends/sql/compilers/bigquery/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -527,9 +527,11 @@
return self.f.strpos(haystack, needle) > 0

def visit_TimestampFromYMDHMS(
self, op, *, year, month, day, hours, minutes, seconds
self, op, *, year, month, day, hours, minutes, seconds, dtype: dt.Timestamp
):
return self.f.anon.DATETIME(year, month, day, hours, minutes, seconds)
if dtype.timezone is not None:

Check warning on line 532 in ibis/backends/sql/compilers/bigquery/__init__.py

View check run for this annotation

Codecov / codecov/patch

ibis/backends/sql/compilers/bigquery/__init__.py#L532

Added line #L532 was not covered by tests
raise NotImplementedError()
return self.f.datetime_from_parts(year, month, day, hours, minutes, seconds)

Check warning on line 534 in ibis/backends/sql/compilers/bigquery/__init__.py

View check run for this annotation

Codecov / codecov/patch

ibis/backends/sql/compilers/bigquery/__init__.py#L534

Added line #L534 was not covered by tests

def visit_NonNullLiteral(self, op, *, value, dtype):
if dtype.is_inet() or dtype.is_macaddr():
Expand Down
4 changes: 2 additions & 2 deletions ibis/backends/sql/compilers/clickhouse.py
Original file line number Diff line number Diff line change
Expand Up @@ -430,7 +430,7 @@ def visit_DateFromYMD(self, op, *, year, month, day):
)

def visit_TimestampFromYMDHMS(
self, op, *, year, month, day, hours, minutes, seconds, **_
self, op, *, year, month, day, hours, minutes, seconds, dtype: dt.Timestamp, **_
):
to_datetime = self.f.toDateTime(
self.f.concat(
Expand All @@ -447,7 +447,7 @@ def visit_TimestampFromYMDHMS(
self.f.leftPad(self.f.toString(seconds), 2, "0"),
)
)
if timezone := op.dtype.timezone:
if timezone := dtype.timezone:
return self.f.toTimeZone(to_datetime, timezone)
return to_datetime

Expand Down
35 changes: 18 additions & 17 deletions ibis/backends/sql/compilers/datafusion.py
Original file line number Diff line number Diff line change
Expand Up @@ -424,24 +424,25 @@
)

def visit_TimestampFromYMDHMS(
self, op, *, year, month, day, hours, minutes, seconds, **_
self, op, *, year, month, day, hours, minutes, seconds, dtype: dt.Timestamp, **_
):
return self.f.to_timestamp_micros(
self.f.concat(
self.f.lpad(self.cast(self.cast(year, dt.int64), dt.string), 4, "0"),
"-",
self.f.lpad(self.cast(self.cast(month, dt.int64), dt.string), 2, "0"),
"-",
self.f.lpad(self.cast(self.cast(day, dt.int64), dt.string), 2, "0"),
"T",
self.f.lpad(self.cast(self.cast(hours, dt.int64), dt.string), 2, "0"),
":",
self.f.lpad(self.cast(self.cast(minutes, dt.int64), dt.string), 2, "0"),
":",
self.f.lpad(self.cast(self.cast(seconds, dt.int64), dt.string), 2, "0"),
".000000Z",
)
)
args = [

Check warning on line 429 in ibis/backends/sql/compilers/datafusion.py

View check run for this annotation

Codecov / codecov/patch

ibis/backends/sql/compilers/datafusion.py#L429

Added line #L429 was not covered by tests
self.f.lpad(self.cast(self.cast(year, dt.int64), dt.string), 4, "0"),
"-",
self.f.lpad(self.cast(self.cast(month, dt.int64), dt.string), 2, "0"),
"-",
self.f.lpad(self.cast(self.cast(day, dt.int64), dt.string), 2, "0"),
"T",
self.f.lpad(self.cast(self.cast(hours, dt.int64), dt.string), 2, "0"),
":",
self.f.lpad(self.cast(self.cast(minutes, dt.int64), dt.string), 2, "0"),
":",
self.f.lpad(self.cast(self.cast(seconds, dt.int64), dt.string), 2, "0"),
"Z",
]
if dtype.timezone is not None:
args.append(dtype.timezone)
return self.f.to_timestamp_seconds(self.f.concat(*args))

Check warning on line 445 in ibis/backends/sql/compilers/datafusion.py

View check run for this annotation

Codecov / codecov/patch

ibis/backends/sql/compilers/datafusion.py#L444-L445

Added lines #L444 - L445 were not covered by tests

def visit_IsInf(self, op, *, arg):
return sg.and_(sg.not_(self.f.isnan(arg)), self.f.abs(arg).eq(self.POS_INF))
Expand Down
4 changes: 3 additions & 1 deletion ibis/backends/sql/compilers/druid.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,8 +182,10 @@ def visit_TimestampFromUNIX(self, op, *, arg, unit):
raise exc.UnsupportedArgumentError(f"Druid doesn't support {unit} units")

def visit_TimestampFromYMDHMS(
self, op, *, year, month, day, hours, minutes, seconds
self, op, *, year, month, day, hours, minutes, seconds, dtype: dt.Timestamp
):
if dtype.timezone is not None:
raise NotImplementedError()
return self.f.time_parse(
self.f.concat(
self.f.lpad(self.cast(year, dt.string), 4, "0"),
Expand Down
13 changes: 5 additions & 8 deletions ibis/backends/sql/compilers/duckdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -403,16 +403,13 @@
raise com.UnsupportedOperationError(f"{unit!r} unit is not supported!")

def visit_TimestampFromYMDHMS(
self, op, *, year, month, day, hours, minutes, seconds, **_
self, op, *, year, month, day, hours, minutes, seconds, dtype: dt.Timestamp, **_
):
args = [year, month, day, hours, minutes, seconds]

func = "make_timestamp"
if (timezone := op.dtype.timezone) is not None:
func += "tz"
args.append(timezone)

return self.f[func](*args)
if (timezone := dtype.timezone) is not None:
return self.f.make_timestamptz(*args, timezone)

Check warning on line 410 in ibis/backends/sql/compilers/duckdb.py

View check run for this annotation

Codecov / codecov/patch

ibis/backends/sql/compilers/duckdb.py#L410

Added line #L410 was not covered by tests
else:
return self.f.make_timestamp(*args)

def visit_Cast(self, op, *, arg, to):
dtype = op.arg.dtype
Expand Down
4 changes: 2 additions & 2 deletions ibis/backends/sql/compilers/flink.py
Original file line number Diff line number Diff line change
Expand Up @@ -381,7 +381,7 @@ def visit_DateFromYMD(self, op, *, year, month, day):
)

def visit_TimestampFromYMDHMS(
self, op, *, year, month, day, hours, minutes, seconds
self, op, *, year, month, day, hours, minutes, seconds, dtype: dt.Timestamp
):
padded_year = self.f.lpad(self.cast(year, dt.string), 4, "0")
padded_month = self.f.lpad(self.cast(month, dt.string), 2, "0")
Expand All @@ -403,7 +403,7 @@ def visit_TimestampFromYMDHMS(
":",
padded_second,
),
op.dtype,
dtype,
)

def visit_ExtractEpochSeconds(self, op, *, arg):
Expand Down
8 changes: 6 additions & 2 deletions ibis/backends/sql/compilers/mssql.py
Original file line number Diff line number Diff line change
Expand Up @@ -334,9 +334,13 @@ def visit_TimeFromHMS(self, op, *, hours, minutes, seconds):
return self.f.timefromparts(hours, minutes, seconds, 0, 0)

def visit_TimestampFromYMDHMS(
self, op, *, year, month, day, hours, minutes, seconds
self, op, *, year, month, day, hours, minutes, seconds, dtype: dt.Timestamp
):
return self.f.datetimefromparts(year, month, day, hours, minutes, seconds, 0)
if dtype.timezone is not None:
raise NotImplementedError()
return self.f.datetime2fromparts(
year, month, day, hours, minutes, seconds, 0, dtype.scale
)

def visit_StringFind(self, op, *, arg, substr, start, end):
if start is not None:
Expand Down
8 changes: 6 additions & 2 deletions ibis/backends/sql/compilers/postgres.py
Original file line number Diff line number Diff line change
Expand Up @@ -662,17 +662,21 @@
return None

def visit_TimestampFromYMDHMS(
self, op, *, year, month, day, hours, minutes, seconds
self, op, *, year, month, day, hours, minutes, seconds, dtype: dt.Timestamp
):
to_int32 = partial(self.cast, to=dt.int32)
return self.f.make_timestamp(
args = (
to_int32(year),
to_int32(month),
to_int32(day),
to_int32(hours),
to_int32(minutes),
self.cast(seconds, dt.float64),
)
if dtype.timezone:
return self.f.make_timestamptz(*args, dtype.timezone)

Check warning on line 677 in ibis/backends/sql/compilers/postgres.py

View check run for this annotation

Codecov / codecov/patch

ibis/backends/sql/compilers/postgres.py#L677

Added line #L677 was not covered by tests
else:
return self.f.make_timestamp(*args)

def visit_DateFromYMD(self, op, *, year, month, day):
to_int32 = partial(self.cast, to=dt.int32)
Expand Down
4 changes: 3 additions & 1 deletion ibis/backends/sql/compilers/sqlite.py
Original file line number Diff line number Diff line change
Expand Up @@ -274,8 +274,10 @@ def visit_TimeFromHMS(self, op, *, hours, minutes, seconds):
return self.f.time(self.f.printf("%02d:%02d:%02d", hours, minutes, seconds))

def visit_TimestampFromYMDHMS(
self, op, *, year, month, day, hours, minutes, seconds
self, op, *, year, month, day, hours, minutes, seconds, dtype: dt.Timestamp
):
if dtype.timezone not in (None, "UTC"):
raise NotImplementedError
return self.f.datetime(
self.f.printf(
"%04d-%02d-%02d %02d:%02d:%02d%s",
Expand Down
4 changes: 2 additions & 2 deletions ibis/backends/sql/compilers/trino.py
Original file line number Diff line number Diff line change
Expand Up @@ -323,7 +323,7 @@ def visit_TimeFromHMS(self, op, *, hours, minutes, seconds):
)

def visit_TimestampFromYMDHMS(
self, op, *, year, month, day, hours, minutes, seconds
self, op, *, year, month, day, hours, minutes, seconds, dtype: dt.Timestamp
):
return self.cast(
self.f.from_iso8601_timestamp(
Expand All @@ -337,7 +337,7 @@ def visit_TimestampFromYMDHMS(
seconds,
)
),
dt.timestamp,
dtype,
)

def visit_TimestampFromUNIX(self, op, *, arg, unit):
Expand Down
3 changes: 0 additions & 3 deletions ibis/backends/tests/test_generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -1783,9 +1783,6 @@ def test_cast(con, from_type, to_type, from_val, expected):
pytest.mark.notimpl(
["polars"], reason="casts to 1672531200000000000 (nanoseconds)"
),
pytest.mark.notimpl(
["datafusion"], reason="casts to 1672531200000000 (microseconds)"
),
pytest.mark.notimpl(["mysql"], reason="returns 20230101000000"),
pytest.mark.notyet(["mssql"], raises=PyODBCDataError),
],
Expand Down
7 changes: 1 addition & 6 deletions ibis/backends/tests/test_temporal.py
Original file line number Diff line number Diff line change
Expand Up @@ -813,11 +813,6 @@ def convert_to_offset(x):
raises=PyAthenaOperationalError,
reason="not supported in hive",
),
pytest.mark.notyet(
["datafusion"],
raises=Exception,
reason="pyarrow.lib.ArrowNotImplementedError: Unsupported cast",
),
pytest.mark.notimpl(
["oracle"],
raises=com.OperationNotDefinedError,
Expand Down Expand Up @@ -1477,7 +1472,7 @@ def test_date_literal(con, backend):
"impala": "TIMESTAMP",
"snowflake": "TIMESTAMP_NTZ",
"sqlite": "text",
"trino": "timestamp(3)",
"trino": "timestamp(0)",
"athena": "timestamp(3)",
"duckdb": "TIMESTAMP",
"postgres": "timestamp without time zone",
Expand Down
4 changes: 2 additions & 2 deletions ibis/common/temporal.py
Original file line number Diff line number Diff line change
Expand Up @@ -203,7 +203,7 @@ def normalize_timedelta(
return int(value)


def normalize_timezone(tz):
def normalize_timezone(tz) -> datetime.tzinfo | None:
if tz is None:
return None
elif isinstance(tz, str):
Expand All @@ -223,7 +223,7 @@ def normalize_timezone(tz):


@lazy_singledispatch
def normalize_datetime(value):
def normalize_datetime(value) -> datetime.datetime:
raise TypeError(f"Unable to normalize {type(value)} to timestamp")


Expand Down
25 changes: 16 additions & 9 deletions ibis/expr/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -806,11 +806,17 @@
second: int | ir.IntegerValue | Deferred,
/,
timezone: str | None = None,
nullable: bool = True,
) -> TimestampValue: ...


@overload
def timestamp(value_or_year: Any, /, timezone: str | None = None) -> TimestampValue: ...
def timestamp(
value_or_year: str | datetime.datetime,
/,
timezone: str | None = None,
nullable: bool = True,
) -> TimestampValue: ...


@deferrable
Expand All @@ -823,6 +829,7 @@
second=None,
/,
timezone=None,
nullable: bool = True,
):
"""Construct a timestamp scalar or column.

Expand All @@ -843,6 +850,8 @@
The timestamp second component; required if `value_or_year` is a year.
timezone
The timezone name, or `None` for a timezone-naive timestamp.
nullable
Whether the resulting timestamp should be nullable. Defaults to True.

Returns
-------
Expand Down Expand Up @@ -886,20 +895,18 @@
is_ymdhms = any(a is not None for a in args[1:])

if is_ymdhms:
if timezone is not None:
raise NotImplementedError(
"Timezone currently not supported when creating a timestamp from components"
)
return ops.TimestampFromYMDHMS(*args).to_expr()
dtype = dt.Timestamp(timezone=timezone, nullable=nullable, scale=0)
return ops.TimestampFromYMDHMS(*args, dtype=dtype).to_expr()
elif isinstance(value_or_year, (numbers.Real, ir.IntegerValue)):
raise TypeError("Use ibis.literal(...).as_timestamp() instead")
elif isinstance(value_or_year, ir.Expr):
return value_or_year.cast(dt.Timestamp(timezone=timezone))
return value_or_year.cast(dt.Timestamp(timezone=timezone, nullable=nullable))

Check warning on line 903 in ibis/expr/api.py

View check run for this annotation

Codecov / codecov/patch

ibis/expr/api.py#L903

Added line #L903 was not covered by tests
else:
value = normalize_datetime(value_or_year)
tzinfo = normalize_timezone(timezone or value.tzinfo)
timezone = tzinfo.tzname(value) if tzinfo is not None else None
return literal(value, type=dt.Timestamp(timezone=timezone))
value = value.astimezone(tzinfo) if tzinfo is not None else value
dtype = dt.Timestamp.from_datetime(value, nullable=nullable)
return literal(value, type=dtype)


@overload
Expand Down
4 changes: 4 additions & 0 deletions ibis/expr/datatypes/cast.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,10 @@ def higher_precedence(left: dt.DataType, right: dt.DataType) -> dt.DataType:
@public
def highest_precedence(dtypes: Iterator[dt.DataType]) -> dt.DataType:
"""Compute the highest precedence of `dtypes`."""
# TODO: currently,
# highest_precedence([dt.Timestamp(scale=3), dt.Timestamp(timezone="UTC")])
# returns dt.Timestamp(timezone="UTC"), i.e. the scale of the first operand is lost.
# Perhaps it should return dt.Timestamp(scale=3, timezone="UTC") instead.
if collected := list(dtypes):
return functools.reduce(higher_precedence, collected)
else:
Expand Down
Loading
Loading