From f06313beac3da6a8e279641eea1d75feee395274 Mon Sep 17 00:00:00 2001
From: brandon-b-miller
Date: Tue, 1 Apr 2025 15:22:37 -0700
Subject: [PATCH 1/3] basic

---
Review note: the CastTimeUnit branch now raises TypeError instead of failing
on an unbound `dtype` when the column is neither a timestamp nor a duration.
The hunk header and diffstat below are updated to match. The `index` blob ids
and commit ids are carried over from the original series and must be
regenerated (e.g. by re-committing) before relying on them.

 .../cudf_polars/dsl/expressions/datetime.py   | 16 ++++++++++++++++
 .../tests/expressions/test_datetime_basic.py  | 15 +++++++++++++++
 2 files changed, 31 insertions(+)

diff --git a/python/cudf_polars/cudf_polars/dsl/expressions/datetime.py b/python/cudf_polars/cudf_polars/dsl/expressions/datetime.py
index ec60024c1b0..993f90f0cab 100644
--- a/python/cudf_polars/cudf_polars/dsl/expressions/datetime.py
+++ b/python/cudf_polars/cudf_polars/dsl/expressions/datetime.py
@@ -112,6 +112,7 @@ def from_polars(cls, obj: pl_expr.TemporalFunction) -> Self:
         Name.IsoYear,
         Name.MonthStart,
         Name.MonthEnd,
+        Name.CastTimeUnit,
     }
 
     def __init__(
@@ -142,6 +143,21 @@ def do_evaluate(
             for child in self.children
         ]
         (column,) = columns
+        if self.name is TemporalFunction.Name.CastTimeUnit:
+            # The single option is the target time unit.
+            (unit,) = self.options
+            if plc.traits.is_timestamp(column.obj.type()):
+                dtype = plc.interop.from_arrow(pa.timestamp(unit))
+            elif plc.traits.is_duration(column.obj.type()):
+                dtype = plc.interop.from_arrow(pa.duration(unit))
+            else:
+                # Fail clearly rather than leaving `dtype` unbound below.
+                raise TypeError(
+                    "cast_time_unit requires a datetime or duration column"
+                )
+            result = plc.unary.cast(column.obj, dtype)
+            return Column(result)
+
         if self.name is TemporalFunction.Name.Week:
             result = plc.strings.convert.convert_integers.to_integers(
                 plc.strings.convert.convert_datetime.from_timestamps(
diff --git a/python/cudf_polars/tests/expressions/test_datetime_basic.py b/python/cudf_polars/tests/expressions/test_datetime_basic.py
index 73735255b9f..adb12bbde19 100644
--- a/python/cudf_polars/tests/expressions/test_datetime_basic.py
+++ b/python/cudf_polars/tests/expressions/test_datetime_basic.py
@@ -265,3 +265,18 @@ def test_isoyear():
     q = df.with_columns(pl.col("date").dt.iso_year().alias("isoyear"))
 
     assert_gpu_result_equal(q)
+
+
+@pytest.mark.parametrize(
+    "dtype", [pl.Date(), pl.Datetime("ms"), pl.Datetime("us"), pl.Datetime("ns")]
+)
+@pytest.mark.parametrize("time_unit", ["ms", "us", "ns"])
+def test_datetime_cast_time_unit(dtype, time_unit):
+    sr = pl.Series(
+        [datetime.datetime(2001, 1, 1), datetime.datetime(2001, 1, 3)], dtype=dtype
+    )
+    df = pl.DataFrame({"date": sr}).lazy()
+
+    q = df.select(pl.col("date").dt.cast_time_unit(time_unit).alias("time_unit_ms"))
+
+    assert_gpu_result_equal(q)

From b8217354fa7def16c2fc21efda5f70ecbeded151 Mon Sep 17 00:00:00 2001
From: brandon-b-miller
Date: Wed, 2 Apr 2025 10:04:37 -0700
Subject: [PATCH 2/3] flesh out test data

---
 .../tests/expressions/test_datetime_basic.py | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/python/cudf_polars/tests/expressions/test_datetime_basic.py b/python/cudf_polars/tests/expressions/test_datetime_basic.py
index adb12bbde19..e8d2a64f675 100644
--- a/python/cudf_polars/tests/expressions/test_datetime_basic.py
+++ b/python/cudf_polars/tests/expressions/test_datetime_basic.py
@@ -273,7 +273,15 @@ def test_isoyear():
 @pytest.mark.parametrize("time_unit", ["ms", "us", "ns"])
 def test_datetime_cast_time_unit(dtype, time_unit):
     sr = pl.Series(
-        [datetime.datetime(2001, 1, 1), datetime.datetime(2001, 1, 3)], dtype=dtype
+        "date",
+        [
+            datetime.datetime(1970, 1, 1, 0, 0, 0),
+            datetime.datetime(1999, 12, 31, 23, 59, 59),
+            datetime.datetime(2001, 1, 1, 12, 0, 0),
+            datetime.datetime(2020, 2, 29, 23, 59, 59),
+            datetime.datetime(2024, 12, 31, 23, 59, 59, 999999),
+        ],
+        dtype=dtype,
     )
     df = pl.DataFrame({"date": sr}).lazy()
 

From 1633922c0ee0501cde19d5361618ac838f200f96 Mon Sep 17 00:00:00 2001
From: brandon-b-miller
Date: Tue, 15 Apr 2025 09:02:32 -0700
Subject: [PATCH 3/3] add tests

---
 .../tests/expressions/test_datetime_basic.py | 24 ++++++++++++++++++-
 1 file changed, 23 insertions(+), 1 deletion(-)

diff --git a/python/cudf_polars/tests/expressions/test_datetime_basic.py b/python/cudf_polars/tests/expressions/test_datetime_basic.py
index 24aee3a8f8f..0c7d5c66e0a 100644
--- a/python/cudf_polars/tests/expressions/test_datetime_basic.py
+++ b/python/cudf_polars/tests/expressions/test_datetime_basic.py
@@ -302,7 +302,7 @@ def test_isoyear():
     "dtype", [pl.Date(), pl.Datetime("ms"), pl.Datetime("us"), pl.Datetime("ns")]
 )
 @pytest.mark.parametrize("time_unit", ["ms", "us", "ns"])
-def test_datetime_cast_time_unit(dtype, time_unit):
+def test_datetime_cast_time_unit_datetime(dtype, time_unit):
     sr = pl.Series(
         "date",
         [
@@ -319,3 +319,25 @@ def test_datetime_cast_time_unit(dtype, time_unit):
     q = df.select(pl.col("date").dt.cast_time_unit(time_unit).alias("time_unit_ms"))
 
     assert_gpu_result_equal(q)
+
+
+@pytest.mark.parametrize(
+    "dtype", [pl.Duration("ms"), pl.Duration("us"), pl.Duration("ns")]
+)
+@pytest.mark.parametrize("time_unit", ["ms", "us", "ns"])
+def test_datetime_cast_time_unit_duration(dtype, time_unit):
+    sr = pl.Series(
+        "date",
+        [
+            datetime.timedelta(days=1),
+            datetime.timedelta(days=2),
+            datetime.timedelta(days=3),
+            datetime.timedelta(days=4),
+            datetime.timedelta(days=5),
+        ],
+        dtype=dtype,
+    )
+    df = pl.DataFrame({"date": sr}).lazy()
+
+    q = df.select(pl.col("date").dt.cast_time_unit(time_unit).alias("time_unit_ms"))
+    assert_gpu_result_equal(q)