Quantco · svengiegerich · May 27, 2025 · May 27, 2025 · May 27, 2025
@@ -8,6 +8,7 @@
 from typing import Any, cast
 
 import polars as pl
+from polars._typing import TimeUnit
 
 from dataframely._compat import pa, sa, sa_mssql, sa_TypeEngine
 from dataframely._polars import (
@@ -300,6 +301,7 @@ def __init__(
         max_exclusive: dt.datetime | None = None,
         resolution: str | None = None,
         time_zone: str | dt.tzinfo | None = None,
+        time_unit: TimeUnit = "us",
         check: (
             Callable[[pl.Expr], pl.Expr]
             | list[Callable[[pl.Expr], pl.Expr]]
@@ -330,6 +332,7 @@ def __init__(
             time_zone: The time zone that datetimes in the column must have. The time
                 zone must use a valid IANA time zone name identifier e.x. ``Etc/UTC`` or
                 ``America/New_York``.
+            time_unit: Unit of time. Defaults to ``us`` (microseconds).
             check: A custom rule or multiple rules to run for this column. This can be:
                 - A single callable that returns a non-aggregated boolean expression.
                 The name of the rule is derived from the callable name, or defaults to
@@ -373,10 +376,12 @@ def __init__(
         )
         self.resolution = resolution
         self.time_zone = time_zone
+        self.time_unit = time_unit
 
     @property
     def dtype(self) -> pl.DataType:
-        return pl.Datetime(time_zone=self.time_zone)
+        # The polars dtype carries both the configured time zone and time unit.
+        return pl.Datetime(time_zone=self.time_zone, time_unit=self.time_unit)
 
     def validation_rules(self, expr: pl.Expr) -> dict[str, pl.Expr]:
         result = super().validation_rules(expr)
@@ -400,7 +404,7 @@ def pyarrow_dtype(self) -> pa.DataType:
             if isinstance(self.time_zone, dt.tzinfo)
             else self.time_zone
         )
-        return pa.timestamp("us", time_zone)
+        return pa.timestamp(self.time_unit, time_zone)
 
     def _sample_unchecked(self, generator: Generator, n: int) -> pl.Series:
         return generator.sample_datetime(
@@ -416,6 +420,7 @@ def _sample_unchecked(self, generator: Generator, n: int) -> pl.Series:
                 allow_null_response=True,
             ),
             resolution=self.resolution,
+            time_unit=self.time_unit,
             time_zone=self.time_zone,
             null_probability=self._null_probability,
         )

@@ -7,6 +7,7 @@
 
 import numpy as np
 import polars as pl
+from polars._typing import TimeUnit
 
 from ._extre import sample as extre_sample
 from ._polars import (
@@ -294,6 +295,7 @@ def sample_datetime(
         max: dt.datetime | None,
         resolution: str | None = None,
         time_zone: str | dt.tzinfo | None = None,
+        time_unit: TimeUnit = "us",
         null_probability: float = 0.0,
     ) -> pl.Series:
         """Sample a list of datetimes in the provided range.
@@ -303,7 +305,9 @@ def sample_datetime(
             min: The minimum datetime to sample (inclusive).
             max: The maximum datetime to sample (exclusive). '10000-01-01' when ``None``.
             resolution: The resolution that datetimes in the column must have. This uses
-                the formatting language used by :mod:`polars` datetime ``round`` method.
+                the formatting language used by :mod:`polars` datetime ``round``
+                method.
+            time_unit: Time unit of the column. Defaults to ``us`` (microseconds).
             time_zone: The time zone that datetimes in the column must have. The time
                 zone must use a valid IANA time zone name identifier e.x. ``Etc/UTC`` or
                 ``America/New_York``.
@@ -333,7 +337,14 @@ def sample_datetime(
             )
             # NOTE: polars tracks datetimes relative to epoch
             - _datetime_to_microseconds(EPOCH_DATETIME)
-        ).cast(pl.Datetime(time_zone=time_zone))
+        ).cast(pl.Datetime(time_unit="us", time_zone=time_zone))
+        # NOTE: The sampled integers count microseconds. Casting integers to a
+        # datetime reinterprets them in the target unit, so we first build a
+        # microsecond datetime and only then convert it to the requested unit.
+        # NOTE(review): ``ns`` cannot represent datetimes past the year 2262 --
+        # confirm how the default upper bound should behave for that unit.
+        if time_unit != "us":
+            result = result.dt.cast_time_unit(time_unit)
 
         if resolution is not None:
             return result.dt.truncate(resolution)

@@ -396,6 +396,13 @@ def test_validate_resolution(
             min=dt.datetime(2020, 1, 1), max=dt.datetime(2021, 1, 1), resolution="1h"
         ),
         dy.Datetime(time_zone="Etc/UTC"),
+        dy.Datetime(time_unit="ms"),
+        dy.Datetime(
+            min=dt.datetime(2020, 1, 1),
+            max=dt.datetime(2021, 1, 1),
+            resolution="1h",
+            time_unit="us",
+        ),
     ],
 )
 def test_sample(column: dy.Column) -> None:

@@ -2,6 +2,7 @@
 # SPDX-License-Identifier: BSD-3-Clause
 
 import pytest
+from polars._typing import TimeUnit
 
 import dataframely as dy
 from dataframely.columns import Column
@@ -121,3 +122,11 @@ def test_nullability_information_struct(inner: Column, nullable: bool) -> None:
 def test_multiple_columns() -> None:
     schema = create_schema("test", {"a": dy.Int32(nullable=False), "b": dy.Integer()})
     assert str(schema.pyarrow_schema()).split("\n") == ["a: int32 not null", "b: int64"]
+
+
+@pytest.mark.parametrize("time_unit", ["ns", "us", "ms"])
+def test_datetime_time_unit(time_unit: TimeUnit) -> None:
+    # The configured time unit must carry over into the pyarrow timestamp type.
+    column = dy.Datetime(time_unit=time_unit)
+    arrow_schema = create_schema("test", {"a": column}).pyarrow_schema()
+    assert str(arrow_schema) == f"a: timestamp[{time_unit}]"