diff --git a/pointblank/field.py b/pointblank/field.py index da76005d3..2f0c84cf1 100644 --- a/pointblank/field.py +++ b/pointblank/field.py @@ -1941,7 +1941,7 @@ def duration_field( ), ) - pb.generate_dataset(schema, n=100, seed=23) + pb.preview(pb.generate_dataset(schema, n=100, seed=23)) ``` Colon-separated strings can also be used for quick duration definitions: @@ -1952,7 +1952,7 @@ def duration_field( break_time=pb.duration_field(min_duration="0:05:00", max_duration="0:30:00"), ) - pb.generate_dataset(schema, n=30, seed=23) + pb.preview(pb.generate_dataset(schema, n=30, seed=23)) ``` Optional durations can be created with `nullable=True`, and duration fields work well @@ -1972,7 +1972,7 @@ def duration_field( ), ) - pb.generate_dataset(schema, n=30, seed=7) + pb.preview(pb.generate_dataset(schema, n=30, seed=7)) ``` """ return DurationField( diff --git a/pointblank/schema.py b/pointblank/schema.py index 84df18deb..9ecaadba2 100644 --- a/pointblank/schema.py +++ b/pointblank/schema.py @@ -1821,7 +1821,7 @@ def test_age_range(generate_dataset): ), ) - pb.generate_dataset(schema, n=50, seed=23) + pb.preview(pb.generate_dataset(schema, n=50, seed=23)) ``` """ return schema.generate( diff --git a/pointblank/validate.py b/pointblank/validate.py index 62432a311..45d7375dc 100644 --- a/pointblank/validate.py +++ b/pointblank/validate.py @@ -90,6 +90,7 @@ _format_to_integer_value, _get_fn_name, _get_tbl_type, + _is_duration_dtype, _is_lazy_frame, _is_lib_present, _is_narwhals_table, @@ -2442,6 +2443,27 @@ def _generate_display_table( none_values = [(k, i) for k, v in none_values.items() for i, val in enumerate(v) if val] + # Cast duration columns to string for display since Great Tables cannot handle duration types; + # the original dtype labels are already captured in `col_dtype_dict` / `col_dtype_dict_short` + # so they will still show correctly in the column headers + duration_cols = [ + col_name + for col_name, col_dtype in col_dtype_dict.items() + if _is_duration_dtype(col_dtype.lower()) + ] + if duration_cols: + if df_lib_name_gt == "polars": + import polars as pl + + for c in duration_cols: + vals = data[c].to_list() + str_vals = [str(v) if v is not None else None for v in vals] + data = data.with_columns(pl.Series(c, str_vals, dtype=pl.Utf8)) + else: + # Pandas (and PySpark converted to Pandas) + for c in duration_cols: + data[c] = data[c].astype(str) + # Import Great Tables to get preliminary renders of the columns import great_tables as gt