Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions python/pyarrow/_parquet.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ cdef shared_ptr[ArrowWriterProperties] _create_arrow_writer_properties(
writer_engine_version=*,
use_compliant_nested_type=*,
store_schema=*,
write_time_adjusted_to_utc=*,
) except *


Expand Down
9 changes: 7 additions & 2 deletions python/pyarrow/_parquet.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -2202,7 +2202,8 @@ cdef shared_ptr[ArrowWriterProperties] _create_arrow_writer_properties(
allow_truncated_timestamps=False,
writer_engine_version=None,
use_compliant_nested_type=True,
store_schema=True) except *:
store_schema=True,
write_time_adjusted_to_utc=False) except *:
"""Arrow writer properties"""
cdef:
shared_ptr[ArrowWriterProperties] arrow_properties
Expand Down Expand Up @@ -2251,6 +2252,8 @@ cdef shared_ptr[ArrowWriterProperties] _create_arrow_writer_properties(
elif writer_engine_version != "V2":
raise ValueError(f"Unsupported Writer Engine Version: {writer_engine_version}")

arrow_props.set_time_adjusted_to_utc(write_time_adjusted_to_utc)

arrow_properties = arrow_props.build()

return arrow_properties
Expand Down Expand Up @@ -2312,7 +2315,8 @@ cdef class ParquetWriter(_Weakrefable):
write_page_checksum=False,
sorting_columns=None,
store_decimal_as_integer=False,
use_content_defined_chunking=False):
use_content_defined_chunking=False,
write_time_adjusted_to_utc=False):
cdef:
shared_ptr[WriterProperties] properties
shared_ptr[ArrowWriterProperties] arrow_properties
Expand Down Expand Up @@ -2356,6 +2360,7 @@ cdef class ParquetWriter(_Weakrefable):
writer_engine_version=writer_engine_version,
use_compliant_nested_type=use_compliant_nested_type,
store_schema=store_schema,
write_time_adjusted_to_utc=write_time_adjusted_to_utc,
)

pool = maybe_unbox_memory_pool(memory_pool)
Expand Down
1 change: 1 addition & 0 deletions python/pyarrow/includes/libparquet.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -519,6 +519,7 @@ cdef extern from "parquet/api/writer.h" namespace "parquet" nogil:
Builder* enable_compliant_nested_types()
Builder* disable_compliant_nested_types()
Builder* set_engine_version(ArrowWriterEngineVersion version)
Builder* set_time_adjusted_to_utc(c_bool adjusted)
shared_ptr[ArrowWriterProperties] build()
c_bool support_deprecated_int96_timestamps()

Expand Down
9 changes: 9 additions & 0 deletions python/pyarrow/parquet/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -939,6 +939,11 @@ def _sanitize_table(table, new_schema, flavor):
balance between deduplication ratio and fragmentation. Use norm_level=1 or
norm_level=2 to reach a higher deduplication ratio at the expense of
fragmentation.
write_time_adjusted_to_utc : bool, default False
If True, TIME columns are marked as expressed in reference to
midnight in the UTC timezone. If False, they are not tied to UTC
and are expressed in reference to midnight in an unknown,
presumably local, timezone.
"""

_parquet_writer_example_doc = """\
Expand Down Expand Up @@ -1035,6 +1040,7 @@ def __init__(self, where, schema, filesystem=None,
write_page_checksum=False,
sorting_columns=None,
store_decimal_as_integer=False,
write_time_adjusted_to_utc=False,
**options):
if use_deprecated_int96_timestamps is None:
# Use int96 timestamps for Spark
Expand Down Expand Up @@ -1088,6 +1094,7 @@ def __init__(self, where, schema, filesystem=None,
write_page_checksum=write_page_checksum,
sorting_columns=sorting_columns,
store_decimal_as_integer=store_decimal_as_integer,
write_time_adjusted_to_utc=write_time_adjusted_to_utc,
**options)
self.is_open = True

Expand Down Expand Up @@ -1949,6 +1956,7 @@ def write_table(table, where, row_group_size=None, version='2.6',
write_page_checksum=False,
sorting_columns=None,
store_decimal_as_integer=False,
write_time_adjusted_to_utc=False,
**kwargs):
# Implementor's note: when adding keywords here / updating defaults, also
# update it in write_to_dataset and _dataset_parquet.pyx ParquetFileWriteOptions
Expand Down Expand Up @@ -1980,6 +1988,7 @@ def write_table(table, where, row_group_size=None, version='2.6',
write_page_checksum=write_page_checksum,
sorting_columns=sorting_columns,
store_decimal_as_integer=store_decimal_as_integer,
write_time_adjusted_to_utc=write_time_adjusted_to_utc,
**kwargs) as writer:
writer.write_table(table, row_group_size=row_group_size)
except Exception:
Expand Down
40 changes: 40 additions & 0 deletions python/pyarrow/tests/parquet/test_parquet_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -447,3 +447,43 @@ def test_parquet_content_defined_chunking_parameters(tempdir):
# using min_chunk_size, max_chunk_size and norm_level
cdc_options = {"min_chunk_size": 32_768, "max_chunk_size": 65_536, "norm_level": 1}
pq.write_table(table, path, use_content_defined_chunking=cdc_options)


@pytest.mark.parametrize("time_type, time_unit", [
    (pa.time32, "s"),
    (pa.time32, "ms"),
    (pa.time64, "us"),
    (pa.time64, "ns"),
])
@pytest.mark.parametrize("utc_flag_val", [False, True])
def test_arrow_writer_props_time_adjusted_to_utc(
    tempdir,
    utc_flag_val,
    time_type,
    time_unit,
):
    # GH-47441: `write_time_adjusted_to_utc` must control the isAdjustedToUTC
    # field of the Parquet TIME logical type, and the data must round-trip
    # unchanged for either flag value.
    filename = tempdir / "time_adjusted_to_utc.parquet"

    # Values spanning midnight (0) up to the last second of the day (86_399),
    # valid for every tested time unit.
    time_values = [0, 123, 10_000, 86_399]
    schema = pa.schema([("time_col", time_type(time_unit))])
    table = pa.table({
        "time_col": pa.array(time_values, type=time_type(time_unit)),
    })

    with pq.ParquetWriter(
        where=filename,
        schema=schema,
        write_time_adjusted_to_utc=utc_flag_val,
    ) as writer:
        writer.write_table(table)

    # Check that the flag actually reached the file metadata: a plain
    # round-trip equality check passes regardless of the option's value,
    # since Arrow time types carry no UTC-adjustment information.
    logical_type = pq.ParquetFile(filename).schema.column(0).logical_type
    assert logical_type.type == "TIME"
    # NOTE(review): relies on the ParquetLogicalType string form containing
    # "isAdjustedToUTC=<true|false>" — confirm against the pyarrow build.
    assert f"isAdjustedToUTC={str(utc_flag_val).lower()}" in str(logical_type)

    result = pq.read_table(filename, schema=schema)

    result.validate(full=True)

    assert result.equals(table)
Loading