@@ -579,6 +579,7 @@ def test_hive_partitioned_data(tmpdir, cpu):
     # Make sure the directory structure is hive-like
     df_expect = ddf.compute()
     df_expect = df_expect.sort_values(["id", "x", "y"]).reset_index(drop=True)
+    ts_dtype = df_expect["timestamp"].dtype
     timestamp_check = df_expect["timestamp"].iloc[0]
     name_check = df_expect["name"].iloc[0]
     result_paths = glob.glob(
@@ -596,7 +597,7 @@ def test_hive_partitioned_data(tmpdir, cpu):
     # Read back with dask.dataframe and check the data
     df_check = dd.read_parquet(path, engine="pyarrow").compute()
     df_check["name"] = df_check["name"].astype("object")
-    df_check["timestamp"] = df_check["timestamp"].astype("int64")
+    df_check["timestamp"] = df_check["timestamp"].astype(ts_dtype)
     df_check = df_check.sort_values(["id", "x", "y"]).reset_index(drop=True)
     for col in df_expect:
         # Order of columns can change after round-trip partitioning
@@ -605,7 +606,7 @@ def test_hive_partitioned_data(tmpdir, cpu):
     # Read back with NVT and check the data
     df_check = merlin.io.Dataset(path, engine="parquet").to_ddf().compute()
     df_check["name"] = df_check["name"].astype("object")
-    df_check["timestamp"] = df_check["timestamp"].astype("int64")
+    df_check["timestamp"] = df_check["timestamp"].astype(ts_dtype)
     df_check = df_check.sort_values(["id", "x", "y"]).reset_index(drop=True)
     for col in df_expect:
         # Order of columns can change after round-trip partitioning
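For context on the change itself: the test previously coerced the round-tripped "timestamp" column to a hard-coded int64 before comparing, which only holds when the source data happens to use that dtype. Capturing ts_dtype from df_expect up front lets both read-back checks cast to whatever dtype the expected frame actually carries. Below is a minimal, self-contained sketch of the underlying round-trip behavior, using plain pandas/pyarrow instead of the test's dask and merlin.io fixtures; the frame contents, column values, and temporary path are illustrative assumptions, not values taken from the test suite.

import tempfile

import pandas as pd

# Toy stand-in for df_expect = ddf.compute(); the values are made up.
df_expect = pd.DataFrame(
    {
        "id": [0, 1, 2, 3],
        "timestamp": pd.Series([2020, 2020, 2021, 2021], dtype="int64"),
        "x": [1.0, 2.0, 3.0, 4.0],
    }
)

# Capture the expected dtype before the round trip, as the patch does.
ts_dtype = df_expect["timestamp"].dtype

# Hive-partitioned write: one "timestamp=<value>" directory per key.
path = tempfile.mkdtemp()
df_expect.to_parquet(path, engine="pyarrow", partition_cols=["timestamp"], index=False)

# Partition columns are reconstructed from directory names on read, so they
# usually come back dictionary-encoded (categorical) rather than as int64.
df_check = pd.read_parquet(path, engine="pyarrow")

# Casting to the captured dtype, instead of a hard-coded "int64", restores
# comparability no matter which dtype df_expect started with.
df_check["timestamp"] = df_check["timestamp"].astype(ts_dtype)
df_check = df_check.sort_values(["id"]).reset_index(drop=True)

# Partitioning also moves partition columns to the end, hence the reorder here,
# which mirrors the test's "order of columns can change" comment.
pd.testing.assert_frame_equal(df_check[df_expect.columns], df_expect)

The same reasoning applies to both read-back paths in the test (dd.read_parquet and merlin.io.Dataset): the cast to ts_dtype keeps the comparison valid regardless of which timestamp dtype the expected frame was built with.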