@@ -579,6 +579,7 @@ def test_hive_partitioned_data(tmpdir, cpu):
     # Make sure the directory structure is hive-like
     df_expect = ddf.compute()
     df_expect = df_expect.sort_values(["id", "x", "y"]).reset_index(drop=True)
+    ts_dtype = df_expect["timestamp"].dtype
     timestamp_check = df_expect["timestamp"].iloc[0]
     name_check = df_expect["name"].iloc[0]
     result_paths = glob.glob(
@@ -596,7 +597,7 @@ def test_hive_partitioned_data(tmpdir, cpu):
     # Read back with dask.dataframe and check the data
     df_check = dd.read_parquet(path, engine="pyarrow").compute()
     df_check["name"] = df_check["name"].astype("object")
-    df_check["timestamp"] = df_check["timestamp"].astype("int64")
+    df_check["timestamp"] = df_check["timestamp"].astype(ts_dtype)
     df_check = df_check.sort_values(["id", "x", "y"]).reset_index(drop=True)
     for col in df_expect:
         # Order of columns can change after round-trip partitioning
@@ -605,7 +606,7 @@ def test_hive_partitioned_data(tmpdir, cpu):
     # Read back with NVT and check the data
     df_check = merlin.io.Dataset(path, engine="parquet").to_ddf().compute()
     df_check["name"] = df_check["name"].astype("object")
-    df_check["timestamp"] = df_check["timestamp"].astype("int64")
+    df_check["timestamp"] = df_check["timestamp"].astype(ts_dtype)
     df_check = df_check.sort_values(["id", "x", "y"]).reset_index(drop=True)
     for col in df_expect:
         # Order of columns can change after round-trip partitioning
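For context on the change itself: the test previously coerced the round-tripped "timestamp" column to a hard-coded int64 before comparing, which only holds when the source data happens to use that dtype. Capturing ts_dtype from df_expect up front lets both read-back checks cast to whatever dtype the expected frame actually carries. Below is a minimal, self-contained sketch of the underlying round-trip behavior, using plain pandas/pyarrow instead of the test's dask and merlin.io fixtures; the frame contents, column values, and temporary path are illustrative assumptions, not values taken from the test suite.

import tempfile

import pandas as pd

# Toy stand-in for df_expect = ddf.compute(); the values are made up.
df_expect = pd.DataFrame(
    {
        "id": [0, 1, 2, 3],
        "timestamp": pd.Series([2020, 2020, 2021, 2021], dtype="int64"),
        "x": [1.0, 2.0, 3.0, 4.0],
    }
)

# Capture the expected dtype before the round trip, as the patch does.
ts_dtype = df_expect["timestamp"].dtype

# Hive-partitioned write: one "timestamp=<value>" directory per key.
path = tempfile.mkdtemp()
df_expect.to_parquet(path, engine="pyarrow", partition_cols=["timestamp"], index=False)

# Partition columns are reconstructed from directory names on read, so they
# usually come back dictionary-encoded (categorical) rather than as int64.
df_check = pd.read_parquet(path, engine="pyarrow")

# Casting to the captured dtype, instead of a hard-coded "int64", restores
# comparability no matter which dtype df_expect started with.
df_check["timestamp"] = df_check["timestamp"].astype(ts_dtype)
df_check = df_check.sort_values(["id"]).reset_index(drop=True)

# Partitioning also moves partition columns to the end, hence the reorder here,
# which mirrors the test's "order of columns can change" comment.
pd.testing.assert_frame_equal(df_check[df_expect.columns], df_expect)

The same reasoning applies to both read-back paths in the test (dd.read_parquet and merlin.io.Dataset): the cast to ts_dtype keeps the comparison valid regardless of which timestamp dtype the expected frame was built with.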