|
1 | 1 | from pathlib import Path
|
2 | 2 |
|
| 3 | +import ast |
3 | 4 | import duckdb
|
4 | 5 | import numpy as np
|
5 | 6 | import pandas as pd
|
|
11 | 12 | duckdb.sql("SET python_scan_all_frames=true")
|
12 | 13 |
|
13 | 14 | try:
|
14 |
| - import polars |
| 15 | + import polars # noqa: F401 |
15 | 16 |
|
16 | 17 | NO_POLARS = False
|
17 | 18 | except ImportError:
|
@@ -328,6 +329,32 @@ class cDataset(af.Dataset):
|
328 | 329 | cDataset().sql("SELECT v2 FROM df") # "df" != last test's data_a.df
|
329 | 330 |
|
330 | 331 |
|
@pytest.mark.skipif(NO_PYARROW, reason="pyarrow is not installed")
def test_objects_as_metadata():
    """Check that non-string vector comments can be read back from Parquet metadata.

    A dataset whose vector comments are a dict and a list is written with
    ``to_parquet(..., engine="arrow")``. The Arrow schema metadata of the
    resulting file is then decoded and compared with the original comments.
    The Parquet file is removed again whether or not the checks succeed.
    """

    class aDataset(af.Dataset):
        """Objects other than strings can go into metadata."""

        v1 = af.VectorBool(comment={"x": 1, "y": "z"})
        v2 = af.VectorF32(comment=list("abc"))

    def try_ast_literal_eval(x: str):
        """Return ``x`` parsed as a Python literal, or ``x`` itself if parsing fails."""
        try:
            return ast.literal_eval(x)
        except (SyntaxError, ValueError):
            return x

    # NOTE(review): v3 is not declared on aDataset above -- confirm that passing
    # an undeclared keyword here is intentional.
    data = aDataset(v1=[True], v2=[1 / 2], v3=[3])
    test_file_arrow = Path("test_arrow.parquet")
    try:
        data.to_parquet(test_file_arrow, engine="arrow")
        # No reader object is kept in a local variable, so nothing in this
        # function still refers to the file when it is removed below.
        pf_metadata = (
            pyarrow.parquet.ParquetFile(test_file_arrow).schema_arrow.metadata
        )
    finally:
        # Always clean up: the file lives in the current working directory and
        # would otherwise be left behind after the test run.
        test_file_arrow.unlink(missing_ok=True)

    # Metadata keys and values come back as bytes; decode both, then try to
    # turn each value back into a Python object.
    decoded_metadata = {
        k.decode(): try_ast_literal_eval(v.decode()) for k, v in pf_metadata.items()
    }
    assert decoded_metadata.get("v1") == aDataset.v1.comment
    assert decoded_metadata.get("v2") == aDataset.v2.comment
| 356 | + |
| 357 | + |
331 | 358 | @pytest.mark.skipif(NO_POLARS, reason="polars is not installed")
|
332 | 359 | @pytest.mark.skipif(NO_PYARROW, reason="pyarrow is not installed")
|
333 | 360 | def test_to_parquet_with_metadata():
|
|
0 commit comments