Skip to content

Commit d8ebc2f

Browse files
committed
Exclude invalid files when reading the parquet dataset
1 parent c43ec80 commit d8ebc2f

1 file changed

Lines changed: 5 additions & 1 deletion

File tree

src/spherinator/data/parquet_dataset.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,11 @@ def __init__(
         if not isinstance(data_column, list):
             data_column = [data_column]
 
-        dataset = ds.dataset(data_directory, format="parquet")
+        dataset = ds.dataset(
+            data_directory,
+            format="parquet",
+            exclude_invalid_files=True,
+        )
         table = dataset.to_table(columns=data_column)
         self.transform = transform
         self.with_index = with_index

0 commit comments

Comments
 (0)