Skip to content

Commit 9b04c7b

Browse files
author
The TensorFlow Datasets Authors
committed
Fix incompatibility of tfds.Builder.as_data_source with MultiSplitInfo.
PiperOrigin-RevId: 857137643
1 parent 982a819 commit 9b04c7b

File tree

2 files changed

+18
-11
lines changed

2 files changed

+18
-11
lines changed

tensorflow_datasets/core/dataset_builder.py

Lines changed: 15 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -890,18 +890,22 @@ def as_data_source(
890890
)
891891
chosen_format = file_format
892892
else:
893-
chosen_format = suitable_formats.pop()
894-
logging.info(
895-
"Found random access formats: %s. Chose to use %s. Overriding file"
896-
" format in the dataset info.",
897-
", ".join([f.name for f in suitable_formats]),
898-
chosen_format,
899-
)
893+
if info.file_format in suitable_formats:
894+
chosen_format = info.file_format
895+
else:
896+
chosen_format = suitable_formats.pop()
897+
logging.info(
898+
"Found random access formats: %s. Chose to use %s. Overriding file"
899+
" format in the dataset info.",
900+
", ".join([f.name for f in suitable_formats]),
901+
chosen_format,
902+
)
900903

901-
# Change the dataset info to read from a random access format.
902-
info.set_file_format(
903-
chosen_format, override=True, override_if_initialized=True
904-
)
904+
if info.file_format != chosen_format:
905+
# Change the dataset info to read from a random access format.
906+
info.set_file_format(
907+
chosen_format, override=True, override_if_initialized=True
908+
)
905909

906910
# Create a dataset for each of the given splits
907911
def build_single_data_source(split: str) -> Sequence[Any]:

tensorflow_datasets/core/dataset_info.py

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -538,6 +538,9 @@ def set_file_format(
538538
updated_split_infos = []
539539
for split_info in self.splits.values():
540540
if split_info.filename_template is None:
541+
logging.warning(
542+
"Split %s has no filename template, skipping.", split_info.name
543+
)
541544
continue
542545
updated_split_info = split_info.replace(
543546
filename_template=split_info.filename_template.replace(

0 commit comments

Comments
 (0)