Skip to content

Commit 0e565b1

Browse files
authored
Consistent ordering of cached data files between deeplearning and Great Lakes (#1075)
Sort files from cached_data_path when loading them in
1 parent 1baf4d4 commit 0e565b1

File tree

1 file changed

+3
-1
lines changed

1 file changed

+3
-1
lines changed

bliss/cached_dataset.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -266,7 +266,9 @@ def setup(self, stage: str) -> None: # noqa: WPS324
266266
raise RuntimeError(f"setup skips stage {stage}")
267267

268268
def _load_file_paths_and_slices(self):
269-
file_names = [f for f in os.listdir(str(self.cached_data_path)) if f.endswith(".pt")]
269+
file_names = [
270+
f for f in sorted(os.listdir(str(self.cached_data_path))) if f.endswith(".pt")
271+
]
270272
if self.subset_fraction:
271273
file_names = file_names[: math.ceil(len(file_names) * self.subset_fraction)]
272274
self.file_paths = [os.path.join(str(self.cached_data_path), f) for f in file_names]

0 commit comments

Comments (0)