Skip to content

Commit 4594079

Browse files
authored
missed a typecheck in zephyrization (#2141)
1 parent 5cf4ec1 commit 4594079

2 files changed

Lines changed: 2 additions & 22 deletions

File tree

lib/levanter/src/levanter/data/audio.py

Lines changed: 0 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -391,7 +391,6 @@ def _has_validation_set(self):
391391
def build_or_load_cache(
392392
self,
393393
split: str,
394-
logger_name: Optional[str] = None,
395394
cache_options: CacheOptions = CacheOptions.default(),
396395
) -> Optional[ProcessedAudioCache]:
397396
split_cache_dir = os.path.join(self.cache_dir, split)
@@ -577,14 +576,6 @@ def build_caches(self, split: str) -> Dict[str, ProcessedAudioCache]:
577576
else:
578577
caches[name] = cache
579578

580-
# in practice it works best if we block on validation caches
581-
if split == "validation":
582-
for cache in caches.values():
583-
cache.cache.await_finished()
584-
585-
else:
586-
logger.info(f"Not waiting for {split} caches to finish building")
587-
588579
return caches
589580

590581
@property

lib/levanter/src/levanter/data/text.py

Lines changed: 2 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -915,7 +915,7 @@ def build_lm_dataset_cache(
915915
tokenizer: HfTokenizer,
916916
options: CacheOptions = CacheOptions.default(),
917917
enforce_eos=True,
918-
):
918+
) -> TreeCache[dict]:
919919
"""
920920
Creates a cache for a dataset. If the cache already exists, it will be loaded. Otherwise, it will be built.
921921
@@ -943,10 +943,6 @@ def build_lm_dataset_cache(
943943
except FileNotFoundError:
944944
pass
945945

946-
if source is None:
947-
logger.info(f"No data for {name}")
948-
return None
949-
950946
logger.info(f"Building cache for {name}...")
951947
return build_or_load_cache(
952948
cache_dir,
@@ -1339,7 +1335,7 @@ def validation_sets(self, Pos: Axis) -> Mapping[str, AsyncDataset[LmExample]]:
13391335
return validation_datasets
13401336

13411337
def build_caches(self, split: str) -> Dict[str, TreeCache[dict]]:
1342-
caches = {}
1338+
caches: dict[str, TreeCache[dict]] = {}
13431339
for name, source_config in self.configs.items():
13441340
# Skip datasets with zero weight in all stages
13451341
if isinstance(self.train_weights, dict):
@@ -1387,13 +1383,6 @@ def build_caches(self, split: str) -> Dict[str, TreeCache[dict]]:
13871383
self.enforce_eos,
13881384
)
13891385

1390-
# In practice, it works best if we block on validation caches
1391-
if split == "validation":
1392-
for cache in caches.values():
1393-
cache.await_finished()
1394-
else:
1395-
logger.info(f"Not waiting for {split} caches to finish building")
1396-
13971386
return caches
13981387

13991388
@property

0 commit comments

Comments
 (0)