Skip to content

Commit cd41865

Browse files
authored
Added warning if data path is a directory but ACES is not in shard mode (#169)
1 parent 0c5f437 commit cd41865

File tree

1 file changed

+8
-0
lines changed

1 file changed

+8
-0
lines changed

src/aces/predicates.py

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -683,6 +683,14 @@ def get_predicates_df(cfg: TaskExtractorConfig, data_config: DictConfig) -> pl.D
683683
standard = data_config.standard
684684
data_path = Path(data_config.path)
685685

686+
expand_shards_enabled = getattr(data_config, "shard", False)
687+
if not expand_shards_enabled and data_path.is_dir():
688+
logger.warning(
689+
"Expand shards is not enabled but your data path is a directory. "
690+
"If you are working with sharded datasets or large-scale queries, using `expand_shards` and "
691+
"`data=sharded` will improve efficiency and completeness."
692+
)
693+
686694
# plain predicates
687695
plain_predicates = cfg.plain_predicates
688696
match standard.lower():

0 commit comments

Comments
 (0)