Skip to content

Commit 3700745

Browse files
tomvdw and The TensorFlow Datasets Authors
authored and committed
Expose num_shards flag to tfds build
This is especially handy in combination with setting nondeterministic order to true. (PiperOrigin-RevId: 702686971)
1 parent eaefd56 commit 3700745

File tree

2 files changed

+5
-0
lines changed

2 files changed

+5
-0
lines changed

tensorflow_datasets/scripts/cli/build.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -381,6 +381,8 @@ def _make_download_config(
381381
kwargs = {}
382382
if args.max_shard_size_mb:
383383
kwargs['max_shard_size'] = args.max_shard_size_mb << 20
384+
if args.num_shards:
385+
kwargs['num_shards'] = args.num_shards
384386
if args.download_config:
385387
kwargs.update(json.loads(args.download_config))
386388

tensorflow_datasets/scripts/cli/cli_utils.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -255,6 +255,9 @@ def add_generation_argument_group(parser: argparse.ArgumentParser):
255255
generation_group.add_argument(
256256
'--max_shard_size_mb', type=int, help='The max shard size in megabytes.'
257257
)
258+
generation_group.add_argument(
259+
'--num_shards', type=int, help='The number of shards to write to.'
260+
)
258261
generation_group.add_argument(
259262
'--num-processes',
260263
type=int,

0 commit comments

Comments
 (0)