Skip to content

Commit 33d8cd1

Browse files
committed
Add flwr_datasets_creator CLI
1 parent 3e79051 commit 33d8cd1

File tree

3 files changed

+120
-0
lines changed

3 files changed

+120
-0
lines changed
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
# Copyright 2025 Flower Labs GmbH. All Rights Reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
# ==============================================================================
15+
"""Flower Datasets command line interface."""
16+
17+
18+
from .flwr_datasets_creator import flwr_datasets_creator
19+
20+
__all__ = [
21+
"flwr_datasets_creator",
22+
]
Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
# Copyright 2025 Flower Labs GmbH. All Rights Reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
# ==============================================================================
15+
"""`flwr-datasets-creator` command."""
16+
17+
18+
import argparse
19+
import shutil
20+
from pathlib import Path
21+
22+
import typer
23+
from flwr_datasets import FederatedDataset
24+
from flwr_datasets.partitioner import IidPartitioner
25+
26+
27+
def flwr_datasets_creator() -> None:
    """Create a federated dataset and save it to disk.

    This command is used to generate demo data, and currently supports
    only IID partitioning via `IidPartitioner`.

    The command-line arguments are read from `sys.argv` using the parser
    returned by `_parse_args_run_creator()`. The "train" split of the dataset
    given by `--name` is partitioned into `--num-partitions` partitions, and
    each partition is written to `<out-dir>/partition_<partition_id>`.

    If the output path already exists, the user is asked for confirmation
    before it is removed. Declining the prompt aborts the command without
    touching the existing path.
    """
    parser = _parse_args_run_creator()
    args = parser.parse_args()

    # Validate number of partitions (`parser.error` prints usage and exits)
    if args.num_partitions <= 0:
        parser.error("--num-partitions must be a positive integer.")

    # Handle output directory. `Path.exists()` follows symlinks, so a dangling
    # symlink is checked for explicitly; otherwise `mkdir` below would fail.
    target_dir = args.out_dir
    if target_dir.exists() or target_dir.is_symlink():
        overwrite = typer.confirm(
            f"Output directory '{args.out_dir}' already exists. Overwrite?",
            default=False,
        )
        if not overwrite:
            typer.echo("Aborting.")
            return

        # `shutil.rmtree` only works on real directories: it raises for
        # regular files and refuses to operate on symbolic links.
        if target_dir.is_dir() and not target_dir.is_symlink():
            shutil.rmtree(target_dir)
        else:
            target_dir.unlink()

    target_dir.mkdir(parents=True, exist_ok=True)

    # Create data partitioner
    partitioner = IidPartitioner(num_partitions=args.num_partitions)

    # Create the federated dataset
    fds = FederatedDataset(
        dataset=args.name,
        partitioners={"train": partitioner},
    )

    # Load partitions and save them to disk
    for partition_id in range(args.num_partitions):
        partition = fds.load_partition(partition_id=partition_id)
        partition.save_to_disk(target_dir / f"partition_{partition_id}")
68+
69+
70+
def _parse_args_run_creator() -> argparse.ArgumentParser:
    """Build the argument parser for the `flwr-datasets-creator` command.

    Only the parser is constructed here; the caller is responsible for
    calling `parse_args()` on the returned object.

    Returns
    -------
    argparse.ArgumentParser
        Parser accepting `--name` (required string), `--num-partitions`
        (integer, defaults to 10) and `--out-dir` (`Path`, defaults to
        `./federated_dataset`).
    """
    # Option table: flag -> keyword arguments forwarded to `add_argument`.
    # Insertion order determines the order shown in `--help`.
    option_specs = {
        "--name": {
            "type": str,
            "required": True,
            "help": "Hugging Face dataset identifier (e.g., 'username/dataset_name').",
        },
        "--num-partitions": {
            "type": int,
            "default": 10,
            "help": "Number of partitions to create for the federated dataset.",
        },
        "--out-dir": {
            "type": Path,
            "default": Path("./federated_dataset"),
            "help": "Output directory for the federated dataset.",
        },
    }

    cli_parser = argparse.ArgumentParser(
        description="Create federated dataset partitions and save them to disk.",
    )
    for flag, spec in option_specs.items():
        cli_parser.add_argument(flag, **spec)

    return cli_parser

datasets/pyproject.toml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,10 @@ classifiers = [
4949
packages = [{ include = "flwr_datasets", from = "./" }]
5050
exclude = ["./**/*_test.py"]
5151

52+
[tool.poetry.scripts]
53+
# `flwr-datasets` CLI
54+
flwr-datasets-creator = "flwr_datasets.cli:flwr_datasets_creator"
55+
5256
[tool.poetry.dependencies]
5357
python = "^3.10"
5458
numpy = ">=1.26.0,<3.0.0"

0 commit comments

Comments
 (0)