From 33d8cd13c42f87b84072dfabb8a3bc2035576dfa Mon Sep 17 00:00:00 2001 From: yan-gao-GY Date: Tue, 3 Feb 2026 15:48:16 +0000 Subject: [PATCH 01/21] Add flwr_datasets_creator CLI --- datasets/flwr_datasets/cli/__init__.py | 22 +++++ .../cli/flwr_datasets_creator.py | 94 +++++++++++++++++++ datasets/pyproject.toml | 4 + 3 files changed, 120 insertions(+) create mode 100644 datasets/flwr_datasets/cli/__init__.py create mode 100644 datasets/flwr_datasets/cli/flwr_datasets_creator.py diff --git a/datasets/flwr_datasets/cli/__init__.py b/datasets/flwr_datasets/cli/__init__.py new file mode 100644 index 000000000000..2e2b51c75040 --- /dev/null +++ b/datasets/flwr_datasets/cli/__init__.py @@ -0,0 +1,22 @@ +# Copyright 2025 Flower Labs GmbH. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Flower Datasets command line interface.""" + + +from .flwr_datasets_creator import flwr_datasets_creator + +__all__ = [ + "flwr_datasets_creator", +] diff --git a/datasets/flwr_datasets/cli/flwr_datasets_creator.py b/datasets/flwr_datasets/cli/flwr_datasets_creator.py new file mode 100644 index 000000000000..481ea1e034de --- /dev/null +++ b/datasets/flwr_datasets/cli/flwr_datasets_creator.py @@ -0,0 +1,94 @@ +# Copyright 2025 Flower Labs GmbH. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""`flwr-datasets-creator` command.""" + + +import argparse +import shutil +from pathlib import Path + +import typer +from flwr_datasets import FederatedDataset +from flwr_datasets.partitioner import IidPartitioner + + +def flwr_datasets_creator() -> None: + """Create a federated dataset and save it to disk. + + This command is used to generated demo data, and currently supports + only IID partitioning via `IidPartitioner`. + """ + parser = _parse_args_run_creator() + args = parser.parse_args() + + # Validate number of partitions + if args.num_partitions <= 0: + parser.error("--num-partitions must be a positive integer.") + + # Handle output directory + if args.out_dir.exists(): + overwrite = typer.confirm( + f"Output directory '{args.out_dir}' already exists. 
Overwrite?", + default=False, + ) + if not overwrite: + typer.echo("Aborting.") + return + + shutil.rmtree(args.out_dir) + + args.out_dir.mkdir(parents=True, exist_ok=True) + + # Create data partitioner + partitioner = IidPartitioner(num_partitions=args.num_partitions) + + # Create the federated dataset + fds = FederatedDataset( + dataset=args.name, + partitioners={"train": partitioner}, + ) + + # Load partitions and save them to disk + for partition_id in range(args.num_partitions): + partition = fds.load_partition(partition_id=partition_id) + out_dir = args.out_dir / f"partition_{partition_id}" + partition.save_to_disk(out_dir) + + +def _parse_args_run_creator() -> argparse.ArgumentParser: + """Parse flwr-datasets-creator command-line arguments.""" + parser = argparse.ArgumentParser( + description="Create federated dataset partitions and save them to disk.", + ) + parser.add_argument( + "--name", + required=True, + type=str, + help="Hugging Face dataset identifier (e.g., 'username/dataset_name').", + ) + parser.add_argument( + "--num-partitions", + default=10, + type=int, + help="Number of partitions to create for the federated dataset.", + ) + parser.add_argument( + "--out-dir", + default=Path("./federated_dataset"), + type=Path, + help="Output directory for the federated dataset.", + ) + + return parser diff --git a/datasets/pyproject.toml b/datasets/pyproject.toml index 25e54e1223b4..218eccff70cc 100644 --- a/datasets/pyproject.toml +++ b/datasets/pyproject.toml @@ -49,6 +49,10 @@ classifiers = [ packages = [{ include = "flwr_datasets", from = "./" }] exclude = ["./**/*_test.py"] +[tool.poetry.scripts] +# `flwr-datasets` CLI +flwr-datasets-creator = "flwr_datasets.cli:flwr_datasets_creator" + [tool.poetry.dependencies] python = "^3.10" numpy = ">=1.26.0,<3.0.0" From 30a4b7871eea67f4719af05c371070e560708e15 Mon Sep 17 00:00:00 2001 From: yan-gao-GY Date: Tue, 3 Feb 2026 17:11:10 +0000 Subject: [PATCH 02/21] Refactor cmd to use typer --- 
datasets/flwr_datasets/cli/__init__.py | 9 +- datasets/flwr_datasets/cli/app.py | 57 +++++++++++ datasets/flwr_datasets/cli/create.py | 87 +++++++++++++++++ .../cli/flwr_datasets_creator.py | 94 ------------------- datasets/pyproject.toml | 2 +- 5 files changed, 146 insertions(+), 103 deletions(-) create mode 100644 datasets/flwr_datasets/cli/app.py create mode 100644 datasets/flwr_datasets/cli/create.py delete mode 100644 datasets/flwr_datasets/cli/flwr_datasets_creator.py diff --git a/datasets/flwr_datasets/cli/__init__.py b/datasets/flwr_datasets/cli/__init__.py index 2e2b51c75040..35efd105a204 100644 --- a/datasets/flwr_datasets/cli/__init__.py +++ b/datasets/flwr_datasets/cli/__init__.py @@ -12,11 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Flower Datasets command line interface.""" - - -from .flwr_datasets_creator import flwr_datasets_creator - -__all__ = [ - "flwr_datasets_creator", -] +"""Flower Datasets command line interface.""" diff --git a/datasets/flwr_datasets/cli/app.py b/datasets/flwr_datasets/cli/app.py new file mode 100644 index 000000000000..2e1623075831 --- /dev/null +++ b/datasets/flwr_datasets/cli/app.py @@ -0,0 +1,57 @@ +# Copyright 2025 Flower Labs GmbH. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.
+# ============================================================================== +"""Flower Datasets command line interface.""" + + +import typer +from typer.main import get_command + +from flwr_datasets.common.version import package_version + +from .create import create + +app = typer.Typer( + help=typer.style( + "flwr-datasets is the Flower Datasets command line interface.", + fg=typer.colors.BRIGHT_YELLOW, + bold=True, + ), + no_args_is_help=True, + context_settings={"help_option_names": ["-h", "--help"]}, +) + +app.command()(create) + +typer_click_object = get_command(app) + + +@app.callback(invoke_without_command=True) +def main( + version: bool = typer.Option( + None, + "-V", + "--version", + is_eager=True, + help="Show the version and exit.", + ), +) -> None: + """Flower Datasets CLI.""" + if version: + typer.secho(f"Flower Datasets version: {package_version}", fg="blue") + raise typer.Exit() + + +if __name__ == "__main__": + app() diff --git a/datasets/flwr_datasets/cli/create.py b/datasets/flwr_datasets/cli/create.py new file mode 100644 index 000000000000..4a1becdb7cc5 --- /dev/null +++ b/datasets/flwr_datasets/cli/create.py @@ -0,0 +1,87 @@ +# Copyright 2025 Flower Labs GmbH. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Flower Datasets command line interface `create` command.""" + + +from pathlib import Path +from typing import Annotated + +import click +import typer + +from flwr_datasets import FederatedDataset +from flwr_datasets.partitioner import IidPartitioner + + +def create( + name: Annotated[ + str, + typer.Option( + "--name", + help="Hugging Face dataset identifier (e.g., 'username/dataset_name').", + ), + ], + num_partitions: Annotated[ + int, + typer.Option( + "--num-partitions", + min=1, + help="Number of partitions to create for the federated dataset.", + ), + ] = 10, + out_dir: Annotated[ + Path, + typer.Option( + "--out-dir", + help="Output directory for the federated dataset.", + ), + ] = Path("./federated_dataset"), +) -> None: + """Create a federated dataset and save it to disk. + + This command is used to generate federated datasets + for demo purposes and currently supports only IID + partitioning `IidPartitioner`. 
+ """ + # Validate number of partitions + if num_partitions <= 0: + raise click.ClickException("--num-partitions must be a positive integer.") + + # Handle output directory + if out_dir.exists(): + overwrite = typer.confirm( + typer.style( + f"\n💬 {out_dir} already exists, do you want to override it?", + fg=typer.colors.MAGENTA, + bold=True, + ), + default=False, + ) + if not overwrite: + return + + out_dir.mkdir(parents=True, exist_ok=True) + + # Create data partitioner + partitioner = IidPartitioner(num_partitions=num_partitions) + + # Create the federated dataset + fds = FederatedDataset(dataset=name, partitioners={"train": partitioner}) + + # Load partitions and save them to disk + for partition_id in range(num_partitions): + partition = fds.load_partition(partition_id=partition_id) + partition_out_dir = out_dir / f"partition_{partition_id}" + partition.save_to_disk(partition_out_dir) diff --git a/datasets/flwr_datasets/cli/flwr_datasets_creator.py b/datasets/flwr_datasets/cli/flwr_datasets_creator.py deleted file mode 100644 index 481ea1e034de..000000000000 --- a/datasets/flwr_datasets/cli/flwr_datasets_creator.py +++ /dev/null @@ -1,94 +0,0 @@ -# Copyright 2025 Flower Labs GmbH. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License.
-# ============================================================================== -"""`flwr-datasets-creator` command.""" - - -import argparse -import shutil -from pathlib import Path - -import typer -from flwr_datasets import FederatedDataset -from flwr_datasets.partitioner import IidPartitioner - - -def flwr_datasets_creator() -> None: - """Create a federated dataset and save it to disk. - - This command is used to generated demo data, and currently supports - only IID partitioning via `IidPartitioner`. - """ - parser = _parse_args_run_creator() - args = parser.parse_args() - - # Validate number of partitions - if args.num_partitions <= 0: - parser.error("--num-partitions must be a positive integer.") - - # Handle output directory - if args.out_dir.exists(): - overwrite = typer.confirm( - f"Output directory '{args.out_dir}' already exists. Overwrite?", - default=False, - ) - if not overwrite: - typer.echo("Aborting.") - return - - shutil.rmtree(args.out_dir) - - args.out_dir.mkdir(parents=True, exist_ok=True) - - # Create data partitioner - partitioner = IidPartitioner(num_partitions=args.num_partitions) - - # Create the federated dataset - fds = FederatedDataset( - dataset=args.name, - partitioners={"train": partitioner}, - ) - - # Load partitions and save them to disk - for partition_id in range(args.num_partitions): - partition = fds.load_partition(partition_id=partition_id) - out_dir = args.out_dir / f"partition_{partition_id}" - partition.save_to_disk(out_dir) - - -def _parse_args_run_creator() -> argparse.ArgumentParser: - """Parse flwr-datasets-creator command-line arguments.""" - parser = argparse.ArgumentParser( - description="Create federated dataset partitions and save them to disk.", - ) - parser.add_argument( - "--name", - required=True, - type=str, - help="Hugging Face dataset identifier (e.g., 'username/dataset_name').", - ) - parser.add_argument( - "--num-partitions", - default=10, - type=int, - help="Number of partitions to create for the 
federated dataset.", - ) - parser.add_argument( - "--out-dir", - default=Path("./federated_dataset"), - type=Path, - help="Output directory for the federated dataset.", - ) - - return parser diff --git a/datasets/pyproject.toml b/datasets/pyproject.toml index 218eccff70cc..898dd2260aac 100644 --- a/datasets/pyproject.toml +++ b/datasets/pyproject.toml @@ -51,7 +51,7 @@ exclude = ["./**/*_test.py"] [tool.poetry.scripts] # `flwr-datasets` CLI -flwr-datasets-creator = "flwr_datasets.cli:flwr_datasets_creator" +flwr-datasets = "flwr_datasets.cli.app:app" [tool.poetry.dependencies] python = "^3.10" From 827a69759f460322621dadc51cf913c59b654f3e Mon Sep 17 00:00:00 2001 From: yan-gao-GY Date: Wed, 4 Feb 2026 11:50:01 +0000 Subject: [PATCH 03/21] Add test --- datasets/flwr_datasets/cli/create_test.py | 175 ++++++++++++++++++++++ 1 file changed, 175 insertions(+) create mode 100644 datasets/flwr_datasets/cli/create_test.py diff --git a/datasets/flwr_datasets/cli/create_test.py b/datasets/flwr_datasets/cli/create_test.py new file mode 100644 index 000000000000..b4f105705823 --- /dev/null +++ b/datasets/flwr_datasets/cli/create_test.py @@ -0,0 +1,175 @@ +# Copyright 2025 Flower Labs GmbH. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Test for Flower Datasets command line interface `create` command.""" + + +from dataclasses import dataclass +from pathlib import Path +from types import SimpleNamespace +from typing import Any + +import click +import pytest +import typer + +from . import create as create_module +from .create import create + + +class _FakePartition: + """Fake dataset partition used to capture save-to-disk calls.""" + + def __init__(self, saved_dirs: list[Path]) -> None: + """Initialize the fake partition.""" + self._saved_dirs = saved_dirs + + def save_to_disk(self, out_dir: Path) -> None: + """Record the output directory instead of writing to disk.""" + self._saved_dirs.append(out_dir) + + +class _FakeFederatedDataset: + """Fake FederatedDataset that records partition loading behavior.""" + + def __init__(self, calls: dict[str, Any]) -> None: + """Initialize the fake federated dataset.""" + self._calls = calls + + def load_partition(self, *, partition_id: int) -> _FakePartition: + """Simulate loading a partition and record calls.""" + self._calls.setdefault("loaded_ids", []).append(partition_id) + return _FakePartition(self._calls.setdefault("saved_dirs", [])) + + +def test_create_raises_on_non_positive_num_partitions(tmp_path: Path) -> None: + """Ensure `create` fails when `num_partitions` is not a positive integer.""" + with pytest.raises(click.ClickException, match="positive integer"): + create(name="user/ds", num_partitions=0, out_dir=tmp_path) + + +@dataclass(frozen=True) +class _CreateCase: + """Single parametrized case for `create` output-directory behavior tests.""" + + out_dir_exists: bool + user_overwrite: bool | None + expect_runs: bool + expect_confirm_calls: int + num_partitions: int = 3 + + +@pytest.mark.parametrize( + "case", + [ + _CreateCase( + out_dir_exists=False, + user_overwrite=None, + expect_runs=True, + expect_confirm_calls=0, + ), + _CreateCase( + out_dir_exists=True, + 
user_overwrite=False, + expect_runs=False, + expect_confirm_calls=1, + ), + _CreateCase( + out_dir_exists=True, + user_overwrite=True, + expect_runs=True, + expect_confirm_calls=1, + ), + ], +) +def test_create_partitions_save_behavior( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, + case: _CreateCase, +) -> None: + """Test `create` behavior depending on whether the output directory exists.""" + out_dir = tmp_path / "out" + calls: dict[str, Any] = {} + confirm_calls: list[str] = [] + mkdir_calls: list[Path] = [] + + def _exists(self: Path) -> bool: + """Simulate existence of the output directory.""" + return case.out_dir_exists and self == out_dir + + def _confirm(message: str, _default: bool = False) -> bool: + """Simulate user response to overwrite confirmation.""" + confirm_calls.append(message) + assert ( + case.user_overwrite is not None + ), "confirm should not be called in this scenario" + return case.user_overwrite + + def _mkdir(self: Path, _parents: bool = False, _exist_ok: bool = False) -> None: + """Record directory creation attempts.""" + mkdir_calls.append(self) + + monkeypatch.setattr(Path, "exists", _exists) + monkeypatch.setattr(typer, "confirm", _confirm) + monkeypatch.setattr(Path, "mkdir", _mkdir) + + if case.expect_runs: + + def _fake_partitioner(*, num_partitions: int) -> SimpleNamespace: + """Record partitioner initialization.""" + calls["partitioner_num_partitions"] = num_partitions + return SimpleNamespace(num_partitions=num_partitions) + + def _fake_fds( + *, dataset: str, partitioners: dict[str, object] + ) -> _FakeFederatedDataset: + """Record dataset creation and return a fake federated dataset.""" + calls["dataset"] = dataset + calls["partitioners"] = partitioners + return _FakeFederatedDataset(calls) + + monkeypatch.setattr(create_module, "IidPartitioner", _fake_partitioner) + monkeypatch.setattr(create_module, "FederatedDataset", _fake_fds) + else: + monkeypatch.setattr( + create_module, + "IidPartitioner", + lambda **_: (_ 
for _ in ()).throw( + AssertionError("IidPartitioner should not be called") + ), + ) + monkeypatch.setattr( + create_module, + "FederatedDataset", + lambda **_: (_ for _ in ()).throw( + AssertionError("FederatedDataset should not be called") + ), + ) + + create(name="user/ds", num_partitions=case.num_partitions, out_dir=out_dir) + + assert len(confirm_calls) == case.expect_confirm_calls + + if not case.expect_runs: + assert not mkdir_calls + return + + assert mkdir_calls == [out_dir] + assert calls["partitioner_num_partitions"] == case.num_partitions + assert calls["dataset"] == "user/ds" + assert "train" in calls["partitioners"] + assert calls["loaded_ids"] == list(range(case.num_partitions)) + assert calls["saved_dirs"] == [ + out_dir / f"partition_{i}" for i in range(case.num_partitions) + ] From fa434a2afea074dd5b5ff529103e8e5ab09a9f20 Mon Sep 17 00:00:00 2001 From: Yan Gao Date: Wed, 4 Feb 2026 20:42:22 +0800 Subject: [PATCH 04/21] Update datasets/pyproject.toml Co-authored-by: Javier --- datasets/pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/datasets/pyproject.toml b/datasets/pyproject.toml index 898dd2260aac..79b9d4b41ca3 100644 --- a/datasets/pyproject.toml +++ b/datasets/pyproject.toml @@ -61,6 +61,7 @@ pillow = { version = ">=6.2.1", optional = true } soundfile = { version = ">=0.12.1", optional = true } librosa = { version = ">=0.10.0.post2", optional = true } tqdm = "^4.66.1" +rich = "^13.5.0" matplotlib = "^3.7.5" seaborn = "^0.13.0" torch = { version = ">=2.8.0", optional = true, python = ">=3.10,<3.14" } From 9013dadbb9a18735f80cd363d014b5207de402cf Mon Sep 17 00:00:00 2001 From: Yan Gao Date: Wed, 4 Feb 2026 20:42:47 +0800 Subject: [PATCH 05/21] Update datasets/flwr_datasets/cli/create.py Co-authored-by: Javier --- datasets/flwr_datasets/cli/create.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datasets/flwr_datasets/cli/create.py b/datasets/flwr_datasets/cli/create.py index 4a1becdb7cc5..f04c4cfdacac 100644 
--- a/datasets/flwr_datasets/cli/create.py +++ b/datasets/flwr_datasets/cli/create.py @@ -30,7 +30,7 @@ def create( str, typer.Option( "--name", - help="Hugging Face dataset identifier (e.g., 'username/dataset_name').", + help="Hugging Face dataset identifier (e.g., 'ylecun/mnist').", ), ], num_partitions: Annotated[ From 451d0979d98c585350bf4193307c4ec38de94fe3 Mon Sep 17 00:00:00 2001 From: Yan Gao Date: Wed, 4 Feb 2026 20:49:41 +0800 Subject: [PATCH 06/21] Update datasets/flwr_datasets/cli/create.py Co-authored-by: Javier --- datasets/flwr_datasets/cli/create.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datasets/flwr_datasets/cli/create.py b/datasets/flwr_datasets/cli/create.py index f04c4cfdacac..be3b0cc9f8e4 100644 --- a/datasets/flwr_datasets/cli/create.py +++ b/datasets/flwr_datasets/cli/create.py @@ -38,7 +38,7 @@ def create( typer.Option( "--num-partitions", min=1, - help="Number of partitions to create for the federated dataset.", + help="Number of partitions to create.", ), ] = 10, out_dir: Annotated[ From 4ba8dfd73c696e0cfcdca1887a4430d5e54a84aa Mon Sep 17 00:00:00 2001 From: Yan Gao Date: Wed, 4 Feb 2026 20:49:58 +0800 Subject: [PATCH 07/21] Update datasets/flwr_datasets/cli/create.py Co-authored-by: Javier --- datasets/flwr_datasets/cli/create.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datasets/flwr_datasets/cli/create.py b/datasets/flwr_datasets/cli/create.py index be3b0cc9f8e4..922f2f56a306 100644 --- a/datasets/flwr_datasets/cli/create.py +++ b/datasets/flwr_datasets/cli/create.py @@ -49,7 +49,7 @@ def create( ), ] = Path("./federated_dataset"), ) -> None: - """Create a federated dataset and save it to disk. + """Create a federated dataset and save each partition in a sub-directory. 
This command is used to generate federated datasets for demo purposes and currently supports only IID From 2d01146aaf89e1100a0a83775701ea0187fc7438 Mon Sep 17 00:00:00 2001 From: yan-gao-GY Date: Wed, 4 Feb 2026 13:01:52 +0000 Subject: [PATCH 08/21] Update --- datasets/flwr_datasets/cli/create.py | 13 +++++++++---- datasets/flwr_datasets/cli/create_test.py | 4 ++-- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/datasets/flwr_datasets/cli/create.py b/datasets/flwr_datasets/cli/create.py index 922f2f56a306..830981a91f3a 100644 --- a/datasets/flwr_datasets/cli/create.py +++ b/datasets/flwr_datasets/cli/create.py @@ -26,10 +26,9 @@ def create( - name: Annotated[ + dataset_name: Annotated[ str, - typer.Option( - "--name", + typer.Argument( help="Hugging Face dataset identifier (e.g., 'ylecun/mnist').", ), ], @@ -78,10 +77,16 @@ def create( partitioner = IidPartitioner(num_partitions=num_partitions) # Create the federated dataset - fds = FederatedDataset(dataset=name, partitioners={"train": partitioner}) + fds = FederatedDataset(dataset=dataset_name, partitioners={"train": partitioner}) # Load partitions and save them to disk for partition_id in range(num_partitions): partition = fds.load_partition(partition_id=partition_id) partition_out_dir = out_dir / f"partition_{partition_id}" partition.save_to_disk(partition_out_dir) + + typer.secho( + f"🎊 Created {num_partitions} partitions for {dataset_name} in {out_dir}", + fg=typer.colors.GREEN, + bold=True, + ) diff --git a/datasets/flwr_datasets/cli/create_test.py b/datasets/flwr_datasets/cli/create_test.py index b4f105705823..9ee608adf7e6 100644 --- a/datasets/flwr_datasets/cli/create_test.py +++ b/datasets/flwr_datasets/cli/create_test.py @@ -56,7 +56,7 @@ def load_partition(self, *, partition_id: int) -> _FakePartition: def test_create_raises_on_non_positive_num_partitions(tmp_path: Path) -> None: """Ensure `create` fails when `num_partitions` is not a positive integer.""" with
pytest.raises(click.ClickException, match="positive integer"): - create(name="user/ds", num_partitions=0, out_dir=tmp_path) + create(dataset_name="user/ds", num_partitions=0, out_dir=tmp_path) @dataclass(frozen=True) @@ -157,7 +157,7 @@ def _fake_fds( ), ) - create(name="user/ds", num_partitions=case.num_partitions, out_dir=out_dir) + create(dataset_name="user/ds", num_partitions=case.num_partitions, out_dir=out_dir) assert len(confirm_calls) == case.expect_confirm_calls From e26d9fc65ba75fe2bddb6d5011370744348d0345 Mon Sep 17 00:00:00 2001 From: Yan Gao Date: Wed, 4 Feb 2026 22:14:14 +0800 Subject: [PATCH 09/21] Update datasets/flwr_datasets/cli/create_test.py Co-authored-by: Javier --- datasets/flwr_datasets/cli/create_test.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/datasets/flwr_datasets/cli/create_test.py b/datasets/flwr_datasets/cli/create_test.py index 9ee608adf7e6..fb84d6f505bd 100644 --- a/datasets/flwr_datasets/cli/create_test.py +++ b/datasets/flwr_datasets/cli/create_test.py @@ -108,8 +108,9 @@ def _exists(self: Path) -> bool: """Simulate existence of the output directory.""" return case.out_dir_exists and self == out_dir - def _confirm(message: str, _default: bool = False) -> bool: + def _confirm(message: str, default: bool = False) -> bool: """Simulate user response to overwrite confirmation.""" + del default # unused confirm_calls.append(message) assert ( case.user_overwrite is not None From a0e25862bdff4a6ab09014d160cfe81d6d6fb37f Mon Sep 17 00:00:00 2001 From: Yan Gao Date: Wed, 4 Feb 2026 22:14:23 +0800 Subject: [PATCH 10/21] Update datasets/flwr_datasets/cli/create_test.py Co-authored-by: Javier --- datasets/flwr_datasets/cli/create_test.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/datasets/flwr_datasets/cli/create_test.py b/datasets/flwr_datasets/cli/create_test.py index fb84d6f505bd..274060b60cb3 100644 --- a/datasets/flwr_datasets/cli/create_test.py +++ 
b/datasets/flwr_datasets/cli/create_test.py @@ -117,8 +117,9 @@ def _confirm(message: str, default: bool = False) -> bool: ), "confirm should not be called in this scenario" return case.user_overwrite - def _mkdir(self: Path, _parents: bool = False, _exist_ok: bool = False) -> None: + def _mkdir(self: Path, parents: bool = False, exist_ok: bool = False) -> None: """Record directory creation attempts.""" + del parents, exist_ok # unused mkdir_calls.append(self) monkeypatch.setattr(Path, "exists", _exists) From f273355875af3113509890ff440f3abb36520258 Mon Sep 17 00:00:00 2001 From: yan-gao-GY Date: Wed, 4 Feb 2026 16:06:36 +0000 Subject: [PATCH 11/21] Handle "dataset not found" error --- datasets/flwr_datasets/cli/create.py | 25 ++++++++---- datasets/flwr_datasets/cli/create_test.py | 47 +++++++++++++++++++++++ 2 files changed, 65 insertions(+), 7 deletions(-) diff --git a/datasets/flwr_datasets/cli/create.py b/datasets/flwr_datasets/cli/create.py index 830981a91f3a..dcac30f91196 100644 --- a/datasets/flwr_datasets/cli/create.py +++ b/datasets/flwr_datasets/cli/create.py @@ -76,14 +76,25 @@ def create( # Create data partitioner partitioner = IidPartitioner(num_partitions=num_partitions) - # Create the federated dataset - fds = FederatedDataset(dataset=dataset_name, partitioners={"train": partitioner}) + try: + # Create the federated dataset + fds = FederatedDataset( + dataset=dataset_name, + partitioners={"train": partitioner}, + ) + + # Load partitions and save them to disk + for partition_id in range(num_partitions): + partition = fds.load_partition(partition_id=partition_id) + partition_out_dir = out_dir / f"partition_{partition_id}" + partition.save_to_disk(partition_out_dir) - # Load partitions and save them to disk - for partition_id in range(num_partitions): - partition = fds.load_partition(partition_id=partition_id) - partition_out_dir = out_dir / f"partition_{partition_id}" - partition.save_to_disk(partition_out_dir) + except Exception as err: # pylint: 
disable=broad-exception-caught + raise click.ClickException( + f"Dataset '{dataset_name}' could not be found on the Hugging Face Hub or " + "network access is unavailable. " + "Please verify the dataset identifier and your connection." + ) from err typer.secho( f"🎊 Created {num_partitions} partitions for {dataset_name} in {out_dir}", diff --git a/datasets/flwr_datasets/cli/create_test.py b/datasets/flwr_datasets/cli/create_test.py index 274060b60cb3..072c3cb97916 100644 --- a/datasets/flwr_datasets/cli/create_test.py +++ b/datasets/flwr_datasets/cli/create_test.py @@ -15,6 +15,7 @@ """Test for Flower Datasets command line interface `create` command.""" +import re from dataclasses import dataclass from pathlib import Path from types import SimpleNamespace @@ -59,6 +60,52 @@ def test_create_raises_on_non_positive_num_partitions(tmp_path: Path) -> None: create(dataset_name="user/ds", num_partitions=0, out_dir=tmp_path) +def test_create_raises_click_exception_when_dataset_load_fails( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + """Ensure `create` raises a user-friendly error when dataset loading fails.
+ + This covers cases like: + - dataset does not exist on the Hugging Face Hub + - network access/authentication issues + - other upstream HF/Datasets failures + """ + out_dir = tmp_path / "out" + dataset_name = "does-not-exist/dataset" + + # Avoid overwrite prompt path + monkeypatch.setattr(Path, "exists", lambda _self: False) + + # Avoid touching the real filesystem in this unit test + monkeypatch.setattr( + Path, "mkdir", lambda _self, _parents=False, _exist_ok=False: None + ) + + # Mock partitioner + monkeypatch.setattr( + create_module, + "IidPartitioner", + lambda *, num_partitions: SimpleNamespace(num_partitions=num_partitions), + ) + + # Make FederatedDataset construction fail (simulates "dataset not found"/network issues) + def _raise_fds( + *, dataset: str, partitioners: dict[str, object] + ) -> _FakeFederatedDataset: + raise RuntimeError("upstream failure") + + monkeypatch.setattr(create_module, "FederatedDataset", _raise_fds) + + expected_msg = ( + f"Dataset '{dataset_name}' could not be found on the Hugging Face Hub or " + "network access is unavailable. " + "Please verify the dataset identifier and your connection." 
+ ) + + with pytest.raises(click.ClickException, match=re.escape(expected_msg)): + create(dataset_name=dataset_name, num_partitions=2, out_dir=out_dir) + + @dataclass(frozen=True) class _CreateCase: """Single parametrized case for `create` output-directory behavior tests.""" From acbdeb4ea5ba5dce343a671ef368a8bec5e2405f Mon Sep 17 00:00:00 2001 From: yan-gao-GY Date: Wed, 4 Feb 2026 17:24:48 +0000 Subject: [PATCH 12/21] Formatting --- datasets/flwr_datasets/cli/create_test.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/datasets/flwr_datasets/cli/create_test.py b/datasets/flwr_datasets/cli/create_test.py index 072c3cb97916..e7d4d7a87b48 100644 --- a/datasets/flwr_datasets/cli/create_test.py +++ b/datasets/flwr_datasets/cli/create_test.py @@ -77,9 +77,7 @@ def test_create_raises_click_exception_when_dataset_load_fails( monkeypatch.setattr(Path, "exists", lambda _self: False) # Avoid touching the real filesystem in this unit test - monkeypatch.setattr( - Path, "mkdir", lambda _self, _parents=False, _exist_ok=False: None - ) + monkeypatch.setattr(Path, "mkdir", lambda _self, **_kwargs: None) # Mock partitioner monkeypatch.setattr( From 202ab5ef3334e35da07277aa3aa69726d1032898 Mon Sep 17 00:00:00 2001 From: jafermarq Date: Thu, 5 Feb 2026 11:03:37 +0000 Subject: [PATCH 13/21] update copyright year --- datasets/flwr_datasets/cli/__init__.py | 2 +- datasets/flwr_datasets/cli/app.py | 2 +- datasets/flwr_datasets/cli/create.py | 2 +- datasets/flwr_datasets/cli/create_test.py | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/datasets/flwr_datasets/cli/__init__.py b/datasets/flwr_datasets/cli/__init__.py index 35efd105a204..c50081801fde 100644 --- a/datasets/flwr_datasets/cli/__init__.py +++ b/datasets/flwr_datasets/cli/__init__.py @@ -1,4 +1,4 @@ -# Copyright 2025 Flower Labs GmbH. All Rights Reserved. +# Copyright 2026 Flower Labs GmbH. All Rights Reserved. 
# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/datasets/flwr_datasets/cli/app.py b/datasets/flwr_datasets/cli/app.py index 2e1623075831..1e59ef34a857 100644 --- a/datasets/flwr_datasets/cli/app.py +++ b/datasets/flwr_datasets/cli/app.py @@ -1,4 +1,4 @@ -# Copyright 2025 Flower Labs GmbH. All Rights Reserved. +# Copyright 2026 Flower Labs GmbH. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/datasets/flwr_datasets/cli/create.py b/datasets/flwr_datasets/cli/create.py index dcac30f91196..d578fdf8f4f4 100644 --- a/datasets/flwr_datasets/cli/create.py +++ b/datasets/flwr_datasets/cli/create.py @@ -1,4 +1,4 @@ -# Copyright 2025 Flower Labs GmbH. All Rights Reserved. +# Copyright 2026 Flower Labs GmbH. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/datasets/flwr_datasets/cli/create_test.py b/datasets/flwr_datasets/cli/create_test.py index e7d4d7a87b48..c755eb2c3aae 100644 --- a/datasets/flwr_datasets/cli/create_test.py +++ b/datasets/flwr_datasets/cli/create_test.py @@ -1,4 +1,4 @@ -# Copyright 2025 Flower Labs GmbH. All Rights Reserved. +# Copyright 2026 Flower Labs GmbH. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
From c68347ebc55b30a3c75518a7046ae4d69fc11f41 Mon Sep 17 00:00:00 2001 From: yan-gao-GY Date: Thu, 5 Feb 2026 11:48:17 +0000 Subject: [PATCH 14/21] Update error handling --- datasets/flwr_datasets/cli/create.py | 15 +++++++++++---- datasets/flwr_datasets/cli/create_test.py | 20 ++++++++------------ 2 files changed, 19 insertions(+), 16 deletions(-) diff --git a/datasets/flwr_datasets/cli/create.py b/datasets/flwr_datasets/cli/create.py index d578fdf8f4f4..a1a06530b766 100644 --- a/datasets/flwr_datasets/cli/create.py +++ b/datasets/flwr_datasets/cli/create.py @@ -21,6 +21,7 @@ import click import typer +from datasets.load import DatasetNotFoundError from flwr_datasets import FederatedDataset from flwr_datasets.partitioner import IidPartitioner @@ -89,13 +90,19 @@ def create( partition_out_dir = out_dir / f"partition_{partition_id}" partition.save_to_disk(partition_out_dir) - except Exception as err: # pylint: disable=broad-exception-caught + except DatasetNotFoundError as err: raise click.ClickException( - f"Dataset '{dataset_name}' could not be found on the Hugging Face Hub or " - "network access is unavailable. " - "Please verify the dataset identifier and your connection." + f"Dataset '{dataset_name}' could not be found on the Hugging Face Hub, " + "or network access is unavailable. " + "Please verify the dataset identifier and your internet connection." ) from err + except Exception as ex: # pylint: disable=broad-exception-caught + raise click.ClickException( + "An unexpected error occurred while creating the federated dataset. " + "Please try again or check the logs for more details." 
+ ) from ex + typer.secho( f"🎊 Created {num_partitions} partitions for {dataset_name} in {out_dir}", fg=typer.colors.GREEN, diff --git a/datasets/flwr_datasets/cli/create_test.py b/datasets/flwr_datasets/cli/create_test.py index c755eb2c3aae..2a2a87f9e4f9 100644 --- a/datasets/flwr_datasets/cli/create_test.py +++ b/datasets/flwr_datasets/cli/create_test.py @@ -63,13 +63,9 @@ def test_create_raises_on_non_positive_num_partitions(tmp_path: Path) -> None: def test_create_raises_click_exception_when_dataset_load_fails( tmp_path: Path, monkeypatch: pytest.MonkeyPatch ) -> None: - """Ensure `create` raises a user-friendly error when dataset loading fails. - - This covers cases like: - - dataset does not exist on the Hugging Face Hub - - network access/authentication issues - - other upstream HF/Datasets failures - """ + """Ensure `create` raises a user-friendly error when the dataset is + missing/unreachable.""" + # Create a unique dataset name out_dir = tmp_path / "out" dataset_name = "does-not-exist/dataset" @@ -86,18 +82,18 @@ def test_create_raises_click_exception_when_dataset_load_fails( lambda *, num_partitions: SimpleNamespace(num_partitions=num_partitions), ) - # Make FederatedDataset construction fail (simulates "dataset not found"/network issues) + # Ensure the command handles DatasetNotFoundError specifically def _raise_fds( *, dataset: str, partitioners: dict[str, object] ) -> _FakeFederatedDataset: - raise RuntimeError("upstream failure") + raise create_module.DatasetNotFoundError("not found") monkeypatch.setattr(create_module, "FederatedDataset", _raise_fds) expected_msg = ( - f"Dataset '{dataset_name}' could not be found on the Hugging Face Hub or " - "network access is unavailable. " - "Please verify the dataset identifier and your connection." + f"Dataset '{dataset_name}' could not be found on the Hugging Face Hub, " + "or network access is unavailable. " + "Please verify the dataset identifier and your internet connection."
) with pytest.raises(click.ClickException, match=re.escape(expected_msg)): From f7aa9758b48255af35a6df190dc2372faafc2c4d Mon Sep 17 00:00:00 2001 From: yan-gao-GY Date: Thu, 5 Feb 2026 12:25:20 +0000 Subject: [PATCH 15/21] Formatting --- datasets/flwr_datasets/cli/create_test.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/datasets/flwr_datasets/cli/create_test.py b/datasets/flwr_datasets/cli/create_test.py index 2a2a87f9e4f9..5d5e92495b49 100644 --- a/datasets/flwr_datasets/cli/create_test.py +++ b/datasets/flwr_datasets/cli/create_test.py @@ -64,7 +64,9 @@ def test_create_raises_click_exception_when_dataset_load_fails( tmp_path: Path, monkeypatch: pytest.MonkeyPatch ) -> None: """Ensure `create` raises a user-friendly error when the dataset is - missing/unreachable.""" + missing/unreachable. + """ + # Create a unique dataset name out_dir = tmp_path / "out" dataset_name = "does-not-exist/dataset" From c8a48a42e7fc2ab4eab0e426c6538b3c57a12868 Mon Sep 17 00:00:00 2001 From: yan-gao-GY Date: Thu, 5 Feb 2026 12:35:47 +0000 Subject: [PATCH 16/21] Formatting --- datasets/flwr_datasets/cli/create_test.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/datasets/flwr_datasets/cli/create_test.py b/datasets/flwr_datasets/cli/create_test.py index 5d5e92495b49..3cbaf6ec1b7a 100644 --- a/datasets/flwr_datasets/cli/create_test.py +++ b/datasets/flwr_datasets/cli/create_test.py @@ -64,9 +64,8 @@ def test_create_raises_click_exception_when_dataset_load_fails( tmp_path: Path, monkeypatch: pytest.MonkeyPatch ) -> None: """Ensure `create` raises a user-friendly error when the dataset is - missing/unreachable. 
- """ - + missing/unreachable.""" + # Create a unique dataset name out_dir = tmp_path / "out" dataset_name = "does-not-exist/dataset" From fc8918f998ea4c539747e4502e3c56bf91a4adb7 Mon Sep 17 00:00:00 2001 From: Yan Gao Date: Thu, 5 Feb 2026 20:45:03 +0800 Subject: [PATCH 17/21] Update datasets/flwr_datasets/cli/create.py Co-authored-by: Javier --- datasets/flwr_datasets/cli/create.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datasets/flwr_datasets/cli/create.py b/datasets/flwr_datasets/cli/create.py index a1a06530b766..d73dc2ec6794 100644 --- a/datasets/flwr_datasets/cli/create.py +++ b/datasets/flwr_datasets/cli/create.py @@ -100,7 +100,7 @@ def create( except Exception as ex: # pylint: disable=broad-exception-caught raise click.ClickException( "An unexpected error occurred while creating the federated dataset. " - "Please try again or check the logs for more details." + f"Please try again or check the logs for more details: {str(ex)}" ) from ex typer.secho( From aa76e60ae6f5eb5f0e6b2946a36493ffa7507573 Mon Sep 17 00:00:00 2001 From: jafermarq Date: Thu, 5 Feb 2026 12:56:40 +0000 Subject: [PATCH 18/21] formatted? --- datasets/flwr_datasets/cli/create_test.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/datasets/flwr_datasets/cli/create_test.py b/datasets/flwr_datasets/cli/create_test.py index 3cbaf6ec1b7a..64dd30e98d0c 100644 --- a/datasets/flwr_datasets/cli/create_test.py +++ b/datasets/flwr_datasets/cli/create_test.py @@ -63,9 +63,10 @@ def test_create_raises_on_non_positive_num_partitions(tmp_path: Path) -> None: def test_create_raises_click_exception_when_dataset_load_fails( tmp_path: Path, monkeypatch: pytest.MonkeyPatch ) -> None: - """Ensure `create` raises a user-friendly error when the dataset is - missing/unreachable.""" + """Ensure `create` raises a user-friendly error when the dataset is. + missing/unreachable. 
+ """ # Create a unique dataset name out_dir = tmp_path / "out" dataset_name = "does-not-exist/dataset" From 3b4fb434773edafaae9aa9653307f3b8ef979d47 Mon Sep 17 00:00:00 2001 From: jafermarq Date: Thu, 5 Feb 2026 13:10:07 +0000 Subject: [PATCH 19/21] simplifications --- datasets/flwr_datasets/cli/create_test.py | 27 ++++++++++------------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/datasets/flwr_datasets/cli/create_test.py b/datasets/flwr_datasets/cli/create_test.py index 64dd30e98d0c..c2461ff7d066 100644 --- a/datasets/flwr_datasets/cli/create_test.py +++ b/datasets/flwr_datasets/cli/create_test.py @@ -25,6 +25,8 @@ import pytest import typer +from datasets.load import DatasetNotFoundError + from . import create as create_module from .create import create @@ -88,7 +90,7 @@ def test_create_raises_click_exception_when_dataset_load_fails( def _raise_fds( *, dataset: str, partitioners: dict[str, object] ) -> _FakeFederatedDataset: - raise create_module.DatasetNotFoundError("not found") + raise DatasetNotFoundError("not found") monkeypatch.setattr(create_module, "FederatedDataset", _raise_fds) @@ -187,20 +189,15 @@ def _fake_fds( monkeypatch.setattr(create_module, "IidPartitioner", _fake_partitioner) monkeypatch.setattr(create_module, "FederatedDataset", _fake_fds) else: - monkeypatch.setattr( - create_module, - "IidPartitioner", - lambda **_: (_ for _ in ()).throw( - AssertionError("IidPartitioner should not be called") - ), - ) - monkeypatch.setattr( - create_module, - "FederatedDataset", - lambda **_: (_ for _ in ()).throw( - AssertionError("FederatedDataset should not be called") - ), - ) + + def _fail_partitioner(**_: object) -> None: + raise AssertionError("IidPartitioner should not be called") + + def _fail_fds(**_: object) -> None: + raise AssertionError("FederatedDataset should not be called") + + monkeypatch.setattr(create_module, "IidPartitioner", _fail_partitioner) + monkeypatch.setattr(create_module, "FederatedDataset", _fail_fds) 
create(dataset_name="user/ds", num_partitions=case.num_partitions, out_dir=out_dir) From 20afdd5706a2013c1ec669d668c6509fd051da20 Mon Sep 17 00:00:00 2001 From: jafermarq Date: Thu, 5 Feb 2026 13:13:30 +0000 Subject: [PATCH 20/21] small edit final log --- datasets/flwr_datasets/cli/create.py | 2 +- datasets/flwr_datasets/cli/create_test.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/datasets/flwr_datasets/cli/create.py b/datasets/flwr_datasets/cli/create.py index d73dc2ec6794..9b3588d984e8 100644 --- a/datasets/flwr_datasets/cli/create.py +++ b/datasets/flwr_datasets/cli/create.py @@ -104,7 +104,7 @@ def create( ) from ex typer.secho( - f"🎊 Created {num_partitions} partitions for {dataset_name} in {out_dir}", + f"🎊 Created {num_partitions} partitions for '{dataset_name}' in '{out_dir.absolute()}'", fg=typer.colors.GREEN, bold=True, ) diff --git a/datasets/flwr_datasets/cli/create_test.py b/datasets/flwr_datasets/cli/create_test.py index c2461ff7d066..709b0cb82a70 100644 --- a/datasets/flwr_datasets/cli/create_test.py +++ b/datasets/flwr_datasets/cli/create_test.py @@ -90,7 +90,7 @@ def test_create_raises_click_exception_when_dataset_load_fails( def _raise_fds( *, dataset: str, partitioners: dict[str, object] ) -> _FakeFederatedDataset: - raise DatasetNotFoundError("not found") + raise DatasetNotFoundError() monkeypatch.setattr(create_module, "FederatedDataset", _raise_fds) From deb67c02b29eadc0b628145b7f202e43a80245c9 Mon Sep 17 00:00:00 2001 From: yan-gao-GY Date: Thu, 5 Feb 2026 13:27:16 +0000 Subject: [PATCH 21/21] Twist --- datasets/flwr_datasets/cli/create.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/datasets/flwr_datasets/cli/create.py b/datasets/flwr_datasets/cli/create.py index 9b3588d984e8..c3533ffe1008 100644 --- a/datasets/flwr_datasets/cli/create.py +++ b/datasets/flwr_datasets/cli/create.py @@ -37,8 +37,7 @@ def create( int, typer.Option( "--num-partitions", - min=1, - help="Number of partitions
to create.", + help="Number of partitions to create. Must be a positive integer", ), ] = 10, out_dir: Annotated[