Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
44 commits
Select commit Hold shift + click to select a range
f02352d
ci(datasets): Migrate Flower Datasets to uv
danieljanes Feb 4, 2026
2067715
Split build and publish into two scripts
danieljanes Feb 4, 2026
dd96a4f
Improve rm-caches.sh
danieljanes Feb 4, 2026
98a451c
Make Flower Datasets compatible with the latest version of HF Datasets
danieljanes Feb 4, 2026
04fa80c
Merge branch 'main' into uv-migration-datasets
danieljanes Feb 4, 2026
341ad19
Merge remote-tracking branch 'refs/remotes/origin/uv-migration-datase…
danieljanes Feb 4, 2026
902ba7a
Exclude .venv from taplo fmt
danieljanes Feb 4, 2026
a5ee3bb
Merge branch 'main' into uv-migration-datasets
danieljanes Feb 9, 2026
d2a7113
Fix docs build
danieljanes Feb 9, 2026
e019eaf
Merge branch 'main' into uv-migration-datasets
danieljanes Feb 10, 2026
2387641
Merge branch 'main' into uv-migration-datasets
danieljanes Feb 10, 2026
0562c33
Merge branch 'main' into uv-migration-datasets
danieljanes Feb 14, 2026
440df66
Merge branch 'main' into uv-migration-datasets
danieljanes Feb 14, 2026
269f701
Update uv
danieljanes Feb 14, 2026
8adc585
ci(datasets): Migrate datasets E2E to uv
danieljanes Feb 14, 2026
f01494e
Restore deleted tests
danieljanes Feb 14, 2026
e8a5d8e
ci(datasets): Migrate Flower Datasets to uv
danieljanes Feb 4, 2026
689ce38
Split build and publish into two scripts
danieljanes Feb 4, 2026
5805884
Make Flower Datasets compatible with the latest version of HF Datasets
danieljanes Feb 4, 2026
753ef96
Exclude .venv from taplo fmt
danieljanes Feb 4, 2026
cb278f0
Fix docs build
danieljanes Feb 9, 2026
32f54ce
Update uv
danieljanes Feb 14, 2026
00ed667
Merge remote-tracking branch 'refs/remotes/origin/uv-migration-datase…
danieljanes Feb 14, 2026
d60283b
Undo
danieljanes Feb 14, 2026
898d6e6
ci(datasets): Migrate Flower Datasets to uv
danieljanes Feb 4, 2026
aaf2dd1
Make Flower Datasets compatible with the latest version of HF Datasets
danieljanes Feb 4, 2026
8621f4f
Revert
danieljanes Feb 14, 2026
6e73989
Remove new docs page
danieljanes Feb 14, 2026
76da4cc
Reorder pyproject.toml
danieljanes Feb 14, 2026
5747591
Reorder pyproject.toml
danieljanes Feb 14, 2026
2e1dc65
Add comment
danieljanes Feb 14, 2026
f3ed784
Reorder dependencies
danieljanes Feb 14, 2026
5adf2dd
Lower-case keys
danieljanes Feb 14, 2026
7bcad6e
Add docs
danieljanes Feb 14, 2026
9840590
Merge branch 'main' into uv-migration-datasets
danieljanes Feb 16, 2026
b187bcb
Merge branch 'main' into uv-migration-datasets
danieljanes Feb 16, 2026
b726ba4
Merge branch 'main' into uv-migration-datasets
danieljanes Feb 16, 2026
15bf78c
Lock dependencies
danieljanes Feb 16, 2026
0eb280f
Bump pillow
danieljanes Feb 16, 2026
42745e8
Merge branch 'main' into uv-migration-datasets
danieljanes Feb 16, 2026
b0449df
Merge branch 'main' into uv-migration-datasets
danieljanes Feb 28, 2026
b4c0928
Apply suggestion from @jafermarq
jafermarq Feb 28, 2026
75073b8
Merge branch 'main' into uv-migration-datasets
danieljanes Feb 28, 2026
d21789a
Update datasets/docs/source/contributor-how-to-develop-flwr-datasets.rst
danieljanes Feb 28, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/build-deploy-non-framework-docs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,8 @@ jobs:
- name: Install docs dependencies
run: |
cd framework
python -m poetry add ../datasets
python -m poetry install
python -m pip install -e ../datasets
- name: Update HTML theme options
run: python dev/update-html-themes.py
- name: Build baselines docs
Expand Down
18 changes: 14 additions & 4 deletions .github/workflows/datasets-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ concurrency:

env:
FLWR_TELEMETRY_ENABLED: 0
UV_NO_MANAGED_PYTHON: 1
UV_PYTHON_DOWNLOADS: never

jobs:
test_core:
Expand All @@ -42,6 +44,11 @@ jobs:
uses: ./.github/actions/bootstrap
with:
python-version: ${{ matrix.python }}
poetry-skip: "true"
- name: Set up uv
uses: astral-sh/setup-uv@v7
with:
version: "0.10.2"
- name: Free Disk Space (Ubuntu)
uses: jlumbroso/free-disk-space@v1.3.1
with:
Expand All @@ -56,10 +63,10 @@ jobs:
run: |
sudo apt-get update
sudo apt-get install -y ffmpeg
- name: Install dependencies (mandatory only)
- name: Install dependencies
run: |
cd datasets
python -m poetry install --all-extras
uv sync --frozen --all-extras
- name: Cache Hugging Face datasets
uses: actions/cache@v3
with:
Expand All @@ -68,11 +75,14 @@ jobs:
restore-keys: hf-datasets-
- name: Set Hugging Face token
run: |
cd datasets
if [ -n "${{ secrets.HF_TOKEN }}" ]; then
echo "Logging into Hugging Face..."
hf auth login --token ${{ secrets.HF_TOKEN }}
uv run hf auth login --token ${{ secrets.HF_TOKEN }}
else
echo "Skipping Hugging Face login stage (HF_TOKEN not set)"
fi
- name: Test (formatting + unit tests)
run: ./datasets/dev/test.sh
run: |
cd datasets
uv run ./dev/test.sh
21 changes: 21 additions & 0 deletions datasets/dev/build.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
#!/bin/bash

# Copyright 2026 Flower Labs GmbH. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

# Abort on the first command that exits with a non-zero status.
set -e
# Change to the parent of the directory containing this script, so the build
# runs from the same location regardless of the caller's working directory.
cd "$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"/../

# Build the package with uv.
# NOTE(review): `--clear` presumably empties the output directory before
# building — confirm against the uv version pinned in CI.
uv build --clear
2 changes: 1 addition & 1 deletion datasets/dev/publish.sh
Original file line number Diff line number Diff line change
Expand Up @@ -18,4 +18,4 @@
set -e
cd "$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"/../

python -m poetry publish -u __token__ -p ${PYPI_TOKEN}
uv publish --token "${PYPI_TOKEN}"
2 changes: 1 addition & 1 deletion datasets/docs/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@
_pyproject_path = Path(__file__).parent.parent.parent / "pyproject.toml"
with open(_pyproject_path, "rb") as f:
_pyproject = tomllib.load(f)
release = _pyproject["tool"]["poetry"]["version"]
release = _pyproject["project"]["version"]

# Make version available as a substitution in rst files (e.g., |release|)
rst_prolog = f"""
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
How to develop flwr-datasets
============================

Flower Datasets uses ``uv`` for development and CI. The commands presented in this guide assume you have cloned the `Flower GitHub repository <https://github.com/adap/flower>`_.


Setup
-----

.. code-block:: bash

cd datasets
uv sync --all-extras

.. tip::

Use ``uv sync --frozen --all-extras`` to ensure ``uv.lock`` is not modified.

Run checks (formatting + unit tests)
------------------------------------

.. code-block:: bash

cd datasets
uv run ./dev/test.sh

Format
------

.. code-block:: bash

cd datasets
uv run ./dev/format.sh

Build docs
----------

.. code-block:: bash

cd datasets
uv run ./dev/build-flwr-datasets-docs.sh

Run E2E tests
-------------

.. code-block:: bash

cd datasets/e2e/pytorch
uv sync --frozen
uv run python -m unittest discover -p "*_test.py"

Repeat for ``datasets/e2e/scikit-learn`` and ``datasets/e2e/tensorflow``.

Dependency management (no ``uv pip``)
-------------------------------------

.. code-block:: bash

cd datasets

# Add a runtime dependency
uv add <package>

# Add a dev dependency
uv add --dev <package>

# Add a dependency to an extra (e.g. "vision")
uv add --optional vision <package>

# Update lockfile (commit the result)
uv lock
1 change: 1 addition & 0 deletions datasets/docs/source/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ Information-oriented API reference and other reference material.
:maxdepth: 1
:caption: Contributor tutorials

contributor-how-to-develop-flwr-datasets
contributor-how-to-contribute-dataset


Expand Down
99 changes: 54 additions & 45 deletions datasets/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,17 +1,15 @@
[build-system]
requires = ["poetry-core>=2.1.3"]
build-backend = "poetry.core.masonry.api"
requires = ["hatchling"]
build-backend = "hatchling.build"

[tool.poetry]
[project]
name = "flwr-datasets"
version = "0.6.0"
description = "Flower Datasets"
license = "Apache-2.0"
authors = ["The Flower Authors <hello@flower.ai>"]
authors = [{ name = "The Flower Authors", email = "hello@flower.ai" }]
readme = "README.md"
homepage = "https://flower.ai"
repository = "https://github.com/adap/flower"
documentation = "https://flower.ai/docs/datasets"
requires-python = ">=3.10"
keywords = [
"Artificial Intelligence",
"Dataset",
Expand Down Expand Up @@ -46,48 +44,59 @@ classifiers = [
"Topic :: Software Development :: Libraries :: Python Modules",
"Typing :: Typed",
]
packages = [{ include = "flwr_datasets", from = "./" }]
exclude = ["./**/*_test.py"]
# Runtime requirements of the package, one requirement string per entry.
# NOTE(review): `rich` is the only entry without an upper bound; the previous
# Poetry constraint `^13.5.0` implied `<14.0.0` — confirm the widening is
# intended.
dependencies = [
"numpy>=1.26.0,<3.0.0",
"datasets>=4.0.0,<5.0.0",
"tqdm>=4.66.1,<5.0.0",
"rich>=13.5.0",
"matplotlib>=3.7.5,<4.0.0",
"seaborn>=0.13.0,<0.14.0",
]

[tool.poetry.scripts]
# Extras: each key is an extra name, each value lists the requirements it adds.
# NOTE(review): the Poetry entries for `torch` and `torchcodec` carried a
# `python = ">=3.10,<3.14"` restriction that is not reproduced here — confirm
# it is no longer needed.
[project.optional-dependencies]
vision = ["pillow>=12.1.1"]
audio = ["torch>=2.8.0", "torchcodec>=0.7.0"]

[project.scripts]
# `flwr-datasets` CLI
flwr-datasets = "flwr_datasets.cli.app:app"

[tool.poetry.dependencies]
python = "^3.10"
# Mandatory dependencies
numpy = ">=1.26.0,<3.0.0"
datasets = ">=4.0.0,<5.0.0"
tqdm = "^4.66.1"
rich = "^13.5.0"
matplotlib = "^3.7.5"
seaborn = "^0.13.0"
# Optional dependencies (vision)
pillow = { version = ">=12.1.1", optional = true }
# Optional dependencies (audio)
torch = { version = ">=2.8.0", python = ">=3.10,<3.14", optional = true }
torchcodec = { version = ">=0.7.0", python = ">=3.10,<3.14", optional = true }

[tool.poetry.extras]
vision = ["pillow"]
audio = ["torch", "torchcodec"]

[tool.poetry.group.dev.dependencies]
tomli = { version = "^2.0.1", python = "<3.11" }
types-requests = "==2.31.0.20240125"
types-setuptools = "==80.9.0.20250822"
setuptools = "==80.9.0"
isort = "==5.13.2"
black = { version = "==25.11.0", extras = ["jupyter"] }
taplo = "==0.9.3"
docformatter = "==1.7.5"
mypy = "==1.8.0"
pylint = "==3.3.1"
parameterized = "==0.9.0"
pytest = "==7.4.4"
pytest-watcher = "==0.4.3"
ruff = "==0.14.5"
devtool = [{ path = "./devtool", develop = true }]
[project.urls]
homepage = "https://flower.ai"
repository = "https://github.com/adap/flower"
documentation = "https://flower.ai/docs/datasets"

# Development-only tooling; every published tool is pinned to an exact version.
# NOTE(review): the Poetry dev group also listed `tomli` (for Python < 3.11)
# and `setuptools==80.9.0`; neither is carried over — confirm nothing still
# relies on them.
[dependency-groups]
dev = [
"types-requests==2.31.0.20240125",
"types-setuptools==80.9.0.20250822",
"isort==5.13.2",
"black[jupyter]==25.11.0",
"taplo==0.9.3",
"docformatter==1.7.5",
"mypy==1.8.0",
"pylint==3.3.1",
"parameterized==0.9.0",
"pytest==7.4.4",
"pytest-watcher==0.4.3",
"ruff==0.14.5",
# Unpinned: resolved from the local path declared under [tool.uv.sources].
"devtool",
]

# Dependency groups that uv includes by default.
[tool.uv]
default-groups = ["dev"]

# Resolve `devtool` from the local `devtool` directory as an editable install.
[tool.uv.sources]
devtool = { path = "devtool", editable = true }

# Parent table first, then its per-target children.
# Keep test modules (`*_test.py`) out of the build.
[tool.hatch.build]
exclude = ["/flwr_datasets/**/*_test.py"]

# Files shipped in the source distribution.
[tool.hatch.build.targets.sdist]
include = ["README.md", "pyproject.toml", "flwr_datasets/**"]

# Package shipped in the wheel.
[tool.hatch.build.targets.wheel]
packages = ["flwr_datasets"]

[tool.isort]
line_length = 88
Expand Down
Loading