Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions .github/workflows/datasets-e2e.yml
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,20 @@ jobs:
uses: ./.github/actions/bootstrap
with:
python-version: 3.10.19
- name: Free Disk Space (Ubuntu)
uses: jlumbroso/free-disk-space@v1.3.1
with:
tool-cache: false
android: true
dotnet: true
haskell: true
large-packages: false
docker-images: false
swap-storage: false
- name: Install dependencies (system)
run: |
sudo apt-get update
sudo apt-get install -y ffmpeg
- name: Install dependencies
run: python -m poetry install
- name: Run tests
Expand Down
14 changes: 14 additions & 0 deletions .github/workflows/datasets.yml
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,20 @@ jobs:
uses: ./.github/actions/bootstrap
with:
python-version: ${{ matrix.python }}
- name: Free Disk Space (Ubuntu)
uses: jlumbroso/free-disk-space@v1.3.1
with:
tool-cache: false
android: true
dotnet: true
haskell: true
large-packages: false
docker-images: false
swap-storage: false
- name: Install dependencies (system)
run: |
sudo apt-get update
sudo apt-get install -y ffmpeg
- name: Install dependencies (mandatory only)
run: |
cd datasets
Expand Down
8 changes: 4 additions & 4 deletions datasets/e2e/scikit-learn/sklearn_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,10 @@ def _get_partition_data(self):
partition = fds.load_partition(partition_id, "train")
partition.set_format("numpy")
partition_train_test = partition.train_test_split(test_size=0.2, seed=42)
X_train, y_train = partition_train_test["train"]["image"], partition_train_test[
"train"]["label"]
X_test, y_test = partition_train_test["test"]["image"], partition_train_test[
"test"]["label"]
X_train, y_train = partition_train_test["train"][:]["image"], partition_train_test[
"train"][:]["label"]
X_test, y_test = partition_train_test["test"][:]["image"], partition_train_test[
"test"][:]["label"]
X_train = X_train.reshape(-1, 28 * 28)
X_test = X_test.reshape(-1, 28 * 28)
if self.preprocessing:
Expand Down
19 changes: 12 additions & 7 deletions datasets/flwr_datasets/federated_dataset_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -613,6 +613,7 @@ def test_incorrect_three_partitioners(self) -> None:
)


# pylint: disable=too-many-return-statements
def datasets_are_equal(ds1: Dataset, ds2: Dataset) -> bool:
"""Check if two Datasets have the same values."""
# Check if both datasets have the same length
Expand All @@ -629,14 +630,18 @@ def datasets_are_equal(ds1: Dataset, ds2: Dataset) -> bool:
for key in row1:
if key == "audio":
# Special handling for 'audio' key
if not all(
[
np.array_equal(row1[key]["array"], row2[key]["array"]),
row1[key]["path"] == row2[key]["path"],
row1[key]["sampling_rate"] == row2[key]["sampling_rate"],
]
):
# Check array and sampling_rate
if not np.array_equal(row1[key]["array"], row2[key]["array"]):
return False
if row1[key]["sampling_rate"] != row2[key]["sampling_rate"]:
return False

# Check path if available (AudioDecoder raises TypeError)
try:
if row1[key]["path"] != row2[key]["path"]:
return False
except TypeError:
pass
elif row1[key] != row2[key]:
# Direct comparison for other keys
return False
Expand Down
2 changes: 1 addition & 1 deletion datasets/flwr_datasets/mock_utils_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -334,7 +334,7 @@ def _mock_speach_commands(num_rows: int) -> Dataset:
features = Features(
{
"audio": datasets.Audio(
sampling_rate=sampling_rate, mono=True, decode=True
sampling_rate=sampling_rate, num_channels=1, decode=True
),
"is_unknown": Value(dtype="bool"),
"speaker_id": Value(dtype="string"),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@ def test_missing_column_raises(self) -> None:
dataset = Dataset.from_dict(data)
partitioner = ContinuousPartitioner(3, partition_by="missing", strictness=0.5)
partitioner.dataset = dataset
with self.assertRaises(KeyError):
with self.assertRaises(ValueError):
_ = partitioner.load_partition(0)

def test_nan_value_in_column_raises(self) -> None:
Expand Down
6 changes: 4 additions & 2 deletions datasets/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -52,17 +52,19 @@ exclude = ["./**/*_test.py"]
[tool.poetry.dependencies]
python = "^3.10"
numpy = ">=1.26.0,<3.0.0"
datasets = ">=2.14.6 <=3.1.0"
datasets = ">=2.14.6, <5.0.0"
pillow = { version = ">=6.2.1", optional = true }
soundfile = { version = ">=0.12.1", optional = true }
librosa = { version = ">=0.10.0.post2", optional = true }
tqdm = "^4.66.1"
matplotlib = "^3.7.5"
seaborn = "^0.13.0"
torch = { version = ">=2.8.0", optional = true, python = ">=3.10,<3.14" }
torchcodec = { version = ">=0.7.0", optional = true, python = ">=3.10,<3.14" }

[tool.poetry.extras]
vision = ["pillow"]
audio = ["soundfile", "librosa"]
audio = ["soundfile", "librosa", "torch", "torchcodec"]

[tool.poetry.group.dev.dependencies]
types-requests = "==2.31.0.20240125"
Expand Down