flwrlabs · danieljanes · Jan 15, 2026 · Nov 15, 2025 · Nov 15, 2025 · Nov 15, 2025
@@ -46,6 +46,20 @@ jobs:
         uses: ./.github/actions/bootstrap
         with:
           python-version: 3.10.19
+      - name: Free Disk Space (Ubuntu)
+        uses: jlumbroso/free-disk-space@v1.3.1
+        with:
+          tool-cache: false
+          android: true
+          dotnet: true
+          haskell: true
+          large-packages: false
+          docker-images: false
+          swap-storage: false
+      - name: Install dependencies (system)
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y ffmpeg
       - name: Install dependencies
         run: python -m poetry install
       - name: Run tests

@@ -44,6 +44,20 @@ jobs:
         uses: ./.github/actions/bootstrap
         with:
           python-version: ${{ matrix.python }}
+      - name: Free Disk Space (Ubuntu)
+        uses: jlumbroso/free-disk-space@v1.3.1
+        with:
+          tool-cache: false
+          android: true
+          dotnet: true
+          haskell: true
+          large-packages: false
+          docker-images: false
+          swap-storage: false
+      - name: Install dependencies (system)
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y ffmpeg
       - name: Install dependencies (mandatory only)
         run: |
           cd datasets

@@ -30,10 +30,10 @@ def _get_partition_data(self):
         partition = fds.load_partition(partition_id, "train")
         partition.set_format("numpy")
         partition_train_test = partition.train_test_split(test_size=0.2, seed=42)
-        X_train, y_train = partition_train_test["train"]["image"], partition_train_test[
-            "train"]["label"]
-        X_test, y_test = partition_train_test["test"]["image"], partition_train_test[
-            "test"]["label"]
+        X_train, y_train = partition_train_test["train"][:]["image"], partition_train_test[
+            "train"][:]["label"]
+        X_test, y_test = partition_train_test["test"][:]["image"], partition_train_test[
+            "test"][:]["label"]
         X_train = X_train.reshape(-1, 28 * 28)
         X_test = X_test.reshape(-1, 28 * 28)
         if self.preprocessing:

@@ -613,6 +613,7 @@ def test_incorrect_three_partitioners(self) -> None:
         )
 
 
+# pylint: disable=too-many-return-statements
 def datasets_are_equal(ds1: Dataset, ds2: Dataset) -> bool:
     """Check if two Datasets have the same values."""
     # Check if both datasets have the same length
@@ -629,14 +630,18 @@ def datasets_are_equal(ds1: Dataset, ds2: Dataset) -> bool:
         for key in row1:
             if key == "audio":
                 # Special handling for 'audio' key
-                if not all(
-                    [
-                        np.array_equal(row1[key]["array"], row2[key]["array"]),
-                        row1[key]["path"] == row2[key]["path"],
-                        row1[key]["sampling_rate"] == row2[key]["sampling_rate"],
-                    ]
-                ):
+                # Check array and sampling_rate
+                if not np.array_equal(row1[key]["array"], row2[key]["array"]):
                     return False
+                if row1[key]["sampling_rate"] != row2[key]["sampling_rate"]:
+                    return False
+
+                # Check path if available (AudioDecoder raises TypeError)
+                try:
+                    if row1[key]["path"] != row2[key]["path"]:
+                        return False
+                except TypeError:
+                    pass
             elif row1[key] != row2[key]:
                 # Direct comparison for other keys
                 return False

@@ -334,7 +334,7 @@ def _mock_speach_commands(num_rows: int) -> Dataset:
     features = Features(
         {
             "audio": datasets.Audio(
-                sampling_rate=sampling_rate, mono=True, decode=True
+                sampling_rate=sampling_rate, num_channels=1, decode=True
             ),
             "is_unknown": Value(dtype="bool"),
             "speaker_id": Value(dtype="string"),

@@ -179,7 +179,7 @@ def test_missing_column_raises(self) -> None:
         dataset = Dataset.from_dict(data)
         partitioner = ContinuousPartitioner(3, partition_by="missing", strictness=0.5)
         partitioner.dataset = dataset
-        with self.assertRaises(KeyError):
+        with self.assertRaises(ValueError):
             _ = partitioner.load_partition(0)
 
     def test_nan_value_in_column_raises(self) -> None:

@@ -52,17 +52,19 @@ exclude = ["./**/*_test.py"]
 [tool.poetry.dependencies]
 python = "^3.10"
 numpy = ">=1.26.0,<3.0.0"
-datasets = ">=2.14.6 <=3.1.0"
+datasets = ">=2.14.6, <5.0.0"
 pillow = { version = ">=6.2.1", optional = true }
 soundfile = { version = ">=0.12.1", optional = true }
 librosa = { version = ">=0.10.0.post2", optional = true }
 tqdm = "^4.66.1"
 matplotlib = "^3.7.5"
 seaborn = "^0.13.0"
+torch = { version = ">=2.8.0", optional = true, python = ">=3.10,<3.14" }
+torchcodec = { version = ">=0.7.0", optional = true, python = ">=3.10,<3.14" }
 
 [tool.poetry.extras]
 vision = ["pillow"]
-audio = ["soundfile", "librosa"]
+audio = ["soundfile", "librosa", "torch", "torchcodec"]
 
 [tool.poetry.group.dev.dependencies]
 types-requests = "==2.31.0.20240125"