diff --git a/.github/workflows/datasets.yml b/.github/workflows/datasets.yml index 860d944696f9..b617b6b4769b 100644 --- a/.github/workflows/datasets.yml +++ b/.github/workflows/datasets.yml @@ -49,5 +49,13 @@ jobs: python-version: ${{ matrix.python }} - name: Install dependencies (mandatory only) run: python -m poetry install --all-extras + - name: Cache Hugging Face datasets + uses: actions/cache@v3 + with: + path: ~/.cache/huggingface + key: hf-datasets-v1 + restore-keys: hf-datasets- + - name: Set Hugging Face token + run: huggingface-cli login --token ${{ secrets.HF_TOKEN }} - name: Test (formatting + unit tests) run: ./dev/test.sh diff --git a/datasets/.flake8 b/datasets/.flake8 index 2bcd70e390ce..c48d2d36eb32 100644 --- a/datasets/.flake8 +++ b/datasets/.flake8 @@ -1,2 +1,3 @@ [flake8] max-line-length = 88 +ignore = SIG203, SIG503, W503 diff --git a/datasets/dev/test.sh b/datasets/dev/test.sh index babb5a3cdd36..77f57b6ffc9c 100755 --- a/datasets/dev/test.sh +++ b/datasets/dev/test.sh @@ -4,7 +4,7 @@ cd "$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"/../ # Append path to PYTHONPATH that makes flwr_tool.init_py_check discoverable PARENT_DIR=$(dirname "$(pwd)") # Go one dir up from flower/datasets -export PYTHONPATH="${PYTHONPATH}:${PARENT_DIR}/src/py" +export PYTHONPATH="${PYTHONPATH}:${PARENT_DIR}/framework/py" echo "=== test.sh ===" diff --git a/datasets/flwr_datasets/partitioner/dirichlet_partitioner.py b/datasets/flwr_datasets/partitioner/dirichlet_partitioner.py index 55c190087f7c..35273d495382 100644 --- a/datasets/flwr_datasets/partitioner/dirichlet_partitioner.py +++ b/datasets/flwr_datasets/partitioner/dirichlet_partitioner.py @@ -85,7 +85,7 @@ class DirichletPartitioner(Partitioner): [2134, 2615, 3646, 6011, 6170, 6386, 6715, 7653, 8435, 10235] """ - def __init__( # pylint: disable=R0913 + def __init__( # pylint: disable=R0913, R0917 self, num_partitions: int, partition_by: str, diff --git 
a/datasets/flwr_datasets/partitioner/dirichlet_partitioner_test.py b/datasets/flwr_datasets/partitioner/dirichlet_partitioner_test.py index 693e0d6a5aa6..64336e7a3193 100644 --- a/datasets/flwr_datasets/partitioner/dirichlet_partitioner_test.py +++ b/datasets/flwr_datasets/partitioner/dirichlet_partitioner_test.py @@ -104,7 +104,7 @@ class TestDirichletPartitionerFailure(unittest.TestCase): """Test DirichletPartitioner failures (exceptions) by incorrect usage.""" @parameterized.expand([(-2,), (-1,), (3,), (4,), (100,)]) # type: ignore - def test_load_invalid_partition_index(self, partition_id): + def test_load_invalid_partition_index(self, partition_id) -> None: """Test if raises when the load_partition is above the num_partitions.""" _, partitioner = _dummy_setup(3, 0.5, 100, "labels") with self.assertRaises(KeyError): @@ -129,7 +129,7 @@ def test_load_invalid_partition_index(self, partition_id): (np.array([0.5, 0.5, -0.5, -0.5, 0.5]), 5), ] ) - def test_negative_values_in_alpha(self, alpha, num_partitions): + def test_negative_values_in_alpha(self, alpha, num_partitions) -> None: """Test if giving the negative value of alpha raises error.""" num_rows, partition_by = 100, "labels" with self.assertRaises(ValueError): @@ -146,7 +146,7 @@ def test_negative_values_in_alpha(self, alpha, num_partitions): (np.array([0.5, 0.5, 0.5, 0.5]), 3), ] ) - def test_incorrect_alpha_shape(self, alpha, num_partitions): + def test_incorrect_alpha_shape(self, alpha, num_partitions) -> None: """Test alpha list len not matching the num_partitions.""" with self.assertRaises(ValueError): DirichletPartitioner( @@ -156,7 +156,7 @@ def test_incorrect_alpha_shape(self, alpha, num_partitions): @parameterized.expand( # type: ignore [(0,), (-1,), (11,), (100,)] ) # num_partitions, - def test_invalid_num_partitions(self, num_partitions): + def test_invalid_num_partitions(self, num_partitions) -> None: """Test if 0 is invalid num_partitions.""" with self.assertRaises(ValueError): _, partitioner = 
_dummy_setup( diff --git a/datasets/flwr_datasets/partitioner/distribution_partitioner.py b/datasets/flwr_datasets/partitioner/distribution_partitioner.py index e4182f587cad..5d42278f4dd2 100644 --- a/datasets/flwr_datasets/partitioner/distribution_partitioner.py +++ b/datasets/flwr_datasets/partitioner/distribution_partitioner.py @@ -155,7 +155,7 @@ class DistributionPartitioner(Partitioner): # pylint: disable=R0902 9: {0: 124, 9: 13}} """ - def __init__( # pylint: disable=R0913 + def __init__( # pylint: disable=R0913, R0917 self, distribution_array: Union[NDArrayInt, NDArrayFloat], num_partitions: int, diff --git a/datasets/flwr_datasets/partitioner/distribution_partitioner_test.py b/datasets/flwr_datasets/partitioner/distribution_partitioner_test.py index 306e208a706b..d3e627f9b106 100644 --- a/datasets/flwr_datasets/partitioner/distribution_partitioner_test.py +++ b/datasets/flwr_datasets/partitioner/distribution_partitioner_test.py @@ -55,7 +55,7 @@ def _dummy_distribution_setup( # pylint: disable=R0913 -def _get_partitioner( +def _get_partitioner( # pylint: disable=R0917 num_partitions: int, num_unique_labels_per_partition: int, num_samples: int, diff --git a/datasets/flwr_datasets/partitioner/inner_dirichlet_partitioner.py b/datasets/flwr_datasets/partitioner/inner_dirichlet_partitioner.py index e62b8fdbb212..a9ef76dd5ac1 100644 --- a/datasets/flwr_datasets/partitioner/inner_dirichlet_partitioner.py +++ b/datasets/flwr_datasets/partitioner/inner_dirichlet_partitioner.py @@ -66,7 +66,7 @@ class InnerDirichletPartitioner(Partitioner): # pylint: disable=R0902 >>> print(partition[0]) # Print the first example """ - def __init__( # pylint: disable=R0913 + def __init__( # pylint: disable=R0913, R0917 self, partition_sizes: Union[list[int], NDArrayInt], partition_by: str, diff --git a/datasets/flwr_datasets/partitioner/natural_id_partitioner.py b/datasets/flwr_datasets/partitioner/natural_id_partitioner.py index 64b51855e1f4..b8b072051ce5 100644 --- 
a/datasets/flwr_datasets/partitioner/natural_id_partitioner.py +++ b/datasets/flwr_datasets/partitioner/natural_id_partitioner.py @@ -145,6 +145,7 @@ def partition_id_to_natural_id(self) -> dict[int, str]: @partition_id_to_natural_id.setter def partition_id_to_natural_id(self, value: dict[int, str]) -> None: + """Reject assignment; partition_id_to_natural_id is read-only.""" raise AttributeError( "Setting the partition_id_to_natural_id dictionary is not allowed." ) diff --git a/datasets/flwr_datasets/partitioner/partitioner.py b/datasets/flwr_datasets/partitioner/partitioner.py index 0404a11e772c..af1817ec14fd 100644 --- a/datasets/flwr_datasets/partitioner/partitioner.py +++ b/datasets/flwr_datasets/partitioner/partitioner.py @@ -43,6 +43,7 @@ def dataset(self) -> Dataset: @dataset.setter def dataset(self, value: Dataset) -> None: + """Set the dataset property.""" if self._dataset is not None: raise ValueError( "The dataset should be assigned only once to the partitioner." ) diff --git a/datasets/flwr_datasets/partitioner/pathological_partitioner.py b/datasets/flwr_datasets/partitioner/pathological_partitioner.py index d114ccbda02f..3337883af1d6 100644 --- a/datasets/flwr_datasets/partitioner/pathological_partitioner.py +++ b/datasets/flwr_datasets/partitioner/pathological_partitioner.py @@ -93,7 +93,7 @@ class PathologicalPartitioner(Partitioner): >>> partition = fds.load_partition(0) """ - def __init__( + def __init__( # pylint: disable=R0917 self, num_partitions: int, partition_by: str, diff --git a/datasets/flwr_datasets/partitioner/pathological_partitioner_test.py b/datasets/flwr_datasets/partitioner/pathological_partitioner_test.py index 5a3b13bb1436..d2d67a00882f 100644 --- a/datasets/flwr_datasets/partitioner/pathological_partitioner_test.py +++ b/datasets/flwr_datasets/partitioner/pathological_partitioner_test.py @@ -131,7 +131,7 @@ def test_first_class_deterministic_assignment(self) -> None: ) def test_deterministic_class_assignment( self, num_partitions, 
num_classes_per_partition, num_samples, num_unique_classes - ): + ) -> None: """Test deterministic assignment of classes to partitions.""" dataset = _dummy_dataset_setup(num_samples, "labels", num_unique_classes) partitioner = PathologicalPartitioner( diff --git a/datasets/flwr_datasets/partitioner/shard_partitioner.py b/datasets/flwr_datasets/partitioner/shard_partitioner.py index 3001df6dcb69..5e793bc07ebe 100644 --- a/datasets/flwr_datasets/partitioner/shard_partitioner.py +++ b/datasets/flwr_datasets/partitioner/shard_partitioner.py @@ -137,7 +137,7 @@ class ShardPartitioner(Partitioner): # pylint: disable=R0902 [5550, 5940, 5940, 5940, 5940, 5940, 5940, 5940, 5940, 6930] """ - def __init__( # pylint: disable=R0913 + def __init__( # pylint: disable=R0913, R0917 self, num_partitions: int, partition_by: str, diff --git a/datasets/flwr_datasets/partitioner/shard_partitioner_test.py b/datasets/flwr_datasets/partitioner/shard_partitioner_test.py index be8edf9d2764..6507e602108b 100644 --- a/datasets/flwr_datasets/partitioner/shard_partitioner_test.py +++ b/datasets/flwr_datasets/partitioner/shard_partitioner_test.py @@ -23,7 +23,7 @@ from flwr_datasets.partitioner.shard_partitioner import ShardPartitioner -def _dummy_setup( +def _dummy_setup( # pylint: disable=R0917 num_rows: int, partition_by: str, num_partitions: int, diff --git a/datasets/flwr_datasets/partitioner/size_partitioner_test.py b/datasets/flwr_datasets/partitioner/size_partitioner_test.py index be8edf9d2764..6507e602108b 100644 --- a/datasets/flwr_datasets/partitioner/size_partitioner_test.py +++ b/datasets/flwr_datasets/partitioner/size_partitioner_test.py @@ -23,7 +23,7 @@ from flwr_datasets.partitioner.shard_partitioner import ShardPartitioner -def _dummy_setup( +def _dummy_setup( # pylint: disable=R0917 num_rows: int, partition_by: str, num_partitions: int, diff --git a/datasets/flwr_datasets/partitioner/vertical_even_partitioner.py b/datasets/flwr_datasets/partitioner/vertical_even_partitioner.py 
index 03a8631b86cb..90439be1b42d 100644 --- a/datasets/flwr_datasets/partitioner/vertical_even_partitioner.py +++ b/datasets/flwr_datasets/partitioner/vertical_even_partitioner.py @@ -81,7 +81,7 @@ class VerticalEvenPartitioner(Partitioner): >>> print([partition.column_names for partition in partitions]) """ - def __init__( + def __init__( # pylint: disable=R0917 self, num_partitions: int, active_party_columns: Optional[Union[str, list[str]]] = None, diff --git a/datasets/flwr_datasets/partitioner/vertical_size_partitioner.py b/datasets/flwr_datasets/partitioner/vertical_size_partitioner.py index 31fee5bf3724..04e360992d44 100644 --- a/datasets/flwr_datasets/partitioner/vertical_size_partitioner.py +++ b/datasets/flwr_datasets/partitioner/vertical_size_partitioner.py @@ -91,7 +91,7 @@ class VerticalSizePartitioner(Partitioner): >>> print([partition.column_names for partition in partitions]) """ - def __init__( + def __init__( # pylint: disable=R0917 self, partition_sizes: Union[list[int], list[float]], active_party_columns: Optional[Union[str, list[str]]] = None, diff --git a/datasets/flwr_datasets/visualization/bar_plot.py b/datasets/flwr_datasets/visualization/bar_plot.py index 0f6936976fc0..e6196dfe473e 100644 --- a/datasets/flwr_datasets/visualization/bar_plot.py +++ b/datasets/flwr_datasets/visualization/bar_plot.py @@ -25,8 +25,7 @@ from matplotlib.figure import Figure -# pylint: disable=too-many-arguments,too-many-locals,too-many-branches -def _plot_bar( +def _plot_bar( # pylint: disable=R0912, R0913, R0914, R0917 dataframe: pd.DataFrame, axis: Optional[Axes], figsize: Optional[tuple[float, float]], diff --git a/datasets/flwr_datasets/visualization/comparison_label_distribution.py b/datasets/flwr_datasets/visualization/comparison_label_distribution.py index c741ddee219e..b6c763022e6f 100644 --- a/datasets/flwr_datasets/visualization/comparison_label_distribution.py +++ b/datasets/flwr_datasets/visualization/comparison_label_distribution.py @@ -31,7 +31,7 @@ 
# pylint: disable=too-many-arguments,too-many-locals # mypy: disable-error-code="call-overload" -def plot_comparison_label_distribution( +def plot_comparison_label_distribution( # pylint: disable=R0917 partitioner_list: list[Partitioner], label_name: Union[str, list[str]], plot_type: Literal["bar", "heatmap"] = "bar", diff --git a/datasets/flwr_datasets/visualization/heatmap_plot.py b/datasets/flwr_datasets/visualization/heatmap_plot.py index b5a0e640eb1b..38b9add492ea 100644 --- a/datasets/flwr_datasets/visualization/heatmap_plot.py +++ b/datasets/flwr_datasets/visualization/heatmap_plot.py @@ -25,8 +25,7 @@ from matplotlib.axes import Axes -# pylint: disable=too-many-arguments,too-many-locals -def _plot_heatmap( +def _plot_heatmap( # pylint: disable=R0913, R0917 dataframe: pd.DataFrame, axis: Optional[Axes], figsize: Optional[tuple[float, float]], diff --git a/datasets/flwr_datasets/visualization/label_distribution.py b/datasets/flwr_datasets/visualization/label_distribution.py index 550a4ecae725..8ce237957865 100644 --- a/datasets/flwr_datasets/visualization/label_distribution.py +++ b/datasets/flwr_datasets/visualization/label_distribution.py @@ -32,7 +32,7 @@ # pylint: disable=too-many-arguments,too-many-locals -def plot_label_distributions( +def plot_label_distributions( # pylint: disable=R0917 partitioner: Partitioner, label_name: str, plot_type: str = "bar", diff --git a/datasets/pyproject.toml b/datasets/pyproject.toml index 8fe387de29c4..b78d33b3469e 100644 --- a/datasets/pyproject.toml +++ b/datasets/pyproject.toml @@ -67,14 +67,14 @@ seaborn = "^0.13.0" isort = "==5.13.2" black = { version = "==24.2.0", extras = ["jupyter"] } docformatter = "==1.7.5" -mypy = "==1.4.0" -pylint = "==3.0.3" -flake8 = "==3.9.2" +mypy = "==1.8.0" +pylint = "==3.3.1" +flake8 = "==5.0.4" parameterized = "==0.9.0" -pytest = "==7.1.2" -pytest-watcher = "==0.4.1" +pytest = "==7.4.4" +pytest-watcher = "==0.4.3" ruff = "==0.4.5" -types-requests = "==2.28.11.17" +types-requests = 
"==2.31.0.20240125" types-setuptools = "==69.0.0.20240125" [tool.poetry.extras] @@ -132,9 +132,6 @@ wrap-descriptions = 88 [tool.ruff] target-version = "py39" line-length = 88 -select = ["D", "E", "F", "W", "B", "ISC", "C4", "UP"] -fixable = ["D", "E", "F", "W", "B", "ISC", "C4", "UP"] -ignore = ["B024", "B027"] exclude = [ ".bzr", ".direnv", @@ -159,5 +156,10 @@ exclude = [ "proto", ] -[tool.ruff.pydocstyle] +[tool.ruff.lint] +select = ["D", "E", "F", "W", "B", "ISC", "C4", "UP"] +fixable = ["D", "E", "F", "W", "B", "ISC", "C4", "UP"] +ignore = ["B024", "B027"] + +[tool.ruff.lint.pydocstyle] convention = "numpy"