From 4dd4ff0920d05a28bbb635cfcbffbc59a4325731 Mon Sep 17 00:00:00 2001 From: thyripian Date: Thu, 19 Sep 2024 19:38:21 -0400 Subject: [PATCH 1/7] Update _collection.py - Added functionality for row-wise mode calculation (axis=1) to support the Dask DataFrame API. - The new implementation dynamically handles row-wise mode and ensures consistent metadata handling across partitions. - Added validation for the axis parameter, with appropriate error handling for unsupported values. - Ensured compatibility with existing column-wise (axis=0) mode functionality, preserving the original behavior for that case. Resolves dask-expr issue #1136. --- dask_expr/_collection.py | 48 ++++++++++++++++++++++++++++++++++------ 1 file changed, 41 insertions(+), 7 deletions(-) diff --git a/dask_expr/_collection.py b/dask_expr/_collection.py index 26772730f..a0622bab2 100644 --- a/dask_expr/_collection.py +++ b/dask_expr/_collection.py @@ -3650,13 +3650,47 @@ def query(self, expr, **kwargs): return new_collection(Query(self, expr, kwargs)) @derived_from(pd.DataFrame) - def mode(self, dropna=True, split_every=False, numeric_only=False): - modes = [] - for _, col in self.items(): - if numeric_only and not pd.api.types.is_numeric_dtype(col.dtype): - continue - modes.append(col.mode(dropna=dropna, split_every=split_every)) - return concat(modes, axis=1) + # GH#11389 - Dask issue related to adding row-wise mode functionality + # GH#1136 - Dask-Expr specific implementation for row-wise mode functionality + # Contributor: @thyripian + def mode(self, axis=0, numeric_only=False, dropna=True, split_every=False): + if axis == 0: + # Existing logic for axis=0 (column-wise mode) + modes = [] + for _, col in self.items(): + if numeric_only and not pd.api.types.is_numeric_dtype(col.dtype): + continue + modes.append(col.mode(dropna=dropna, split_every=split_every)) + return concat(modes, axis=1) + elif axis == 1: + # Implement axis=1 (row-wise mode) + num_columns = len(self.columns) # Maximum possible number of modes per row + + def row_wise_mode(df): + result = df.mode(axis=1, numeric_only=numeric_only, dropna=dropna) + # Ensure consistent number of columns across all partitions + if result.shape[1] < num_columns: + # Pad with NaN columns + for i in range(result.shape[1], num_columns): + result[i] = np.nan + elif result.shape[1] > num_columns: + # Trim extra columns + result = result.iloc[:, :num_columns] + # Reindex columns to ensure consistent order + result = result.reindex(columns=range(num_columns)) + # Set column data types to float64 to accommodate NaN values + result = result.astype('float64') + return result + + # Create metadata with the correct number of columns and float64 dtype + meta = pd.DataFrame({i: pd.Series(dtype='float64') for i in range(num_columns)}) + + return self.map_partitions( + row_wise_mode, + meta=meta + ) + else: + raise ValueError(f"No axis named {axis} for object type {type(self)}") @derived_from(pd.DataFrame) def add_prefix(self, prefix): From e976f380ac586fc4f8b0313286337b92b15d09f5 Mon Sep 17 00:00:00 2001 From: thyripian Date: Fri, 20 Sep 2024 18:15:10 -0400 Subject: [PATCH 2/7] Reformat _collection.py Formatted _collection.py with Black --- dask_expr/_collection.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/dask_expr/_collection.py b/dask_expr/_collection.py index a0622bab2..c92f066d9 100644 --- a/dask_expr/_collection.py +++ b/dask_expr/_collection.py @@ -3679,16 +3679,15 @@ def row_wise_mode(df): # Reindex columns to ensure consistent order result = result.reindex(columns=range(num_columns)) # Set column data types to float64 to accommodate NaN values - result = result.astype('float64') + result = result.astype("float64") return result # Create metadata with the correct number of columns and float64 dtype - meta = pd.DataFrame({i: pd.Series(dtype='float64') for i in range(num_columns)}) - - return self.map_partitions( - row_wise_mode, - meta=meta + meta = pd.DataFrame( + {i: pd.Series(dtype="float64") for i in range(num_columns)} ) + + return self.map_partitions(row_wise_mode, meta=meta) else: raise ValueError(f"No axis named {axis} for object type {type(self)}") From bc340ee5121a19015c7998692d4cb11d37895104 Mon Sep 17 00:00:00 2001 From: thyripian Date: Sun, 6 Oct 2024 15:38:36 -0400 Subject: [PATCH 3/7] Refactor row-wise mode functionality in mode method Simplified the logic for row-wise mode computation (axis=1) to dynamically handle multiple modes per row. Refactored metadata handling to ensure the number of columns is consistent across partitions, avoiding mismatches in column count. This addresses issues with inconsistent column numbers between computed data and metadata in Dask, and addresses dev team feedback. --- dask_expr/_collection.py | 36 +++----- ..._editable__.dask-2024.9.0+3.ga87509934.pth | 1 + ...ble___dask_2024_9_0_3_ga87509934_finder.py | 85 ++++++++++++++++++ .../INSTALLER | 1 + .../LICENSE.txt | 29 ++++++ .../METADATA | 89 +++++++++++++++++++ .../NUMPY_LICENSE.txt | 30 +++++++ .../RECORD | 14 +++ .../REQUESTED | 0 .../WHEEL | 5 ++ .../direct_url.json | 1 + .../entry_points.txt | 5 ++ .../top_level.txt | 1 + 13 files changed, 274 insertions(+), 23 deletions(-) create mode 100644 venv/lib/python3.10/site-packages/__editable__.dask-2024.9.0+3.ga87509934.pth create mode 100644 venv/lib/python3.10/site-packages/__editable___dask_2024_9_0_3_ga87509934_finder.py create mode 100644 venv/lib/python3.10/site-packages/dask-2024.9.0+3.ga87509934.dist-info/INSTALLER create mode 100644 venv/lib/python3.10/site-packages/dask-2024.9.0+3.ga87509934.dist-info/LICENSE.txt create mode 100644 venv/lib/python3.10/site-packages/dask-2024.9.0+3.ga87509934.dist-info/METADATA create mode 100644 venv/lib/python3.10/site-packages/dask-2024.9.0+3.ga87509934.dist-info/NUMPY_LICENSE.txt create mode 100644 venv/lib/python3.10/site-packages/dask-2024.9.0+3.ga87509934.dist-info/RECORD create mode 100644 venv/lib/python3.10/site-packages/dask-2024.9.0+3.ga87509934.dist-info/REQUESTED create mode 100644 venv/lib/python3.10/site-packages/dask-2024.9.0+3.ga87509934.dist-info/WHEEL create mode 100644 venv/lib/python3.10/site-packages/dask-2024.9.0+3.ga87509934.dist-info/direct_url.json create mode 100644 venv/lib/python3.10/site-packages/dask-2024.9.0+3.ga87509934.dist-info/entry_points.txt create mode 100644 venv/lib/python3.10/site-packages/dask-2024.9.0+3.ga87509934.dist-info/top_level.txt diff --git a/dask_expr/_collection.py b/dask_expr/_collection.py index c92f066d9..0e236f3ed 100644 --- a/dask_expr/_collection.py +++ b/dask_expr/_collection.py @@ -3663,31 +3663,21 @@ def mode(self, axis=0, numeric_only=False, dropna=True, split_every=False): modes.append(col.mode(dropna=dropna, split_every=split_every)) return concat(modes, axis=1) elif axis == 1: - # Implement axis=1 (row-wise mode) - num_columns = len(self.columns) # Maximum possible number of modes per row - - def row_wise_mode(df): - result = df.mode(axis=1, numeric_only=numeric_only, dropna=dropna) - # Ensure consistent number of columns across all partitions - if result.shape[1] < num_columns: - # Pad with NaN columns - for i in range(result.shape[1], num_columns): - result[i] = np.nan - elif result.shape[1] > num_columns: - # Trim extra columns - result = result.iloc[:, :num_columns] - # Reindex columns to ensure consistent order - result = result.reindex(columns=range(num_columns)) - # Set column data types to float64 to accommodate NaN values - result = result.astype("float64") - return result + # The maximum possible number of modes per row is equal to the number of columns + num_columns = len(self.columns) - # Create metadata with the correct number of columns and float64 dtype - meta = pd.DataFrame( - {i: pd.Series(dtype="float64") for i in range(num_columns)} - ) + # Create metadata DataFrame with the correct number of columns + # Use the first column's dtype as a representative dtype + mode_dtype = self._meta.dtypes[0] if len(self.columns) > 0 else float + meta = pd.DataFrame(columns=range(num_columns), dtype=mode_dtype) - return self.map_partitions(row_wise_mode, meta=meta) + # Apply map_partitions and reindex to ensure consistent columns + return self.map_partitions( + lambda df: df.mode( + axis=1, numeric_only=numeric_only, dropna=dropna + ).reindex(columns=range(num_columns), fill_value=np.nan), + meta=meta, + ) else: raise ValueError(f"No axis named {axis} for object type {type(self)}") diff --git a/venv/lib/python3.10/site-packages/__editable__.dask-2024.9.0+3.ga87509934.pth b/venv/lib/python3.10/site-packages/__editable__.dask-2024.9.0+3.ga87509934.pth new file mode 100644 index 000000000..7a258e3c4 --- /dev/null +++ b/venv/lib/python3.10/site-packages/__editable__.dask-2024.9.0+3.ga87509934.pth @@ -0,0 +1 @@ +import __editable___dask_2024_9_0_3_ga87509934_finder; __editable___dask_2024_9_0_3_ga87509934_finder.install() \ No newline at end of file diff --git a/venv/lib/python3.10/site-packages/__editable___dask_2024_9_0_3_ga87509934_finder.py b/venv/lib/python3.10/site-packages/__editable___dask_2024_9_0_3_ga87509934_finder.py new file mode 100644 index 000000000..45c56adf0 --- /dev/null +++ b/venv/lib/python3.10/site-packages/__editable___dask_2024_9_0_3_ga87509934_finder.py @@ -0,0 +1,85 @@ +from __future__ import annotations +import sys +from importlib.machinery import ModuleSpec, PathFinder +from importlib.machinery import all_suffixes as module_suffixes +from importlib.util import spec_from_file_location +from itertools import chain +from pathlib import Path + +MAPPING: dict[str, str] = {'dask': '/Users/8823tech/Desktop/Code/Open_Source/dask/dask'} +NAMESPACES: dict[str, list[str]] = {} +PATH_PLACEHOLDER = '__editable__.dask-2024.9.0+3.ga87509934.finder' + ".__path_hook__" + + +class _EditableFinder: # MetaPathFinder + @classmethod + def find_spec(cls, fullname: str, path=None, target=None) -> ModuleSpec | None: # type: ignore + # Top-level packages and modules (we know these exist in the FS) + if fullname in MAPPING: + pkg_path = MAPPING[fullname] + return cls._find_spec(fullname, Path(pkg_path)) + + # Handle immediate children modules (required for namespaces to work) + # To avoid problems with case sensitivity in the file system we delegate + # to the importlib.machinery implementation. + parent, _, child = fullname.rpartition(".") + if parent and parent in MAPPING: + return PathFinder.find_spec(fullname, path=[MAPPING[parent]]) + + # Other levels of nesting should be handled automatically by importlib + # using the parent path. + return None + + @classmethod + def _find_spec(cls, fullname: str, candidate_path: Path) -> ModuleSpec | None: + init = candidate_path / "__init__.py" + candidates = (candidate_path.with_suffix(x) for x in module_suffixes()) + for candidate in chain([init], candidates): + if candidate.exists(): + return spec_from_file_location(fullname, candidate) + return None + + +class _EditableNamespaceFinder: # PathEntryFinder + @classmethod + def _path_hook(cls, path) -> type[_EditableNamespaceFinder]: + if path == PATH_PLACEHOLDER: + return cls + raise ImportError + + @classmethod + def _paths(cls, fullname: str) -> list[str]: + paths = NAMESPACES[fullname] + if not paths and fullname in MAPPING: + paths = [MAPPING[fullname]] + # Always add placeholder, for 2 reasons: + # 1. __path__ cannot be empty for the spec to be considered namespace. + # 2. In the case of nested namespaces, we need to force + # import machinery to query _EditableNamespaceFinder again. + return [*paths, PATH_PLACEHOLDER] + + @classmethod + def find_spec(cls, fullname: str, target=None) -> ModuleSpec | None: # type: ignore + if fullname in NAMESPACES: + spec = ModuleSpec(fullname, None, is_package=True) + spec.submodule_search_locations = cls._paths(fullname) + return spec + return None + + @classmethod + def find_module(cls, _fullname) -> None: + return None + + +def install(): + if not any(finder == _EditableFinder for finder in sys.meta_path): + sys.meta_path.append(_EditableFinder) + + if not NAMESPACES: + return + + if not any(hook == _EditableNamespaceFinder._path_hook for hook in sys.path_hooks): + # PathEntryFinder is needed to create NamespaceSpec without private APIS + sys.path_hooks.append(_EditableNamespaceFinder._path_hook) + if PATH_PLACEHOLDER not in sys.path: + sys.path.append(PATH_PLACEHOLDER) # Used just to trigger the path hook diff --git a/venv/lib/python3.10/site-packages/dask-2024.9.0+3.ga87509934.dist-info/INSTALLER b/venv/lib/python3.10/site-packages/dask-2024.9.0+3.ga87509934.dist-info/INSTALLER new file mode 100644 index 000000000..a1b589e38 --- /dev/null +++ b/venv/lib/python3.10/site-packages/dask-2024.9.0+3.ga87509934.dist-info/INSTALLER @@ -0,0 +1 @@ +pip diff --git a/venv/lib/python3.10/site-packages/dask-2024.9.0+3.ga87509934.dist-info/LICENSE.txt b/venv/lib/python3.10/site-packages/dask-2024.9.0+3.ga87509934.dist-info/LICENSE.txt new file mode 100644 index 000000000..0aa7cf43e --- /dev/null +++ b/venv/lib/python3.10/site-packages/dask-2024.9.0+3.ga87509934.dist-info/LICENSE.txt @@ -0,0 +1,29 @@ +BSD 3-Clause License + +Copyright (c) 2014, Anaconda, Inc. and contributors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +* Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/venv/lib/python3.10/site-packages/dask-2024.9.0+3.ga87509934.dist-info/METADATA b/venv/lib/python3.10/site-packages/dask-2024.9.0+3.ga87509934.dist-info/METADATA new file mode 100644 index 000000000..474c54212 --- /dev/null +++ b/venv/lib/python3.10/site-packages/dask-2024.9.0+3.ga87509934.dist-info/METADATA @@ -0,0 +1,89 @@ +Metadata-Version: 2.1 +Name: dask +Version: 2024.9.0+3.ga87509934 +Summary: Parallel PyData with Task Scheduling +Maintainer-email: Matthew Rocklin +License: BSD-3-Clause +Project-URL: Homepage, https://github.com/dask/dask/ +Keywords: task-scheduling parallel numpy pandas pydata +Classifier: Development Status :: 5 - Production/Stable +Classifier: Intended Audience :: Developers +Classifier: Intended Audience :: Science/Research +Classifier: License :: OSI Approved :: BSD License +Classifier: Operating System :: OS Independent +Classifier: Programming Language :: Python +Classifier: Programming Language :: Python :: 3 +Classifier: Programming Language :: Python :: 3 :: Only +Classifier: Programming Language :: Python :: 3.10 +Classifier: Programming Language :: Python :: 3.11 +Classifier: Programming Language :: Python :: 3.12 +Classifier: Topic :: Scientific/Engineering +Classifier: Topic :: System :: Distributed Computing +Requires-Python: >=3.10 +Description-Content-Type: text/x-rst +License-File: LICENSE.txt +License-File: dask/array/NUMPY_LICENSE.txt +Requires-Dist: click >=8.1 +Requires-Dist: cloudpickle >=3.0.0 +Requires-Dist: fsspec >=2021.09.0 +Requires-Dist: packaging >=20.0 +Requires-Dist: partd >=1.4.0 +Requires-Dist: pyyaml >=5.3.1 +Requires-Dist: toolz >=0.10.0 +Requires-Dist: importlib-metadata >=4.13.0 ; python_version < "3.12" +Provides-Extra: array +Requires-Dist: numpy >=1.24 ; extra == 'array' +Provides-Extra: bag +Provides-Extra: complete +Requires-Dist: dask[array,dataframe,diagnostics,distributed] ; extra == 'complete' +Requires-Dist: pyarrow >=14.0.1 ; extra == 'complete' +Requires-Dist: lz4 >=4.3.2 ; extra == 'complete' +Provides-Extra: dataframe +Requires-Dist: dask[array] ; extra == 'dataframe' +Requires-Dist: pandas >=2.0 ; extra == 'dataframe' +Requires-Dist: dask-expr <1.2,>=1.1 ; extra == 'dataframe' +Provides-Extra: delayed +Provides-Extra: diagnostics +Requires-Dist: bokeh >=3.1.0 ; extra == 'diagnostics' +Requires-Dist: jinja2 >=2.10.3 ; extra == 'diagnostics' +Provides-Extra: distributed +Requires-Dist: distributed ==2024.9.0 ; extra == 'distributed' +Provides-Extra: test +Requires-Dist: pandas[test] ; extra == 'test' +Requires-Dist: pytest ; extra == 'test' +Requires-Dist: pytest-cov ; extra == 'test' +Requires-Dist: pytest-rerunfailures ; extra == 'test' +Requires-Dist: pytest-timeout ; extra == 'test' +Requires-Dist: pytest-xdist ; extra == 'test' +Requires-Dist: pre-commit ; extra == 'test' + +Dask +==== + +|Build Status| |Coverage| |Doc Status| |Discourse| |Version Status| |NumFOCUS| + +Dask is a flexible parallel computing library for analytics. See +documentation_ for more information. + + +LICENSE +------- + +New BSD. See `License File `__. + +.. _documentation: https://dask.org +.. |Build Status| image:: https://github.com/dask/dask/actions/workflows/tests.yml/badge.svg + :target: https://github.com/dask/dask/actions/workflows/tests.yml +.. |Coverage| image:: https://codecov.io/gh/dask/dask/branch/main/graph/badge.svg + :target: https://codecov.io/gh/dask/dask/branch/main + :alt: Coverage status +.. |Doc Status| image:: https://readthedocs.org/projects/dask/badge/?version=latest + :target: https://dask.org + :alt: Documentation Status +.. |Discourse| image:: https://img.shields.io/discourse/users?logo=discourse&server=https%3A%2F%2Fdask.discourse.group + :alt: Discuss Dask-related things and ask for help + :target: https://dask.discourse.group +.. |Version Status| image:: https://img.shields.io/pypi/v/dask.svg + :target: https://pypi.python.org/pypi/dask/ +.. |NumFOCUS| image:: https://img.shields.io/badge/powered%20by-NumFOCUS-orange.svg?style=flat&colorA=E1523D&colorB=007D8A + :target: https://www.numfocus.org/ diff --git a/venv/lib/python3.10/site-packages/dask-2024.9.0+3.ga87509934.dist-info/NUMPY_LICENSE.txt b/venv/lib/python3.10/site-packages/dask-2024.9.0+3.ga87509934.dist-info/NUMPY_LICENSE.txt new file mode 100644 index 000000000..b4139af86 --- /dev/null +++ b/venv/lib/python3.10/site-packages/dask-2024.9.0+3.ga87509934.dist-info/NUMPY_LICENSE.txt @@ -0,0 +1,30 @@ +Copyright (c) 2005-2015, NumPy Developers. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + + * Neither the name of the NumPy Developers nor the names of any + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/venv/lib/python3.10/site-packages/dask-2024.9.0+3.ga87509934.dist-info/RECORD b/venv/lib/python3.10/site-packages/dask-2024.9.0+3.ga87509934.dist-info/RECORD new file mode 100644 index 000000000..a2228a0b9 --- /dev/null +++ b/venv/lib/python3.10/site-packages/dask-2024.9.0+3.ga87509934.dist-info/RECORD @@ -0,0 +1,14 @@ +../../../bin/dask,sha256=S8aT79BaVsTyBGzlyqyKTI5bUHbiY1fK6KkytOftzpM,263 +__editable__.dask-2024.9.0+3.ga87509934.pth,sha256=3VaIuRATJ6ckwf94V4bYS5h_FmVgMhLV0fZD8ITyAT4,111 +__editable___dask_2024_9_0_3_ga87509934_finder.py,sha256=KT87v6V0qMJqq8pqqusSk8-ahs7JLAWfKaPpgQ8q0K4,3411 +__pycache__/__editable___dask_2024_9_0_3_ga87509934_finder.cpython-310.pyc,, +dask-2024.9.0+3.ga87509934.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4 +dask-2024.9.0+3.ga87509934.dist-info/LICENSE.txt,sha256=Eht58q-4tqaPMAZ_mKA1qwg3gL00MgWgctdpJDg7Y88,1531 +dask-2024.9.0+3.ga87509934.dist-info/METADATA,sha256=QMndRaKOkxBJOim5UKjU8OtsS8oIzupi4yv16ot2za0,3730 +dask-2024.9.0+3.ga87509934.dist-info/NUMPY_LICENSE.txt,sha256=VGrgTUHkF-M_FQ1sMIt_4tu2im7Ej0nHkDcsTTeZ4Cc,1543 +dask-2024.9.0+3.ga87509934.dist-info/RECORD,, +dask-2024.9.0+3.ga87509934.dist-info/REQUESTED,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +dask-2024.9.0+3.ga87509934.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91 +dask-2024.9.0+3.ga87509934.dist-info/direct_url.json,sha256=kIHibdooBINlpcAdmvjNld7U6iwAOndoRY6UWx2Vs0c,95 +dask-2024.9.0+3.ga87509934.dist-info/entry_points.txt,sha256=DqIpjJmPgioJQpDSCDN2w_ZwaOvs2zlhHoZ2yahFCRI,124 +dask-2024.9.0+3.ga87509934.dist-info/top_level.txt,sha256=iT6x3D1cKsV7ordRW_UwlPRBdavTonNRtWVPB-Kti7U,5 diff --git a/venv/lib/python3.10/site-packages/dask-2024.9.0+3.ga87509934.dist-info/REQUESTED b/venv/lib/python3.10/site-packages/dask-2024.9.0+3.ga87509934.dist-info/REQUESTED new file mode 100644 index 000000000..e69de29bb diff --git a/venv/lib/python3.10/site-packages/dask-2024.9.0+3.ga87509934.dist-info/WHEEL b/venv/lib/python3.10/site-packages/dask-2024.9.0+3.ga87509934.dist-info/WHEEL new file mode 100644 index 000000000..dcfdc6e35 --- /dev/null +++ b/venv/lib/python3.10/site-packages/dask-2024.9.0+3.ga87509934.dist-info/WHEEL @@ -0,0 +1,5 @@ +Wheel-Version: 1.0 +Generator: setuptools (75.1.0) +Root-Is-Purelib: true +Tag: py3-none-any + diff --git a/venv/lib/python3.10/site-packages/dask-2024.9.0+3.ga87509934.dist-info/direct_url.json b/venv/lib/python3.10/site-packages/dask-2024.9.0+3.ga87509934.dist-info/direct_url.json new file mode 100644 index 000000000..018ab6db3 --- /dev/null +++ b/venv/lib/python3.10/site-packages/dask-2024.9.0+3.ga87509934.dist-info/direct_url.json @@ -0,0 +1 @@ +{"dir_info": {"editable": true}, "url": "file:///Users/8823tech/Desktop/Code/Open_Source/dask"} \ No newline at end of file diff --git a/venv/lib/python3.10/site-packages/dask-2024.9.0+3.ga87509934.dist-info/entry_points.txt b/venv/lib/python3.10/site-packages/dask-2024.9.0+3.ga87509934.dist-info/entry_points.txt new file mode 100644 index 000000000..aae31dc37 --- /dev/null +++ b/venv/lib/python3.10/site-packages/dask-2024.9.0+3.ga87509934.dist-info/entry_points.txt @@ -0,0 +1,5 @@ +[console_scripts] +dask = dask.__main__:main + +[dask.array.backends] +cupy = dask.array.cupy_entry_point:CupyBackendEntrypoint diff --git a/venv/lib/python3.10/site-packages/dask-2024.9.0+3.ga87509934.dist-info/top_level.txt b/venv/lib/python3.10/site-packages/dask-2024.9.0+3.ga87509934.dist-info/top_level.txt new file mode 100644 index 000000000..b2034ba3b --- /dev/null +++ b/venv/lib/python3.10/site-packages/dask-2024.9.0+3.ga87509934.dist-info/top_level.txt @@ -0,0 +1 @@ +dask From 9f918acccd2cbb282f0d57093e79fabd1b70d907 Mon Sep 17 00:00:00 2001 From: thyripian Date: Sun, 6 Oct 2024 15:42:29 -0400 Subject: [PATCH 4/7] drop venv My bad. Git desktop added my venv to the last push but I didn't see it. --- dask_expr/_expr.py | 32 ++++++++++++++++++++------------ 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/dask_expr/_expr.py b/dask_expr/_expr.py index aa807eabd..ab2b8b74c 100644 --- a/dask_expr/_expr.py +++ b/dask_expr/_expr.py @@ -1269,9 +1269,11 @@ def _simplify_up(self, parent, dependents): columns = _convert_to_list(columns) frame_columns = set(self.frame.columns) columns = [ - reverse_mapping[col] - if col in reverse_mapping and reverse_mapping[col] in frame_columns - else col + ( + reverse_mapping[col] + if col in reverse_mapping and reverse_mapping[col] in frame_columns + else col + ) for col in columns ] columns = [col for col in self.frame.columns if col in columns] @@ -2342,9 +2344,11 @@ class AddPrefix(Elemwise): @functools.cached_property def unique_partition_mapping_columns_from_shuffle(self): return { - f"{self.prefix}{c}" - if not isinstance(c, tuple) - else tuple(self.prefix + t for t in c) + ( + f"{self.prefix}{c}" + if not isinstance(c, tuple) + else tuple(self.prefix + t for t in c) + ) for c in self.frame.unique_partition_mapping_columns_from_shuffle } @@ -2373,9 +2377,11 @@ class AddSuffix(AddPrefix): @functools.cached_property def unique_partition_mapping_columns_from_shuffle(self): return { - f"{c}{self.suffix}" - if not isinstance(c, tuple) - else tuple(t + self.suffix for t in c) + ( + f"{c}{self.suffix}" + if not isinstance(c, tuple) + else tuple(t + self.suffix for t in c) + ) for c in self.frame.unique_partition_mapping_columns_from_shuffle } @@ -2421,9 +2427,11 @@ def _task(self, index: int): def _simplify_down(self): if isinstance(self.frame, Elemwise): operands = [ - Head(op, self.n, self.operand("npartitions")) - if isinstance(op, Expr) and not isinstance(op, _DelayedExpr) - else op + ( + Head(op, self.n, self.operand("npartitions")) + if isinstance(op, Expr) and not isinstance(op, _DelayedExpr) + else op + ) for op in self.frame.operands ] return type(self.frame)(*operands) From ddaef074fcef1f07f7d378892ab775e4f356de2f Mon Sep 17 00:00:00 2001 From: thyripian Date: Sun, 6 Oct 2024 15:45:16 -0400 Subject: [PATCH 5/7] Add venv to .gitignore to avoid tracking the virtual environment --- .gitignore | 1 + ..._editable__.dask-2024.9.0+3.ga87509934.pth | 1 - ...ble___dask_2024_9_0_3_ga87509934_finder.py | 85 ------------------ .../INSTALLER | 1 - .../LICENSE.txt | 29 ------ .../METADATA | 89 ------------------- .../NUMPY_LICENSE.txt | 30 ------- .../RECORD | 14 --- .../REQUESTED | 0 .../WHEEL | 5 -- .../direct_url.json | 1 - .../entry_points.txt | 5 -- .../top_level.txt | 1 - 13 files changed, 1 insertion(+), 261 deletions(-) delete mode 100644 venv/lib/python3.10/site-packages/__editable__.dask-2024.9.0+3.ga87509934.pth delete mode 100644 venv/lib/python3.10/site-packages/__editable___dask_2024_9_0_3_ga87509934_finder.py delete mode 100644 venv/lib/python3.10/site-packages/dask-2024.9.0+3.ga87509934.dist-info/INSTALLER delete mode 100644 venv/lib/python3.10/site-packages/dask-2024.9.0+3.ga87509934.dist-info/LICENSE.txt delete mode 100644 venv/lib/python3.10/site-packages/dask-2024.9.0+3.ga87509934.dist-info/METADATA delete mode 100644 venv/lib/python3.10/site-packages/dask-2024.9.0+3.ga87509934.dist-info/NUMPY_LICENSE.txt delete mode 100644 venv/lib/python3.10/site-packages/dask-2024.9.0+3.ga87509934.dist-info/RECORD delete mode 100644 venv/lib/python3.10/site-packages/dask-2024.9.0+3.ga87509934.dist-info/REQUESTED delete mode 100644 venv/lib/python3.10/site-packages/dask-2024.9.0+3.ga87509934.dist-info/WHEEL delete mode 100644 venv/lib/python3.10/site-packages/dask-2024.9.0+3.ga87509934.dist-info/direct_url.json delete mode 100644 venv/lib/python3.10/site-packages/dask-2024.9.0+3.ga87509934.dist-info/entry_points.txt delete mode 100644 venv/lib/python3.10/site-packages/dask-2024.9.0+3.ga87509934.dist-info/top_level.txt diff --git a/.gitignore b/.gitignore index 12e933d2b..b32bb6a90 100644 --- a/.gitignore +++ b/.gitignore @@ -10,3 +10,4 @@ bench/shakespeare.txt .idea/ .ipynb_checkpoints/ coverage.xml +venv/ diff --git a/venv/lib/python3.10/site-packages/__editable__.dask-2024.9.0+3.ga87509934.pth b/venv/lib/python3.10/site-packages/__editable__.dask-2024.9.0+3.ga87509934.pth deleted file mode 100644 index 7a258e3c4..000000000 --- a/venv/lib/python3.10/site-packages/__editable__.dask-2024.9.0+3.ga87509934.pth +++ /dev/null @@ -1 +0,0 @@ -import __editable___dask_2024_9_0_3_ga87509934_finder; __editable___dask_2024_9_0_3_ga87509934_finder.install() \ No newline at end of file diff --git a/venv/lib/python3.10/site-packages/__editable___dask_2024_9_0_3_ga87509934_finder.py b/venv/lib/python3.10/site-packages/__editable___dask_2024_9_0_3_ga87509934_finder.py deleted file mode 100644 index 45c56adf0..000000000 --- a/venv/lib/python3.10/site-packages/__editable___dask_2024_9_0_3_ga87509934_finder.py +++ /dev/null @@ -1,85 +0,0 @@ -from __future__ import annotations -import sys -from importlib.machinery import ModuleSpec, PathFinder -from importlib.machinery import all_suffixes as module_suffixes -from importlib.util import spec_from_file_location -from itertools import chain -from pathlib import Path - -MAPPING: dict[str, str] = {'dask': '/Users/8823tech/Desktop/Code/Open_Source/dask/dask'} -NAMESPACES: dict[str, list[str]] = {} -PATH_PLACEHOLDER = '__editable__.dask-2024.9.0+3.ga87509934.finder' + ".__path_hook__" - - -class _EditableFinder: # MetaPathFinder - @classmethod - def find_spec(cls, fullname: str, path=None, target=None) -> ModuleSpec | None: # type: ignore - # Top-level packages and modules (we know these exist in the FS) - if fullname in MAPPING: - pkg_path = MAPPING[fullname] - return cls._find_spec(fullname, Path(pkg_path)) - - # Handle immediate children modules (required for namespaces to work) - # To avoid problems with case sensitivity in the file system we delegate - # to the importlib.machinery implementation. - parent, _, child = fullname.rpartition(".") - if parent and parent in MAPPING: - return PathFinder.find_spec(fullname, path=[MAPPING[parent]]) - - # Other levels of nesting should be handled automatically by importlib - # using the parent path. - return None - - @classmethod - def _find_spec(cls, fullname: str, candidate_path: Path) -> ModuleSpec | None: - init = candidate_path / "__init__.py" - candidates = (candidate_path.with_suffix(x) for x in module_suffixes()) - for candidate in chain([init], candidates): - if candidate.exists(): - return spec_from_file_location(fullname, candidate) - return None - - -class _EditableNamespaceFinder: # PathEntryFinder - @classmethod - def _path_hook(cls, path) -> type[_EditableNamespaceFinder]: - if path == PATH_PLACEHOLDER: - return cls - raise ImportError - - @classmethod - def _paths(cls, fullname: str) -> list[str]: - paths = NAMESPACES[fullname] - if not paths and fullname in MAPPING: - paths = [MAPPING[fullname]] - # Always add placeholder, for 2 reasons: - # 1. __path__ cannot be empty for the spec to be considered namespace. - # 2. In the case of nested namespaces, we need to force - # import machinery to query _EditableNamespaceFinder again. - return [*paths, PATH_PLACEHOLDER] - - @classmethod - def find_spec(cls, fullname: str, target=None) -> ModuleSpec | None: # type: ignore - if fullname in NAMESPACES: - spec = ModuleSpec(fullname, None, is_package=True) - spec.submodule_search_locations = cls._paths(fullname) - return spec - return None - - @classmethod - def find_module(cls, _fullname) -> None: - return None - - -def install(): - if not any(finder == _EditableFinder for finder in sys.meta_path): - sys.meta_path.append(_EditableFinder) - - if not NAMESPACES: - return - - if not any(hook == _EditableNamespaceFinder._path_hook for hook in sys.path_hooks): - # PathEntryFinder is needed to create NamespaceSpec without private APIS - sys.path_hooks.append(_EditableNamespaceFinder._path_hook) - if PATH_PLACEHOLDER not in sys.path: - sys.path.append(PATH_PLACEHOLDER) # Used just to trigger the path hook diff --git a/venv/lib/python3.10/site-packages/dask-2024.9.0+3.ga87509934.dist-info/INSTALLER b/venv/lib/python3.10/site-packages/dask-2024.9.0+3.ga87509934.dist-info/INSTALLER deleted file mode 100644 index a1b589e38..000000000 --- a/venv/lib/python3.10/site-packages/dask-2024.9.0+3.ga87509934.dist-info/INSTALLER +++ /dev/null @@ -1 +0,0 @@ -pip diff --git a/venv/lib/python3.10/site-packages/dask-2024.9.0+3.ga87509934.dist-info/LICENSE.txt b/venv/lib/python3.10/site-packages/dask-2024.9.0+3.ga87509934.dist-info/LICENSE.txt deleted file mode 100644 index 0aa7cf43e..000000000 --- a/venv/lib/python3.10/site-packages/dask-2024.9.0+3.ga87509934.dist-info/LICENSE.txt +++ /dev/null @@ -1,29 +0,0 @@ -BSD 3-Clause License - -Copyright (c) 2014, Anaconda, Inc. and contributors -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - -* Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - -* Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - -* Neither the name of the copyright holder nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/venv/lib/python3.10/site-packages/dask-2024.9.0+3.ga87509934.dist-info/METADATA b/venv/lib/python3.10/site-packages/dask-2024.9.0+3.ga87509934.dist-info/METADATA deleted file mode 100644 index 474c54212..000000000 --- a/venv/lib/python3.10/site-packages/dask-2024.9.0+3.ga87509934.dist-info/METADATA +++ /dev/null @@ -1,89 +0,0 @@ -Metadata-Version: 2.1 -Name: dask -Version: 2024.9.0+3.ga87509934 -Summary: Parallel PyData with Task Scheduling -Maintainer-email: Matthew Rocklin -License: BSD-3-Clause -Project-URL: Homepage, https://github.com/dask/dask/ -Keywords: task-scheduling parallel numpy pandas pydata -Classifier: Development Status :: 5 - Production/Stable -Classifier: Intended Audience :: Developers -Classifier: Intended Audience :: Science/Research -Classifier: License :: OSI Approved :: BSD License -Classifier: Operating System :: OS Independent -Classifier: Programming Language :: Python -Classifier: Programming Language :: Python :: 3 -Classifier: Programming Language :: Python :: 3 :: Only -Classifier: Programming Language :: Python :: 3.10 -Classifier: Programming Language :: Python :: 3.11 -Classifier: Programming Language :: Python :: 3.12 -Classifier: Topic :: Scientific/Engineering -Classifier: Topic :: System :: Distributed Computing -Requires-Python: >=3.10 -Description-Content-Type: text/x-rst -License-File: LICENSE.txt -License-File: dask/array/NUMPY_LICENSE.txt -Requires-Dist: click >=8.1 -Requires-Dist: cloudpickle >=3.0.0 -Requires-Dist: fsspec >=2021.09.0 -Requires-Dist: packaging >=20.0 -Requires-Dist: partd >=1.4.0 -Requires-Dist: pyyaml >=5.3.1 -Requires-Dist: toolz >=0.10.0 -Requires-Dist: importlib-metadata >=4.13.0 ; python_version < "3.12" -Provides-Extra: array -Requires-Dist: numpy >=1.24 ; extra == 'array' -Provides-Extra: bag -Provides-Extra: complete -Requires-Dist: dask[array,dataframe,diagnostics,distributed] ; extra == 'complete' -Requires-Dist: pyarrow >=14.0.1 ; extra == 'complete' -Requires-Dist: lz4 >=4.3.2 ; extra == 'complete' -Provides-Extra: dataframe -Requires-Dist: dask[array] ; extra == 'dataframe' -Requires-Dist: pandas >=2.0 ; extra == 'dataframe' -Requires-Dist: dask-expr <1.2,>=1.1 ; extra == 'dataframe' -Provides-Extra: delayed -Provides-Extra: diagnostics -Requires-Dist: bokeh >=3.1.0 ; extra == 'diagnostics' -Requires-Dist: jinja2 >=2.10.3 ; extra == 'diagnostics' -Provides-Extra: distributed -Requires-Dist: distributed ==2024.9.0 ; extra == 'distributed' -Provides-Extra: test -Requires-Dist: pandas[test] ; extra == 'test' -Requires-Dist: pytest ; extra == 'test' -Requires-Dist: pytest-cov ; extra == 'test' -Requires-Dist: pytest-rerunfailures ; extra == 'test' -Requires-Dist: pytest-timeout ; extra == 'test' -Requires-Dist: pytest-xdist ; extra == 'test' -Requires-Dist: pre-commit ; extra == 'test' - -Dask -==== - -|Build Status| |Coverage| |Doc Status| |Discourse| |Version Status| |NumFOCUS| - -Dask is a flexible parallel computing library for analytics. See -documentation_ for more information. - - -LICENSE -------- - -New BSD. See `License File `__. - -.. _documentation: https://dask.org -.. |Build Status| image:: https://github.com/dask/dask/actions/workflows/tests.yml/badge.svg - :target: https://github.com/dask/dask/actions/workflows/tests.yml -.. |Coverage| image:: https://codecov.io/gh/dask/dask/branch/main/graph/badge.svg - :target: https://codecov.io/gh/dask/dask/branch/main - :alt: Coverage status -.. |Doc Status| image:: https://readthedocs.org/projects/dask/badge/?version=latest - :target: https://dask.org - :alt: Documentation Status -.. |Discourse| image:: https://img.shields.io/discourse/users?logo=discourse&server=https%3A%2F%2Fdask.discourse.group - :alt: Discuss Dask-related things and ask for help - :target: https://dask.discourse.group -.. |Version Status| image:: https://img.shields.io/pypi/v/dask.svg - :target: https://pypi.python.org/pypi/dask/ -.. |NumFOCUS| image:: https://img.shields.io/badge/powered%20by-NumFOCUS-orange.svg?style=flat&colorA=E1523D&colorB=007D8A - :target: https://www.numfocus.org/ diff --git a/venv/lib/python3.10/site-packages/dask-2024.9.0+3.ga87509934.dist-info/NUMPY_LICENSE.txt b/venv/lib/python3.10/site-packages/dask-2024.9.0+3.ga87509934.dist-info/NUMPY_LICENSE.txt deleted file mode 100644 index b4139af86..000000000 --- a/venv/lib/python3.10/site-packages/dask-2024.9.0+3.ga87509934.dist-info/NUMPY_LICENSE.txt +++ /dev/null @@ -1,30 +0,0 @@ -Copyright (c) 2005-2015, NumPy Developers. -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - * Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following - disclaimer in the documentation and/or other materials provided - with the distribution. - - * Neither the name of the NumPy Developers nor the names of any - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/venv/lib/python3.10/site-packages/dask-2024.9.0+3.ga87509934.dist-info/RECORD b/venv/lib/python3.10/site-packages/dask-2024.9.0+3.ga87509934.dist-info/RECORD deleted file mode 100644 index a2228a0b9..000000000 --- a/venv/lib/python3.10/site-packages/dask-2024.9.0+3.ga87509934.dist-info/RECORD +++ /dev/null @@ -1,14 +0,0 @@ -../../../bin/dask,sha256=S8aT79BaVsTyBGzlyqyKTI5bUHbiY1fK6KkytOftzpM,263 -__editable__.dask-2024.9.0+3.ga87509934.pth,sha256=3VaIuRATJ6ckwf94V4bYS5h_FmVgMhLV0fZD8ITyAT4,111 -__editable___dask_2024_9_0_3_ga87509934_finder.py,sha256=KT87v6V0qMJqq8pqqusSk8-ahs7JLAWfKaPpgQ8q0K4,3411 -__pycache__/__editable___dask_2024_9_0_3_ga87509934_finder.cpython-310.pyc,, -dask-2024.9.0+3.ga87509934.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4 -dask-2024.9.0+3.ga87509934.dist-info/LICENSE.txt,sha256=Eht58q-4tqaPMAZ_mKA1qwg3gL00MgWgctdpJDg7Y88,1531 -dask-2024.9.0+3.ga87509934.dist-info/METADATA,sha256=QMndRaKOkxBJOim5UKjU8OtsS8oIzupi4yv16ot2za0,3730 -dask-2024.9.0+3.ga87509934.dist-info/NUMPY_LICENSE.txt,sha256=VGrgTUHkF-M_FQ1sMIt_4tu2im7Ej0nHkDcsTTeZ4Cc,1543 -dask-2024.9.0+3.ga87509934.dist-info/RECORD,, -dask-2024.9.0+3.ga87509934.dist-info/REQUESTED,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 -dask-2024.9.0+3.ga87509934.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91 -dask-2024.9.0+3.ga87509934.dist-info/direct_url.json,sha256=kIHibdooBINlpcAdmvjNld7U6iwAOndoRY6UWx2Vs0c,95 -dask-2024.9.0+3.ga87509934.dist-info/entry_points.txt,sha256=DqIpjJmPgioJQpDSCDN2w_ZwaOvs2zlhHoZ2yahFCRI,124 -dask-2024.9.0+3.ga87509934.dist-info/top_level.txt,sha256=iT6x3D1cKsV7ordRW_UwlPRBdavTonNRtWVPB-Kti7U,5 diff --git a/venv/lib/python3.10/site-packages/dask-2024.9.0+3.ga87509934.dist-info/REQUESTED b/venv/lib/python3.10/site-packages/dask-2024.9.0+3.ga87509934.dist-info/REQUESTED deleted file mode 100644 index e69de29bb..000000000 diff --git a/venv/lib/python3.10/site-packages/dask-2024.9.0+3.ga87509934.dist-info/WHEEL b/venv/lib/python3.10/site-packages/dask-2024.9.0+3.ga87509934.dist-info/WHEEL deleted file mode 100644 index dcfdc6e35..000000000 --- a/venv/lib/python3.10/site-packages/dask-2024.9.0+3.ga87509934.dist-info/WHEEL +++ /dev/null @@ -1,5 +0,0 @@ -Wheel-Version: 1.0 -Generator: setuptools (75.1.0) -Root-Is-Purelib: true -Tag: py3-none-any - diff --git a/venv/lib/python3.10/site-packages/dask-2024.9.0+3.ga87509934.dist-info/direct_url.json b/venv/lib/python3.10/site-packages/dask-2024.9.0+3.ga87509934.dist-info/direct_url.json deleted file mode 100644 index 018ab6db3..000000000 --- a/venv/lib/python3.10/site-packages/dask-2024.9.0+3.ga87509934.dist-info/direct_url.json +++ /dev/null @@ -1 +0,0 @@ -{"dir_info": {"editable": true}, "url": "file:///Users/8823tech/Desktop/Code/Open_Source/dask"} \ No newline at end of file diff --git a/venv/lib/python3.10/site-packages/dask-2024.9.0+3.ga87509934.dist-info/entry_points.txt b/venv/lib/python3.10/site-packages/dask-2024.9.0+3.ga87509934.dist-info/entry_points.txt deleted file mode 100644 index aae31dc37..000000000 --- a/venv/lib/python3.10/site-packages/dask-2024.9.0+3.ga87509934.dist-info/entry_points.txt +++ /dev/null @@ -1,5 +0,0 @@ -[console_scripts] -dask = dask.__main__:main - -[dask.array.backends] -cupy = dask.array.cupy_entry_point:CupyBackendEntrypoint diff --git a/venv/lib/python3.10/site-packages/dask-2024.9.0+3.ga87509934.dist-info/top_level.txt b/venv/lib/python3.10/site-packages/dask-2024.9.0+3.ga87509934.dist-info/top_level.txt deleted file mode 100644 index b2034ba3b..000000000 --- a/venv/lib/python3.10/site-packages/dask-2024.9.0+3.ga87509934.dist-info/top_level.txt +++ /dev/null @@ -1 +0,0 @@ -dask From f8b6463cc93dc343d82314c0a3d1a1fd26938f20 Mon Sep 17 00:00:00 2001 From: thyripian Date: Sun, 6 Oct 2024 16:01:39 -0400 Subject: [PATCH 6/7] Refactor to rely solely on meta_nonempty for row-wise mode Modified row-wise mode implementation to rely entirely on self._meta_nonempty for metadata generation, as per developer feedback. Ensured complete removal of explicit typecasting and ensured consistent column handling between computed data and metadata. --- dask_expr/_collection.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/dask_expr/_collection.py b/dask_expr/_collection.py index 0e236f3ed..4fca4df6d 100644 --- a/dask_expr/_collection.py +++ b/dask_expr/_collection.py @@ -3663,20 +3663,20 @@ def mode(self, axis=0, numeric_only=False, dropna=True, split_every=False): modes.append(col.mode(dropna=dropna, split_every=split_every)) return concat(modes, axis=1) elif axis == 1: - # The maximum possible number of modes per row is equal to the number of columns - num_columns = len(self.columns) - - # Create metadata DataFrame with the correct number of columns - # Use the first column's dtype as a representative dtype - mode_dtype = self._meta.dtypes[0] if len(self.columns) > 0 else float - meta = pd.DataFrame(columns=range(num_columns), dtype=mode_dtype) - - # Apply map_partitions and reindex to ensure consistent columns + # Use self._meta_nonempty to generate meta + meta = self._meta_nonempty.mode(axis=1, numeric_only=numeric_only, dropna=dropna) + + # Determine the maximum number of modes any row can have + max_modes = len(self.columns) + + # Reindex meta to have the maximum number of columns + meta = meta.reindex(columns=range(max_modes)) + + # Apply map_partitions using pandas' mode function directly return self.map_partitions( - lambda df: df.mode( - axis=1, numeric_only=numeric_only, dropna=dropna - ).reindex(columns=range(num_columns), fill_value=np.nan), - meta=meta, + lambda df: df.mode(axis=1, numeric_only=numeric_only, dropna=dropna) + .reindex(columns=range(max_modes)), + meta=meta ) else: raise ValueError(f"No axis named {axis} for object type {type(self)}") From 894bd9c07ba40b4ef57ad2ba0f3203f0031fd560 Mon Sep 17 00:00:00 2001 From: thyripian Date: Sun, 6 Oct 2024 16:04:32 -0400 Subject: [PATCH 7/7] Run pre-commit linting Made linting changes, specifically for black. --- dask_expr/_collection.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/dask_expr/_collection.py b/dask_expr/_collection.py index 4fca4df6d..9b7e1cd97 100644 --- a/dask_expr/_collection.py +++ b/dask_expr/_collection.py @@ -3664,19 +3664,22 @@ def mode(self, axis=0, numeric_only=False, dropna=True, split_every=False): return concat(modes, axis=1) elif axis == 1: # Use self._meta_nonempty to generate meta - meta = self._meta_nonempty.mode(axis=1, numeric_only=numeric_only, dropna=dropna) - + meta = self._meta_nonempty.mode( + axis=1, numeric_only=numeric_only, dropna=dropna + ) + # Determine the maximum number of modes any row can have max_modes = len(self.columns) - + # Reindex meta to have the maximum number of columns meta = meta.reindex(columns=range(max_modes)) - + # Apply map_partitions using pandas' mode function directly return self.map_partitions( - lambda df: df.mode(axis=1, numeric_only=numeric_only, dropna=dropna) - .reindex(columns=range(max_modes)), - meta=meta + lambda df: df.mode( + axis=1, numeric_only=numeric_only, dropna=dropna + ).reindex(columns=range(max_modes)), + meta=meta, ) else: raise ValueError(f"No axis named {axis} for object type {type(self)}")