From 1e5addae3edffd65a105d0f2bbda53f67a6de93f Mon Sep 17 00:00:00 2001 From: Andrii <9934266+andrii-uniq@users.noreply.github.com> Date: Thu, 14 Nov 2024 15:05:19 +0200 Subject: [PATCH 1/4] Fix infinite loop --- dask_expr/_core.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/dask_expr/_core.py b/dask_expr/_core.py index ed9b8c41f..b8db367d6 100644 --- a/dask_expr/_core.py +++ b/dask_expr/_core.py @@ -507,8 +507,6 @@ def __getattr__(self, key): if key in _parameters: idx = _parameters.index(key) return self.operands[idx] - if is_dataframe_like(self._meta) and key in self._meta.columns: - return self[key] link = "https://github.com/dask-contrib/dask-expr/blob/main/README.md#api-coverage" raise AttributeError( From 1b3bb747eefefab9a00fa123f31f7eddfd6c89ad Mon Sep 17 00:00:00 2001 From: Andrii <9934266+andrii-uniq@users.noreply.github.com> Date: Thu, 14 Nov 2024 23:09:21 +0200 Subject: [PATCH 2/4] Add test --- dask_expr/tests/a.zip | Bin 0 -> 151 bytes dask_expr/tests/test_meta.py | 51 +++++++++++++++++++++++++++++++++++ 2 files changed, 51 insertions(+) create mode 100644 dask_expr/tests/a.zip create mode 100644 dask_expr/tests/test_meta.py diff --git a/dask_expr/tests/a.zip b/dask_expr/tests/a.zip new file mode 100644 index 0000000000000000000000000000000000000000..11b96a7f5b9910f3ad7ce3ad92d85ea4ee89c9ba GIT binary patch literal 151 zcmWIWW@h1H00HTpd6B0Bza_H+*&xgc#EE*z#bt&%3Pw5##sS`pO!f@8)T#hggMb2< ngzDsiav2#U7^L=o$hc{Cn-MAxrvkiL*+3jdAPfi6T3{IfGu9M{ literal 0 HcmV?d00001 diff --git a/dask_expr/tests/test_meta.py b/dask_expr/tests/test_meta.py new file mode 100644 index 000000000..014fca6ae --- /dev/null +++ b/dask_expr/tests/test_meta.py @@ -0,0 +1,51 @@ +import rootutils +rootutils.setup_root(__file__, indicator='.project-root', pythonpath=True) + +import fsspec +import dask.dataframe as dd +import zipfile +import fsspec +from fsspec.archive import AbstractArchiveFileSystem + + +class ZipFileSystem(AbstractArchiveFileSystem): + protocol = "tzip" + + def 
__init__( + self, + fo="", + **kwargs, + ): + super().__init__(self, **kwargs) + fo = fsspec.open(fo, mode='rb') + self.of = fo + self.fo = fo.__enter__() + self.zip = zipfile.ZipFile(self.fo, mode='r') + self.dir_cache = None + + @classmethod + def _strip_protocol(cls, path): + return super()._strip_protocol(path).lstrip("/") + + def __del__(self): + if hasattr(self, "zip"): + self.close() + del self.zip + + def close(self): + self.zip.close() + + def _open( + self, + path, + **kwargs, + ): + path = self._strip_protocol(path) + out = self.zip.open(path, 'r') + return out + +fsspec.register_implementation('tzip', ZipFileSystem) +with fsspec.open('tzip://a.csv', fo='a.zip') as f: + print(f.read(1)) +df = dd.read_csv('tzip://a.csv', storage_options={'fo':'a.zip'}) +print(df.head()) From 2b1ffce0334541c8ddfa1d95c2fee27929628e71 Mon Sep 17 00:00:00 2001 From: Andrii <9934266+andrii-uniq@users.noreply.github.com> Date: Thu, 14 Nov 2024 23:11:39 +0200 Subject: [PATCH 3/4] Remove unused dependencies --- dask_expr/tests/test_meta.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/dask_expr/tests/test_meta.py b/dask_expr/tests/test_meta.py index 014fca6ae..5fc24826e 100644 --- a/dask_expr/tests/test_meta.py +++ b/dask_expr/tests/test_meta.py @@ -1,6 +1,3 @@ -import rootutils -rootutils.setup_root(__file__, indicator='.project-root', pythonpath=True) - import fsspec import dask.dataframe as dd import zipfile From 979bd7f76218a7a74aa8d49b485fd4d207b91f28 Mon Sep 17 00:00:00 2001 From: Andrii <9934266+andrii-uniq@users.noreply.github.com> Date: Fri, 15 Nov 2024 19:43:27 +0200 Subject: [PATCH 4/4] Fix test according to hooks --- dask_expr/tests/test_meta.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/dask_expr/tests/test_meta.py b/dask_expr/tests/test_meta.py index 5fc24826e..3ef2d13e7 100644 --- a/dask_expr/tests/test_meta.py +++ b/dask_expr/tests/test_meta.py @@ -1,6 +1,6 @@ -import fsspec -import dask.dataframe as dd import zipfile 
+ +import dask.dataframe as dd import fsspec from fsspec.archive import AbstractArchiveFileSystem @@ -14,10 +14,10 @@ def __init__( **kwargs, ): super().__init__(self, **kwargs) - fo = fsspec.open(fo, mode='rb') + fo = fsspec.open(fo, mode="rb") self.of = fo self.fo = fo.__enter__() - self.zip = zipfile.ZipFile(self.fo, mode='r') + self.zip = zipfile.ZipFile(self.fo, mode="r") self.dir_cache = None @classmethod @@ -38,11 +38,12 @@ def _open( **kwargs, ): path = self._strip_protocol(path) - out = self.zip.open(path, 'r') + out = self.zip.open(path, "r") return out -fsspec.register_implementation('tzip', ZipFileSystem) -with fsspec.open('tzip://a.csv', fo='a.zip') as f: + +fsspec.register_implementation("tzip", ZipFileSystem) +with fsspec.open("tzip://a.csv", fo="a.zip") as f: print(f.read(1)) -df = dd.read_csv('tzip://a.csv', storage_options={'fo':'a.zip'}) +df = dd.read_csv("tzip://a.csv", storage_options={"fo": "a.zip"}) print(df.head())