
Commit bbe0591

Avoid serializing cache for file objects (#1753)
1 parent ac830a0 commit bbe0591

3 files changed: +57 / -22 lines

fsspec/implementations/http.py (-22 lines)

@@ -696,25 +696,6 @@ async def async_fetch_range(self, start, end):
 
     _fetch_range = sync_wrapper(async_fetch_range)
 
-    def __reduce__(self):
-        return (
-            reopen,
-            (
-                self.fs,
-                self.url,
-                self.mode,
-                self.blocksize,
-                self.cache.name if self.cache else "none",
-                self.size,
-            ),
-        )
-
-
-def reopen(fs, url, mode, blocksize, cache_type, size=None):
-    return fs.open(
-        url, mode=mode, block_size=blocksize, cache_type=cache_type, size=size
-    )
-
 
 magic_check = re.compile("([*[])")
 

@@ -764,9 +745,6 @@ def close(self):
         asyncio.run_coroutine_threadsafe(self._close(), self.loop)
         super().close()
 
-    def __reduce__(self):
-        return reopen, (self.fs, self.url, self.mode, self.blocksize, self.cache.name)
-
 
 class AsyncStreamFile(AbstractAsyncStreamedFile):
     def __init__(
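With this change, HTTPFile and HTTPStreamFile no longer carry their own pickling logic; both derive from AbstractBufferedFile, so they pick up the generic __reduce__/reopen pair added in fsspec/spec.py below. A rough, illustrative sketch of the resulting behaviour (not part of the commit; the URL is a placeholder, and the server is assumed to report a Content-Length so that a seekable, cache-backed HTTPFile is returned):

import pickle

import fsspec

fs = fsspec.filesystem("http")
# Placeholder URL -- assumes the server reports the file size (Content-Length),
# so fs.open() returns a seekable, cache-backed HTTPFile.
f = fs.open("https://example.org/data.bin", mode="rb")
f.read(1024)  # fetches bytes over HTTP and stores them in f.cache

f2 = pickle.loads(pickle.dumps(f))
# f2 is a freshly re-opened file positioned at loc=1024; the 1024 cached
# bytes were not written into the pickle payload.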

fsspec/spec.py (+31 lines)

@@ -2060,6 +2060,22 @@ def writable(self):
         """Whether opened for writing"""
         return self.mode in {"wb", "ab", "xb"} and not self.closed
 
+    def __reduce__(self):
+        if self.mode != "rb":
+            raise RuntimeError("Pickling a writeable file is not supported")
+
+        return reopen, (
+            self.fs,
+            self.path,
+            self.mode,
+            self.blocksize,
+            self.loc,
+            self.size,
+            self.autocommit,
+            self.cache.name if self.cache else "none",
+            self.kwargs,
+        )
+
     def __del__(self):
         if not self.closed:
             self.close()

@@ -2074,3 +2090,18 @@ def __enter__(self):
 
     def __exit__(self, *args):
         self.close()
+
+
+def reopen(fs, path, mode, blocksize, loc, size, autocommit, cache_type, kwargs):
+    file = fs.open(
+        path,
+        mode=mode,
+        block_size=blocksize,
+        autocommit=autocommit,
+        cache_type=cache_type,
+        size=size,
+        **kwargs,
+    )
+    if loc > 0:
+        file.seek(loc)
+    return file
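Taken together, __reduce__ and reopen mean a read-mode AbstractBufferedFile pickles down to its filesystem, path, mode, block size, position, size, autocommit flag, cache type and extra kwargs, and is rebuilt by re-opening the path and seeking back to the saved position; the cached bytes themselves are never serialized. A minimal, self-contained sketch of that behaviour (ToyFS/ToyFile are invented for illustration and are not part of fsspec or of this commit):

import pickle

from fsspec.spec import AbstractBufferedFile, AbstractFileSystem

DATA = b"0123456789abcdef" * 64  # 1 KiB of dummy file content


class ToyFS(AbstractFileSystem):
    protocol = "toy"

    def _open(self, path, mode="rb", block_size=None, autocommit=True,
              cache_options=None, **kwargs):
        kwargs.setdefault("size", len(DATA))  # reopen() passes size explicitly
        return ToyFile(self, path, mode=mode, block_size=block_size,
                       autocommit=autocommit, cache_options=cache_options, **kwargs)


class ToyFile(AbstractBufferedFile):
    def _fetch_range(self, start, end):
        return DATA[start:end]


fs = ToyFS()
f = fs.open("some/file", mode="rb")
assert f.read(3) == DATA[:3]        # populates f.cache.cache

f2 = pickle.loads(pickle.dumps(f))  # __reduce__ -> reopen(fs, path, ..., loc=3, ...)
assert not f2.cache.cache           # the cached bytes were not pickled
assert f2.read(3) == DATA[3:6]      # reading continues from the saved position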

fsspec/tests/test_spec.py (+26 lines)

@@ -15,6 +15,7 @@
 from fsspec.implementations.http import HTTPFileSystem
 from fsspec.implementations.local import LocalFileSystem
 from fsspec.spec import AbstractBufferedFile, AbstractFileSystem
+from fsspec.tests.conftest import data
 
 PATHS_FOR_GLOB_TESTS = (
     {"name": "test0.json", "type": "file", "size": 100},

@@ -744,6 +745,31 @@ def test_cache():
     assert len(DummyTestFS._cache) == 0
 
 
+def test_cache_not_pickled(server):
+    fs = fsspec.filesystem(
+        "http",
+        cache_type="readahead",
+        headers={"give_length": "true", "head_ok": "true"},
+    )
+    filepath = server.realfile
+    length = 3
+    f = fs.open(filepath, mode="rb")
+    assert isinstance(f, AbstractBufferedFile)
+    assert not f.cache.cache  # No cache initially
+    assert f.read(length=length) == data[:length]
+    assert f.cache.cache == data  # Cache is populated
+
+    # Roundtrip through pickle
+    import pickle
+
+    f2 = pickle.loads(pickle.dumps(f))
+    assert not f2.cache.cache  # No cache initially
+    assert (
+        f2.read(length=length) == data[length : 2 * length]
+    )  # Read file from previous seek point
+    assert f2.cache.cache == data[length:]  # Cache is populated
+
+
 def test_current():
     fs = DummyTestFS()
     fs2 = DummyTestFS(arg=1)
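The new test covers the read-mode pickle roundtrip against the HTTP test server (the server fixture and data bytes come from fsspec's test conftest). The write-mode guard added to __reduce__ in fsspec/spec.py is not exercised here; a small illustrative sketch of it follows (SinkFS/SinkFile are invented names, not part of the commit or of fsspec):

import pickle

from fsspec.spec import AbstractBufferedFile, AbstractFileSystem


class SinkFS(AbstractFileSystem):
    protocol = "sink"

    def _open(self, path, mode="rb", block_size=None, autocommit=True,
              cache_options=None, **kwargs):
        return SinkFile(self, path, mode=mode, block_size=block_size,
                        autocommit=autocommit, cache_options=cache_options, **kwargs)


class SinkFile(AbstractBufferedFile):
    def _initiate_upload(self):
        pass

    def _upload_chunk(self, final=False):
        return True  # discard the bytes; only the pickling behaviour matters here


f = SinkFS().open("out.bin", mode="wb")
f.write(b"some bytes")
try:
    pickle.dumps(f)
except RuntimeError as err:
    print(err)  # Pickling a writeable file is not supported
f.close()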
