Skip to content

Commit 00c8a41

Browse files
authored
Transactions on simplecache (#1531)
1 parent 14dce8c commit 00c8a41

File tree

7 files changed

+95
-16
lines changed

7 files changed

+95
-16
lines changed

Diff for: ci/environment-friends.yml

+1
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ dependencies:
2121
- flake8
2222
- black
2323
- google-cloud-core
24+
- google-cloud-storage
2425
- google-api-core
2526
- google-api-python-client
2627
- httpretty

Diff for: fsspec/generic.py

+5
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,10 @@ def rsync(
8787
fs: GenericFileSystem|None
8888
Instance to use if explicitly given. The instance defines how to
8989
make downstream file system instances from paths.
90+
91+
Returns
92+
-------
93+
dict of the copy operations that were performed, {source: destination}
9094
"""
9195
fs = fs or GenericFileSystem(**(inst_kwargs or {}))
9296
source = fs._strip_protocol(source)
@@ -137,6 +141,7 @@ def rsync(
137141
logger.debug(f"{len(to_delete)} files to delete")
138142
if delete_missing:
139143
fs.rm(to_delete)
144+
return allfiles
140145

141146

142147
class GenericFileSystem(AsyncFileSystem):

Diff for: fsspec/implementations/cached.py

+65-8
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,10 @@ def complete(self, commit=True):
3232
lpaths = [f.fn for f in self.files]
3333
if commit:
3434
self.fs.put(lpaths, rpaths)
35-
# else remove?
35+
self.files.clear()
3636
self.fs._intrans = False
37+
self.fs._transaction = None
38+
self.fs = None # break cycle
3739

3840

3941
class CachingFileSystem(AbstractFileSystem):
@@ -391,8 +393,11 @@ def close_and_update(self, f, close):
391393
close()
392394
f.closed = True
393395

396+
def ls(self, path, detail=True, **kwargs):
    """List ``path`` by delegating directly to the wrapped filesystem.

    Parameters
    ----------
    path: str
        Location to list; handed to ``self.fs.ls`` unchanged.
    detail: bool
        Forwarded to ``self.fs.ls``.
    **kwargs:
        Any extra keyword arguments are forwarded to ``self.fs.ls`` as well,
        so options given by the caller are not silently dropped.

    Returns
    -------
    Whatever ``self.fs.ls`` returns.
    """
    return self.fs.ls(path, detail=detail, **kwargs)
398+
394399
def __getattribute__(self, item):
395-
if item in [
400+
if item in {
396401
"load_cache",
397402
"_open",
398403
"save_cache",
@@ -409,6 +414,11 @@ def __getattribute__(self, item):
409414
"read_block",
410415
"tail",
411416
"head",
417+
"info",
418+
"ls",
419+
"exists",
420+
"isfile",
421+
"isdir",
412422
"_check_file",
413423
"_check_cache",
414424
"_mkcache",
@@ -428,9 +438,12 @@ def __getattribute__(self, item):
428438
"cache_size",
429439
"pipe_file",
430440
"pipe",
441+
"isdir",
442+
"isfile",
443+
"exists",
431444
"start_transaction",
432445
"end_transaction",
433-
]:
446+
}:
434447
# all the methods defined in this class. Note `open` here, since
435448
# it calls `_open`, but is actually in superclass
436449
return lambda *args, **kw: getattr(type(self), item).__get__(self)(
@@ -756,6 +769,49 @@ def pipe_file(self, path, value=None, **kwargs):
756769
else:
757770
super().pipe_file(path, value)
758771

772+
def ls(self, path, detail=True, **kwargs):
    """List ``path``, including files pending in the current transaction.

    The listing of the target filesystem (``self.fs``) is merged with the
    files held in ``self.transaction.files`` while ``self._intrans`` is
    true. Pending files directly below ``path`` (or equal to it) are
    reported as files; pending files nested more deeply are reported as
    their first-level directory below ``path``.

    Parameters
    ----------
    path: str
        Location to list; passed through ``self._strip_protocol`` first.
    detail: bool
        If True, return a list of info dicts; otherwise a sorted list of
        names.
    **kwargs:
        Forwarded to ``self.fs.ls``.

    Raises
    ------
    FileNotFoundError
        Re-raised from ``self.fs.ls`` when the target filesystem does not
        know ``path`` and no pending file matches it either.
    """
    path = self._strip_protocol(path)
    try:
        # copy: don't edit the list object handed back by the target fs
        listing = self.fs.ls(path, detail=True, **kwargs).copy()
    except FileNotFoundError as e:
        listing, missing = [], e
    else:
        missing = None

    # Keyed by name so that every name is reported exactly once.
    entries = {entry["name"]: entry for entry in listing}

    if self._intrans:
        prefix = path.rstrip("/") + "/"
        depth = prefix.count("/")
        for f in self.transaction.files:
            if f.path != path and not f.path.startswith(prefix):
                continue
            if f.path == path or f.path.count("/") == depth:
                # A recorded size of 0 is a real size; only fall back to
                # tell() while no size has been recorded yet.
                size = f.size if f.size is not None else f.tell()
                # The pending file takes precedence over an entry of the
                # same name on the target, matching what info() reports.
                entries[f.path] = {"name": f.path, "size": size, "type": "file"}
            else:
                dname = "/".join(f.path.split("/")[: depth + 1])
                # Several pending files may share this directory.
                entries.setdefault(
                    dname, {"name": dname, "size": 0, "type": "directory"}
                )

    if missing is not None and not entries:
        raise missing
    if detail:
        return list(entries.values())
    return sorted(entries)
803+
804+
def info(self, path, **kwargs):
    """Return the info dict for ``path``, honouring pending transaction files.

    While ``self._intrans`` is true, a file in ``self.transaction.files``
    whose path equals ``path`` is reported as a file, and a ``path`` that is
    a parent of any pending file is reported as a directory of size 0.
    Everything else is delegated to ``self.fs.info``.

    Parameters
    ----------
    path: str
        Location to describe; passed through ``self._strip_protocol`` first.
    **kwargs:
        Forwarded to ``self.fs.info`` when no pending file matches.
    """
    path = self._strip_protocol(path)
    if self._intrans:
        pending = self.transaction.files
        for f in pending:
            if f.path == path:
                # A recorded size of 0 is a real size; only fall back to
                # tell() while no size has been recorded yet.
                size = f.size if f.size is not None else f.tell()
                return {"name": path, "size": size, "type": "file"}
        if any(f.path.startswith(path + "/") for f in pending):
            return {"name": path, "size": 0, "type": "directory"}
    return self.fs.info(path, **kwargs)
814+
759815
def pipe(self, path, value=None, **kwargs):
760816
if isinstance(path, str):
761817
self.pipe_file(self._strip_protocol(path), value, **kwargs)
@@ -836,6 +892,7 @@ def __init__(self, fs, path, fn, mode="wb", autocommit=True, seek=0, **kwargs):
836892
if seek:
837893
self.fh.seek(seek)
838894
self.path = path
895+
self.size = None
839896
self.fs = fs
840897
self.closed = False
841898
self.autocommit = autocommit
@@ -855,6 +912,7 @@ def __exit__(self, exc_type, exc_val, exc_tb):
855912
self.close()
856913

857914
def close(self):
915+
self.size = self.fh.tell()
858916
if self.closed:
859917
return
860918
self.fh.close()
@@ -868,15 +926,14 @@ def discard(self):
868926

869927
def commit(self):
870928
self.fs.put(self.fn, self.path, **self.kwargs)
871-
try:
872-
os.remove(self.fn)
873-
except (PermissionError, FileNotFoundError):
874-
# file path may be held by new version of the file on windows
875-
pass
929+
# we do not delete local copy - it's still in the cache
876930

877931
@property
878932
def name(self):
879933
return self.fn
880934

935+
def __repr__(self) -> str:
936+
return f"LocalTempFile: {self.path}"
937+
881938
def __getattr__(self, item):
882939
return getattr(self.fh, item)

Diff for: fsspec/implementations/http.py

+8-5
Original file line numberDiff line numberDiff line change
@@ -158,11 +158,14 @@ async def _ls_real(self, url, detail=True, **kwargs):
158158
session = await self.set_session()
159159
async with session.get(self.encode_url(url), **self.kwargs) as r:
160160
self._raise_not_found_for_status(r, url)
161-
text = await r.text()
162-
if self.simple_links:
163-
links = ex2.findall(text) + [u[2] for u in ex.findall(text)]
164-
else:
165-
links = [u[2] for u in ex.findall(text)]
161+
try:
162+
text = await r.text()
163+
if self.simple_links:
164+
links = ex2.findall(text) + [u[2] for u in ex.findall(text)]
165+
else:
166+
links = [u[2] for u in ex.findall(text)]
167+
except UnicodeDecodeError:
168+
links = [] # binary, not HTML
166169
out = set()
167170
parts = urlparse(url)
168171
for l in links:

Diff for: fsspec/implementations/memory.py

+2
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,7 @@ def rmdir(self, path):
138138
raise FileNotFoundError(path)
139139

140140
def info(self, path, **kwargs):
141+
logger.debug("info: %s", path)
141142
path = self._strip_protocol(path)
142143
if path in self.pseudo_dirs or any(
143144
p.startswith(path + "/") for p in list(self.store) + self.pseudo_dirs
@@ -210,6 +211,7 @@ def cp_file(self, path1, path2, **kwargs):
210211
raise FileNotFoundError(path1)
211212

212213
def cat_file(self, path, start=None, end=None, **kwargs):
214+
logger.debug("cat: %s", path)
213215
path = self._strip_protocol(path)
214216
try:
215217
return bytes(self.store[path].getbuffer()[start:end])

Diff for: fsspec/implementations/tests/test_cached.py

+6
Original file line numberDiff line numberDiff line change
@@ -1291,10 +1291,16 @@ def patched_put(*args, **kwargs):
12911291
with fs.transaction:
12921292
fs.pipe("myfile", b"1")
12931293
fs.pipe("otherfile", b"2")
1294+
fs.pipe("deep/dir/otherfile", b"3")
12941295
with fs.open("blarh", "wb") as f:
12951296
f.write(b"ff")
12961297
assert not m.find("")
12971298

1299+
assert fs.info("otherfile")["size"] == 1
1300+
assert fs.info("deep")["type"] == "directory"
1301+
assert fs.isdir("deep")
1302+
assert fs.ls("deep", detail=False) == ["/deep/dir"]
1303+
12981304
assert m.cat("myfile") == b"1"
12991305
assert m.cat("otherfile") == b"2"
13001306
assert called[0] == 1 # copy was done in one go

Diff for: fsspec/transaction.py

+8-3
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ class Transaction:
99
instance as the ``.transaction`` attribute of the given filesystem
1010
"""
1111

12-
def __init__(self, fs):
12+
def __init__(self, fs, **kwargs):
1313
"""
1414
Parameters
1515
----------
@@ -26,8 +26,10 @@ def __exit__(self, exc_type, exc_val, exc_tb):
2626
"""End transaction and commit, if exit is not due to exception"""
2727
# only commit if there was no exception
2828
self.complete(commit=exc_type is None)
29-
self.fs._intrans = False
30-
self.fs._transaction = None
29+
if self.fs:
30+
self.fs._intrans = False
31+
self.fs._transaction = None
32+
self.fs = None
3133

3234
def start(self):
3335
"""Start a transaction on this FileSystem"""
@@ -43,6 +45,8 @@ def complete(self, commit=True):
4345
else:
4446
f.discard()
4547
self.fs._intrans = False
48+
self.fs._transaction = None
49+
self.fs = None
4650

4751

4852
class FileActor:
@@ -83,3 +87,4 @@ def complete(self, commit=True):
8387
else:
8488
self.files.discard().result()
8589
self.fs._intrans = False
90+
self.fs = None

0 commit comments

Comments
 (0)