1
+ import logging
1
2
import time
2
3
from collections .abc import MutableMapping
4
+ from enum import Enum
3
5
from functools import lru_cache
6
+ from pathlib import Path
7
+ from typing import Optional , Union
4
8
9
+ logger = logging .getLogger (__name__ )
5
10
6
- class DirCache (MutableMapping ):
11
+
12
class DisabledListingsCache(MutableMapping):
    """
    Listings cache that never stores anything.

    Every lookup misses, every write or delete is silently ignored, and the
    mapping always appears empty. Use it where a cache object is required
    but caching of directory listings is switched off.
    """

    def __init__(self, *args, **kwargs):
        """Accept and ignore any arguments; there is no state to set up."""

    def __getitem__(self, item):
        # Nothing is ever stored, so every lookup is a miss. Include the key
        # so the error reads like a miss on a regular mapping.
        raise KeyError(item)

    def __setitem__(self, key, value):
        """Discard the value; nothing is cached."""

    def __delitem__(self, key):
        """Do nothing; there is never an entry to delete."""

    def __iter__(self):
        return iter(())

    def __len__(self):
        return 0

    def clear(self):
        """Do nothing; the cache is always empty."""

    def __contains__(self, item):
        return False

    def __reduce__(self):
        # The cache is stateless, so pickling only needs to rebuild a fresh
        # instance without arguments.
        return (DisabledListingsCache, ())
39
+
40
+
41
+ class MemoryListingsCache (MutableMapping ):
7
42
"""
8
43
Caching of directory listings, in a structure like::
9
44
@@ -26,19 +61,14 @@ class DirCache(MutableMapping):
26
61
27
62
def __init__ (
28
63
self ,
29
- use_listings_cache = True ,
30
- listings_expiry_time = None ,
64
+ expiry_time = None ,
31
65
max_paths = None ,
32
- ** kwargs ,
33
66
):
34
67
"""
35
68
36
69
Parameters
37
70
----------
38
- use_listings_cache: bool
39
- If False, this cache never returns items, but always reports KeyError,
40
- and setting items has no effect
41
- listings_expiry_time: int or float (optional)
71
+ expiry_time: int or float (optional)
42
72
Time in seconds that a listing is considered valid. If None,
43
73
listings do not expire.
44
74
max_paths: int (optional)
@@ -49,15 +79,14 @@ def __init__(
49
79
self ._times = {}
50
80
if max_paths :
51
81
self ._q = lru_cache (max_paths + 1 )(lambda key : self ._cache .pop (key , None ))
52
- self .use_listings_cache = use_listings_cache
53
- self .listings_expiry_time = listings_expiry_time
54
- self .max_paths = max_paths
82
+ self ._expiry_time = expiry_time
83
+ self ._max_paths = max_paths
55
84
56
85
def __getitem__(self, item):
    """
    Return the cached listing stored under ``item``.

    Raises ``KeyError`` when there is no usable entry for ``item``.
    """
    if self._expiry_time is not None:
        # An entry whose recorded time is older than ``_expiry_time`` seconds
        # is dropped. A key without a recorded time is treated as written at
        # time 0; if it is not in ``_cache`` either, the ``del`` below raises
        # KeyError.
        if self._times.get(item, 0) - time.time() < -self._expiry_time:
            del self._cache[item]
    if self._max_paths:
        # ``_q`` is the lru_cache-wrapped callable built in ``__init__`` whose
        # wrapped function pops the key from ``_cache``. Calling it here
        # records the access in the LRU bookkeeping.
        # NOTE(review): for a key the LRU no longer remembers, this call pops
        # the entry, so the lookup below raises KeyError - confirm intended.
        self._q(item)
    return self._cache[item]  # maybe raises KeyError
63
92
@@ -75,12 +104,10 @@ def __contains__(self, item):
75
104
return False
76
105
77
106
def __setitem__(self, key, value):
    """Store ``value`` under ``key`` and, if expiry is enabled, record the write time."""
    if self._max_paths:
        # Register the key with the LRU bookkeeping before inserting: the
        # wrapped function pops ``key`` from ``_cache``, so this call must
        # come before the assignment below.
        self._q(key)
    self._cache[key] = value
    if self._expiry_time is not None:
        # Timestamp read back by ``__getitem__`` to decide whether the entry
        # has expired.
        self._times[key] = time.time()
85
112
86
113
def __delitem__ (self , key ):
@@ -93,6 +120,99 @@ def __iter__(self):
93
120
94
121
def __reduce__(self):
    """
    Pickle support: rebuild the cache from its configuration only.

    Only the expiry time and the path limit are passed back to the
    constructor; cached listings are not part of the returned state.
    """
    return (
        MemoryListingsCache,
        (self._expiry_time, self._max_paths),
    )
126
+
127
+
128
class FileListingsCache(MutableMapping):
    """
    Directory listings cache persisted on disk via ``diskcache``.

    Entries are written with the configured expiry time. A missing or
    expired entry behaves like a missing key of a regular mapping.
    """

    # Unique default handed to ``diskcache`` lookups so that a stored falsy
    # value (e.g. an empty listing) can be told apart from a missing or
    # expired key.
    _MISSING = object()

    def __init__(
        self,
        expiry_time: Optional[float] = None,
        directory: Optional[Union[str, Path]] = None,
    ):
        """

        Parameters
        ----------
        expiry_time: int or float (optional)
            Time in seconds that a listing is considered valid. If None,
            listings do not expire.
        directory: str or Path (optional)
            Directory path at which the listings cache file is stored. If None,
            an autogenerated path at the user folder is created.

        Raises
        ------
        ImportError
            If ``platformdirs`` or ``diskcache`` is not installed.
        OSError
            If the cache directory cannot be created.
        """
        try:
            import platformdirs
            from diskcache import Cache
        except ImportError as e:
            raise ImportError(
                "The optional dependencies ``platformdirs`` and ``diskcache`` are required for file-based dircache."
            ) from e

        if not directory:
            directory = platformdirs.user_cache_dir(appname="fsspec")
        base_directory = Path(directory)
        # Caches with different expiry times live in separate sub-directories.
        cache_directory = base_directory / "dircache" / str(expiry_time)

        try:
            cache_directory.mkdir(exist_ok=True, parents=True)
        except OSError:
            logger.error(
                "Directory for dircache could not be created at %s.", cache_directory
            )
            raise
        else:
            logger.info("Dircache located at %s.", cache_directory)

        self._expiry_time = expiry_time
        # Keep the un-suffixed directory for ``__reduce__``: passing the
        # suffixed one back to ``__init__`` would append "dircache/<expiry>"
        # a second time and point the copy at a different store.
        self._base_directory = base_directory
        self._directory = cache_directory
        self._cache = Cache(directory=str(cache_directory))

    def __getitem__(self, item):
        """Draw item as fileobject from cache, retry if timeout occurs.

        Raises ``KeyError`` if the item is missing or has expired.
        """
        value = self._cache.get(
            key=item, default=self._MISSING, read=True, retry=True
        )
        if value is self._MISSING:
            raise KeyError(item)
        return value

    def clear(self):
        """Remove every entry from the on-disk cache."""
        self._cache.clear()

    def __len__(self):
        # Count through ``__iter__`` so that the length agrees with iteration
        # (expired entries are skipped) without building a list of all keys.
        return sum(1 for _ in self)

    def __contains__(self, item):
        # Compare against the sentinel instead of truthiness so that a cached
        # empty listing still counts as present.
        found = self._cache.get(item, default=self._MISSING, retry=True)
        return found is not self._MISSING

    def __setitem__(self, key, value):
        """Store ``value`` under ``key`` with the configured expiry time."""
        self._cache.set(key=key, value=value, expire=self._expiry_time, retry=True)

    def __delitem__(self, key):
        del self._cache[key]

    def __iter__(self):
        # The membership test filters out keys whose entries have expired.
        return (k for k in self._cache.iterkeys() if k in self)

    def __reduce__(self):
        # Rebuild from the base directory; ``__init__`` re-derives the
        # expiry-specific sub-directory, so the copy opens the same store.
        return (
            FileListingsCache,
            (self._expiry_time, self._base_directory),
        )
200
+
201
+
202
class CacheType(Enum):
    """Available listings cache implementations; each member's value is the cache class."""

    # Cache that stores nothing and always misses.
    DISABLED = DisabledListingsCache
    # In-process cache of directory listings.
    MEMORY = MemoryListingsCache
    # Cache persisted on disk through ``diskcache``.
    FILE = FileListingsCache
206
+
207
+
208
def create_listings_cache(
    cache_type: CacheType,
    expiry_time: Optional[float] = None,
    **kwargs,
) -> Union[DisabledListingsCache, MemoryListingsCache, FileListingsCache]:
    """
    Build the listings cache selected by ``cache_type``.

    Parameters
    ----------
    cache_type: CacheType
        Which cache implementation to instantiate.
    expiry_time: int or float (optional)
        Time in seconds that a listing is considered valid. If None,
        listings do not expire.
    **kwargs:
        Passed on unchanged to the constructor of the selected cache class
        (e.g. ``max_paths`` for the memory cache, ``directory`` for the file
        cache).

    Returns
    -------
    A new instance of the cache class associated with ``cache_type``.

    Raises
    ------
    KeyError
        If ``cache_type`` is not a member of ``CacheType``.
    """
    cache_map = {
        CacheType.DISABLED: DisabledListingsCache,
        CacheType.MEMORY: MemoryListingsCache,
        CacheType.FILE: FileListingsCache,
    }
    cache_cls = cache_map[cache_type]
    return cache_cls(expiry_time, **kwargs)
0 commit comments