1
+ import logging
1
2
import time
2
3
from collections .abc import MutableMapping
4
+ from enum import Enum
3
5
from functools import lru_cache
6
+ from pathlib import Path
7
+ from typing import Union , Optional
4
8
9
+ logger = logging .getLogger (__name__ )
5
10
6
- class DirCache (MutableMapping ):
11
+
12
+ class MemoryDirCache (MutableMapping ):
7
13
"""
8
14
Caching of directory listings, in a structure like::
9
15
@@ -26,19 +32,15 @@ class DirCache(MutableMapping):
26
32
27
33
def __init__ (
28
34
self ,
29
- use_listings_cache = True ,
30
- listings_expiry_time = None ,
35
+ expiry_time = None ,
31
36
max_paths = None ,
32
37
** kwargs ,
33
38
):
34
39
"""
35
40
36
41
Parameters
37
42
----------
38
- use_listings_cache: bool
39
- If False, this cache never returns items, but always reports KeyError,
40
- and setting items has no effect
41
- listings_expiry_time: int or float (optional)
43
+ expiry_time: int or float (optional)
42
44
Time in seconds that a listing is considered valid. If None,
43
45
listings do not expire.
44
46
max_paths: int (optional)
@@ -49,13 +51,12 @@ def __init__(
49
51
self ._times = {}
50
52
if max_paths :
51
53
self ._q = lru_cache (max_paths + 1 )(lambda key : self ._cache .pop (key , None ))
52
- self .use_listings_cache = use_listings_cache
53
- self .listings_expiry_time = listings_expiry_time
54
+ self .expiry_time = expiry_time
54
55
self .max_paths = max_paths
55
56
56
57
def __getitem__ (self , item ):
57
- if self .listings_expiry_time is not None :
58
- if self ._times .get (item , 0 ) - time .time () < - self .listings_expiry_time :
58
+ if self .expiry_time is not None :
59
+ if self ._times .get (item , 0 ) - time .time () < - self .expiry_time :
59
60
del self ._cache [item ]
60
61
if self .max_paths :
61
62
self ._q (item )
@@ -75,12 +76,10 @@ def __contains__(self, item):
75
76
return False
76
77
77
78
def __setitem__ (self , key , value ):
78
- if not self .use_listings_cache :
79
- return
80
79
if self .max_paths :
81
80
self ._q (key )
82
81
self ._cache [key ] = value
83
- if self .listings_expiry_time is not None :
82
+ if self .expiry_time is not None :
84
83
self ._times [key ] = time .time ()
85
84
86
85
def __delitem__ (self , key ):
@@ -93,6 +92,112 @@ def __iter__(self):
93
92
94
93
def __reduce__ (self ):
95
94
return (
96
- DirCache ,
97
- (self .use_listings_cache , self .listings_expiry_time , self .max_paths ),
95
+ MemoryDirCache ,
96
+ (self .expiry_time , self .max_paths ),
97
+ )
98
+
99
+
100
class FileDirCache(MutableMapping):
    """
    Directory-listings cache persisted on disk through ``diskcache``.

    Entries are written with ``expiry_time`` as their time-to-live; lookups
    treat absent and expired keys alike and raise ``KeyError`` for both.
    """

    # Sentinel telling "key absent/expired" apart from falsy stored values
    # such as the empty listing of an empty directory.
    _MISSING = object()

    def __init__(
        self,
        expiry_time=None,
        directory=None,
        **kwargs,
    ):
        """

        Parameters
        ----------
        expiry_time: int or float (optional)
            Time in seconds that a listing is considered valid. If None,
            listings do not expire.
        directory: str (optional)
            Directory path at which the listings cache file is stored. If None,
            an autogenerated path at the user folder is created.

        """
        import platformdirs
        from diskcache import Cache

        # Remember the argument exactly as given: __reduce__ must replay it,
        # not the resolved path, otherwise the suffix below is appended twice.
        self._directory_arg = directory

        if not directory:
            directory = platformdirs.user_cache_dir(appname="fsspec")
        directory = Path(directory) / "dircache" / str(expiry_time)

        try:
            directory.mkdir(exist_ok=True, parents=True)
        except OSError:
            logger.error(f"folder for dircache could not be created at {directory}")
            raise
        else:
            logger.info(f"Dircache located at {directory}")

        self.directory = directory
        self._cache = Cache(directory=str(directory))
        self.expiry_time = expiry_time

    def __getitem__(self, item):
        """Return the cached value for ``item``, retrying if a timeout occurs.

        Raises
        ------
        KeyError
            If ``item`` is not stored or has expired.
        """
        value = self._cache.get(item, default=self._MISSING, read=True, retry=True)
        if value is self._MISSING:
            raise KeyError(item)
        return value

    def clear(self):
        """Remove every entry from the underlying cache."""
        self._cache.clear()

    def __len__(self):
        """Number of keys that ``__iter__`` yields (live entries only)."""
        return sum(1 for _ in self)

    def __contains__(self, item):
        """True if ``item`` is stored and not expired, whatever its value."""
        # Compare against the sentinel instead of truthiness so that an empty
        # listing still counts as cached.
        found = self._cache.get(item, default=self._MISSING, retry=True)
        return found is not self._MISSING

    def __setitem__(self, key, value):
        """Store ``value`` under ``key`` with the configured expiry."""
        self._cache.set(
            key=key, value=value, expire=self.expiry_time, retry=True
        )

    def __delitem__(self, key):
        """Delete ``key`` from the underlying cache."""
        del self._cache[key]

    def __iter__(self):
        """Iterate over stored keys, skipping those that are no longer live."""
        return (k for k in self._cache.iterkeys() if k in self)

    def __reduce__(self):
        """Pickle as a constructor call with the original arguments."""
        return (
            FileDirCache,
            (self.expiry_time, self._directory_arg),
        )
172
+
173
+
174
class CacheType(Enum):
    """Selectable listings-cache backends; each member's value is its class."""

    MEMORY = MemoryDirCache
    FILE = FileDirCache
177
+
178
+
179
def create_dircache(
    cache_type: Optional[Union[str, CacheType]] = None,
    expiry_time: Optional[Union[int, float]] = None,
    **kwargs,
) -> Optional[Union[MemoryDirCache, FileDirCache]]:
    """
    Build the listings cache selected by ``cache_type``.

    Parameters
    ----------
    cache_type: str or CacheType (optional)
        Name of a ``CacheType`` member (case-insensitive) or the member
        itself. If falsy, no cache is created.
    expiry_time: int or float (optional)
        Time in seconds that a listing is considered valid; truthy values
        are converted to float before being handed to the cache. A value
        equal to zero disables caching.
    **kwargs:
        Forwarded unchanged to the cache constructor.

    Returns
    -------
    The constructed cache, or None when caching is disabled.

    Raises
    ------
    ValueError
        If ``cache_type`` is neither a known name nor a ``CacheType`` member.
    """
    if not cache_type:
        return None

    if isinstance(cache_type, str):
        try:
            cache_type = CacheType[cache_type.upper()]
        except KeyError as e:
            raise ValueError(
                f"Cache type must be one of {', '.join(ct.name.lower() for ct in CacheType)}"
            ) from e
    elif not isinstance(cache_type, CacheType):
        # e.g. a legacy boolean flag: fail with the same clear message rather
        # than an opaque lookup error further down.
        raise ValueError(
            f"Cache type must be one of {', '.join(ct.name.lower() for ct in CacheType)}"
        )

    if expiry_time:
        expiry_time = float(expiry_time)
    # A zero lifetime means nothing would ever be served from the cache.
    if expiry_time == 0.0:
        return None

    # Each enum member carries its implementing class as its value.
    return cache_type.value(expiry_time, **kwargs)
199
+
200
+
201
if __name__ == "__main__":
    # Manual smoke check: build the in-memory cache and print it.
    print(create_dircache(cache_type="memory"))
0 commit comments