@@ -137,34 +137,26 @@ def delete(self, id):
         os.remove(self._filename(id))


-class S3Store(BaseBlobStore):
-    def __init__(self, cachedir, basepath, s3_config=None, s3fs=None,
-                 read=True, write=True, read_through_write=True,
+class RemoteStore(BaseBlobStore):
+    def __init__(self, cachedir, basepath, read=True, write=True, read_through_write=True,
                  delete=False, on_duplicate_key='skip', cleanup_cachedir=False,
-                 always_check_s3=False):
+                 always_check_remote=False):
         """
         Parameters
         ----------
-        always_check_s3 : bool
-            When True S3 will be checked with every __contains__ call. Otherwise it will
+        always_check_remote : bool
+            When True the remote store will be checked with every __contains__ call. Otherwise it will
             short-circuit if the blob is found in the cachedir. For performance reasons this
             should always be set to False. The only reason why you would want to use this
-            is if you are using a S3Store and a DiskStore in a ChainedStore together for
-            some reason. Since the S3Store basically doubles as a DiskStore with it's cachedir
+            is if you are using a RemoteStore and a DiskStore in a ChainedStore together for
+            some reason. Since the RemoteStore basically doubles as a DiskStore with its cachedir
             chaining the two doesn't really make sense though.
         """
-        super(S3Store, self).__init__(
+        super(RemoteStore, self).__init__(
             read=read, write=write, read_through_write=read_through_write,
             delete=delete, on_duplicate_key=on_duplicate_key)

-        self.always_check_s3 = always_check_s3
-
-        if s3fs:
-            self.s3fs = s3fs
-        elif s3_config is not None:
-            self.s3fs = S3FileSystem(**s3_config)
-        else:
-            raise ValueError("You must provide either s3_config or s3fs for a S3Store")
+        self.always_check = always_check_remote

         self.cachedir = _abspath(cachedir)
         self.basepath = basepath
@@ -181,12 +173,25 @@ def _filename(self, id):
     def _path(self, id):
         return os.path.join(self.basepath, id)

+    def _exists(self, path):
+        raise NotImplementedError()
+
+    def _delete_remote(self, path):
+        raise NotImplementedError()
+
+    def _upload_file(self, filename, path):
+        raise NotImplementedError()
+
+    def _download_file(self, path, dest_filename):
+        raise NotImplementedError()
+
     def __contains__(self, id):
         cs.ensure_contains(self)
-        if self.always_check_s3:
-            return self.s3fs.exists(self._path(id))
+        path = self._path(id)
+        if self.always_check:
+            return self._exists(path)
         else:
-            return os.path.exists(self._filename(id)) or self.s3fs.exists(self._path(id))
+            return os.path.exists(self._filename(id)) or self._exists(path)

     def _put_overwrite(self, id, value, serializer, read_through):
         cs.ensure_put(self, id, read_through, check_contains=False)
@@ -195,25 +200,69 @@ def _put_overwrite(self, id, value, serializer, read_through):
         if not os.path.isfile(filename):
             with _atomic_write(filename) as temp:
                 serializer.dump(value, temp)
-            self.s3fs.put(filename, self._path(id))
+            self._upload_file(filename, self._path(id))

     def get(self, id, serializer=DEFAULT_VALUE_SERIALIZER, **_kargs):
         cs.ensure_read(self)
         cs.ensure_present(self, id)
         filename = self._filename(id)
         if not os.path.exists(filename):
             with _atomic_write(filename) as temp:
-                self.s3fs.get(self._path(id), temp)
+                self._download_file(self._path(id), temp)
         return serializer.load(filename)

     def delete(self, id):
         cs.ensure_delete(self, id)
         filename = self._filename(id)
         if os.path.exists(filename):
             os.remove(filename)
-        self.s3fs.rm(self._path(id))
+        self._delete_remote(self._path(id))
+
+
+class S3Store(RemoteStore):
+    def __init__(self, cachedir, basepath, s3_config=None, s3fs=None,
+                 read=True, write=True, read_through_write=True,
+                 delete=False, on_duplicate_key='skip', cleanup_cachedir=False,
+                 always_check_s3=False):
+        """
+        Parameters
+        ----------
+        always_check_s3 : bool
+            When True S3 will be checked with every __contains__ call. Otherwise it will
+            short-circuit if the blob is found in the cachedir. For performance reasons this
+            should always be set to False. The only reason why you would want to use this
+            is if you are using an S3Store and a DiskStore in a ChainedStore together for
+            some reason. Since the S3Store basically doubles as a DiskStore with its cachedir
+            chaining the two doesn't really make sense though.
+        """
+        super(S3Store, self).__init__(always_check_remote=always_check_s3,
+                                      cachedir=cachedir,
+                                      basepath=basepath,
+                                      cleanup_cachedir=cleanup_cachedir,
+                                      read=read, write=write, read_through_write=read_through_write,
+                                      delete=delete, on_duplicate_key=on_duplicate_key)
+
+        if s3fs:
+            self.s3fs = s3fs
+        elif s3_config is not None:
+            self.s3fs = S3FileSystem(**s3_config)
+        else:
+            raise ValueError("You must provide either s3_config or s3fs for a S3Store")
+
+    def _exists(self, path):
+        return self.s3fs.exists(path)
+
+    def _delete_remote(self, path):
+        self.s3fs.rm(path)
+
+    def _upload_file(self, filename, path):
+        self.s3fs.put(filename, path)
+
+    def _download_file(self, remote_path, dest_filename):
+        self.s3fs.get(remote_path, dest_filename)


+
 class ChainedStore(BaseBlobStore):
     def __init__(self, stores, read=True, write=True, read_through_write=True,
                  delete=True, on_duplicate_key='skip'):
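
Note on the refactor above: the four NotImplementedError hooks turn RemoteStore into a template-method base class. __contains__, get, _put_overwrite, and delete keep all of the cachedir caching logic, and a backend only has to supply _exists, _delete_remote, _upload_file, and _download_file. Below is a minimal sketch of what another backend could look like; the provenance.blobstores import path and the LocalDirStore name are assumptions for illustration and are not part of this commit.

import os
import shutil

from provenance.blobstores import RemoteStore  # assumed import path


class LocalDirStore(RemoteStore):
    """Hypothetical backend: the 'remote' is just another local directory."""

    def _exists(self, path):
        # Remote-side existence check used by __contains__.
        return os.path.exists(path)

    def _delete_remote(self, path):
        # Remove the blob from the 'remote' directory.
        os.remove(path)

    def _upload_file(self, filename, path):
        # Copy the cached blob up to the 'remote' path.
        dirname = os.path.dirname(path)
        if dirname and not os.path.isdir(dirname):
            os.makedirs(dirname)
        shutil.copyfile(filename, path)

    def _download_file(self, path, dest_filename):
        # Copy the blob from the 'remote' path into the local cachedir.
        shutil.copyfile(path, dest_filename)

The always_check_remote flag is inherited unchanged, so the short-circuit behaviour described in the docstring applies to any such subclass.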
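For completeness, a hedged usage sketch of the refactored S3Store showing the two construction paths its __init__ accepts. The cache directory and bucket path are placeholders, and s3_config is forwarded verbatim to s3fs.S3FileSystem, so its keys follow that library's constructor.

from s3fs import S3FileSystem

from provenance.blobstores import S3Store  # assumed import path

# Option 1: let S3Store build the filesystem from a config dict.
store = S3Store(cachedir='/tmp/prov-cache',        # placeholder
                basepath='my-bucket/prov-blobs',   # placeholder
                s3_config={'anon': False})

# Option 2: hand in an already-configured S3FileSystem instance.
fs = S3FileSystem(anon=False)
store = S3Store(cachedir='/tmp/prov-cache',
                basepath='my-bucket/prov-blobs',
                s3fs=fs,
                always_check_s3=False)  # default; skips an S3 round trip when the blob is cached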