29
29
from datetime import datetime
30
30
31
31
from flask import current_app
32
+ from flask_mail import Message
32
33
from invenio_db import db
33
34
from invenio_files_rest .models import (
34
35
Bucket ,
37
38
ObjectVersion ,
38
39
ObjectVersionTag ,
39
40
)
40
- from invenio_pidstore .models import PersistentIdentifier
41
41
from invenio_records_files .api import FileObject , FilesIterator , Record
42
- from sqlalchemy import func
43
- from flask_mail import Message
44
-
45
- from cernopendata .api import RecordFilesWithIndex
46
42
47
43
from .models import RequestMetadata , TransferMetadata
48
44
@@ -91,7 +87,7 @@ class Transfer:
91
87
@staticmethod
92
88
def create (entry ):
93
89
"""Create a new transfer entry."""
94
- transfer = Transfer (
90
+ transfer = TransferMetadata (
95
91
action = entry ["action" ],
96
92
new_filename = entry ["new_filename" ],
97
93
record_uuid = entry ["record_uuid" ],
@@ -110,22 +106,23 @@ def create(entry):
110
106
def _get_ongoing_transfers (last_check ):
111
107
"""Get transfers that need processing."""
112
108
return (
113
- Transfer .query .filter (
114
- Transfer .last_check <= last_check , Transfer .finished .is_ (None )
109
+ TransferMetadata .query .filter (
110
+ TransferMetadata .last_check <= last_check ,
111
+ TransferMetadata .finished .is_ (None ),
115
112
)
116
- .order_by (Transfer .last_check )
113
+ .order_by (TransferMetadata .last_check )
117
114
.all ()
118
115
)
119
116
120
117
@staticmethod
121
118
def is_scheduled (file_id , action ):
122
119
"""Check if a transfer is already scheduled."""
123
120
return (
124
- db .session .query (Transfer .id )
121
+ db .session .query (TransferMetadata .id )
125
122
.filter (
126
- Transfer .action == action ,
127
- Transfer .file_id == file_id ,
128
- Transfer .finished .is_ (None ),
123
+ TransferMetadata .action == action ,
124
+ TransferMetadata .file_id == file_id ,
125
+ TransferMetadata .finished .is_ (None ),
129
126
)
130
127
.first ()
131
128
is not None
@@ -139,46 +136,6 @@ def _load_class(full_class_path):
139
136
cls = getattr (module , class_name )
140
137
return cls ()
141
138
142
- @staticmethod
143
- def check_transfers (catalog ):
144
- """Check all the ongoing transfers."""
145
- logger .info ("Checking all the ongoing transfers" )
146
- now = datetime .utcnow ()
147
- all_status = {}
148
- summary = {}
149
- for transfer in Transfer ._get_ongoing_transfers (now ):
150
- id = transfer .id
151
- transfer .last_check = datetime .utcnow ()
152
- transfer .status , error = Transfer ._load_class (
153
- f"{ transfer .method } .TransferManager"
154
- ).transfer_status (transfer .method_id )
155
- all_status [id ] = transfer .status
156
- if transfer .status not in summary :
157
- summary [transfer .status ] = 0
158
- summary [transfer .status ] += 1
159
- if transfer .status == "DONE" :
160
- logger .debug (
161
- f"Transfer { id } : just finished! Let's update the catalog and mark it as done"
162
- )
163
- transfer .finished = datetime .now ()
164
- catalog .add_copy (
165
- transfer .record_uuid ,
166
- transfer .file_id ,
167
- transfer .action ,
168
- transfer .new_filename ,
169
- )
170
- if transfer .status == "FAILED" or not transfer .status :
171
- logger .error (f"The transfer { id } failed :(" )
172
- transfer .reason = error
173
- transfer .finished = datetime .now ()
174
- else :
175
- logger .debug (f"Transfer { id } is in status { transfer .status } " )
176
- db .session .add (transfer )
177
- db .session .commit ()
178
- catalog .reindex_entries ()
179
- logger .info (f"Summary: { summary } " )
180
- return all_status
181
-
182
139
183
140
class Request :
184
141
"""Class to check the cold storage requests."""
@@ -247,148 +204,3 @@ def subscribe(transfer_id, email):
247
204
db .session .commit ()
248
205
return True
249
206
return False
250
-
251
- def check_requests (self , manager ):
252
- """Check the active requests."""
253
- # The requests would go through these stages
254
- # SUBMITTED -> STARTED -> COMPLETED
255
- self ._check_submitted (manager )
256
-
257
- # Now, let's look at the ones that 'started'
258
- self ._check_running ()
259
-
260
- @staticmethod
261
- def _check_submitted (manager ):
262
- """Check if there are any new transfers submitted."""
263
- for action in ["stage" , "archive" ]:
264
- active_transfers_count = Transfer .query .filter (
265
- Transfer .finished .is_ (None ), Transfer .action == action
266
- ).count ()
267
- threshold = Request .get_active_transfers_threshold (action )
268
-
269
- logger .info (
270
- f"Checking if we can { action } more records: active { active_transfers_count } /{ threshold } "
271
- )
272
- submitted = 0
273
- limit = threshold - active_transfers_count
274
- if limit > 0 :
275
- transfers = Request .query .filter_by (
276
- status = "submitted" , action = action
277
- ).all ()
278
-
279
- for transfer in transfers :
280
- info = manager .doOperation (
281
- action ,
282
- transfer .record_id ,
283
- limit = limit ,
284
- register = True ,
285
- force = False ,
286
- dry = False ,
287
- )
288
- logger .debug (f"Got { info } " )
289
- if info :
290
- submitted += len (info )
291
- transfer .num_files += len (info )
292
- transfer .size += sum (item .size for item in info )
293
- transfer .started_at = datetime .utcnow ()
294
- if limit == submitted :
295
- logger .info (
296
- f"Reached the threshold of { threshold } transfers. There might be more in this record"
297
- f"({ submitted + active_transfers_count } ). Let's wait before continuing"
298
- )
299
- else :
300
- transfer .status = "started"
301
- db .session .add (transfer )
302
- limit -= submitted
303
- if limit <= 0 :
304
- logger .info ("We have submitted enough. Stopping" )
305
- break
306
- if submitted :
307
- logger .info (f"{ submitted } transfers have been submitted!" )
308
- db .session .commit ()
309
-
310
- @staticmethod
311
- def _check_running ():
312
- """Check the records that are being archived."""
313
- for action in ["stage" , "archive" ]:
314
-
315
- requests = Request .query .filter_by (status = "started" , action = action ).all ()
316
- logger .debug (f"Checking the { len (requests )} { action } requests" )
317
- completed = 0
318
- for request in requests :
319
- record = RecordFilesWithIndex .get_record (request .record_id )
320
-
321
- if action == "stage" :
322
- if record ["availability" ] != "online" :
323
- continue
324
- elif action == "archive" :
325
- files = (f for index in record .file_indices for f in index ["files" ])
326
- missing = next (
327
- (
328
- f
329
- for f in files
330
- if "tags" not in f or "uri_cold" not in f ["tags" ]
331
- ),
332
- None ,
333
- )
334
- if missing :
335
- logger .debug (f"The file { missing ['key' ]} is not in tape yet..." )
336
- continue
337
- completed += 1
338
- Request .complete (request )
339
- logger .info (f"{ completed } /{ len (requests )} requests have finished" )
340
-
341
- @staticmethod
342
- def get_requests (
343
- status = None ,
344
- action = None ,
345
- record = None ,
346
- summary = False ,
347
- sort = None ,
348
- direction = None ,
349
- page = None ,
350
- per_page = None ,
351
- ):
352
- """Get the summary of the requests."""
353
- if summary :
354
- query = db .session .query (
355
- Request .status ,
356
- Request .action ,
357
- func .count ().label ("count" ),
358
- func .sum (Request .num_files ).label ("files" ),
359
- func .sum (Request .size ).label ("size" ),
360
- )
361
- else :
362
- query = Request .query
363
-
364
- if status :
365
- status_list = status .split ("," )
366
- query = query .filter (Request .status .in_ (status_list ))
367
-
368
- if action :
369
- action_list = action .split ("," )
370
- query = query .filter (Request .action .in_ (action_list ))
371
-
372
- if record :
373
- try :
374
- uuid = PersistentIdentifier .get ("recid" , record ).object_uuid
375
- query = query .filter_by (record_id = uuid )
376
- except Exception :
377
- query = query .filter (False )
378
- if summary :
379
- result = query .group_by (Request .status , Request .action ).all ()
380
-
381
- if sort :
382
- column = getattr (Request , sort , None )
383
- if column :
384
- if direction == "desc" :
385
- query = query .order_by (column .desc ())
386
- else :
387
- query = query .order_by (column .asc ())
388
-
389
- if page :
390
- result = query .order_by (Request .created_at .desc ()).paginate (
391
- page = page , per_page = per_page , error_out = False
392
- )
393
-
394
- return result
0 commit comments