@@ -26,7 +26,6 @@ def decode(self, required_members=None):
2626 return obj
2727
2828
29-
3029class UploadHandler (BaseHandler ):
3130
3231 @gen .coroutine
@@ -92,62 +91,6 @@ def post(self):
9291 "host" : verification .archive .host }})
9392
9493
95- class RandomUnverifiedArchiveHandler (BaseHandler ):
96-
97- @gen .coroutine
98- def get (self ):
99- """
100- Returns an unverified Archive object that has an associated was Upload object
101- within the interval [today - age - margin, today - margin]. The margin value is
102- used as a safety buffer, to make sure that the archived data has been properly
103- flushed to tape upstreams at PDC.
104-
105- :param age: Number of days we should look back when picking an unverified archive
106- :param safety_margin: Number of days we should use as safety buffer
107- :param today: (optional) if specified, use this timestamp for the reference date instead of
108- datetime.datetime.utcnow().isoformat()
109- :return A randomly pickedunverified archive within the specified date interval
110- """
111- body = self .decode (required_members = ["age" , "safety_margin" ])
112- age = int (body ["age" ])
113- margin = int (body ["safety_margin" ])
114- today = body .get ("today" , dt .date .today ().isoformat ())
115-
116- from_timestamp = dt .datetime .fromisoformat (today ) - dt .timedelta (days = age + margin )
117- to_timestamp = from_timestamp + dt .timedelta (days = age )
118-
119- # "Give me a randomly chosen archive that was uploaded between from_timestamp and
120- # to_timestamp, and has no previous verifications"
121- query = Upload \
122- .select ()\
123- .join (Verification , JOIN .LEFT_OUTER , on = (
124- Verification .archive_id == Upload .archive_id ))\
125- .where (Upload .timestamp .between (from_timestamp , to_timestamp ))\
126- .group_by (Upload .archive_id )\
127- .having (fn .Count (Verification .id ) < 1 )\
128- .order_by (fn .Random ())
129-
130- result_len = query .count ()
131-
132- if result_len > 0 :
133- upload = query .first ()
134- archive_name = os .path .basename (os .path .normpath (upload .archive .path ))
135- self .write_json ({
136- "status" : "unverified" ,
137- "archive" : {
138- "timestamp" : str (upload .timestamp ),
139- "path" : upload .archive .path ,
140- "description" : upload .archive .description ,
141- "host" : upload .archive .host ,
142- "archive" : archive_name
143- }
144- })
145- else :
146- msg = f"No unverified archives uploaded between { from_timestamp } and { to_timestamp } " \
147- f"was found!"
148- self .set_status (204 , reason = msg )
149-
150-
15194# TODO: We might have to add logic in some of the services
15295# that adds a file with the description inside the archive,
15396# so we can verify that we're operating on the correct
@@ -209,6 +152,15 @@ def get(self):
209152
210153class QueryHandlerBase (BaseHandler ):
211154
155+ @staticmethod
156+ def _str_as_bool (bool_str ):
157+ if type (bool_str ) is bool :
158+ return bool_str
159+ if type (bool_str ) is str and bool_str .lower () in ["true" , "false" ]:
160+ return bool_str .lower () == "true"
161+ raise TypeError (
162+ f"{ bool_str } can not be converted to bool" )
163+
212164 @staticmethod
213165 def _db_query ():
214166
@@ -232,6 +184,48 @@ def _db_query():
232184 Archive .path .asc ())
233185 return query
234186
@staticmethod
def _filter_query(
        query,
        path=None,
        description=None,
        host=None,
        uploaded_before=None,
        uploaded_after=None,
        verified=None,
        removed=None,
        **kwargs):
    """
    Apply the supplied search criteria to a query and return it as dict rows.

    Criteria that are not supplied (or are empty) leave the query untouched.

    :param query: the query to narrow down
    :param path: keep rows whose Archive path contains this value
    :param description: keep rows whose Archive description contains this value
    :param host: keep rows whose Archive host contains this value
    :param uploaded_before: date string (YYYY-MM-DD); keep uploads made up to and
           including the end of that day
    :param uploaded_after: date string (YYYY-MM-DD); keep uploads made from the
           start of that day
    :param verified: bool or "true"/"false"; when true, Verification.timestamp must
           not be null, when false it must be null
    :param removed: bool or "true"/"false"; when true, Removal.timestamp must
           not be null, when false it must be null
    :param kwargs: any additional keys are accepted and ignored
    :return: the filtered query, with .dicts() applied
    """
    # Substring criteria on the Archive columns
    substring_criteria = (
        (Archive.path, path),
        (Archive.description, description),
        (Archive.host, host),
    )
    for column, needle in substring_criteria:
        if needle:
            query = query.where(column.contains(needle))

    if uploaded_before:
        # Extend the limit to the last second of the day so the whole day is included
        latest = dt.datetime.strptime(
            f"{uploaded_before} 23:59:59",
            "%Y-%m-%d %H:%M:%S")
        query = query.where(Upload.timestamp <= latest)

    if uploaded_after:
        earliest = dt.datetime.strptime(uploaded_after, "%Y-%m-%d")
        query = query.where(Upload.timestamp >= earliest)

    # Presence criteria: a flag of True requires a timestamp, False requires none
    presence_criteria = (
        (Verification.timestamp, verified),
        (Removal.timestamp, removed),
    )
    for column, flag in presence_criteria:
        if flag is not None:
            must_be_null = not QueryHandlerBase._str_as_bool(flag)
            query = query.where(column.is_null(must_be_null))

    return query.dicts()
228+
235229 def _do_query (self , query ):
236230 if query :
237231 self .write_json ({
@@ -304,29 +298,80 @@ def post(self):
304298 under the key "archives"
305299 """
306300 body = self .decode ()
307- query = self ._db_query ()
301+ query = self ._filter_query (
302+ self ._db_query (),
303+ ** body )
304+ self ._do_query (query )
308305
309- if body .get ("path" ):
310- query = query .where (Archive .path .contains (body ["path" ]))
311- if body .get ("description" ):
312- query = query .where (Archive .description .contains (body ["description" ]))
313- if body .get ("host" ):
314- query = query .where (Archive .host .contains (body ["host" ]))
315- if body .get ("uploaded_before" ):
316- query = query .where (
317- Upload .timestamp <= dt .datetime .strptime (
318- f"{ body ['uploaded_before' ]} 23:59:59" ,
319- "%Y-%m-%d %H:%M:%S" ))
320- if body .get ("uploaded_after" ):
321- query = query .where (
322- Upload .timestamp >= dt .datetime .strptime (body ["uploaded_after" ], "%Y-%m-%d" ))
323- if body .get ("verified" ) is not None and body ["verified" ] in ["True" , "False" ]:
324- query = query .where (Verification .timestamp .is_null (body ["verified" ] == "False" ))
325- if body .get ("removed" ) is not None and body ["removed" ] in ["True" , "False" ]:
326- query = query .where (Removal .timestamp .is_null (body ["removed" ] == "False" ))
327306
328- query = (query .dicts ())
329- self ._do_query (query )
class RandomUnverifiedArchiveHandler(QueryHandlerBase):
    """
    Picks an unverified archive uploaded within a date interval derived from the request.
    """

    @gen.coroutine
    def get(self):
        """
        For backwards compatibility, forward this GET request to the POST handler.
        """
        # Yield the future returned by the coroutine; without the yield, any
        # exception raised inside post() would be stored on an unobserved future
        # and never turned into an error response.
        yield self.post()

    @gen.coroutine
    def post(self):
        """
        Returns an unverified archive that has an associated Upload object within the
        interval [today - age - margin, today - margin]. The margin value is used as a
        safety buffer, to make sure that the archived data has been properly flushed
        to tape upstream at PDC.

        Any additional members in the request body are passed on as filter criteria
        to _filter_query, together with the computed date interval and verified=False.

        :param age: Number of days we should look back when picking an unverified archive
        :param safety_margin: Number of days we should use as safety buffer
        :param today: (optional) if specified, use this ISO formatted date as the reference
               date instead of datetime.date.today().isoformat()
        :return An unverified archive within the specified date interval, or status 204
                if no archive matches
        """
        body = self.decode(
            required_members=[
                "age",
                "safety_margin"])
        age = int(body["age"])
        margin = int(body["safety_margin"])
        today = body.get("today", dt.date.today().isoformat())

        from_timestamp = dt.datetime.fromisoformat(today) - dt.timedelta(days=age + margin)
        to_timestamp = from_timestamp + dt.timedelta(days=age)

        # Express the interval through the generic filter criteria; these
        # deliberately override any values supplied by the client.
        body["uploaded_before"] = to_timestamp.date().isoformat()
        body["uploaded_after"] = from_timestamp.date().isoformat()
        body["verified"] = False

        query = self._filter_query(
            self._db_query(),
            **body)

        # NOTE(review): the row returned is the first one in whatever order
        # _db_query() specifies; no random ordering is applied here although the
        # handler name suggests a random pick - confirm the intended behaviour.
        # A single first() call is enough: it yields None when nothing matches,
        # which avoids a separate count() query.
        upload = query.first()

        if upload is not None:
            archive_name = os.path.basename(
                os.path.normpath(upload["path"]))
            self.write_json({
                "status": "unverified",
                "archive": {
                    "timestamp": str(upload["uploaded"]),
                    "path": upload["path"],
                    "description": upload["description"],
                    "host": upload["host"],
                    "archive": archive_name
                }
            })
        else:
            criteria = ", ".join([f"{k}={v}" for k, v in body.items()])
            msg = f"No archives matching criteria {criteria} were found!"
            self.set_status(
                204,
                reason=msg
            )
330375
331376
332377class VersionHandler (BaseHandler ):
0 commit comments