Skip to content

Commit f43ebf5

Browse files
authored
allow filtering random archives by criteria (#10)
* allow filtering random archives by criteria * include search criteria in error message
1 parent 340ae83 commit f43ebf5

File tree

3 files changed

+147
-85
lines changed

3 files changed

+147
-85
lines changed

archive_db/handlers/DbHandlers.py

Lines changed: 123 additions & 78 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,6 @@ def decode(self, required_members=None):
2626
return obj
2727

2828

29-
3029
class UploadHandler(BaseHandler):
3130

3231
@gen.coroutine
@@ -92,62 +91,6 @@ def post(self):
9291
"host": verification.archive.host}})
9392

9493

95-
class RandomUnverifiedArchiveHandler(BaseHandler):
96-
97-
@gen.coroutine
98-
def get(self):
99-
"""
100-
Returns an unverified Archive object that has an associated was Upload object
101-
within the interval [today - age - margin, today - margin]. The margin value is
102-
used as a safety buffer, to make sure that the archived data has been properly
103-
flushed to tape upstreams at PDC.
104-
105-
:param age: Number of days we should look back when picking an unverified archive
106-
:param safety_margin: Number of days we should use as safety buffer
107-
:param today: (optional) if specified, use this timestamp for the reference date instead of
108-
datetime.datetime.utcnow().isoformat()
109-
:return A randomly pickedunverified archive within the specified date interval
110-
"""
111-
body = self.decode(required_members=["age", "safety_margin"])
112-
age = int(body["age"])
113-
margin = int(body["safety_margin"])
114-
today = body.get("today", dt.date.today().isoformat())
115-
116-
from_timestamp = dt.datetime.fromisoformat(today) - dt.timedelta(days=age+margin)
117-
to_timestamp = from_timestamp + dt.timedelta(days=age)
118-
119-
# "Give me a randomly chosen archive that was uploaded between from_timestamp and
120-
# to_timestamp, and has no previous verifications"
121-
query = Upload\
122-
.select()\
123-
.join(Verification, JOIN.LEFT_OUTER, on=(
124-
Verification.archive_id == Upload.archive_id))\
125-
.where(Upload.timestamp.between(from_timestamp, to_timestamp))\
126-
.group_by(Upload.archive_id)\
127-
.having(fn.Count(Verification.id) < 1)\
128-
.order_by(fn.Random())
129-
130-
result_len = query.count()
131-
132-
if result_len > 0:
133-
upload = query.first()
134-
archive_name = os.path.basename(os.path.normpath(upload.archive.path))
135-
self.write_json({
136-
"status": "unverified",
137-
"archive": {
138-
"timestamp": str(upload.timestamp),
139-
"path": upload.archive.path,
140-
"description": upload.archive.description,
141-
"host": upload.archive.host,
142-
"archive": archive_name
143-
}
144-
})
145-
else:
146-
msg = f"No unverified archives uploaded between {from_timestamp} and {to_timestamp} " \
147-
f"was found!"
148-
self.set_status(204, reason=msg)
149-
150-
15194
# TODO: We might have to add logic in some of the services
15295
# that adds a file with the description inside the archive,
15396
# so we can verify that we're operating on the correct
@@ -209,6 +152,15 @@ def get(self):
209152

210153
class QueryHandlerBase(BaseHandler):
211154

155+
@staticmethod
156+
def _str_as_bool(bool_str):
157+
if type(bool_str) is bool:
158+
return bool_str
159+
if type(bool_str) is str and bool_str.lower() in ["true", "false"]:
160+
return bool_str.lower() == "true"
161+
raise TypeError(
162+
f"{bool_str} can not be converted to bool")
163+
212164
@staticmethod
213165
def _db_query():
214166

@@ -232,6 +184,48 @@ def _db_query():
232184
Archive.path.asc())
233185
return query
234186

187+
@staticmethod
188+
def _filter_query(
189+
query,
190+
path=None,
191+
description=None,
192+
host=None,
193+
uploaded_before=None,
194+
uploaded_after=None,
195+
verified=None,
196+
removed=None,
197+
**kwargs):
198+
199+
if path:
200+
query = query.where(
201+
Archive.path.contains(path))
202+
if description:
203+
query = query.where(
204+
Archive.description.contains(description))
205+
if host:
206+
query = query.where(
207+
Archive.host.contains(host))
208+
if uploaded_before:
209+
query = query.where(
210+
Upload.timestamp <= dt.datetime.strptime(
211+
f"{uploaded_before} 23:59:59",
212+
"%Y-%m-%d %H:%M:%S"))
213+
if uploaded_after:
214+
query = query.where(
215+
Upload.timestamp >= dt.datetime.strptime(
216+
uploaded_after,
217+
"%Y-%m-%d"))
218+
if verified is not None:
219+
query = query.where(
220+
Verification.timestamp.is_null(
221+
not QueryHandlerBase._str_as_bool(verified)))
222+
if removed is not None:
223+
query = query.where(
224+
Removal.timestamp.is_null(
225+
not QueryHandlerBase._str_as_bool(removed)))
226+
227+
return query.dicts()
228+
235229
def _do_query(self, query):
236230
if query:
237231
self.write_json({
@@ -304,29 +298,80 @@ def post(self):
304298
under the key "archives"
305299
"""
306300
body = self.decode()
307-
query = self._db_query()
301+
query = self._filter_query(
302+
self._db_query(),
303+
**body)
304+
self._do_query(query)
308305

309-
if body.get("path"):
310-
query = query.where(Archive.path.contains(body["path"]))
311-
if body.get("description"):
312-
query = query.where(Archive.description.contains(body["description"]))
313-
if body.get("host"):
314-
query = query.where(Archive.host.contains(body["host"]))
315-
if body.get("uploaded_before"):
316-
query = query.where(
317-
Upload.timestamp <= dt.datetime.strptime(
318-
f"{body['uploaded_before']} 23:59:59",
319-
"%Y-%m-%d %H:%M:%S"))
320-
if body.get("uploaded_after"):
321-
query = query.where(
322-
Upload.timestamp >= dt.datetime.strptime(body["uploaded_after"], "%Y-%m-%d"))
323-
if body.get("verified") is not None and body["verified"] in ["True", "False"]:
324-
query = query.where(Verification.timestamp.is_null(body["verified"] == "False"))
325-
if body.get("removed") is not None and body["removed"] in ["True", "False"]:
326-
query = query.where(Removal.timestamp.is_null(body["removed"] == "False"))
327306

328-
query = (query.dicts())
329-
self._do_query(query)
307+
class RandomUnverifiedArchiveHandler(QueryHandlerBase):
308+
309+
@gen.coroutine
310+
def get(self):
311+
"""
312+
For backwards compatibility, forward this GET request to the POST handler
313+
"""
314+
self.post()
315+
316+
@gen.coroutine
317+
def post(self):
318+
"""
319+
Returns an unverified Archive object that has an associated Upload object
320+
within the interval [today - age - margin, today - margin]. The margin value is
321+
used as a safety buffer, to make sure that the archived data has been properly
322+
flushed to tape upstreams at PDC.
323+
324+
:param age: Number of days we should look back when picking an unverified archive
325+
:param safety_margin: Number of days we should use as safety buffer
326+
:param today: (optional) if specified, use this timestamp for the reference date instead of
327+
datetime.date.today().isoformat()
328+
:return: A randomly picked unverified archive within the specified date interval
329+
"""
330+
body = self.decode(
331+
required_members=[
332+
"age",
333+
"safety_margin"])
334+
age = int(body["age"])
335+
margin = int(body["safety_margin"])
336+
today = body.get("today", dt.date.today().isoformat())
337+
338+
from_timestamp = dt.datetime.fromisoformat(today) - dt.timedelta(days=age+margin)
339+
to_timestamp = from_timestamp + dt.timedelta(days=age)
340+
341+
body["uploaded_before"] = to_timestamp.date().isoformat()
342+
body["uploaded_after"] = from_timestamp.date().isoformat()
343+
body["verified"] = False
344+
345+
query = self._filter_query(
346+
self._db_query(),
347+
**body)
348+
349+
result_len = query.count()
350+
351+
if result_len > 0:
352+
upload = query.first()
353+
archive_name = os.path.basename(
354+
os.path.normpath(
355+
upload["path"]
356+
)
357+
)
358+
self.write_json({
359+
"status": "unverified",
360+
"archive": {
361+
"timestamp": str(upload["uploaded"]),
362+
"path": upload["path"],
363+
"description": upload["description"],
364+
"host": upload["host"],
365+
"archive": archive_name
366+
}
367+
})
368+
else:
369+
criteria = ", ".join([f"{k}={v}" for k, v in body.items()])
370+
msg = f"No archives matching criteria {criteria} were found!"
371+
self.set_status(
372+
204,
373+
reason=msg
374+
)
330375

331376

332377
class VersionHandler(BaseHandler):

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ include = ["archive_db*"]
77

88
[project]
99
name = "archive-db"
10-
version = "1.3.1"
10+
version = "1.4.0"
1111
authors = [
1212
{name = "SNP&SEQ Technology Platform, Uppsala University", email = "[email protected]" },
1313
]

tests/test_models.py

Lines changed: 23 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ class TestDb(AsyncHTTPTestCase):
2222
num_archives = 5
2323
first_archive = 1
2424
second_archive = 3
25+
third_archive = 4
2526

2627
API_BASE = "/api/1.0"
2728

@@ -48,8 +49,9 @@ def example_data(self):
4849
"path": f"/data/testhost/runfolders/archive-{i}",
4950
"host": "testhost",
5051
"uploaded": str(self.now - datetime.timedelta(days=i)) if i in [
51-
self.first_archive, self.second_archive] else None,
52-
"verified": str(self.now) if i == self.second_archive else None,
52+
self.first_archive, self.second_archive, self.third_archive] else None,
53+
"verified": str(self.now) if i in [
54+
self.second_archive] else None,
5355
"removed": str(self.now) if i == self.second_archive else None
5456
}
5557

@@ -173,8 +175,8 @@ def test_failing_fetch_random_unverified_archive(self):
173175
self.create_data()
174176
# I.e. our lookback window is [today - age - safety_margin, today - safety_margin] days.
175177
body = {
176-
"age": "5",
177-
"safety_margin": "1",
178+
"age": "1",
179+
"safety_margin": "2",
178180
"today": self.now.date().isoformat()
179181
}
180182
resp = self.go("/randomarchive", method="GET", body=body)
@@ -183,8 +185,8 @@ def test_failing_fetch_random_unverified_archive(self):
183185
def test_fetch_random_unverified_archive(self):
184186
archives = self.create_data()
185187
body = {
186-
"age": "5",
187-
"safety_margin": "0",
188+
"age": "2",
189+
"safety_margin": "1",
188190
"today": self.now.date().isoformat()
189191
}
190192
resp = self.go("/randomarchive", method="GET", body=body)
@@ -194,6 +196,21 @@ def test_fetch_random_unverified_archive(self):
194196
for key in ("description", "host", "path"):
195197
self.assertEqual(obs_archive[key], exp_archive[key])
196198

199+
def test_fetch_random_archive_with_criteria(self):
200+
archives = self.create_data()
201+
body = {
202+
"age": "5",
203+
"safety_margin": "2",
204+
"description": f"-{self.third_archive}",
205+
"today": self.now.date().isoformat()
206+
}
207+
resp = self.go("/randomarchive", method="POST", body=body)
208+
self.assertEqual(resp.code, 200)
209+
obs_archive = json_decode(resp.body).get("archive")
210+
exp_archive = archives[self.third_archive]
211+
for key in ("description", "host", "path"):
212+
self.assertEqual(obs_archive[key], exp_archive[key])
213+
197214
def test_version(self):
198215
resp = self.go("/version", method="GET")
199216
self.assertEqual(resp.code, 200)

0 commit comments

Comments
 (0)