Skip to content

Commit 3a62d19

Browse files
author
Stephan Lohse
authored
Merge pull request #4 from johanherman/archive-verify-remove
Update archive-db with support for verifications (and some ideas about removals)
2 parents 9320b2a + 079237e commit 3a62d19

File tree

7 files changed

+172
-73
lines changed

7 files changed

+172
-73
lines changed

.travis.yml

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,17 @@
11
language: python
2+
23
python:
3-
- "3.5"
4+
- "3.5"
5+
6+
before_install:
7+
- sudo python -m pip install pipenv
48

59
install:
6-
- pip install pipenv
7-
- pipenv install --dev
10+
- pipenv install --dev
811

912
script:
10-
- pipenv run nosetests tests/
13+
- pipenv run nosetests tests/
1114

1215
notifications:
13-
email: false
16+
email: false
17+

README.md

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,14 @@
1-
Arteria Archive DB
1+
SNPSEQ Archive DB
22
==================
33

4-
A self contained (Tornado) REST service that serves as a frontend for a simple SQL db that contains the state of our uploads, verifications and removals done by other Arteria archive services.
4+
A self contained (Tornado) REST service that serves as a frontend for a simple SQL db that contains the state of our uploads, verifications and removals done by other SNPSEQ archive services.
55

66
Trying it out
77
-------------
88

99
python3 -m pip install pipenv
1010
pipenv install --deploy
1111

12-
1312
Try running it:
1413

1514
pipenv run ./archive-db-ws --config=config/ --port=8888 --debug
@@ -28,4 +27,14 @@ Running tests
2827
REST endpoints
2928
--------------
3029

31-
# FIXME: Update example
30+
Creating a new Upload (and associated Archive if none exists):
31+
32+
curl -i -X "POST" -d '{"path": "/path/to/directory/", "host": "my-host", "description": "my-descr"}' http://localhost:8888/api/1.0/upload
33+
34+
Creating a new Verification (and associated Archive if none exists):
35+
36+
curl -i -X "POST" -d '{"path": "/path/to/directory/", "host": "my-host", "description": "my-descr"}' http://localhost:8888/api/1.0/verification
37+
38+
Getting a randomly picked Archive that has been uploaded within a certain timespan, but never verified before:
39+
40+
curl -i -X "GET" -d '{"age": "7", "safety_margin": "3"}' http://localhost:8888/api/1.0/randomarchive

archive_db/app.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import datetime
22

33
from archive_db.models.Model import init_db, Archive, Upload, Verification, Removal
4-
from archive_db.handlers.DbHandlers import UploadHandler, VerificationHandler, RemovalHandler, VersionHandler
4+
from archive_db.handlers.DbHandlers import UploadHandler, VerificationHandler, RemovalHandler, VersionHandler, RandomUnverifiedArchiveHandler
55

66
from arteria.web.app import AppService
77
from peewee import *
@@ -19,9 +19,9 @@ def routes(**kwargs):
1919
return [
2020
url(r"/api/1.0/version", VersionHandler, name="version", kwargs=kwargs),
2121
url(r"/api/1.0/upload", UploadHandler, name="upload"),
22-
url(r"/api/1.0/verifification/([\w_-]+)",
23-
VerificationHandler, name="verification", kwargs=kwargs),
24-
url(r"/api/1.0/removal/([\w_-]+)", RemovalHandler, name="removal", kwargs=kwargs)
22+
url(r"/api/1.0/verification", VerificationHandler, name="verification"),
23+
url(r"/api/1.0/randomarchive", RandomUnverifiedArchiveHandler, name="randomarchive"),
24+
url(r"/api/1.0/removal", RemovalHandler, name="removal")
2525
]
2626

2727

archive_db/handlers/DbHandlers.py

Lines changed: 118 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -1,31 +1,16 @@
11
import datetime as dt
2+
import os
23

34
from arteria.web.handlers import BaseRestHandler
45

56
from archive_db.models.Model import Archive, Upload, Verification, Removal
67
from archive_db import __version__ as version
78

9+
from peewee import *
810
from tornado import gen
911
from tornado.web import RequestHandler, HTTPError
1012
from tornado.escape import json_decode, json_encode
1113

12-
# TODO: Shall we implement a handler for something like:
13-
# "Has any package been verified since date `bar`?
14-
# At the moment this can be solved in the client by comparing
15-
# with `last verified date`.
16-
17-
"""
18-
Our handlers are supposed to work as following:
19-
20-
POST /upload/ - create a new archive entry + upload entry
21-
GET /upload/ - get last global upload
22-
GET /upload/<archive> - get last upload for <archive>
23-
POST /verification/ - create a new verification entry
24-
GET /verification/ - get last global verification
25-
GET /verification/<archive> - get last verification for <archive>
26-
POST /removal/<archive> - create a new removal entry for <archive>
27-
"""
28-
2914

3015
class BaseHandler(BaseRestHandler):
3116
# BaseRestHandler.body_as_object() does not work well
@@ -45,11 +30,12 @@ class UploadHandler(BaseHandler):
4530
@gen.coroutine
4631
def post(self):
4732
"""
48-
Archive `path` was just now uploaded (not?) OK.
33+
Creates a new Upload object in the db, and the associated Archive if it doesn't already exist.
4934
5035
:param path: Path to archive uploaded
51-
:param description: The TSM description of the archive
36+
:param description: The unique TSM description of the archive
5237
:param host: From which host the archive was uploaded
38+
:return Information about the created object
5339
"""
5440

5541
body = self.decode(required_members=["path", "description", "host"])
@@ -65,60 +51,140 @@ def post(self):
6551
"path": upload.archive.path,
6652
"host": upload.archive.host}})
6753

54+
55+
class VerificationHandler(BaseHandler):
56+
6857
@gen.coroutine
69-
def get(self, archive):
58+
def post(self):
59+
"""
60+
Creates a new Verification object in the db, associated to a certain Archive object.
61+
If no Archive object matching the input parameters is found one will be created.
62+
This way we can take care of verifications done for archives uploaded to PDC before
63+
this web service and db existed.
64+
65+
:param description: The unique TSM description of the archive we've verified.
66+
:param path: The path to the archive that was uploaded
67+
:param host: The host from which the archive was uploaded
68+
:return Information about the created object
7069
"""
71-
Archive `foo` was last uploaded OK at date `bar`.
70+
body = self.decode(required_members=["description", "path", "host"])
7271

73-
:param archive: Path to archive uploaded
74-
:param description: The TSM description of the archive
75-
:param host: From which host the archive was uploaded
76-
:return The `archive` when it was last uploaded. If no `archive` specified, then it will
77-
return the last global upload archive.
72+
archive, created = Archive.get_or_create(description=body["description"], host=body["host"], path=body["path"])
73+
74+
verification = Verification.create(archive=archive, timestamp=dt.datetime.utcnow())
75+
76+
self.write_json({"status": "created", "verification":
77+
{"id": verification.id,
78+
"timestamp": str(verification.timestamp),
79+
"description": verification.archive.description,
80+
"path": verification.archive.path,
81+
"host": verification.archive.host}})
82+
83+
class RandomUnverifiedArchiveHandler(BaseHandler):
84+
85+
@gen.coroutine
86+
def get(self):
7887
"""
79-
# Step 1 - get date when archive was last updated
80-
pass
88+
Returns an unverified Archive object that has an associated Upload object
89+
within the interval [today - age - margin, today - margin]. The margin value is
90+
used as a safety buffer, to make sure that the archived data has been properly
91+
flushed to tape upstreams at PDC.
92+
93+
:param age: Number of days we should look back when picking an unverified archive
94+
:param safety_margin: Number of days we should use as safety buffer
95+
:return A randomly picked unverified archive within the specified date interval
96+
"""
97+
body = self.decode(required_members=["age", "safety_margin"])
98+
age = int(body["age"])
99+
margin = int(body["safety_margin"])
100+
101+
from_timestamp = dt.datetime.utcnow() - dt.timedelta(days=age+margin)
102+
to_timestamp = dt.datetime.utcnow() - dt.timedelta(days=margin)
103+
104+
# "Give me a randomly chosen archive that was uploaded between from_timestamp and
105+
# to_timestamp, and has no previous verifications"
106+
query = (Upload
107+
.select()
108+
.join(Verification, JOIN.LEFT_OUTER, on=(
109+
Verification.archive_id == Upload.archive_id))
110+
.where(Upload.timestamp.between(from_timestamp, to_timestamp))
111+
.group_by(Upload.archive_id)
112+
.having(fn.Count(Verification.id) < 1)
113+
.order_by(fn.Random())
114+
.limit(1))
115+
116+
result_len = query.count()
117+
118+
if result_len > 0:
119+
upload = next(query.execute())
120+
archive_name = os.path.basename(os.path.normpath(upload.archive.path))
121+
self.write_json({"status": "unverified", "archive":
122+
{"timestamp": str(upload.timestamp),
123+
"path": upload.archive.path,
124+
"description": upload.archive.description,
125+
"host": upload.archive.host,
126+
"archive": archive_name}})
127+
else:
128+
msg = "No unverified archives uploaded between {} and {} was found!".format(
129+
from_timestamp.strftime("%Y-%m-%d %H:%M:%S"), to_timestamp.strftime("%Y-%m-%d %H:%M:%S"))
130+
raise HTTPError(500, msg)
131+
81132

82133
# TODO: We might have to add logic in some of the services
83134
# that adds a file with the description inside the archive,
84135
# so we can verify that we're operating on the correct
85-
# archive before verifying/removing.
86-
136+
# archive before (verifying/)removing.
87137

88-
class VerificationHandler(BaseHandler):
138+
class RemovalHandler(BaseHandler):
89139

90140
@gen.coroutine
91-
def post(self, archive):
141+
def post(self):
92142
"""
93-
Archive `foo` was verified (not) OK at date `bar`.
94-
95-
:param archive: Path to archive verified
96-
:param description: The TSM description of the archive we verified
143+
Archive `foo` was either staged for removal or actually just physically removed from local disk, as well
144+
as all its associated files (e.g. runfolder etc).
97145
"""
98146
pass
99-
# Step 1 - set date when archive was verified OK
147+
100148

101-
@gen.coroutine
102-
def get(self, archive):
103149
"""
104-
Give me the date for when any archive was last verified (OK).
105-
106-
:param archive: Path to archive we want to check
107-
:return The `archive` when it was last verified. If no `archive` specified, then it will
108-
return the last globally verified archive.
150+
# This is an example for how one could start implementing the handler that first schedules archives for
151+
# removal.
152+
153+
body = self.decode(required_members=["description", "action"])
154+
155+
try:
156+
archive = Archive.get(description=body["description"])
157+
except Archive.DoesNotExist:
158+
msg = "No archive with the unique description {} exists in the database!".format(body["description"])
159+
self.set_status(500, msg)
160+
self.write_json({"status": msg})
161+
162+
if body["action"] == "set_removable":
163+
removal = Removal.create(archive=archive, timestamp_scheduled=dt.datetime.utcnow())
164+
165+
self.write_json({"status": "scheduled", "removal":
166+
{"id": removal.id,
167+
"timestamp_scheduled": str(removal.timestamp_scheduled),
168+
"description": removal.archive.description,
169+
"path": removal.archive.path,
170+
"host": removal.archive.host,
171+
"done": removal.done}})
172+
elif body["action"] == "set_removed":
173+
pass
174+
else:
175+
msg = "Expecting parameter 'action' to be 'set_removable' or 'set_removed'."
176+
raise HTTPError(400, msg)
109177
"""
110-
pass
111-
112-
113-
class RemovalHandler(BaseHandler):
114178

115179
@gen.coroutine
116-
def post(self, archive):
180+
def get(self):
117181
"""
118-
Archive `foo` was removed from disk at date `bar`.
182+
HTTP GET /removal is in this imagined implementation supposed to return those Archive objects
183+
that are removable and are verified. One could probably do this by e.g.
119184
120-
:param archive: Path to archive removed from disk
121-
:param description: The TSM description of the archive we removed
185+
- fetch latest date from Verify, which has done == False, and call this X
186+
- fetch all Uploads that have has a timestamp older or equal to X
187+
- the set of Archives belonging to those Uploads should be OK to remove
122188
"""
123189
pass
124190

archive_db/models/Model.py

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,10 @@
11
from peewee import *
22

3-
# TODO: Shall we log failed operations? (not uploaded OK, not verified OK)
4-
# TODO: Shall we have anything to do with staging operations?
3+
# For schema migrations, see http://docs.peewee-orm.com/en/latest/peewee/database.html#schema-migrations
4+
# and http://docs.peewee-orm.com/en/latest/peewee/playhouse.html#migrate
5+
#
6+
# Make sure that we *always*, as extra security, take a backup of the previous
7+
# db before doing a migration. We should also take continuous backups
58

69
db_proxy = Proxy()
710

@@ -48,8 +51,13 @@ class Removal(ChildModel):
4851
archive = ForeignKeyField(Archive, related_name="removals")
4952
timestamp = DateTimeField()
5053

51-
# For schema migrations, see http://docs.peewee-orm.com/en/latest/peewee/database.html#schema-migrations
52-
# and http://docs.peewee-orm.com/en/latest/peewee/playhouse.html#migrate
53-
#
54-
# Make sure that we *always*, as extra security, take a backup of the previous
55-
# db before doing a migration. We should also take continous backups
54+
"""
55+
To let archive-remove better support archives staged/marked/scheduled for removal I envision that
56+
one could modify this to something like the following instead:
57+
58+
archive = ForeignKeyField(Archive, related_name="removals")
59+
done = BooleanField(default=False) # False = archive has been scheduled for removal; True = archive has been removed.
60+
timestamp_scheduled = DateTimeField() # Or one can just let the queries look to see which timestamp has been filled with a value.
61+
timestamp_done = DateTimeField()
62+
"""
63+

config/app.config

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,3 +9,4 @@ archive_db_log_directory: /var/arteria/archive-db/
99

1010
# Path to the Sqlite db
1111
archive_db_path: /var/db/arteria/archive.db
12+

tests/test_models.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ def get_app(self):
2727
return Application(routes())
2828

2929
def go(self, target, method, body=""):
30-
return self.fetch(self.API_BASE + target, method=method, body=json_encode(body), headers={"Content-Type": "application/json"})
30+
return self.fetch(self.API_BASE + target, method=method, body=json_encode(body), headers={"Content-Type": "application/json"}, allow_nonstandard_methods=True)
3131

3232
def create_data(self):
3333
for i in range(self.num_archives):
@@ -89,3 +89,14 @@ def test_create_upload_for_existing_archive(self):
8989
resp = json_decode(resp.body)
9090
self.assertEqual(resp["status"], "created")
9191
self.assertEqual(resp["upload"]["id"], upload_two)
92+
93+
# Populating the db in a similar way as in self.create_data() does not make the data available for
94+
# the handlers, as they seem to live in another in-memory instance of the db. Therefore a
95+
# failing test will have to do for now.
96+
def test_failing_fetch_random_unverified_archive(self):
97+
# I.e. our lookback window is [today - 5 - 1, today - 1] days.
98+
body = {"age": "5", "safety_margin": "1"}
99+
resp = self.go("/randomarchive", method="GET", body=body)
100+
self.assertEqual(resp.code, 500)
101+
102+

0 commit comments

Comments
 (0)