Skip to content

Commit 0bcac52

Browse files
committed
services: metadata input on zenodo deposit creation
* adds serializers/validation for metadata input * adds unit tests for zenodo serializer * closes #1952 Signed-off-by: Ilias Koutsakis <[email protected]>
1 parent 32f7798 commit 0bcac52

File tree

7 files changed

+499
-52
lines changed

7 files changed

+499
-52
lines changed

cap/modules/deposit/api.py

+22-15
Original file line numberDiff line numberDiff line change
@@ -61,14 +61,15 @@
6161
from cap.modules.repos.tasks import download_repo, download_repo_file
6262
from cap.modules.repos.utils import (create_webhook, disconnect_subscriber,
6363
parse_git_url)
64+
from cap.modules.services.serializers.zenodo import ZenodoUploadSchema
6465
from cap.modules.schemas.resolvers import (resolve_schema_by_url,
6566
schema_name_to_url)
6667
from cap.modules.user.errors import DoesNotExistInLDAP
6768
from cap.modules.user.utils import (get_existing_or_register_role,
6869
get_existing_or_register_user)
6970

7071
from .errors import (DepositValidationError, UpdateDepositPermissionsError,
71-
ReviewError)
72+
ReviewError, InputValidationError)
7273
from .fetchers import cap_deposit_fetcher
7374
from .minters import cap_deposit_minter
7475
from .permissions import (AdminDepositPermission, CloneDepositPermission,
@@ -257,7 +258,7 @@ def upload(self, pid, *args, **kwargs):
257258
with UpdateDepositPermission(self).require(403):
258259
if request:
259260
_, rec = request.view_args.get('pid_value').data
260-
record_uuid = str(rec.id)
261+
recid = str(rec.id)
261262
data = request.get_json()
262263
target = data.get('target')
263264

@@ -269,20 +270,26 @@ def upload(self, pid, *args, **kwargs):
269270
'Please connect your Zenodo account '
270271
'before creating a deposit.')
271272

272-
files = data.get('files')
273-
bucket = data.get('bucket')
274-
zenodo_data = data.get('zenodo_data', {})
273+
files = data.get('files', [])
274+
zenodo_data = data.get('zenodo_data')
275+
input = dict(files=files, data=zenodo_data) \
276+
if zenodo_data else dict(files=files)
275277

276-
if files and bucket:
277-
zenodo_deposit = create_zenodo_deposit(token, zenodo_data) # noqa
278-
self.setdefault('_zenodo', []).append(zenodo_deposit)
278+
if files:
279+
_, errors = ZenodoUploadSchema(recid=recid).load(input)
280+
if errors:
281+
raise InputValidationError(
282+
'Validation error in Zenodo input data.',
283+
errors=errors)
284+
285+
deposit = create_zenodo_deposit(token, zenodo_data)
286+
self.setdefault('_zenodo', []).append(deposit)
279287
self.commit()
280288

281289
# upload files to zenodo deposit
282-
upload_to_zenodo.delay(
283-
files, bucket, token,
284-
zenodo_deposit['id'],
285-
zenodo_deposit['links']['bucket'])
290+
upload_to_zenodo.delay(files, recid, token,
291+
deposit['id'],
292+
deposit['links']['bucket'])
286293
else:
287294
raise FileUploadError(
288295
'You cannot create an empty Zenodo deposit. '
@@ -307,7 +314,7 @@ def upload(self, pid, *args, **kwargs):
307314
'You cannot create a webhook on a file')
308315

309316
download_repo_file(
310-
record_uuid,
317+
recid,
311318
f'repositories/{host}/{owner}/{repo}/{api.branch or api.sha}/{filepath}', # noqa
312319
*api.get_file_download(filepath),
313320
api.auth_headers,
@@ -325,10 +332,10 @@ def upload(self, pid, *args, **kwargs):
325332
'You cannot create a push webhook'
326333
' for a specific sha.')
327334

328-
create_webhook(record_uuid, api, event_type)
335+
create_webhook(recid, api, event_type)
329336
else:
330337
download_repo.delay(
331-
record_uuid,
338+
recid,
332339
f'repositories/{host}/{owner}/{repo}/{api.branch or api.sha}.tar.gz', # noqa
333340
api.get_repo_download(),
334341
api.auth_headers)

cap/modules/deposit/errors.py

+15
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,21 @@ def __init__(self, description, errors=None, **kwargs):
138138
self.errors = [FieldError(e[0], e[1]) for e in errors.items()]
139139

140140

141+
class InputValidationError(RESTValidationError):
    """Input validation error exception.

    Raised when user supplied input (e.g. the Zenodo upload payload)
    does not pass schema validation.
    """

    code = 400

    description = "Validation error. Try again with valid data"

    def __init__(self, description, errors=None, **kwargs):
        """Initialize exception.

        :param description: Human readable message; when falsy the class
            level ``description`` is kept.
        :param errors: Optional mapping of field name to error message(s).
            Each item is wrapped in a ``FieldError``.
        :param kwargs: Forwarded untouched to ``RESTValidationError``.
        """
        super(InputValidationError, self).__init__(**kwargs)

        self.description = description or self.description
        # ``errors`` is optional in the signature, so fall back to an empty
        # mapping instead of failing with AttributeError on ``None.items()``.
        self.errors = [
            FieldError(field, messages)
            for field, messages in (errors or {}).items()
        ]
154+
155+
141156
class DataValidationError(RESTValidationError):
142157
"""Review validation error exception."""
143158

cap/modules/deposit/tasks.py

+9-7
Original file line numberDiff line numberDiff line change
@@ -28,29 +28,31 @@
2828
import requests
2929
from flask import current_app
3030
from celery import shared_task
31-
from invenio_db import db
32-
from invenio_files_rest.models import FileInstance, ObjectVersion
31+
from invenio_files_rest.models import FileInstance
3332

3433

3534
@shared_task(autoretry_for=(Exception, ),
             retry_kwargs={
                 'max_retries': 5,
                 'countdown': 10
             })
def upload_to_zenodo(files, recid, token, zenodo_depid, zenodo_bucket_url):
    """Push the selected files of a deposit to a Zenodo bucket.

    :param files: Keys (filenames) of the deposit files to upload.
    :param recid: Identifier passed to ``CAPDeposit.get_record``.
    :param token: Zenodo access token, sent as ``access_token`` parameter.
    :param zenodo_depid: Id of the Zenodo deposit (used for logging only).
    :param zenodo_bucket_url: Base url of the Zenodo bucket; every file is
        PUT to ``<zenodo_bucket_url>/<filename>``.

    A non-OK response is logged and the loop moves on to the next file.
    """
    # Imported inside the task rather than at module level
    # (presumably to avoid a circular import - confirm before moving it).
    from cap.modules.deposit.api import CAPDeposit

    record = CAPDeposit.get_record(recid)

    for filename in files:
        file_instance = FileInstance.get(record.files[filename].file_id)
        target_url = f'{zenodo_bucket_url}/{filename}'

        with open(file_instance.uri, 'rb') as stream:
            response = requests.put(
                url=target_url,
                data=stream,
                params={'access_token': token},
            )

            if response.ok:
                continue

            current_app.logger.error(
                f'Uploading file {filename} to deposit {zenodo_depid} '
                f'failed with {response.status_code}.')

cap/modules/deposit/utils.py

+7-18
Original file line numberDiff line numberDiff line change
@@ -27,13 +27,13 @@
2727

2828
import requests
2929
from flask import current_app
30-
from flask_login import current_user
3130
from invenio_access.models import Role
3231
from invenio_db import db
3332

3433
from cap.modules.deposit.errors import AuthorizationError, \
3534
DataValidationError, FileUploadError
3635
from cap.modules.records.utils import url_to_api_url
36+
from cap.modules.services.serializers.zenodo import ZenodoDepositSchema
3737

3838

3939
def clean_empty_values(data):
@@ -82,13 +82,15 @@ def add_api_to_links(links):
8282
return response
8383

8484

85-
def create_zenodo_deposit(token, data):
85+
def create_zenodo_deposit(token, data=None):
8686
"""Create a Zenodo deposit using the logged in user's credentials."""
8787
zenodo_url = current_app.config.get("ZENODO_SERVER_URL")
88+
zenodo_data = {'metadata': data} if data else {}
89+
8890
deposit = requests.post(
8991
url=f'{zenodo_url}/deposit/depositions',
9092
params=dict(access_token=token),
91-
json={'metadata': data},
93+
json=zenodo_data,
9294
headers={'Content-Type': 'application/json'}
9395
)
9496

@@ -105,18 +107,5 @@ def create_zenodo_deposit(token, data):
105107
raise FileUploadError(
106108
'Something went wrong, Zenodo deposit not created.')
107109

108-
# TODO: fix with serializers
109-
data = deposit.json()
110-
zenodo_deposit = {
111-
'id': data['id'],
112-
'title': data.get('metadata', {}).get('title'),
113-
'creator': current_user.id,
114-
'created': data['created'],
115-
'links': {
116-
'self': data['links']['self'],
117-
'bucket': data['links']['bucket'],
118-
'html': data['links']['html'],
119-
'publish': data['links']['publish'],
120-
}
121-
}
122-
return zenodo_deposit
110+
data = ZenodoDepositSchema().dump(deposit.json()).data
111+
return data
+166
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,166 @@
1+
# -*- coding: utf-8 -*-
2+
#
3+
# This file is part of CERN Analysis Preservation Framework.
4+
# Copyright (C) 2020 CERN.
5+
#
6+
# CERN Analysis Preservation Framework is free software; you can redistribute
7+
# it and/or modify it under the terms of the GNU General Public License as
8+
# published by the Free Software Foundation; either version 2 of the
9+
# License, or (at your option) any later version.
10+
#
11+
# CERN Analysis Preservation Framework is distributed in the hope that it will
12+
# be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
13+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14+
# General Public License for more details.
15+
#
16+
# You should have received a copy of the GNU General Public License
17+
# along with CERN Analysis Preservation Framework; if not, write to the
18+
# Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
19+
# MA 02111-1307, USA.
20+
#
21+
# In applying this license, CERN does not
22+
# waive the privileges and immunities granted to it by virtue of its status
23+
# as an Intergovernmental Organization or submit itself to any jurisdiction.
24+
#
25+
26+
"""Zenodo Serializer/Validator."""
27+
28+
import arrow
29+
from flask_login import current_user
30+
from marshmallow import Schema, fields, ValidationError, validate, validates, \
31+
validates_schema
32+
33+
# Dates must be exactly ``YYYY-mm-dd``. The pattern is anchored on both
# sides because a regex ``match`` only anchors the start, which would let
# values such as ``2020-01-01T10:00`` or ``2020-01-01junk`` slip through.
DATE_REGEX = r'^\d{4}-\d{2}-\d{2}\Z'
DATE_ERROR = 'The date should follow the pattern YYYY-mm-dd.'

# Accepted values for the ``upload_type`` metadata field.
UPLOAD_TYPES = [
    'publication',
    'poster',
    'presentation',
    'dataset',
    'image',
    'video',
    'software',
    'lesson',
    'physicalobject',
    'other'
]
# Accepted values for the ``license`` metadata field.
LICENSES = [
    'CC-BY-4.0',
    'CC-BY-1.0',
    'CC-BY-2.0',
    'CC-BY-3.0'
]
# Accepted values for the ``access_right`` metadata field.
ACCESS_RIGHTS = [
    'open',
    'embargoed',
    'restricted',
    'closed'
]
60+
61+
62+
def choice_error_msg(choices):
    """Return the error message shown when a value is not in ``choices``."""
    return 'Not a valid choice. Select one of: {}'.format(choices)
64+
65+
66+
class ZenodoCreatorsSchema(Schema):
    """Schema of a single entry in the ``creators`` metadata list."""

    # the only mandatory attribute of a creator
    name = fields.String(required=True)

    # optional attributes
    affiliation = fields.String()
    orcid = fields.String()
70+
71+
72+
class ZenodoDepositMetadataSchema(Schema):
    """Validate the metadata supplied for a new Zenodo deposit.

    Besides the per-field checks, two extra validators run:

    * ``validate_embargo_date`` - the embargo date has to be a real
      calendar date lying in the future.
    * ``validate_license`` - some fields become mandatory depending on
      the chosen ``access_right``.
    """

    title = fields.String(required=True)
    description = fields.String(required=True)
    version = fields.String()

    keywords = fields.List(fields.String())
    creators = fields.List(
        fields.Nested(ZenodoCreatorsSchema), required=True)

    upload_type = fields.String(required=True, validate=validate.OneOf(
        UPLOAD_TYPES, error=choice_error_msg(UPLOAD_TYPES)))
    license = fields.String(required=True, validate=validate.OneOf(
        LICENSES, error=choice_error_msg(LICENSES)))
    access_right = fields.String(required=True, validate=validate.OneOf(
        ACCESS_RIGHTS, error=choice_error_msg(ACCESS_RIGHTS)))

    publication_date = fields.String(
        required=True, validate=validate.Regexp(DATE_REGEX, error=DATE_ERROR))
    embargo_date = fields.String(
        validate=validate.Regexp(DATE_REGEX, error=DATE_ERROR))
    access_conditions = fields.String()

    @validates('embargo_date')
    def validate_embargo_date(self, value):
        """Validate that embargo date is a real date in the future.

        :raises ValidationError: if ``value`` cannot be parsed as a date
            or if it is not later than today (UTC).
        """
        try:
            embargo = arrow.get(value).date()
        except (arrow.parser.ParserError, ValueError):
            # The regex only checks the digit layout, so a value such as
            # ``2020-13-45`` reaches this point; report it as a validation
            # problem instead of letting the parser error escape.
            raise ValidationError(DATE_ERROR, field_names=['embargo_date'])

        if embargo <= arrow.utcnow().date():
            raise ValidationError(
                'Embargo date must be in the future.',
                field_names=['embargo_date']
            )

    @validates_schema()
    def validate_license(self, data, **kwargs):
        """Validate license according to what Zenodo expects.

        Checks, in this order, the fields that become mandatory for the
        selected ``access_right`` and raises on the first one missing.

        :raises ValidationError: attached to the missing field.
        """
        access = data.get('access_right')

        # (access rights triggering the rule, required field, message)
        conditional_rules = (
            (('open', 'embargoed'), 'license',
             'Required when access right is open or embargoed.'),
            (('embargoed', ), 'embargo_date',
             'Required when access right is embargoed.'),
            (('restricted', ), 'access_conditions',
             'Required when access right is restricted.'),
        )

        for access_rights, field_name, message in conditional_rules:
            if access in access_rights and field_name not in data:
                raise ValidationError(message, field_names=[field_name])
122+
123+
124+
class ZenodoUploadSchema(Schema):
    """Validate the payload of a Zenodo upload request.

    ``files`` lists the keys of the deposit files to upload and ``data``
    optionally carries the Zenodo metadata. The schema has to be created
    with the ``recid`` keyword, identifying the deposit the files are
    looked up in.
    """

    files = fields.List(fields.String(), required=True)
    # a callable default gives every dump its own dict instead of sharing
    # one mutable instance created at import time
    data = fields.Nested(ZenodoDepositMetadataSchema, default=dict)

    def __init__(self, *args, **kwargs):
        """Initialize the schema.

        :param recid: (keyword only, optional) id of the deposit used by
            ``validate_files``; stored on ``self.recid``.

        All other arguments are forwarded to the parent schema.
        """
        self.recid = kwargs.pop('recid', None)
        # name this class in ``super`` so that no ``__init__`` along the
        # inheritance chain (``Schema`` included) gets skipped
        super(ZenodoUploadSchema, self).__init__(*args, **kwargs)

    @validates_schema()
    def validate_files(self, data, **kwargs):
        """Check if the files exist in this deposit.

        :raises ValidationError: attached to ``files`` for the first
            requested file that is not part of the record.
        """
        from cap.modules.deposit.api import CAPDeposit
        rec = CAPDeposit.get_record(self.recid)
        existing_keys = rec.files.keys

        # ``files`` is absent from ``data`` when the field itself failed
        # validation; that error is reported already, so do not crash here.
        for _file in data.get('files') or []:
            if _file not in existing_keys:
                raise ValidationError(
                    f'File `{_file}` not found in record.',
                    field_names=['files']
                )
144+
145+
146+
class ZenodoDepositSchema(Schema):
    """Dump-only schema reducing a Zenodo deposit response.

    Keeps the id, the creation date, the title, the id of the current
    user and a fixed subset of the links.
    """

    id = fields.Int(dump_only=True)
    created = fields.String(dump_only=True)

    title = fields.Method('get_title', dump_only=True, allow_none=True)
    creator = fields.Method('get_creator', dump_only=True, allow_none=True)
    links = fields.Method('get_links', dump_only=True)

    def get_creator(self, data):
        """Return the id of the current user, ``None`` if there is none."""
        if current_user:
            return current_user.id
        return None

    def get_title(self, data):
        """Return the title found in the deposit metadata, if any."""
        metadata = data.get('metadata', {})
        return metadata.get('title')

    def get_links(self, data):
        """Return the ``self``, ``bucket``, ``html`` and ``publish`` links."""
        zenodo_links = data['links']
        return {
            name: zenodo_links[name]
            for name in ('self', 'bucket', 'html', 'publish')
        }

0 commit comments

Comments
 (0)