Skip to content

Commit c725e75

Browse files
committed
services: create zenodo deposit through CAP
* creates a Zenodo deposit, with files from CAP
* saves metadata about the Zenodo deposit, and attaches it to a CAP deposit
* integration tests
* closes #1938
* closes #1934

Signed-off-by: Ilias Koutsakis <[email protected]>
1 parent 08546ed commit c725e75

File tree

8 files changed

+644
-53
lines changed

8 files changed

+644
-53
lines changed

cap/config.py

+1-4
Original file line numberDiff line numberDiff line change
@@ -720,10 +720,7 @@ def _(x):
720720

721721
# Zenodo
722722
# ======
723-
ZENODO_SERVER_URL = os.environ.get('APP_ZENODO_SERVER_URL',
724-
'https://zenodo.org/api')
725-
726-
ZENODO_ACCESS_TOKEN = os.environ.get('APP_ZENODO_ACCESS_TOKEN', 'CHANGE_ME')
723+
ZENODO_SERVER_URL = os.environ.get('APP_ZENODO_SERVER_URL', 'https://zenodo.org/api') # noqa
727724

728725
# Endpoints
729726
# =========

cap/modules/deposit/api.py

+79-48
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@
4949
from sqlalchemy.orm.exc import NoResultFound
5050
from werkzeug.local import LocalProxy
5151

52+
from cap.modules.auth.ext import _fetch_token
5253
from cap.modules.deposit.errors import DisconnectWebhookError, FileUploadError
5354
from cap.modules.deposit.validators import NoRequiredValidator
5455
from cap.modules.experiments.permissions import exp_need_factory
@@ -75,6 +76,8 @@
7576
UpdateDepositPermission)
7677

7778
from .review import Reviewable
79+
from .tasks import upload_to_zenodo
80+
from .utils import create_zenodo_deposit
7881

7982
_datastore = LocalProxy(lambda: current_app.extensions['security'].datastore)
8083

@@ -254,53 +257,82 @@ def upload(self, pid, *args, **kwargs):
254257
_, rec = request.view_args.get('pid_value').data
255258
record_uuid = str(rec.id)
256259
data = request.get_json()
257-
webhook = data.get('webhook', False)
258-
event_type = data.get('event_type', 'release')
259-
260-
try:
261-
url = data['url']
262-
except KeyError:
263-
raise FileUploadError('Missing url parameter.')
264-
265-
try:
266-
host, owner, repo, branch, filepath = parse_git_url(url)
267-
api = create_git_api(host, owner, repo, branch,
268-
current_user.id)
269-
270-
if filepath:
271-
if webhook:
272-
raise FileUploadError(
273-
'You cannot create a webhook on a file')
274-
275-
download_repo_file(
276-
record_uuid,
277-
f'repositories/{host}/{owner}/{repo}/{api.branch or api.sha}/{filepath}', # noqa
278-
*api.get_file_download(filepath),
279-
api.auth_headers,
280-
)
281-
elif webhook:
282-
if event_type == 'release':
283-
if branch:
284-
raise FileUploadError(
285-
'You cannot create a release webhook'
286-
' for a specific branch or sha.')
260+
target = data.get('target')
261+
262+
if target == 'zenodo':
263+
# check for token
264+
token = _fetch_token('zenodo')
265+
if not token:
266+
raise FileUploadError(
267+
'Please connect your Zenodo account '
268+
'before creating a deposit.')
269+
270+
files = data.get('files')
271+
bucket = data.get('bucket')
272+
zenodo_data = data.get('zenodo_data', {})
273+
274+
if files and bucket:
275+
zenodo_deposit = create_zenodo_deposit(token, zenodo_data) # noqa
276+
self.setdefault('_zenodo', []).append(zenodo_deposit)
277+
self.commit()
278+
279+
# upload files to zenodo deposit
280+
upload_to_zenodo.delay(
281+
record_uuid, files, bucket, token,
282+
zenodo_deposit['id'],
283+
zenodo_deposit['links']['bucket'])
284+
else:
285+
raise FileUploadError(
286+
'You cannot create an empty Zenodo deposit. '
287+
'Please add some files.')
288+
else:
289+
webhook = data.get('webhook', False)
290+
event_type = data.get('event_type', 'release')
287291

288-
if event_type == 'push' and \
289-
api.branch is None and api.sha:
290-
raise FileUploadError(
291-
'You cannot create a push webhook'
292-
' for a specific sha.')
292+
try:
293+
url = data['url']
294+
except KeyError:
295+
raise FileUploadError('Missing url parameter.')
293296

294-
create_webhook(record_uuid, api, event_type)
295-
else:
296-
download_repo.delay(
297-
record_uuid,
298-
f'repositories/{host}/{owner}/{repo}/{api.branch or api.sha}.tar.gz', # noqa
299-
api.get_repo_download(),
300-
api.auth_headers)
297+
try:
298+
host, owner, repo, branch, filepath = parse_git_url(url) # noqa
299+
api = create_git_api(host, owner, repo, branch,
300+
current_user.id)
301+
302+
if filepath:
303+
if webhook:
304+
raise FileUploadError(
305+
'You cannot create a webhook on a file')
306+
307+
download_repo_file(
308+
record_uuid,
309+
f'repositories/{host}/{owner}/{repo}/{api.branch or api.sha}/{filepath}', # noqa
310+
*api.get_file_download(filepath),
311+
api.auth_headers,
312+
)
313+
elif webhook:
314+
if event_type == 'release':
315+
if branch:
316+
raise FileUploadError(
317+
'You cannot create a release webhook'
318+
' for a specific branch or sha.')
319+
320+
if event_type == 'push' and \
321+
api.branch is None and api.sha:
322+
raise FileUploadError(
323+
'You cannot create a push webhook'
324+
' for a specific sha.')
325+
326+
create_webhook(record_uuid, api, event_type)
327+
else:
328+
download_repo.delay(
329+
record_uuid,
330+
f'repositories/{host}/{owner}/{repo}/{api.branch or api.sha}.tar.gz', # noqa
331+
api.get_repo_download(),
332+
api.auth_headers)
301333

302-
except GitError as e:
303-
raise FileUploadError(str(e))
334+
except GitError as e:
335+
raise FileUploadError(str(e))
304336

305337
return self
306338

@@ -584,16 +616,15 @@ def validate(self, **kwargs):
584616

585617
validator = NoRequiredValidator(schema, resolver=resolver)
586618

587-
result = {}
588-
result['errors'] = [
619+
errors = [
589620
FieldError(
590621
list(error.path)+error.validator_value,
591622
str(error.message))
592623
for error in validator.iter_errors(self)
593624
]
594625

595-
if result['errors']:
596-
raise DepositValidationError(None, errors=result['errors'])
626+
if errors:
627+
raise DepositValidationError(None, errors=errors)
597628
except RefResolutionError:
598629
raise DepositValidationError('Schema {} not found.'.format(
599630
self['$schema']))

cap/modules/deposit/errors.py

+27
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,18 @@ def __init__(self, description, **kwargs):
8787
self.description = description or self.description
8888

8989

90+
class AuthorizationError(RESTException):
    """Error raised when authorization fails; carries status code 401."""

    code = 401

    def __init__(self, description, **kwargs):
        """Build the exception, forwarding ``kwargs`` to the parent class.

        :param description: Message for this error. A falsy value keeps
            whatever description the instance already has after the parent
            constructor ran.
        """
        super().__init__(**kwargs)

        # Only a truthy description overrides the existing one.
        self.description = description if description else self.description
100+
101+
90102
class DisconnectWebhookError(RESTException):
91103
"""Exception during disconnecting webhook for analysis."""
92104

@@ -124,3 +136,18 @@ def __init__(self, description, errors=None, **kwargs):
124136

125137
self.description = description or self.description
126138
self.errors = [FieldError(e[0], e[1]) for e in errors.items()]
139+
140+
141+
class DataValidationError(RESTValidationError):
    """Validation error for submitted data; carries status code 400.

    ``errors`` is an iterable of dicts, each read for a ``field`` and a
    ``message`` key and converted into a ``FieldError``.
    """

    code = 400

    description = "Validation error. Try again with valid data"

    def __init__(self, description, errors=None, **kwargs):
        """Initialize exception.

        :param description: Message overriding the class-level default; a
            falsy value keeps the default.
        :param errors: Optional iterable of dicts with ``field`` and
            ``message`` keys. ``None`` (the default) means "no field errors".
        :param kwargs: Forwarded to the parent constructor.
        """
        super(DataValidationError, self).__init__(**kwargs)

        self.description = description or self.description
        # ``errors`` defaults to None, so it must not be iterated directly.
        # ``.get`` keeps a malformed entry from raising KeyError here and
        # masking the validation error that is being reported.
        self.errors = [
            FieldError(e.get('field'), e.get('message'))
            for e in (errors or [])
        ]

cap/modules/deposit/tasks.py

+78
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
# -*- coding: utf-8 -*-
2+
#
3+
# This file is part of CERN Analysis Preservation Framework.
4+
# Copyright (C) 2018 CERN.
5+
#
6+
# CERN Analysis Preservation Framework is free software; you can redistribute
7+
# it and/or modify it under the terms of the GNU General Public License as
8+
# published by the Free Software Foundation; either version 2 of the
9+
# License, or (at your option) any later version.
10+
#
11+
# CERN Analysis Preservation Framework is distributed in the hope that it will
12+
# be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
13+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14+
# General Public License for more details.
15+
#
16+
# You should have received a copy of the GNU General Public License
17+
# along with CERN Analysis Preservation Framework; if not, write to the
18+
# Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
19+
# MA 02111-1307, USA.
20+
#
21+
# In applying this license, CERN does not
22+
# waive the privileges and immunities granted to it by virtue of its status
23+
# as an Intergovernmental Organization or submit itself to any jurisdiction.
24+
"""Tasks."""
25+
26+
from __future__ import absolute_import, print_function
27+
28+
import requests
29+
from flask import current_app
30+
from celery import shared_task
31+
from invenio_db import db
32+
from invenio_files_rest.models import FileInstance, ObjectVersion
33+
34+
35+
@shared_task(autoretry_for=(Exception, ),
             retry_kwargs={
                 'max_retries': 5,
                 'countdown': 10
             })
def upload_to_zenodo(record_uuid, files, bucket, token,
                     zenodo_depid, zenodo_bucket_url):
    """Upload files of a CAP deposit to an existing Zenodo deposit.

    Each file is PUT to ``<zenodo_bucket_url>/<filename>``. For every upload
    that succeeds, its ``self`` link, ``key`` and ``size`` are appended to the
    ``files`` list of the matching entry in ``record['_zenodo']``, and the
    record is committed. Failed uploads are logged and skipped.

    :param record_uuid: Identifier passed to ``CAPDeposit.get_record``.
    :param files: Iterable of file names (keys) to upload.
    :param bucket: Bucket passed to ``ObjectVersion.get`` to find each file.
    :param token: Zenodo access token, sent as the ``access_token`` query
        parameter.
    :param zenodo_depid: Id of the Zenodo deposit; selects the entry of
        ``record['_zenodo']`` to update.
    :param zenodo_bucket_url: Base URL of the Zenodo bucket to upload into.
    :raises LookupError: If the record has no ``_zenodo`` entry with id
        ``zenodo_depid``. Like any other exception, this triggers the
        task's automatic retry.
    """
    from cap.modules.deposit.api import CAPDeposit
    record = CAPDeposit.get_record(record_uuid)

    # Resolve the target entry BEFORE uploading anything. Otherwise a missing
    # entry only surfaces after all uploads, and every retry re-uploads every
    # file.
    # NOTE(review): the entry may be missing if this task starts before the
    # caller's record update is persisted - confirm. Raising keeps the retry.
    deposit = next(
        (d for d in record.get('_zenodo', []) if d['id'] == zenodo_depid),
        None)
    if deposit is None:
        raise LookupError(
            f'Zenodo deposit {zenodo_depid} not found in record '
            f'{record_uuid}.')

    file_list = []
    for filename in files:
        file_obj = ObjectVersion.get(bucket, filename)
        file_ins = FileInstance.get(file_obj.file_id)

        # upload each file in the deposit
        with open(file_ins.uri, 'rb') as fp:
            response = requests.put(
                url=f'{zenodo_bucket_url}/{filename}',
                data=fp,
                params=dict(access_token=token),
                # (connect, read) seconds: without a timeout a stalled
                # connection would block the worker indefinitely.
                timeout=(10, 300),
            )

        if response.ok:
            data = response.json()
            file_list.append({
                'self': data['links']['self'],
                'key': data['key'],
                'size': data['size']
            })
        else:
            current_app.logger.error(
                f'Uploading file {filename} to deposit {zenodo_depid} '
                f'failed with {response.status_code}.')

    if file_list:
        deposit.setdefault('files', []).extend(file_list)
        record.commit()
        db.session.commit()

cap/modules/deposit/utils.py

+43
Original file line numberDiff line numberDiff line change
@@ -25,9 +25,13 @@
2525

2626
from __future__ import absolute_import, print_function
2727

28+
import requests
29+
from flask import current_app
2830
from invenio_access.models import Role
2931
from invenio_db import db
3032

33+
from cap.modules.deposit.errors import AuthorizationError, \
34+
DataValidationError, FileUploadError
3135
from cap.modules.records.utils import url_to_api_url
3236

3337

@@ -75,3 +79,42 @@ def add_api_to_links(links):
7579
item['links'] = add_api_to_links(item.get('links'))
7680

7781
return response
82+
83+
84+
def create_zenodo_deposit(token, data):
    """Create a Zenodo deposit using the logged in user's credentials.

    :param token: Zenodo access token, sent as the ``access_token`` query
        parameter.
    :param data: Metadata of the new deposit, sent as
        ``{'metadata': data}`` in the JSON body.
    :returns: Dict with the deposit ``id``, ``title``, a ``links`` dict
        (``self``, ``bucket``, ``html``, ``publish``) and an empty ``files``
        list.
    :raises AuthorizationError: If Zenodo answers with status 401.
    :raises DataValidationError: If Zenodo answers with status 400 and a
        ``'Validation error.'`` message.
    :raises FileUploadError: On any other unsuccessful response, or if the
        request itself fails (connection error, timeout).
    """
    zenodo_url = current_app.config.get("ZENODO_SERVER_URL")
    try:
        response = requests.post(
            url=f'{zenodo_url}/deposit/depositions',
            params=dict(access_token=token),
            json={'metadata': data},
            headers={'Content-Type': 'application/json'},
            # Without a timeout a stalled connection blocks the caller
            # indefinitely.
            timeout=30,
        )
    except requests.RequestException:
        # Report transport failures as the same REST error used for other
        # creation failures instead of leaking a raw requests exception.
        raise FileUploadError(
            'Something went wrong, Zenodo deposit not created.')

    if not response.ok:
        if response.status_code == 401:
            raise AuthorizationError(
                'Authorization to Zenodo failed. Please reconnect.')
        if response.status_code == 400:
            # A 400 body is not guaranteed to be a JSON object; fall through
            # to the generic error when it is not.
            try:
                payload = response.json()
            except ValueError:
                payload = None
            if isinstance(payload, dict) and \
                    payload.get('message') == 'Validation error.':
                raise DataValidationError(
                    'Validation error on creating the Zenodo deposit.',
                    # the ``errors`` key may be absent; never pass None
                    errors=payload.get('errors') or [])
        raise FileUploadError(
            'Something went wrong, Zenodo deposit not created.')

    # TODO: fix with serializers
    created = response.json()
    links = created['links']
    zenodo_deposit = {
        'id': created['id'],
        'title': created.get('metadata', {}).get('title'),
        'links': {
            'self': links['self'],
            'bucket': links['bucket'],
            'html': links['html'],
            'publish': links['publish'],
        },
        'files': []
    }
    return zenodo_deposit

docker-services.yml

+2
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,8 @@ services:
2727
- "INVENIO_RATELIMIT_STORAGE_URL=redis://cache:6379/3"
2828
- "INVENIO_CERN_APP_CREDENTIALS_KEY=CHANGE_ME"
2929
- "INVENIO_CERN_APP_CREDENTIALS_SECRET=CHANGE_ME"
30+
- "INVENIO_ZENODO_CLIENT_ID=CHANGE_ME"
31+
- "INVENIO_ZENODO_CLIENT_SECRET=CHANGE_ME"
3032
- "DEV_HOST=CHANGE_ME"
3133
lb:
3234
build: ./docker/haproxy/

tests/conftest.py

+18-1
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
import tempfile
2929
from datetime import datetime, timedelta
3030
from uuid import uuid4
31+
from six import BytesIO
3132

3233
import pytest
3334
from flask import current_app
@@ -108,7 +109,8 @@ def default_config():
108109
DEBUG=False,
109110
TESTING=True,
110111
APP_GITLAB_OAUTH_ACCESS_TOKEN='testtoken',
111-
MAIL_DEFAULT_SENDER="[email protected]")
112+
MAIL_DEFAULT_SENDER="[email protected]",
113+
ZENODO_SERVER_URL='https://zenodo-test.org')
112114

113115

114116
@pytest.fixture(scope='session')
@@ -401,6 +403,21 @@ def deposit(example_user, create_deposit):
401403
)
402404

403405

406+
@pytest.fixture
def deposit_with_file(example_user, create_schema, create_deposit):
    """Deposit on 'test-schema' created with one in-memory text file."""
    schema_name = 'test-schema'
    create_schema(schema_name, experiment='CMS')

    metadata = {'$ana_type': schema_name, 'title': 'test title'}
    # Single file handed to ``create_deposit`` through its ``files`` argument.
    attached_files = {'test-file.txt': BytesIO(b'Hello world!')}

    return create_deposit(example_user,
                          schema_name,
                          metadata,
                          files=attached_files,
                          experiment='CMS')
419+
420+
404421
@pytest.fixture
405422
def record(example_user, create_deposit):
406423
"""Example record."""

0 commit comments

Comments
 (0)