forked from CERNDocumentServer/cds-videos
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathjson.py
More file actions
125 lines (105 loc) · 4.78 KB
/
json.py
File metadata and controls
125 lines (105 loc) · 4.78 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
# -*- coding: utf-8 -*-
#
# This file is part of CDS.
# Copyright (C) 2017, 2018 CERN.
#
# CDS is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# CDS is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with CDS. If not, see <http://www.gnu.org/licenses/>.
#
# In applying this licence, CERN does not waive the privileges and immunities
# granted to it by virtue of its status as an Intergovernmental Organization
# or submit itself to any jurisdiction.
"""CDS JSON Serializer."""
from flask import has_request_context
from flask_security import current_user
from invenio_records_rest.serializers.json import JSONSerializer
from ..api import CDSRecord
from ..permissions import (
has_read_record_eos_path_permission,
has_read_record_permission,
)
from ..utils import HTMLTagRemover, remove_html_tags
from marshmallow_utils.html import sanitize_html
class CDSJSONSerializer(JSONSerializer):
    """CDS JSON serializer.

    Adds or removes fields depending on access rights, and sanitizes the
    HTML found in titles and descriptions before they are serialized.
    """

    # Shared helper used to unescape HTML entities and to strip HTML tags.
    html_tag_remover = HTMLTagRemover()

    def dump(self, obj, context=None):
        """Serialize object with schema.

        :param obj: Object handed to the schema's ``dump``.
        :param context: Optional context passed to the schema constructor.
        :returns: The result of ``schema_class(context=context).dump(obj)``.
        """
        return self.schema_class(context=context).dump(obj)

    def _sanitize_title_and_description(self, entry):
        """Sanitize the title and description of one mapping, in place.

        ``entry["title"]["title"]`` is unescaped and stripped of HTML tags;
        ``entry["description"]`` is unescaped and passed through
        ``sanitize_html``. Keys that are absent are left untouched.

        :param entry: The record metadata or one item of its translations.
        """
        if "title" in entry and "title" in entry["title"]:
            title = self.html_tag_remover.unescape(entry["title"]["title"])
            entry["title"]["title"] = remove_html_tags(
                self.html_tag_remover, title
            )
        if "description" in entry:
            description = self.html_tag_remover.unescape(entry["description"])
            entry["description"] = sanitize_html(description)

    def _sanitize_metadata(self, metadata):
        """Sanitize title, description and translations in metadata.

        The metadata is modified in place and also returned.

        :param metadata: Metadata mapping of a record or of a search hit.
        :returns: The same ``metadata`` object.
        """
        try:
            self._sanitize_title_and_description(metadata)
            if "translations" in metadata:
                for translation in metadata["translations"]:
                    self._sanitize_title_and_description(translation)
        except KeyError:
            # best effort: ignore error if keys are missing
            pass
        return metadata

    def preprocess_record(self, pid, record, links_factory=None):
        """Prepare a record for serialization, honouring access rights.

        For ``CDSRecord`` instances, ``_eos_library_path`` is removed from the
        serialized metadata unless there is a request context and the current
        user has the EOS path read permission. Title, description and
        translations are sanitized, and within a request context the
        ``videos`` list is reduced to the entries the current user can read.

        :param pid: Persistent identifier forwarded to the parent serializer.
        :param record: Record being serialized.
        :param links_factory: Optional links factory forwarded to the parent.
        :returns: The preprocessed record produced by the parent serializer,
            with its ``metadata`` adjusted as described above.
        """
        result = super(CDSJSONSerializer, self).preprocess_record(
            pid, record, links_factory=links_factory
        )
        # Add/remove fields depending on access right.
        if isinstance(record, CDSRecord):
            metadata = result["metadata"]
            if "_eos_library_path" in record and (
                not has_request_context()
                or not has_read_record_eos_path_permission(current_user, record)
            ):
                # The membership test is on ``record`` while the key is
                # removed from ``metadata``; the default keeps a missing key
                # from raising outside the ``try`` block below.
                metadata.pop("_eos_library_path", None)
            # sanitize title by unescaping and stripping html tags
            try:
                metadata = self._sanitize_metadata(metadata)
                if has_request_context():
                    metadata["videos"] = [
                        video
                        for video in metadata["videos"]
                        if has_read_record_permission(current_user, video)
                    ]
            except KeyError:
                # ignore error if keys are missing in the metadata
                # (e.g. there is no "videos" key to filter)
                pass
        return result

    def preprocess_search_hit(self, pid, record_hit, links_factory=None):
        """Prepare a record hit from opensearch for serialization.

        :param pid: Persistent identifier forwarded to the parent serializer.
        :param record_hit: Search hit forwarded to the parent serializer.
        :param links_factory: Accepted for interface compatibility but
            deliberately not forwarded (see the comment below).
        :returns: The preprocessed hit, with its ``metadata`` sanitized when
            that key is present.
        """
        # do not pass links_factory when fetching data from ES, otherwise it
        # will load the record from db for each search result
        # see: cds.modules.records.links.record_link_factory
        result = super(CDSJSONSerializer, self).preprocess_search_hit(pid, record_hit)
        if "metadata" in result:
            result["metadata"] = self._sanitize_metadata(result["metadata"])
        return result