Skip to content

Commit 6931b68

Browse files
committed
Check for null dates (type and value) in the metadata
1 parent 7642a7a commit 6931b68

File tree

4 files changed

+77
-64
lines changed

4 files changed

+77
-64
lines changed

Pipfile

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -11,6 +11,7 @@ factory-boy = "*"
1111
termcolor = "*"
1212
python-dotenv = "*"
1313
pytest-env = "*"
14+
pytest-sugar = "*"
1415

1516
[packages]
1617
flask = "*"

Pipfile.lock

Lines changed: 8 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

viringo/catalogs.py

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -240,8 +240,10 @@ def build_metadata_map(self, result):
240240
dates = []
241241
if result.publication_year:
242242
dates.append(str(result.publication_year))
243-
dates.extend([date['type'] + ": " + str(date['date'])
244-
for date in result.dates])
243+
date_strings = [
244+
f"{d.get('type')}: {d.get('date')}" for d in result.dates
245+
]
246+
dates.extend(date_strings)
245247

246248
rights = []
247249
for right in result.rights:

viringo/services/datacite.py

Lines changed: 64 additions & 61 deletions
Original file line number | Diff line number | Diff line change
@@ -73,97 +73,100 @@ def __init__(
7373
def build_metadata(data):
7474
"""Parse single json-api data dict into metadata object"""
7575
result = Metadata()
76-
7776
result.identifier = data.get('id')
7877

78+
attributes = data.get("attributes", {})
79+
7980
# Here we want to parse a ISO date but convert to UTC and then remove the TZinfo entirely
8081
# This is because OAI always works in UTC.
81-
created = dateutil.parser.parse(data['attributes']['created'])
82+
created = dateutil.parser.parse(attributes['created'])
8283
result.created_datetime = created.astimezone(
8384
dateutil.tz.UTC).replace(tzinfo=None)
84-
updated = dateutil.parser.parse(data['attributes']['updated'])
85+
updated = dateutil.parser.parse(attributes['updated'])
8586
result.updated_datetime = updated.astimezone(
8687
dateutil.tz.UTC).replace(tzinfo=None)
8788

88-
result.xml = base64.b64decode(data['attributes']['xml']) \
89-
if data['attributes']['xml'] is not None else None
89+
result.xml = base64.b64decode(attributes['xml']) \
90+
if attributes['xml'] is not None else None
9091

91-
result.metadata_version = data['attributes']['metadataVersion'] \
92-
if data['attributes']['metadataVersion'] is not None else None
92+
result.metadata_version = attributes.get('metadataVersion')
9393

9494
result.titles = [
95-
title.get('title', '') for title in data['attributes']['titles']
96-
] if data['attributes']['titles'] is not None else []
95+
title.get('title', '') for title in attributes.get('titles',[])
96+
]
9797

9898
result.creators = [
99-
creator.get('name', '') for creator in data['attributes']['creators']
100-
] if data['attributes']['creators'] is not None else []
99+
creator.get('name', '') for creator in attributes.get('creators',[])
100+
]
101101

102102
result.subjects = [
103-
subject.get('subject', '') for subject in data['attributes']['subjects']
104-
] if data['attributes']['subjects'] is not None else []
105-
103+
subject.get('subject', '') for subject in attributes.get('subjects', [])
104+
]
106105
result.descriptions = [
107-
description.get('description', '') for description in data['attributes']['descriptions']
108-
] if data['attributes']['descriptions'] is not None else []
106+
description.get('description', '') for description in attributes.get('descriptions',[])
107+
]
109108

110-
result.publisher = data['attributes'].get('publisher') or ''
111-
result.publication_year = data['attributes'].get('publicationYear') or ''
109+
result.publisher = attributes.get('publisher', '')
110+
result.publication_year = attributes.get('publicationYear', '')
112111

113112
result.dates = []
114-
if data['attributes']['dates'] is not None:
115-
for date in data['attributes']['dates']:
116-
if 'date' in date and 'dateType' in date:
117-
result.dates.append(
118-
{'type': date['dateType'], 'date': date['date']})
119-
120-
result.contributors = data['attributes'].get('contributors') or []
121-
result.funding_references = data['attributes'].get(
122-
'fundingReferences') or []
123-
result.sizes = data['attributes'].get('sizes') or []
124-
result.geo_locations = data['attributes'].get('geoLocations') or []
113+
for date in attributes.get('dates',[]):
114+
type_ = date.get('dateType')
115+
value = date.get('date')
116+
if bool(type_) and bool(value) :
117+
result.dates.append(
118+
{
119+
'type': type_,
120+
'date': value
121+
}
122+
)
123+
124+
result.contributors = attributes.get('contributors',[])
125+
result.funding_references = attributes.get('fundingReferences',[])
126+
result.sizes = attributes.get('sizes', [])
127+
result.geo_locations = attributes.get('geoLocations', [])
125128

126129
result.resource_types = []
127-
result.resource_types += [data['attributes']['types'].get('resourceTypeGeneral')] \
128-
if data['attributes']['types'].get('resourceTypeGeneral') is not None else []
129-
result.resource_types += [data['attributes']['types'].get('resourceType')] \
130-
if data['attributes']['types'].get('resourceType') is not None else []
130+
result.resource_types += [attributes['types'].get('resourceTypeGeneral')] \
131+
if attributes['types'].get('resourceTypeGeneral') is not None else []
132+
result.resource_types += [attributes['types'].get('resourceType')] \
133+
if attributes['types'].get('resourceType') is not None else []
131134

132-
result.formats = data['attributes'].get('formats') or []
135+
result.formats = attributes.get('formats', [])
133136

134137
result.identifiers = []
135138

136139
# handle missing identifiers attribute
137-
if data['attributes']['identifiers'] is not None:
138-
for identifier in data['attributes']['identifiers']:
139-
if identifier['identifier']:
140-
# Special handling for the fact the API could return bad identifier
141-
# as a list rather than a string
142-
if isinstance(identifier['identifier'], list):
143-
identifier['identifier'] = ','.join(
144-
identifier['identifier'])
145-
146-
result.identifiers.append({
147-
'type': identifier['identifierType'],
148-
'identifier': strip_uri_prefix(identifier['identifier'])
149-
})
150-
151-
result.language = data['attributes'].get('language') or ''
140+
for identifier in attributes.get('identifiers',[]):
141+
if identifier['identifier']:
142+
# Special handling for the fact the API could return bad identifier
143+
# as a list rather than a string
144+
if isinstance(identifier['identifier'], list):
145+
identifier['identifier'] = ','.join(
146+
identifier['identifier'])
147+
148+
result.identifiers.append({
149+
'type': identifier['identifierType'],
150+
'identifier': strip_uri_prefix(identifier['identifier'])
151+
})
152+
153+
result.language = attributes.get('language', '')
152154

153155
result.relations = []
154-
if data['attributes']['relatedIdentifiers'] is not None:
155-
for related in data['attributes']['relatedIdentifiers']:
156-
if 'relatedIdentifier' in related:
157-
result.relations.append({
158-
'type': related['relatedIdentifierType'],
159-
'identifier': related['relatedIdentifier']
160-
})
156+
for related in attributes.get('relatedIdentifiers',[]):
157+
if 'relatedIdentifier' in related:
158+
result.relations.append({
159+
'type': related['relatedIdentifierType'],
160+
'identifier': related['relatedIdentifier']
161+
})
161162

162163
result.rights = [
163-
{'statement': right.get('rights', None),
164-
'uri': right.get('rightsUri', None)}
165-
for right in data['attributes']['rightsList']
166-
] if data['attributes']['rightsList'] is not None else []
164+
{
165+
'statement': right.get('rights', None),
166+
'uri': right.get('rightsUri', None)
167+
}
168+
for right in attributes.get('rightsList', [])
169+
]
167170

168171
result.client = data['relationships']['client']['data'].get(
169172
'id').upper() or ''
@@ -173,7 +176,7 @@ def build_metadata(data):
173176

174177
# We make the active decision based upon if there is metadata and the isActive flag
175178
# This is the same as the previous oai-pmh datacite implementation.
176-
result.active = True if result.xml and data['attributes'].get(
179+
result.active = True if result.xml and attributes.get(
177180
'isActive', True) else False
178181

179182
return result

0 commit comments

Comments
 (0)