Skip to content

Commit 3703032

Browse files
committed
Make YSO importer resilient when encountering corrupt URIs
1 parent fd53872 commit 3703032

File tree

1 file changed

+29
-16
lines changed

1 file changed

+29
-16
lines changed

events/importer/yso.py

Lines changed: 29 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,7 @@
33
import logging
44

55
import rdflib
6-
from django.core.exceptions import ObjectDoesNotExist
6+
from django.core.exceptions import ObjectDoesNotExist, ValidationError
77
from django_orghierarchy.models import Organization
88
from rdflib import RDF
99
from rdflib.namespace import DCTERMS, OWL, SKOS
@@ -44,7 +44,14 @@
4444

4545

4646
def get_yso_id(subject):
47-
return ':'.join(subject.split('/')[-2:])
47+
# we must validate the id, yso API might contain invalid data
48+
try:
49+
data_source, origin_id = subject.split('/')[-2:]
50+
except ValueError:
51+
raise ValidationError('Subject ' + subject + ' has invalid YSO id')
52+
if data_source != 'yso':
53+
raise ValidationError('Subject ' + subject + ' has invalid YSO id')
54+
return ':'.join((data_source, origin_id))
4855

4956

5057
def get_subject(yso_id):
@@ -137,19 +144,22 @@ def save_keywords(self, graph):
137144
labels_to_create = set()
138145
for subject, label in graph.subject_objects(SKOS.altLabel):
139146
if (subject, RDF.type, SKOS.Concept) in graph:
140-
yid = get_yso_id(subject)
141-
if bulk_mode:
142-
if label.language is not None:
143-
language = label.language
144-
if label.language == 'se':
145-
# YSO doesn't contain se, assume an error.
146-
language = 'sv'
147-
labels_to_create.add((str(label), language))
148-
keyword_labels.setdefault(yid, []).append(label)
149-
else:
150-
label = self.save_alt_label(label_syncher, graph, label)
151-
if label:
152-
keyword_labels.setdefault(yid, []).append(label)
147+
try:
148+
yid = get_yso_id(subject)
149+
if bulk_mode:
150+
if label.language is not None:
151+
language = label.language
152+
if label.language == 'se':
153+
# YSO doesn't contain se, assume an error.
154+
language = 'sv'
155+
labels_to_create.add((str(label), language))
156+
keyword_labels.setdefault(yid, []).append(label)
157+
else:
158+
label = self.save_alt_label(label_syncher, graph, label)
159+
if label:
160+
keyword_labels.setdefault(yid, []).append(label)
161+
except ValidationError as e:
162+
logger.error(e)
153163

154164
if bulk_mode:
155165
KeywordLabel.objects.bulk_create([
@@ -183,7 +193,10 @@ def save_keywords(self, graph):
183193
check_deleted_func=lambda obj: obj.deprecated)
184194
save_set = set()
185195
for subject in graph.subjects(RDF.type, SKOS.Concept):
186-
self.save_keyword(syncher, graph, subject, keyword_labels, save_set)
196+
try:
197+
self.save_keyword(syncher, graph, subject, keyword_labels, save_set)
198+
except ValidationError as e:
199+
logger.error(e)
187200
syncher.finish(force=self.options['force'])
188201

189202
def save_keyword_label_relationships_in_bulk(self, keyword_labels):

0 commit comments

Comments
 (0)