@@ -33,7 +33,8 @@ def __init__(self, *,
33
33
context = None ,
34
34
record_thing_link = 'mainEntity' ,
35
35
system_base_iri = None ,
36
- union = 'all.jsonld.lines' ):
36
+ union = 'all.jsonld.lines' ,
37
+ last_backwards_id_time = None ):
37
38
self .datasets_description = datasets_description
38
39
self .datasets = {}
39
40
self .current_ds_resources = set ()
@@ -49,6 +50,11 @@ def __init__(self, *,
49
50
self .current_ds_file = None
50
51
self .no_records = False
51
52
53
+ self .last_backwards_id_time = (
54
+ timeutil .w3c_dtz_to_ms (last_backwards_id_time )
55
+ if isinstance (last_backwards_id_time , str )
56
+ else None )
57
+
52
58
if datasets_description :
53
59
self ._handlers_from_datasets_description (datasets_description )
54
60
@@ -155,7 +161,8 @@ def _compile_dataset(self, name, result):
155
161
data = self .to_jsonld (data )
156
162
157
163
ds_url = urljoin (self .dataset_id , name )
158
- self ._create_dataset_description (ds_url , ds_created_ms , ds_modified_ms )
164
+ self ._create_dataset_description (
165
+ ds_url , ds_created_ms , ds_created_ms = ds_created_ms )
159
166
160
167
base_id = urljoin (self .dataset_id , base )
161
168
@@ -172,10 +179,6 @@ def _compile_dataset(self, name, result):
172
179
modified_ms = None
173
180
fpath = urlparse (nodeid ).path [1 :]
174
181
175
- if self .no_records :
176
- self .write (node , fpath )
177
- continue
178
-
179
182
meta = node .pop ('meta' , None )
180
183
if meta :
181
184
if 'created' in meta :
@@ -189,10 +192,25 @@ def _compile_dataset(self, name, result):
189
192
node ,
190
193
created_ms ,
191
194
modified_ms ,
192
- datasets = [self .dataset_id , ds_url ])
193
- self .write (desc , fpath )
195
+ datasets = [self .dataset_id , ds_url ],
196
+ ds_created_ms = ds_created_ms )
197
+
198
+ # Keep sameAs "forwards" form in meta even if no_records is used
199
+ if self .no_records :
200
+ meta = meta or {}
201
+ sameas = meta .setdefault ('sameAs' , [])
202
+ rec = desc ['@graph' ][0 ]
203
+ if 'sameAs' in rec :
204
+ sameas .append ({"@id" : rec ['@id' ]})
205
+ for same in rec .get ('sameAs' , []):
206
+ sameas .append (same )
207
+ node ['meta' ] = meta
208
+ self .write (node , fpath )
209
+ else :
210
+ self .write (desc , fpath )
194
211
195
- def _create_dataset_description (self , ds_url , created_ms , modified_ms = None , label = None ):
212
+ def _create_dataset_description (self , ds_url , created_ms , modified_ms = None ,
213
+ label = None , ds_created_ms = None ):
196
214
if not label :
197
215
label = ds_url .rsplit ('/' , 1 )[- 1 ]
198
216
ds = {
@@ -211,7 +229,7 @@ def _create_dataset_description(self, ds_url, created_ms, modified_ms=None, labe
211
229
return
212
230
213
231
desc = self ._to_node_description (ds , created_ms , modified_ms ,
214
- datasets = {self .dataset_id , ds_url })
232
+ datasets = {self .dataset_id , ds_url }, ds_created_ms = ds_created_ms )
215
233
216
234
record = desc ['@graph' ][0 ]
217
235
if self .tool_id :
@@ -220,14 +238,16 @@ def _create_dataset_description(self, ds_url, created_ms, modified_ms=None, labe
220
238
self .write (desc , ds_path )
221
239
222
240
def _to_node_description (self , node , created_ms ,
223
- modified_ms = None , datasets = None ):
241
+ modified_ms = None , datasets = None , ds_created_ms = None ):
224
242
assert self .record_thing_link not in node
225
243
226
244
node_id = node ['@id' ]
227
245
228
246
record = OrderedDict ()
229
247
record ['@type' ] = 'Record'
230
- record ['@id' ] = self .generate_record_id (created_ms , node_id )
248
+
249
+ self .set_record_id (record , created_ms , node_id , ds_created_ms )
250
+
231
251
record [self .record_thing_link ] = {'@id' : node_id }
232
252
233
253
# Add provenance
@@ -241,9 +261,19 @@ def _to_node_description(self, node, created_ms,
241
261
242
262
return {'@graph' : items }
243
263
244
- def generate_record_id (self , created_ms , node_id ):
245
- # FIXME: backwards_form=created_ms < 2015
246
- slug = lxlslug .librisencode (created_ms , lxlslug .checksum (node_id ))
264
def set_record_id(self, record, created_ms, node_id, ds_created_ms=None):
    """Set ``record['@id']`` and, for backwards-form ids, a ``sameAs`` link.

    The backwards form is chosen when the dataset creation time is strictly
    earlier than ``self.last_backwards_id_time``. In that case the id
    generated without the backwards form is also stored under
    ``record['sameAs']``.

    :param record: mutable mapping describing the record; updated in place.
    :param created_ms: creation time passed on to ``generate_record_id``.
    :param node_id: id of the described node, passed on to
        ``generate_record_id``.
    :param ds_created_ms: dataset creation time compared against the
        cutoff; falls back to ``created_ms`` when ``None``.
    """
    if ds_created_ms is None:
        ds_created_ms = created_ms

    cutoff = self.last_backwards_id_time
    # The cutoff is None when no time was configured. Ordering an int
    # against None raises TypeError on Python 3, so a missing cutoff means
    # the backwards form is never used.
    backwards_form = cutoff is not None and ds_created_ms < cutoff

    # TODO: use normal form and keep backwards_form as sameAs until "GC:able"?
    record['@id'] = self.generate_record_id(created_ms, node_id, backwards_form)
    if backwards_form:
        record['sameAs'] = [{'@id': self.generate_record_id(created_ms, node_id)}]
272
+
273
def generate_record_id(self, created_ms, node_id, backwards_form=False):
    """Return the record IRI built from ``created_ms`` and ``node_id``.

    A slug is produced by ``lxlslug.librisencode`` from the creation time
    and the checksum of ``node_id`` (``backwards_form`` is forwarded as a
    keyword argument), then resolved against ``self.system_base_iri``.
    """
    node_checksum = lxlslug.checksum(node_id)
    record_slug = lxlslug.librisencode(
        created_ms,
        node_checksum,
        backwards_form=backwards_form,
    )
    return urljoin(self.system_base_iri, record_slug)
248
278
249
279
def write (self , node , name ):
0 commit comments