Skip to content

Commit ff20b31

Browse files
committed
Adapting merge code to derivative datasets layouts
1 parent ca61a6f commit ff20b31

32 files changed

+793
-135
lines changed

bids_prov/merge.py

Lines changed: 47 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
from JSON files into one RDF graph.
66
"""
77

8+
from pathlib import Path
89
import json
910
from argparse import ArgumentParser
1011
from io import StringIO
@@ -18,6 +19,18 @@
1819
from bids import BIDSLayout
1920
from bids.layout.models import BIDSFile, BIDSJSONFile
2021

22+
def get_associated_sidecar(layout: BIDSLayout, data_file: BIDSFile) -> BIDSJSONFile:
    """ Return the JSON sidecar associated with a BIDSFile in a given BIDSLayout.

    This function is a workaround for BIDSFile.get_associations not working with
    derivative datasets.

    The sidecar path is built by swapping the trailing extension(s) of the data
    file name (e.g. `.nii.gz`) for `.json`, then looked up in the layout.

    Arguments:
    - layout, BIDSLayout: layout in which the sidecar is looked up
    - data_file, BIDSFile: data file whose sidecar is wanted
    Return:
    - the object returned by `layout.get_file` for the sidecar path
      (callers in this module treat a `None` result as "no sidecar")
    """
    data_path = str(Path(data_file.path))
    extensions = ''.join(Path(data_path).suffixes)

    # Only strip the extensions at the very end of the path. A global
    # str.replace would also rewrite matching text in parent directories
    # (e.g. `ds.nii/`), and would insert '.json' between every character
    # when the file has no extension at all (replace('', ...)).
    # NOTE(review): a dot inside an entity value (e.g. `desc-0.5_bold.nii.gz`)
    # is still counted as part of the extensions by Path.suffixes - confirm
    # whether such file names can occur in the targeted datasets.
    sidecar_filename = data_path[:len(data_path) - len(extensions)] + '.json'

    return layout.get_file(sidecar_filename)
33+
2134
def filter_provenance_group(files: list, group: str) -> list:
2235
""" Filter a given BIDSFile list, returning the sub-list containing the BIDS
2336
`prov` entity equal to group
@@ -74,7 +87,6 @@ def get_dataset_entity_record(description_file: BIDSJSONFile) -> dict:
7487
AND
7588
- at least one of the objects of the GeneratedBy has an Id field.
7689
"""
77-
7890
metadata = description_file.get_dict()
7991

8092
# Provenance Entity record for the dataset
@@ -92,8 +104,10 @@ def get_dataset_entity_record(description_file: BIDSJSONFile) -> dict:
92104

93105
return entity
94106

95-
def get_entity_record(data_file: BIDSFile) -> dict:
96-
""" Return an Entity provenance record from metadata of a BIDS file. """
107+
def get_entity_record(layout: BIDSLayout, data_file: BIDSFile) -> dict:
108+
""" Return an Entity provenance record from metadata of a BIDS file
109+
in a given BIDSLayout.
110+
"""
97111

98112
# Provenance Entity record for the data file
99113
entity = {
@@ -103,7 +117,12 @@ def get_entity_record(data_file: BIDSFile) -> dict:
103117
}
104118

105119
# Get provenance-related metadata
106-
metadata = data_file.get_metadata()
120+
## metadata = data_file.get_metadata()
121+
sidecar = get_associated_sidecar(layout, data_file)
122+
if sidecar is None:
123+
return None
124+
125+
metadata = sidecar.get_dict()
107126
if 'GeneratedBy' in metadata:
108127
entity['GeneratedBy'] = metadata['GeneratedBy']
109128
if 'Digest' in metadata:
@@ -113,11 +132,14 @@ def get_entity_record(data_file: BIDSFile) -> dict:
113132

114133
return entity
115134

116-
def get_sidecar_entity_record(data_file: BIDSFile) -> dict:
117-
""" Return an Entity provenance record for the sidecar of a BIDS file, given its metadata. """
135+
def get_sidecar_entity_record(layout: BIDSLayout, data_file: BIDSFile) -> dict:
136+
""" Return an Entity provenance record for the sidecar of a BIDS file, in a given BIDSLayout.
137+
"""
118138

119139
# Get sidecar associated with the data_file
120-
sidecar = data_file.get_associations()[0]
140+
sidecar = get_associated_sidecar(layout, data_file)
141+
if sidecar is None:
142+
return None
121143

122144
# Provenance Entity record for the sidecar JSON file
123145
entity = {
@@ -127,26 +149,25 @@ def get_sidecar_entity_record(data_file: BIDSFile) -> dict:
127149
}
128150

129151
# Get provenance-related metadata
130-
metadata = data_file.get_metadata()
152+
metadata = sidecar.get_dict()
131153
if 'SidecarGeneratedBy' in metadata:
132154
entity['GeneratedBy'] = metadata['SidecarGeneratedBy']
133155
return entity
134156

135157
return None
136158

137-
def get_entities_in_group(input_graph: dict, group: str = None) -> list:
159+
def get_linked_entities(input_graph: dict) -> list:
138160
""" Return the Ids of Entity provenance records from the provenance graph
139-
and belonging to the same provenance group.
161+
that were either used or generated by an Activity.
140162
141163
Arguments:
142164
- input_graph, dict: JSON-LD graph containing provenance records
143-
- group, str: label of the provenance group
144165
Return:
145166
- list: list of Entity provenance records from the graph that
146-
belong to the group
167+
were either used or generated by an Activity
147168
"""
148169

149-
# Open file & create graph from it
170+
# Create RDF graph from input JSON-LD
150171
graph = Dataset()
151172
graph.parse(StringIO(json.dumps(jsonld.expand(input_graph))), format='json-ld')
152173

@@ -180,7 +201,7 @@ def get_entities_in_group(input_graph: dict, group: str = None) -> list:
180201
used_entities = [s.n3(graph.namespace_manager).replace('<', '').replace('>', '')
181202
for s, _, _ in graph.query(query)]
182203

183-
# Return all prov:Entity in the group
204+
# Return all linked prov:Entity
184205
return list(set(used_entities + generated_entities))
185206

186207
def merge_records(layout: BIDSLayout, group: str = None) -> dict:
@@ -212,10 +233,11 @@ def merge_records(layout: BIDSLayout, group: str = None) -> dict:
212233

213234
# Get provenance metadata from other JSON files in the dataset
214235
for data_file in get_described_files(layout):
215-
entity = get_entity_record(data_file)
216-
base_provenance['Records']['Entities'].append(entity)
217-
for sidecar in get_described_sidecars(layout):
218-
entity = get_sidecar_entity_record(sidecar)
236+
entity = get_entity_record(layout, data_file)
237+
if entity is not None:
238+
base_provenance['Records']['Entities'].append(entity)
239+
for data_file in get_described_sidecars(layout):
240+
entity = get_sidecar_entity_record(layout, data_file)
219241
if entity is not None:
220242
base_provenance['Records']['Entities'].append(entity)
221243
for dataset in get_described_datasets(layout):
@@ -224,7 +246,7 @@ def merge_records(layout: BIDSLayout, group: str = None) -> dict:
224246
base_provenance['Records']['Entities'].append(entity)
225247

226248
# Filter on provenance group
227-
entities_in_group = get_entities_in_group(base_provenance, group)
249+
entities_in_group = get_linked_entities(base_provenance)
228250
entities = []
229251
for entity in base_provenance['Records']['Entities']:
230252
if entity['Id'] in entities_in_group:
@@ -239,6 +261,8 @@ def entry_point():
239261
parser = ArgumentParser()
240262
parser.add_argument('--dataset', '-d', type=str, default='.',
241263
help='The path to the input BIDS dataset.')
264+
parser.add_argument('--derivative', action='store_true',
265+
help='Set this option to specify the dataset is a BIDS derivative dataset.')
242266
parser.add_argument('--output_file', '-o', type=str, required=True,
243267
help='Output JSON-LD file containing the provenance graph for the input dataset.')
244268
parser.add_argument('--group', '-g', type=str,
@@ -248,7 +272,10 @@ def entry_point():
248272
# Write output JSON-LD file
249273
with open(arguments.output_file, 'w', encoding = 'utf-8') as file:
250274
file.write(
251-
json.dumps(merge_records(BIDSLayout(arguments.dataset), arguments.group), indent = 2)
275+
json.dumps(merge_records(
276+
BIDSLayout(arguments.dataset, is_derivative=arguments.derivative),
277+
arguments.group
278+
), indent = 2)
252279
)
253280

254281
if __name__ == '__main__':

0 commit comments

Comments
 (0)