Skip to content

Commit bfb7cf4

Browse files
committed
Filtering entities in group
1 parent 12b623b commit bfb7cf4

File tree

1 file changed

+63
-1
lines changed

1 file changed

+63
-1
lines changed

bids_prov/merge.py

Lines changed: 63 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,13 @@
77

88
import json
99
from argparse import ArgumentParser
10+
from io import StringIO
11+
12+
from pyld import jsonld
13+
14+
from rdflib import Dataset
15+
from rdflib.namespace import PROV
16+
from rdflib.plugins.sparql import prepareQuery
1017

1118
from bids import BIDSLayout
1219
from bids.layout.models import BIDSFile, BIDSJSONFile
@@ -127,6 +134,54 @@ def get_sidecar_entity_record(data_file: BIDSFile) -> dict:
127134

128135
return None
129136

137+
def get_entities_in_group(input_graph: dict, group: str = None) -> list:
    """ Return the Ids of Entity provenance records from the provenance graph
    that are linked to an Activity.

    An Entity is selected when the graph contains a prov:Activity that
    either generated it (prov:wasGeneratedBy) or used it (prov:used).

    Arguments:
    - input_graph, dict: JSON-LD graph containing provenance records
    - group, str: label of the provenance group
      NOTE(review): this argument is currently never read by the function;
      the selection relies only on the content of `input_graph`.
    Return:
    - list: sorted list of the Ids (str) of the selected Entity records,
      without duplicates
    """

    # Expand the JSON-LD document and load it into an RDF dataset
    graph = Dataset()
    graph.parse(StringIO(json.dumps(jsonld.expand(input_graph))), format='json-ld')

    # Search for all prov:Entity generated by a prov:Activity in the graph
    generated_entities = _select_entity_ids(graph, '?s prov:wasGeneratedBy ?act .')

    # Search for all prov:Entity used by a prov:Activity in the graph
    used_entities = _select_entity_ids(graph, '?act prov:used ?s .')

    # Sorting makes the output deterministic (set iteration order is arbitrary)
    return sorted(used_entities | generated_entities)


def _select_entity_ids(graph, relation_pattern: str) -> set:
    """ Return the Ids of the prov:Entity nodes of `graph` that are linked to
    a prov:Activity through `relation_pattern`.

    Arguments:
    - graph: rdflib graph or dataset to query
    - relation_pattern, str: SPARQL triple pattern relating the entity `?s`
      to the activity `?act`
    Return:
    - set: Ids (str) of the matching entities
    """

    # DISTINCT gives one row per entity; no need to project (and group)
    # the other triples describing it.
    query = prepareQuery("""
        SELECT DISTINCT ?s WHERE {
            ?s a prov:Entity .
            ?act a prov:Activity .
            %s
        }
        """ % relation_pattern,
        initNs={'prov': PROV}
    )

    # n3() renders IRIs as `<iri>` (or as a prefixed name when the namespace
    # is bound in the graph); angle brackets are stripped to keep a bare Id.
    return {
        subject.n3(graph.namespace_manager).replace('<', '').replace('>', '')
        for (subject,) in graph.query(query)
    }
130185

131186
def merge_records(layout: BIDSLayout, group: str = None) -> dict:
132187
""" Merge provenance records of a dataset (`layout`) from the provenance group `group`.
@@ -156,7 +211,6 @@ def merge_records(layout: BIDSLayout, group: str = None) -> dict:
156211
base_provenance['Records']['Software'] += file.get_dict()['Software']
157212

158213
# Get provenance metadata from other JSON files in the dataset
159-
# TODO : how to filter on provenance group ?
160214
for data_file in get_described_files(layout):
161215
entity = get_entity_record(data_file)
162216
base_provenance['Records']['Entities'].append(entity)
@@ -169,6 +223,14 @@ def merge_records(layout: BIDSLayout, group: str = None) -> dict:
169223
if entity is not None:
170224
base_provenance['Records']['Entities'].append(entity)
171225

226+
# Filter on provenance group
227+
entities_in_group = get_entities_in_group(base_provenance, group)
228+
entities = []
229+
for entity in base_provenance['Records']['Entities']:
230+
if entity['Id'] in entities_in_group:
231+
entities.append(entity)
232+
base_provenance['Records']['Entities'] = entities
233+
172234
return base_provenance
173235

174236
def entry_point():

0 commit comments

Comments
 (0)