77
88import json
99from argparse import ArgumentParser
10+ from io import StringIO
11+
12+ from pyld import jsonld
13+
14+ from rdflib import Dataset
15+ from rdflib .namespace import PROV
16+ from rdflib .plugins .sparql import prepareQuery
1017
1118from bids import BIDSLayout
1219from bids .layout .models import BIDSFile , BIDSJSONFile
@@ -127,6 +134,54 @@ def get_sidecar_entity_record(data_file: BIDSFile) -> dict:
127134
128135 return None
129136
def get_entities_in_group(input_graph: dict, group: str = None) -> list:
    """ Return the Ids of Entity provenance records from the provenance graph
    and belonging to the same provenance group.

    An Entity is selected when it is typed as prov:Entity and is either
    generated by (prov:wasGeneratedBy) or used by (prov:used) a node typed
    as prov:Activity in the graph.

    NOTE(review): `group` is accepted but is not used by the selection yet,
    so Activities are considered regardless of their group - confirm whether
    the query should be restricted to the requested group.

    Arguments:
    - input_graph, dict: JSON-LD graph containing provenance records
    - group, str: label of the provenance group
    Return:
    - list: sorted list of the Ids (str) of the Entity provenance records
        from the graph that are linked to an Activity
    """

    # Create an RDF graph from the expanded JSON-LD document
    graph = Dataset()
    graph.parse(StringIO(json.dumps(jsonld.expand(input_graph))), format='json-ld')

    # Search for all prov:Entity either generated by or used by a prov:Activity.
    # DISTINCT on the single projected variable replaces the former
    # "SELECT ?s ?p ?o ... GROUP BY ?s", which projected non-grouped variables
    # without aggregating them (not allowed by SPARQL 1.1).
    query = prepareQuery("""
        SELECT DISTINCT ?s WHERE {
            ?s a prov:Entity .
            ?act a prov:Activity .
            { ?s prov:wasGeneratedBy ?act . }
            UNION
            { ?act prov:used ?s . }
        }
        """,
        initNs={'prov': PROV}
    )

    # Serialize each subject with the graph's namespace manager and strip the
    # angle brackets that the N3 notation puts around full IRIs
    entity_ids = {
        row[0].n3(graph.namespace_manager).replace('<', '').replace('>', '')
        for row in graph.query(query)
    }

    # Sorted so that the returned list has a deterministic order
    return sorted(entity_ids)
130185
131186def merge_records (layout : BIDSLayout , group : str = None ) -> dict :
132187 """ Merge provenance records of a dataset (`layout`) from the provenance group `group`.
@@ -156,7 +211,6 @@ def merge_records(layout: BIDSLayout, group: str = None) -> dict:
156211 base_provenance ['Records' ]['Software' ] += file .get_dict ()['Software' ]
157212
158213 # Get provenance metadata from other JSON files in the dataset
159- # TODO : how to filter on provenance group ?
160214 for data_file in get_described_files (layout ):
161215 entity = get_entity_record (data_file )
162216 base_provenance ['Records' ]['Entities' ].append (entity )
@@ -169,6 +223,14 @@ def merge_records(layout: BIDSLayout, group: str = None) -> dict:
169223 if entity is not None :
170224 base_provenance ['Records' ]['Entities' ].append (entity )
171225
226+ # Filter on provenance group
227+ entities_in_group = get_entities_in_group (base_provenance , group )
228+ entities = []
229+ for entity in base_provenance ['Records' ]['Entities' ]:
230+ if entity ['Id' ] in entities_in_group :
231+ entities .append (entity )
232+ base_provenance ['Records' ]['Entities' ] = entities
233+
172234 return base_provenance
173235
174236def entry_point ():
0 commit comments