Skip to content

Commit 4fa2a61

Browse files
committed
Simplified version of the provenance
1 parent 92fad0a commit 4fa2a61

File tree

6 files changed

+41074
-34444
lines changed

6 files changed

+41074
-34444
lines changed

examples/fmriprep/README.md

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,7 @@ From that, we generate the JSON-LD graph `prov/merge/prov-fmriprep.prov.jsonld`.
113113

114114
### Limitations
115115

116+
* For now, we use a simplified description of the provenance, leaving aside software and environments as well as keys such as `Digest`, `Version`, `EnvVar`, `StartedAtTime`, `EndedAtTime`.
116117
* Some entities end up with several labels and/or several `prov:atLocation` values. E.g.:
117118
```JSON-LD
118119
{
@@ -129,8 +130,6 @@ From that, we generate the JSON-LD graph `prov/merge/prov-fmriprep.prov.jsonld`.
129130
"https://github.com/bids-standard/BEP028_BIDSprov/terms/Digest": "sha512:c585500ee6565b5e8277e3cf72dcdef81768439e7998c258d9e3cfc4042cf2d3fa80ecd359400deda90a4ed141e3180b78a942b32827bd41fb0ca367c8f91c9c"
130131
}
131132
```
132-
133-
* Nipype generated entities both for its interface and the execution of the commands. In the BIDS-Prov records, we only keep the entities describing commands.
134133
* Some terms are missing from the BIDS-Prov context although they are defined in the specification (such as `Digest`, `Version`, `EnvVar`).
135134
* For now, the conversion script is not able to transform RDF triples into dictionaries, as required for `Digest` or `EnvVar` objects.
136-
* IRIs are not human readable enough
135+
* IRIs are not human-readable enough (e.g., `http://iri.nidash.org/262c247816c9fc071309a1da8bad277d`).

examples/fmriprep/derivatives/fmriprep/code/convert_prov.py

Lines changed: 8 additions & 128 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@
99
from rdflib.namespace import RDF, RDFS, PROV
1010
from rdflib.plugins.sparql import prepareQuery
1111

12+
from queries import queries, simple_queries
13+
1214
# Dict of namespaces to be used in queries
1315
NAMESPACES = {
1416
'rdfs': RDFS,
@@ -28,136 +30,14 @@
2830
# Create an empty graph for output provenance
2931
bids_prov = Graph()
3032

31-
# Create a list of queries to extract data from the input file
32-
query_labels = [
33-
'1. Extract output file entities',
34-
'2. Extract input file entities',
35-
'3. Extract activities',
36-
'4. Extract agents',
37-
'5. Extract environments'
38-
]
39-
queries = [
40-
# 1. Extract output file entities
41-
"""
42-
CONSTRUCT {
43-
?s rdfs:label ?label .
44-
?s prov:atLocation ?atlocation .
45-
?s prov:wasGeneratedBy ?act .
46-
?s rdf:type ?type .
47-
}
48-
WHERE {
49-
?s ?p ?o .
50-
?s prov:qualifiedGeneration ?gen . # entity has a qualified generation
51-
?gen prov:activity ?act . # this qualified generation has an activity
52-
?act nipype:command ?x . # this activity has a command (disables activities representing nipype interfaces)
53-
?s prov:value ?label .
54-
?s prov:atLocation ?atlocation .
55-
?s rdf:type prov:Entity .
56-
?s rdf:type ?type .
57-
?s crypto:sha512 ?sha .
58-
BIND(STR(?label) as ?label)
59-
BIND(STR(?atlocation) as ?atlocation)
60-
}
61-
""",
62-
# 2. Extract input file entities
63-
"""
64-
CONSTRUCT {
65-
?s rdfs:label ?label .
66-
?s prov:atLocation ?atlocation .
67-
?s rdf:type prov:Entity .
68-
?s bidsprov:Digest ?sha .
69-
}
70-
WHERE {
71-
?s ?p ?o .
72-
?collection prov:hadMember ?s .
73-
?collection rdf:type nipype:Inputs .
74-
?s prov:value ?label .
75-
?s prov:atLocation ?atlocation .
76-
?s rdf:type prov:Entity .
77-
?s crypto:sha512 ?sha .
78-
FILTER NOT EXISTS { ?s prov:wasGeneratedBy ?x . } # Entity was not generated by anything
79-
BIND(STR(?label) as ?label)
80-
BIND(STR(?atlocation) as ?atlocation)
81-
BIND(CONCAT("sha512:", STR(?sha)) as ?sha)
82-
}
83-
""",
84-
# 3. Extract activities
85-
"""
86-
CONSTRUCT {
87-
?s rdfs:label ?label .
88-
?s rdf:type prov:Activity .
89-
?s bidsprov:Command ?command . # we select activities with commands only (disables activities representing nipype interfaces)
90-
?s prov:wasAssociatedWith ?associated .
91-
# ?s prov:used ?used . # comment this line to remove prov:used environments
92-
?s prov:used ?usedent .
93-
?s prov:startedAtTime ?started .
94-
?s prov:endedAtTime ?ended .
95-
}
96-
WHERE {
97-
?s ?p ?o .
98-
?s rdfs:label ?label .
99-
?s rdf:type prov:Activity .
100-
?s nipype:command ?command .
101-
?s prov:wasAssociatedWith ?associated .
102-
?s prov:used ?used .
103-
?s prov:startedAtTime ?started .
104-
?s prov:endedAtTime ?ended .
105-
?s prov:qualifiedUsage ?qu .
106-
?qu prov:entity ?usedent .
107-
?usedent prov:atLocation ?x .
108-
BIND(STR(?label) as ?label)
109-
BIND(STR(?command) as ?command)
110-
}
111-
""",
112-
# 4. Extract agents
113-
"""
114-
CONSTRUCT {
115-
?s rdfs:label ?label .
116-
?s rdf:type prov:Agent .
117-
?s bidsprov:Version ?version .
118-
}
119-
WHERE {
120-
?s ?p ?o .
121-
?s rdfs:label ?label .
122-
?s rdf:type prov:SoftwareAgent .
123-
?s nipype:version ?version .
124-
BIND(STR(?label) as ?label)
125-
BIND(STR(?version) as ?version)
126-
}
127-
""",
128-
# 5. Extract environments
129-
"""
130-
CONSTRUCT {
131-
?s rdfs:label ?label .
132-
?s rdf:type bidsprov:Environment .
133-
?s bidsprov:EnvVar ?envvar .
134-
?envvar rdfs:label ?envvarkey .
135-
?envvar prov:value ?envvarval .
136-
}
137-
WHERE {
138-
?s ?p ?o .
139-
?s rdfs:label ?label .
140-
?s rdf:type nipype:Environment .
141-
?envvar a prov:Entity .
142-
?envvar nipype:environmentVariable ?envvarkey .
143-
?envvar prov:value ?envvarval .
144-
?s prov:hadMember ?envvar .
145-
BIND(STR(?label) as ?label)
146-
BIND(STR(?envvarkey) as ?envvarkey)
147-
BIND(STR(?envvarval) as ?envvarval)
148-
}
149-
"""
150-
]
151-
15233
# Query input graph
153-
for label, query in zip(query_labels, queries):
34+
for label, query in simple_queries.items():
15435
print(label)
155-
if 'environments' not in label:
156-
q = prepareQuery(query, initNs = NAMESPACES)
157-
for graph in nipype_prov.graphs():
158-
queried_graph = graph.query(q)
159-
if len(queried_graph) > 0:
160-
bids_prov += queried_graph
36+
q = prepareQuery(query, initNs = NAMESPACES)
37+
for graph in nipype_prov.graphs():
38+
queried_graph = graph.query(q)
39+
if len(queried_graph) > 0:
40+
bids_prov += queried_graph
16141

16242
# Serialize output graph to JSON-LD and compact
16343
compacted = jsonld.compact(
Lines changed: 215 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,215 @@
1+
#!/usr/bin/python
2+
# coding: utf-8
3+
4+
""" Queries for nipype RDF provenance to BIDS-Prov provenance """
5+
6+
queries = {
7+
"1a. Extract output file entities": """
8+
CONSTRUCT {
9+
?s rdfs:label ?label .
10+
?s prov:atLocation ?atlocation .
11+
?s prov:wasGeneratedBy ?act .
12+
?s rdf:type ?type .
13+
}
14+
WHERE {
15+
?s ?p ?o .
16+
?s prov:qualifiedGeneration ?gen . # entity has a qualified generation
17+
?gen prov:activity ?act . # this qualified generation has an activity
18+
# ?act nipype:command ?x . # this activity has a command (disables activities representing nipype interfaces)
19+
?s prov:value ?label .
20+
?s prov:atLocation ?atlocation .
21+
?s rdf:type prov:Entity .
22+
?s rdf:type ?type .
23+
?s crypto:sha512 ?sha .
24+
BIND(STR(?label) as ?label)
25+
BIND(STR(?atlocation) as ?atlocation)
26+
}
27+
""",
28+
"1b. Extract input file entities": """
29+
CONSTRUCT {
30+
?s rdfs:label ?label .
31+
?s prov:atLocation ?atlocation .
32+
?s rdf:type prov:Entity .
33+
?s bidsprov:Digest ?sha .
34+
}
35+
WHERE {
36+
?s ?p ?o .
37+
?collection prov:hadMember ?s .
38+
?collection rdf:type nipype:Inputs .
39+
?s prov:value ?label .
40+
?s prov:atLocation ?atlocation .
41+
?s rdf:type prov:Entity .
42+
?s crypto:sha512 ?sha .
43+
FILTER NOT EXISTS { ?s prov:wasGeneratedBy ?x . } # Entity was not generated by anything
44+
BIND(STR(?label) as ?label)
45+
BIND(STR(?atlocation) as ?atlocation)
46+
BIND(CONCAT("sha512:", STR(?sha)) as ?sha)
47+
}
48+
""",
49+
"2a. Extract activities with commands": """
50+
CONSTRUCT {
51+
?s rdfs:label ?label .
52+
?s rdf:type prov:Activity .
53+
?s bidsprov:Command ?command . # we select activities with commands only
54+
?s prov:wasAssociatedWith ?associated .
55+
# ?s prov:used ?used . # comment this line to remove prov:used environments
56+
?s prov:used ?usedent .
57+
?s prov:startedAtTime ?started .
58+
?s prov:endedAtTime ?ended .
59+
}
60+
WHERE {
61+
?s ?p ?o .
62+
?s rdfs:label ?label .
63+
?s rdf:type prov:Activity .
64+
?s nipype:command ?command .
65+
?s prov:wasAssociatedWith ?associated .
66+
?s prov:used ?used .
67+
?s prov:startedAtTime ?started .
68+
?s prov:endedAtTime ?ended .
69+
?s prov:qualifiedUsage ?qu .
70+
?qu prov:entity ?usedent .
71+
?usedent prov:atLocation ?x .
72+
BIND(STR(?label) as ?label)
73+
BIND(STR(?command) as ?command)
74+
}
75+
""",
76+
"2b. Extract activities with no commands": """
77+
CONSTRUCT {
78+
?s rdfs:label ?label .
79+
?s rdf:type prov:Activity .
80+
?s prov:wasAssociatedWith ?associated .
81+
# ?s prov:used ?used . # comment this line to remove prov:used environments
82+
?s prov:used ?usedent .
83+
?s prov:startedAtTime ?started .
84+
?s prov:endedAtTime ?ended .
85+
}
86+
WHERE {
87+
?s ?p ?o .
88+
?s rdfs:label ?label .
89+
?s rdf:type prov:Activity .
90+
?s prov:wasAssociatedWith ?associated .
91+
?s prov:used ?used .
92+
?s prov:startedAtTime ?started .
93+
?s prov:endedAtTime ?ended .
94+
?s prov:qualifiedUsage ?qu .
95+
?qu prov:entity ?usedent .
96+
?usedent prov:atLocation ?x .
97+
FILTER NOT EXISTS { ?s nipype:command ?command . } # Activity does not have any command
98+
BIND(STR(?label) as ?label)
99+
}
100+
""",
101+
"3. Extract agents": """
102+
CONSTRUCT {
103+
?s rdfs:label ?label .
104+
?s rdf:type prov:Agent .
105+
?s bidsprov:Version ?version .
106+
}
107+
WHERE {
108+
?s ?p ?o .
109+
?s rdfs:label ?label .
110+
?s rdf:type prov:SoftwareAgent .
111+
?s nipype:version ?version .
112+
BIND(STR(?label) as ?label)
113+
BIND(STR(?version) as ?version)
114+
}
115+
""",
116+
"4. Extract environments": """
117+
CONSTRUCT {
118+
?s rdfs:label ?label .
119+
?s rdf:type bidsprov:Environment .
120+
?s bidsprov:EnvVar ?envvar .
121+
?envvar rdfs:label ?envvarkey .
122+
?envvar prov:value ?envvarval .
123+
}
124+
WHERE {
125+
?s ?p ?o .
126+
?s rdfs:label ?label .
127+
?s rdf:type nipype:Environment .
128+
?envvar a prov:Entity .
129+
?envvar nipype:environmentVariable ?envvarkey .
130+
?envvar prov:value ?envvarval .
131+
?s prov:hadMember ?envvar .
132+
BIND(STR(?label) as ?label)
133+
BIND(STR(?envvarkey) as ?envvarkey)
134+
BIND(STR(?envvarval) as ?envvarval)
135+
}
136+
"""
137+
}
138+
139+
simple_queries = {
140+
"1a. Extract output file entities": """
141+
CONSTRUCT {
142+
?s rdfs:label ?label .
143+
?s prov:atLocation ?atlocation .
144+
?s prov:wasGeneratedBy ?act .
145+
?s rdf:type prov:Entity .
146+
}
147+
WHERE {
148+
?s ?p ?o .
149+
?s prov:qualifiedGeneration ?gen . # entity has a qualified generation
150+
?gen prov:activity ?act . # this qualified generation has an activity
151+
?s prov:value ?label .
152+
?s prov:atLocation ?atlocation .
153+
?s rdf:type prov:Entity .
154+
?s rdf:type ?type .
155+
BIND(STR(?label) as ?label)
156+
BIND(STR(?atlocation) as ?atlocation)
157+
}
158+
""",
159+
"1b. Extract input file entities": """
160+
CONSTRUCT {
161+
?s rdfs:label ?label .
162+
?s prov:atLocation ?atlocation .
163+
?s rdf:type prov:Entity .
164+
}
165+
WHERE {
166+
?s ?p ?o .
167+
?collection prov:hadMember ?s .
168+
?collection rdf:type nipype:Inputs .
169+
?s prov:value ?label .
170+
?s prov:atLocation ?atlocation .
171+
?s rdf:type prov:Entity .
172+
FILTER NOT EXISTS { ?s prov:wasGeneratedBy ?x . } # Entity was not generated by anything
173+
BIND(STR(?label) as ?label)
174+
BIND(STR(?atlocation) as ?atlocation)
175+
}
176+
""",
177+
"2a. Extract activities with commands": """
178+
CONSTRUCT {
179+
?s rdfs:label ?label .
180+
?s rdf:type prov:Activity .
181+
?s bidsprov:Command ?command . # we select activities with commands only
182+
?s prov:used ?usedent .
183+
}
184+
WHERE {
185+
?s ?p ?o .
186+
?s rdfs:label ?label .
187+
?s rdf:type prov:Activity .
188+
?s nipype:command ?command .
189+
?s prov:used ?used .
190+
?s prov:qualifiedUsage ?qu .
191+
?qu prov:entity ?usedent .
192+
?usedent prov:atLocation ?x .
193+
BIND(STR(?label) as ?label)
194+
BIND(STR(?command) as ?command)
195+
}
196+
""",
197+
"2b. Extract activities with no commands": """
198+
CONSTRUCT {
199+
?s rdfs:label ?label .
200+
?s rdf:type prov:Activity .
201+
?s prov:used ?usedent .
202+
}
203+
WHERE {
204+
?s ?p ?o .
205+
?s rdfs:label ?label .
206+
?s rdf:type prov:Activity .
207+
?s prov:used ?used .
208+
?s prov:qualifiedUsage ?qu .
209+
?qu prov:entity ?usedent .
210+
?usedent prov:atLocation ?x .
211+
FILTER NOT EXISTS { ?s nipype:command ?command . } # Activity does not have any command
212+
BIND(STR(?label) as ?label)
213+
}
214+
"""
215+
}

0 commit comments

Comments
 (0)