|
| 1 | +#!/usr/bin/python |
| 2 | +# coding: utf-8 |
| 3 | + |
| 4 | +""" Convert nipype provenance traces into one BIDS-Prov compliant JSON-LD graph """ |
| 5 | + |
| 6 | +import json |
| 7 | +from pyld import jsonld |
| 8 | +from rdflib import Dataset, Graph, Namespace |
| 9 | +from rdflib.namespace import RDF, RDFS, PROV |
| 10 | +from rdflib.plugins.sparql import prepareQuery |
| 11 | + |
# Prefix-to-namespace bindings handed to the SPARQL query preparation step,
# so that queries can use short prefixes such as `prov:` or `nipype:`.
NAMESPACES = dict(
    rdfs=RDFS,
    rdf=RDF,
    prov=PROV,
    nipype=Namespace("http://nipy.org/nipype/terms/"),
    niiri=Namespace("http://iri.nidash.org/"),
    crypto=Namespace("http://id.loc.gov/vocabulary/preservation/cryptographicHashFunctions/"),
    bidsprov=Namespace("https://github.com/bids-standard/BEP028_BIDSprov/terms/"),
)
| 22 | + |
# Load the nipype RDF provenance trace.
# A Dataset (rather than a plain Graph) is used because the TriG file
# might contain several graphs.
nipype_prov = Dataset()
nipype_prov.parse(
    source='prov/workflow_provenance_20250314T155959.trig',
    format='trig',
)

# Output provenance graph; it starts out empty
bids_prov = Graph()
| 30 | + |
# SPARQL CONSTRUCT queries used to extract data from the input file, keyed by
# a human-readable label. Declaring each label next to its query keeps the two
# derived lists (`query_labels` and `queries`) in the same order and length.
_QUERIES_BY_LABEL = {
    '1. Extract output file entities': """
    CONSTRUCT {
        ?s rdfs:label ?label .
        ?s prov:atLocation ?atlocation .
        ?s prov:wasGeneratedBy ?act .
        ?s rdf:type ?type .
    }
    WHERE {
        ?s ?p ?o .
        ?s prov:qualifiedGeneration ?gen . # entity has a qualified generation
        ?gen prov:activity ?act . # this qualified generation has an activity
        ?act nipype:command ?x . # this activity has a command (disables activities representing nipype interfaces)
        ?s prov:value ?label .
        ?s prov:atLocation ?atlocation .
        ?s rdf:type prov:Entity .
        ?s rdf:type ?type .
        ?s crypto:sha512 ?sha .
        BIND(STR(?label) as ?label)
        BIND(STR(?atlocation) as ?atlocation)
    }
    """,
    '2. Extract input file entities': """
    CONSTRUCT {
        ?s rdfs:label ?label .
        ?s prov:atLocation ?atlocation .
        ?s rdf:type prov:Entity .
        ?s bidsprov:Digest ?sha .
    }
    WHERE {
        ?s ?p ?o .
        ?collection prov:hadMember ?s .
        ?collection rdf:type nipype:Inputs .
        ?s prov:value ?label .
        ?s prov:atLocation ?atlocation .
        ?s rdf:type prov:Entity .
        ?s crypto:sha512 ?sha .
        FILTER NOT EXISTS { ?s prov:wasGeneratedBy ?x . } # Entity was not generated by anything
        BIND(STR(?label) as ?label)
        BIND(STR(?atlocation) as ?atlocation)
        BIND(CONCAT("sha512:", STR(?sha)) as ?sha)
    }
    """,
    '3. Extract activities': """
    CONSTRUCT {
        ?s rdfs:label ?label .
        ?s rdf:type prov:Activity .
        ?s bidsprov:Command ?command . # we select activities with commands only (disables activities representing nipype interfaces)
        ?s prov:wasAssociatedWith ?associated .
        # ?s prov:used ?used . # comment this line to remove prov:used environments
        ?s prov:used ?usedent .
        ?s prov:startedAtTime ?started .
        ?s prov:endedAtTime ?ended .
    }
    WHERE {
        ?s ?p ?o .
        ?s rdfs:label ?label .
        ?s rdf:type prov:Activity .
        ?s nipype:command ?command .
        ?s prov:wasAssociatedWith ?associated .
        ?s prov:used ?used .
        ?s prov:startedAtTime ?started .
        ?s prov:endedAtTime ?ended .
        ?s prov:qualifiedUsage ?qu .
        ?qu prov:entity ?usedent .
        ?usedent prov:atLocation ?x .
        BIND(STR(?label) as ?label)
        BIND(STR(?command) as ?command)
    }
    """,
    '4. Extract agents': """
    CONSTRUCT {
        ?s rdfs:label ?label .
        ?s rdf:type prov:Agent .
        ?s bidsprov:Version ?version .
    }
    WHERE {
        ?s ?p ?o .
        ?s rdfs:label ?label .
        ?s rdf:type prov:SoftwareAgent .
        ?s nipype:version ?version .
        BIND(STR(?label) as ?label)
        BIND(STR(?version) as ?version)
    }
    """,
    '5. Extract environments': """
    CONSTRUCT {
        ?s rdfs:label ?label .
        ?s rdf:type bidsprov:Environment .
        ?s bidsprov:EnvVar ?envvar .
        ?envvar rdfs:label ?envvarkey .
        ?envvar prov:value ?envvarval .
    }
    WHERE {
        ?s ?p ?o .
        ?s rdfs:label ?label .
        ?s rdf:type nipype:Environment .
        ?envvar a prov:Entity .
        ?envvar nipype:environmentVariable ?envvarkey .
        ?envvar prov:value ?envvarval .
        ?s prov:hadMember ?envvar .
        BIND(STR(?label) as ?label)
        BIND(STR(?envvarkey) as ?envvarkey)
        BIND(STR(?envvarval) as ?envvarval)
    }
    """,
}

# Parallel lists: query_labels[i] describes queries[i]
query_labels = list(_QUERIES_BY_LABEL)
queries = list(_QUERIES_BY_LABEL.values())
| 151 | + |
# Run the extraction queries against every graph of the input dataset and
# accumulate the constructed triples into the output graph.
for label, query in zip(query_labels, queries):
    print(label)

    # Queries whose label mentions 'environments' are announced but not run
    if 'environments' in label:
        continue

    prepared_query = prepareQuery(query, initNs=NAMESPACES)
    for graph in nipype_prov.graphs():
        constructed = graph.query(prepared_query)
        # Only merge when the query actually produced triples
        if len(constructed) > 0:
            bids_prov += constructed
| 161 | + |
# Serialize the output graph to JSON-LD, then compact it against the
# BIDS-Prov context (the context is referenced by URL).
expanded = json.loads(bids_prov.serialize(format='json-ld'))
compacted = jsonld.compact(
    expanded,
    'https://raw.githubusercontent.com/bids-standard/BEP028_BIDSprov/master/context.json'
)

# Save the compacted JSON-LD document
with open('prov/workflow_provenance_20250314T155959_compacted.jsonld', 'w', encoding='utf-8') as file:
    json.dump(compacted, file, indent=2)
| 171 | + |
# Merge records into a BIDS-Prov skeleton: one list per record family
bids_prov_skeleton = {
    "@context": "https://raw.githubusercontent.com/bids-standard/BEP028_BIDSprov/master/context.json",
    "BIDSProvVersion": "0.0.1",
    "Records": {
        "Software": [],
        "Activities": [],
        "Entities": [],
        "Environments": []
    }
}

# JSON-LD compaction only emits a top-level '@graph' when the document holds
# several nodes: a single node comes back as a bare object, and an empty
# document has no '@graph' key at all. Normalize the three shapes to a list
# of records instead of failing with a KeyError.
if '@graph' in compacted:
    compacted_records = compacted['@graph']
else:
    lone_record = {key: value for key, value in compacted.items() if key != '@context'}
    compacted_records = [lone_record] if lone_record else []

# NOTE(review): 'Software' / 'Activities' are assumed to be the terms the
# BIDS-Prov context compacts the record types to -- confirm against context.json.
records_by_family = bids_prov_skeleton['Records']
for record in compacted_records:
    # Records without a type cannot be classified and are left out
    if 'Type' not in record:
        continue
    record_type = record['Type']
    if record_type == 'Software':
        records_by_family['Software'].append(record)
    elif record_type == 'Activities':
        records_by_family['Activities'].append(record)
    elif 'Environment' in record_type:
        # Substring match when the type is a string, membership test when
        # the record carries a list of types
        records_by_family['Environments'].append(record)
    else:
        # Anything else is filed as an entity
        records_by_family['Entities'].append(record)

# Write BIDS-Prov JSON-LD
with open('prov/workflow_provenance_20250314T155959_bidsprov.jsonld', 'w', encoding='utf-8') as file:
    json.dump(bids_prov_skeleton, file, indent=2)
0 commit comments