|
| 1 | +from dataclasses import dataclass |
| 2 | +from functools import cached_property |
| 3 | +from typing import Dict, List, Optional |
| 4 | + |
| 5 | +import pandas as pd |
| 6 | +import rdflib |
| 7 | +import rdflib.query |
| 8 | +from rdflib import URIRef |
| 9 | + |
| 10 | +from buildingmotif.dataclasses.model import Model |
| 11 | +from buildingmotif.dataclasses.shape_collection import ShapeCollection |
| 12 | +from buildingmotif.dataclasses.validation import ValidationContext |
| 13 | +from buildingmotif.namespaces import OWL, SH, A |
| 14 | +from buildingmotif.utils import ( |
| 15 | + copy_graph, |
| 16 | + rewrite_shape_graph, |
| 17 | + shacl_inference, |
| 18 | + shacl_validate, |
| 19 | + skolemize_shapes, |
| 20 | +) |
| 21 | + |
| 22 | + |
@dataclass
class CompiledModel:
    """
    This class represents a model that has been compiled against a set of ShapeCollections.

    "Compiling" here means running SHACL inference over the supplied graph using
    the (skolemized) union of the shape collections' graphs; the result is kept
    in ``_compiled_graph`` and is what the validation and query helpers operate on.
    """

    # the Model this compiled view was produced from
    model: Model
    # the ShapeCollections the model was compiled against
    shape_collections: List[ShapeCollection]
    # output of shacl_inference() computed in __init__
    _compiled_graph: rdflib.Graph

    def __init__(
        self,
        model: Model,
        shape_collections: List[ShapeCollection],
        compiled_graph: rdflib.Graph,
        shacl_engine: str = "default",
    ):
        """Compile ``compiled_graph`` against the given shape collections.

        :param model: the model being compiled
        :type model: Model
        :param shape_collections: the shape collections to compile against
        :type shape_collections: List[ShapeCollection]
        :param compiled_graph: the graph SHACL inference is run on
        :type compiled_graph: rdflib.Graph
        :param shacl_engine: name of the SHACL engine to use; ``"default"`` (or
            any falsy value) selects the engine configured on the model's
            BuildingMOTIF instance
        :type shacl_engine: str, optional
        """
        self.model = model
        self.shape_collections = shape_collections

        # union of all shape collection graphs, used as the shape graph for inference
        ontology_graph = rdflib.Graph()
        for shape_collection in shape_collections:
            ontology_graph += shape_collection.graph
        ontology_graph = skolemize_shapes(ontology_graph)

        if shacl_engine == "default" or not shacl_engine:
            shacl_engine = self.model._bm.shacl_engine
        # Remember the resolved engine so that the validation methods use the
        # same engine the model was compiled with, rather than silently falling
        # back to the BuildingMOTIF-wide default. This is intentionally not a
        # dataclass field, so the generated __eq__/__repr__ are unchanged.
        self._shacl_engine = shacl_engine

        self._compiled_graph = shacl_inference(
            compiled_graph, ontology_graph, shacl_engine
        )

    @cached_property
    def graph(self) -> rdflib.Graph:
        """A copy of the compiled graph merged with every shape collection's graph.

        The value is computed on first access and then cached on the instance,
        so repeated accesses return the same ``rdflib.Graph`` object.

        :return: the compiled graph plus the graphs of all shape collections
        :rtype: rdflib.Graph
        """
        g = copy_graph(self._compiled_graph)
        for shape_collection in self.shape_collections:
            g += shape_collection.graph
        return g

    def validate_model_against_shapes(
        self,
        shapes_to_test: List[rdflib.URIRef],
        target_class: rdflib.URIRef,
    ) -> Dict[rdflib.URIRef, "ValidationContext"]:
        """Validates the model against a list of shapes and generates a
        validation report for each.

        :param shapes_to_test: list of shape URIs to validate the model against
        :type shapes_to_test: List[URIRef]
        :param target_class: the class upon which to run the selected shapes
        :type target_class: URIRef
        :return: a dictionary that relates each shape to test URIRef to a
            ValidationContext
        :rtype: Dict[URIRef, ValidationContext]
        """
        model_graph = copy_graph(self._compiled_graph)

        target_rows = model_graph.query(
            f"""
            PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
            PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
            SELECT ?target
            WHERE {{
                ?target rdf:type/rdfs:subClassOf* <{target_class}>

            }}
            """
        )
        # The set of targets is the same for every shape, so evaluate the query
        # result once instead of re-iterating it inside the loop below.
        target_nodes = [URIRef(row[0]) for row in target_rows]

        # skolemize the shape graph so we have consistent identifiers across
        # validation through the interpretation of the validation report
        ontology_graph = self.graph.skolemize()

        results: Dict[rdflib.URIRef, ValidationContext] = {}
        for shape_uri in shapes_to_test:
            # work on a fresh copy so the type assertions added for one shape
            # do not leak into the validation of the next shape
            temp_model_graph = copy_graph(model_graph)
            for target in target_nodes:
                temp_model_graph.add((target, A, shape_uri))

            valid, report_g, report_str = shacl_validate(
                temp_model_graph, ontology_graph, engine=self._shacl_engine
            )

            results[shape_uri] = ValidationContext(
                self.shape_collections,
                ontology_graph,
                valid,
                report_g,
                report_str,
                self.model,
            )

        return results

    def validate(
        self,
        error_on_missing_imports: bool = True,
    ) -> "ValidationContext":
        """Validates the compiled model against the ShapeCollections it was
        compiled with.

        Loads the compiled graph and all of the ShapeCollections (with their
        imports resolved) into a single graph and runs SHACL validation on it.

        :param error_on_missing_imports: if True, raises an error if any of the dependency
            ontologies are missing (i.e. they need to be loaded into BuildingMOTIF), defaults
            to True
        :type error_on_missing_imports: bool, optional
        :return: An object containing useful properties/methods to deal with
            the validation results
        :rtype: ValidationContext
        """
        # TODO: determine the return types; At least a bool for valid/invalid,
        # but also want a report. Is this the base pySHACL report? Or a useful
        # transformation, like a list of deltas for potential fixes?
        shapeg = copy_graph(self._compiled_graph)
        # aggregate shape graphs
        for sc in self.shape_collections:
            shapeg += sc.resolve_imports(
                error_on_missing_imports=error_on_missing_imports
            ).graph
        # inline sh:node for interpretability
        shapeg = rewrite_shape_graph(shapeg)

        # remove imports from the combined graph
        shapeg.remove((None, OWL.imports, None))

        # skolemize the shape graph so we have consistent identifiers across
        # validation through the interpretation of the validation report
        shapeg = skolemize_shapes(shapeg)

        # The data and the shapes live in the same graph here, so this second
        # removal only matters if skolemization left any owl:imports behind;
        # it is kept to preserve the original order of operations.
        shapeg.remove((None, OWL.imports, None))

        # validate the combined data + shape graph
        valid, report_g, report_str = shacl_validate(
            shapeg, engine=self._shacl_engine
        )
        return ValidationContext(
            self.shape_collections,
            shapeg,
            valid,
            report_g,
            report_str,
            self.model,
        )

    def defining_shape_collection(
        self, shape: rdflib.URIRef
    ) -> Optional[ShapeCollection]:
        """
        Given a shape, return the ShapeCollection that defines it. The search is limited to the
        ShapeCollections that were used to compile this model.

        A collection "defines" the shape if its graph declares the shape as a
        ``sh:NodeShape``; the first matching collection is returned.

        :param shape: the shape to search for
        :type shape: rdflib.URIRef
        :return: the ShapeCollection that defines the shape, or None if the shape is not defined
        :rtype: Optional[ShapeCollection]
        """
        for sc in self.shape_collections:
            if (shape, A, SH.NodeShape) in sc.graph:
                return sc
        return None

    def shape_to_table(self, shape: rdflib.URIRef, table: str, conn):
        """
        Turn the shape into a SPARQL query and execute it on the model's graph, storing the results in a table.

        An existing table with the same name is replaced.

        :param shape: the shape to query
        :type shape: rdflib.URIRef
        :param table: the name of the table to store the results in
        :type table: str
        :param conn: the connection to the database
        :type conn: sqlalchemy.engine.base.Connection
        :raises ValueError: if the shape is not defined in any of the shape collections
        """
        metadata = self.shape_to_df(shape)
        metadata.to_sql(table, conn, if_exists="replace", index=False)

    def shape_to_df(self, shape: rdflib.URIRef) -> pd.DataFrame:
        """
        Turn the shape into a SPARQL query and execute it on the model's graph, storing the results in a dataframe.

        :param shape: the shape to query
        :type shape: rdflib.URIRef
        :return: the results of the query
        :rtype: pd.DataFrame
        :raises ValueError: if the shape is not defined in any of the shape collections
        """
        defining_sc = self.defining_shape_collection(shape)
        if defining_sc is None:
            raise ValueError(
                f"Shape {shape} is not defined in any of the shape collections"
            )
        query = defining_sc.shape_to_query(shape)
        metadata = pd.DataFrame(self.graph.query(query).bindings, dtype="string")
        # metadata.columns will be rdflib.term.Variable objects, so we need to convert them to strings
        metadata.columns = [str(col) for col in metadata.columns]

        def to_python(value):
            # Rows in which a variable is unbound (e.g. inside OPTIONAL) yield
            # missing values in the frame; those are not rdflib terms and have
            # no toPython(), so pass them through unchanged instead of raising.
            convert = getattr(value, "toPython", None)
            return convert() if callable(convert) else value

        # convert the rdflib terms to Python types
        return metadata.map(to_python)
0 commit comments