Skip to content

Commit 5759706

Browse files
authored
Add CompiledModel class which can generate DataFrames and Tables (#359)
* add CompiledModel class
* format code
* fix variable names, python deps
* format
* add compiled model in its own class, fix some deps
* avoid subclassing Model
* python 3.10 thru 3.13
* no python 3.13 yet
* update poetry from merge
* bump pygit2
* bumping versions, fixing poetry lock
* bump flake
* bump flake8
* poetry lock
* changes for shacl path
* remove snakeviz
* fixing test against shapes
* saving time on 223P validation
* fixing notebooks
* Improving tests
* updating tests and fixing some subtle bugs
* use renamed method
* updating QUDT
* update Brick
* update imports for brick library tests
1 parent 891e498 commit 5759706

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

48 files changed

+73044
-34308
lines changed

.github/workflows/ci.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ jobs:
4242
runs-on: ubuntu-latest
4343
strategy:
4444
matrix:
45-
python-version: ['3.8', '3.9', '3.10', '3.11']
45+
python-version: ['3.10', '3.11', '3.12']
4646
steps:
4747
- name: checkout
4848
uses: actions/checkout@v4

.pre-commit-config.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ repos:
1010
- id: black
1111
entry: poetry run black
1212
- repo: https://github.com/pycqa/flake8
13-
rev: 5.0.0
13+
rev: 7.0.0
1414
hooks:
1515
- id: flake8
1616
entry: poetry run flake8 buildingmotif

buildingmotif/api/views/model.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -263,8 +263,8 @@ def validate_shape(models_id: int) -> flask.Response:
263263
target_class = URIRef(body.get("target_class"))
264264

265265
# test
266-
conformance = model.test_model_against_shapes(
267-
shape_collections=shape_collections,
266+
compiled = model.compile(shape_collections)
267+
conformance = compiled.validate_model_against_shapes(
268268
shapes_to_test=shape_uris,
269269
target_class=target_class,
270270
)
Lines changed: 224 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,224 @@
1+
from dataclasses import dataclass
2+
from functools import cached_property
3+
from typing import Dict, List, Optional
4+
5+
import pandas as pd
6+
import rdflib
7+
import rdflib.query
8+
from rdflib import URIRef
9+
10+
from buildingmotif.dataclasses.model import Model
11+
from buildingmotif.dataclasses.shape_collection import ShapeCollection
12+
from buildingmotif.dataclasses.validation import ValidationContext
13+
from buildingmotif.namespaces import OWL, SH, A
14+
from buildingmotif.utils import (
15+
copy_graph,
16+
rewrite_shape_graph,
17+
shacl_inference,
18+
shacl_validate,
19+
skolemize_shapes,
20+
)
21+
22+
23+
@dataclass
class CompiledModel:
    """
    This class represents a model that has been compiled against a set of ShapeCollections.

    On construction the graphs of the supplied shape collections are merged,
    skolemized, and used to run SHACL inference over the supplied graph. The
    inferred graph is stored and used by the validation and query helpers below.
    """

    # A hand-written __init__ is defined below; @dataclass keeps an explicitly
    # defined __init__ and only contributes the generated __repr__/__eq__.
    model: Model
    shape_collections: List[ShapeCollection]
    _compiled_graph: rdflib.Graph

    def __init__(
        self,
        model: Model,
        shape_collections: List[ShapeCollection],
        compiled_graph: rdflib.Graph,
        shacl_engine: str = "default",
    ):
        """
        Run SHACL inference over `compiled_graph` using the given shape collections.

        :param model: the model this compiled view belongs to
        :type model: Model
        :param shape_collections: the shape collections to compile against
        :type shape_collections: List[ShapeCollection]
        :param compiled_graph: the graph on which inference is run
        :type compiled_graph: rdflib.Graph
        :param shacl_engine: the SHACL engine used for inference; "default" or an
            empty value selects ``model._bm.shacl_engine``
        :type shacl_engine: str, optional
        """
        self.model = model
        self.shape_collections = shape_collections

        # merge every shape collection into one graph for inference
        ontology_graph = rdflib.Graph()
        for shape_collection in shape_collections:
            ontology_graph += shape_collection.graph

        ontology_graph = skolemize_shapes(ontology_graph)

        # NOTE(review): the resolved engine is only used for inference here; the
        # validation methods below always read ``self.model._bm.shacl_engine``.
        # Confirm whether an explicit override should apply to validation too.
        shacl_engine = (
            self.model._bm.shacl_engine
            if (shacl_engine == "default" or not shacl_engine)
            else shacl_engine
        )

        self._compiled_graph = shacl_inference(
            compiled_graph, ontology_graph, shacl_engine
        )

    @cached_property
    def graph(self) -> rdflib.Graph:
        """
        A copy of the compiled graph with the graph of every shape collection
        added to it. Computed on first access and cached on the instance.

        :return: the compiled graph plus all shape collection graphs
        :rtype: rdflib.Graph
        """
        g = copy_graph(self._compiled_graph)
        for shape_collection in self.shape_collections:
            g += shape_collection.graph
        return g

    def validate_model_against_shapes(
        self,
        shapes_to_test: List[rdflib.URIRef],
        target_class: rdflib.URIRef,
    ) -> Dict[rdflib.URIRef, "ValidationContext"]:
        """Validates the model against a list of shapes and generates a
        validation report for each.

        :param shapes_to_test: list of shape URIs to validate the model against
        :type shapes_to_test: List[URIRef]
        :param target_class: the class upon which to run the selected shapes
        :type target_class: URIRef
        :return: a dictionary that relates each shape to test URIRef to a
            ValidationContext
        :rtype: Dict[URIRef, ValidationContext]
        """
        model_graph = copy_graph(self._compiled_graph)

        results = {}

        # Bind the class through initBindings instead of formatting it into the
        # query text: the value can originate from an API request body, and a
        # bound term cannot alter the structure of the query.
        # The rows are materialised once because they are re-used for every shape.
        targets = list(
            model_graph.query(
                """
                PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
                PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
                SELECT ?target
                WHERE {
                    ?target rdf:type/rdfs:subClassOf* ?target_class
                }
                """,
                initBindings={"target_class": URIRef(target_class)},
            )
        )
        # skolemize the shape graph so we have consistent identifiers across
        # validation through the interpretation of the validation report
        ontology_graph = self.graph.skolemize()

        for shape_uri in shapes_to_test:
            # work on a fresh copy so the type assertions added for one shape
            # do not leak into the validation of the next one
            temp_model_graph = copy_graph(model_graph)
            for (s,) in targets:
                # declare each target an instance of the shape under test
                temp_model_graph.add((URIRef(s), A, shape_uri))

            valid, report_g, report_str = shacl_validate(
                temp_model_graph, ontology_graph, engine=self.model._bm.shacl_engine
            )

            results[shape_uri] = ValidationContext(
                self.shape_collections,
                ontology_graph,
                valid,
                report_g,
                report_str,
                self.model,
            )

        return results

    def validate(
        self,
        error_on_missing_imports: bool = True,
    ) -> "ValidationContext":
        """Validates the compiled graph together with the ShapeCollections this
        model was compiled against.

        The graphs of the shape collections (with their imports resolved) are
        added to a copy of the compiled graph. The combined graph is rewritten
        to inline sh:node, stripped of owl:imports, skolemized, and then handed
        to the SHACL validator.

        :param error_on_missing_imports: if True, raises an error if any of the dependency
            ontologies are missing (i.e. they need to be loaded into BuildingMOTIF), defaults
            to True
        :type error_on_missing_imports: bool, optional
        :return: An object containing useful properties/methods to deal with
            the validation results
        :rtype: ValidationContext
        """
        # TODO: determine the return types; At least a bool for valid/invalid,
        # but also want a report. Is this the base pySHACL report? Or a useful
        # transformation, like a list of deltas for potential fixes?
        shapeg = copy_graph(self._compiled_graph)
        # aggregate shape graphs
        for sc in self.shape_collections:
            shapeg += sc.resolve_imports(
                error_on_missing_imports=error_on_missing_imports
            ).graph
        # inline sh:node for interpretability
        shapeg = rewrite_shape_graph(shapeg)

        # remove imports from the combined graph
        shapeg.remove((None, OWL.imports, None))

        # skolemize the shape graph so we have consistent identifiers across
        # validation through the interpretation of the validation report
        shapeg = skolemize_shapes(shapeg)

        # strip owl:imports once more from the skolemized graph; data and shapes
        # live in the same graph here, so this is the graph that gets validated
        shapeg.remove((None, OWL.imports, None))

        # validate the combined graph
        valid, report_g, report_str = shacl_validate(
            shapeg, engine=self.model._bm.shacl_engine
        )
        return ValidationContext(
            self.shape_collections,
            shapeg,
            valid,
            report_g,
            report_str,
            self.model,
        )

    def defining_shape_collection(
        self, shape: rdflib.URIRef
    ) -> Optional[ShapeCollection]:
        """
        Given a shape, return the ShapeCollection that defines it. The search is limited to the
        ShapeCollections that were used to compile this model.

        A collection is considered to define the shape when its graph contains
        the triple ``(shape, rdf:type, sh:NodeShape)``; the first match wins.

        :param shape: the shape to search for
        :type shape: rdflib.URIRef
        :return: the ShapeCollection that defines the shape, or None if the shape is not defined
        :rtype: Optional[ShapeCollection]
        """
        for sc in self.shape_collections:
            if (shape, A, SH.NodeShape) in sc.graph:
                return sc
        return None

    def shape_to_table(self, shape: rdflib.URIRef, table: str, conn):
        """
        Turn the shape into a SPARQL query and execute it on the model's graph, storing the results in a table.

        An existing table with the same name is replaced.

        :param shape: the shape to query
        :type shape: rdflib.URIRef
        :param table: the name of the table to store the results in
        :type table: str
        :param conn: the connection to the database
        :type conn: sqlalchemy.engine.base.Connection
        :raises ValueError: if the shape is not defined in any of the shape collections
        """
        metadata = self.shape_to_df(shape)
        metadata.to_sql(table, conn, if_exists="replace", index=False)

    def shape_to_df(self, shape: rdflib.URIRef) -> pd.DataFrame:
        """
        Turn the shape into a SPARQL query and execute it on the model's graph, storing the results in a dataframe.

        :param shape: the shape to query
        :type shape: rdflib.URIRef
        :return: the results of the query
        :rtype: pd.DataFrame
        :raises ValueError: if the shape is not defined in any of the shape collections
        """
        defining_sc = self.defining_shape_collection(shape)
        if defining_sc is None:
            raise ValueError(
                f"Shape {shape} is not defined in any of the shape collections"
            )
        query = defining_sc.shape_to_query(shape)
        metadata = pd.DataFrame(self.graph.query(query).bindings, dtype="string")
        # metadata.columns will be rdflib.term.Variable objects, so we need to convert them to strings
        metadata.columns = [str(col) for col in metadata.columns]

        def _to_python(value):
            # Rows whose bindings lack a variable are filled with a missing-value
            # marker when the frame is built; those cells have no toPython(), so
            # they are passed through unchanged instead of raising AttributeError.
            converter = getattr(value, "toPython", None)
            return converter() if callable(converter) else value

        # convert the rdflib terms to Python types
        return metadata.map(_to_python)

0 commit comments

Comments
 (0)