-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathontology_manager.py
More file actions
99 lines (83 loc) · 3.47 KB
/
ontology_manager.py
File metadata and controls
99 lines (83 loc) · 3.47 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
from rdflib import Graph
from SPARQLWrapper import SPARQLWrapper, TURTLE
from strings2things.app.config import Settings
from rdflib import Literal
from rdflib import XSD
class OntologyManager:
def __init__(self):
self.graph = Graph()
self.label_map: dict[str, str] = {}
# Defer settings creation until load time to avoid import-time failures
self.settings: Settings | None = None
def load_ontologies(self):
# Initialize settings on first load
if self.settings is None:
self.settings = Settings()
print(
f"[INFO] Connecting to SPARQL endpoint: {self.settings.ONTOLOGY_SPARQL_ENDPOINT}"
)
for graph_iri in self.settings.get_graph_iris():
print(f"[INFO] Loading named graph: {graph_iri}")
g = self._load_named_graph(self.settings.ONTOLOGY_SPARQL_ENDPOINT, graph_iri)
self.graph += g
print(f"[INFO] Loaded {len(self.graph)} triples.")
self._build_label_map()
def _load_named_graph(self, endpoint: str, graph_iri: str) -> Graph:
sparql = SPARQLWrapper(endpoint)
assert self.settings is not None, "Settings must be initialized before loading ontologies"
sparql.setCredentials(self.settings.GRAPHDB_USERNAME, self.settings.GRAPHDB_PASSWORD)
sparql.setQuery(
f"""
CONSTRUCT {{ ?s ?p ?o }}
WHERE {{
GRAPH <{graph_iri}> {{ ?s ?p ?o }}
}}
"""
)
sparql.setReturnFormat(TURTLE)
result = sparql.query().convert()
g = Graph()
g.parse(data=result, format="turtle")
return g
def _build_label_map(self):
seen = {}
for s, p, o in self.graph:
if str(p) not in (
"http://www.w3.org/2000/01/rdf-schema#label",
"http://www.w3.org/2004/02/skos/core#prefLabel",
):
continue
if not isinstance(o, Literal):
continue
if o.datatype and o.datatype != XSD.string:
continue
label = str(o).strip().lower()
iri = str(s)
if label in seen:
if seen[label] != iri:
# Mark ambiguity by storing list of IRIs
if isinstance(seen[label], list):
seen[label].append(iri)
else:
seen[label] = [seen[label], iri]
else:
seen[label] = iri
# Check ambiguities and build final label_map
self.label_map = self._check_ambiguities(seen)
print(f"[INFO] Label map built with {len(self.label_map)} unambiguous labels.")
def _check_ambiguities(self, seen: dict[str, str | list[str]]) -> dict[str, str]:
ambiguous_labels = {
label for label, iris in seen.items() if isinstance(iris, list)
}
if ambiguous_labels:
msg = f"Found ambiguous labels: {', '.join(sorted(ambiguous_labels))} \n Please resolve these in your ontology before proceeding."
if (self.settings or Settings()).FAIL_ON_AMBIGUOUS_LABELS:
raise ValueError(msg)
else:
print(f"[WARNING] {msg}")
# Return only unambiguous labels (those with a single IRI string)
return {
label: iris for label, iris in seen.items() if not isinstance(iris, list)
}
def get_label_map(self) -> dict[str, str]:
return self.label_map