Skip to content

Commit 3397652

Browse files
committed
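Further optimization for complex operands of union: when the marker occurs inside a union, build the negative context with a fresh conversion so its intermediate variables are independent of the positive block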
1 parent 6d425b4 commit 3397652

File tree

3 files changed

+186
-132
lines changed

3 files changed

+186
-132
lines changed

examples/class_query_example.py

Lines changed: 78 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
1616
python examples/class_query_example.py
1717
"""
18-
18+
from owlapy import dl_to_owl_expression, owl_expression_to_dl
1919
from owlapy.class_expression import (
2020
OWLClass,
2121
OWLObjectComplementOf,
@@ -171,10 +171,85 @@
171171
print("=" * 60)
172172

173173
context6 = OWLObjectUnionOf([Person, CONTEXT_POSITION_MARKER])
174+
175+
pos6 = [
176+
OWLNamedIndividual(IRI(NS, "F9F164")),
177+
OWLNamedIndividual(IRI(NS, "F10M188")),
178+
OWLNamedIndividual(IRI(NS, "F9M159")),
179+
OWLNamedIndividual(IRI(NS, "F10M176")),
180+
]
181+
182+
neg6 = [
183+
OWLNamedIndividual(IRI(NS, "F6F96")),
184+
OWLNamedIndividual(IRI(NS, "F10M173")),
185+
OWLNamedIndividual(IRI(NS, "F6F70")),
186+
OWLNamedIndividual(IRI(NS, "F8F133")),
187+
]
188+
174189
query6 = owl_expression_to_class_query(
175190
context=context6,
176-
positive_examples=positives,
177-
negative_examples=negatives,
191+
positive_examples=pos6,
192+
negative_examples=neg6,
178193
)
179194
print(query6)
180195

196+
# Query the SPARQL endpoint to verify results
197+
import requests
198+
199+
SPARQL_ENDPOINT = "http://localhost:3030/family/sparql"
200+
201+
print("\nQuerying SPARQL endpoint for Person ⊔ MARKER results:")
202+
try:
203+
response = requests.post(SPARQL_ENDPOINT, data={"query": query6}, timeout=10)
204+
response.raise_for_status()
205+
results = response.json()
206+
for row in results["results"]["bindings"]:
207+
cls = row["class"]["value"]
208+
pos_hits = row["posHits"]["value"]
209+
neg_hits = row["negHits"]["value"]
210+
print(f" ?class = <{cls}>, posHits = {pos_hits}, negHits = {neg_hits}")
211+
except Exception as e:
212+
print(f" (Could not reach endpoint: {e})")
213+
214+
# ---------------------------------------------------------------------------
215+
# Example 7 – nested union: ∃ hasParent.((∃ hasSibling.⊤) ⊔ MARKER)
216+
#
217+
# A more complex scenario where the union with the marker is nested inside
218+
# an existential restriction. This tests that intermediate variables are
219+
# independent between the positive and negative blocks.
220+
# ---------------------------------------------------------------------------
221+
print("\n" + "=" * 60)
222+
print("Example 7 – ∃ hasParent.((∃ hasSibling.⊤) ⊔ MARKER)")
223+
print("=" * 60)
224+
225+
hasParent = OWLObjectProperty(IRI(NS, "hasParent"))
226+
hasSibling = OWLObjectProperty(IRI(NS, "hasSibling"))
227+
228+
context7 = OWLObjectSomeValuesFrom(
229+
hasParent,
230+
OWLObjectUnionOf([
231+
OWLObjectSomeValuesFrom(hasSibling, OWLClass(IRI("http://www.w3.org/2002/07/owl#", "Thing"))),
232+
CONTEXT_POSITION_MARKER,
233+
]),
234+
)
235+
print(f"Context DL: {owl_expression_to_dl(context7)}")
236+
237+
query7 = owl_expression_to_class_query(
238+
context=context7,
239+
positive_examples=pos6,
240+
negative_examples=neg6,
241+
)
242+
print(query7)
243+
244+
print("\nQuerying SPARQL endpoint for ∃ hasParent.((∃ hasSibling.⊤) ⊔ MARKER) results:")
245+
try:
246+
response = requests.post(SPARQL_ENDPOINT, data={"query": query7}, timeout=10)
247+
response.raise_for_status()
248+
results = response.json()
249+
for row in results["results"]["bindings"]:
250+
cls = row["class"]["value"]
251+
pos_hits = row["posHits"]["value"]
252+
neg_hits = row["negHits"]["value"]
253+
print(f" ?class = <{cls}>, posHits = {pos_hits}, negHits = {neg_hits}")
254+
except Exception as e:
255+
print(f" (Could not reach endpoint: {e})")

owlapy/marked_entity_generator_converter.py

Lines changed: 108 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,8 @@ def convert(self, root_variable: str,
117117
marker_mode: bool = False,
118118
property_marker_mode: bool = False,
119119
inverted: bool = False,
120-
negated_class_marker_mode: bool = False):
120+
negated_class_marker_mode: bool = False,
121+
_preserve_mapping: bool = False):
121122
"""Like the parent ``convert`` but accepts extra marker flags.
122123
123124
When *marker_mode* is ``True`` the :data:`CONTEXT_POSITION_MARKER`
@@ -130,15 +131,48 @@ class is treated specially – emitting ``?var a ?class .``.
130131
131132
When *property_marker_mode* is ``True`` the marker emits
132133
``?var ?prop [] .`` (or ``[] ?prop ?var .`` when *inverted* is ``True``).
134+
135+
When *_preserve_mapping* is ``True``, the existing
136+
:class:`VariablesMapping` counters are preserved so that a
137+
subsequent conversion produces fresh intermediate variable names
138+
(e.g. ``?s_3`` instead of ``?s_1``). This is essential when the
139+
positive and negative blocks appear in the same outer query scope.
133140
"""
134141
self._marker_mode = marker_mode
135142
self._property_marker_mode = property_marker_mode
136143
self._negated_class_marker_mode = negated_class_marker_mode
137144
self._inverted = inverted
138145
self._inside_filter = False
139-
return super().convert(root_variable, ce,
140-
for_all_de_morgan=for_all_de_morgan,
141-
named_individuals=named_individuals)
146+
if _preserve_mapping and hasattr(self, 'mapping') and self.mapping is not None:
147+
# Inline the parent's convert() logic but keep the existing
148+
# VariablesMapping so intermediate variable names continue from
149+
# where the previous conversion left off.
150+
from collections import defaultdict
151+
self.ce = ce
152+
self.sparql = []
153+
self.variables = []
154+
self.parent = []
155+
self.parent_var = []
156+
self.properties = defaultdict(list)
157+
self.variable_entities = set()
158+
self._intersection = defaultdict(bool)
159+
self.cnt = 0
160+
# Do NOT reset self.mapping – keep the existing counters so that
161+
# new intermediate variables get fresh names. But clear the
162+
# entity-to-variable dict to avoid stale assignments.
163+
self.mapping.dict = dict()
164+
self.grouping_vars = defaultdict(set)
165+
self.having_conditions = defaultdict(set)
166+
self.for_all_de_morgan = for_all_de_morgan
167+
self.named_individuals = named_individuals
168+
with self.stack_variable(root_variable):
169+
with self.stack_parent(ce):
170+
self.process(ce)
171+
return self.sparql
172+
else:
173+
return super().convert(root_variable, ce,
174+
for_all_de_morgan=for_all_de_morgan,
175+
named_individuals=named_individuals)
142176

143177
# -- process overloads ----------------------------------------------------
144178
# We need to re-register the singledispatchmethod overloads because
@@ -540,26 +574,49 @@ def as_class_query(
540574
context_parts.extend(tp)
541575
context_string = "".join(context_parts)
542576

543-
# -- 2. Build negative context string (variable replacement) ----------
577+
# -- 2. Build negative context string ---------------------------------
544578
values_neg = _generate_values_stmt(root_variable_neg, negative_list)
545-
neg_context = re.sub(
546-
r"VALUES\s+" + re.escape(root_variable_pos) + r"\s+\{[^}]*}",
547-
values_neg.rstrip(". "),
548-
context_string,
549-
)
550-
# Replace remaining occurrences of the positive variable
551-
neg_context = neg_context.replace(f"{root_variable_pos} ", f"{root_variable_neg} ")
552-
neg_context = neg_context.replace(f"{root_variable_pos})", f"{root_variable_neg})")
579+
580+
has_union_marker = self._contains_union_with_marker(context)
581+
582+
if has_union_marker:
583+
# When a UNION involves the marker, we must do a fresh conversion
584+
# for the negative context so that intermediate variables (e.g.
585+
# ?s_1, ?s_2) are independent between the positive and negative
586+
# blocks. We use _preserve_mapping=True so that the variable
587+
# counter continues from where the positive conversion left off,
588+
# producing distinct variable names (e.g. ?s_3, ?s_4).
589+
neg_tp = self.convert(root_variable_neg, context,
590+
for_all_de_morgan=for_all_de_morgan,
591+
named_individuals=named_individuals,
592+
marker_mode=True,
593+
_preserve_mapping=True)
594+
neg_context_parts = [values_neg]
595+
if filter_expression is not None:
596+
neg_filter_tp = self.convert(root_variable_neg, filter_expression,
597+
for_all_de_morgan=for_all_de_morgan,
598+
named_individuals=named_individuals,
599+
marker_mode=False,
600+
_preserve_mapping=True)
601+
neg_context_parts.append(f"FILTER NOT EXISTS {{ {''.join(neg_filter_tp)} }} ")
602+
neg_context_parts.extend(neg_tp)
603+
neg_context = "".join(neg_context_parts)
604+
else:
605+
neg_context = re.sub(
606+
r"VALUES\s+" + re.escape(root_variable_pos) + r"\s+\{[^}]*}",
607+
values_neg.rstrip(". "),
608+
context_string,
609+
)
610+
# Replace remaining occurrences of the positive variable
611+
neg_context = neg_context.replace(f"{root_variable_pos} ", f"{root_variable_neg} ")
612+
neg_context = neg_context.replace(f"{root_variable_pos})", f"{root_variable_neg})")
553613

554614
# -- 3. Assemble final query ------------------------------------------
555615
# When the context contains a UNION involving the marker, we need
556616
# a different structure: pre-enumerate ?class with a selective
557617
# ``SELECT DISTINCT ?class`` subquery scoped to the example
558618
# individuals so that ?class is visible across UNION branches.
559-
if self._contains_union_with_marker(context):
560-
# Use a selective subquery to pre-enumerate only ?class values
561-
# that appear among the example individuals, avoiding a full
562-
# graph scan that ``?anything a ?class .`` would cause.
619+
if has_union_marker:
563620
binding_subquery = (
564621
" { SELECT DISTINCT ?class WHERE {\n"
565622
" { " + context_string + " }\n"
@@ -773,22 +830,47 @@ def as_property_query(
773830
context_parts.extend(tp)
774831
context_string = "".join(context_parts)
775832

776-
# -- 2. Build negative context string (variable replacement) ----------
833+
# -- 2. Build negative context string ---------------------------------
777834
values_neg = _generate_values_stmt(root_variable_neg, negative_list)
778-
neg_context = re.sub(
779-
r"VALUES\s+" + re.escape(root_variable_pos) + r"\s+\{[^}]*}",
780-
values_neg.rstrip(". "),
781-
context_string,
782-
)
783-
neg_context = neg_context.replace(f"{root_variable_pos} ", f"{root_variable_neg} ")
784-
neg_context = neg_context.replace(f"{root_variable_pos})", f"{root_variable_neg})")
835+
836+
has_union_marker = self._contains_union_with_marker(context)
837+
838+
if has_union_marker:
839+
# When a UNION involves the marker, we must do a fresh conversion
840+
# for the negative context so that intermediate variables (e.g.
841+
# ?s_1, ?s_2) are independent between the positive and negative
842+
# blocks. We use _preserve_mapping=True so that the variable
843+
# counter continues from where the positive conversion left off.
844+
neg_tp = self.convert(root_variable_neg, context,
845+
for_all_de_morgan=for_all_de_morgan,
846+
named_individuals=named_individuals,
847+
property_marker_mode=True,
848+
inverted=inverted,
849+
_preserve_mapping=True)
850+
neg_context_parts = [values_neg]
851+
if filter_expression is not None:
852+
neg_filter_tp = self.convert(root_variable_neg, filter_expression,
853+
for_all_de_morgan=for_all_de_morgan,
854+
named_individuals=named_individuals,
855+
_preserve_mapping=True)
856+
neg_context_parts.append(f"FILTER NOT EXISTS {{ {''.join(neg_filter_tp)} }} ")
857+
neg_context_parts.extend(neg_tp)
858+
neg_context = "".join(neg_context_parts)
859+
else:
860+
neg_context = re.sub(
861+
r"VALUES\s+" + re.escape(root_variable_pos) + r"\s+\{[^}]*}",
862+
values_neg.rstrip(". "),
863+
context_string,
864+
)
865+
neg_context = neg_context.replace(f"{root_variable_pos} ", f"{root_variable_neg} ")
866+
neg_context = neg_context.replace(f"{root_variable_pos})", f"{root_variable_neg})")
785867

786868
# -- 3. Assemble final query -------------------------------------------
787869
# When the context contains a UNION involving the marker, we need
788870
# a different structure: pre-enumerate ?prop with a selective
789871
# ``SELECT DISTINCT ?prop`` subquery scoped to the example
790872
# individuals so that ?prop is visible across UNION branches.
791-
if self._contains_union_with_marker(context):
873+
if has_union_marker:
792874
# Use a selective subquery to pre-enumerate only ?prop values
793875
# that appear among the example individuals, avoiding a full
794876
# graph scan that ``?anything ?prop [] .`` would cause.

0 commit comments

Comments
 (0)