Skip to content

Commit 374bd0e

Browse files
committed
Introduce RDF diffing test suite along with the data
1 parent 6d4566a commit 374bd0e

10 files changed

Lines changed: 19951 additions & 0 deletions

requirements-test.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,3 +3,4 @@ lxml
33
pandas
44
pyld
55
pytest
6+
pytest_bdd

test/diffTests/__init__.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
import pathlib
2+
3+
# All locations below are derived from where this package lives on disk.
_PACKAGE_DIR = pathlib.Path(__file__).parent

# Directory that contains this package.
TEST_FOLDER = _PACKAGE_DIR.parent
# Directory one level above TEST_FOLDER.
PROJECT_DIR_PATH = TEST_FOLDER.parent
# Data directory for the RDF differ tests, located inside TEST_FOLDER.
TEST_DATA_DIR = TEST_FOLDER.joinpath("testData", "rdf-differ-data")
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
Feature: OWL diffing
2+
3+
Background:
4+
Given the OWL files "tests/test_data/owl/ePO_sample-4.0.0.orig.ttl" and "tests/test_data/owl/ePO_sample-4.0.0.upd.ttl"
5+
And the test prefixes are defined
6+
7+
Scenario Outline: Diffing example resources in the OWL sample
8+
When the diff is run
9+
Then the report should contain the change for "<resource_type>","<instance>","<operation>","<predicate>","<old_value>","<new_value>"
10+
11+
Examples:
12+
| resource_type | instance | operation | predicate | old_value | new_value |
13+
| class | epo:AwardCriterion | added | | | |
14+
| class | epo:AdHocChannel | deleted | | | |
15+
| class | epo:AcquiringCentralPurchasingBody | changed | skos:prefLabel | | rdfs:label |
16+
| class | epo:AwardCriteriaSummary | updated | skos:prefLabel | Award criteria summary | Award criteria summarization |
17+
| datatype_property | epo:describesObjectiveParticipationRules | added | | | |
18+
| datatype_property | epo:describesProfessionRelevantLaw | deleted | | | |
19+
| object_property | epo:followsRulesSetBy | added | | | |
20+
| object_property | epo:exposesChannel | deleted | | | |

test/diffTests/steps/__init__.py

Whitespace-only changes.
Lines changed: 221 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,221 @@
1+
from enum import Enum
2+
import json
3+
import os
4+
import subprocess
5+
from pathlib import Path
6+
7+
import pytest
8+
from pytest_bdd import given, when, then, scenario, parsers
9+
10+
from diffTests import PROJECT_DIR_PATH, TEST_DATA_DIR
11+
12+
13+
# Shell script invoked to run the diffing workflow.
SCRIPT_PATH = PROJECT_DIR_PATH.joinpath("rdf-differ-ws", "bash", "rdf-differ.sh")

# Value passed to the script as --base-url; override with RDF_DIFFER_BASE_URL.
BASE_URL = os.getenv("RDF_DIFFER_BASE_URL", "http://localhost:4030")

# Stored JSON report that can be loaded instead of running the script.
SAVED_REPORT = TEST_DATA_DIR.joinpath("ePO_sample-4.0.0-upd_diff-report.json")

# Defaults to enabled; set RDF_DIFFER_REUSE_SAVED_REPORT to anything other
# than 1/true/yes (case-insensitive) to run the script instead.
REUSE_SAVED_REPORT = os.getenv("RDF_DIFFER_REUSE_SAVED_REPORT", "true").lower() in (
    "1",
    "true",
    "yes",
)

# Reports keyed by (old, new) path pair so the diff is loaded/run only once
# per pair rather than once per example row.
_diff_cache = {}

# Resource types the report assertions know how to look up.
SUPPORTED_TYPES = ("class", "datatype_property", "object_property")
24+
25+
@scenario("../features/owl_diff.feature", "Diffing example resources in the OWL sample")
def test_owl_diff_feature():
    """Bind the OWL diffing scenario outline to pytest.

    The body is intentionally empty: the ``@scenario`` decorator supplies the
    behaviour from the feature file and the step functions in this module.
    """
28+
29+
30+
@pytest.fixture
def ctx(tmp_path):
    """Return a fresh dict that the step functions use to pass state along.

    The dict initially holds only the ``tmp_path`` fixture value under the
    ``"tmpdir"`` key; steps add further entries as they run.
    """
    state = {}
    state["tmpdir"] = tmp_path
    return state
34+
35+
36+
@given("the test prefixes are defined")
def prefixes(ctx):
    """Store the prefix-to-namespace mapping in ``ctx["prefixes"]`` and return it.

    The feature file writes resources as prefixed names, while the diff
    reports carry no prefixes, so the mapping is hardcoded here to translate
    between the two.
    """
    namespace_map = dict(
        epo="http://data.europa.eu/a4g/ontology#",
        skos="http://www.w3.org/2004/02/skos/core#",
        rdfs="http://www.w3.org/2000/01/rdf-schema#",
    )
    ctx["prefixes"] = namespace_map
    return namespace_map
46+
47+
48+
@given(parsers.parse('the OWL files "{old}" and "{new}"'))
def owl_files(ctx, old, new):
    """Record the two dataset paths from the feature file in the context.

    Each path is passed through ``Path`` and stored as a string under
    ``ctx["old"]`` / ``ctx["new"]``. The paths are NOT made absolute: a
    relative path stays relative.

    Returns the context dict itself.
    """
    ctx.update(old=str(Path(old)), new=str(Path(new)))
    return ctx
54+
55+
56+
@when("the diff is run")
def run_diff(ctx):
    """Load the diff report for ``ctx["old"]`` / ``ctx["new"]`` into ``ctx["report"]``.

    The report comes from one of three places, checked in this order:

    1. ``_diff_cache`` -- a report already loaded for the same (old, new) pair;
    2. ``SAVED_REPORT`` -- when ``REUSE_SAVED_REPORT`` is enabled;
    3. a fresh run of the diff script, which is expected to write
       ``diff.json`` into the scenario's temporary directory.

    Fails with an ``AssertionError`` when the script exits non-zero or the
    report file does not exist.
    """
    old = ctx["old"]
    new = ctx["new"]

    # we keep a record of already run diffs to speed up tests
    # (the cache is filled at the end of this function)
    key = (old, new)
    if key in _diff_cache:
        ctx["report"] = _diff_cache[key]
        return

    here = os.path.dirname(__file__)
    if REUSE_SAVED_REPORT:
        # use pre-existing report -- for faster testing/debugging of this test,
        # skipping the building of the report
        report_file = Path(os.path.abspath(os.path.join(here, SAVED_REPORT)))
    else:
        # run full workflow producing JSON output into temporary dir -- this should be the normal way
        # WARNING: as this runs an async call, sometimes this can fail due to race conditions
        # (the Celery task queue may be empty if called too fast or too late)
        script = os.path.abspath(os.path.join(here, SCRIPT_PATH))
        outdir = str(ctx["tmpdir"])
        profile = "owl-core-en-only"
        result = subprocess.run(
            [
                script,
                "--base-url",
                BASE_URL,
                "--old",
                old,
                "--new",
                new,
                "--ap",
                profile,
                "--template",
                "json",
                "--output",
                outdir,
                "full",
            ],
            # Capture the streams: without this, result.stderr/stdout are None
            # and the failure message below carries no diagnostics.
            capture_output=True,
            text=True,
        )

        assert (
            result.returncode == 0
        ), f"Diff script failed: {result.stderr}\n{result.stdout}"
        report_file = Path(outdir) / "diff.json"

    assert report_file.exists(), f"Report file not found: {report_file}"
    # Read as UTF-8 explicitly instead of relying on the platform default encoding.
    with open(report_file, encoding="utf-8") as fh:
        report = json.load(fh)
    ctx["report"] = report
    _diff_cache[key] = report
110+
111+
112+
def expand(prefixed, prefixes):
    """Expand a prefixed name (``prefix:local``) to a full IRI.

    Args:
        prefixed: The value to expand, or ``None``.
        prefixes: Mapping of prefix to namespace IRI.

    Returns:
        ``None`` for ``None``; the input unchanged when it contains no colon
        or is already an absolute IRI (``scheme://...``) whose scheme is not a
        known prefix; otherwise the namespace for the prefix followed by the
        local part.

    Raises:
        ValueError: If the prefix is not in *prefixes* and the value is not
            an absolute IRI.
    """
    if prefixed is None or ":" not in prefixed:
        return prefixed

    prefix, local = prefixed.split(":", 1)
    if prefix in prefixes:
        return prefixes[prefix] + local
    if local.startswith("//"):
        # Something like "http://...": already a full IRI, not a prefixed
        # name, so there is nothing to expand.
        return prefixed
    raise ValueError(f"Unknown prefix: {prefix}")
121+
122+
123+
def camel_to_snake(name: str) -> str:
    """Convert a camelCase name to snake_case (``prefLabel`` -> ``pref_label``).

    Every uppercase character is lowercased and preceded by an underscore,
    except at the very start of the name, where no underscore is added
    (``PrefLabel`` -> ``pref_label``). All other characters are kept as-is.
    """
    pieces = []
    for index, ch in enumerate(name):
        if not ch.isupper():
            pieces.append(ch)
        elif index == 0:
            # A leading capital must not produce a leading underscore,
            # which would put a double underscore into the query key.
            pieces.append(ch.lower())
        else:
            pieces.append("_" + ch.lower())
    return "".join(pieces)
132+
133+
def build_query_key(operation: str, resource_type: str, prop_snake: str) -> str:
    """Build the report key for a property-level change.

    The ``datatype_`` and ``object_`` qualifiers are removed from
    *resource_type* before it is placed in the key, so both property kinds
    map to ``property``. The result has the form
    ``<operation>_property_<type>_<prop_snake>.rq``.
    """
    kind = resource_type
    for qualifier in ("datatype_", "object_"):
        kind = kind.replace(qualifier, "")
    return f"{operation}_property_{kind}_{prop_snake}.rq"
136+
137+
# this is only possible in Behave (e.g. {predicate:NullableString})
138+
# @parse.with_pattern(r'.*')
139+
# def parse_nullable_string(text):
140+
# return text
141+
# register_type(NullableString=parse_nullable_string)
142+
143+
144+
# pytest-bdd currently lacks support for optional parameters (empty cells in the feature) in parse, so we use a regex trick
145+
# @then(parsers.parse('the report should contain the change for "{type}","{instance}","{operation}","{predicate}","{old_value}","{new_value}"'))
146+
@then(
    parsers.re(
        r'the report should contain the change for "(?P<resource_type>[^"]*)","(?P<instance>[^"]*)","(?P<operation>[^"]*)","(?P<predicate>[^"]*)","(?P<old_value>[^"]*)","(?P<new_value>[^"]*)"'
    )
)
def assert_report_contains(ctx, resource_type, instance, operation, predicate, old_value, new_value):
    """Assert that the loaded report contains the change described by one example row.

    Args:
        ctx: Scenario context; ``ctx["report"]`` and ``ctx["prefixes"]`` are read.
        resource_type: One of ``SUPPORTED_TYPES`` (``data_property`` is
            accepted as an alias of ``datatype_property``).
        instance: Prefixed name of the resource the change applies to.
        operation: ``added``, ``deleted``, ``changed`` or ``updated``.
        predicate: Prefixed property name; required for ``changed`` and
            ``updated``, may be empty otherwise.
        old_value: Expected ``oldValue`` for ``updated`` rows.
        new_value: Expected ``newValue`` for ``updated`` rows, or the prefixed
            name of the expected ``newProperty`` for ``changed`` rows.

    Raises:
        AssertionError: If the report is missing, the combination of
            resource type and operation is unsupported, a required predicate
            is absent, or the expected change is not found in the report.
    """
    report = ctx.get("report")
    prefixes = ctx.get("prefixes")

    assert report is not None, "Report not found in context"

    # normalize inputs: blank table cells become None
    predicate = predicate.strip() or None
    new_value = new_value.strip() or None
    old_value = old_value.strip() or None
    if resource_type == "data_property":
        resource_type = "datatype_property"

    if resource_type not in SUPPORTED_TYPES or operation not in (
        "added",
        "deleted",
        "changed",
        "updated",
    ):
        raise AssertionError(
            f"Unsupported combination: resource_type={resource_type}, operation={operation}"
        )

    if operation in ("added", "deleted"):
        # unified handling for added/deleted instances
        key = f"{operation}_instance_{resource_type}.rq"
        assert key in report, f"Missing key {key} in report"
        full_instance = expand(instance, prefixes)
        bindings = report[key].get("results", {}).get("bindings", [])
        assert any(
            b.get("resource", {}).get("value") == full_instance for b in bindings
        ), f"{operation.capitalize()} {resource_type} {full_instance} not found in {key}"
        return

    # "changed" and "updated" are looked up per property, so the row must
    # name one; fail with a readable message instead of crashing on None.
    assert (
        predicate is not None and ":" in predicate
    ), f"Operation {operation} requires a prefixed predicate, got {predicate!r}"
    prop_local = predicate.split(":", 1)[1]
    key = build_query_key(operation, resource_type, camel_to_snake(prop_local))
    assert key in report, f"Missing key {key} in report"
    full_instance = expand(instance, prefixes)
    binding = _find_binding(report, key, full_instance)
    assert binding is not None, f"No binding for instance {full_instance} in {key}"

    if operation == "changed":
        # the given predicate is the old property and the given new_value
        # names the property that replaced it
        expectations = {
            "oldProperty": expand(predicate, prefixes),
            "newProperty": expand(new_value, prefixes),
        }
    else:
        # "updated": same predicate, compare its old and new literal values
        expectations = {"oldValue": old_value, "newValue": new_value}

    for field, expected in expectations.items():
        actual = binding.get(field, {}).get("value")
        assert actual == expected, f"{field} mismatch: expected {expected}, got {actual}"


def _find_binding(report, key, full_instance):
    """Return the first binding under ``report[key]`` whose resource value is *full_instance*, or None."""
    bindings = report[key].get("results", {}).get("bindings", [])
    return next(
        (b for b in bindings if b.get("resource", {}).get("value") == full_instance),
        None,
    )
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
# Minimal ePO test data for OWL-core profile
2+
3+
Given the following versions of a dataset:
4+
5+
- **old:** `ePO_sample-4.0.0.orig.ttl`
6+
- **new:** `ePO_sample-4.0.0.upd.ttl`
7+
8+
The **new** file is a _combined_ OWL and SHACL file that also contains
9+
embedded SHACL data, for testing retrieval of certain constraint information
10+
for added resources, such as the domain, range and cardinality, which would
11+
otherwise not be supported/available in the OWL-core profile.
12+
13+
The following are changes comparing **old** to **new**, where _redundant_
14+
refers to redundant appearances in the existing diff'ing/reporting, and _not
15+
captured_ to the non-appearance thereof. The latter relates to complex cases that are not supported:
16+
17+
1. added class **epo:AwardCriterion**
18+
2. class **epo:AwardCriterion** added `skos:prefLabel` (redundant, from added class)
19+
3. class **epo:AwardCriterion** added `skos:definition` (redundant, from added class)
20+
4. class **epo:AwardCriterion** added `rdfs:subClassOf` (redundant, from added class)
21+
5. class **epo:AwardCriterion** added `rdfs:isDefinedBy` (redundant, from added class)
22+
6. deleted class **epo:AdHocChannel**
23+
7. class **epo:AdHocChannel** deleted `skos:prefLabel` (redundant, from deleted class)
24+
8. class **epo:AdHocChannel** deleted `skos:definition` (redundant, from deleted class)
25+
9. class **epo:AdHocChannel** deleted `rdfs:subClassOf` (redundant, from deleted class)
26+
10. class **epo:AdHocChannel** deleted `rdfs:isDefinedBy` (redundant, from deleted class)
27+
11. class **epo:AcquiringCentralPurchasingBody** `skos:prefLabel` changed to `rdfs:label`
28+
12. class **epo:AcquiringCentralPurchasingBody** added `rdfs:label` (redundant, from changed property)
29+
13. class **epo:AcquiringCentralPurchasingBody** deleted `skos:prefLabel` (redundant, from changed property)
30+
14. class **epo:Document** added `skos:prefLabel` lang _es_
31+
1. class **epo:AccessTerm** deleted `skos:prefLabel`
32+
2. class **epo:AwardCriteriaSummary** updated `skos:prefLabel` (new value "Award criteria summarization"; original value "Award criteria summary" moved to `skos:altLabel`)
33+
3. class **epo:AwardCriteriaSummary** changed `skos:prefLabel` to `skos:altLabel` (cross-property move of original `skos:prefLabel` to `skos:altLabel`; could be ignored as the original property was retained with a new value)
34+
4. class **epo:AwardCriteriaSummary** added `skos:altLabel` (redundant, from changed property; could be considered non-redundant if the cross-property move is ignored)
35+
5. added objectProperty **epo:followsRulesSetBy** with domain `epo:PurchaseContract`, range `epo:FrameworkAgreement` and maxCardinality 1
36+
6. objectProperty **epo:followsRulesSetBy** added `skos:prefLabel` (redundant, from added objectProperty)
37+
7. objectProperty **epo:followsRulesSetBy** added `rdfs:isDefinedBy` (redundant, from added objectProperty)
38+
8. deleted objectProperty **epo:exposesChannel**
39+
9. objectProperty **epo:exposesChannel** deleted `skos:prefLabel` (redundant, from deleted objectProperty)
40+
10. objectProperty **epo:exposesChannel** deleted `rdfs:isDefinedBy` (redundant, from deleted objectProperty)
41+
11. objectProperty **epo:exposesInvoiceeChannel** added `rdfs:label`
42+
12. objectProperty **epo:describesResultNotice** added `skos:altLabel`
43+
13. added datatypeProperty **epo:describesObjectiveParticipationRules**
44+
14. datatypeProperty **epo:describesObjectiveParticipationRules** added `skos:prefLabel` (redundant, from added datatypeProperty)
45+
15. datatypeProperty **epo:describesObjectiveParticipationRules** added `rdfs:isDefinedBy` (redundant, from added datatypeProperty)
46+
16. deleted datatypeProperty **epo:describesProfessionRelevantLaw**
47+
17. datatypeProperty **epo:describesProfessionRelevantLaw** deleted `skos:prefLabel` (redundant, from deleted datatypeProperty)
48+
18. datatypeProperty **epo:describesProfessionRelevantLaw** deleted `rdfs:isDefinedBy` (redundant, from deleted datatypeProperty)
49+
19. datatypeProperty **epo:describesProfession** added `rdfs:label` no lang
50+
20. datatypeProperty **epo:describesVerificationMethod** converted to objectProperty (not captured)
51+
21. objectProperty **epo:distributesOffer** deleted `skos:prefLabel` lang (not captured)
52+
22. objectProperty **epo:actsOnBehalfOf** updated `skos:prefLabel` lang _en_ to _de_ (not captured)
53+
54+
The files are used by the test suite specified in [owl_diff.feature](../../features/owl_diff.feature) and implemented in [test_owl_diff_steps.py](../../steps/test_owl_diff_steps.py). The above description can also be used to facilitate manual tests.

0 commit comments

Comments
 (0)