|
| 1 | +from enum import Enum |
| 2 | +import json |
| 3 | +import os |
| 4 | +import subprocess |
| 5 | +from pathlib import Path |
| 6 | + |
| 7 | +import pytest |
| 8 | +from pytest_bdd import given, when, then, scenario, parsers |
| 9 | + |
| 10 | +from diffTests import PROJECT_DIR_PATH, TEST_DATA_DIR |
| 11 | + |
| 12 | + |
# Script launched by the "the diff is run" step, and the base URL handed to it.
SCRIPT_PATH = PROJECT_DIR_PATH / "rdf-differ-ws" / "bash" / "rdf-differ.sh"
BASE_URL = os.getenv("RDF_DIFFER_BASE_URL", "http://localhost:4030")

# Stored report that is loaded instead of running the script when
# REUSE_SAVED_REPORT is true (the default; override through the environment).
SAVED_REPORT = TEST_DATA_DIR / "ePO_sample-4.0.0-upd_diff-report.json"
REUSE_SAVED_REPORT = (
    os.getenv("RDF_DIFFER_REUSE_SAVED_REPORT", "true").lower() in ("1", "true", "yes")
)

# Reports keyed by (old, new) so the diff is produced once, not once per scenario.
_diff_cache = dict()

# Resource types the report assertions know how to check.
SUPPORTED_TYPES = ("class", "datatype_property", "object_property")
| 24 | + |
@scenario("../features/owl_diff.feature", "Diffing example resources in the OWL sample")
def test_owl_diff_feature():
    """Bind the OWL diff scenario; the step functions in this module do the work."""
| 28 | + |
| 29 | + |
@pytest.fixture
def ctx(tmp_path):
    """Provide a dict used to pass state from one step to the next.

    The dict initially holds only pytest's ``tmp_path`` under ``"tmpdir"``.
    """
    state = {}
    state["tmpdir"] = tmp_path
    return state
| 34 | + |
| 35 | + |
@given("the test prefixes are defined")
def prefixes(ctx):
    """Store the hardcoded namespace prefixes in ``ctx["prefixes"]`` and return them.

    The feature file uses prefixed names while the diff reports are RDF/JSON
    with no prefixes, so this mapping is what converts between the two.
    """
    namespace_map = dict(
        epo="http://data.europa.eu/a4g/ontology#",
        skos="http://www.w3.org/2004/02/skos/core#",
        rdfs="http://www.w3.org/2000/01/rdf-schema#",
    )
    ctx["prefixes"] = namespace_map
    return namespace_map
| 46 | + |
| 47 | + |
@given(parsers.parse('the OWL files "{old}" and "{new}"'))
def owl_files(ctx, old, new):
    """Record the two OWL file paths from the step text in ``ctx`` and return it."""
    # Path() only normalises the spelling of the path; a relative path stays
    # relative (nothing here resolves it against a directory).
    for slot, raw in (("old", old), ("new", new)):
        ctx[slot] = str(Path(raw))
    return ctx
| 54 | + |
| 55 | + |
@when("the diff is run")
def run_diff(ctx):
    """Load the diff report for ``ctx["old"]``/``ctx["new"]`` into ``ctx["report"]``.

    The report comes from, in order of preference:

    1. the module-level cache, when this (old, new) pair was already handled;
    2. the saved report file, when ``REUSE_SAVED_REPORT`` is true;
    3. a fresh run of the diff script writing JSON into the test's temp dir.

    Raises:
        AssertionError: if the script exits non-zero or the report file is
            missing.
    """
    old = ctx["old"]
    new = ctx["new"]

    # Already-run diffs are remembered to speed up the tests (the cache is
    # filled at the end of this function).
    key = (old, new)
    if key in _diff_cache:
        ctx["report"] = _diff_cache[key]
        return

    here = os.path.dirname(__file__)

    if REUSE_SAVED_REPORT:
        # Use the pre-existing report: faster testing/debugging of this test,
        # skipping the building of the report.
        report_file = Path(os.path.abspath(os.path.join(here, SAVED_REPORT)))
    else:
        # Run the full workflow producing JSON output into the temporary dir;
        # this should be the normal way.
        # WARNING: as this runs an async call, sometimes this can fail due to
        # race conditions (the Celery task queue may be empty if called too
        # fast or too late).
        script = os.path.abspath(os.path.join(here, SCRIPT_PATH))
        outdir = str(ctx["tmpdir"])
        profile = "owl-core-en-only"
        result = subprocess.run(
            [
                script,
                "--base-url",
                BASE_URL,
                "--old",
                old,
                "--new",
                new,
                "--ap",
                profile,
                "--template",
                "json",
                "--output",
                outdir,
                "full",
            ],
            # Capture the streams: without this result.stderr/result.stdout
            # are None and the failure message below carries no information.
            capture_output=True,
            text=True,
        )

        assert (
            result.returncode == 0
        ), f"Diff script failed: {result.stderr}\n{result.stdout}"
        report_file = Path(outdir) / "diff.json"

    assert report_file.exists(), f"Report file not found: {report_file}"
    # Explicit encoding so parsing does not depend on the platform default.
    with open(report_file, encoding="utf-8") as fh:
        report = json.load(fh)
    ctx["report"] = report
    _diff_cache[key] = report
| 110 | + |
| 111 | + |
def expand(prefixed, prefixes):
    """Turn a ``prefix:local`` name into a full IRI using ``prefixes``.

    ``None`` and strings without a colon are returned unchanged. Only the
    first colon separates the prefix from the local part.

    Raises:
        ValueError: if the prefix is not a key of ``prefixes``.
    """
    if prefixed is None:
        return None

    prefix, colon, local_name = prefixed.partition(":")
    if not colon:
        return prefixed

    if prefix in prefixes:
        return prefixes[prefix] + local_name
    raise ValueError(f"Unknown prefix: {prefix}")
| 121 | + |
| 122 | + |
def camel_to_snake(name: str) -> str:
    """Convert camelCase or mixed case to snake_case (prefLabel -> pref_label).

    Every uppercase character is replaced by an underscore followed by its
    lowercase form; all other characters are kept as they are.
    """
    return "".join(f"_{ch.lower()}" if ch.isupper() else ch for ch in name)
| 132 | + |
def build_query_key(operation: str, resource_type: str, prop_snake: str) -> str:
    """Build the report key for a property-level query.

    The ``datatype_`` and ``object_`` markers are removed from
    ``resource_type`` before it is placed in the key.
    """
    normalized = resource_type
    for marker in ("datatype_", "object_"):
        normalized = normalized.replace(marker, "")
    return "_".join((operation, "property", normalized, prop_snake)) + ".rq"
| 136 | + |
| 137 | +# this is only possible in Behave (e.g. {predicate:NullableString}) |
| 138 | +# @parse.with_pattern(r'.*') |
| 139 | +# def parse_nullable_string(text): |
| 140 | +# return text |
| 141 | +# register_type(NullableString=parse_nullable_string) |
| 142 | + |
| 143 | + |
| 144 | +# pytest-bdd currently lacks support for optional parameters (empty cells in the feature) in parse, so we use a regex trick |
| 145 | +# @then(parsers.parse('the report should contain the change for "{type}","{instance}","{operation}","{predicate}","{old_value}","{new_value}"')) |
def _result_bindings(report, key):
    """Return the result bindings stored under ``key``, asserting the key exists."""
    assert key in report, f"Missing key {key} in report"
    return report[key].get("results", {}).get("bindings", [])


def _binding_for_instance(bindings, full_instance, key):
    """Return the first binding whose resource value is ``full_instance``."""
    binding = next(
        (b for b in bindings if b.get("resource", {}).get("value") == full_instance),
        None,
    )
    assert binding is not None, f"No binding for instance {full_instance} in {key}"
    return binding


def _assert_binding_value(binding, field, expected):
    """Assert that ``binding[field]["value"]`` equals ``expected``."""
    actual = binding.get(field, {}).get("value")
    assert actual == expected, f"{field} mismatch: expected {expected}, got {actual}"


@then(
    parsers.re(
        r'the report should contain the change for "(?P<resource_type>[^"]*)","(?P<instance>[^"]*)","(?P<operation>[^"]*)","(?P<predicate>[^"]*)","(?P<old_value>[^"]*)","(?P<new_value>[^"]*)"'
    )
)
def assert_report_contains(ctx, resource_type, instance, operation, predicate, old_value, new_value):
    """Check that the report in ``ctx`` contains the change described by the step.

    Args:
        ctx: step context; ``ctx["report"]`` and ``ctx["prefixes"]`` are read.
        resource_type: one of ``SUPPORTED_TYPES`` (``data_property`` is
            accepted as an alias of ``datatype_property``).
        instance: prefixed name of the resource the change is about.
        operation: ``added``, ``deleted``, ``changed`` or ``updated``.
        predicate: prefixed property name; required for ``changed`` and
            ``updated``, may be empty otherwise.
        old_value: expected old value for ``updated``; may be empty.
        new_value: expected new value (``updated``) or new property
            (``changed``); may be empty.

    Raises:
        AssertionError: if the change is not found, the combination of
            resource type and operation is unsupported, or a required
            predicate is missing.
    """
    report = ctx.get("report")
    prefixes = ctx.get("prefixes")

    assert report is not None, "Report not found in context"

    # Empty cells in the feature table arrive as empty strings; treat as None.
    predicate = predicate.strip() or None
    new_value = new_value.strip() or None
    old_value = old_value.strip() or None
    if resource_type == "data_property":
        resource_type = "datatype_property"

    known_operation = operation in ("added", "deleted", "changed", "updated")
    if not known_operation or resource_type not in SUPPORTED_TYPES:
        raise AssertionError(
            f"Unsupported combination: resource_type={resource_type}, operation={operation}"
        )

    if operation in ("added", "deleted"):
        # Unified handling for added/deleted instances.
        key = f"{operation}_instance_{resource_type}.rq"
        bindings = _result_bindings(report, key)
        full_instance = expand(instance, prefixes)
        assert any(
            b.get("resource", {}).get("value") == full_instance for b in bindings
        ), f"{operation.capitalize()} {resource_type} {full_instance} not found in {key}"
        return

    # "changed" and "updated" are looked up per property, so the predicate
    # must be present and prefixed; fail with a readable message otherwise.
    assert predicate is not None and ":" in predicate, (
        f"A prefixed predicate is required for operation={operation}, got {predicate!r}"
    )
    prop_local = predicate.split(":", 1)[1]
    key = build_query_key(operation, resource_type, camel_to_snake(prop_local))
    bindings = _result_bindings(report, key)
    full_instance = expand(instance, prefixes)
    binding = _binding_for_instance(bindings, full_instance, key)

    if operation == "changed":
        # The given predicate is the oldProperty and the given new value is
        # the newProperty; both are prefixed names in the feature file.
        expected_old = expand(predicate, prefixes)
        expected_new = expand(new_value, prefixes)
        _assert_binding_value(binding, "oldProperty", expected_old)
        _assert_binding_value(binding, "newProperty", expected_new)
    else:
        # "updated": compare the old and new values of the given predicate
        # for the given instance.
        _assert_binding_value(binding, "oldValue", old_value)
        _assert_binding_value(binding, "newValue", new_value)
0 commit comments