|
1 | 1 | import pytest |
2 | 2 | from rdflib import Graph, URIRef, Literal, Namespace |
3 | 3 | from strings2things.app.core.rdf_transformer import RDFTransformer |
4 | | -from strings2things.app.core.transformation_log import TransformationLog |
5 | 4 |
|
6 | | -EX = Namespace("http://example.org/") |
| 5 | +EX = Namespace("http://example.org/ontology#") |
7 | 6 |
|
8 | 7 |
|
9 | 8 | @pytest.fixture |
10 | 9 | def label_map(): |
| 10 | + # Ontology label map: canonical labels → IRIs |
11 | 11 | return { |
12 | | - "geology": "http://example.org/ontology#Geology", |
13 | | - "biology": "http://example.org/ontology#Biology", |
| 12 | + "geology": str(EX.Geology), |
| 13 | + "biology": str(EX.Biology), |
| 14 | + "physics": str(EX.Physics), |
14 | 15 | } |
15 | 16 |
|
16 | 17 |
|
17 | 18 | @pytest.fixture |
18 | 19 | def input_graph(): |
19 | 20 | g = Graph() |
20 | | - g.add((EX.subj1, EX.hasCategory, Literal("Geology"))) |
21 | | - g.add((EX.subj2, EX.hasCategory, Literal("UnknownLabel"))) |
22 | | - g.add((EX.subj3, EX.hasValue, URIRef("http://example.org/someIRI"))) |
| 21 | + # Exact match example |
| 22 | + g.add((EX.subj1, EX.hasCategory, Literal("geology"))) |
| 23 | + |
| 24 | + # Fuzzy match example (slightly misspelled) |
| 25 | + g.add((EX.subj2, EX.hasCategory, Literal("biolgy"))) |
| 26 | + |
| 27 | + # Unknown label (should remain unchanged) |
| 28 | + g.add((EX.subj3, EX.hasCategory, Literal("unknownlabel"))) |
| 29 | + |
| 30 | + # Non-literal value (should remain untouched) |
| 31 | + g.add((EX.subj4, EX.hasValue, URIRef("http://example.org/someIRI"))) |
23 | 32 | return g |
24 | 33 |
|
25 | 34 |
|
26 | | -def test_rdf_transformer(label_map, input_graph): |
27 | | - transformer = RDFTransformer(label_map) |
| 35 | +def test_rdf_transformer_combined(label_map, input_graph): |
| 36 | + # Initialize transformer with fuzzy matching enabled |
| 37 | + transformer = RDFTransformer(label_map, fuzzy=True, fuzzy_threshold=90) |
28 | 38 | output_graph = transformer.transform(input_graph) |
29 | 39 |
|
30 | | - # --- Check graph triples --- |
31 | | - # Original triple must remain |
32 | | - assert (EX.subj1, EX.hasCategory, Literal("Geology")) in output_graph |
| 40 | + # --- 1. Check graph triples --- |
33 | 41 |
|
34 | | - # Transformed triple must be present |
35 | | - expected_iri = URIRef("http://example.org/ontology#Geology") |
36 | | - assert (EX.subj1, EX.hasCategory, expected_iri) in output_graph |
| 42 | + # Exact match: literal replaced by IRI |
| 43 | + assert (EX.subj1, EX.hasCategory, URIRef("http://example.org/ontology#Geology")) in output_graph |
37 | 44 |
|
38 | | - # UnknownLabel should remain unchanged (no IRI added) |
39 | | - assert (EX.subj2, EX.hasCategory, Literal("UnknownLabel")) in output_graph |
40 | | - assert len([t for t in output_graph.triples((EX.subj2, EX.hasCategory, None))]) == 1 |
| 45 | + # Fuzzy match: literal replaced by IRI |
| 46 | + assert (EX.subj2, EX.hasCategory, URIRef("http://example.org/ontology#Biology")) in output_graph |
41 | 47 |
|
42 | | - # Non-literals should remain untouched |
43 | | - assert (EX.subj3, EX.hasValue, URIRef("http://example.org/someIRI")) in output_graph |
| 48 | + # Unknown label: literal remains unchanged |
| 49 | + assert (EX.subj3, EX.hasCategory, Literal("unknownlabel")) in output_graph |
44 | 50 |
|
45 | | - # --- Check log entries --- |
| 51 | + # Non-literals remain untouched |
| 52 | + assert (EX.subj4, EX.hasValue, URIRef("http://example.org/someIRI")) in output_graph |
| 53 | + |
| 54 | + # --- 2. Check log entries --- |
46 | 55 | log = transformer.log.entries |
47 | | - geology_log = next(e for e in log if e["original"] == "Geology") |
| 56 | + |
| 57 | + # Exact match log |
| 58 | + geology_log = next(e for e in log if e["original"] == "geology") |
48 | 59 | assert geology_log["replacement"] == "http://example.org/ontology#Geology" |
49 | | - assert geology_log["reason"] == "unambiguous match" |
| 60 | + assert geology_log["reason"] == "exact match" |
| 61 | + |
| 62 | + # Fuzzy match log |
| 63 | + biolgy_log = next(e for e in log if e["original"] == "biolgy") |
| 64 | + assert biolgy_log["replacement"] == "http://example.org/ontology#Biology" |
| 65 | + assert "fuzzy match" in biolgy_log["reason"] |
50 | 66 |
|
51 | | - unknown_log = next(e for e in log if e["original"] == "UnknownLabel") |
| 67 | + # Unknown label log |
| 68 | + unknown_log = next(e for e in log if e["original"] == "unknownlabel") |
52 | 69 | assert unknown_log["replacement"] is None |
53 | | - assert unknown_log["reason"] == "no match in label map" |
| 70 | + assert unknown_log["reason"] == "no match found" |
0 commit comments