Skip to content

Commit 494e90f

Browse files
committed
feat: add SHACL validation tool for RDF data
1 parent 6d4fabb commit 494e90f

1 file changed

Lines changed: 228 additions & 0 deletions

File tree

tools/validate_shacl.py

Lines changed: 228 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,228 @@
1+
#!/usr/bin/env python3
2+
"""
3+
SHACL Validation Tool for Gimie
4+
5+
This script validates RDF instance data against SHACL shapes to ensure
6+
that the data model is consistent with the ontology definition.
7+
8+
It can be used to detect when changes to models.py require updates to:
9+
1. The SHACL ontology shapes
10+
2. Example instance data
11+
12+
Usage:
13+
python validate_shacl.py --data DATA_FILE [--shapes SHAPES_FILE] [--verbose]
14+
15+
Examples:
16+
# Validate a data file against the default shapes
17+
python validate_shacl.py --data custom_data.ttl
18+
19+
# Validate specific files
20+
python validate_shacl.py --shapes custom_shapes.ttl --data custom_data.ttl
21+
22+
# Verbose output with detailed validation report
23+
python validate_shacl.py --data custom_data.ttl --verbose
24+
25+
Exit codes:
26+
0: Validation successful (data conforms to shapes)
27+
1: Validation failed (data does not conform to shapes)
28+
2: Error (missing files, parse errors, etc.)
29+
"""
30+
31+
import argparse
32+
import sys
33+
from pathlib import Path
34+
from typing import Optional, Tuple
35+
36+
try:
37+
import pyshacl
38+
from rdflib import Graph
39+
except ImportError:
40+
print("Error: Required packages not installed.")
41+
print("Please install them with: pip install pyshacl rdflib")
42+
sys.exit(2)
43+
44+
45+
def load_graph(file_path: Path, format_hint: Optional[str] = None) -> Graph:
    """
    Load an RDF graph from a file.

    Args:
        file_path: Path to the RDF file
        format_hint: Optional rdflib format name (turtle, nt, xml, json-ld,
            etc.). When omitted, the format is picked from the file
            extension, falling back to Turtle for unrecognised extensions.

    Returns:
        Loaded RDF graph

    Raises:
        FileNotFoundError: If the file doesn't exist
        Exception: If parsing fails. The underlying parser error is chained
            as ``__cause__`` so its traceback is not lost.
    """
    if not file_path.exists():
        raise FileNotFoundError(f"File not found: {file_path}")

    # Auto-detect format if not provided
    if format_hint is None:
        suffix_formats = {
            '.ttl': 'turtle',
            '.turtle': 'turtle',
            '.nt': 'nt',
            '.xml': 'xml',
            '.rdf': 'xml',
            '.jsonld': 'json-ld',
        }
        # Unknown extensions are treated as Turtle (the default).
        format_hint = suffix_formats.get(file_path.suffix.lower(), 'turtle')

    graph = Graph()
    try:
        graph.parse(file_path, format=format_hint)
    except Exception as e:
        # Keep the generic Exception type callers already catch, but chain
        # the original error so the parser traceback stays available.
        raise Exception(f"Failed to parse {file_path}: {e}") from e
    return graph
83+
84+
85+
def validate_with_shacl(data_graph: Graph, shapes_graph: Graph, verbose: bool = False) -> Tuple[bool, str]:
    """
    Validate data graph against SHACL shapes.

    Args:
        data_graph: RDF graph containing the data to validate
        shapes_graph: RDF graph containing SHACL shapes
        verbose: Whether to include detailed validation report. Also passed
            to pyshacl as its ``debug`` flag.

    Returns:
        Tuple of (is_valid, report_text). A detailed report is produced when
        ``verbose`` is set or the data does not conform; otherwise a short
        success message is returned.

        Any exception raised during validation is NOT propagated: it is
        reported as ``(False, "SHACL validation failed with error: ...")``,
        so callers cannot distinguish it from non-conformance by the flag
        alone.
    """
    try:
        # Run SHACL validation
        conforms, results_graph, results_text = pyshacl.validate(
            data_graph=data_graph,
            shacl_graph=shapes_graph,
            inference='rdfs',
            debug=verbose,
            serialize_report_graph='turtle',
        )

        if not verbose and conforms:
            return conforms, "Validation passed successfully!"

        parts = ["Validation Results:\n", f"Conforms: {conforms}\n\n"]

        if results_text:
            parts.append(f"Validation Report:\n{results_text}\n")

        if not conforms and results_graph:
            # Include results graph in the report for detailed analysis
            parts.append("\nDetailed Results (Turtle):\n")
            detailed = results_graph
            if hasattr(detailed, 'serialize'):
                detailed = detailed.serialize(format='turtle')
            # Because serialize_report_graph is set, pyshacl hands back the
            # report already serialized, which may be bytes; Graph.serialize
            # may also yield bytes on older rdflib (assumption — confirm for
            # the pinned versions). Decode so the report does not contain a
            # b'...' repr with escaped newlines.
            if isinstance(detailed, (bytes, bytearray)):
                detailed = bytes(detailed).decode('utf-8', errors='replace')
            parts.append(str(detailed))

        return conforms, "".join(parts)

    except Exception as e:
        return False, f"SHACL validation failed with error: {e}"
128+
129+
130+
def _find_git_root(start: Path) -> Optional[Path]:
    """Return the nearest directory at or above *start* containing ``.git``, or None."""
    for candidate in (start, *start.parents):
        if (candidate / '.git').exists():
            return candidate
    return None


def main():
    """
    Command-line entry point.

    Parses the arguments, resolves relative paths against the enclosing git
    repository root (or the current directory when no repository is found),
    loads both graphs, runs the validation and prints a report.

    Exits the process via ``sys.exit``:
        0: data conforms to the shapes
        1: validation reported non-conformance
        2: missing file or any other unexpected error
    """
    # __doc__ is None when Python runs with -OO; guard so --help still works.
    module_doc = __doc__ or ""
    parser = argparse.ArgumentParser(
        description="Validate RDF data against SHACL shapes",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=module_doc.split("Usage:", 1)[1] if "Usage:" in module_doc else "",
    )

    parser.add_argument(
        '--shapes', '-s',
        type=Path,
        default=Path('gimie/shacl/gimie_shacl.ttl'),
        help='Path to SHACL shapes file (default: gimie/shacl/gimie_shacl.ttl)',
    )

    parser.add_argument(
        '--data', '-d',
        type=Path,
        required=True,
        help='Path to RDF data file to validate',
    )

    parser.add_argument(
        '--verbose', '-v',
        action='store_true',
        help='Enable verbose output with detailed validation reports',
    )

    args = parser.parse_args()

    # Try to find the git root so relative paths work from any subdirectory
    current_dir = Path.cwd()
    gitroot = _find_git_root(current_dir)
    if gitroot is None:
        print("Warning: Not running from git repository root. Some paths may be incorrect.")
        # Without a repository, resolve against the current directory rather
        # than the filesystem root the upward walk would otherwise end on.
        gitroot = current_dir

    # Resolve paths relative to git root or current directory
    shapes_file = args.shapes if args.shapes.is_absolute() else gitroot / args.shapes
    data_file = args.data if args.data.is_absolute() else gitroot / args.data

    print("SHACL Validation Tool")
    print("====================")
    print(f"Shapes file: {shapes_file}")
    print(f"Data file: {data_file}")
    print(f"Verbose: {args.verbose}")
    print()

    try:
        # Load SHACL shapes
        print("Loading SHACL shapes...")
        shapes_graph = load_graph(shapes_file)
        print(f"✓ Loaded {len(shapes_graph)} triples from shapes file")

        # Load data
        print("Loading RDF data...")
        data_graph = load_graph(data_file)
        print(f"✓ Loaded {len(data_graph)} triples from data file")

        # Run validation
        print("\nRunning SHACL validation...")
        # NOTE(review): validate_with_shacl returns (False, message) for its
        # own internal errors, so those currently exit with 1, not 2.
        is_valid, report = validate_with_shacl(data_graph, shapes_graph, args.verbose)

        # Print results
        print("\nValidation Results:")
        print("==================")
        if is_valid:
            print("✅ VALIDATION PASSED")
            print("The instance data conforms to the SHACL shapes.")
        else:
            print("❌ VALIDATION FAILED")
            print("The instance data does NOT conform to the SHACL shapes.")

        print("\nReport:")
        print("-------")
        print(report)

        # Exit with appropriate code (SystemExit is not caught by the
        # `except Exception` handler below).
        sys.exit(0 if is_valid else 1)

    except FileNotFoundError as e:
        print(f"❌ Error: {e}")
        print("\nMake sure you're running this script from the repository root,")
        print("or provide correct paths using --shapes and --data arguments.")
        sys.exit(2)

    except Exception as e:
        print(f"❌ Unexpected error: {e}")
        sys.exit(2)
225+
226+
227+
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == '__main__':
228+
main()

0 commit comments

Comments
 (0)