1+ #!/usr/bin/env python3
2+ """
3+ SHACL Validation Tool for Gimie
4+
5+ This script validates RDF instance data against SHACL shapes to ensure
6+ that the data model is consistent with the ontology definition.
7+
8+ It can be used to detect when changes to models.py require updates to:
9+ 1. The SHACL ontology shapes
10+ 2. Example instance data
11+
Usage:
    python validate_shacl.py --data DATA_FILE [--shapes SHAPES_FILE] [--verbose]

Examples:
    # Validate a data file against the default shapes file
    python validate_shacl.py --data example_data.ttl

    # Validate specific files
    python validate_shacl.py --shapes custom_shapes.ttl --data custom_data.ttl

    # Verbose output with detailed validation report
    python validate_shacl.py --data example_data.ttl --verbose
24+
25+ Exit codes:
26+ 0: Validation successful (data conforms to shapes)
27+ 1: Validation failed (data does not conform to shapes)
28+ 2: Error (missing files, parse errors, etc.)
29+ """
30+
31+ import argparse
32+ import sys
33+ from pathlib import Path
34+ from typing import Optional , Tuple
35+
36+ try :
37+ import pyshacl
38+ from rdflib import Graph
39+ except ImportError :
40+ print ("Error: Required packages not installed." )
41+ print ("Please install them with: pip install pyshacl rdflib" )
42+ sys .exit (2 )
43+
44+
def load_graph(file_path: Path, format_hint: Optional[str] = None) -> Graph:
    """
    Load an RDF graph from a file.

    When no format is given, it is inferred from the file extension
    (case-insensitive). Unrecognised extensions are parsed as Turtle.

    Args:
        file_path: Path to the RDF file
        format_hint: Optional format hint (turtle, nt, xml, json-ld, ...).
            When provided it is passed to the parser as-is and no
            extension-based detection takes place.

    Returns:
        Loaded RDF graph

    Raises:
        FileNotFoundError: If the file doesn't exist
        Exception: If parsing fails; the original parser error is attached
            as the exception's ``__cause__``
    """
    if not file_path.exists():
        raise FileNotFoundError(f"File not found: {file_path} ")

    # Auto-detect format if not provided
    if format_hint is None:
        suffix_formats = {
            '.ttl': 'turtle',
            '.turtle': 'turtle',
            '.nt': 'nt',
            '.xml': 'xml',
            '.rdf': 'xml',
            '.jsonld': 'json-ld',
        }
        # Turtle is the fallback for any extension not listed above.
        format_hint = suffix_formats.get(file_path.suffix.lower(), 'turtle')

    graph = Graph()
    try:
        graph.parse(file_path, format=format_hint)
    except Exception as e:
        # Re-raise with the file name for context, keeping the parser error
        # reachable through explicit exception chaining.
        raise Exception(f"Failed to parse {file_path} : {e} ") from e
    return graph
84+
def _report_graph_as_text(results_graph) -> str:
    """Return the SHACL report graph as Turtle text, whatever form it arrived in."""
    if hasattr(results_graph, 'serialize'):
        # Still a graph object: serialise it here.
        results_graph = results_graph.serialize(format='turtle')
    if isinstance(results_graph, (bytes, bytearray)):
        # A pre-serialised report (or the output of an older rdflib
        # ``serialize``) is bytes; str() on it would yield a "b'...'" repr
        # with escaped newlines, so decode it instead.
        return results_graph.decode('utf-8')
    return str(results_graph)


def validate_with_shacl(data_graph: Graph, shapes_graph: Graph, verbose: bool = False) -> Tuple[bool, str]:
    """
    Validate data graph against SHACL shapes.

    Validation runs with RDFS inference enabled. Any exception raised while
    validating is caught and reported through the return value as a failed
    validation rather than propagated to the caller.

    Args:
        data_graph: RDF graph containing the data to validate
        shapes_graph: RDF graph containing SHACL shapes
        verbose: Whether to include detailed validation report (also passed
            to pyshacl as its ``debug`` flag)

    Returns:
        Tuple of (is_valid, report_text). When the data conforms and
        ``verbose`` is false, the report is a short success message. If
        validation itself raised, the tuple is ``(False, <error message>)``.
    """
    try:
        # Run SHACL validation
        conforms, results_graph, results_text = pyshacl.validate(
            data_graph=data_graph,
            shacl_graph=shapes_graph,
            inference='rdfs',
            debug=verbose,
            serialize_report_graph='turtle',
        )

        if not (verbose or not conforms):
            return conforms, "Validation passed successfully!"

        # Assemble the detailed report piece by piece.
        parts = [
            "Validation Results:\n ",
            f"Conforms: {conforms} \n \n ",
        ]

        if results_text:
            parts.append(f"Validation Report:\n {results_text} \n ")

        if not conforms and results_graph:
            # Include results graph in the report for detailed analysis
            parts.append("\n Detailed Results (Turtle):\n ")
            parts.append(_report_graph_as_text(results_graph))

        return conforms, "".join(parts)

    except Exception as e:
        return False, f"SHACL validation failed with error: {e} "
128+
129+
def _find_git_root(start: Path) -> Optional[Path]:
    """Return the nearest directory at or above *start* that contains ``.git``, or None."""
    for candidate in (start, *start.parents):
        if (candidate / '.git').exists():
            return candidate
    return None


def _resolve_input_path(path: Path, git_root: Optional[Path], cwd: Path) -> Path:
    """Resolve a user-supplied path against the git root, falling back to the current directory."""
    if path.is_absolute():
        return path
    if git_root is not None:
        rooted = git_root / path
        # Prefer the repository-relative location; only switch to the
        # cwd-relative one when it is the only candidate that exists.
        if rooted.exists() or not (cwd / path).exists():
            return rooted
    return cwd / path


def main():
    """
    Command-line entry point: parse arguments, load both graphs, validate.

    Relative ``--shapes`` / ``--data`` paths are resolved against the
    enclosing git repository root when one is found, otherwise against the
    current working directory.

    Exits the process with:
        0: the data conforms to the shapes
        1: validate_with_shacl reported the data as not conforming
        2: a file is missing or any other exception was raised while
           loading or validating
    """
    # __doc__ is None when docstrings are stripped (python -OO).
    module_doc = __doc__ or ""
    parser = argparse.ArgumentParser(
        description="Validate RDF data against SHACL shapes",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=module_doc.split("Usage:")[1] if "Usage:" in module_doc else "",
    )

    parser.add_argument(
        '--shapes', '-s',
        type=Path,
        default=Path('gimie/shacl/gimie_shacl.ttl'),
        help='Path to SHACL shapes file (default: gimie/shacl/gimie_shacl.ttl)',
    )

    parser.add_argument(
        '--data', '-d',
        type=Path,
        required=True,
        help='Path to RDF data file to validate',
    )

    parser.add_argument(
        '--verbose', '-v',
        action='store_true',
        help='Enable verbose output with detailed validation reports',
    )

    args = parser.parse_args()

    # Try to find the git root so repository-relative defaults work from
    # any sub-directory of the checkout.
    current_dir = Path.cwd()
    gitroot = _find_git_root(current_dir)
    if gitroot is None:
        print("Warning: Not running from git repository root. Some paths may be incorrect.")

    # Resolve paths relative to git root or current directory
    shapes_file = _resolve_input_path(args.shapes, gitroot, current_dir)
    data_file = _resolve_input_path(args.data, gitroot, current_dir)

    print("SHACL Validation Tool")
    print("====================")
    print(f"Shapes file: {shapes_file} ")
    print(f"Data file: {data_file} ")
    print(f"Verbose: {args.verbose} ")
    print()

    try:
        # Load SHACL shapes
        print("Loading SHACL shapes...")
        shapes_graph = load_graph(shapes_file)
        print(f"✓ Loaded {len(shapes_graph)} triples from shapes file")

        # Load data
        print("Loading RDF data...")
        data_graph = load_graph(data_file)
        print(f"✓ Loaded {len(data_graph)} triples from data file")

        # Run validation
        print("\n Running SHACL validation...")
        is_valid, report = validate_with_shacl(data_graph, shapes_graph, args.verbose)

        # Print results
        print("\n Validation Results:")
        print("==================")
        if is_valid:
            print("✅ VALIDATION PASSED")
            print("The instance data conforms to the SHACL shapes.")
        else:
            print("❌ VALIDATION FAILED")
            print("The instance data does NOT conform to the SHACL shapes.")

        print("\n Report:")
        print("-------")
        print(report)

        # Exit with appropriate code. SystemExit is not an Exception
        # subclass, so the handlers below do not intercept it.
        sys.exit(0 if is_valid else 1)

    except FileNotFoundError as e:
        print(f"❌ Error: {e} ")
        print("\n Make sure you're running this script from the repository root,")
        print("or provide correct paths using --shapes and --data arguments.")
        sys.exit(2)

    except Exception as e:
        print(f"❌ Unexpected error: {e} ")
        sys.exit(2)
225+
226+
227+ if __name__ == '__main__' :
228+ main ()
0 commit comments