1+ #!/usr/bin/python3
2+ """
3+ Simple content validation for OMI Public Course.
4+ Checks for required Markdown files and validates image references.
5+ Uses problems.json to locate problems and git diff to only check changed problems.
6+ """
7+
8+ import json
9+ import os
10+ import re
11+ import sys
12+ import subprocess
13+ from pathlib import Path
14+ from typing import List
15+
16+
def get_changed_files(repo_root: str) -> List[str]:
    """Return the file paths that ``git diff --name-only`` reports as changed.

    The revision range comes from the first CI variable that is set, checked
    in this order: ``TRAVIS_COMMIT_RANGE``, ``CIRCLE_COMPARE_URL``,
    ``GITHUB_BASE_COMMIT``. When none is set the range is
    ``origin/main...HEAD``.

    Args:
        repo_root: Directory in which the git command is executed.

    Returns:
        One entry per line of git output. If git exits with a non-zero
        status, the failure is printed and an empty list is returned.
    """
    travis_range = os.environ.get('TRAVIS_COMMIT_RANGE')
    circle_url = os.environ.get('CIRCLE_COMPARE_URL')
    github_base = os.environ.get('GITHUB_BASE_COMMIT')

    if travis_range:
        revisions = travis_range
    elif circle_url:
        # The range is the seventh '/'-separated component of the compare URL.
        revisions = circle_url.split('/')[6]
    elif github_base:
        revisions = github_base + '...HEAD'
    else:
        # No CI information available: compare against the main branch.
        revisions = 'origin/main...HEAD'

    git_command = ['git', 'diff', '--name-only', '--diff-filter=AMDR', revisions]
    try:
        output = subprocess.check_output(
            git_command,
            cwd=repo_root,
            universal_newlines=True)
    except subprocess.CalledProcessError as err:
        print(f"❌ Failed to get git diff: {err} ")
        return []
    return output.splitlines()
42+
43+
def load_problems_from_json(repo_root: str) -> List[dict]:
    """Read ``problems.json`` under *repo_root* and return its problem list.

    Args:
        repo_root: Directory expected to contain ``problems.json``.

    Returns:
        The value stored under the ``"problems"`` key, or an empty list when
        the key is absent.

    Raises:
        FileNotFoundError: If ``problems.json`` does not exist.
    """
    manifest_path = os.path.join(repo_root, "problems.json")

    if os.path.exists(manifest_path):
        with open(manifest_path, 'r', encoding='utf-8') as handle:
            return json.load(handle).get("problems", [])

    raise FileNotFoundError(f"problems.json not found at {manifest_path} ")
55+
56+
def validate_markdown_files(problem_path: str, problem_title: str) -> List[str]:
    """Verify that the problem ships a statement in Spanish or English.

    Args:
        problem_path: Directory of the problem being validated.
        problem_title: Name used to identify the problem in error messages.

    Returns:
        An empty list when ``statements/es.markdown`` or
        ``statements/en.markdown`` exists, otherwise a list holding a single
        error message.
    """
    accepted_statements = (
        'statements/es.markdown',
        'statements/en.markdown',
    )

    # One existing statement is enough; stop at the first hit.
    for relative_name in accepted_statements:
        if os.path.exists(os.path.join(problem_path, relative_name)):
            return []

    return [f"Problem '{problem_title} ': Missing at least one statement file (es or en)"]
72+
73+
def validate_image_references(problem_path: str, problem_title: str) -> List[str]:
    """Collect image-reference errors from every Markdown file of a problem.

    Walks *problem_path* recursively and hands each ``.markdown`` / ``.md``
    file to ``_check_file_images``.

    Args:
        problem_path: Directory of the problem being validated.
        problem_title: Name used to identify the problem in error messages.

    Returns:
        All error messages reported for the files found, in walk order.
    """
    collected = []

    for directory, _subdirs, filenames in os.walk(problem_path):
        markdown_names = [
            name for name in filenames
            if name.endswith(('.markdown', '.md'))
        ]
        for name in markdown_names:
            collected.extend(
                _check_file_images(os.path.join(directory, name), problem_title))

    return collected
86+
87+
def _check_file_images(markdown_file: str, problem_title: str) -> List[str]:
    """Report image references in one Markdown file that point to missing files.

    Only relative references are checked. References with a URL scheme
    (``scheme://...``), ``data:`` URIs and paths starting with ``/`` are
    skipped. An optional Markdown title (``![alt](path "Title")``) is removed
    before the path is resolved relative to the Markdown file's directory.

    Args:
        markdown_file: Path of the Markdown file to scan.
        problem_title: Name used to identify the problem in error messages.

    Returns:
        One error message per missing image, or a single message when the
        file cannot be decoded as UTF-8.
    """
    errors = []

    try:
        with open(markdown_file, 'r', encoding='utf-8') as f:
            content = f.read()
    except UnicodeDecodeError:
        errors.append(f"Problem '{problem_title} ': Cannot read {markdown_file} (encoding issue)")
        return errors

    # main() publishes the repository root as a module-level global. Look it
    # up defensively so this function also works when main() has not run
    # (e.g. when imported); in that case the path is reported as given.
    root = globals().get('repo_root')
    shown_file = os.path.relpath(markdown_file, root) if root else markdown_file

    # Find image references: ![alt](target) or ![alt](target "title")
    image_pattern = r'!\[([^\]]*)\]\(([^)]+)\)'
    # A target followed by whitespace and a quoted title.
    titled_pattern = r'''^(\S+)\s+(?:"[^"]*"|'[^']*')$'''
    # Anything with an explicit scheme (http://, https://, ftp://, ...) or a
    # data URI is external; a bare "http" filename prefix is not.
    external_pattern = r'^(?:[A-Za-z][A-Za-z0-9+.-]*://|data:)'

    file_dir = os.path.dirname(markdown_file)

    for _alt_text, target in re.findall(image_pattern, content):
        image_path = target.strip()

        # Drop the optional title so it is not mistaken for part of the path.
        titled = re.match(titled_pattern, image_path)
        if titled:
            image_path = titled.group(1)

        # Skip URLs and absolute paths
        if re.match(external_pattern, image_path) or image_path.startswith('/'):
            continue

        # Handle relative paths
        if image_path.startswith('./'):
            image_path = image_path[2:]

        # Resolve relative to markdown file
        full_image_path = os.path.join(file_dir, image_path)

        if not os.path.exists(full_image_path):
            errors.append(
                f"Problem '{problem_title} ': Image not found: {image_path} "
                f"(in {shown_file} )"
            )

    return errors
124+
125+
def _problem_has_changes(problem_path: str, changed_files: List[str]) -> bool:
    """Return True when a changed file is the problem path or lies inside it."""
    normalized = problem_path.rstrip('/')
    if not normalized:
        # An empty path matches every file, as a plain prefix test would.
        return bool(changed_files)
    # Match on a directory boundary so "problems/sum" does not claim files
    # that belong to "problems/sum2".
    prefix = normalized + '/'
    return any(f == normalized or f.startswith(prefix) for f in changed_files)


def main():
    """Validate every problem from problems.json that has changed files.

    For each changed problem, checks that a statement file exists and that
    image references in its Markdown files resolve. Results are printed as
    the run progresses.

    Exits with status 0 when every check passes. Exits with status 1 when
    problems.json lists no problems, when any validation error is found, or
    when an unexpected exception occurs (the traceback is printed).
    """
    # _check_file_images reads this module-level name when building messages.
    global repo_root  # Needed for _check_file_images

    try:
        # Get repository root (assuming script is in utils/ directory)
        repo_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
        print(f"Repository root: {repo_root} ")

        # Get changed files
        changed_files = get_changed_files(repo_root)
        print(f"\n Found {len(changed_files)} changed files in git diff")

        # Load problems from problems.json
        problems = load_problems_from_json(repo_root)
        print(f"Found {len(problems)} problems in problems.json")

        if not problems:
            print("❌ No problems found in problems.json")
            sys.exit(1)

        all_errors = []
        checked_problems = 0

        print("\n 🔍 Validating changed problems...")

        # Validate each problem if it has changes
        for problem in problems:
            problem_path = problem["path"]
            full_problem_path = os.path.join(repo_root, problem_path)
            problem_title = os.path.basename(problem_path)

            # Check if this problem has any changes
            if not _problem_has_changes(problem_path, changed_files):
                print(f"\n ⏩ Skipping problem: {problem_title} (no changes)")
                continue

            checked_problems += 1
            print(f"\n 📝 Checking problem: {problem_title} ")
            print(f" Path: {problem_path} ")
            print(f" Full path: {full_problem_path} ")

            if not os.path.exists(full_problem_path):
                error_msg = f"Problem path does not exist: {full_problem_path} "
                print(f" ❌ {error_msg} ")
                all_errors.append(error_msg)
                continue

            # Check Markdown files
            markdown_errors = validate_markdown_files(full_problem_path, problem_title)
            if markdown_errors:
                print(" Missing files:")
                for error in markdown_errors:
                    print(f" ❌ {error} ")
                all_errors.extend(markdown_errors)

            # Check image references
            image_errors = validate_image_references(full_problem_path, problem_title)
            if image_errors:
                print(" Image issues:")
                for error in image_errors:
                    print(f" ❌ {error} ")
                all_errors.extend(image_errors)

            if not markdown_errors and not image_errors:
                print(" ✅ No issues found")

        # Report final results
        if all_errors:
            print("\n ❌ Validation summary:")
            print(f"Found {len(all_errors)} error(s) in {checked_problems} changed problem(s):")
            for error in all_errors:
                print(f" • {error} ")
            sys.exit(1)
        else:
            print("\n ✅ All validations passed!")
            print(f" 📊 Checked {checked_problems} changed problem(s)")
            sys.exit(0)

    # sys.exit() raises SystemExit, which is not an Exception subclass, so
    # the exit calls above pass through this handler untouched.
    except Exception as e:
        print(f"❌ Validation failed: {str(e)} ", file=sys.stderr)
        import traceback
        traceback.print_exc()
        sys.exit(1)
210+
211+
# Run the validation only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()
0 commit comments