Skip to content

Commit cff8e7a

Browse files
committed
Add a script to remove absolute filepaths from kedro API endpoints.
1 parent 004ebc4 commit cff8e7a

2 files changed

Lines changed: 272 additions & 0 deletions

File tree

Makefile

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,8 @@ build: ## Build static site in build directory
2929
@echo "🏗️ Building static site..."
3030
@mkdir -p build
3131
python scripts/copy_data.py
32+
@echo "🔧 Fixing API file paths..."
33+
python scripts/fix_api_paths.py
3234
@echo "✅ Build completed! Static site ready in build/ directory"
3335

3436
serve: ## Serve the build directory with Python HTTP server

scripts/fix_api_paths.py

Lines changed: 270 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,270 @@
1+
#!/usr/bin/env python3
2+
"""
3+
Script to replace absolute file paths with relative paths in API response files.
4+
5+
This script finds all instances of the project's absolute path prefix (by default the current working directory plus '/', e.g. '/Users/patcon/repos/kedro-polis-pipelines/')
6+
in files within the build/api/ directory (JSON is parsed; anything else is handled as plain text) and replaces them with an empty string
7+
to make the paths relative.
8+
9+
Example:
10+
Before: "/Users/patcon/repos/kedro-polis-pipelines/data/6carwc4nzj/knn5d_pca_bestkmeans/03_primary/projections.json"
11+
After: "data/6carwc4nzj/knn5d_pca_bestkmeans/03_primary/projections.json"
12+
"""
13+
14+
import os
15+
import json
16+
import re
17+
import argparse
18+
from pathlib import Path
19+
from typing import Dict, Any, List, Tuple, Union
20+
21+
22+
def find_and_replace_paths_in_dict(
    data: Any, old_path: str, new_path: str = ""
) -> Tuple[Any, int]:
    """
    Recursively replace a path substring in the string values of parsed JSON.

    Dictionaries and lists are rebuilt rather than modified in place, so the
    caller's input structure is left untouched. Only values are rewritten;
    dictionary keys are copied as-is. A bare string (a JSON document that is
    just a string) is handled too, and any other scalar is returned unchanged.

    Args:
        data: Dictionary or list to process (a bare string is also accepted)
        old_path: Path string to replace; an empty string matches nothing
        new_path: Replacement string (default: empty string)

    Returns:
        Tuple of (modified_data, replacement_count), where replacement_count
        is the total number of occurrences of old_path that were replaced
    """
    if isinstance(data, str):
        # An empty old_path is "in" every string and would make str.replace
        # insert new_path between every character, so treat it as no match.
        if not old_path or old_path not in data:
            return data, 0
        # str.count and str.replace both work on non-overlapping occurrences,
        # so the count matches the number of substitutions actually made.
        return data.replace(old_path, new_path), data.count(old_path)

    if isinstance(data, dict):
        rebuilt_dict = {}
        replacement_count = 0
        for key, value in data.items():
            rebuilt_dict[key], sub_count = find_and_replace_paths_in_dict(
                value, old_path, new_path
            )
            replacement_count += sub_count
        return rebuilt_dict, replacement_count

    if isinstance(data, list):
        rebuilt_list = []
        replacement_count = 0
        for item in data:
            modified_item, sub_count = find_and_replace_paths_in_dict(
                item, old_path, new_path
            )
            rebuilt_list.append(modified_item)
            replacement_count += sub_count
        return rebuilt_list, replacement_count

    # Numbers, booleans and None carry no path text.
    return data, 0
71+
72+
73+
def process_api_file(
    file_path: Path, old_path: str, new_path: str = ""
) -> Tuple[bool, int]:
    """
    Rewrite a single file so that occurrences of old_path become new_path.

    The file content is parsed as JSON first; when that succeeds the string
    values are rewritten and the document is dumped back with a 2-space
    indent. Content that is not valid JSON is rewritten with a plain text
    substitution instead. The file is only written when something changed.

    Args:
        file_path: Path to the file to process
        old_path: Path string to replace
        new_path: Replacement string (default: empty string)

    Returns:
        Tuple of (success, replacement_count); (False, 0) if any error was
        raised while reading, converting or writing the file
    """
    try:
        with open(file_path, "r", encoding="utf-8") as source:
            raw_text = source.read()

        try:
            parsed = json.loads(raw_text)
        except json.JSONDecodeError:
            # Not JSON: fall back to substituting directly in the raw text.
            occurrences = raw_text.count(old_path)
            if not occurrences:
                print(f"- {file_path}: no changes needed (text mode)")
                return True, 0

            rewritten_text = raw_text.replace(old_path, new_path)
            with open(file_path, "w", encoding="utf-8") as sink:
                sink.write(rewritten_text)
            print(f"✓ {file_path}: {occurrences} replacements (text mode)")
            return True, occurrences

        updated, hits = find_and_replace_paths_in_dict(parsed, old_path, new_path)

        if hits > 0:
            # Persist the rewritten document in place of the original.
            with open(file_path, "w", encoding="utf-8") as sink:
                json.dump(updated, sink, indent=2, ensure_ascii=False)
            print(f"✓ {file_path}: {hits} replacements")
        else:
            print(f"- {file_path}: no changes needed")

        return True, hits

    except Exception as error:
        # Report the failure and let the caller carry on with other files.
        print(f"✗ Error processing {file_path}: {error}")
        return False, 0
126+
127+
128+
def find_api_files(api_dir: Path) -> List[Path]:
    """
    Collect every non-hidden file below the API directory.

    Only names *below* api_dir are tested for a leading dot. The components
    of api_dir itself are ignored, so a directory such as '../build/api' or
    one that lives under a dot-directory is still scanned.

    Args:
        api_dir: Path to the API directory

    Returns:
        Sorted list of file paths to process; empty if the directory has no
        eligible files
    """
    files = []

    for root, dirs, filenames in os.walk(api_dir):
        # Prune hidden directories in place so os.walk never descends into them.
        dirs[:] = [name for name in dirs if not name.startswith(".")]
        files.extend(
            Path(root) / filename
            for filename in filenames
            if not filename.startswith(".")
        )

    return sorted(files)
149+
150+
151+
def main(argv: Union[List[str], None] = None) -> int:
    """
    Command-line entry point: rewrite absolute paths in every API file.

    Args:
        argv: Argument list to parse, without the program name. When None
            (the default) argparse reads the process command line, so the
            script behaves as before when run directly.

    Returns:
        Exit status: 0 when every file was processed (or there was nothing
        to process), 1 when the API directory does not exist or at least one
        file failed
    """
    parser = argparse.ArgumentParser(
        description="Replace absolute paths with relative paths in API files"
    )
    parser.add_argument(
        "--old-path",
        type=str,
        help="Absolute path to replace (default: current working directory + '/')",
    )
    parser.add_argument(
        "--new-path",
        type=str,
        default="",
        help="Replacement path (default: empty string)",
    )
    parser.add_argument(
        "--api-dir",
        type=str,
        default="build/api",
        help="API directory to process (default: build/api)",
    )
    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="Show what would be changed without making changes",
    )

    args = parser.parse_args(argv)

    api_dir = Path(args.api_dir)

    # Without --old-path, strip the current working directory (plus a
    # trailing slash) so the remaining paths are relative to it.
    old_path = args.old_path if args.old_path else str(Path.cwd()) + "/"
    new_path = args.new_path

    if not api_dir.exists():
        print(f"Error: API directory '{api_dir}' does not exist.")
        return 1

    print(f"Processing API files in '{api_dir}'...")
    relative_note = "(empty string for relative paths)" if new_path == "" else ""
    print(f"Replacing: '{old_path}' -> '{new_path}' {relative_note}")
    if args.dry_run:
        print("DRY RUN MODE: No files will be modified")
    print("-" * 70)

    files_to_process = find_api_files(api_dir)

    if not files_to_process:
        print("No files found to process.")
        return 0

    print(f"Found {len(files_to_process)} files to process.\n")

    total_files_processed = 0
    total_files_modified = 0
    total_replacements = 0
    failed_files = []

    for file_path in files_to_process:
        if args.dry_run:
            # Dry run: count matches in the raw file text, write nothing.
            try:
                with open(file_path, "r", encoding="utf-8") as f:
                    content = f.read()

                replacement_count = content.count(old_path)
                if replacement_count:
                    print(f"Would modify {file_path}: {replacement_count} replacements")
                    total_files_modified += 1
                    total_replacements += replacement_count
                else:
                    print(f"- {file_path}: no changes needed")

                total_files_processed += 1
            except Exception as e:
                # Keep going so one unreadable file does not hide the rest.
                print(f"✗ Error reading {file_path}: {e}")
                failed_files.append(file_path)
            continue

        success, replacement_count = process_api_file(file_path, old_path, new_path)

        if not success:
            failed_files.append(file_path)
            continue

        total_files_processed += 1
        if replacement_count > 0:
            total_files_modified += 1
            total_replacements += replacement_count

    # Summary
    print("\n" + "=" * 70)
    print("SUMMARY:")
    print(f"Files processed: {total_files_processed}/{len(files_to_process)}")
    print(f"Files modified: {total_files_modified}")
    print(f"Total replacements: {total_replacements}")

    if failed_files:
        print(f"Failed files: {len(failed_files)}")
        for failed_file in failed_files:
            print(f"  - {failed_file}")
        return 1

    print("All files processed successfully!")
    return 0
267+
268+
269+
if __name__ == "__main__":
    # Raise SystemExit directly: the bare exit() helper is installed by the
    # site module for interactive use and is missing under `python -S`.
    raise SystemExit(main())

0 commit comments

Comments
 (0)