|
| 1 | +#!/usr/bin/env python3 |
| 2 | +""" |
| 3 | +Script to replace absolute file paths with relative paths in API response files. |
| 4 | +
|
| 5 | +This script finds all instances of an absolute path prefix (by default the current |
| 6 | +working directory plus '/', e.g. '/Users/patcon/repos/kedro-polis-pipelines/') in files |
| 7 | +within the build/api/ directory and replaces them with an empty string to make the paths relative. |
| 8 | +
|
| 9 | +Example: |
| 10 | + Before: "/Users/patcon/repos/kedro-polis-pipelines/data/6carwc4nzj/knn5d_pca_bestkmeans/03_primary/projections.json" |
| 11 | + After: "data/6carwc4nzj/knn5d_pca_bestkmeans/03_primary/projections.json" |
| 12 | +""" |
| 13 | + |
| 14 | +import os |
| 15 | +import json |
| 16 | +import re |
| 17 | +import argparse |
| 18 | +from pathlib import Path |
| 19 | +from typing import Dict, Any, List, Tuple, Union |
| 20 | + |
| 21 | + |
def find_and_replace_paths_in_dict(
    data: Union[Dict[str, Any], List[Any]], old_path: str, new_path: str = ""
) -> Tuple[Union[Dict[str, Any], List[Any]], int]:
    """
    Recursively replace a path substring in the string values of JSON-like data.

    Dictionaries and lists are rebuilt rather than mutated, so the input
    structure is left untouched. Only values are inspected (at any nesting
    depth); dictionary keys are copied as-is. A bare string is accepted as
    well, so a JSON document consisting of a single string is handled too.

    Args:
        data: Dictionary or list to process (a plain string is also accepted)
        old_path: Path string to replace. An empty string means "nothing to
            replace": the data is returned unchanged with a count of 0.
        new_path: Replacement string (default: empty string)

    Returns:
        Tuple of (modified_data, replacement_count), where replacement_count
        is the total number of occurrences of old_path that were replaced.
    """
    # "" is a substring of every string; str.replace("", x) would insert x
    # between all characters, so treat an empty search string as a no-op.
    if not old_path:
        return data, 0

    if isinstance(data, str):
        # str.count and str.replace both work on non-overlapping occurrences,
        # so the count matches exactly what replace() rewrites.
        occurrences = data.count(old_path)
        if occurrences:
            return data.replace(old_path, new_path), occurrences
        return data, 0

    if isinstance(data, dict):
        rebuilt_dict = {}
        total = 0
        for key, value in data.items():
            rebuilt_dict[key], sub_count = find_and_replace_paths_in_dict(
                value, old_path, new_path
            )
            total += sub_count
        return rebuilt_dict, total

    if isinstance(data, list):
        rebuilt_list = []
        total = 0
        for item in data:
            modified_item, sub_count = find_and_replace_paths_in_dict(
                item, old_path, new_path
            )
            rebuilt_list.append(modified_item)
            total += sub_count
        return rebuilt_list, total

    # Numbers, booleans, None, ...: nothing to rewrite.
    return data, 0
| 71 | + |
| 72 | + |
def process_api_file(
    file_path: Path, old_path: str, new_path: str = ""
) -> Tuple[bool, int]:
    """
    Rewrite one API file in place, replacing old_path with new_path.

    The file is read as UTF-8 text. If it parses as JSON, the parsed
    structure is rewritten and dumped back (2-space indent, non-ASCII kept)
    only when something was replaced. If it is not valid JSON, a plain
    substring replacement is applied to the raw text instead. A status line
    is printed for every outcome.

    Args:
        file_path: Path to the file to process
        old_path: Path string to replace
        new_path: Replacement string (default: empty string)

    Returns:
        Tuple of (success, replacement_count); (False, 0) when any error
        occurred while handling the file.
    """
    try:
        with open(file_path, "r", encoding="utf-8") as source:
            raw_text = source.read()

        try:
            document = json.loads(raw_text)
        except json.JSONDecodeError:
            # Not JSON: fall back to a plain text substitution.
            if old_path not in raw_text:
                print(f"- {file_path}: no changes needed (text mode)")
                return True, 0

            # Build the new content before reopening, so a failure here
            # cannot leave the file truncated.
            rewritten_text = raw_text.replace(old_path, new_path)
            hits = raw_text.count(old_path)
            with open(file_path, "w", encoding="utf-8") as sink:
                sink.write(rewritten_text)
            print(f"✓ {file_path}: {hits} replacements (text mode)")
            return True, hits

        rewritten_doc, hits = find_and_replace_paths_in_dict(
            document, old_path, new_path
        )
        if hits > 0:
            # Only touch the file when something actually changed.
            with open(file_path, "w", encoding="utf-8") as sink:
                json.dump(rewritten_doc, sink, indent=2, ensure_ascii=False)
            print(f"✓ {file_path}: {hits} replacements")
        else:
            print(f"- {file_path}: no changes needed")
        return True, hits

    except Exception as e:
        print(f"✗ Error processing {file_path}: {e}")
        return False, 0
| 126 | + |
| 127 | + |
def find_api_files(api_dir: Path) -> List[Path]:
    """
    Find all non-hidden files below the API directory.

    A file is skipped when its own name, or the name of any directory between
    api_dir and the file, starts with a dot. Components of api_dir itself are
    not considered, so an API directory that lives under a hidden directory
    (or is addressed through "..") is still scanned.

    Args:
        api_dir: Path to the API directory

    Returns:
        Sorted list of file paths to process (empty if api_dir cannot be walked)
    """
    files = []

    for root, dirs, filenames in os.walk(api_dir):
        # Prune hidden directories in place: with the default top-down walk,
        # os.walk will then never descend into them.
        dirs[:] = [name for name in dirs if not name.startswith(".")]

        root_path = Path(root)
        files.extend(
            root_path / filename
            for filename in filenames
            if not filename.startswith(".")
        )

    return sorted(files)
| 149 | + |
| 150 | + |
def main():
    """
    Command-line entry point: rewrite (or, with --dry-run, preview) paths in API files.

    Returns:
        Exit status: 0 when every file was handled (or none were found),
        1 when the API directory does not exist or any file failed.
    """
    cli = argparse.ArgumentParser(
        description="Replace absolute paths with relative paths in API files"
    )
    cli.add_argument(
        "--old-path",
        type=str,
        help="Absolute path to replace (default: current working directory + '/')",
    )
    cli.add_argument(
        "--new-path",
        type=str,
        default="",
        help="Replacement path (default: empty string)",
    )
    cli.add_argument(
        "--api-dir",
        type=str,
        default="build/api",
        help="API directory to process (default: build/api)",
    )
    cli.add_argument(
        "--dry-run",
        action="store_true",
        help="Show what would be changed without making changes",
    )
    args = cli.parse_args()

    api_dir = Path(args.api_dir)
    # A missing or empty --old-path falls back to "<current working directory>/".
    old_path = args.old_path if args.old_path else str(Path.cwd()) + "/"
    new_path = args.new_path

    if not api_dir.exists():
        print(f"Error: API directory '{api_dir}' does not exist.")
        return 1

    relative_note = "(empty string for relative paths)" if new_path == "" else ""
    print(f"Processing API files in '{api_dir}'...")
    print(f"Replacing: '{old_path}' -> '{new_path}' {relative_note}")
    if args.dry_run:
        print("DRY RUN MODE: No files will be modified")
    print("-" * 70)

    files_to_process = find_api_files(api_dir)
    if not files_to_process:
        print("No files found to process.")
        return 0

    print(f"Found {len(files_to_process)} files to process.\n")

    processed = 0
    modified = 0
    replacements = 0
    failed_files = []

    for file_path in files_to_process:
        if not args.dry_run:
            ok, hits = process_api_file(file_path, old_path, new_path)
            if not ok:
                failed_files.append(file_path)
                continue
            processed += 1
            if hits > 0:
                modified += 1
                replacements += hits
            continue

        # Dry run: only scan the raw text for matches, never write.
        try:
            with open(file_path, "r", encoding="utf-8") as handle:
                text = handle.read()

            if old_path in text:
                hits = text.count(old_path)
                print(f"Would modify {file_path}: {hits} replacements")
                modified += 1
                replacements += hits
            else:
                print(f"- {file_path}: no changes needed")

            processed += 1
        except Exception as e:
            print(f"✗ Error reading {file_path}: {e}")
            failed_files.append(file_path)

    print("\n" + "=" * 70)
    print("SUMMARY:")
    print(f"Files processed: {processed}/{len(files_to_process)}")
    print(f"Files modified: {modified}")
    print(f"Total replacements: {replacements}")

    if not failed_files:
        print("All files processed successfully!")
        return 0

    print(f"Failed files: {len(failed_files)}")
    for failed_file in failed_files:
        print(f" - {failed_file}")
    return 1
| 267 | + |
| 268 | + |
if __name__ == "__main__":
    # The bare `exit` helper is injected by the `site` module for interactive
    # use and can be missing (e.g. `python -S`); SystemExit is always
    # available and passes main()'s return value through as the exit status.
    raise SystemExit(main())
0 commit comments