|
| 1 | +#!/usr/bin/env python3 |
| 2 | +"""Debug script to investigate tile lookup failures.""" |
| 3 | + |
| 4 | +import sys |
| 5 | +import numpy as np |
| 6 | + |
def test_tile(registry, year, lon, lat):
    """Print lookup diagnostics for a single tile.

    Despite the ``test_`` prefix this is a manual debugging helper, not a
    pytest test: it asserts nothing and only prints what it finds.

    The tile is first searched for directly in the registry's private
    ``_registry_gdf`` (embeddings) and ``_landmasks_df`` (landmasks) tables,
    using the integer grid indices produced by ``coord_to_grid_int``. The
    public ``get_tile_file_size`` / ``get_landmask_file_size`` lookups are
    then called and their outcome is reported.

    Args:
        registry: Object exposing ``_registry_gdf``, ``_landmasks_df``,
            ``get_tile_file_size(year, lon, lat)`` and
            ``get_landmask_file_size(lon, lat)`` (presumably a
            ``geotessera.registry.Registry`` -- confirm against callers).
        year: Year compared against the ``year`` column and passed to
            ``get_tile_file_size``.
        lon: Tile longitude, converted with ``coord_to_grid_int``.
        lat: Tile latitude, converted with ``coord_to_grid_int``.

    Returns:
        None. All results are written to stdout.
    """
    from geotessera.registry import coord_to_grid_int

    print(f"\n--- Testing tile: year={year}, lon={lon}, lat={lat} ---")

    gdf = registry._registry_gdf
    ldf = registry._landmasks_df

    lon_i = coord_to_grid_int(lon)
    lat_i = coord_to_grid_int(lat)
    print(f"Grid indices: lon_i={lon_i}, lat_i={lat_i}")

    grid_columns = {'lon_i', 'lat_i'}

    # Check embeddings registry. Guard on the grid-index columns so that a
    # registry without them produces a warning instead of a KeyError that
    # would abort the run before the real lookups below are tried.
    if grid_columns.issubset(gdf.columns):
        matches = gdf[
            (gdf['year'] == year)
            & (gdf['lon_i'] == lon_i)
            & (gdf['lat_i'] == lat_i)
        ]
        print(f"Embeddings matches: {len(matches)}")
    else:
        print("WARNING: lon_i/lat_i columns not found!")

    # Check landmasks registry (skipped silently when it is absent or has no
    # grid-index columns).
    if ldf is not None and grid_columns.issubset(ldf.columns):
        lm_matches = ldf[
            (ldf['lon_i'] == lon_i)
            & (ldf['lat_i'] == lat_i)
        ]
        print(f"Landmasks matches: {len(lm_matches)}")

    # Try the actual functions. Only ValueError is treated as a reported
    # lookup failure; any other exception propagates with its traceback.
    try:
        size = registry.get_tile_file_size(year, lon, lat)
        print(f"get_tile_file_size: OK ({size} bytes)")
    except ValueError as e:
        print(f"get_tile_file_size: FAILED - {e}")

    try:
        size = registry.get_landmask_file_size(lon, lat)
        print(f"get_landmask_file_size: OK ({size} bytes)")
    except ValueError as e:
        print(f"get_landmask_file_size: FAILED - {e}")
| 48 | + |
def _banner(*lines, leading_blank=True):
    """Print ``lines`` framed by two 60-character ``=`` rules."""
    rule = "=" * 60
    print(f"\n{rule}" if leading_blank else rule)
    for line in lines:
        print(line)
    print(rule)


def _has_grid_columns(df):
    """Return True when ``df`` has both the ``lon_i`` and ``lat_i`` columns."""
    return {'lon_i', 'lat_i'}.issubset(df.columns)


def _try_size_lookup(lookup, *args):
    """Call ``lookup(*args)``; return ``(size, None)`` or ``(None, error_text)``.

    Only ``ValueError`` is converted into an error text; any other exception
    propagates so its traceback stays visible.
    """
    try:
        return lookup(*args), None
    except ValueError as exc:
        return None, str(exc)


def _print_nearby_values(gdf, column, target, radius=20, limit=20):
    """Print up to ``limit`` unique ``column`` values within ``radius`` of ``target``."""
    nearby = [
        value for value in sorted(gdf[column].unique())
        if abs(value - target) <= radius
    ]
    print(f"{column} values near {target}: {nearby[:limit]}...")


def _report_embedding_lookup(gdf, year, lon_i, lat_i):
    """Print per-criterion and combined match counts for the embeddings table."""
    lon_hits = gdf['lon_i'] == lon_i
    lat_hits = gdf['lat_i'] == lat_i
    print(f"Tiles with lon_i={lon_i}: {int(lon_hits.sum())}")
    print(f"Tiles with lat_i={lat_i}: {int(lat_hits.sum())}")

    combined = gdf[(gdf['year'] == year) & lon_hits & lat_hits]
    print(f"Tiles matching all criteria: {len(combined)}")

    if len(combined) > 0:
        print("\nMATCH FOUND!")
        print(combined[['year', 'lon', 'lat', 'lon_i', 'lat_i']].head())
        return

    print("\nNO MATCH - investigating...")

    # Which lon_i values exist on the target's row, and which lat_i values
    # exist on its column? This shows which of the two indices is off.
    lat_subset = gdf[lat_hits]
    if len(lat_subset) > 0:
        print(f" lon_i values where lat_i={lat_i}: {sorted(lat_subset['lon_i'].unique())[:20]}")

    lon_subset = gdf[lon_hits]
    if len(lon_subset) > 0:
        print(f" lat_i values where lon_i={lon_i}: {sorted(lon_subset['lat_i'].unique())[:20]}")


def _report_landmasks(ldf, lon_i, lat_i):
    """Print match counts for the landmasks table, plus neighbours on a miss."""
    if ldf is None:
        return
    if not _has_grid_columns(ldf):
        print("WARNING: lon_i/lat_i columns not in landmasks!")
        return

    lon_hits = ldf['lon_i'] == lon_i
    lat_hits = ldf['lat_i'] == lat_i
    print(f"Landmasks with lon_i={lon_i}: {int(lon_hits.sum())}")
    print(f"Landmasks with lat_i={lat_i}: {int(lat_hits.sum())}")

    both = int((lon_hits & lat_hits).sum())
    print(f"Landmasks matching both: {both}")
    if both:
        return

    # What landmasks exist near this location?
    nearby = ldf[
        ((ldf['lon_i'] - lon_i).abs() <= 10)
        & ((ldf['lat_i'] - lat_i).abs() <= 10)
    ]
    print(f"Nearby landmasks (within 10 grid units): {len(nearby)}")
    if len(nearby) > 0:
        print(nearby[['lon', 'lat', 'lon_i', 'lat_i']].head(10))


def _check_region(registry, bounds, year):
    """Run both size lookups for every tile in ``bounds``.

    Returns ``(tile_count, failures)`` where each failure is a
    ``(kind, year, lon, lat, error_text)`` tuple.
    """
    tiles = list(registry.iter_tiles_in_region(bounds, year))
    print(f"\nFound {len(tiles)} tiles in region")

    print("\nTesting each tile:")
    failures = []
    for position, (tile_year, tile_lon, tile_lat) in enumerate(tiles, start=1):
        print(f"\n[{position}/{len(tiles)}] year={tile_year}, lon={tile_lon:.2f}, lat={tile_lat:.2f}")

        checks = (
            ('tile', 'get_tile_file_size',
             registry.get_tile_file_size, (tile_year, tile_lon, tile_lat)),
            ('landmask', 'get_landmask_file_size',
             registry.get_landmask_file_size, (tile_lon, tile_lat)),
        )
        for kind, label, lookup, args in checks:
            size, error = _try_size_lookup(lookup, *args)
            if error is None:
                print(f" {label}: OK ({size} bytes)")
            else:
                print(f" {label}: FAILED - {error}")
                failures.append((kind, tile_year, tile_lon, tile_lat, error))
    return len(tiles), failures


def main():
    """Run the tile-lookup diagnostics and print a report to stdout.

    The script inspects one hard-coded target tile (year 2024, lon 0.15,
    lat 51.25) against the registry's private embeddings and landmasks
    tables, calls the public size lookups for it, and then repeats those
    lookups for every tile that ``iter_tiles_in_region`` yields for the
    bounding box ``(-0.1, 51.3, 0.1, 51.5)``.

    Constructing ``Registry(version="v1")`` presumably loads registry data
    from disk or network -- confirm before running in a restricted
    environment.

    Returns:
        int: ``0`` when every size lookup succeeded, ``1`` when at least one
        lookup (for the target tile or any region tile) raised ``ValueError``.
    """
    _banner("TILE LOOKUP DEBUG SCRIPT", leading_blank=False)

    # Target tile that's failing
    target_year = 2024
    target_lon = 0.15
    target_lat = 51.25

    print(f"\nTarget tile: year={target_year}, lon={target_lon}, lat={target_lat}")
    print(f"Target lon type: {type(target_lon)}, lat type: {type(target_lat)}")

    # Import and load registry (imported lazily so the header above is
    # printed even if the package import is slow or fails).
    print("\n--- Loading Registry ---")
    from geotessera.registry import Registry, coord_to_grid_int

    registry = Registry(version="v1")
    gdf = registry._registry_gdf
    ldf = registry._landmasks_df

    print(f"Embeddings registry: {len(gdf)} rows")
    print(f"Landmasks registry: {len(ldf) if ldf is not None else 'None'} rows")

    # Check columns
    print("\n--- Registry Columns ---")
    print(f"Embeddings columns: {list(gdf.columns)}")
    if ldf is not None:
        print(f"Landmasks columns: {list(ldf.columns)}")

    # Check data types
    print("\n--- Column Data Types ---")
    print(f"lon dtype: {gdf['lon'].dtype}")
    print(f"lat dtype: {gdf['lat'].dtype}")
    has_grid = _has_grid_columns(gdf)
    if has_grid:
        print(f"lon_i dtype: {gdf['lon_i'].dtype}")
        print(f"lat_i dtype: {gdf['lat_i'].dtype}")
    else:
        print("WARNING: lon_i/lat_i columns not found!")

    # Compute grid indices for target
    print("\n--- Grid Index Computation ---")
    lon_i = coord_to_grid_int(target_lon)
    lat_i = coord_to_grid_int(target_lat)
    print(f"coord_to_grid_int({target_lon}) = {lon_i} (type: {type(lon_i)})")
    print(f"coord_to_grid_int({target_lat}) = {lat_i} (type: {type(lat_i)})")

    # Manual computation, printed next to the library result for comparison.
    manual_lon_i = np.int32(np.round(target_lon * 100))
    manual_lat_i = np.int32(np.round(target_lat * 100))
    print(f"Manual: np.int32(np.round({target_lon} * 100)) = {manual_lon_i}")
    print(f"Manual: np.int32(np.round({target_lat} * 100)) = {manual_lat_i}")

    # Check what's in the registry near the target
    print("\n--- Registry Values Near Target ---")
    if has_grid:
        _print_nearby_values(gdf, 'lon_i', lon_i)
        _print_nearby_values(gdf, 'lat_i', lat_i)

    # Try the lookup
    print("\n--- Attempting Lookup ---")

    # First check: does the target year exist at all?
    year_matches = gdf[gdf['year'] == target_year]
    print(f"Tiles with year={target_year}: {len(year_matches)}")

    # Every lon_i/lat_i query stays behind the column check so a registry
    # without those columns yields the warning above, not a KeyError.
    if has_grid:
        _report_embedding_lookup(gdf, target_year, lon_i, lat_i)

    # Check landmasks
    print("\n--- Landmasks Registry ---")
    _report_landmasks(ldf, lon_i, lat_i)

    # Try the actual download-size functions for the target tile
    target_failed = False

    print("\n--- Testing get_tile_file_size ---")
    size, error = _try_size_lookup(
        registry.get_tile_file_size, target_year, target_lon, target_lat
    )
    if error is None:
        print(f"SUCCESS: get_tile_file_size returned {size}")
    else:
        print(f"FAILED: {error}")
        target_failed = True

    print("\n--- Testing get_landmask_file_size ---")
    size, error = _try_size_lookup(
        registry.get_landmask_file_size, target_lon, target_lat
    )
    if error is None:
        print(f"SUCCESS: get_landmask_file_size returned {size}")
    else:
        print(f"FAILED: {error}")
        target_failed = True

    # Test all tiles in the bounding box region. The banner text is derived
    # from ``bounds`` so the message cannot drift from the box actually used.
    bounds = (-0.1, 51.3, 0.1, 51.5)
    bounds_text = ",".join(str(edge) for edge in bounds)
    _banner(
        "TESTING ALL TILES IN BOUNDING BOX",
        f"Bounding box: {bounds_text} (same as test)",
    )

    tile_count, failed_tiles = _check_region(registry, bounds, target_year)

    _banner("SUMMARY")
    print(f"Total tiles tested: {tile_count}")
    print(f"Failed lookups: {len(failed_tiles)}")

    if failed_tiles:
        print("\nFailed tiles:")
        for kind, y, lon, lat, err in failed_tiles:
            print(f" [{kind}] year={y}, lon={lon:.2f}, lat={lat:.2f}: {err}")

    _banner("DEBUG COMPLETE")

    return 1 if (target_failed or failed_tiles) else 0


if __name__ == "__main__":
    # Propagate the diagnostics result as the process exit status so shell
    # scripts and CI can tell a clean run from one with failed lookups.
    sys.exit(main())
0 commit comments