Skip to content

Commit 8aeff82

Browse files
committed
debug tiles
1 parent aefaa3e commit 8aeff82

File tree

2 files changed

+256
-0
lines changed

2 files changed

+256
-0
lines changed

scripts/debug_tile_lookup.py

Lines changed: 246 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,246 @@
1+
#!/usr/bin/env python3
2+
"""Debug script to investigate tile lookup failures."""
3+
4+
import sys
5+
import numpy as np
6+
7+
def test_tile(registry, year, lon, lat):
    """Print lookup diagnostics for one tile against both registries.

    The coordinates are converted to integer grid indices with
    ``coord_to_grid_int`` and matched against the embeddings and landmasks
    tables held by ``registry``. The public size-lookup methods are then
    called so their outcome can be compared with the raw table matches.

    Args:
        registry: Object exposing ``_registry_gdf``, ``_landmasks_df``,
            ``get_tile_file_size`` and ``get_landmask_file_size``.
        year: Value compared against the ``year`` column of the embeddings
            table.
        lon: Tile longitude, passed to ``coord_to_grid_int``.
        lat: Tile latitude, passed to ``coord_to_grid_int``.
    """
    from geotessera.registry import coord_to_grid_int

    print(f"\n--- Testing tile: year={year}, lon={lon}, lat={lat} ---")

    gdf = registry._registry_gdf
    ldf = registry._landmasks_df

    lon_i = coord_to_grid_int(lon)
    lat_i = coord_to_grid_int(lat)
    print(f"Grid indices: lon_i={lon_i}, lat_i={lat_i}")

    # Check embeddings registry. Guard the column access so a table without
    # grid-index columns is reported instead of aborting with KeyError.
    if 'lon_i' in gdf.columns and 'lat_i' in gdf.columns:
        matches = gdf[
            (gdf['year'] == year) &
            (gdf['lon_i'] == lon_i) &
            (gdf['lat_i'] == lat_i)
        ]
        print(f"Embeddings matches: {len(matches)}")
    else:
        print("WARNING: lon_i/lat_i columns not found!")

    # Check landmasks registry (it may be absent altogether).
    if ldf is not None:
        if 'lon_i' in ldf.columns and 'lat_i' in ldf.columns:
            lm_matches = ldf[
                (ldf['lon_i'] == lon_i) &
                (ldf['lat_i'] == lat_i)
            ]
            print(f"Landmasks matches: {len(lm_matches)}")
        else:
            print("WARNING: lon_i/lat_i columns not in landmasks!")

    # Try the actual functions; a ValueError is reported, not propagated.
    try:
        size = registry.get_tile_file_size(year, lon, lat)
    except ValueError as e:
        print(f"get_tile_file_size: FAILED - {e}")
    else:
        print(f"get_tile_file_size: OK ({size} bytes)")

    try:
        size = registry.get_landmask_file_size(lon, lat)
    except ValueError as e:
        print(f"get_landmask_file_size: FAILED - {e}")
    else:
        print(f"get_landmask_file_size: OK ({size} bytes)")
48+
49+
def _banner(*lines, leading_blank=True):
    """Print ``lines`` framed by 60-character rules, optionally after a blank line."""
    rule = "=" * 60
    print(f"\n{rule}" if leading_blank else rule)
    for line in lines:
        print(line)
    print(rule)


def _has_grid_columns(df):
    """Return True when ``df`` has both the ``lon_i`` and ``lat_i`` columns."""
    return 'lon_i' in df.columns and 'lat_i' in df.columns


def _report_schema(gdf, ldf):
    """Print row counts, column names and coordinate dtypes of the registries."""
    print(f"Embeddings registry: {len(gdf)} rows")
    print(f"Landmasks registry: {len(ldf) if ldf is not None else 'None'} rows")

    # Check columns
    print("\n--- Registry Columns ---")
    print(f"Embeddings columns: {list(gdf.columns)}")
    if ldf is not None:
        print(f"Landmasks columns: {list(ldf.columns)}")

    # Check data types
    print("\n--- Column Data Types ---")
    print(f"lon dtype: {gdf['lon'].dtype}")
    print(f"lat dtype: {gdf['lat'].dtype}")
    if _has_grid_columns(gdf):
        print(f"lon_i dtype: {gdf['lon_i'].dtype}")
        print(f"lat_i dtype: {gdf['lat_i'].dtype}")
    else:
        print("WARNING: lon_i/lat_i columns not found!")


def _report_grid_indices(lon, lat):
    """Print and return the grid indices of ``(lon, lat)``.

    The library result from ``coord_to_grid_int`` is printed next to a manual
    ``np.int32(np.round(value * 100))`` computation so the two can be compared.
    """
    from geotessera.registry import coord_to_grid_int

    print("\n--- Grid Index Computation ---")
    lon_i = coord_to_grid_int(lon)
    lat_i = coord_to_grid_int(lat)
    print(f"coord_to_grid_int({lon}) = {lon_i} (type: {type(lon_i)})")
    print(f"coord_to_grid_int({lat}) = {lat_i} (type: {type(lat_i)})")

    # Manual computation
    manual_lon_i = np.int32(np.round(lon * 100))
    manual_lat_i = np.int32(np.round(lat * 100))
    print(f"Manual: np.int32(np.round({lon} * 100)) = {manual_lon_i}")
    print(f"Manual: np.int32(np.round({lat} * 100)) = {manual_lat_i}")
    return lon_i, lat_i


def _report_nearby_indices(gdf, lon_i, lat_i):
    """Print the registry grid indices lying within 20 units of the target."""
    print("\n--- Registry Values Near Target ---")
    if not _has_grid_columns(gdf):
        return
    for column, target in (('lon_i', lon_i), ('lat_i', lat_i)):
        nearby = [x for x in sorted(gdf[column].unique()) if abs(x - target) <= 20]
        print(f"{column} values near {target}: {nearby[:20]}...")


def _report_embedding_lookup(gdf, year, lon_i, lat_i):
    """Print per-criterion and combined match counts in the embeddings table."""
    print("\n--- Attempting Lookup ---")

    # First check: does the requested year exist at all?
    year_mask = gdf['year'] == year
    print(f"Tiles with year={year}: {len(gdf[year_mask])}")

    # Without grid-index columns the remaining checks cannot run.
    if not _has_grid_columns(gdf):
        return

    lon_mask = gdf['lon_i'] == lon_i
    lat_mask = gdf['lat_i'] == lat_i
    lon_subset = gdf[lon_mask]
    lat_subset = gdf[lat_mask]
    print(f"Tiles with lon_i={lon_i}: {len(lon_subset)}")
    print(f"Tiles with lat_i={lat_i}: {len(lat_subset)}")

    # Combined
    combined = gdf[year_mask & lon_mask & lat_mask]
    print(f"Tiles matching all criteria: {len(combined)}")

    if len(combined) > 0:
        print("\nMATCH FOUND!")
        print(combined[['year', 'lon', 'lat', 'lon_i', 'lat_i']].head())
        return

    print("\nNO MATCH - investigating...")
    # Show which index values do exist along each axis of the target.
    if len(lat_subset) > 0:
        print(f" lon_i values where lat_i={lat_i}: {sorted(lat_subset['lon_i'].unique())[:20]}")
    if len(lon_subset) > 0:
        print(f" lat_i values where lon_i={lon_i}: {sorted(lon_subset['lat_i'].unique())[:20]}")


def _report_landmask_lookup(ldf, lon_i, lat_i):
    """Print match counts in the landmasks table, plus neighbours on a miss."""
    print("\n--- Landmasks Registry ---")
    if ldf is None:
        return
    if not _has_grid_columns(ldf):
        print("WARNING: lon_i/lat_i columns not in landmasks!")
        return

    lon_mask = ldf['lon_i'] == lon_i
    lat_mask = ldf['lat_i'] == lat_i
    print(f"Landmasks with lon_i={lon_i}: {len(ldf[lon_mask])}")
    print(f"Landmasks with lat_i={lat_i}: {len(ldf[lat_mask])}")

    lm_combined = ldf[lon_mask & lat_mask]
    print(f"Landmasks matching both: {len(lm_combined)}")

    if len(lm_combined) == 0:
        # What landmasks exist near this location?
        nearby = ldf[
            (abs(ldf['lon_i'] - lon_i) <= 10) &
            (abs(ldf['lat_i'] - lat_i) <= 10)
        ]
        print(f"Nearby landmasks (within 10 grid units): {len(nearby)}")
        if len(nearby) > 0:
            print(nearby[['lon', 'lat', 'lon_i', 'lat_i']].head(10))


def _report_target_sizes(registry, year, lon, lat):
    """Call both public size lookups for the target tile and print the outcome."""
    print("\n--- Testing get_tile_file_size ---")
    try:
        size = registry.get_tile_file_size(year, lon, lat)
    except ValueError as e:
        print(f"FAILED: {e}")
    else:
        print(f"SUCCESS: get_tile_file_size returned {size}")

    print("\n--- Testing get_landmask_file_size ---")
    try:
        size = registry.get_landmask_file_size(lon, lat)
    except ValueError as e:
        print(f"FAILED: {e}")
    else:
        print(f"SUCCESS: get_landmask_file_size returned {size}")


def _check_region(registry, bounds, year):
    """Run both size lookups for every tile in ``bounds``.

    Returns:
        A ``(tiles, failed)`` pair: the tiles yielded by
        ``registry.iter_tiles_in_region`` and one
        ``(kind, year, lon, lat, message)`` tuple per lookup that raised
        ``ValueError``.
    """
    tiles = list(registry.iter_tiles_in_region(bounds, year))
    print(f"\nFound {len(tiles)} tiles in region")

    print("\nTesting each tile:")
    failed = []
    for position, (tile_year, tile_lon, tile_lat) in enumerate(tiles, start=1):
        print(f"\n[{position}/{len(tiles)}] year={tile_year}, lon={tile_lon:.2f}, lat={tile_lat:.2f}")

        # Test get_tile_file_size
        try:
            size = registry.get_tile_file_size(tile_year, tile_lon, tile_lat)
        except ValueError as e:
            print(f" get_tile_file_size: FAILED - {e}")
            failed.append(('tile', tile_year, tile_lon, tile_lat, str(e)))
        else:
            print(f" get_tile_file_size: OK ({size} bytes)")

        # Test get_landmask_file_size
        try:
            size = registry.get_landmask_file_size(tile_lon, tile_lat)
        except ValueError as e:
            print(f" get_landmask_file_size: FAILED - {e}")
            failed.append(('landmask', tile_year, tile_lon, tile_lat, str(e)))
        else:
            print(f" get_landmask_file_size: OK ({size} bytes)")
    return tiles, failed


def main():
    """Run the tile-lookup diagnostics and print a report to stdout.

    A single hard-coded target tile is inspected in detail (schema, grid
    index computation, raw table matches, public size lookups). Every tile
    inside a hard-coded bounding box is then probed and the failures are
    summarised. ``ValueError`` from the size lookups is reported rather than
    propagated.
    """
    # Target tile that's failing
    target_year = 2024
    target_lon = 0.15
    target_lat = 51.25
    # Region probed tile by tile (the banner labels it "same as test").
    bounds = (-0.1, 51.3, 0.1, 51.5)

    _banner("TILE LOOKUP DEBUG SCRIPT", leading_blank=False)

    print(f"\nTarget tile: year={target_year}, lon={target_lon}, lat={target_lat}")
    print(f"Target lon type: {type(target_lon)}, lat type: {type(target_lat)}")

    # Import and load registry
    print("\n--- Loading Registry ---")
    from geotessera.registry import Registry

    registry = Registry(version="v1")
    gdf = registry._registry_gdf
    ldf = registry._landmasks_df

    _report_schema(gdf, ldf)
    lon_i, lat_i = _report_grid_indices(target_lon, target_lat)
    _report_nearby_indices(gdf, lon_i, lat_i)
    _report_embedding_lookup(gdf, target_year, lon_i, lat_i)
    _report_landmask_lookup(ldf, lon_i, lat_i)
    _report_target_sizes(registry, target_year, target_lon, target_lat)

    # Test all tiles in the bounding box region. The banner text is built
    # from ``bounds`` so the two cannot drift apart.
    _banner(
        "TESTING ALL TILES IN BOUNDING BOX",
        f"Bounding box: {','.join(str(b) for b in bounds)} (same as test)",
    )
    tiles, failed_tiles = _check_region(registry, bounds, target_year)

    _banner("SUMMARY")
    print(f"Total tiles tested: {len(tiles)}")
    print(f"Failed lookups: {len(failed_tiles)}")

    if failed_tiles:
        print("\nFailed tiles:")
        for kind, y, lon, lat, err in failed_tiles:
            print(f" [{kind}] year={y}, lon={lon:.2f}, lat={lat:.2f}: {err}")

    _banner("DEBUG COMPLETE")
244+
245+
# Run the diagnostics only when this file is executed as a script.
if __name__ == "__main__":
    main()

tests/cli.ps1

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,16 @@ try {
171171
Write-Host $dryRunString
172172
}
173173

174+
# Debug: Run tile lookup debug script
# Diagnostic step: failures are reported in red; the catch block does not rethrow.
Write-TestHeader "Debug: Tile Lookup Investigation"
Write-Host "Running debug script to investigate tile lookup..." -ForegroundColor Yellow
try {
    $debugOutput = & uv run python scripts/debug_tile_lookup.py 2>&1
    # Emit one record per line: Write-Host given an array joins its elements with spaces.
    $debugOutput | ForEach-Object { Write-Host $_ }
    # A non-zero exit code from a native command does not raise an exception, so check it explicitly.
    if ($LASTEXITCODE -ne 0) {
        Write-Host "Debug script exited with code $LASTEXITCODE" -ForegroundColor Red
    }
} catch {
    Write-Host "Debug script failed: $_" -ForegroundColor Red
}
183+
174184
# Test: Download Single UK Tile (TIFF format)
175185
Write-TestHeader "Test: Download Single UK Tile (TIFF format)"
176186

0 commit comments

Comments
 (0)