Skip to content

Commit f490efa

Browse files
committed
debug
1 parent dcdc63c commit f490efa

File tree

1 file changed

+52
-6
lines changed

1 file changed

+52
-6
lines changed

geotessera/registry.py

Lines changed: 52 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -811,6 +811,13 @@ def _load_registry(self):
811811
missing = required_columns - set(self._registry_gdf.columns)
812812
raise ValueError(f"Registry is missing required columns: {missing}")
813813

814+
# Build dictionary index for O(1) lookups - avoids non-deterministic
815+
# pandas DataFrame filtering issues on some platforms (Windows CI)
816+
self._tile_index: Dict[Tuple[int, int, int], int] = {}
817+
for idx, row in enumerate(self._registry_gdf.itertuples()):
818+
key = (int(row.year), int(row.lon_i), int(row.lat_i))
819+
self._tile_index[key] = idx
820+
814821
def _load_landmasks_registry(self):
815822
"""Load landmasks Parquet registry from local path or download from remote with If-Modified-Since refresh."""
816823
if self._landmasks_registry_path and self._landmasks_registry_path.exists():
@@ -892,6 +899,13 @@ def _load_landmasks_registry(self):
892899
if "lat_i" not in self._landmasks_df.columns:
893900
self._landmasks_df["lat_i"] = (self._landmasks_df["lat"] * 100).round().astype(np.int32)
894901

902+
# Build dictionary index for O(1) lookups - avoids non-deterministic
903+
# pandas DataFrame filtering issues on some platforms (Windows CI)
904+
self._landmask_index: Dict[Tuple[int, int], int] = {}
905+
for idx, row in enumerate(self._landmasks_df.itertuples()):
906+
key = (int(row.lon_i), int(row.lat_i))
907+
self._landmask_index[key] = idx
908+
895909
def iter_tiles_in_region(
896910
self, bounds: Tuple[float, float, float, float], year: int
897911
) -> Iterator[Tuple[int, float, float]]:
@@ -1146,12 +1160,20 @@ def fetch_landmask(
11461160
# Use pre-computed integer grid indices for robust comparison
11471161
lon_i = coord_to_grid_int(lon)
11481162
lat_i = coord_to_grid_int(lat)
1149-
matches = self._landmasks_df[
1150-
(self._landmasks_df["lon_i"] == lon_i)
1151-
& (self._landmasks_df["lat_i"] == lat_i)
1152-
]
1153-
if len(matches) > 0:
1154-
file_hash = matches.iloc[0]["hash"]
1163+
1164+
# Use dictionary index for O(1) lookup - more reliable than DataFrame filtering
1165+
key = (int(lon_i), int(lat_i))
1166+
if hasattr(self, "_landmask_index") and key in self._landmask_index:
1167+
idx = self._landmask_index[key]
1168+
file_hash = self._landmasks_df.iloc[idx]["hash"]
1169+
else:
1170+
# Fallback to DataFrame filtering
1171+
matches = self._landmasks_df[
1172+
(self._landmasks_df["lon_i"] == lon_i)
1173+
& (self._landmasks_df["lat_i"] == lat_i)
1174+
]
1175+
if len(matches) > 0:
1176+
file_hash = matches.iloc[0]["hash"]
11551177

11561178
# Download to embeddings_dir
11571179
url = f"{TESSERA_BASE_URL}/{self.version}/{LANDMASKS_DIR_NAME}/{filename}"
@@ -1241,6 +1263,14 @@ def get_tile_file_size(self, year: int, lon: float, lat: float) -> int:
12411263
# Use pre-computed integer grid indices for robust comparison
12421264
lon_i = coord_to_grid_int(lon)
12431265
lat_i = coord_to_grid_int(lat)
1266+
1267+
# Use dictionary index for O(1) lookup - more reliable than DataFrame filtering
1268+
key = (int(year), int(lon_i), int(lat_i))
1269+
if hasattr(self, "_tile_index") and key in self._tile_index:
1270+
idx = self._tile_index[key]
1271+
return int(self._registry_gdf.iloc[idx]["file_size"])
1272+
1273+
# Fallback to DataFrame filtering for compatibility
12441274
matches = self._registry_gdf[
12451275
(self._registry_gdf["year"] == year)
12461276
& (self._registry_gdf["lon_i"] == lon_i)
@@ -1277,6 +1307,14 @@ def get_scales_file_size(self, year: int, lon: float, lat: float) -> int:
12771307
# Use pre-computed integer grid indices for robust comparison
12781308
lon_i = coord_to_grid_int(lon)
12791309
lat_i = coord_to_grid_int(lat)
1310+
1311+
# Use dictionary index for O(1) lookup - more reliable than DataFrame filtering
1312+
key = (int(year), int(lon_i), int(lat_i))
1313+
if hasattr(self, "_tile_index") and key in self._tile_index:
1314+
idx = self._tile_index[key]
1315+
return int(self._registry_gdf.iloc[idx]["scales_size"])
1316+
1317+
# Fallback to DataFrame filtering for compatibility
12801318
matches = self._registry_gdf[
12811319
(self._registry_gdf["year"] == year)
12821320
& (self._registry_gdf["lon_i"] == lon_i)
@@ -1318,6 +1356,14 @@ def get_landmask_file_size(self, lon: float, lat: float) -> int:
13181356
# Use pre-computed integer grid indices for robust comparison
13191357
lon_i = coord_to_grid_int(lon)
13201358
lat_i = coord_to_grid_int(lat)
1359+
1360+
# Use dictionary index for O(1) lookup - more reliable than DataFrame filtering
1361+
key = (int(lon_i), int(lat_i))
1362+
if hasattr(self, "_landmask_index") and key in self._landmask_index:
1363+
idx = self._landmask_index[key]
1364+
return int(self._landmasks_df.iloc[idx]["file_size"])
1365+
1366+
# Fallback to DataFrame filtering for compatibility
13211367
matches = self._landmasks_df[
13221368
(self._landmasks_df["lon_i"] == lon_i)
13231369
& (self._landmasks_df["lat_i"] == lat_i)

0 commit comments

Comments
 (0)