@@ -811,6 +811,13 @@ def _load_registry(self):
811811 missing = required_columns - set (self ._registry_gdf .columns )
812812 raise ValueError (f"Registry is missing required columns: { missing } " )
813813
814+ # Build a (year, lon_i, lat_i) -> row-position index for O(1) lookups; avoids
815+ # non-deterministic pandas DataFrame filtering on some platforms (Windows CI)
816+ self ._tile_index : Dict [Tuple [int , int , int ], int ] = {}
817+ for idx , row in enumerate (self ._registry_gdf .itertuples ()):
818+ key = (int (row .year ), int (row .lon_i ), int (row .lat_i ))
819+ self ._tile_index [key ] = idx
820+
814821 def _load_landmasks_registry (self ):
815822 """Load landmasks Parquet registry from local path or download from remote with If-Modified-Since refresh."""
816823 if self ._landmasks_registry_path and self ._landmasks_registry_path .exists ():
@@ -892,6 +899,13 @@ def _load_landmasks_registry(self):
892899 if "lat_i" not in self ._landmasks_df .columns :
893900 self ._landmasks_df ["lat_i" ] = (self ._landmasks_df ["lat" ] * 100 ).round ().astype (np .int32 )
894901
902+ # Build a (lon_i, lat_i) -> row-position index for O(1) lookups; avoids
903+ # non-deterministic pandas DataFrame filtering on some platforms (Windows CI)
904+ self ._landmask_index : Dict [Tuple [int , int ], int ] = {}
905+ for idx , row in enumerate (self ._landmasks_df .itertuples ()):
906+ key = (int (row .lon_i ), int (row .lat_i ))
907+ self ._landmask_index [key ] = idx
908+
895909 def iter_tiles_in_region (
896910 self , bounds : Tuple [float , float , float , float ], year : int
897911 ) -> Iterator [Tuple [int , float , float ]]:
@@ -1146,12 +1160,20 @@ def fetch_landmask(
11461160 # Use pre-computed integer grid indices for robust comparison
11471161 lon_i = coord_to_grid_int (lon )
11481162 lat_i = coord_to_grid_int (lat )
1149- matches = self ._landmasks_df [
1150- (self ._landmasks_df ["lon_i" ] == lon_i )
1151- & (self ._landmasks_df ["lat_i" ] == lat_i )
1152- ]
1153- if len (matches ) > 0 :
1154- file_hash = matches .iloc [0 ]["hash" ]
1163+
1164+ # Use dictionary index for O(1) lookup - more reliable than DataFrame filtering
1165+ key = (int (lon_i ), int (lat_i ))
1166+ if hasattr (self , "_landmask_index" ) and key in self ._landmask_index :
1167+ idx = self ._landmask_index [key ]
1168+ file_hash = self ._landmasks_df .iloc [idx ]["hash" ]
1169+ else :
1170+ # Fallback to DataFrame filtering
1171+ matches = self ._landmasks_df [
1172+ (self ._landmasks_df ["lon_i" ] == lon_i )
1173+ & (self ._landmasks_df ["lat_i" ] == lat_i )
1174+ ]
1175+ if len (matches ) > 0 :
1176+ file_hash = matches .iloc [0 ]["hash" ]
11551177
11561178 # Download to embeddings_dir
11571179 url = f"{ TESSERA_BASE_URL } /{ self .version } /{ LANDMASKS_DIR_NAME } /{ filename } "
@@ -1241,6 +1263,14 @@ def get_tile_file_size(self, year: int, lon: float, lat: float) -> int:
12411263 # Use pre-computed integer grid indices for robust comparison
12421264 lon_i = coord_to_grid_int (lon )
12431265 lat_i = coord_to_grid_int (lat )
1266+
1267+ # Use dictionary index for O(1) lookup - more reliable than DataFrame filtering
1268+ key = (int (year ), int (lon_i ), int (lat_i ))
1269+ if hasattr (self , "_tile_index" ) and key in self ._tile_index :
1270+ idx = self ._tile_index [key ]
1271+ return int (self ._registry_gdf .iloc [idx ]["file_size" ])
1272+
1273+ # Fallback to DataFrame filtering for compatibility
12441274 matches = self ._registry_gdf [
12451275 (self ._registry_gdf ["year" ] == year )
12461276 & (self ._registry_gdf ["lon_i" ] == lon_i )
@@ -1277,6 +1307,14 @@ def get_scales_file_size(self, year: int, lon: float, lat: float) -> int:
12771307 # Use pre-computed integer grid indices for robust comparison
12781308 lon_i = coord_to_grid_int (lon )
12791309 lat_i = coord_to_grid_int (lat )
1310+
1311+ # Use dictionary index for O(1) lookup - more reliable than DataFrame filtering
1312+ key = (int (year ), int (lon_i ), int (lat_i ))
1313+ if hasattr (self , "_tile_index" ) and key in self ._tile_index :
1314+ idx = self ._tile_index [key ]
1315+ return int (self ._registry_gdf .iloc [idx ]["scales_size" ])
1316+
1317+ # Fallback to DataFrame filtering for compatibility
12801318 matches = self ._registry_gdf [
12811319 (self ._registry_gdf ["year" ] == year )
12821320 & (self ._registry_gdf ["lon_i" ] == lon_i )
@@ -1318,6 +1356,14 @@ def get_landmask_file_size(self, lon: float, lat: float) -> int:
13181356 # Use pre-computed integer grid indices for robust comparison
13191357 lon_i = coord_to_grid_int (lon )
13201358 lat_i = coord_to_grid_int (lat )
1359+
1360+ # Use dictionary index for O(1) lookup - more reliable than DataFrame filtering
1361+ key = (int (lon_i ), int (lat_i ))
1362+ if hasattr (self , "_landmask_index" ) and key in self ._landmask_index :
1363+ idx = self ._landmask_index [key ]
1364+ return int (self ._landmasks_df .iloc [idx ]["file_size" ])
1365+
1366+ # Fallback to DataFrame filtering for compatibility
13211367 matches = self ._landmasks_df [
13221368 (self ._landmasks_df ["lon_i" ] == lon_i )
13231369 & (self ._landmasks_df ["lat_i" ] == lat_i )
0 commit comments