@@ -231,26 +231,19 @@ def parse_grid_name(filename: str) -> Tuple[Optional[float], Optional[float]]:
231231 return None , None
232232
233233
234- def coord_to_grid_index (coord : float ) -> np .int64 :
235- """Convert a coordinate to an integer grid index.
234+ def coord_to_grid_int (coord : float ) -> np .int32 :
235+ """Convert a float coordinate to an integer grid index.
236236
237237 Multiplies by 100 and rounds to get an integer index.
238- This avoids floating-point comparison issues since:
239- - Tiles are on a 0.1° grid with centers at 0.05° offsets
240- - Multiplying by 100 gives integers like 5, 15, 25, -5, -15, etc.
241- - Integer comparison is always exact
242-
243- Uses numpy rounding and int64 to match the DataFrame column computation exactly.
244- Returns np.int64 to ensure type compatibility with pandas int64 columns.
238+ This is the inverse of grid_int_to_coord().
245239
246240 Args:
247- coord: Longitude or latitude coordinate
241+ coord: Longitude or latitude coordinate (e.g., -0.15, 51.55)
248242
249243 Returns:
250- Integer grid index (coordinate * 100, rounded ) as numpy.int64
244+ Integer grid index (e.g., -15, 5155 ) as numpy.int32
251245 """
252- # Use numpy round and int64 to match pandas .round().astype(int) exactly
253- return np .int64 (np .round (coord * 100 ))
246+ return np .int32 (np .round (coord * 100 ))
254247
255248
256249def tile_to_grid_name (lon : float , lat : float ) -> str :
@@ -805,10 +798,11 @@ def _load_registry(self):
805798
806799 self .logger .info (f"Loaded GeoParquet with { len (self ._registry_gdf ):,} tiles" )
807800
808- # Add pre-computed grid index columns for robust coordinate lookup
809- # Using integer indices avoids all floating-point comparison issues
810- self ._registry_gdf ["lon_idx" ] = (self ._registry_gdf ["lon" ] * 100 ).round ().astype (int )
811- self ._registry_gdf ["lat_idx" ] = (self ._registry_gdf ["lat" ] * 100 ).round ().astype (int )
801+ # Convert float lon/lat to integer grid indices (multiply by 100)
802+ # This avoids all floating-point comparison issues across platforms
803+ # Future registries will store integers directly; this is a translation layer
804+ self ._registry_gdf ["lon_i" ] = (self ._registry_gdf ["lon" ] * 100 ).round ().astype (np .int32 )
805+ self ._registry_gdf ["lat_i" ] = (self ._registry_gdf ["lat" ] * 100 ).round ().astype (np .int32 )
812806
813807 # Validate registry structure
814808 required_columns = {"lat" , "lon" , "year" , "hash" , "file_size" }
@@ -890,9 +884,10 @@ def _load_landmasks_registry(self):
890884 )
891885 self ._landmasks_df = None
892886 else :
893- # Add pre-computed grid index columns for robust coordinate lookup
894- self ._landmasks_df ["lon_idx" ] = (self ._landmasks_df ["lon" ] * 100 ).round ().astype (int )
895- self ._landmasks_df ["lat_idx" ] = (self ._landmasks_df ["lat" ] * 100 ).round ().astype (int )
887+ # Convert float lon/lat to integer grid indices (multiply by 100)
888+ # This avoids all floating-point comparison issues across platforms
889+ self ._landmasks_df ["lon_i" ] = (self ._landmasks_df ["lon" ] * 100 ).round ().astype (np .int32 )
890+ self ._landmasks_df ["lat_i" ] = (self ._landmasks_df ["lat" ] * 100 ).round ().astype (np .int32 )
896891
897892 def iter_tiles_in_region (
898893 self , bounds : Tuple [float , float , float , float ], year : int
@@ -941,13 +936,13 @@ def iter_tiles_in_region(
941936
942937 # Drop duplicates and yield (vectorized iteration)
943938 # Use the pre-computed grid indices to ensure consistency with lookups
944- tiles_unique = tiles [["year" , "lon" , "lat" , "lon_idx " , "lat_idx " ]].drop_duplicates ()
945- for year_val , lon_val , lat_val , lon_idx , lat_idx in tiles_unique .values :
939+ tiles_unique = tiles [["year" , "lon" , "lat" , "lon_i " , "lat_i " ]].drop_duplicates ()
940+ for year_val , lon_val , lat_val , lon_i , lat_i in tiles_unique .values :
946941 # Store grid indices on the float values so lookups use consistent values
947942 # We yield the original floats for compatibility but convert them to exact grid centers
948943 # This ensures the yielded coordinates exactly match what's in the registry
949- lon_exact = lon_idx / 100.0
950- lat_exact = lat_idx / 100.0
944+ lon_exact = lon_i / 100.0
945+ lat_exact = lat_i / 100.0
951946 yield (int (year_val ), lon_exact , lat_exact )
952947
953948 def load_blocks_for_region (
@@ -1000,15 +995,15 @@ def get_available_embeddings(self) -> List[Tuple[int, float, float]]:
1000995 Returns:
1001996 List of (year, lon, lat) tuples for all available embedding tiles
1002997 """
1003- unique_tiles = self ._registry_gdf [["year" , "lon_idx " , "lat_idx " ]].drop_duplicates ()
998+ unique_tiles = self ._registry_gdf [["year" , "lon_i " , "lat_i " ]].drop_duplicates ()
1004999
10051000 # Use grid indices to compute exact grid center coordinates
10061001 # This ensures coordinates round-trip correctly for lookups
10071002 return list (
10081003 zip (
10091004 unique_tiles ["year" ].astype (int ).values ,
1010- (unique_tiles ["lon_idx " ].values / 100.0 ),
1011- (unique_tiles ["lat_idx " ].values / 100.0 ),
1005+ (unique_tiles ["lon_i " ].values / 100.0 ),
1006+ (unique_tiles ["lat_i " ].values / 100.0 ),
10121007 )
10131008 )
10141009
@@ -1066,12 +1061,12 @@ def fetch(
10661061 and lat is not None
10671062 ):
10681063 # Use pre-computed integer grid indices for robust comparison
1069- lon_idx = coord_to_grid_index (lon )
1070- lat_idx = coord_to_grid_index (lat )
1064+ lon_i = coord_to_grid_int (lon )
1065+ lat_i = coord_to_grid_int (lat )
10711066 matches = self ._registry_gdf [
10721067 (self ._registry_gdf ["year" ] == year )
1073- & (self ._registry_gdf ["lon_idx " ] == lon_idx )
1074- & (self ._registry_gdf ["lat_idx " ] == lat_idx )
1068+ & (self ._registry_gdf ["lon_i " ] == lon_i )
1069+ & (self ._registry_gdf ["lat_i " ] == lat_i )
10751070 ]
10761071 if len (matches ) > 0 :
10771072 if is_scales :
@@ -1146,11 +1141,11 @@ def fetch_landmask(
11461141 and lat is not None
11471142 ):
11481143 # Use pre-computed integer grid indices for robust comparison
1149- lon_idx = coord_to_grid_index (lon )
1150- lat_idx = coord_to_grid_index (lat )
1144+ lon_i = coord_to_grid_int (lon )
1145+ lat_i = coord_to_grid_int (lat )
11511146 matches = self ._landmasks_df [
1152- (self ._landmasks_df ["lon_idx " ] == lon_idx )
1153- & (self ._landmasks_df ["lat_idx " ] == lat_idx )
1147+ (self ._landmasks_df ["lon_i " ] == lon_i )
1148+ & (self ._landmasks_df ["lat_i " ] == lat_i )
11541149 ]
11551150 if len (matches ) > 0 :
11561151 file_hash = matches .iloc [0 ]["hash" ]
@@ -1195,18 +1190,18 @@ def available_landmasks(self) -> List[Tuple[float, float]]:
11951190 """
11961191 # Use landmasks registry if available
11971192 if self ._landmasks_df is not None :
1198- unique_tiles = self ._landmasks_df [["lon_idx " , "lat_idx " ]].drop_duplicates ()
1193+ unique_tiles = self ._landmasks_df [["lon_i " , "lat_i " ]].drop_duplicates ()
11991194 # Use grid indices to compute exact grid center coordinates
12001195 return list (zip (
1201- unique_tiles ["lon_idx " ].values / 100.0 ,
1202- unique_tiles ["lat_idx " ].values / 100.0
1196+ unique_tiles ["lon_i " ].values / 100.0 ,
1197+ unique_tiles ["lat_i " ].values / 100.0
12031198 ))
12041199
12051200 # Fallback: assume landmasks are available for all embedding tiles
1206- unique_tiles = self ._registry_gdf [["lon_idx " , "lat_idx " ]].drop_duplicates ()
1201+ unique_tiles = self ._registry_gdf [["lon_i " , "lat_i " ]].drop_duplicates ()
12071202 return list (zip (
1208- unique_tiles ["lon_idx " ].values / 100.0 ,
1209- unique_tiles ["lat_idx " ].values / 100.0
1203+ unique_tiles ["lon_i " ].values / 100.0 ,
1204+ unique_tiles ["lat_i " ].values / 100.0
12101205 ))
12111206
12121207 def get_manifest_info (self ) -> Tuple [Optional [str ], Optional [str ]]:
@@ -1241,12 +1236,12 @@ def get_tile_file_size(self, year: int, lon: float, lat: float) -> int:
12411236 )
12421237
12431238 # Use pre-computed integer grid indices for robust comparison
1244- lon_idx = coord_to_grid_index (lon )
1245- lat_idx = coord_to_grid_index (lat )
1239+ lon_i = coord_to_grid_int (lon )
1240+ lat_i = coord_to_grid_int (lat )
12461241 matches = self ._registry_gdf [
12471242 (self ._registry_gdf ["year" ] == year )
1248- & (self ._registry_gdf ["lon_idx " ] == lon_idx )
1249- & (self ._registry_gdf ["lat_idx " ] == lat_idx )
1243+ & (self ._registry_gdf ["lon_i " ] == lon_i )
1244+ & (self ._registry_gdf ["lat_i " ] == lat_i )
12501245 ]
12511246
12521247 if len (matches ) == 0 :
@@ -1277,12 +1272,12 @@ def get_scales_file_size(self, year: int, lon: float, lat: float) -> int:
12771272 )
12781273
12791274 # Use pre-computed integer grid indices for robust comparison
1280- lon_idx = coord_to_grid_index (lon )
1281- lat_idx = coord_to_grid_index (lat )
1275+ lon_i = coord_to_grid_int (lon )
1276+ lat_i = coord_to_grid_int (lat )
12821277 matches = self ._registry_gdf [
12831278 (self ._registry_gdf ["year" ] == year )
1284- & (self ._registry_gdf ["lon_idx " ] == lon_idx )
1285- & (self ._registry_gdf ["lat_idx " ] == lat_idx )
1279+ & (self ._registry_gdf ["lon_i " ] == lon_i )
1280+ & (self ._registry_gdf ["lat_i " ] == lat_i )
12861281 ]
12871282
12881283 if len (matches ) == 0 :
@@ -1318,11 +1313,11 @@ def get_landmask_file_size(self, lon: float, lat: float) -> int:
13181313 )
13191314
13201315 # Use pre-computed integer grid indices for robust comparison
1321- lon_idx = coord_to_grid_index (lon )
1322- lat_idx = coord_to_grid_index (lat )
1316+ lon_i = coord_to_grid_int (lon )
1317+ lat_i = coord_to_grid_int (lat )
13231318 matches = self ._landmasks_df [
1324- (self ._landmasks_df ["lon_idx " ] == lon_idx )
1325- & (self ._landmasks_df ["lat_idx " ] == lat_idx )
1319+ (self ._landmasks_df ["lon_i " ] == lon_i )
1320+ & (self ._landmasks_df ["lat_i " ] == lat_i )
13261321 ]
13271322
13281323 if len (matches ) == 0 :
0 commit comments