debug

avsm · avsm · commit f490efa02e3f · 2025-11-30T10:57:46.000Z
diff --git a/geotessera/registry.py b/geotessera/registry.py
@@ -811,6 +811,13 @@ def _load_registry(self):
             missing = required_columns - set(self._registry_gdf.columns)
             raise ValueError(f"Registry is missing required columns: {missing}")
 
+        # Build a (year, lon_i, lat_i) -> positional row dict for O(1) lookups (on duplicate keys the
+        # last row wins) - avoids non-deterministic pandas DataFrame filtering issues on some platforms (Windows CI)
+        self._tile_index: Dict[Tuple[int, int, int], int] = {}
+        for idx, row in enumerate(self._registry_gdf.itertuples()):
+            key = (int(row.year), int(row.lon_i), int(row.lat_i))
+            self._tile_index[key] = idx
+
     def _load_landmasks_registry(self):
         """Load landmasks Parquet registry from local path or download from remote with If-Modified-Since refresh."""
         if self._landmasks_registry_path and self._landmasks_registry_path.exists():
@@ -892,6 +899,13 @@ def _load_landmasks_registry(self):
                 if "lat_i" not in self._landmasks_df.columns:
                     self._landmasks_df["lat_i"] = (self._landmasks_df["lat"] * 100).round().astype(np.int32)
 
+                # Build a (lon_i, lat_i) -> positional row dict for O(1) lookups (on duplicate keys the
+                # last row wins) - avoids non-deterministic pandas DataFrame filtering issues on some platforms (Windows CI)
+                self._landmask_index: Dict[Tuple[int, int], int] = {}
+                for idx, row in enumerate(self._landmasks_df.itertuples()):
+                    key = (int(row.lon_i), int(row.lat_i))
+                    self._landmask_index[key] = idx
+
     def iter_tiles_in_region(
         self, bounds: Tuple[float, float, float, float], year: int
     ) -> Iterator[Tuple[int, float, float]]:
@@ -1146,12 +1160,20 @@ def fetch_landmask(
             # Use pre-computed integer grid indices for robust comparison
             lon_i = coord_to_grid_int(lon)
             lat_i = coord_to_grid_int(lat)
-            matches = self._landmasks_df[
-                (self._landmasks_df["lon_i"] == lon_i)
-                & (self._landmasks_df["lat_i"] == lat_i)
-            ]
-            if len(matches) > 0:
-                file_hash = matches.iloc[0]["hash"]
+
+            # Use dictionary index for O(1) lookup - more reliable than DataFrame filtering
+            key = (int(lon_i), int(lat_i))
+            if hasattr(self, "_landmask_index") and key in self._landmask_index:
+                idx = self._landmask_index[key]
+                file_hash = self._landmasks_df.iloc[idx]["hash"]
+            else:
+                # Fallback to DataFrame filtering
+                matches = self._landmasks_df[
+                    (self._landmasks_df["lon_i"] == lon_i)
+                    & (self._landmasks_df["lat_i"] == lat_i)
+                ]
+                if len(matches) > 0:
+                    file_hash = matches.iloc[0]["hash"]
 
         # Download to embeddings_dir
         url = f"{TESSERA_BASE_URL}/{self.version}/{LANDMASKS_DIR_NAME}/{filename}"
@@ -1241,6 +1263,14 @@ def get_tile_file_size(self, year: int, lon: float, lat: float) -> int:
         # Use pre-computed integer grid indices for robust comparison
         lon_i = coord_to_grid_int(lon)
         lat_i = coord_to_grid_int(lat)
+
+        # Use dictionary index for O(1) lookup - more reliable than DataFrame filtering
+        key = (int(year), int(lon_i), int(lat_i))
+        if hasattr(self, "_tile_index") and key in self._tile_index:
+            idx = self._tile_index[key]
+            return int(self._registry_gdf.iloc[idx]["file_size"])
+
+        # Fall back to DataFrame filtering when the tile index is absent or has no entry for this key
         matches = self._registry_gdf[
             (self._registry_gdf["year"] == year)
             & (self._registry_gdf["lon_i"] == lon_i)
@@ -1277,6 +1307,14 @@ def get_scales_file_size(self, year: int, lon: float, lat: float) -> int:
         # Use pre-computed integer grid indices for robust comparison
         lon_i = coord_to_grid_int(lon)
         lat_i = coord_to_grid_int(lat)
+
+        # Use dictionary index for O(1) lookup - more reliable than DataFrame filtering
+        key = (int(year), int(lon_i), int(lat_i))
+        if hasattr(self, "_tile_index") and key in self._tile_index:
+            idx = self._tile_index[key]
+            return int(self._registry_gdf.iloc[idx]["scales_size"])
+
+        # Fall back to DataFrame filtering when the tile index is absent or has no entry for this key
         matches = self._registry_gdf[
             (self._registry_gdf["year"] == year)
             & (self._registry_gdf["lon_i"] == lon_i)
@@ -1318,6 +1356,14 @@ def get_landmask_file_size(self, lon: float, lat: float) -> int:
         # Use pre-computed integer grid indices for robust comparison
         lon_i = coord_to_grid_int(lon)
         lat_i = coord_to_grid_int(lat)
+
+        # Use dictionary index for O(1) lookup - more reliable than DataFrame filtering
+        key = (int(lon_i), int(lat_i))
+        if hasattr(self, "_landmask_index") and key in self._landmask_index:
+            idx = self._landmask_index[key]
+            return int(self._landmasks_df.iloc[idx]["file_size"])
+
+        # Fall back to DataFrame filtering when the landmask index is absent or has no entry for this key
         matches = self._landmasks_df[
             (self._landmasks_df["lon_i"] == lon_i)
             & (self._landmasks_df["lat_i"] == lat_i)