Skip to content

Commit cdadf60

Browse files
authored
Merge pull request #312 from statisticsnorway/nwa-speed
Nwa speed etc
2 parents 91c9f2a + 1ea5cf9 commit cdadf60

File tree

13 files changed

+103
-88
lines changed

13 files changed

+103
-88
lines changed

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[tool.poetry]
22
name = "ssb-sgis"
3-
version = "1.3.6"
3+
version = "1.3.8"
44
description = "GIS functions used at Statistics Norway."
55
authors = ["Morten Letnes <morten.letnes@ssb.no>"]
66
license = "MIT"

src/sgis/geopandas_tools/general.py

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -688,8 +688,7 @@ def _split_lines_by_points_along_line(lines, points, splitted_col: str | None =
688688
relevant_lines.geometry = shapely.force_2d(relevant_lines.geometry)
689689
points.geometry = shapely.force_2d(points.geometry)
690690

691-
# split the lines with buffer + difference, since shaply.split usually doesn't work
692-
# relevant_lines["_idx"] = range(len(relevant_lines))
691+
# split the lines with tiny buffer + difference, since shapely.split usually doesn't work
693692
splitted = relevant_lines.overlay(points_buff, how="difference").explode(
694693
ignore_index=True
695694
)
@@ -703,8 +702,9 @@ def _split_lines_by_points_along_line(lines, points, splitted_col: str | None =
703702
if not len(splitted):
704703
return pd.concat([the_other_lines, circles], ignore_index=True)
705704

706-
# the endpoints of the new lines are now sligtly off. Using get_k_nearest_neighbors
707-
# to get the exact snapped point coordinates, . This will map the sligtly
705+
# the endpoints of the new lines are now slightly off because of the buffer.
706+
# Using get_k_nearest_neighbors
707+
# to get the exact snapped point coordinates. This will map the slightly
708708
# wrong line endpoints with the point the line was split by.
709709

710710
points["point_coords"] = [(geom.x, geom.y) for geom in points.geometry]
@@ -721,7 +721,6 @@ def get_nearest(splitted: GeoDataFrame, points: GeoDataFrame) -> pd.DataFrame:
721721
lambda x: x["distance"] <= precision * 2
722722
]
723723

724-
# points = points.set_index("point_coords")
725724
points.index = points.geometry
726725
dists_source = get_nearest(splitted_source, points)
727726
dists_target = get_nearest(splitted_target, points)
@@ -870,9 +869,9 @@ def make_edge_wkt_cols(gdf: GeoDataFrame) -> GeoDataFrame:
870869
except ValueError:
871870
gdf, endpoints = _prepare_make_edge_cols(gdf)
872871

873-
endpoints = endpoints.force_2d()
874-
gdf["source_wkt"] = endpoints.groupby(level=0).first().to_wkt()
875-
gdf["target_wkt"] = endpoints.groupby(level=0).last().to_wkt()
872+
endpoints = endpoints.force_2d().to_wkt()
873+
gdf["source_wkt"] = endpoints.groupby(level=0).first()
874+
gdf["target_wkt"] = endpoints.groupby(level=0).last()
876875

877876
if index_mapper is not None:
878877
gdf.index = gdf.index.map(index_mapper)

src/sgis/geopandas_tools/runners.py

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -239,9 +239,6 @@ def run(
239239
left, right = results
240240
return left, right
241241
return results
242-
left = np.concatenate([x[0] for x in results])
243-
right = np.concatenate([x[1] for x in results])
244-
return left, right
245242
elif (
246243
(self.n_jobs or 1) > 1
247244
and len(arr2) / self.n_jobs > 10_000
@@ -264,9 +261,6 @@ def run(
264261
left, right = results
265262
return left, right
266263
return results
267-
left = np.concatenate([x[0] for x in results])
268-
right = np.concatenate([x[1] for x in results])
269-
return left, right
270264

271265
return _strtree_query(arr1, arr2, method=method, **kwargs)
272266

src/sgis/io/dapla_functions.py

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,7 @@ def read_geopandas(
121121
).replace("==", "=")
122122
glob_func = _get_glob_func(file_system)
123123
suffix: str = Path(gcs_path).suffix
124-
paths = glob_func(str(Path(gcs_path) / expression / f"*{suffix}"))
124+
paths = glob_func(_standardize_path(gcs_path) + f"/{expression}/*{suffix}")
125125
if paths:
126126
return _read_geopandas_from_iterable(
127127
paths,
@@ -256,7 +256,7 @@ def _read_pyarrow(path: str, file_system, mask=None, **kwargs) -> pyarrow.Table
256256
if not len(
257257
{
258258
x
259-
for x in glob_func(str(Path(path) / "**"))
259+
for x in glob_func(str(_standardize_path(path) + "/**"))
260260
if not paths_are_equal(path, x)
261261
}
262262
):
@@ -618,15 +618,17 @@ def as_partition_part(col: str, value: Any) -> str:
618618
as_partition_part(col, value)
619619
for col, value in zip(partition_cols, group, strict=True)
620620
)
621-
paths.append(Path(path) / partition_parts)
621+
paths.append(_standardize_path(path) + f"/{partition_parts}")
622622
dfs.append(rows)
623623

624624
def threaded_write(rows: DataFrame, path: str) -> None:
625625
if basename_template is None:
626626
this_basename = (uuid.uuid4().hex + "-{i}.parquet").replace("-{i}", "0")
627627
else:
628628
this_basename = basename_template.replace("-{i}", "0")
629-
for i, sibling_path in enumerate(sorted(glob_func(str(Path(path) / "**")))):
629+
for i, sibling_path in enumerate(
630+
sorted(glob_func(str(_standardize_path(path) + "/**")))
631+
):
630632
if paths_are_equal(sibling_path, path):
631633
continue
632634
if existing_data_behavior == "delete_matching":
@@ -638,7 +640,7 @@ def threaded_write(rows: DataFrame, path: str) -> None:
638640
else:
639641
this_basename = basename_template.replace("-{i}", str(i + 1))
640642

641-
out_path = str(Path(path) / this_basename)
643+
out_path = str(_standardize_path(path) + "/" + this_basename)
642644
try:
643645
with file_system.open(out_path, mode="wb") as file:
644646
write_func(rows, file, schema=schema, **kwargs)
@@ -780,7 +782,7 @@ def _read_partitioned_parquet(
780782
glob_func = _get_glob_func(file_system)
781783

782784
if child_paths is None:
783-
child_paths = list(glob_func(str(Path(path) / "**/*.parquet")))
785+
child_paths = list(glob_func(str(_standardize_path(path) + "/**/*.parquet")))
784786

785787
filters = _filters_to_expression(filters)
786788

@@ -830,7 +832,7 @@ def get_child_paths(path, file_system) -> list[str]:
830832
glob_func = _get_glob_func(file_system)
831833
return [
832834
x
833-
for x in glob_func(str(Path(path) / "**/*.parquet"))
835+
for x in glob_func(str(_standardize_path(path) + "/**/*.parquet"))
834836
if not paths_are_equal(x, path)
835837
]
836838

@@ -938,3 +940,8 @@ def _maybe_strip_prefix(path, file_system):
938940
if isinstance(file_system, GCSFileSystem) and path.startswith("gs://"):
939941
return path.replace("gs://", "")
940942
return path
943+
944+
945+
def _standardize_path(path: str | Path) -> str:
946+
"""Make sure delimiter is '/' and path ends without '/'."""
947+
return str(path).replace("\\", "/").replace(r"\"", "/")

src/sgis/networkanalysis/_od_cost_matrix.py

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -26,13 +26,11 @@ def _od_cost_matrix(
2626
# calculating all-to-all distances is much faster than looping rowwise,
2727
# so filtering to rowwise afterwards instead
2828
if rowwise:
29-
rowwise_df = DataFrame(
30-
{
31-
"origin": origins.index,
32-
"destination": destinations.index,
33-
}
29+
keys = pd.MultiIndex.from_arrays(
30+
[origins.index, destinations.index],
31+
names=["origin", "destination"],
3432
)
35-
results = rowwise_df.merge(results, on=["origin", "destination"], how="left")
33+
results = results.set_index(["origin", "destination"]).loc[keys].reset_index()
3634

3735
results["geom_ori"] = results["origin"].map(origins.geometry)
3836
results["geom_des"] = results["destination"].map(destinations.geometry)

src/sgis/networkanalysis/_points.py

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,10 +16,7 @@
1616

1717

1818
class Points:
19-
def __init__(
20-
self,
21-
points: GeoDataFrame,
22-
) -> None:
19+
def __init__(self, points: GeoDataFrame) -> None:
2320
self.gdf = points.copy()
2421

2522
def _make_temp_idx(self, start: int) -> None:

src/sgis/networkanalysis/closing_network_holes.py

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,7 @@ def close_network_holes(
7979
gdf: GeoDataFrame,
8080
max_distance: int | float,
8181
max_angle: int,
82+
*,
8283
hole_col: str | None = "hole",
8384
) -> GeoDataFrame:
8485
"""Fills network gaps with straigt lines.
@@ -282,11 +283,13 @@ def _close_holes_all_lines(
282283
) -> GeoSeries:
283284
k = min(len(nodes), 50)
284285

286+
n_dict = nodes.set_index("wkt")["n"]
287+
285288
# make points for the deadends and the other endpoint of the deadend lines
286-
deadends_target = lines.loc[lines["n_target"] == 1].rename(
289+
deadends_target = lines.loc[lines["target_wkt"].map(n_dict) == 1].rename(
287290
columns={"target_wkt": "wkt", "source_wkt": "wkt_other_end"}
288291
)
289-
deadends_source = lines.loc[lines["n_source"] == 1].rename(
292+
deadends_source = lines.loc[lines["source_wkt"].map(n_dict) == 1].rename(
290293
columns={"source_wkt": "wkt", "target_wkt": "wkt_other_end"}
291294
)
292295
deadends = pd.concat([deadends_source, deadends_target], ignore_index=True)
@@ -349,12 +352,6 @@ def get_angle_difference(angle1, angle2):
349352
to_idx = indices[condition]
350353
to_wkt = nodes.iloc[to_idx]["wkt"]
351354

352-
# all_angles = all_angles + [
353-
# diff
354-
# for f, diff in zip(from_wkt, angles_difference[condition], strict=True)
355-
# if f not in new_sources
356-
# ]
357-
358355
# now add the wkts to the lists of new sources and targets. If the source
359356
# is already added, the new wkts will not be added again
360357
new_targets = new_targets + [

src/sgis/networkanalysis/finding_isolated_networks.py

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -57,9 +57,7 @@ def get_connected_components(gdf: GeoDataFrame) -> GeoDataFrame:
5757

5858
gdf["connected"] = gdf.source.map(largest_component_dict).fillna(0)
5959

60-
gdf = gdf.drop(
61-
["source_wkt", "target_wkt", "source", "target", "n_source", "n_target"], axis=1
62-
)
60+
gdf = gdf.drop(["source_wkt", "target_wkt", "source", "target"], axis=1)
6361

6462
return gdf
6563

@@ -120,8 +118,6 @@ def get_component_size(gdf: GeoDataFrame) -> GeoDataFrame:
120118
gdf["component_index"] = gdf["source"].map(mapper["component_index"])
121119
gdf["component_size"] = gdf["source"].map(mapper["component_size"])
122120

123-
gdf = gdf.drop(
124-
["source_wkt", "target_wkt", "source", "target", "n_source", "n_target"], axis=1
125-
)
121+
gdf = gdf.drop(["source_wkt", "target_wkt", "source", "target"], axis=1)
126122

127123
return gdf

src/sgis/networkanalysis/network.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -36,14 +36,12 @@ def __init__(self, gdf: GeoDataFrame) -> None:
3636
raise TypeError(f"'lines' should be GeoDataFrame, got {type(gdf)}")
3737

3838
if not len(gdf):
39-
raise ZeroLinesError
39+
raise ZeroLinesError()
4040

4141
self.gdf = self._prepare_network(gdf)
4242

4343
self._make_node_ids()
4444

45-
self._percent_bidirectional = self._check_percent_bidirectional()
46-
4745
def _make_node_ids(self) -> None:
4846
"""Gives the lines node ids and return lines (edges) and nodes.
4947
@@ -55,6 +53,7 @@ def _make_node_ids(self) -> None:
5553
The lines must be singlepart linestrings.
5654
"""
5755
self.gdf, self._nodes = make_node_ids(self.gdf)
56+
self._percent_bidirectional = self._check_percent_bidirectional()
5857

5958
@staticmethod
6059
def _prepare_network(gdf: GeoDataFrame) -> GeoDataFrame:
@@ -138,6 +137,8 @@ def _nodes_are_up_to_date(self) -> bool:
138137
or any superfluous node-ids (meaning rows have been removed from the lines
139138
gdf).
140139
"""
140+
if not hasattr(self, "_nodes"):
141+
return False
141142
new_or_missing = (~self.gdf.source.isin(self._nodes.node_id)) | (
142143
~self.gdf.target.isin(self._nodes.node_id)
143144
)

0 commit comments

Comments
 (0)