Skip to content

Commit cdadf60

Browse files
authored
Merge pull request #312 from statisticsnorway/nwa-speed
Nwa speed etc
2 parents 91c9f2a + 1ea5cf9 commit cdadf60

File tree

13 files changed

+103
-88
lines changed

13 files changed

+103
-88
lines changed

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[tool.poetry]
22
name = "ssb-sgis"
3-
version = "1.3.6"
3+
version = "1.3.8"
44
description = "GIS functions used at Statistics Norway."
55
authors = ["Morten Letnes <morten.letnes@ssb.no>"]
66
license = "MIT"

src/sgis/geopandas_tools/general.py

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -688,8 +688,7 @@ def _split_lines_by_points_along_line(lines, points, splitted_col: str | None =
688688
relevant_lines.geometry = shapely.force_2d(relevant_lines.geometry)
689689
points.geometry = shapely.force_2d(points.geometry)
690690

691-
# split the lines with buffer + difference, since shaply.split usually doesn't work
692-
# relevant_lines["_idx"] = range(len(relevant_lines))
691+
# split the lines with tiny buffer + difference, since shapely.split usually doesn't work
693692
splitted = relevant_lines.overlay(points_buff, how="difference").explode(
694693
ignore_index=True
695694
)
@@ -703,8 +702,9 @@ def _split_lines_by_points_along_line(lines, points, splitted_col: str | None =
703702
if not len(splitted):
704703
return pd.concat([the_other_lines, circles], ignore_index=True)
705704

706-
# the endpoints of the new lines are now sligtly off. Using get_k_nearest_neighbors
707-
# to get the exact snapped point coordinates, . This will map the sligtly
705+
# the endpoints of the new lines are now slightly off because of the buffer.
706+
# Using get_k_nearest_neighbors
707+
# to get the exact snapped point coordinates. This will map the slightly
708708
# wrong line endpoints with the point the line was split by.
709709

710710
points["point_coords"] = [(geom.x, geom.y) for geom in points.geometry]
@@ -721,7 +721,6 @@ def get_nearest(splitted: GeoDataFrame, points: GeoDataFrame) -> pd.DataFrame:
721721
lambda x: x["distance"] <= precision * 2
722722
]
723723

724-
# points = points.set_index("point_coords")
725724
points.index = points.geometry
726725
dists_source = get_nearest(splitted_source, points)
727726
dists_target = get_nearest(splitted_target, points)
@@ -870,9 +869,9 @@ def make_edge_wkt_cols(gdf: GeoDataFrame) -> GeoDataFrame:
870869
except ValueError:
871870
gdf, endpoints = _prepare_make_edge_cols(gdf)
872871

873-
endpoints = endpoints.force_2d()
874-
gdf["source_wkt"] = endpoints.groupby(level=0).first().to_wkt()
875-
gdf["target_wkt"] = endpoints.groupby(level=0).last().to_wkt()
872+
endpoints = endpoints.force_2d().to_wkt()
873+
gdf["source_wkt"] = endpoints.groupby(level=0).first()
874+
gdf["target_wkt"] = endpoints.groupby(level=0).last()
876875

877876
if index_mapper is not None:
878877
gdf.index = gdf.index.map(index_mapper)

src/sgis/geopandas_tools/runners.py

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -239,9 +239,6 @@ def run(
239239
left, right = results
240240
return left, right
241241
return results
242-
left = np.concatenate([x[0] for x in results])
243-
right = np.concatenate([x[1] for x in results])
244-
return left, right
245242
elif (
246243
(self.n_jobs or 1) > 1
247244
and len(arr2) / self.n_jobs > 10_000
@@ -264,9 +261,6 @@ def run(
264261
left, right = results
265262
return left, right
266263
return results
267-
left = np.concatenate([x[0] for x in results])
268-
right = np.concatenate([x[1] for x in results])
269-
return left, right
270264

271265
return _strtree_query(arr1, arr2, method=method, **kwargs)
272266

src/sgis/io/dapla_functions.py

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,7 @@ def read_geopandas(
121121
).replace("==", "=")
122122
glob_func = _get_glob_func(file_system)
123123
suffix: str = Path(gcs_path).suffix
124-
paths = glob_func(str(Path(gcs_path) / expression / f"*{suffix}"))
124+
paths = glob_func(_standardize_path(gcs_path) + f"/{expression}/*{suffix}")
125125
if paths:
126126
return _read_geopandas_from_iterable(
127127
paths,
@@ -256,7 +256,7 @@ def _read_pyarrow(path: str, file_system, mask=None, **kwargs) -> pyarrow.Table
256256
if not len(
257257
{
258258
x
259-
for x in glob_func(str(Path(path) / "**"))
259+
for x in glob_func(str(_standardize_path(path) + "/**"))
260260
if not paths_are_equal(path, x)
261261
}
262262
):
@@ -618,15 +618,17 @@ def as_partition_part(col: str, value: Any) -> str:
618618
as_partition_part(col, value)
619619
for col, value in zip(partition_cols, group, strict=True)
620620
)
621-
paths.append(Path(path) / partition_parts)
621+
paths.append(_standardize_path(path) + f"/{partition_parts}")
622622
dfs.append(rows)
623623

624624
def threaded_write(rows: DataFrame, path: str) -> None:
625625
if basename_template is None:
626626
this_basename = (uuid.uuid4().hex + "-{i}.parquet").replace("-{i}", "0")
627627
else:
628628
this_basename = basename_template.replace("-{i}", "0")
629-
for i, sibling_path in enumerate(sorted(glob_func(str(Path(path) / "**")))):
629+
for i, sibling_path in enumerate(
630+
sorted(glob_func(str(_standardize_path(path) + "/**")))
631+
):
630632
if paths_are_equal(sibling_path, path):
631633
continue
632634
if existing_data_behavior == "delete_matching":
@@ -638,7 +640,7 @@ def threaded_write(rows: DataFrame, path: str) -> None:
638640
else:
639641
this_basename = basename_template.replace("-{i}", str(i + 1))
640642

641-
out_path = str(Path(path) / this_basename)
643+
out_path = str(_standardize_path(path) + "/" + this_basename)
642644
try:
643645
with file_system.open(out_path, mode="wb") as file:
644646
write_func(rows, file, schema=schema, **kwargs)
@@ -780,7 +782,7 @@ def _read_partitioned_parquet(
780782
glob_func = _get_glob_func(file_system)
781783

782784
if child_paths is None:
783-
child_paths = list(glob_func(str(Path(path) / "**/*.parquet")))
785+
child_paths = list(glob_func(str(_standardize_path(path) + "/**/*.parquet")))
784786

785787
filters = _filters_to_expression(filters)
786788

@@ -830,7 +832,7 @@ def get_child_paths(path, file_system) -> list[str]:
830832
glob_func = _get_glob_func(file_system)
831833
return [
832834
x
833-
for x in glob_func(str(Path(path) / "**/*.parquet"))
835+
for x in glob_func(str(_standardize_path(path) + "/**/*.parquet"))
834836
if not paths_are_equal(x, path)
835837
]
836838

@@ -938,3 +940,8 @@ def _maybe_strip_prefix(path, file_system):
938940
if isinstance(file_system, GCSFileSystem) and path.startswith("gs://"):
939941
return path.replace("gs://", "")
940942
return path
943+
944+
945+
def _standardize_path(path: str | Path) -> str:
946+
"""Make sure delimiter is '/' and path ends without '/'."""
947+
return str(path).replace("\\", "/").replace(r"\"", "/")

src/sgis/networkanalysis/_od_cost_matrix.py

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -26,13 +26,11 @@ def _od_cost_matrix(
2626
# calculating all-to-all distances is much faster than looping rowwise,
2727
# so filtering to rowwise afterwards instead
2828
if rowwise:
29-
rowwise_df = DataFrame(
30-
{
31-
"origin": origins.index,
32-
"destination": destinations.index,
33-
}
29+
keys = pd.MultiIndex.from_arrays(
30+
[origins.index, destinations.index],
31+
names=["origin", "destination"],
3432
)
35-
results = rowwise_df.merge(results, on=["origin", "destination"], how="left")
33+
results = results.set_index(["origin", "destination"]).loc[keys].reset_index()
3634

3735
results["geom_ori"] = results["origin"].map(origins.geometry)
3836
results["geom_des"] = results["destination"].map(destinations.geometry)

src/sgis/networkanalysis/_points.py

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,10 +16,7 @@
1616

1717

1818
class Points:
19-
def __init__(
20-
self,
21-
points: GeoDataFrame,
22-
) -> None:
19+
def __init__(self, points: GeoDataFrame) -> None:
2320
self.gdf = points.copy()
2421

2522
def _make_temp_idx(self, start: int) -> None:

src/sgis/networkanalysis/closing_network_holes.py

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,7 @@ def close_network_holes(
7979
gdf: GeoDataFrame,
8080
max_distance: int | float,
8181
max_angle: int,
82+
*,
8283
hole_col: str | None = "hole",
8384
) -> GeoDataFrame:
8485
"""Fills network gaps with straigt lines.
@@ -282,11 +283,13 @@ def _close_holes_all_lines(
282283
) -> GeoSeries:
283284
k = min(len(nodes), 50)
284285

286+
n_dict = nodes.set_index("wkt")["n"]
287+
285288
# make points for the deadends and the other endpoint of the deadend lines
286-
deadends_target = lines.loc[lines["n_target"] == 1].rename(
289+
deadends_target = lines.loc[lines["target_wkt"].map(n_dict) == 1].rename(
287290
columns={"target_wkt": "wkt", "source_wkt": "wkt_other_end"}
288291
)
289-
deadends_source = lines.loc[lines["n_source"] == 1].rename(
292+
deadends_source = lines.loc[lines["source_wkt"].map(n_dict) == 1].rename(
290293
columns={"source_wkt": "wkt", "target_wkt": "wkt_other_end"}
291294
)
292295
deadends = pd.concat([deadends_source, deadends_target], ignore_index=True)
@@ -349,12 +352,6 @@ def get_angle_difference(angle1, angle2):
349352
to_idx = indices[condition]
350353
to_wkt = nodes.iloc[to_idx]["wkt"]
351354

352-
# all_angles = all_angles + [
353-
# diff
354-
# for f, diff in zip(from_wkt, angles_difference[condition], strict=True)
355-
# if f not in new_sources
356-
# ]
357-
358355
# now add the wkts to the lists of new sources and targets. If the source
359356
# is already added, the new wkts will not be added again
360357
new_targets = new_targets + [

src/sgis/networkanalysis/finding_isolated_networks.py

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -57,9 +57,7 @@ def get_connected_components(gdf: GeoDataFrame) -> GeoDataFrame:
5757

5858
gdf["connected"] = gdf.source.map(largest_component_dict).fillna(0)
5959

60-
gdf = gdf.drop(
61-
["source_wkt", "target_wkt", "source", "target", "n_source", "n_target"], axis=1
62-
)
60+
gdf = gdf.drop(["source_wkt", "target_wkt", "source", "target"], axis=1)
6361

6462
return gdf
6563

@@ -120,8 +118,6 @@ def get_component_size(gdf: GeoDataFrame) -> GeoDataFrame:
120118
gdf["component_index"] = gdf["source"].map(mapper["component_index"])
121119
gdf["component_size"] = gdf["source"].map(mapper["component_size"])
122120

123-
gdf = gdf.drop(
124-
["source_wkt", "target_wkt", "source", "target", "n_source", "n_target"], axis=1
125-
)
121+
gdf = gdf.drop(["source_wkt", "target_wkt", "source", "target"], axis=1)
126122

127123
return gdf

src/sgis/networkanalysis/network.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -36,14 +36,12 @@ def __init__(self, gdf: GeoDataFrame) -> None:
3636
raise TypeError(f"'lines' should be GeoDataFrame, got {type(gdf)}")
3737

3838
if not len(gdf):
39-
raise ZeroLinesError
39+
raise ZeroLinesError()
4040

4141
self.gdf = self._prepare_network(gdf)
4242

4343
self._make_node_ids()
4444

45-
self._percent_bidirectional = self._check_percent_bidirectional()
46-
4745
def _make_node_ids(self) -> None:
4846
"""Gives the lines node ids and return lines (edges) and nodes.
4947
@@ -55,6 +53,7 @@ def _make_node_ids(self) -> None:
5553
The lines must be singlepart linestrings.
5654
"""
5755
self.gdf, self._nodes = make_node_ids(self.gdf)
56+
self._percent_bidirectional = self._check_percent_bidirectional()
5857

5958
@staticmethod
6059
def _prepare_network(gdf: GeoDataFrame) -> GeoDataFrame:
@@ -138,6 +137,8 @@ def _nodes_are_up_to_date(self) -> bool:
138137
or any superfluous node-ids (meaning rows have been removed from the lines
139138
gdf).
140139
"""
140+
if not hasattr(self, "_nodes"):
141+
return False
141142
new_or_missing = (~self.gdf.source.isin(self._nodes.node_id)) | (
142143
~self.gdf.target.isin(self._nodes.node_id)
143144
)

0 commit comments

Comments
 (0)