Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 17 additions & 9 deletions src/backend/app/projects/project_routes.py
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,10 @@ async def task_split(
project_geojson: UploadFile = File(...),
extract_geojson: Optional[UploadFile] = File(None),
no_of_buildings: int = Form(50),
geom_type: Annotated[DbGeomType, Form()] = DbGeomType.POLYGON,
osm_category: Annotated[Optional[XLSFormType], Form()] = XLSFormType.buildings,
centroid: Annotated[bool, Form()] = None,
use_st_within: Annotated[bool, Form()] = None,
):
"""Split a task into subtasks.

Expand All @@ -246,6 +250,10 @@ async def task_split(
If not included, an extract is generated automatically.
no_of_buildings (int, optional): The number of buildings per subtask.
Defaults to 50.
geom_type (DbGeomType, optional): The geometry type to extract.
osm_category (XLSFormType, optional): The OSM data category to extract.
centroid (bool, optional): Whether to generate centroids for polygon tasks.
use_st_within (bool, optional): Include features within the AOI.

Returns:
The result of splitting the task into subtasks.
Expand All @@ -265,15 +273,15 @@ async def task_split(
log.warning("Parsed geojson file contained no geometries")

log.debug("STARTED task splitting using provided boundary and data extract")
# NOTE here we pass the connection string and allow area-splitter to
# use a psycopg connection (not async)
features = await run_in_threadpool(
lambda: split_by_sql(
merged_boundary,
settings.FMTM_DB_URL,
num_buildings=no_of_buildings,
osm_extract=parsed_extract,
)
features = await split_by_sql(
merged_boundary,
settings.FMTM_DB_URL,
num_buildings=no_of_buildings,
osm_extract=parsed_extract,
geom_type=geom_type,
osm_category=osm_category,
centroid=centroid,
use_st_within=use_st_within,
)
log.debug("COMPLETE task splitting")
return features
Expand Down
146 changes: 102 additions & 44 deletions src/backend/packages/area-splitter/area_splitter/splitter.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,18 +15,27 @@
"""Class and helper methods for task splitting."""

import argparse
import ast
import asyncio
import json
import logging
import math
import sys
from io import BytesIO
from pathlib import Path
from textwrap import dedent
from typing import Optional, Tuple, Union

import geojson
import httpx
from app.config import settings
from app.db.enums import DbGeomType, HTTPStatus, XLSFormType
from fastapi import HTTPException
from geojson import Feature, FeatureCollection, GeoJSON
from osm_rawdata.postgres import PostgresClient
from osm_data_client import (
RawDataClient,
RawDataClientConfig,
RawDataOutputOptions,
)
from osm_fieldwork.json_data_models import data_models_path
from psycopg import Connection
from shapely.geometry import Polygon, box, shape
from shapely.ops import unary_union
Expand Down Expand Up @@ -603,12 +612,16 @@ def split_by_square(
return split_features


def split_by_sql(
async def split_by_sql(
aoi: Union[str, FeatureCollection],
db: Union[str, Connection],
geom_type: Optional[str] = DbGeomType.POLYGON,
osm_category: Optional[str] = XLSFormType.buildings,
num_buildings: Optional[int] = None,
outfile: Optional[str] = None,
osm_extract: Optional[Union[str, FeatureCollection]] = None,
centroid: Optional[bool] = False,
use_st_within: Optional[bool] = None,
) -> FeatureCollection:
"""Split an AOI with a field-tm algorithm.

Expand Down Expand Up @@ -636,10 +649,16 @@ def split_by_sql(
Optional param, if not included an extract is generated for you.
It is recommended to leave this param as default, unless you know
what you are doing.
geom_type (str): Type of geometry to extract.
osm_category (str): The OSM data category to extract.
centroid (bool): Generate centroid of polygons.
use_st_within (bool): Include features within the AOI.

Returns:
features (FeatureCollection): A multipolygon of all the task boundaries.
"""
print("geom_type:", geom_type)
geom_type = geom_type.lower()
if not num_buildings:
err = "num_buildings must be passed, until other algorithms are implemented."
log.error(err)
Expand All @@ -651,36 +670,69 @@ def split_by_sql(

# Extracts and parse extract geojson
if not osm_extract:
# We want all polylines for splitting:
# buildings, highways, waterways, railways
config_data = dedent(
"""
select:
from:
- nodes
- ways_poly
- ways_line
where:
tags:
- building: not null
highway: not null
waterway: not null
railway: not null
aeroway: not null
"""
config = RawDataClientConfig(
access_token=settings.RAW_DATA_API_AUTH_TOKEN.get_secret_value()
if settings.RAW_DATA_API_AUTH_TOKEN
else None
)
# Must be a BytesIO JSON object
config_bytes = BytesIO(config_data.encode())
config_filename = XLSFormType(osm_category).name
data_model = f"{data_models_path}/{config_filename}.json"

with open(data_model, encoding="utf-8") as f:
config_data = json.load(f)

data_config = {
("polygon", False): ["ways_poly"],
("point", True): ["ways_poly", "nodes"],
("point", False): ["nodes"],
("polyline", False): ["ways_line"],
}

config_data["from"] = data_config.get((geom_type, centroid))
if geom_type == "polyline":
geom_type = "line" # line is recognized as a geomtype in raw-data-api
if not use_st_within:
use_st_within = False if geom_type == "line" else True

extra_params = {
"fileName": "area_splitter",
"outputType": "geojson",
"geometryType": [geom_type],
"bindZip": False,
"centroid": centroid,
"use_st_within": use_st_within,
"filters": config_data,
}

try:
result = await RawDataClient(config).get_osm_data(
aoi,
output_options=RawDataOutputOptions(download_file=False),
**extra_params,
)

pg = PostgresClient(
"underpass",
config_bytes,
)
# The total FeatureCollection area merged by osm-rawdata automatically
extract_geojson = pg.execQuery(
aoi_featcol,
extra_params={"fileName": "area_splitter", "useStWithin": False},
)
url = result.data.get("download_url")
async with httpx.AsyncClient() as client:
result = await client.get(url)
if result.status_code == 200:
extract_geojson = result.json()
else:
log.error("Failed to download OSM extract data")
except Exception as e:
log.error("Raw data API request failed")
if "status 406" in str(e) and "Area" in str(e):
try:
# Extract the error dict part
error_str = str(e).split("status 406:")[-1].strip()
error_dict = ast.literal_eval(error_str)
msg = error_dict["detail"][0]["msg"]
except Exception:
msg = """Selected area is too large.
Please select an area smaller than 200 km²."""

raise HTTPException(
status_code=HTTPStatus.UNPROCESSABLE_ENTITY, detail=msg
) from e

else:
extract_geojson = FMTMSplitter.input_to_geojson(osm_extract)
Expand All @@ -694,12 +746,16 @@ def split_by_sql(
if len(feat_array := aoi_featcol.get("features", [])) > 1:
features = []
for index, feat in enumerate(feat_array):
featcol = split_by_sql(
FeatureCollection(features=[feat]),
db,
num_buildings,
f"{Path(outfile).stem}_{index}.geojson)" if outfile else None,
osm_extract,
featcol = await split_by_sql(
aoi=FeatureCollection(features=[feat]),
db=db,
num_buildings=num_buildings,
outfile=f"{Path(outfile).stem}_{index}.geojson" if outfile else None,
osm_extract=osm_extract,
geom_type=geom_type,
osm_category=osm_category,
centroid=centroid,
use_st_within=use_st_within,
)
feats = featcol.get("features", [])
if feats:
Expand Down Expand Up @@ -886,12 +942,14 @@ def main(args_list: list[str] | None = None):
osm_extract=args.extract,
)
elif args.number:
split_by_sql(
args.boundary,
db=args.dburl,
num_buildings=args.number,
outfile=args.outfile,
osm_extract=args.extract,
asyncio.run(
split_by_sql(
args.boundary,
db=args.dburl,
num_buildings=args.number,
outfile=args.outfile,
osm_extract=args.extract,
)
)
# Split by feature using geojson
elif args.source and args.source[3:] != "PG:":
Expand Down
1 change: 0 additions & 1 deletion src/backend/packages/area-splitter/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@ dependencies = [
"geojson>=2.5.0",
"shapely>=1.8.1",
"psycopg>=3.1.7",
"osm-rawdata>=0.2.2",
]
requires-python = ">=3.10"
readme = "README.md"
Expand Down
16 changes: 8 additions & 8 deletions src/backend/packages/area-splitter/tests/test_splitter.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,9 +157,9 @@ def test_split_by_features_geojson(aoi_json):
assert len(features.get("features")) == 4


def test_split_by_sql_fmtm_with_extract(db, aoi_json, extract_json, output_json):
async def test_split_by_sql_fmtm_with_extract(db, aoi_json, extract_json, output_json):
"""Test FMTM splitting algorithm, with a provided data extract."""
features = split_by_sql(
features = await split_by_sql(
aoi_json,
db,
num_buildings=5,
Expand All @@ -169,9 +169,9 @@ def test_split_by_sql_fmtm_with_extract(db, aoi_json, extract_json, output_json)
assert sorted(features) == sorted(output_json)


def test_split_by_sql_fmtm_no_extract(aoi_json):
async def test_split_by_sql_fmtm_no_extract(aoi_json):
"""Test FMTM splitting algorithm, with no data extract."""
features = split_by_sql(
features = await split_by_sql(
aoi_json,
# Use separate db connection for longer running test
"postgresql://fmtm:fmtm@fmtm-db:5432/fmtm",
Expand All @@ -182,13 +182,13 @@ def test_split_by_sql_fmtm_no_extract(aoi_json):
assert len(features.get("features")) >= 60


def test_split_by_sql_fmtm_multi_geom(extract_json):
async def test_split_by_sql_fmtm_multi_geom(extract_json):
"""Test FMTM splitting algorithm on a geojson file with multiple geometries."""
with open(f"{TESTDATA_DIR}/kathmandu_split.geojson") as jsonfile:
parsed_featcol = geojson.load(jsonfile)
features = split_by_sql(
parsed_featcol,
"postgresql://fmtm:fmtm@fmtm-db:5432/fmtm",
features = await split_by_sql(
aoi=parsed_featcol,
db="postgresql://fmtm:fmtm@fmtm-db:5432/fmtm",
num_buildings=10,
osm_extract=extract_json,
)
Expand Down
34 changes: 0 additions & 34 deletions src/backend/uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading