diff --git a/src/backend/app/projects/project_routes.py b/src/backend/app/projects/project_routes.py index 0f7f69b4c4..6d70d1804f 100644 --- a/src/backend/app/projects/project_routes.py +++ b/src/backend/app/projects/project_routes.py @@ -233,6 +233,10 @@ async def task_split( project_geojson: UploadFile = File(...), extract_geojson: Optional[UploadFile] = File(None), no_of_buildings: int = Form(50), + geom_type: Annotated[DbGeomType, Form()] = DbGeomType.POLYGON, + osm_category: Annotated[Optional[XLSFormType], Form()] = XLSFormType.buildings, + centroid: Annotated[bool, Form()] = None, + use_st_within: Annotated[bool, Form()] = None, ): """Split a task into subtasks. @@ -246,6 +250,10 @@ async def task_split( If not included, an extract is generated automatically. no_of_buildings (int, optional): The number of buildings per subtask. Defaults to 50. + geom_type (DbGeomType, optional): The geometry type to extract. + osm_category (XLSFormType, optional): The OSM data category to extract. + centroid (bool, optional): Whether to generate centroids for polygon tasks. + use_st_within (bool, optional): Include features within the AOI. Returns: The result of splitting the task into subtasks. @@ -265,15 +273,15 @@ async def task_split( log.warning("Parsed geojson file contained no geometries") log.debug("STARTED task splitting using provided boundary and data extract") - # NOTE here we pass the connection string and allow area-splitter to - # a use psycopg connection (not async) - features = await run_in_threadpool( - lambda: split_by_sql( - merged_boundary, - settings.FMTM_DB_URL, - num_buildings=no_of_buildings, - osm_extract=parsed_extract, - ) + features = await split_by_sql( + merged_boundary, + settings.FMTM_DB_URL, + num_buildings=no_of_buildings, + osm_extract=parsed_extract, + geom_type=geom_type, + osm_category=osm_category, + centroid=centroid, + use_st_within=use_st_within, ) log.debug("COMPLETE task splitting") return features diff --git a/src/backend/packages/area-splitter/area_splitter/splitter.py b/src/backend/packages/area-splitter/area_splitter/splitter.py index 40bb0901ca..cd37c8a579 100755 --- a/src/backend/packages/area-splitter/area_splitter/splitter.py +++ b/src/backend/packages/area-splitter/area_splitter/splitter.py @@ -15,18 +15,27 @@ """Class and helper methods for task splitting.""" import argparse +import ast +import asyncio import json import logging import math import sys -from io import BytesIO from pathlib import Path -from textwrap import dedent from typing import Optional, Tuple, Union import geojson +import httpx +from app.config import settings +from app.db.enums import DbGeomType, HTTPStatus, XLSFormType +from fastapi import HTTPException from geojson import Feature, FeatureCollection, GeoJSON -from osm_rawdata.postgres import PostgresClient +from osm_data_client import ( + RawDataClient, + RawDataClientConfig, + RawDataOutputOptions, +) +from osm_fieldwork.json_data_models import data_models_path from psycopg import Connection from shapely.geometry import Polygon, box, shape from shapely.ops import unary_union @@ -603,12 +612,16 @@ def split_by_square( return split_features -def split_by_sql( +async def split_by_sql( aoi: Union[str, FeatureCollection], db: Union[str, Connection], + geom_type: Optional[str] = DbGeomType.POLYGON, + osm_category: Optional[str] = XLSFormType.buildings, num_buildings: Optional[int] = None, outfile: Optional[str] = None, osm_extract: Optional[Union[str, FeatureCollection]] = None, + centroid: Optional[bool] = False, + use_st_within: Optional[bool] = None, ) -> FeatureCollection: """Split an AOI with a field-tm algorithm. @@ -636,10 +649,16 @@ def split_by_sql( Optional param, if not included an extract is generated for you. It is recommended to leave this param as default, unless you know what you are doing. + geom_type (str): Type of geometry to extract. + osm_category (str): The OSM data category to extract. + centroid (bool): Generate centroid of polygons. + use_st_within (bool): Include features within the AOI. Returns: features (FeatureCollection): A multipolygon of all the task boundaries. """ + print("geom_type:", geom_type) + geom_type = geom_type.lower() if not num_buildings: err = "num_buildings must be passed, until other algorithms are implemented." log.error(err) @@ -651,36 +670,69 @@ def split_by_sql( # Extracts and parse extract geojson if not osm_extract: - # We want all polylines for splitting: - # buildings, highways, waterways, railways - config_data = dedent( - """ - select: - from: - - nodes - - ways_poly - - ways_line - where: - tags: - - building: not null - highway: not null - waterway: not null - railway: not null - aeroway: not null - """ + config = RawDataClientConfig( + access_token=settings.RAW_DATA_API_AUTH_TOKEN.get_secret_value() + if settings.RAW_DATA_API_AUTH_TOKEN + else None ) - # Must be a BytesIO JSON object - config_bytes = BytesIO(config_data.encode()) + config_filename = XLSFormType(osm_category).name + data_model = f"{data_models_path}/{config_filename}.json" + + with open(data_model, encoding="utf-8") as f: + config_data = json.load(f) + + data_config = { + ("polygon", False): ["ways_poly"], + ("point", True): ["ways_poly", "nodes"], + ("point", False): ["nodes"], + ("polyline", False): ["ways_line"], + } + + config_data["from"] = data_config.get((geom_type, centroid)) + if geom_type == "polyline": + geom_type = "line" # line is recognized as a geomtype in raw-data-api + if not use_st_within: + use_st_within = False if geom_type == "line" else True + + extra_params = { + "fileName": "area_splitter", + "outputType": "geojson", + "geometryType": [geom_type], + "bindZip": False, + "centroid": centroid, + "use_st_within": use_st_within, + "filters": config_data, + } + + try: + result = await RawDataClient(config).get_osm_data( + aoi, + output_options=RawDataOutputOptions(download_file=False), + **extra_params, + ) - pg = PostgresClient( - "underpass", - config_bytes, - ) - # The total FeatureCollection area merged by osm-rawdata automatically - extract_geojson = pg.execQuery( - aoi_featcol, - extra_params={"fileName": "area_splitter", "useStWithin": False}, - ) + url = result.data.get("download_url") + async with httpx.AsyncClient() as client: + result = await client.get(url) + if result.status_code == 200: + extract_geojson = result.json() + else: + log.error("Failed to download OSM extract data") + except Exception as e: + log.error("Raw data API request failed") + if "status 406" in str(e) and "Area" in str(e): + try: + # Extract the error dict part + error_str = str(e).split("status 406:")[-1].strip() + error_dict = ast.literal_eval(error_str) + msg = error_dict["detail"][0]["msg"] + except Exception: + msg = """Selected area is too large. + Please select an area smaller than 200 km².""" + + raise HTTPException( + status_code=HTTPStatus.UNPROCESSABLE_ENTITY, detail=msg + ) from e else: extract_geojson = FMTMSplitter.input_to_geojson(osm_extract) @@ -694,12 +746,16 @@ def split_by_sql( if len(feat_array := aoi_featcol.get("features", [])) > 1: features = [] for index, feat in enumerate(feat_array): - featcol = split_by_sql( - FeatureCollection(features=[feat]), - db, - num_buildings, - f"{Path(outfile).stem}_{index}.geojson)" if outfile else None, - osm_extract, + featcol = await split_by_sql( + aoi=FeatureCollection(features=[feat]), + db=db, + num_buildings=num_buildings, + outfile=f"{Path(outfile).stem}_{index}.geojson" if outfile else None, + osm_extract=osm_extract, + geom_type=geom_type, + osm_category=osm_category, + centroid=centroid, + use_st_within=use_st_within, ) feats = featcol.get("features", []) if feats: @@ -886,12 +942,14 @@ def main(args_list: list[str] | None = None): osm_extract=args.extract, ) elif args.number: - split_by_sql( - args.boundary, - db=args.dburl, - num_buildings=args.number, - outfile=args.outfile, - osm_extract=args.extract, + asyncio.run( + split_by_sql( + args.boundary, + db=args.dburl, + num_buildings=args.number, + outfile=args.outfile, + osm_extract=args.extract, + ) ) # Split by feature using geojson elif args.source and args.source[3:] != "PG:": diff --git a/src/backend/packages/area-splitter/pyproject.toml b/src/backend/packages/area-splitter/pyproject.toml index aaf8acc632..88581c200b 100644 --- a/src/backend/packages/area-splitter/pyproject.toml +++ b/src/backend/packages/area-splitter/pyproject.toml @@ -12,7 +12,6 @@ dependencies = [ "geojson>=2.5.0", "shapely>=1.8.1", "psycopg>=3.1.7", - "osm-rawdata>=0.2.2", ] requires-python = ">=3.10" readme = "README.md" diff --git a/src/backend/packages/area-splitter/tests/test_splitter.py b/src/backend/packages/area-splitter/tests/test_splitter.py index 202f270354..2bf178e90c 100644 --- a/src/backend/packages/area-splitter/tests/test_splitter.py +++ b/src/backend/packages/area-splitter/tests/test_splitter.py @@ -157,9 +157,9 @@ def test_split_by_features_geojson(aoi_json): assert len(features.get("features")) == 4 -def test_split_by_sql_fmtm_with_extract(db, aoi_json, extract_json, output_json): +async def test_split_by_sql_fmtm_with_extract(db, aoi_json, extract_json, output_json): """Test divide by square from geojson file.""" - features = split_by_sql( + features = await split_by_sql( aoi_json, db, num_buildings=5, @@ -169,9 +169,9 @@ def test_split_by_sql_fmtm_with_extract(db, aoi_json, extract_json, output_json) assert sorted(features) == sorted(output_json) -def test_split_by_sql_fmtm_no_extract(aoi_json): +async def test_split_by_sql_fmtm_no_extract(aoi_json): """Test FMTM splitting algorithm, with no data extract.""" - features = split_by_sql( + features = await split_by_sql( aoi_json, # Use separate db connection for longer running test "postgresql://fmtm:fmtm@fmtm-db:5432/fmtm", @@ -182,13 +182,13 @@ def test_split_by_sql_fmtm_no_extract(aoi_json): assert len(features.get("features")) >= 60 -def test_split_by_sql_fmtm_multi_geom(extract_json): +async def test_split_by_sql_fmtm_multi_geom(extract_json): """Test divide by square from geojson file with multiple geometries.""" with open(f"{TESTDATA_DIR}/kathmandu_split.geojson") as jsonfile: parsed_featcol = geojson.load(jsonfile) - features = split_by_sql( - parsed_featcol, - "postgresql://fmtm:fmtm@fmtm-db:5432/fmtm", + features = await split_by_sql( + aoi=parsed_featcol, + db="postgresql://fmtm:fmtm@fmtm-db:5432/fmtm", num_buildings=10, osm_extract=extract_json, ) diff --git a/src/backend/uv.lock b/src/backend/uv.lock index b27d0cf890..0fa21a6f2f 100644 --- a/src/backend/uv.lock +++ b/src/backend/uv.lock @@ -144,7 +144,6 @@ version = "3.0.1" source = { editable = "packages/area-splitter" } dependencies = [ { name = "geojson" }, - { name = "osm-rawdata" }, { name = "psycopg" }, { name = "shapely" }, ] @@ -152,7 +151,6 @@ dependencies = [ [package.metadata] requires-dist = [ { name = "geojson", specifier = ">=2.5.0" }, - { name = "osm-rawdata", specifier = ">=0.2.2" }, { name = "psycopg", specifier = ">=3.1.7" }, { name = "shapely", specifier = ">=1.8.1" }, ] @@ -502,12 +500,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/4d/36/2a115987e2d8c300a974597416d9de88f2444426de9571f4b59b2cca3acc/filelock-3.18.0-py3-none-any.whl", hash = "sha256:c401f4f8377c4464e6db25fff06205fd89bdd83b65eb0488ed1b160f780e21de", size = 16215, upload-time = "2025-03-14T07:11:39.145Z" }, ] -[[package]] -name = "flatdict" -version = "4.0.1" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/3e/0d/424de6e5612f1399ff69bf86500d6a62ff0a4843979701ae97f120c7f1fe/flatdict-4.0.1.tar.gz", hash = "sha256:cd32f08fd31ed21eb09ebc76f06b6bd12046a24f77beb1fd0281917e47f26742", size = 8341, upload-time = "2020-02-13T19:16:14.3Z" } - [[package]] name = "fmtm" version = "2025.6.1" @@ -1445,23 +1437,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/4d/e1/4de4056c1b766149aaa5707b3e51b00f56d1fa37ba3d5bc797ec6625fb77/osm_login_python-2.0.0-py3-none-any.whl", hash = "sha256:1289410fb1967f584ee52b02d96c85ff2cb104068481fa6530d5f3b138ec7afc", size = 17136, upload-time = "2024-08-09T10:35:42.173Z" }, ] -[[package]] -name = "osm-rawdata" -version = "0.4.2" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "flatdict" }, - { name = "geojson" }, - { name = "psycopg2" }, - { name = "pyyaml" }, - { name = "requests" }, - { name = "shapely" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/b1/91/a171558e91813dd98a5d02568005e41c4b0aa6c3e9888ddf08324705cba4/osm-rawdata-0.4.2.tar.gz", hash = "sha256:48080f49ee9d3cacb621561fc58305f36d0aacfdbcdd2debf9994644dd76c91b", size = 43361, upload-time = "2025-02-03T20:19:03.451Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/06/1f/bc59f613db997e62f132b0bf4da865cf83560c9fb65735165279e6eaa553/osm_rawdata-0.4.2-py3-none-any.whl", hash = "sha256:82bf20d1a20c97d0eeb1a908afb41f3b65b51717e535681bd1373f7f39548432", size = 55082, upload-time = "2025-02-03T20:19:02.21Z" }, -] - [[package]] name = "packaging" version = "25.0" @@ -1675,15 +1650,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/47/fd/4feb52a55c1a4bd748f2acaed1903ab54a723c47f6d0242780f4d97104d4/psycopg_pool-3.2.6-py3-none-any.whl", hash = "sha256:5887318a9f6af906d041a0b1dc1c60f8f0dda8340c2572b74e10907b51ed5da7", size = 38252, upload-time = "2025-02-26T12:03:45.073Z" }, ] -[[package]] -name = "psycopg2" -version = "2.9.10" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/62/51/2007ea29e605957a17ac6357115d0c1a1b60c8c984951c19419b3474cdfd/psycopg2-2.9.10.tar.gz", hash = "sha256:12ec0b40b0273f95296233e8750441339298e6a572f7039da5b260e3c8b60e11", size = 385672, upload-time = "2024-10-16T11:24:54.832Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/ae/49/a6cfc94a9c483b1fa401fbcb23aca7892f60c7269c5ffa2ac408364f80dc/psycopg2-2.9.10-cp313-cp313-win_amd64.whl", hash = "sha256:91fd603a2155da8d0cfcdbf8ab24a2d54bca72795b90d2a3ed2b6da8d979dee2", size = 2569060, upload-time = "2025-01-04T20:09:15.28Z" }, -] - [[package]] name = "pycparser" version = "2.22"