-
-
Notifications
You must be signed in to change notification settings - Fork 795
wip: use search_name and placex for imports on dropped #3900
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
acee5e7
94fa025
a0c5dd7
cea0010
fad4f46
03e6cda
686af65
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,131 @@ | ||
| -- These functions query unified tables instead of partitions, | ||
| -- which allows TIGER data imports on frozen databases. | ||
|
|
||
| -- TODO: add reinstatiation of partition functions if needed | ||
| DROP FUNCTION IF EXISTS getNearestNamedRoadPlaceId(integer, geometry, jsonb) CASCADE; | ||
| DROP FUNCTION IF EXISTS find_road_with_postcode(integer, text) CASCADE; | ||
| DROP FUNCTION IF EXISTS getNearestRoadPlaceId(integer, geometry) CASCADE; | ||
| DROP FUNCTION IF EXISTS getAddressName(integer, geometry) CASCADE; | ||
| DROP FUNCTION IF EXISTS getNearestParallelRoadFeature(integer, geometry) CASCADE; | ||
|
|
||
|
|
||
| CREATE FUNCTION getNearestParallelRoadFeature(in_partition INTEGER, | ||
| line GEOMETRY) | ||
| RETURNS BIGINT | ||
| AS $$ | ||
| DECLARE | ||
| r RECORD; | ||
| search_diameter FLOAT; | ||
| p1 GEOMETRY; | ||
| p2 GEOMETRY; | ||
| p3 GEOMETRY; | ||
| BEGIN | ||
|
|
||
| IF ST_GeometryType(line) not in ('ST_LineString') THEN | ||
| RETURN NULL; | ||
| END IF; | ||
|
|
||
| p1 := ST_LineInterpolatePoint(line, 0); | ||
| p2 := ST_LineInterpolatePoint(line, 0.5); | ||
| p3 := ST_LineInterpolatePoint(line, 1); | ||
|
|
||
| search_diameter := 0.0005; | ||
| WHILE search_diameter < 0.01 LOOP | ||
| FOR r IN | ||
| SELECT place_id FROM placex | ||
| WHERE ST_DWithin(line, geometry, search_diameter) | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You need to restrict to road feature here. This is done with: |
||
| ORDER BY (ST_distance(geometry, p1)+ | ||
| ST_distance(geometry, p2)+ | ||
| ST_distance(geometry, p3)) ASC limit 1 | ||
| LOOP | ||
| RETURN r.place_id; | ||
| END LOOP; | ||
| search_diameter := search_diameter * 2; | ||
| END LOOP; | ||
|
|
||
| RETURN NULL; | ||
|
|
||
| END | ||
| $$ | ||
| LANGUAGE plpgsql STABLE PARALLEL SAFE; | ||
|
|
||
|
|
||
| CREATE FUNCTION getNearestNamedRoadPlaceId(in_partition INTEGER, | ||
| point GEOMETRY, | ||
| token_info JSONB) | ||
| RETURNS BIGINT AS $$ | ||
| DECLARE | ||
| parent BIGINT; | ||
| BEGIN | ||
| IF not token_has_addr_street(token_info) THEN | ||
| RETURN NULL; | ||
| END IF; | ||
|
|
||
| SELECT place_id INTO parent | ||
| FROM search_name | ||
| WHERE token_matches_street(token_info, name_vector) | ||
| AND centroid && ST_Expand(point, 0.015) | ||
| AND address_rank between 26 and 27 | ||
| ORDER BY ST_Distance(centroid, point) ASC LIMIT 1; | ||
|
|
||
| RETURN parent; | ||
| END | ||
| $$ | ||
| LANGUAGE plpgsql STABLE PARALLEL SAFE; | ||
|
|
||
|
|
||
| CREATE FUNCTION find_road_with_postcode(in_partition INTEGER, postcode TEXT) | ||
| RETURNS BIGINT AS $$ | ||
| DECLARE | ||
| parent BIGINT; | ||
| BEGIN | ||
| SELECT place_id INTO parent | ||
| FROM placex | ||
| WHERE postcode = postcode | ||
| AND rank_address between 26 and 27 | ||
| AND class = 'highway' | ||
| ORDER BY ST_Distance(centroid, | ||
| (SELECT ST_Centroid(geometry) | ||
| FROM placex WHERE postcode = postcode LIMIT 1)) ASC LIMIT 1; | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Two issues with this query: there is no condition that goes to an index on placex, so it will scan over the entire 200mio entries. And postcodes are not in the placex table but in a separate table So the better strategy here that solves both it to find the postcode in the location postcode table and then join on placex to find the nearest road for each result. |
||
|
|
||
| RETURN parent; | ||
| END | ||
| $$ | ||
| LANGUAGE plpgsql STABLE PARALLEL SAFE; | ||
|
|
||
|
|
||
| CREATE FUNCTION getNearestRoadPlaceId(in_partition INTEGER, point GEOMETRY) | ||
| RETURNS BIGINT AS $$ | ||
| DECLARE | ||
| parent BIGINT; | ||
| BEGIN | ||
| SELECT place_id INTO parent | ||
| FROM placex | ||
| WHERE centroid && ST_Expand(point, 0.015) | ||
| AND rank_address between 26 and 27 | ||
| AND class = 'highway' | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The additional class check is not necessary here. Ranks 26 and 27 are guaranteed to be roads. |
||
| ORDER BY ST_Distance(centroid, point) ASC LIMIT 1; | ||
|
|
||
| RETURN parent; | ||
| END | ||
| $$ | ||
| LANGUAGE plpgsql STABLE PARALLEL SAFE; | ||
|
|
||
|
|
||
| CREATE FUNCTION getAddressName(in_partition INTEGER, point GEOMETRY) | ||
| RETURNS TEXT AS $$ | ||
| DECLARE | ||
| name TEXT; | ||
| BEGIN | ||
| SELECT name INTO name | ||
| FROM search_name | ||
| WHERE centroid && ST_Expand(point, 0.015) | ||
| AND rank_address between 26 and 27 | ||
| ORDER BY ST_Distance(centroid, point) ASC LIMIT 1; | ||
|
|
||
| RETURN name; | ||
| END | ||
| $$ | ||
| LANGUAGE plpgsql STABLE PARALLEL SAFE; | ||
|
|
||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. All the function in this file should also check that the partion ID in placex matches the input parameter. |
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -9,8 +9,8 @@ | |
| """ | ||
| import argparse | ||
|
|
||
| from ..db.connection import connect | ||
| from .args import NominatimArgs | ||
| from ..tools import freeze | ||
|
|
||
|
|
||
| class SetupFreeze: | ||
|
|
@@ -29,10 +29,5 @@ def add_args(self, parser: argparse.ArgumentParser) -> None: | |
| pass # No options | ||
|
|
||
| def run(self, args: NominatimArgs) -> int: | ||
| from ..tools import freeze | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I know that function-level import are frowned upon but it is intentional here. If your Python IDE complains about it, please just silence the warning. |
||
|
|
||
| with connect(args.config.get_libpq_dsn()) as conn: | ||
| freeze.drop_update_tables(conn) | ||
| freeze.drop_flatnode_file(args.config.get_path('FLATNODE_FILE')) | ||
|
|
||
| freeze.freeze(args.config) | ||
| return 0 | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -12,7 +12,8 @@ | |
|
|
||
| from psycopg import sql as pysql | ||
|
|
||
| from ..db.connection import Connection, drop_tables, table_exists | ||
| from ..db.connection import Connection, drop_tables, table_exists, connect | ||
| from ..config import Configuration | ||
|
|
||
| UPDATE_TABLES = [ | ||
| 'address_levels', | ||
|
|
@@ -44,6 +45,45 @@ def drop_update_tables(conn: Connection) -> None: | |
| conn.commit() | ||
|
|
||
|
|
||
| def install_frozen_partition_functions(conn: Connection, config: Configuration) -> None: | ||
| """Frozen versions of partition-functions.sql""" | ||
|
|
||
| frozen_functions_file = config.lib_dir.sql / 'functions' / 'frozen-db-functions.sql' | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Simply use |
||
|
|
||
| if not frozen_functions_file.exists(): | ||
| raise FileNotFoundError( | ||
| f"Frozen SQL functions file not found at: {frozen_functions_file}" | ||
| ) | ||
|
|
||
| with open(frozen_functions_file, 'r') as f: | ||
| sql_code = f.read() | ||
|
|
||
| try: | ||
| with conn.cursor() as cur: | ||
| # TODO: execute needs LiteralString | ||
| cur.execute(sql_code) # tyoe: ignore [arg-type] | ||
| conn.commit() | ||
|
|
||
| except Exception as e: | ||
| conn.rollback() | ||
| raise RuntimeError(f"Failed to install frozen-db-functions.sql: {e}") from e | ||
|
|
||
|
|
||
| def freeze(config: Configuration) -> None: | ||
| """Freeze the database for read-only operation.""" | ||
| try: | ||
| with connect(config.get_libpq_dsn()) as connection: | ||
| drop_update_tables(connection) | ||
| install_frozen_partition_functions(connection, config) | ||
| drop_flatnode_file(config.get_path('FLATNODE_FILE')) | ||
|
|
||
| except Exception as e: | ||
| print(f"FREEZE ERROR: {type(e).__name__}: {e}") | ||
| import traceback | ||
| traceback.print_exc() | ||
| raise | ||
|
|
||
|
|
||
| def drop_flatnode_file(fpath: Optional[Path]) -> None: | ||
| """ Remove the flatnode file if it exists. | ||
| """ | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You don't need to explicitly drop the function, just always use 'CREATE OR REPLACE FUNCTION' below. Note that it will only work properly when the function signature is exactly the same. But that should be the case here.