Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
131 changes: 131 additions & 0 deletions lib-sql/functions/frozen-db-functions.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
-- These functions query unified tables instead of partitions,
-- which allows TIGER data imports on frozen databases.

-- TODO: add reinstatiation of partition functions if needed
DROP FUNCTION IF EXISTS getNearestNamedRoadPlaceId(integer, geometry, jsonb) CASCADE;
DROP FUNCTION IF EXISTS find_road_with_postcode(integer, text) CASCADE;
DROP FUNCTION IF EXISTS getNearestRoadPlaceId(integer, geometry) CASCADE;
DROP FUNCTION IF EXISTS getAddressName(integer, geometry) CASCADE;
DROP FUNCTION IF EXISTS getNearestParallelRoadFeature(integer, geometry) CASCADE;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You don't need to explicitly drop the function, just always use 'CREATE OR REPLACE FUNCTION' below. Note that it will only work properly when the function signature is exactly the same. But that should be the case here.



CREATE FUNCTION getNearestParallelRoadFeature(in_partition INTEGER,
line GEOMETRY)
RETURNS BIGINT
AS $$
DECLARE
r RECORD;
search_diameter FLOAT;
p1 GEOMETRY;
p2 GEOMETRY;
p3 GEOMETRY;
BEGIN

IF ST_GeometryType(line) not in ('ST_LineString') THEN
RETURN NULL;
END IF;

p1 := ST_LineInterpolatePoint(line, 0);
p2 := ST_LineInterpolatePoint(line, 0.5);
p3 := ST_LineInterpolatePoint(line, 1);

search_diameter := 0.0005;
WHILE search_diameter < 0.01 LOOP
FOR r IN
SELECT place_id FROM placex
WHERE ST_DWithin(line, geometry, search_diameter)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You need to restrict to road feature here. This is done with: rank_address between 26 and 27.

ORDER BY (ST_distance(geometry, p1)+
ST_distance(geometry, p2)+
ST_distance(geometry, p3)) ASC limit 1
LOOP
RETURN r.place_id;
END LOOP;
search_diameter := search_diameter * 2;
END LOOP;

RETURN NULL;

END
$$
LANGUAGE plpgsql STABLE PARALLEL SAFE;


CREATE FUNCTION getNearestNamedRoadPlaceId(in_partition INTEGER,
point GEOMETRY,
token_info JSONB)
RETURNS BIGINT AS $$
DECLARE
parent BIGINT;
BEGIN
IF not token_has_addr_street(token_info) THEN
RETURN NULL;
END IF;

SELECT place_id INTO parent
FROM search_name
WHERE token_matches_street(token_info, name_vector)
AND centroid && ST_Expand(point, 0.015)
AND address_rank between 26 and 27
ORDER BY ST_Distance(centroid, point) ASC LIMIT 1;

RETURN parent;
END
$$
LANGUAGE plpgsql STABLE PARALLEL SAFE;


CREATE FUNCTION find_road_with_postcode(in_partition INTEGER, postcode TEXT)
RETURNS BIGINT AS $$
DECLARE
parent BIGINT;
BEGIN
SELECT place_id INTO parent
FROM placex
WHERE postcode = postcode
AND rank_address between 26 and 27
AND class = 'highway'
ORDER BY ST_Distance(centroid,
(SELECT ST_Centroid(geometry)
FROM placex WHERE postcode = postcode LIMIT 1)) ASC LIMIT 1;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Two issues with this query: there is no condition that goes to an index on placex, so it will scan over the entire 200mio entries. And postcodes are not in the placex table but in a separate table location_postcode.

So the better strategy here that solves both it to find the postcode in the location postcode table and then join on placex to find the nearest road for each result.


RETURN parent;
END
$$
LANGUAGE plpgsql STABLE PARALLEL SAFE;


CREATE FUNCTION getNearestRoadPlaceId(in_partition INTEGER, point GEOMETRY)
RETURNS BIGINT AS $$
DECLARE
parent BIGINT;
BEGIN
SELECT place_id INTO parent
FROM placex
WHERE centroid && ST_Expand(point, 0.015)
AND rank_address between 26 and 27
AND class = 'highway'
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The additional class check is not necessary here. Ranks 26 and 27 are guaranteed to be roads.

ORDER BY ST_Distance(centroid, point) ASC LIMIT 1;

RETURN parent;
END
$$
LANGUAGE plpgsql STABLE PARALLEL SAFE;


CREATE FUNCTION getAddressName(in_partition INTEGER, point GEOMETRY)
RETURNS TEXT AS $$
DECLARE
name TEXT;
BEGIN
SELECT name INTO name
FROM search_name
WHERE centroid && ST_Expand(point, 0.015)
AND rank_address between 26 and 27
ORDER BY ST_Distance(centroid, point) ASC LIMIT 1;

RETURN name;
END
$$
LANGUAGE plpgsql STABLE PARALLEL SAFE;

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

All the function in this file should also check that the partion ID in placex matches the input parameter.


12 changes: 6 additions & 6 deletions src/nominatim_db/clicmd/add_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@
import psutil

from .args import NominatimArgs
from ..db.connection import connect
from ..tools.freeze import is_frozen
# from ..db.connection import connect
# from ..tools.freeze import is_frozen


LOG = logging.getLogger()
Expand Down Expand Up @@ -65,10 +65,10 @@ def add_args(self, parser: argparse.ArgumentParser) -> None:
def run(self, args: NominatimArgs) -> int:
from ..tools import add_osm_data

with connect(args.config.get_libpq_dsn()) as conn:
if is_frozen(conn):
print('Database is marked frozen. New data can\'t be added.')
return 1
# with connect(args.config.get_libpq_dsn()) as conn:
# if is_frozen(conn):
# print('Database is marked frozen. New data can\'t be added.')
# return 1

if args.tiger_data:
return asyncio.run(self._add_tiger_data(args))
Expand Down
9 changes: 2 additions & 7 deletions src/nominatim_db/clicmd/freeze.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@
"""
import argparse

from ..db.connection import connect
from .args import NominatimArgs
from ..tools import freeze


class SetupFreeze:
Expand All @@ -29,10 +29,5 @@ def add_args(self, parser: argparse.ArgumentParser) -> None:
pass # No options

def run(self, args: NominatimArgs) -> int:
from ..tools import freeze
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I know that function-level import are frowned upon but it is intentional here. If your Python IDE complains about it, please just silence the warning.


with connect(args.config.get_libpq_dsn()) as conn:
freeze.drop_update_tables(conn)
freeze.drop_flatnode_file(args.config.get_path('FLATNODE_FILE'))

freeze.freeze(args.config)
return 0
1 change: 1 addition & 0 deletions src/nominatim_db/clicmd/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,7 @@ async def async_run(self, args: NominatimArgs) -> int:
if args.no_updates:
conn.autocommit = True
freeze.drop_update_tables(conn)
freeze.install_frozen_partition_functions(conn, args.config)
tokenizer.finalize_import(args.config)

LOG.warning('Recompute word counts')
Expand Down
42 changes: 41 additions & 1 deletion src/nominatim_db/tools/freeze.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@

from psycopg import sql as pysql

from ..db.connection import Connection, drop_tables, table_exists
from ..db.connection import Connection, drop_tables, table_exists, connect
from ..config import Configuration

UPDATE_TABLES = [
'address_levels',
Expand Down Expand Up @@ -44,6 +45,45 @@ def drop_update_tables(conn: Connection) -> None:
conn.commit()


def install_frozen_partition_functions(conn: Connection, config: Configuration) -> None:
"""Frozen versions of partition-functions.sql"""

frozen_functions_file = config.lib_dir.sql / 'functions' / 'frozen-db-functions.sql'
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.


if not frozen_functions_file.exists():
raise FileNotFoundError(
f"Frozen SQL functions file not found at: {frozen_functions_file}"
)

with open(frozen_functions_file, 'r') as f:
sql_code = f.read()

try:
with conn.cursor() as cur:
# TODO: execute needs LiteralString
cur.execute(sql_code) # tyoe: ignore [arg-type]
conn.commit()

except Exception as e:
conn.rollback()
raise RuntimeError(f"Failed to install frozen-db-functions.sql: {e}") from e


def freeze(config: Configuration) -> None:
"""Freeze the database for read-only operation."""
try:
with connect(config.get_libpq_dsn()) as connection:
drop_update_tables(connection)
install_frozen_partition_functions(connection, config)
drop_flatnode_file(config.get_path('FLATNODE_FILE'))

except Exception as e:
print(f"FREEZE ERROR: {type(e).__name__}: {e}")
import traceback
traceback.print_exc()
raise


def drop_flatnode_file(fpath: Optional[Path]) -> None:
""" Remove the flatnode file if it exists.
"""
Expand Down
9 changes: 5 additions & 4 deletions src/nominatim_db/tools/tiger_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
from ..db.query_pool import QueryPool
from ..data.place_info import PlaceInfo
from ..tokenizer.base import AbstractTokenizer
from . import freeze
# from . import freeze

LOG = logging.getLogger()

Expand Down Expand Up @@ -90,9 +90,10 @@ async def add_tiger_data(data_dir: str, config: Configuration, threads: int,
"""
dsn = config.get_libpq_dsn()

with connect(dsn) as conn:
if freeze.is_frozen(conn):
raise UsageError("Tiger cannot be imported when database frozen (Github issue #3048)")
# ===trying to fix===
# with connect(dsn) as conn:
# if freeze.is_frozen(conn):
# raise UsageError("Tiger cannot be imported when database frozen (Github issue #3048)")

with TigerInput(data_dir) as tar:
if not tar:
Expand Down
26 changes: 17 additions & 9 deletions test/python/tools/test_tiger_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,16 +88,24 @@ async def test_add_tiger_data(def_config, src_dir, tiger_table, tokenizer_mock,


@pytest.mark.asyncio
async def test_add_tiger_data_database_frozen(def_config, temp_db_conn, tiger_table, tokenizer_mock,
tmp_path):
async def test_add_tiger_data_after_drop_updates(def_config,
src_dir,
temp_db_conn,
tiger_table,
tokenizer_mock):
freeze.drop_update_tables(temp_db_conn)

with pytest.raises(UsageError) as excinfo:
await tiger_data.add_tiger_data(str(tmp_path), def_config, 1, tokenizer_mock())

assert "database frozen" in str(excinfo.value)

assert tiger_table.count() == 0
initial_count = tiger_table.count()

result = await tiger_data.add_tiger_data(
str(src_dir / "test" / "testdb" / "tiger"),
def_config,
threads=1,
tokenizer=tokenizer_mock(),
)

assert result == 0
final_count = tiger_table.count()
assert final_count > initial_count


@pytest.mark.asyncio
Expand Down