Skip to content

Commit d375f1c

Browse files
authored
Merge pull request #4101 from Itz-Agasta/artificial-postcodes
Add stable postcode refs for details lookup and search exclusion
2 parents 0bcc27e + 9e71082 commit d375f1c

12 files changed

Lines changed: 272 additions & 21 deletions

File tree

docs/api/Details.md

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ versions of Nominatim. Do not rely on the output in scripts or applications.
1414

1515

1616

17-
The details API supports the following two request formats:
17+
The details API supports the following three request formats:
1818

1919
``` xml
2020
https://nominatim.openstreetmap.org/details?osmtype=[N|W|R]&osmid=<value>&class=<value>
@@ -39,6 +39,16 @@ for a place is different between Nominatim installation (servers) and
3939
changes when data gets reimported. Therefore it cannot be used as
4040
a permanent id and shouldn't be used in bug reports.
4141

42+
``` xml
43+
https://nominatim.openstreetmap.org/details?postcode=<country_code>:<postcode_id>
44+
```
45+
46+
Artificial postcodes do not always have an OSM object reference. For these,
47+
Nominatim provides a stable postcode reference that combines the country code
48+
and postcode. Spaces in the postcode id are replaced with underscores. For
49+
example, `us:94110` refers to postcode `94110` in the United States and
50+
`gb:EH4_7EA` refers to postcode `EH4 7EA` in Great Britain.
51+
4252
!!! danger "Deprecation warning"
4353
The API can also be used with the URL
4454
`https://nominatim.openstreetmap.org/details.php`. This is now deprecated

docs/api/Search.md

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -231,7 +231,7 @@ to the address layer (see above).
231231

232232
| Parameter | Value | Default |
233233
|-----------| ----- | ------- |
234-
| exclude_place_ids | comma-separated list of ids (OSM IDs where possible, otherwise place_ids) |
234+
| exclude_place_ids | comma-separated list of ids (OSM IDs where possible, otherwise place_ids or stable postcode refs) |
235235

236236
If you do not want certain OSM objects to appear in the search
237237
result, give a comma separated list of the ids you want to skip.
@@ -240,8 +240,9 @@ Each entry may be one of:
240240

241241
* a Nominatim internal `place_id` (for example `125279639`)
242242
* an OSM object reference in the form `<osm_type><osm_id>` where `<osm_type>` is one of `N` (node), `W` (way) or `R` (relation), for example `N107775`
243+
* a stable postcode reference in the form `P<country_code>:<postcode_id>`, for example `Pus:94110` or `Pgb:EH4_7EA`
243244

244-
Usage of OSM IDs is recommended because they are server independent. `place_id`s are stil required for results without an OSM object reference (for example, postcodes and countries). When a street is excluded via its OSM ID, then interpolations and TIGER data derived from that street are excluded as well.
245+
Usage of OSM IDs is recommended because they are server independent. Stable postcode refs should be used for artificial postcode results without an OSM object reference. In postcode refs, spaces are replaced with underscores. `place_id`s are still required for results without either kind of stable reference (for example, countries). When a street is excluded via its OSM ID, then interpolations and TIGER data derived from that street are excluded as well.
245246

246247
This can be used to retrieve additional search results. For example, if a
247248
previous query only returned a few results, then including those here would

src/nominatim_api/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,9 @@
2020
from .status import (StatusResult as StatusResult)
2121
from .types import (PlaceID as PlaceID,
2222
OsmID as OsmID,
23+
PostcodeRef as PostcodeRef,
2324
PlaceRef as PlaceRef,
25+
parse_postcode_param as parse_postcode_param,
2426
Point as Point,
2527
Bbox as Bbox,
2628
GeometryFormat as GeometryFormat,

src/nominatim_api/lookup.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,10 @@ def enumerate_free_osm_ids(self) -> Iterable[Tuple[int, ntyp.OsmID]]:
9090
return ((i, p.pid) for i, p in enumerate(self.lookups)
9191
if p.result is None and isinstance(p.pid, ntyp.OsmID))
9292

93+
def enumerate_free_postcode_refs(self) -> Iterable[Tuple[int, ntyp.PostcodeRef]]:
    """ Iterate over the lookups that are still without a result and
        that are referenced through a stable postcode reference.

        Yields tuples of the position in the lookup list and the
        postcode reference itself.
    """
    indexed_lookups = enumerate(self.lookups)
    return ((pos, entry.pid) for pos, entry in indexed_lookups
            if entry.result is None and isinstance(entry.pid, ntyp.PostcodeRef))
96+
9397

9498
class DetailedCollector:
9599
""" Result collector for detailed lookup.
@@ -139,6 +143,11 @@ def enumerate_free_osm_ids(self) -> Iterable[Tuple[int, ntyp.OsmID]]:
139143
return [(0, self.place)]
140144
return []
141145

146+
def enumerate_free_postcode_refs(self) -> Iterable[Tuple[int, ntyp.PostcodeRef]]:
    """ Return the requested place as a single-element list with index 0
        when it is a stable postcode reference and no result has been
        found yet. Return an empty list in all other cases.
    """
    still_open = self.result is None
    if still_open and isinstance(self.place, ntyp.PostcodeRef):
        return [(0, self.place)]

    return []
150+
142151

143152
Collector = Union[LookupCollector, DetailedCollector]
144153

@@ -301,6 +310,26 @@ async def find_in_postcode(conn: SearchConnection, collector: Collector) -> bool
301310
nres.create_from_postcode_row):
302311
return True
303312

313+
postcode_refs = [{'i': i, 'cc': p.country_code, 'pc': p.postcode}
314+
for i, p in collector.enumerate_free_postcode_refs()]
315+
316+
if postcode_refs:
317+
ref_tab = sa.func.JsonArrayEach(sa.type_coerce(postcode_refs, sa.JSON))\
318+
.table_valued(sa.column('value', type_=sa.JSON))
319+
t = conn.t.postcode
320+
sql = sa.select(ref_tab.c.value['i'].as_integer().label('_idx'),
321+
t.c.osm_id, t.c.place_id, t.c.parent_place_id,
322+
t.c.rank_search,
323+
t.c.indexed_date, t.c.postcode, t.c.country_code,
324+
t.c.centroid)\
325+
.where(t.c.osm_id.is_(None))\
326+
.where(t.c.country_code == ref_tab.c.value['cc'].as_string())\
327+
.where(t.c.postcode == ref_tab.c.value['pc'].as_string())
328+
329+
if await collector.add_rows_from_sql(conn, sql, t.c.geometry,
330+
nres.create_from_postcode_row):
331+
return True
332+
304333
osm_ids = [{'i': i, 'oi': p.osm_id}
305334
for i, p in collector.enumerate_free_osm_ids() if p.osm_type == 'R']
306335

src/nominatim_api/search/geocoder.py

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
import sqlalchemy as sa
1616

1717
from ..connection import SearchConnection
18-
from ..types import PlaceRef, SearchDetails, PlaceID, OsmID
18+
from ..types import PlaceRef, SearchDetails, PlaceID, OsmID, PostcodeRef
1919
from ..results import SearchResult, SearchResults, add_result_details
2020
from ..timeout import Timeout
2121
from ..logging import log
@@ -53,6 +53,19 @@ async def _resolve_excluded_osm_ids(self) -> None:
5353

5454
place_ids: List[PlaceRef] = [e for e in excluded if isinstance(e, PlaceID)]
5555
osm_ids = [e for e in excluded if isinstance(e, OsmID)]
56+
postcode_refs = [e for e in excluded if isinstance(e, PostcodeRef)]
57+
58+
if postcode_refs:
59+
p = self.conn.t.postcode
60+
conditions = [
61+
sa.and_(p.c.osm_id.is_(None),
62+
p.c.country_code == ref.country_code,
63+
p.c.postcode == ref.postcode)
64+
for ref in postcode_refs
65+
]
66+
sql = sa.select(p.c.place_id).where(sa.or_(*conditions))
67+
place_ids.extend(PlaceID(row.place_id)
68+
for row in await self.conn.execute(sql))
5669

5770
if osm_ids:
5871
t = self.conn.t.placex

src/nominatim_api/types.py

Lines changed: 66 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
import datetime as dt
1515
import enum
1616
import math
17+
import re
1718
from struct import unpack
1819
from binascii import unhexlify
1920

@@ -22,6 +23,10 @@
2223
from .errors import UsageError
2324

2425

26+
POSTCODE_REF_RE = re.compile(r'^P(?P<cc>[A-Za-z]{2}):(?P<postcode>.+)$')
27+
POSTCODE_PARAM_RE = re.compile(r'^(?P<cc>[A-Za-z]{2}):(?P<postcode>.+)$')
28+
29+
2530
@dataclasses.dataclass
2631
class PlaceID:
2732
""" Reference a place by Nominatim's internal ID.
@@ -81,7 +86,64 @@ def class_as_housenumber(self) -> Optional[int]:
8186
return None
8287

8388

84-
PlaceRef = Union[PlaceID, OsmID]
89+
@dataclasses.dataclass
class PostcodeRef:
    """ Reference an artificial postcode by country code and postcode.

        The reference is normalised on construction: the country code is
        lower-cased and underscores in the postcode are converted into
        spaces. `PostcodeRef('GB', 'EH4_7EA')` and
        `PostcodeRef('gb', 'EH4 7EA')` therefore compare equal.

        Raises:
            ValueError: when the country code does not consist of exactly
                two ASCII letters or when the postcode is empty.
    """
    country_code: str
    postcode: str

    def __post_init__(self) -> None:
        # str.isalpha() on its own is Unicode-aware and would accept codes
        # like 'éé'. Country codes are plain two-letter ASCII codes, so
        # require ASCII explicitly.
        cc = self.country_code
        if len(cc) != 2 or not (cc.isascii() and cc.isalpha()):
            raise ValueError('Country code must be two letters.')
        if not self.postcode:
            raise ValueError('Postcode must not be empty.')
        self.country_code = cc.lower()
        self.postcode = self.postcode.replace('_', ' ')

    def __str__(self) -> str:
        # String form is 'P<country_code>:<postcode with underscores>'.
        return f"P{self.country_code}:{self.place_id_part()}"

    def place_id_part(self) -> str:
        """Return a URL-safe identifier part for APIs.

           This is the postcode with all spaces replaced by underscores.
        """
        return self.postcode.replace(' ', '_')
111+
112+
113+
PlaceRef = Union[PlaceID, OsmID, PostcodeRef]
114+
115+
116+
def parse_place_ref(ref: Any) -> PlaceRef:
    """ Convert a stable place reference into a place reference object.

        Objects that already are a PlaceID, OsmID or PostcodeRef are
        handed back unchanged. Strings are only accepted in the postcode
        reference form matched by POSTCODE_REF_RE. Any other string and
        any value of another type raises a UsageError.
    """
    if isinstance(ref, str):
        parsed = POSTCODE_REF_RE.fullmatch(ref)
        if parsed is None:
            raise UsageError(f"Invalid place_ref: {ref}")
        return PostcodeRef(parsed.group('cc'), parsed.group('postcode'))

    if isinstance(ref, (PlaceID, OsmID, PostcodeRef)):
        return ref

    raise UsageError("Parameter 'place_ref' must be a string.")
129+
130+
131+
def parse_postcode_param(ref: Any) -> PostcodeRef:
    """Turn the value of the /details postcode parameter into a PostcodeRef.

       A PostcodeRef instance is returned as is. A string is tried first
       against POSTCODE_REF_RE (the form with a leading 'P') and then
       against POSTCODE_PARAM_RE (the form without it). A UsageError is
       raised for non-string input and for strings matching neither form.
    """
    if isinstance(ref, PostcodeRef):
        return ref

    if not isinstance(ref, str):
        raise UsageError("Parameter 'postcode' must be a string.")

    for pattern in (POSTCODE_REF_RE, POSTCODE_PARAM_RE):
        found = pattern.fullmatch(ref)
        if found is not None:
            return PostcodeRef(found.group('cc'), found.group('postcode'))

    raise UsageError(f"Invalid postcode ID: {ref}")
85147

86148

87149
class Point(NamedTuple):
@@ -427,7 +489,7 @@ def format_excluded(ids: Any) -> List[PlaceRef]:
427489
for i in plist:
428490
if not i:
429491
continue
430-
if isinstance(i, (PlaceID, OsmID)):
492+
if isinstance(i, (PlaceID, OsmID, PostcodeRef)):
431493
result.append(i)
432494
elif isinstance(i, int):
433495
if i > 0:
@@ -439,6 +501,8 @@ def format_excluded(ids: Any) -> List[PlaceRef]:
439501
elif len(i) > 1 and i[0].upper() in ('N', 'W', 'R') and i[1:].isdigit():
440502
if int(i[1:]) > 0:
441503
result.append(OsmID(i[0].upper(), int(i[1:])))
504+
elif match := POSTCODE_REF_RE.fullmatch(i):
505+
result.append(PostcodeRef(match.group('cc'), match.group('postcode')))
442506
else:
443507
raise UsageError(f"Invalid exclude ID: {i}")
444508
else:

src/nominatim_api/v1/format_json.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
from ..utils.json_writer import JsonWriter
1313
from ..results import AddressLines, ReverseResults, SearchResults
1414
from . import classtypes as cl
15-
from .helpers import _add_admin_level
15+
from .helpers import _add_admin_level, result_to_exclude_id
1616
from ..types import EntranceDetails
1717

1818

@@ -108,6 +108,10 @@ def format_base_json(results: Union[ReverseResults, SearchResults],
108108

109109
_write_osm_id(out, result.osm_object)
110110

111+
if (postcode_id := result_to_exclude_id(result)) is not None \
112+
and postcode_id.startswith('P'):
113+
out.keyval('postcode_id', postcode_id)
114+
111115
# lat and lon must be string values
112116
out.keyval('lat', f"{result.centroid.lat:0.7f}")\
113117
.keyval('lon', f"{result.centroid.lon:0.7f}")\
@@ -192,6 +196,10 @@ def format_base_geojson(results: Union[ReverseResults, SearchResults],
192196

193197
_write_osm_id(out, result.osm_object)
194198

199+
if (postcode_id := result_to_exclude_id(result)) is not None \
200+
and postcode_id.startswith('P'):
201+
out.keyval('postcode_id', postcode_id)
202+
195203
out.keyval('place_rank', result.rank_search)\
196204
.keyval('category', result.category[0])\
197205
.keyval('type', result.category[1])\
@@ -262,6 +270,10 @@ def format_base_geocodejson(results: Union[ReverseResults, SearchResults],
262270

263271
_write_osm_id(out, result.osm_object)
264272

273+
if (postcode_id := result_to_exclude_id(result)) is not None \
274+
and postcode_id.startswith('P'):
275+
out.keyval('postcode_id', postcode_id)
276+
265277
out.keyval('osm_key', result.category[0])\
266278
.keyval('osm_value', result.category[1])\
267279
.keyval('type', GEOCODEJSON_RANKS[max(3, min(28, result.rank_address))])\

src/nominatim_api/v1/helpers.py

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
import re
1414

1515
from ..results import SearchResults, SourceTable, BaseResult
16-
from ..types import SearchDetails, GeometryFormat
16+
from ..types import SearchDetails, GeometryFormat, PostcodeRef
1717

1818

1919
def _add_admin_level(result: BaseResult) -> Optional[Dict[str, str]]:
@@ -107,6 +107,22 @@ def extend_query_parts(queryparts: Dict[str, Any], details: Dict[str, Any],
107107
queryparts['featureType'] = feature_type
108108

109109

110+
def result_to_exclude_id(result: BaseResult) -> Optional[str]:
    """Return the identifier to use when referring back to a result.

       Preference order: the OSM object as '<type><id>', then the stable
       postcode reference for place/postcode results that have a country
       code and a 'ref' name, then the place_id as string. Returns None
       when none of these is available.
    """
    osm = result.osm_object
    if osm:
        return f"{osm[0]}{osm[1]}"

    is_postcode = result.category == ('place', 'postcode')
    if is_postcode and result.country_code and result.names and 'ref' in result.names:
        return str(PostcodeRef(result.country_code, result.names['ref']))

    return str(result.place_id) if result.place_id else None
124+
125+
110126
def deduplicate_results(results: SearchResults, max_results: int) -> SearchResults:
111127
""" Remove results that look like duplicates.
112128

src/nominatim_api/v1/server_glue.py

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
from .. import logging as loglib
2121
from ..core import NominatimAPIAsync
2222
from .format import RawDataList
23-
from ..types import DataLayer, GeometryFormat, PlaceRef, PlaceID, OsmID, Point
23+
from ..types import DataLayer, GeometryFormat, PlaceRef, PlaceID, OsmID, Point, parse_postcode_param
2424
from ..status import StatusResult
2525
from ..results import DetailedResult, ReverseResults, SearchResult, SearchResults
2626
from ..localization import Locales
@@ -155,11 +155,15 @@ async def details_endpoint(api: NominatimAPIAsync, params: ASGIAdaptor) -> Any:
155155
place: PlaceRef
156156
if place_id:
157157
place = PlaceID(place_id)
158-
else:
159-
osmtype = params.get('osmtype')
160-
if osmtype is None:
161-
params.raise_error("Missing ID parameter 'place_id' or 'osmtype'.")
158+
elif (postcode := params.get('postcode')) is not None:
159+
try:
160+
place = parse_postcode_param(postcode)
161+
except UsageError as err:
162+
params.raise_error(str(err))
163+
elif (osmtype := params.get('osmtype')) is not None:
162164
place = OsmID(osmtype, params.get_int('osmid'), params.get('class'))
165+
else:
166+
params.raise_error("Missing ID parameter 'place_id', 'postcode' or 'osmtype'.")
163167

164168
debug = setup_debugging(params)
165169

@@ -179,7 +183,7 @@ async def details_endpoint(api: NominatimAPIAsync, params: ASGIAdaptor) -> Any:
179183
return build_response(params, loglib.get_and_disable())
180184

181185
if result is None:
182-
params.raise_error('No place with that OSM ID found.', status=404)
186+
params.raise_error('No place with that ID found.', status=404)
183187

184188
locales = Locales.from_accept_languages(get_accepted_languages(params),
185189
params.config().OUTPUT_NAMES)
@@ -374,11 +378,9 @@ async def search_endpoint(api: NominatimAPIAsync, params: ASGIAdaptor) -> Any:
374378
params.get('featureType', ''),
375379
params.get_bool('namedetails', False),
376380
params.get_bool('extratags', False),
377-
(f"{r.osm_object[0]}{r.osm_object[1]}"
378-
if r.osm_object
379-
else str(r.place_id)
380-
for r in results
381-
if r.osm_object or r.place_id))
381+
(rid for rid in (helpers.result_to_exclude_id(r)
382+
for r in results)
383+
if rid is not None))
382384
queryparts['format'] = fmt
383385

384386
moreurl = params.base_uri() + '/search?' + urlencode(queryparts)

test/python/api/test_api_details.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -534,6 +534,29 @@ def test_lookup_in_postcode(apiobj, frontend):
534534
assert result.geometry == {'type': 'ST_Polygon'}
535535

536536

537+
def test_lookup_in_postcode_by_stable_ref(apiobj, frontend):
    """ A postcode row must be retrievable through the details API
        when addressed via a PostcodeRef.
    """
    postcode_row = {
        'place_id': 554,
        'parent_place_id': 152,
        'postcode': '94110',
        'country_code': 'us',
        'rank_search': 20,
        'indexed_date': dt.datetime(2022, 12, 7, 14, 14, 46, 0),
        'centroid': 'POINT(-122.4170874 37.7536873)',
        'geometry': 'POLYGON((-122.47 37.70, -122.47 37.80, '
                    '-122.36 37.75, -122.47 37.70))',
    }
    apiobj.add_postcode(**postcode_row)

    api = frontend(apiobj, options={'details'})
    found = api.details(napi.PostcodeRef('us', '94110'))

    assert found is not None
    assert found.source_table.name == 'POSTCODE'
    assert found.place_id == 554
    assert found.osm_object is None
    assert found.names == {'ref': '94110'}
    assert found.country_code == 'us'
559+
537560
@pytest.mark.parametrize('lookup', [napi.PlaceID(9000),
538561
napi.OsmID('R', 12)])
539562
def test_lookup_postcode_with_address_details(apiobj, frontend, lookup):

0 commit comments

Comments
 (0)