Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 34 additions & 9 deletions src/nominatim_api/search/geocoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,26 @@ def sort_and_cut_results(self, results: SearchResults) -> SearchResults:

return final

def _get_result_rerank_text(self, result: SearchResult) -> set[str]:
    """ Collect the name strings of a result that are compared against
        the query words during reranking.

        Without configured locales the result's display name is used.
        With locales, the localized names of all address rows that are
        flagged as part of the address are returned. If there are none,
        the localized name of the result itself is used, and the display
        name serves as the last resort.

        Returns an empty set when no usable text is available.
    """
    locales = self.params.locales

    if locales:
        localized: set[str] = set()
        for row in result.address_rows or ():
            if not (row.isaddress and row.names):
                continue
            row_name = locales.display_name(row.names)
            if row_name:
                localized.add(row_name)

        if localized:
            return localized

        if result.names:
            own_name = locales.display_name(result.names)
            # An empty localized name contributes no words to match on,
            # so fall through to the display name rather than returning
            # a set that only contains an empty string.
            if own_name:
                return {own_name}

    return {result.display_name} if result.display_name else set()

def rerank_by_query(self, query: QueryStruct, results: SearchResults) -> None:
""" Adjust the accuracy of the localized result according to how well
they match the original query.
Expand All @@ -184,6 +204,7 @@ def rerank_by_query(self, query: QueryStruct, results: SearchResults) -> None:
for word in re.split('[-,: ]+', phrase.text) if word]
if not qwords:
return
norm_query = self.query_analyzer.normalize_text(' '.join(qwords))

for result in results:
# Negative importance indicates ordering by distance, which is
Expand All @@ -192,8 +213,12 @@ def rerank_by_query(self, query: QueryStruct, results: SearchResults) -> None:
or (result.importance is not None and result.importance < 0):
continue
distance = 0.0
norm = self.query_analyzer.normalize_text(' '.join((result.display_name,
result.country_code or '')))
# Use locale-aware text for word matching so that translated names
# (e.g., name:en) are included in the match pool.
rerank_text = self._get_result_rerank_text(result)
norm = self.query_analyzer.normalize_text(
' '.join((*rerank_text, result.country_code or ''))
)
words = set((w for w in re.split('[-,: ]+', norm) if w))
if not words:
continue
Expand All @@ -207,13 +232,13 @@ def rerank_by_query(self, query: QueryStruct, results: SearchResults) -> None:
# to offset this.
if result.rank_address == 4:
if self.params.locales and result.names:
loc_names = [result.names[t] for t in self.params.locales.name_tags
if t in result.names]
if loc_names:
norm_loc = self.query_analyzer.normalize_text(' '.join(loc_names))
loc_words = set(w for w in re.split('[-,: ]+', norm_loc) if w)
if loc_words and loc_words.isdisjoint(qwords):
result.accuracy += result.calculated_importance() * 0.5
country_names = {self.query_analyzer.normalize_text(result.names[t])
for t in self.params.locales.name_tags
if t in result.names}
if result.country_code:
country_names.add(result.country_code)
if norm_query not in country_names:
result.accuracy += result.calculated_importance() * 0.5
else:
distance *= 2
result.accuracy += distance * 0.3 / sum(len(w) for w in qwords)
Expand Down
20 changes: 20 additions & 0 deletions test/bdd/features/db/query/search_simple.feature
Original file line number Diff line number Diff line change
Expand Up @@ -100,3 +100,23 @@ Feature: Searching of simple objects
Then result 0 contains
| object |
| N10 |

# github #3871
Scenario: Localized result name is used for reranking
Given the places
| osm | class | type | name+name | name+name:en | geometry |
| N1 | place | city | Αθήνα | Athens | country:gr |
| N2 | place | town | Athens | | country:us |
When importing
And geocoding "Athens"
| accept-language |
| en |
Then result 0 contains
| object |
| N1 |
When geocoding "Athens"
| accept-language |
| de |
Then result 0 contains
| object |
| N2 |
Loading