Skip to content

Commit 5c558b2

Browse files
authored
Merge pull request #1031 from lonvia/rework-dedups-filter
Convert deduplicaton filter to predicate
2 parents 738a285 + acd8df5 commit 5c558b2

File tree

5 files changed

+77
-85
lines changed

5 files changed

+77
-85
lines changed

src/main/java/de/komoot/photon/GenericSearchHandler.java

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
import de.komoot.photon.query.RequestBase;
44
import de.komoot.photon.query.RequestFactory;
5+
import de.komoot.photon.searcher.PhotonResult;
56
import de.komoot.photon.searcher.ResultFormatter;
67
import de.komoot.photon.searcher.SearchHandler;
78
import de.komoot.photon.searcher.StreetDupesRemover;
@@ -10,6 +11,7 @@
1011
import org.jspecify.annotations.NullMarked;
1112

1213
import java.io.IOException;
14+
import java.util.Comparator;
1315

1416
@NullMarked
1517
public class GenericSearchHandler<T extends RequestBase> implements Handler {
@@ -28,17 +30,14 @@ public GenericSearchHandler(RequestFactory<T> requestFactory, SearchHandler<T> r
2830
public void handle(Context context) {
2931
final T searchRequest = requestFactory.create(context);
3032

31-
var results = requestHandler.search(searchRequest);
33+
var results = requestHandler.search(searchRequest).stream()
34+
.sorted(Comparator.comparingDouble(PhotonResult::getScore).reversed());
3235

33-
// Further filtering
34-
if (searchRequest.getDedupe()){
35-
results = new StreetDupesRemover(searchRequest.getLanguage()).execute(results);
36+
if (searchRequest.getDedupe()) {
37+
results = results.filter(new StreetDupesRemover());
3638
}
3739

38-
// Restrict to the requested limit.
39-
if (results.size() > searchRequest.getLimit()) {
40-
results = results.subList(0, searchRequest.getLimit());
41-
}
40+
results = results.limit(searchRequest.getLimit());
4241

4342
String debugInfo = null;
4443
if (searchRequest.getDebug()) {
@@ -48,7 +47,7 @@ public void handle(Context context) {
4847
try {
4948
context.status(200)
5049
.result(formatter.convert(
51-
results, searchRequest.getLanguage(),
50+
results.toList(), searchRequest.getLanguage(),
5251
searchRequest.getReturnGeometry(),
5352
searchRequest.getDebug(), debugInfo));
5453
} catch (IOException e) {

src/main/java/de/komoot/photon/opensearch/OpenSearchSearchHandler.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,9 @@ public OpenSearchSearchHandler(OpenSearchClient client, int queryTimeout) {
2525

2626
@Override
2727
public List<PhotonResult> search(SimpleSearchRequest request) {
28-
final int limit = request.getLimit();
29-
final int extLimit = limit > 1 ? (int) Math.round(limit * 1.5) : 1;
28+
// Have it return more result candidates than results requested,
29+
// will be reranked and filtered later.
30+
final int extLimit = Math.max(5, (int) Math.round(request.getLimit() * 1.5));
3031

3132
var results = sendQuery(buildQuery(request, false), extLimit);
3233

src/main/java/de/komoot/photon/searcher/PhotonResult.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,11 @@ public interface PhotonResult {
1515
@Nullable
1616
Object get(String key);
1717

18+
default String getOrDefault(String key, String defValue) {
19+
String value = (String) get(key);
20+
return value == null ? defValue : value;
21+
}
22+
1823
@Nullable
1924
String getLocalised(String key, String language, String... altNames);
2025

src/main/java/de/komoot/photon/searcher/StreetDupesRemover.java

Lines changed: 30 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -3,71 +3,47 @@
33
import de.komoot.photon.opensearch.DocFields;
44
import org.jspecify.annotations.NullMarked;
55

6-
import java.util.ArrayList;
76
import java.util.HashSet;
8-
import java.util.List;
7+
import java.util.function.Predicate;
98

109
/**
1110
* Filter out duplicate streets from the list.
1211
*/
1312
@NullMarked
14-
public class StreetDupesRemover {
15-
private final String language;
16-
17-
public StreetDupesRemover(String language) {
18-
this.language = language;
19-
}
20-
21-
public List<PhotonResult> execute(List<PhotonResult> results) {
22-
final List<PhotonResult> filteredItems = new ArrayList<>(results.size());
23-
final HashSet<String> keys = new HashSet<>();
24-
25-
for (PhotonResult result : results) {
26-
if ("highway".equals(result.get(DocFields.OSM_KEY))) {
27-
// result is a street
28-
final String postcode = (String) result.get(DocFields.POSTCODE);
29-
final String name = result.getLocalised(DocFields.NAME, language);
30-
31-
if (postcode != null && name != null) {
32-
// street has a postcode and name
33-
34-
// OSM_VALUE is part of key to avoid deduplication of e.g. bus_stops and streets with same name
35-
String key = (String) result.get(DocFields.OSM_VALUE);
36-
if (key == null) {
37-
key = "";
38-
}
39-
40-
if (language.equals("nl")) {
41-
final String onlyDigitsPostcode = stripNonDigits(postcode);
42-
key += ":" + onlyDigitsPostcode + ":" + name;
43-
} else {
44-
key += ":" + postcode + ":" + name;
13+
public class StreetDupesRemover implements Predicate<PhotonResult> {
14+
final HashSet<String> keys = new HashSet<>();
15+
16+
@Override
17+
public boolean test(PhotonResult result) {
18+
if ("highway".equals(result.get(DocFields.OSM_KEY))) {
19+
// result is a street
20+
final String name = result.getLocalised(DocFields.NAME, "default");
21+
22+
if (name != null) {
23+
final String countryCode = (String) result.get(DocFields.COUNTRYCODE);
24+
String postcode = result.getOrDefault(DocFields.POSTCODE, "");
25+
26+
final var pcLength = postcode.length();
27+
final var spacePos = postcode.indexOf(" ");
28+
if (spacePos > 0) {
29+
if ("NL".equals(countryCode)) {
30+
postcode = postcode.substring(0, spacePos);
31+
} else if ("GB".equals(countryCode) && pcLength > spacePos + 2) {
32+
postcode = postcode.substring(0, spacePos + 2);
4533
}
46-
47-
if (keys.contains(key)) {
48-
// an osm highway object (e.g. street or bus_stop) with this osm_value + name + postcode is already part of the result list
49-
continue;
50-
}
51-
keys.add(key);
5234
}
53-
}
54-
filteredItems.add(result);
55-
}
5635

57-
return filteredItems;
58-
}
36+
// OSM_VALUE is part of key to avoid deduplication of e.g. bus_stops and streets with same name
37+
final String key = String.join(":",
38+
result.getOrDefault(DocFields.OSM_VALUE, ""),
39+
postcode,
40+
name,
41+
countryCode);
5942

60-
private static String stripNonDigits(
61-
final CharSequence input /* inspired by seh's comment */) {
62-
final StringBuilder sb = new StringBuilder(
63-
input.length() /* also inspired by seh's comment */);
64-
for (int i = 0; i < input.length(); i++) {
65-
final char c = input.charAt(i);
66-
if (c > 47 && c < 58) {
67-
sb.append(c);
43+
return keys.add(key);
6844
}
6945
}
70-
return sb.toString();
71-
}
7246

47+
return true;
48+
}
7349
}

src/test/java/de/komoot/photon/searcher/StreetDupesRemoverTest.java

Lines changed: 31 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -3,41 +3,52 @@
33
import de.komoot.photon.opensearch.DocFields;
44
import org.junit.jupiter.api.Test;
55

6-
import java.util.List;
7-
86
import static org.assertj.core.api.Assertions.*;
97

108
class StreetDupesRemoverTest {
119

1210
@Test
1311
void testDeduplicatesStreets() {
14-
StreetDupesRemover streetDupesRemover = new StreetDupesRemover("en");
15-
var allResults = List.of(
16-
createDummyResult("99999", "Main Street", "highway", "Unclassified"),
17-
createDummyResult("99999", "Main Street", "highway", "Unclassified"));
18-
19-
assertThat(streetDupesRemover.execute(allResults))
20-
.hasSize(1);
12+
assertThat(new StreetDupesRemover())
13+
.accepts(highway("99999", "Main Street", "unclassified", "de"))
14+
.rejects(highway("99999", "Main Street", "unclassified", "de"))
15+
.accepts(highway("99999", "Sub Street", "unclassified", "de"))
16+
.accepts(highway("99999", "Main Street", "unclassified", "ch"));
2117
}
2218

2319
@Test
2420
void testStreetAndBusStopNotDeduplicated() {
25-
StreetDupesRemover streetDupesRemover = new StreetDupesRemover("en");
26-
var allResults = List.of(
27-
createDummyResult("99999", "Main Street", "highway", "bus_stop"),
28-
createDummyResult("99999", "Main Street", "highway", "Unclassified"));
21+
assertThat(new StreetDupesRemover())
22+
.accepts(highway("99999", "Main Street", "bus_stop", "de"))
23+
.accepts(highway("99999", "Main Street", "unclassified", "de"));
24+
}
25+
26+
@Test
27+
void testNLStreet() {
28+
assertThat(new StreetDupesRemover())
29+
.accepts(highway("2345 XZ", "Main Street", "unclassified", "nl"))
30+
.accepts(highway("2346 AB", "Main Street", "unclassified", "nl"))
31+
.rejects(highway("2345 AB", "Main Street", "unclassified", "nl"))
32+
.accepts(highway("2", "Main Street", "unclassified", "nl"));
33+
}
34+
35+
@Test
36+
void testGBStreet() {
37+
assertThat(new StreetDupesRemover())
38+
.accepts(highway("WN8 6LT", "Main Street", "unclassified", "gb"))
39+
.accepts(highway("WN8 4LT", "Main Street", "unclassified", "gb"))
40+
.rejects(highway("WN8 634", "Main Street", "unclassified", "gb"))
41+
.accepts(highway("W", "Main Street", "unclassified", "gb"));
2942

30-
assertThat(streetDupesRemover.execute(allResults))
31-
.hasSize(2);
3243
}
3344

34-
private PhotonResult createDummyResult(String postCode, String name, String osmKey,
35-
String osmValue) {
45+
private PhotonResult highway(String postCode, String name, String osmValue, String countryCode) {
3646
return new MockPhotonResult()
3747
.put(DocFields.POSTCODE, postCode)
38-
.putLocalized(DocFields.NAME, "en", name)
39-
.put(DocFields.OSM_KEY, osmKey)
40-
.put(DocFields.OSM_VALUE, osmValue);
48+
.putLocalized(DocFields.NAME, "default", name)
49+
.put(DocFields.OSM_KEY, "highway")
50+
.put(DocFields.OSM_VALUE, osmValue)
51+
.put(DocFields.COUNTRYCODE, countryCode.toUpperCase());
4152
}
4253

4354
}

0 commit comments

Comments
 (0)