Skip to content

Commit ed2f517

Browse files
authored
Merge pull request #23 from open-meteo/update-to-alternatenamesV2
feat: update to alternatenamesV2 to consistently ignore historic geonames#22
2 parents d36a7f3 + 2335919 commit ed2f517

4 files changed

Lines changed: 76 additions & 20 deletions

File tree

README.md

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ Todo:
1414
The standalone `geocodingapi` binary can run on any 64-bit Linux with a recent libc. Currently, only basic installation instructions for Ubuntu 22.04 are available. Docker and other installation methods may be provided later.
1515

1616
```bash
17-
api install zip
17+
apt install zip
1818

1919
wget https://github.com/open-meteo/geocoding-api/releases/download/0.1.1/geocoding-api_0.1.1_jammy_amd64.deb
2020
dpkg -i geocoding-api_0.1.1_jammy_amd64.deb
@@ -23,11 +23,9 @@ mkdir /var/lib/geocoding-api/data
2323
cd /var/lib/geocoding-api/data
2424
mkdir zip
2525
curl http://download.geonames.org/export/dump/allCountries.zip -o allCountries.zip
26-
curl http://download.geonames.org/export/dump/alternateNames.zip -o alternateNames.zip
27-
curl http://download.geonames.org/export/zip/allCountries.zip -o zip/allCountries.zip
26+
curl http://download.geonames.org/export/dump/alternateNamesV2.zip -o alternateNamesV2.zip
2827
unzip allCountries.zip
29-
unzip alternateNames.zip
30-
cd zip; unzip allCountries.zip; cd ..
28+
unzip alternateNamesV2.zip
3129

3230
systemctl enable geocoding-api.service
3331
systemctl start geocoding-api.service

Sources/App/AlternateNames.swift

Lines changed: 24 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -42,15 +42,15 @@ struct AlternateNames {
4242
let isPreferredName = line.seekUntil(value: tab, offset: &offset).asciiToInt8
4343
let isShortName = line.seekUntil(value: tab, offset: &offset).asciiToInt8
4444
let isColloquial = line.seekUntil(value: tab, offset: &offset).asciiToInt8
45-
//let isHistoric = line[line.seekUntil(value: tab, offset: &offset)].asciiToInt8
45+
let isHistoric = line.seekUntil(value: tab, offset: &offset).asciiToInt8
4646

4747
let isolanguageString = isolanguage.string
4848

4949
if isolanguageString == "link" || isolanguageString == "wkdt" || isolanguageString == "fr_1793" {
5050
return
5151
}
5252

53-
if isColloquial == 1 { //isHistoric == 1 ||
53+
if isColloquial == 1 || isHistoric == 1 {
5454
return
5555
}
5656

@@ -68,7 +68,8 @@ struct AlternateNames {
6868
languageId: languages.findOrAppend(isolanguage),
6969
alternateName: alternateName,
7070
isPreferredeName: isPreferredName != 0,
71-
isShortName: isShortName != 0
71+
isShortName: isShortName != 0,
72+
isHistoric: isHistoric != 0
7273
)
7374

7475
if alternateNames[geonameid] != nil {
@@ -106,27 +107,38 @@ private struct AlternateName {
106107
let alternateName: String
107108
let isPreferredeName: Bool
108109
let isShortName: Bool
110+
let isHistoric: Bool
109111
}
110112

111-
fileprivate extension Array where Element == AlternateName {
112-
func getPreferred() -> String {
113-
var short: String? = nil
113+
extension Array where Element == AlternateName {
114+
fileprivate func getPreferred() -> String {
115+
var preferredShort: String? = nil
114116
var preferred: String? = nil
117+
var short: String? = nil
115118
var other: String? = nil
116-
for alternate in self {
117-
if alternate.isPreferredeName && alternate.isShortName {
118-
return alternate.alternateName
119+
var historic: String? = nil
120+
121+
for alternate in self.reversed() {
122+
if alternate.isHistoric {
123+
historic = alternate.alternateName
124+
continue
119125
}
120-
if alternate.isShortName {
121-
short = alternate.alternateName
126+
127+
if alternate.isPreferredeName && alternate.isShortName {
128+
preferredShort = alternate.alternateName
122129
continue
123130
}
124131
if alternate.isPreferredeName {
125132
preferred = alternate.alternateName
126133
continue
127134
}
135+
if alternate.isShortName {
136+
short = alternate.alternateName
137+
continue
138+
}
128139
other = alternate.alternateName
129140
}
130-
return short ?? preferred ?? other ?? ""
141+
142+
return preferredShort ?? preferred ?? short ?? other ?? historic ?? ""
131143
}
132144
}

Sources/App/GeocodingDatabase.swift

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ import Vapor
33

44
extension GeocodingDatabase {
55
static let geonamesFile = URL(fileURLWithPath: "data/allCountries.txt")
6-
static let alternateNamesFiles = URL(fileURLWithPath: "data/alternateNames.txt")
6+
static let alternateNamesFiles = URL(fileURLWithPath: "data/alternateNamesV2.txt")
77
static let databaseFile = URL(fileURLWithPath: "data/database.bin")
88

99
/// Read geonames txt files and create an index protobuf file

Tests/AppTests/geocoding_apiTests.swift

Lines changed: 48 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ final class geocoding_apiTests: XCTestCase {
3232
XCTAssertEqual(q.queue[4].id, 5)
3333
}
3434

35-
func testExample() throws {
35+
func testZugspitze() throws {
3636
let logger = Logger(label: "test")
3737
let data = """
3838
1639953\t2760454\tja\tツークシュピッツェ\t\t\t\t\t\t
@@ -63,8 +63,54 @@ final class geocoding_apiTests: XCTestCase {
6363

6464
let names = AlternateNames(data: data, logger: logger)
6565
XCTAssertEqual(names.alternativesPreferred.count, 1)
66-
XCTAssertEqual(names.alternativesPreferred[2_760_454]?.count, 21)
66+
XCTAssertEqual(names.alternativesPreferred[2_760_454]!.count, 21)
67+
XCTAssertEqual(
68+
names.languages,
69+
[
70+
"ja", "nl", "pt", "sk", "sv", "tr", "it", "fa", "uk", "bar", "ko", "he", "mr", "be",
71+
"ka", "pnb", "lt", "ru", "zh", "ar", "mk",
72+
]
73+
)
74+
XCTAssertEqual(
75+
names.alternativesPreferred[2_760_454]![
76+
Int32(names.languages.firstIndex(of: "nl")!)
77+
]!,
78+
"Zugspitze"
79+
)
80+
}
81+
82+
func testTallinnDoNotUseHistoricNames() throws {
83+
// https://github.com/open-meteo/geocoding-api/issues/19
84+
let logger = Logger(label: "test")
85+
let data = """
86+
343021\t588409\t\tKolyvan\t\t\t\t\t\t
87+
343022\t588409\t\tRevel'\t\t\t\t\t\t
88+
343023\t588409\t\tKallinn\t\t\t\t\t\t
89+
343024\t588409\t\tTallin\t\t\t\t\t\t
90+
343026\t588409\t\tTallina\t\t\t\t\t\t
91+
343027\t588409\t\tReval\t\t\t\t1\t1219\t1918
92+
343028\t588409\t\tTalinas\t\t\t\t\t\t
93+
343029\t588409\t\tTallinna\t\t\t\t\t\t
94+
1894606\t588409\tde\tTallinn\t1\t\t\t\t\t
95+
1894607\t588409\ten\tTallinn\t\t\t\t\t\t
96+
11947298\t588409\tde\tReval\t\t\t\t1\t1219\t1918
97+
16401504\t588409\ten\tRevel\t\t\t\t1\t1219\t1918
98+
""".data(using: .utf8)!
99+
100+
let names = AlternateNames(data: data, logger: logger)
101+
XCTAssertEqual(names.alternativesPreferred.count, 1)
102+
XCTAssertEqual(names.alternativesPreferred[588409]!.count, 3)
103+
XCTAssertEqual(names.languages, ["", "de", "en"])
104+
XCTAssertEqual(
105+
names.alternativesPreferred[588409]![Int32(names.languages.firstIndex(of: "en")!)]!,
106+
"Tallinn"
107+
)
108+
}
67109

110+
func testExample() throws {
111+
// TODO: This test needs to be improved
112+
let logger = Logger(label: "test")
113+
let names = AlternateNames(data: "".data(using: .utf8)!, logger: logger)
68114
let data2 = """
69115
1529666\tBahnhof Grenzau\tBahnhof Grenzau\tBahnhof Grenzau,Grenzau\t50.45663\t7.66505\tS\tRSTN\tDE\t\t08\t00\t07143\t07143032\t0\t\t232\tEurope/Berlin\t2020-10-14
70116
2038682\tBahnhof Annaburg\tBahnhof Annaburg\tAnnaburg,Bahnhof Annaburg,Bahnhof Annaburg West\t51.72858\t13.03311\tS\tRSTN\tDE\t\t11\t\t\t\t0\t\t77\tEurope/Berlin\t2020-10-14

0 commit comments

Comments
 (0)