Skip to content

Commit 50f9f06

Browse files
authored
Merge pull request #41 from eyeseast/capture-raw
Capture raw geocoding results
2 parents 89d1d82 + 14d7063 commit 50f9f06

File tree

6 files changed

+196
-7
lines changed

6 files changed

+196
-7
lines changed

Makefile

+7
Original file line numberDiff line numberDiff line change
@@ -13,18 +13,21 @@ nominatim: tests/nominatim.db
1313
geocode-sqlite nominatim $^ innout_test \
1414
--location "{full}, {city}, {state} {postcode}" \
1515
--delay 1 \
16+
--raw \
1617
--user-agent "geocode-sqlite"
1718

1819
.PHONY: mapquest
1920
mapquest: tests/mapquest.db
2021
geocode-sqlite open-mapquest $^ innout_test \
2122
--location "{full}, {city}, {state} {postcode}" \
23+
--raw \
2224
--api-key "$(MAPQUEST_API_KEY)"
2325

2426
.PHONY: google
2527
google: tests/google.db
2628
geocode-sqlite googlev3 $^ innout_test \
2729
--location "{full}, {city}, {state} {postcode}" \
30+
--raw \
2831
--api-key "$(GOOGLE_API_KEY)" \
2932
--bbox 33.030551 -119.787326 34.695341 -115.832248
3033

@@ -33,19 +36,23 @@ bing: tests/bing.db
3336
geocode-sqlite bing $^ innout_test \
3437
--location "{full}, {city}, {state} {postcode}" \
3538
--delay 1 \
39+
--raw \
3640
--api-key "$(BING_API_KEY)"
3741

3842
.PHONY: mapbox
3943
mapbox: tests/mapbox.db
4044
geocode-sqlite mapbox $^ innout_test \
4145
--location "{full}, {city}, {state} {postcode}" \
4246
--delay 1 \
47+
--raw \
4348
--api-key "$(MAPBOX_API_KEY)"
4449

4550
.PHONY: opencage
4651
opencage: tests/opencage.db
4752
geocode-sqlite opencage $^ innout_test \
4853
--location "{full}, {city}, {state} {postcode}" \
54+
--delay '0.1' \
55+
--raw \
4956
--api-key "$(OPENCAGE_API_KEY)"
5057

5158
.PHONY: run

README.md

+21
Original file line numberDiff line numberDiff line change
@@ -84,9 +84,30 @@ From there, we have a set of options passed to every geocoder:
8484
- `longitude`: longitude column name
8585
- `geojson`: store results as GeoJSON, instead of in latitude and longitude columns
8686
- `spatialite`: store results in a SpatiaLite geometry column, instead of in latitude and longitude columns
87+
- `raw`: store the raw geocoding result as JSON in a text column (named `raw` unless you pass a different name)
8788

8889
Each geocoder takes additional, specific arguments beyond these, such as API keys. Again, [geopy's documentation](https://geopy.readthedocs.io/en/latest/#module-geopy.geocoders) is an excellent resource.
8990

91+
## Using SpatiaLite
92+
93+
The `--spatialite` flag will store results in a [geometry column](https://www.gaia-gis.it/gaia-sins/spatialite-cookbook-5/cookbook_topics.adminstration.html#topic_TABLE_to_SpatialTable), instead of `latitude` and `longitude` columns. This is useful if you're doing other GIS operations, such as using a [spatial index](https://www.gaia-gis.it/fossil/libspatialite/wiki?name=SpatialIndex). See the [SpatiaLite cookbook](https://www.gaia-gis.it/gaia-sins/spatialite-cookbook-5/index.html) and [functions list](https://www.gaia-gis.it/gaia-sins/spatialite-sql-latest.html) for more of what's possible.
94+
95+
## Capturing additional geocoding data
96+
97+
Geocoding services typically return more data than just coordinates, such as accuracy, normalized addresses or other context. To capture it, pass the `--raw` flag. Used without a value, `--raw` adds a column named `raw` and stores the full geocoding response there as JSON. If you want a different column name, pass a value, like `--raw custom_raw`. Without the flag, no raw data is stored.
98+
99+
The shape of this response object will vary between services. You can query specific values using [SQLite's built-in JSON functions](https://www.sqlite.org/json1.html). For example, this will work with Google's geocoder:
100+
101+
```sql
102+
select
103+
json_extract(raw, '$.formatted_address') as address,
104+
json_extract(raw, '$.geometry.location_type') as location_type
105+
from
106+
innout_test
107+
```
108+
109+
Check each geocoding service's documentation for what's included in the response.
110+
90111
## Python API
91112

92113
The command line interface aims to support the most common options for each geocoder. For more fine-grained control, use the Python API.

geocode_sqlite/cli.py

+80-5
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,14 @@ def common_options(f):
6767
default=False,
6868
help="""Store results as a SpatiaLite geometry.
6969
Using this will add a geometry column instead of latitude and longitude columns.""",
70+
),
71+
click.option(
72+
"--raw",
73+
is_flag=False,
74+
default="",
75+
flag_value="raw",
76+
help="""Store raw geocoding results as JSON.
77+
This column will be called 'raw' by default. Pass a value to rename it.""",
7078
),
7179
click.pass_context,
7280
]
@@ -86,6 +94,7 @@ def fill_context(
8694
longitude,
8795
geojson,
8896
spatialite,
97+
raw,
8998
**kwargs,
9099
):
91100
"Add common options to context"
@@ -98,6 +107,7 @@ def fill_context(
98107
longitude=longitude,
99108
geojson=geojson,
100109
spatialite=spatialite,
110+
raw=raw,
101111
kwargs=kwargs,
102112
)
103113

@@ -113,6 +123,7 @@ def extract_context(ctx):
113123
ctx.obj["longitude"],
114124
ctx.obj["geojson"],
115125
ctx.obj["spatialite"],
126+
ctx.obj["raw"],
116127
ctx.obj.get("kwargs", {}),
117128
)
118129

@@ -143,7 +154,9 @@ def cli(ctx):
143154

144155

145156
# name changed in click 8.0
146-
result_callback = getattr(cli, "result_callback", None) or getattr(cli, "resultcallback")
157+
result_callback = getattr(cli, "result_callback", None) or getattr(
158+
cli, "resultcallback"
159+
)
147160

148161

149162
@result_callback()
@@ -159,6 +172,7 @@ def geocode(ctx, geocoder):
159172
longitude,
160173
geojson,
161174
spatialite,
175+
raw,
162176
kwargs,
163177
) = extract_context(ctx)
164178

@@ -178,6 +192,9 @@ def geocode(ctx, geocoder):
178192
if longitude != "longitude":
179193
click.echo(f"Using custom longitude field: {longitude}")
180194

195+
if raw and raw != "raw":
196+
click.echo(f"Using custom raw result field: {raw}")
197+
181198
if not (geojson or spatialite) and latitude not in columns:
182199
click.echo(f"Adding column: {latitude}")
183200
table.add_column(latitude, float)
@@ -194,6 +211,10 @@ def geocode(ctx, geocoder):
194211
click.echo("Adding geometry column")
195212
table.add_geometry_column(GEOMETRY_COLUMN, "POINT")
196213

214+
if raw and raw not in columns:
215+
click.echo(f"Adding {raw} column")
216+
table.add_column(raw, str)
217+
197218
if GEOCODER_COLUMN not in table.columns_dict:
198219
click.echo("Adding geocoder column")
199220
table.add_column(GEOCODER_COLUMN, str)
@@ -220,6 +241,7 @@ def geocode(ctx, geocoder):
220241
longitude_column=longitude,
221242
geojson=geojson,
222243
spatialite=spatialite,
244+
raw=raw,
223245
**kwargs,
224246
)
225247

@@ -244,6 +266,11 @@ def geocode(ctx, geocoder):
244266
click.echo(f"{pk}: {location.format(row)}")
245267

246268

269+
#############
270+
# Geocoders #
271+
#############
272+
273+
247274
@cli.command("test", hidden=True)
248275
@common_options
249276
@click.option("-p", "--db-path", type=click.Path(exists=True))
@@ -257,12 +284,22 @@ def use_tester(
257284
longitude,
258285
geojson,
259286
spatialite,
287+
raw,
260288
db_path,
261289
):
262290
"Only use this for testing"
263291
click.echo(f"Using test geocoder with database {db_path}")
264292
fill_context(
265-
ctx, database, table, location, delay, latitude, longitude, geojson, spatialite
293+
ctx,
294+
database,
295+
table,
296+
location,
297+
delay,
298+
latitude,
299+
longitude,
300+
geojson,
301+
spatialite,
302+
raw,
266303
)
267304
return DummyGeocoder(Database(db_path))
268305

@@ -287,12 +324,22 @@ def bing(
287324
longitude,
288325
geojson,
289326
spatialite,
327+
raw,
290328
api_key,
291329
):
292330
"Bing"
293331
click.echo("Using Bing geocoder")
294332
fill_context(
295-
ctx, database, table, location, delay, latitude, longitude, geojson, spatialite
333+
ctx,
334+
database,
335+
table,
336+
location,
337+
delay,
338+
latitude,
339+
longitude,
340+
geojson,
341+
spatialite,
342+
raw,
296343
)
297344
return geocoders.Bing(api_key=api_key)
298345

@@ -321,6 +368,7 @@ def google(
321368
longitude,
322369
geojson,
323370
spatialite,
371+
raw,
324372
api_key,
325373
domain,
326374
bbox,
@@ -337,6 +385,7 @@ def google(
337385
longitude,
338386
geojson,
339387
spatialite,
388+
raw,
340389
bounds=bbox,
341390
)
342391
return geocoders.GoogleV3(api_key=api_key, domain=domain)
@@ -363,6 +412,7 @@ def mapquest(
363412
longitude,
364413
geojson,
365414
spatialite,
415+
raw,
366416
api_key,
367417
bbox,
368418
):
@@ -378,6 +428,7 @@ def mapquest(
378428
longitude,
379429
geojson,
380430
spatialite,
431+
raw,
381432
bounds=bbox,
382433
)
383434
return geocoders.MapQuest(api_key=api_key)
@@ -406,13 +457,23 @@ def nominatim(
406457
longitude,
407458
geojson,
408459
spatialite,
460+
raw,
409461
user_agent,
410462
domain,
411463
):
412464
"Nominatim (OSM)"
413465
click.echo(f"Using Nominatim geocoder at {domain}")
414466
fill_context(
415-
ctx, database, table, location, delay, latitude, longitude, geojson, spatialite
467+
ctx,
468+
database,
469+
table,
470+
location,
471+
delay,
472+
latitude,
473+
longitude,
474+
geojson,
475+
spatialite,
476+
raw,
416477
)
417478
return geocoders.Nominatim(user_agent=user_agent, domain=domain)
418479

@@ -437,12 +498,22 @@ def open_mapquest(
437498
longitude,
438499
geojson,
439500
spatialite,
501+
raw,
440502
api_key,
441503
):
442504
"Open Mapquest"
443505
click.echo("Using MapQuest geocoder")
444506
fill_context(
445-
ctx, database, table, location, delay, latitude, longitude, geojson, spatialite
507+
ctx,
508+
database,
509+
table,
510+
location,
511+
delay,
512+
latitude,
513+
longitude,
514+
geojson,
515+
spatialite,
516+
raw,
446517
)
447518
return geocoders.MapQuest(api_key=api_key)
448519

@@ -474,6 +545,7 @@ def mapbox(
474545
longitude,
475546
geojson,
476547
spatialite,
548+
raw,
477549
api_key,
478550
bbox,
479551
proximity,
@@ -490,6 +562,7 @@ def mapbox(
490562
longitude,
491563
geojson,
492564
spatialite,
565+
raw,
493566
bbox=bbox,
494567
proximity=proximity,
495568
)
@@ -516,6 +589,7 @@ def opencage(
516589
longitude,
517590
geojson,
518591
spatialite,
592+
raw,
519593
api_key,
520594
):
521595
"OpenCage"
@@ -530,5 +604,6 @@ def opencage(
530604
longitude,
531605
geojson,
532606
spatialite,
607+
raw,
533608
)
534609
return geocoders.OpenCage(api_key=api_key)

geocode_sqlite/utils.py

+8-1
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ def geocode_table(
2222
longitude_column="longitude",
2323
geojson=False,
2424
spatialite=False,
25+
raw="",
2526
force=False,
2627
**kwargs,
2728
):
@@ -124,6 +125,7 @@ def geocode_list(
124125
longitude_column="longitude",
125126
geojson=False,
126127
spatialite=False,
128+
raw="",
127129
**kwargs,
128130
):
129131
"""
@@ -140,7 +142,7 @@ def geocode_list(
140142
result = geocode_row(geocode, query_template, row, **kwargs)
141143
if result:
142144
row = update_row(
143-
row, result, latitude_column, longitude_column, geojson, spatialite
145+
row, result, latitude_column, longitude_column, geojson, spatialite, raw
144146
)
145147
row[GEOCODER_COLUMN] = get_geocoder_class(geocode)
146148

@@ -162,6 +164,7 @@ def update_row(
162164
longitude_column="longitude",
163165
geojson=False,
164166
spatialite=False,
167+
raw="",
165168
):
166169
"""
167170
Update a row before saving, either setting latitude and longitude,
@@ -180,6 +183,10 @@ def update_row(
180183
row[longitude_column] = result.longitude
181184
row[latitude_column] = result.latitude
182185

186+
if raw:
187+
# save the raw dictionary, let sqlite-utils turn it into a str
188+
row[raw] = result.raw
189+
183190
return row
184191

185192

setup.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
from setuptools import setup
22
import os
33

4-
VERSION = "0.7.0"
4+
VERSION = "0.8.0"
55

66
requirements = ["click>=7.0", "sqlite_utils", "geopy"]
77

0 commit comments

Comments
 (0)